[
  {
    "path": ".gitattributes",
    "content": "* text=auto\nmodin/_version.py export-subst\n"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/bug-report.yaml",
    "content": "name: Bug report\ndescription: Report incorrect behavior in the Modin library\ntitle: 'BUG: '\nlabels: ['bug 🦗', 'Triage 🩹']\n\nbody:\n  - type: checkboxes\n    id: checks\n    attributes:\n      label: Modin version checks\n      options:\n        - label: >\n            I have checked that this issue has not already been reported.\n          required: true\n        - label: >\n            I have confirmed this bug exists on the latest released version of Modin.\n          required: true\n        - label: >\n            I have confirmed this bug exists on the main branch of Modin. (In order to do this you\n            can follow [this guide](https://modin.readthedocs.io/en/stable/getting_started/installation.html#installing-from-the-github-main-branch).)\n  - type: textarea\n    id: example\n    attributes:\n      label: Reproducible Example\n      description: >\n        Please follow [this guide](https://matthewrocklin.com/blog/work/2018/02/28/minimal-bug-reports) on how to\n        provide a minimal, copy-pastable example.\n      placeholder: >\n        import modin.pandas as pd\n\n        df = pd.DataFrame(range(5))\n\n        ...\n      render: python\n    validations:\n      required: true\n  - type: textarea\n    id: problem\n    attributes:\n      label: Issue Description\n      description: >\n        Please provide a description of the issue shown in the reproducible example.\n    validations:\n      required: true\n  - type: textarea\n    id: expected-behavior\n    attributes:\n      label: Expected Behavior\n      description: >\n        Please describe or show a code example of the expected behavior.\n    validations:\n      required: true\n  - type: textarea\n    id: logs\n    attributes:\n      label: Error Logs\n      description: >\n        Please paste the output of any relevant error logs.\n      value: >\n        <details>\n\n\n        ```python-traceback\n\n\n        Replace this line with the error backtrace (if 
applicable).\n\n\n        ```\n\n\n        </details>\n  - type: textarea\n    id: version\n    attributes:\n      label: Installed Versions\n      description: >\n        Please paste the output of ``pd.show_versions()``\n      value: >\n        <details>\n\n\n        Replace this line with the output of pd.show_versions()\n\n\n        </details>\n    validations:\n      required: true\n"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/feature_request.md",
    "content": "---\nname: Feature request\nabout: Request a new API or feature implementation\ntitle: ''\nlabels: 'new feature/request 💬, Triage 🩹'\nassignees: ''\n\n---\n\n**Is your feature request related to a problem? Please describe.**\nA clear and concise description of what the problem is. What kind of performance improvements would you like to see with this new API?\n"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/question.md",
    "content": "---\nname: Question\nabout: You want to ask a question\ntitle: ''\nlabels: 'question ❓, Triage 🩹'\nassignees: ''\n\n---\n\n\n"
  },
  {
    "path": ".github/PULL_REQUEST_TEMPLATE.md",
    "content": "<!--\nThank you for your contribution!\nPlease review the contributing docs: https://modin.readthedocs.io/en/latest/development/contributing.html\nif you have questions about contributing.\n-->\n\n## What do these changes do?\n\n<!-- Please give a short brief about these changes. -->\n\n- [x] first commit message and PR title follow format outlined [here](https://modin.readthedocs.io/en/latest/development/contributing.html#commit-message-formatting)\n  > **_NOTE:_**  If you edit the PR title to match this format, you need to add another commit (even if it's empty) or amend your last commit for the CI job that checks the PR title to pick up the new PR title.\n- [ ] passes `flake8 modin/ asv_bench/benchmarks scripts/doc_checker.py`\n- [ ] passes `black --check modin/ asv_bench/benchmarks scripts/doc_checker.py`\n- [ ] signed commit with `git commit -s` <!-- you can amend your commit with a signature via `git commit --amend -s` -->\n- [ ] Resolves #? <!-- issue must be created for each patch -->\n- [ ] tests added and passing\n- [ ] module layout described at `docs/development/architecture.rst` is up-to-date <!-- if you have added, renamed or removed files or directories please update the documentation accordingly -->\n"
  },
  {
    "path": ".github/actions/mamba-env/action.yml",
    "content": "name: \"Install environment using Mamba\"\ndescription: \"Prepare the environment to run Modin\"\ninputs:\n  python-version:\n    description: \"Python version to install\"\n    default: \"3.9\"\n  environment-file:\n    description: \"Conda environment yml\"\n    required: true\n  activate-environment:\n    description: \"Conda environment to activate\"\n    default: \"modin\"\n\nruns:\n  using: \"composite\"\n  steps:\n    - name: Get current week\n      id: get-week\n      # use current week as cache key to periodically refresh the cache,\n      # as cache is based on requirements, but dependencies push\n      # updated versions at some irregular pace\n      run: echo \"thisweek=$(/bin/date -u '+%Y.w%W')\" >> $GITHUB_OUTPUT\n      shell: bash\n    - name: Cache conda\n      id: cache-conda\n      uses: actions/cache@v4\n      with:\n        path: |\n          ~/conda_pkgs_dir\n          ~/.cache/pip\n        key:\n          ${{ runner.os }}-conda-${{ steps.get-week.outputs.thisweek }}-${{ hashFiles(inputs.environment-file) }}\n    - uses: conda-incubator/setup-miniconda@v3\n      with:\n        miniforge-variant: Miniforge3\n        miniforge-version: latest\n        use-mamba: true\n        activate-environment: ${{ inputs.activate-environment }}\n        environment-file: ${{ inputs.environment-file }}\n        python-version: ${{ inputs.python-version }}\n        channel-priority: strict\n        # we set use-only-tar-bz2 to false in order for conda to properly find new packages to be installed\n        # for more info see https://github.com/conda-incubator/setup-miniconda/issues/264\n        use-only-tar-bz2: false\n    - shell: bash -l {0}\n      run: |\n        conda run -n ${{ inputs.activate-environment }} pip install .\n        conda list -n ${{ inputs.activate-environment }}\n"
  },
  {
    "path": ".github/actions/python-only/action.yml",
    "content": "name: \"Install Python only\"\ndescription: \"Prepare the environment to run simple tasks\"\ninputs:\n  python-version:\n    description: \"Python version to install\"\n    default: \"3.9\"\n\nruns:\n  using: \"composite\"\n  steps:\n    - uses: actions/setup-python@v5\n      with:\n        python-version: ${{ inputs.python-version }}\n        architecture: \"x64\"\n        cache: 'pip'\n"
  },
  {
    "path": ".github/actions/run-core-tests/action.yml",
    "content": "name: \"Run core Modin tests\"\ndescription: \"Run core Modin tests like dataframe or groupby\"\ninputs:\n  runner:\n    description: \"Runner for tests\"\n    default: \"python -m pytest\"\n  parallel:\n    description: \"How to run tests in parallel\"\n    default: \"-n 2\"\n\nruns:\n  using: \"composite\"\n  steps:\n    - uses: ./.github/actions/run-core-tests/group_1\n      with:\n        runner: ${{ inputs.runner }}\n        parallel: ${{ inputs.parallel }}\n    - uses: ./.github/actions/run-core-tests/group_2\n      with:\n        runner: ${{ inputs.runner }}\n        parallel: ${{ inputs.parallel }}\n    - uses: ./.github/actions/run-core-tests/group_3\n      with:\n        runner: ${{ inputs.runner }}\n        parallel: ${{ inputs.parallel }}\n    - uses: ./.github/actions/run-core-tests/group_4\n      with:\n        runner: ${{ inputs.runner }}\n        parallel: ${{ inputs.parallel }}\n"
  },
  {
    "path": ".github/actions/run-core-tests/group_1/action.yml",
    "content": "name: \"Run core Modin tests - group 1\"\ndescription: \"Run core Modin tests like dataframe or groupby\"\ninputs:\n  runner:\n    description: \"Runner for tests\"\n    default: \"python -m pytest\"\n  parallel:\n    description: \"How to run tests in parallel\"\n    default: \"-n 2\"\n\nruns:\n  using: \"composite\"\n  steps:\n      - run: |\n          echo \"::group::Running dataframe tests (group 1)...\"\n          ${{ inputs.runner }} ${{ inputs.parallel }} modin/tests/pandas/dataframe/test_binary.py \\\n                                                      modin/tests/pandas/dataframe/test_default.py \\\n                                                      modin/tests/pandas/dataframe/test_indexing.py \\\n                                                      modin/tests/pandas/dataframe/test_iter.py\n          echo \"::endgroup::\"\n        shell: bash -l {0}\n"
  },
  {
    "path": ".github/actions/run-core-tests/group_2/action.yml",
    "content": "name: \"Run core Modin tests - group 2\"\ndescription: \"Run core Modin tests like dataframe or groupby\"\ninputs:\n  runner:\n    description: \"Runner for tests\"\n    default: \"python -m pytest\"\n  parallel:\n    description: \"How to run tests in parallel\"\n    default: \"-n 2\"\n\nruns:\n  using: \"composite\"\n  steps:\n      - run: |\n          echo \"::group::Running dataframe tests (group 2)...\"\n          ${{ inputs.runner }} ${{ inputs.parallel }} modin/tests/pandas/dataframe/test_join_sort.py \\\n                                                      modin/tests/pandas/dataframe/test_reduce.py \\\n                                                      modin/tests/pandas/dataframe/test_udf.py \\\n                                                      modin/tests/pandas/dataframe/test_window.py \\\n                                                      modin/tests/pandas/dataframe/test_pickle.py \\\n                                                      modin/tests/pandas/test_repartition.py \\\n                                                      modin/tests/pandas/test_backend.py\n          echo \"::endgroup::\"\n        shell: bash -l {0}\n"
  },
  {
    "path": ".github/actions/run-core-tests/group_3/action.yml",
    "content": "name: \"Run core Modin tests - group 3\"\ndescription: \"Run core Modin tests like dataframe or groupby\"\ninputs:\n  runner:\n    description: \"Runner for tests\"\n    default: \"python -m pytest\"\n  parallel:\n    description: \"How to run tests in parallel\"\n    default: \"-n 2\"\n\nruns:\n  using: \"composite\"\n  steps:\n      - run: |\n          echo \"::group::Running tests (group 3)...\"\n          ${{ inputs.runner }} ${{ inputs.parallel }} modin/tests/pandas/test_series.py \\\n                                                      modin/tests/pandas/dataframe/test_map_metadata.py\n          echo \"::endgroup::\"\n        shell: bash -l {0}\n      - run: |\n          echo \"::group::Running range-partitioning tests (group 3)...\"\n          MODIN_RANGE_PARTITIONING=1 ${{ inputs.runner }} ${{ inputs.parallel }} modin/tests/pandas/test_groupby.py\n          MODIN_RANGE_PARTITIONING=1 ${{ inputs.runner }} ${{ inputs.parallel }} modin/tests/pandas/test_series.py -k \"test_unique or test_nunique or drop_duplicates or test_resample\"\n          MODIN_RANGE_PARTITIONING=1 ${{ inputs.runner }} ${{ inputs.parallel }} modin/tests/pandas/test_general.py -k \"test_unique\"\n          MODIN_RANGE_PARTITIONING=1 ${{ inputs.runner }} ${{ inputs.parallel }} modin/tests/pandas/dataframe/test_map_metadata.py -k \"drop_duplicates\"\n          MODIN_RANGE_PARTITIONING=1 ${{ inputs.runner }} ${{ inputs.parallel }} modin/tests/pandas/dataframe/test_join_sort.py -k \"merge\"\n          MODIN_RANGE_PARTITIONING=1 ${{ inputs.runner }} ${{ inputs.parallel }} modin/tests/pandas/dataframe/test_default.py -k \"resample\"\n          echo \"::endgroup::\"\n        shell: bash -l {0}\n"
  },
  {
    "path": ".github/actions/run-core-tests/group_4/action.yml",
    "content": "name: \"Run core Modin tests - group 4\"\ndescription: \"Run core Modin tests like dataframe or groupby\"\ninputs:\n  runner:\n    description: \"Runner for tests\"\n    default: \"python -m pytest\"\n  parallel:\n    description: \"How to run tests in parallel\"\n    default: \"-n 2\"\n\nruns:\n  using: \"composite\"\n  steps:\n      - run: |\n          echo \"::group::Running tests (group 4)...\"\n          ${{ inputs.runner }} ${{ inputs.parallel }} modin/tests/pandas/test_rolling.py \\\n                                                      modin/tests/pandas/test_expanding.py \\\n                                                      modin/tests/pandas/test_groupby.py \\\n                                                      modin/tests/pandas/test_reshape.py \\\n                                                      modin/tests/pandas/test_general.py\n          echo \"::endgroup::\"\n        shell: bash -l {0}\n      - run: |\n          echo \"::group::Running concat tests (group 4)...\"\n          ${{ inputs.runner }} modin/tests/pandas/test_concat.py # Ray and Dask versions fail with -n 2\n          echo \"::endgroup::\"\n        shell: bash -l {0}\n"
  },
  {
    "path": ".github/actions/upload-coverage/action.yml",
    "content": "name: Upload Coverage\ndescription: Upload coverage files\n\nruns:\n  using: \"composite\"\n\n  steps:\n    - run: |\n        COVERAGE_UUID=$(python3 -c \"import uuid; print(uuid.uuid4())\")\n        mv .coverage .coverage.${COVERAGE_UUID}\n        echo \"COVERAGE_UUID=${COVERAGE_UUID}\" >> $GITHUB_ENV\n      id: coverage-uuid\n      shell: bash\n    - uses: actions/upload-artifact@v4\n      with:\n        name: coverage-data-${{ env.COVERAGE_UUID }}\n        path: .coverage*\n        include-hidden-files: true\n"
  },
  {
    "path": ".github/dependabot.yaml",
    "content": "version: 2\nupdates:\n  - package-ecosystem: \"github-actions\"\n    directory: \"/\"\n    schedule:\n      interval: \"monthly\"\n    groups:\n      github-actions:\n        patterns:\n          - \"*\"\n"
  },
  {
    "path": ".github/stale.yml",
    "content": "# Number of days of inactivity before an Issue or Pull Request becomes stale\ndaysUntilStale: 365\n\n# Number of days of inactivity before an Issue or Pull Request with the stale label is closed.\n# Set to false to disable. If disabled, issues still need to be closed manually, but will remain marked as stale.\ndaysUntilClose: 7\n\n# Only issues or pull requests with all of these labels are checked if stale. Defaults to `[]` (disabled)\nonlyLabels: []\n\n# Issues or Pull Requests with these labels will never be considered stale. Set to `[]` to disable\nexemptLabels: []\n\n# Set to true to ignore issues in a project (defaults to false)\nexemptProjects: false\n\n# Set to true to ignore issues in a milestone (defaults to false)\nexemptMilestones: false\n\n# Set to true to ignore issues with an assignee (defaults to false)\nexemptAssignees: false\n\n# Label to use when marking as stale\nstaleLabel: stale\n\n# Comment to post when marking as stale. Set to `false` to disable\nmarkComment: >\n  This issue has been automatically marked as stale because it has not had\n  recent activity. It will be closed if no further activity occurs within the next\n  7 days. Thank you for your contributions.\n\n# Comment to post when removing the stale label.\n# unmarkComment: >\n#   Your comment here.\n\n# Comment to post when closing a stale Issue or Pull Request.\n closeComment: >\n   Closing as stale.\n"
  },
  {
    "path": ".github/workflows/ci-notebooks.yml",
    "content": "name: ci-notebooks\non:\n  pull_request:\n    paths:\n      - modin/**\n      - examples/tutorial/**\n      - .github/workflows/ci-notebooks.yml\n      - setup.cfg\n      - setup.py\n      - requirements/env_unidist_linux.yml\nconcurrency:\n  # Cancel other jobs in the same branch. We don't care whether CI passes\n  # on old commits.\n  group: ${{ github.workflow }}-${{ github.ref }}\n  cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }}\nenv:\n  MODIN_GITHUB_CI: true\njobs:\n  test-tutorial-notebooks:\n    defaults:\n      run:\n        shell: bash -l {0}\n    name: test tutorial notebooks\n    runs-on: ubuntu-latest\n    strategy:\n      matrix:\n        execution: [pandas_on_ray, pandas_on_dask, pandas_on_unidist]\n    steps:\n      - uses: actions/checkout@v4\n      - uses: ./.github/actions/python-only\n        if: matrix.execution != 'pandas_on_unidist'\n      - uses: ./.github/actions/mamba-env\n        with:\n          environment-file: requirements/env_unidist_linux.yml\n          activate-environment: modin_on_unidist\n        if: matrix.execution == 'pandas_on_unidist'\n      - name: Cache datasets\n        uses: actions/cache@v4\n        with:\n          path: taxi.csv\n          # update cache only if notebooks require it to be changed\n          key: taxi-csv-dataset-${{ hashFiles('examples/tutorial/jupyter/**') }}\n      # replace modin with . 
in the tutorial requirements file for `pandas_on_ray` and\n      # `pandas_on_dask` since we need Modin built from sources\n      - run: sed -i 's/modin/./g' examples/tutorial/jupyter/execution/${{ matrix.execution }}/requirements.txt\n        if: matrix.execution != 'pandas_on_unidist'\n      # install dependencies required for notebooks execution for `pandas_on_ray` and `pandas_on_dask`\n      # Override modin-spreadsheet install for now\n      - run: |\n          pip install -r examples/tutorial/jupyter/execution/${{ matrix.execution }}/requirements.txt\n          pip install git+https://github.com/modin-project/modin-spreadsheet.git@49ffd89f683f54c311867d602c55443fb11bf2a5\n        if: matrix.execution != 'pandas_on_unidist'\n      # Build Modin from sources for `pandas_on_unidist`\n      - run: pip install -e .\n        if: matrix.execution == 'pandas_on_unidist'\n      # install test dependencies\n      # NOTE: If you are changing the set of packages installed here, make sure that\n      # the dev requirements match them.\n      - run: pip install pytest pytest-cov black flake8 flake8-print flake8-no-implicit-concat\n        if: matrix.execution != 'pandas_on_unidist'\n      - run: pip install flake8-print jupyter nbformat nbconvert\n        if: matrix.execution == 'pandas_on_unidist'\n      - run: pip list\n        if: matrix.execution != 'pandas_on_unidist'\n      - run: |\n          conda info\n          conda list\n        if: matrix.execution == 'pandas_on_unidist'\n      # setup kernel configuration for `pandas_on_unidist` execution with mpi backend\n      - run: python examples/tutorial/jupyter/execution/${{ matrix.execution }}/setup_kernel.py\n        if: matrix.execution == 'pandas_on_unidist'\n      - run: jupyter kernelspec list\n      - run: |\n          black --check --diff examples/tutorial/jupyter/execution/${{ matrix.execution }}/test/test_notebooks.py\n          black --check --diff examples/tutorial/jupyter/execution/test/utils.py\n      - 
run: |\n          flake8 --enable=T examples/tutorial/jupyter/execution/${{ matrix.execution }}/test/test_notebooks.py\n          flake8 --enable=T examples/tutorial/jupyter/execution/test/utils.py\n      - run: python -m pytest examples/tutorial/jupyter/execution/${{ matrix.execution }}/test/test_notebooks.py\n"
  },
  {
    "path": ".github/workflows/ci-required.yml",
    "content": "name: ci-required\non: pull_request\nconcurrency:\n  # Cancel other jobs in the same branch. We don't care whether CI passes\n  # on old commits.\n  group: ${{ github.workflow }}-${{ github.ref }}\n  cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }}\nenv:\n  MODIN_GITHUB_CI: true\n\njobs:\n\n  check-pr-title:\n    runs-on: ubuntu-latest\n    steps:\n    - uses: Slashgear/action-check-pr-title@v4.3.0\n      with:\n        # NOTE: If you change the allowed prefixes here, update\n        # the documentation about them in /docs/development/contributing.rst\n        regexp: '^(?:FEAT|DOCS|FIX|REFACTOR|TEST|PERF)-#\\d+:'\n\n  build-docs:\n    name: build docs\n    runs-on: ubuntu-latest\n    steps:\n      - uses: actions/checkout@v4\n        with:\n          fetch-depth: 1\n      - uses: actions/setup-python@v5\n        with:\n          python-version: \"3.9\"\n          architecture: \"x64\"\n          cache: \"pip\"\n          cache-dependency-path: '**/requirements-doc.txt'\n      - run: pip install -r docs/requirements-doc.txt\n      - run: cd docs && sphinx-build -T -E -W -b html . 
build\n\n  lint-pydocstyle:\n    name: lint (pydocstyle)\n    runs-on: ubuntu-latest\n    steps:\n      - uses: actions/checkout@v4\n      - uses: ./.github/actions/python-only\n      # The `numpydoc` version here MUST match the versions in the dev requirements files.\n      - run: pip install pytest pytest-cov pydocstyle numpydoc==1.6.0\n      - run: python -m pytest scripts/test\n      - run: pip install -e \".[all]\"\n      - run: |\n          python scripts/doc_checker.py --add-ignore=D101,D102,D103,D105 --disable-numpydoc \\\n            modin/pandas/dataframe.py modin/pandas/series.py \\\n            modin/pandas/groupby.py \\\n            modin/pandas/series_utils.py modin/pandas/general.py \\\n            modin/pandas/plotting.py modin/pandas/utils.py \\\n            modin/pandas/iterator.py modin/pandas/indexing.py \\\n      - run: python scripts/doc_checker.py modin/core/dataframe\n      - run: python scripts/doc_checker.py modin/core/execution/dask\n      - run: |\n          python scripts/doc_checker.py \\\n            modin/pandas/accessor.py modin/pandas/general.py \\\n            modin/pandas/groupby.py modin/pandas/indexing.py \\\n            modin/pandas/iterator.py modin/pandas/plotting.py \\\n            modin/pandas/series_utils.py modin/pandas/utils.py \\\n            modin/pandas/base.py \\\n            modin/pandas/io.py \\\n            asv_bench/benchmarks/utils \\\n            asv_bench/benchmarks/__init__.py asv_bench/benchmarks/io/__init__.py \\\n            asv_bench/benchmarks/scalability/__init__.py \\\n            modin/core/io \\\n            modin/pandas/series.py \\\n            modin/core/execution/python \\\n            modin/pandas/dataframe.py \\\n            modin/config/__init__.py \\\n            modin/config/__main__.py \\\n            modin/config/envvars.py \\\n            modin/config/pubsub.py\n      - run: python scripts/doc_checker.py modin/distributed\n      - run: python scripts/doc_checker.py modin/utils.py\n      
- run: python scripts/doc_checker.py modin/experimental/sklearn\n      - run: |\n          python scripts/doc_checker.py modin/experimental/xgboost/__init__.py \\\n            modin/experimental/xgboost/utils.py modin/experimental/xgboost/xgboost.py \\\n            modin/experimental/xgboost/xgboost_ray.py\n      - run: python scripts/doc_checker.py modin/core/execution/ray\n      - run: |\n          python scripts/doc_checker.py modin/core/execution/dispatching/factories/factories.py \\\n            modin/core/execution/dispatching/factories/dispatcher.py                            \\\n      - run: python scripts/doc_checker.py scripts/doc_checker.py\n      - run: |\n          python scripts/doc_checker.py modin/experimental/pandas/io.py \\\n            modin/experimental/pandas/__init__.py\n      - run: python scripts/doc_checker.py modin/core/storage_formats/base\n      - run: python scripts/doc_checker.py modin/core/storage_formats/pandas\n      - run: python scripts/doc_checker.py modin/experimental/batch/pipeline.py\n      - run: python scripts/doc_checker.py modin/logging\n\n  lint-black-isort:\n    name: lint (black and isort)\n    runs-on: ubuntu-latest\n    steps:\n      - uses: actions/checkout@v4\n      - uses: ./.github/actions/python-only\n      - run: pip install black>=24.1.0 isort>=5.12\n      # NOTE: keep the black command here in sync with the pre-commit hook in\n      # /contributing/pre-commit\n      - run: black --check --diff modin/ asv_bench/benchmarks scripts/doc_checker.py\n      - run: isort . --check-only\n"
  },
  {
    "path": ".github/workflows/ci.yml",
    "content": "name: ci\non:\n  pull_request:\n    paths:\n      # NOTE: keep these paths in sync with the paths that trigger the\n      # fuzzydata Github Actions in .github/workflows/fuzzydata-test.yml\n      - .github/workflows/**\n      - .github/actions/**\n      - '!.github/workflows/push-to-main.yml'\n      - asv_bench/**\n      - modin/**\n      - requirements/**\n      - scripts/**\n      - environment-dev.yml\n      - requirements-dev.txt\n      - setup.cfg\n      - setup.py\n      - versioneer.py\n  push:\n  schedule:\n    - cron: \"30 2 * * WED\"\n    - cron: \"30 2 * * THU\"\nconcurrency:\n  # Cancel other jobs in the same branch. We don't care whether CI passes\n  # on old commits.\n  group: ${{ github.workflow }}-${{ github.ref }}\n  cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }}\nenv:\n  MODIN_GITHUB_CI: true\n\njobs:\n  python-filter:\n    runs-on: ubuntu-latest\n    outputs:\n      python-version: ${{ steps.choose.outputs.python-version }}\n    steps:\n    - id: choose\n      run: |\n        if [[ \"${{ github.event.schedule }}\" = \"30 2 * * WED\" ]]\n        then\n          echo \"python-version=3.10\" >> \"$GITHUB_OUTPUT\"\n        elif [[ \"${{ github.event.schedule }}\" = \"30 2 * * THU\" ]]\n        then\n          echo \"python-version=3.11\" >> \"$GITHUB_OUTPUT\"\n        else\n          echo \"python-version=3.9\" >> \"$GITHUB_OUTPUT\"\n        fi\n\n  lint-mypy:\n    needs: [python-filter]\n    name: lint (mypy)\n    runs-on: ubuntu-latest\n    steps:\n      - uses: actions/checkout@v4\n      - uses: ./.github/actions/python-only\n        with:\n          python-version: ${{ needs.python-filter.outputs.python-version }}\n      - run: pip install -r requirements-dev.txt\n      - run: mypy --config-file mypy.ini\n\n  lint-flake8:\n    needs: [python-filter]\n    name: lint (flake8)\n    runs-on: ubuntu-latest\n    steps:\n      - uses: actions/checkout@v4\n      - uses: ./.github/actions/python-only\n        with:\n       
   python-version: ${{ needs.python-filter.outputs.python-version }}\n      # NOTE: If you are changing the set of packages installed here, make sure that\n      # the dev requirements match them.\n      - run: pip install flake8 flake8-print flake8-no-implicit-concat\n      # NOTE: keep the flake8 command here in sync with the pre-commit hook in\n      # /contributing/pre-commit\n      - run: flake8 modin/ asv_bench/benchmarks scripts/doc_checker.py\n\n  test-api-and-no-engine:\n    needs: [python-filter]\n    name: Test API, headers and no-engine mode\n    runs-on: ubuntu-latest\n    defaults:\n      run:\n        shell: bash -l {0}\n    steps:\n      - uses: actions/checkout@v4\n      - uses: ./.github/actions/mamba-env\n        with:\n          environment-file: requirements/requirements-no-engine.yml\n          python-version: ${{ needs.python-filter.outputs.python-version }}\n      - run: python -m pytest modin/tests/pandas/test_api.py\n      - run: python -m pytest modin/tests/test_executions_api.py\n      - run: python -m pytest modin/tests/test_headers.py\n      - run: python -m pytest modin/tests/core/test_dispatcher.py::test_add_option\n      - uses: ./.github/actions/upload-coverage\n\n  test-clean-install:\n    needs: [lint-flake8, python-filter]\n    strategy:\n      matrix:\n        os:\n          - ubuntu\n          - windows\n    runs-on: ${{ matrix.os }}-latest\n    defaults:\n      run:\n        shell: bash -l {0}\n    name: test-clean-install-${{ matrix.os }}\n    steps:\n      - uses: actions/checkout@v4\n      - uses: ./.github/actions/python-only\n        with:\n          python-version: ${{ needs.python-filter.outputs.python-version }}\n      - run: python -m pip install -e \".[all]\"\n      - name: Ensure Ray and Dask engines start up\n        run: |\n          MODIN_ENGINE=dask python -c \"import modin.pandas as pd; print(pd.DataFrame([1,2,3]))\"\n          MODIN_ENGINE=ray python -c \"import modin.pandas as pd; 
print(pd.DataFrame([1,2,3]))\"\n      - name: Ensure MPI engine start up\n        # Install a working MPI implementation beforehand so mpi4py can link to it\n        run: |\n          sudo apt-get update\n          sudo apt-get install software-properties-common\n\n          sudo add-apt-repository \"deb http://archive.ubuntu.com/ubuntu jammy main universe restricted multiverse\"\n          sudo add-apt-repository \"deb http://archive.ubuntu.com/ubuntu jammy-updates main universe restricted multiverse\"\n          sudo add-apt-repository \"deb http://security.ubuntu.com/ubuntu jammy-security main universe restricted multiverse\"\n          sudo apt-get update\n\n          sudo apt-get install libmpich-dev=4.0-3 libmpich12=4.0-3 mpich=4.0-3\n          python -m pip install -e \".[mpi]\"\n          # mpi4py 4.1 does not work with the mpich versions above.\n          # TODO(https://github.com/modin-project/modin/issues/7615): figure out\n          # the correct libmpich versions for mpi4py >= 4.1\n          python -m pip install \"mpi4py<4.1\"\n          MODIN_ENGINE=unidist UNIDIST_BACKEND=mpi mpiexec -n 1 python -c \"import modin.pandas as pd; print(pd.DataFrame([1,2,3]))\"\n        if: matrix.os == 'ubuntu'\n\n  test-internals:\n    needs: [lint-flake8, python-filter]\n    runs-on: ubuntu-latest\n    defaults:\n      run:\n        shell: bash -l {0}\n    name: test-internals\n    steps:\n      - uses: actions/checkout@v4\n      - uses: ./.github/actions/mamba-env\n        with:\n          environment-file: environment-dev.yml\n          python-version: ${{ needs.python-filter.outputs.python-version }}\n      - name: Internals tests\n        run: python -m pytest modin/tests/core/test_dispatcher.py\n      - run: python -m pytest modin/tests/config\n      - run: python -m pytest modin/tests/test_envvar_catcher.py\n      - run: python -m pytest modin/tests/core/storage_formats/base/test_internals.py\n      - run: python -m pytest 
modin/tests/core/storage_formats/pandas/test_internals.py\n      - run: python -m pytest modin/tests/test_envvar_npartitions.py\n      - run: python -m pytest modin/tests/test_utils.py\n      - run: python -m pytest asv_bench/test/test_utils.py\n      - run: python -m pytest modin/tests/interchange/dataframe_protocol/base\n      - run: python -m pytest modin/tests/test_dataframe_api_standard.py\n      - run: python -m pytest modin/tests/test_logging.py\n      - run: python -m pytest modin/tests/test_metrics.py\n      - run: python -m pytest modin/tests/pandas/extensions\n      - uses: ./.github/actions/upload-coverage\n\n  test-defaults:\n    needs: [lint-flake8, python-filter]\n    runs-on: ubuntu-latest\n    defaults:\n      run:\n        shell: bash -l {0}\n    strategy:\n      matrix:\n        execution: [BaseOnPython]\n    env:\n      MODIN_TEST_DATASET_SIZE: \"small\"\n    name: Test ${{ matrix.execution }} execution, Python ${{ needs.python-filter.outputs.python-version }}\"\n    steps:\n      - uses: actions/checkout@v4\n      - uses: ./.github/actions/mamba-env\n        with:\n          environment-file: environment-dev.yml\n          python-version: ${{ needs.python-filter.outputs.python-version }}\n      - name: Install HDF5\n        run: sudo apt update && sudo apt install -y libhdf5-dev\n      - name: xgboost tests\n        run: |\n          # TODO(https://github.com/modin-project/modin/issues/5194): Uncap xgboost\n          # when we use collective instead of rabit.\n          # Per the thread https://github.com/conda-forge/miniforge/issues/513,\n          # remove unused conda packages and caches to avoid `Found incorrect\n          # download: joblib` error from mamba.\n          mamba clean --all\n          mamba install \"xgboost>=1.7.1,<2.0.0\" scikit-learn -c conda-forge\n          python -m pytest modin/tests/experimental/xgboost/test_default.py --execution=${{ matrix.execution }}\n      - run: python -m pytest -n 2 
modin/tests/core/storage_formats/base/test_internals.py --execution=${{ matrix.execution }}\n      - uses: ./.github/actions/run-core-tests\n        with:\n          runner: python -m pytest --execution=${{ matrix.execution }}\n      - uses: ./.github/actions/upload-coverage\n\n  test-asv-benchmarks:\n    if: github.event_name == 'pull_request'\n    needs: [lint-flake8]\n    runs-on: ubuntu-latest\n    defaults:\n      run:\n        shell: bash -l {0}\n    env:\n      MODIN_ENGINE: ray\n      MODIN_MEMORY: 1000000000\n      MODIN_TEST_DATASET_SIZE: small\n    name: test-asv-benchmarks\n    steps:\n      - uses: actions/checkout@v4\n        with:\n          fetch-depth: 1\n      - uses: conda-incubator/setup-miniconda@v3\n        with:\n          auto-activate-base: true\n          activate-environment: \"\"\n          miniforge-variant: Miniforge3\n          miniforge-version: latest\n          use-mamba: true\n      - name: Running benchmarks\n        run: |\n          git remote add upstream https://github.com/modin-project/modin.git\n          git fetch upstream\n          if git diff upstream/main --name-only | grep -q \"^asv_bench/\"; then\n              cd asv_bench\n\n              mamba env create -f ../environment-dev.yml\n              conda activate modin\n              pip install ..\n\n              asv machine --yes\n\n              # check Modin on Ray\n              asv run --quick --dry-run --python=same --strict --show-stderr --launch-method=spawn \\\n                -b ^benchmarks -b ^io -b ^scalability | tee benchmarks.log\n\n              # check pure pandas\n              MODIN_ASV_USE_IMPL=pandas asv run --quick --dry-run --python=same --strict --show-stderr --launch-method=spawn \\\n                -b ^benchmarks -b ^io | tee benchmarks.log\n          else\n              echo \"Benchmarks did not run, no changes detected\"\n          fi\n        if: always()\n\n      - name: Publish benchmarks artifact\n        uses: 
actions/upload-artifact@v4\n        with:\n          name: Benchmarks log\n          path: asv_bench/benchmarks.log\n          include-hidden-files: true\n        if: failure()\n\n  execution-filter:\n    # Choose which executions we want to run all tests for on a pull request.\n    # We always test 'native' and 'python' executions completely because they\n    # are fast, but we only test ray, dask, and unidist, if we think this pull\n    # request is affecting how we execute with those engines specifically.\n    runs-on: ubuntu-latest\n    outputs:\n      ray: ${{ steps.filter.outputs.ray }}\n      dask: ${{ steps.filter.outputs.dask }}\n      unidist: ${{ steps.filter.outputs.unidist }}\n      engines: ${{ steps.engines.outputs.engines }}\n      experimental: ${{ steps.filter.outputs.experimental }}\n    steps:\n    - uses: actions/checkout@v4\n    - uses: dorny/paths-filter@v3\n      id: filter\n      with:\n        filters: |\n          shared: &shared\n            - 'modin/core/execution/dispatching/**'\n          ray:\n            - *shared\n            - 'modin/core/execution/ray/**'\n          dask:\n            - *shared\n            - 'modin/core/execution/dask/**'\n          unidist:\n            - *shared\n            - 'modin/core/execution/unidist/**'\n          experimental:\n            - 'modin/experimental/**'\n    - uses: actions/setup-python@v5\n    - id: engines\n      run: |\n        python -c \"import sys, json; print('engines=' + json.dumps(['python', 'native'] + (sys.argv[1] == 'true' and ['ray'] or []) + (sys.argv[2] == 'true' and ['dask'] or []) ))\" \\\n              \"${{ steps.filter.outputs.ray }}\" \"${{ steps.filter.outputs.dask }}\" >> $GITHUB_OUTPUT\n\n  test-all-unidist:\n    needs: [lint-flake8, execution-filter, python-filter]\n    if: github.event_name == 'push' || needs.execution-filter.outputs.unidist == 'true'\n    runs-on: ubuntu-latest\n    defaults:\n      run:\n        shell: bash -l {0}\n    strategy:\n      
matrix:\n        python-version: [ \"${{ needs.python-filter.outputs.python-version }}\" ]\n        unidist-backend: [\"mpi\"]\n    env:\n      MODIN_ENGINE: \"Unidist\"\n      UNIDIST_BACKEND: ${{matrix.unidist-backend}}\n      # Only test reading from SQL server and postgres on ubuntu for now.\n      # Eventually, we should test on Windows, too, but we will have to set up\n      # the servers differently.\n      MODIN_TEST_READ_FROM_SQL_SERVER: true\n      MODIN_TEST_READ_FROM_POSTGRES: true\n    name: test-ubuntu (engine unidist ${{matrix.unidist-backend}}, python ${{matrix.python-version}})\n    services:\n      moto:\n        image: motoserver/moto:5.0.13\n        ports:\n          - 5000:5000\n        env:\n          AWS_ACCESS_KEY_ID: foobar_key\n          AWS_SECRET_ACCESS_KEY: foobar_secret\n    steps:\n      - uses: actions/checkout@v4\n      - uses: ./.github/actions/mamba-env\n        with:\n          environment-file: requirements/env_unidist_linux.yml\n          activate-environment: modin_on_unidist\n          python-version: ${{matrix.python-version}}\n      - name: Install HDF5\n        run: sudo apt update && sudo apt install -y libhdf5-dev\n      - name: Set up postgres\n        # Locally, specifying port 2345:5432 works, but 2345:2345 and 5432:5432 do not. 
This solution is from\n        # https://stackoverflow.com/questions/36415654/cant-connect-docker-postgresql-9-3\n        run: |\n          sudo docker pull postgres\n          sudo docker run --name some-postgres -e POSTGRES_USER=sa -e POSTGRES_PASSWORD=Strong.Pwd-123 -e POSTGRES_DB=postgres -d -p 2345:5432 postgres\n      - run: mpiexec -n 1 python -m pytest modin/tests/pandas/internals/test_benchmark_mode.py\n      - run: mpiexec -n 1 python -m pytest modin/tests/test_partition_api.py\n      - uses: ./.github/actions/run-core-tests\n        with:\n          runner: mpiexec -n 1 python -m pytest\n          parallel: \"\"\n      - run: mpiexec -n 1 python -m pytest modin/tests/numpy\n      - run: chmod +x ./.github/workflows/sql_server/set_up_sql_server.sh\n      - run: ./.github/workflows/sql_server/set_up_sql_server.sh\n      # need an extra argument \"genv\" to set environment variables for mpiexec. We need\n      # these variables to test writing to the mock s3 filesystem.\n      - uses: nick-fields/retry@v3\n        # to avoid issues with non-stable `to_csv` tests for unidist on MPI backend.\n        # for details see: https://github.com/modin-project/modin/pull/6776\n        with:\n          timeout_minutes: 15\n          max_attempts: 3\n          command: |\n            conda run --no-capture-output -n modin_on_unidist mpiexec -n 1 -genv AWS_ACCESS_KEY_ID foobar_key \\\n              -genv AWS_SECRET_ACCESS_KEY foobar_secret python -m pytest modin/tests/pandas/test_io.py --verbose\n      - run: |\n          mpiexec -n 1 -genv AWS_ACCESS_KEY_ID foobar_key -genv AWS_SECRET_ACCESS_KEY foobar_secret \\\n            python -m pytest modin/tests/experimental/test_io_exp.py\n      - run: mpiexec -n 1 python -m pytest modin/tests/interchange/dataframe_protocol/test_general.py\n      - run: mpiexec -n 1 python -m pytest modin/tests/interchange/dataframe_protocol/pandas/test_protocol.py\n      - run: |\n          python -m pip install lazy_import\n          mpiexec 
-n 1 python -m pytest modin/tests/pandas/integrations/\n      - uses: ./.github/actions/upload-coverage\n\n  test-all:\n    needs: [lint-flake8, execution-filter, python-filter]\n    strategy:\n      matrix:\n        os:\n          - ubuntu\n          - windows\n        python-version: [ \"${{ needs.python-filter.outputs.python-version }}\" ]\n        # On push, run the tests for all engines. Otherwise, for pull requests,\n        # only run tests for engines that depend on files changed in this PR.\n        engine: ${{ fromJSON( (github.event_name == 'push' && '[\"python\", \"ray\", \"dask\", \"native\"]') || needs.execution-filter.outputs.engines ) }}\n        test_task:\n          - group_1\n          - group_2\n          - group_3\n          - group_4\n        exclude: # python and native engines only have one task group that contains all the tests\n          - engine: \"python\"\n            test_task: \"group_2\"\n          - engine: \"native\"\n            test_task: \"group_2\"\n          - engine: \"python\"\n            test_task: \"group_3\"\n          - engine: \"native\"\n            test_task: \"group_3\"\n          - engine: \"python\"\n            test_task: \"group_4\"\n          - engine: \"native\"\n            test_task: \"group_4\"\n    runs-on: ${{ matrix.os }}-latest\n    defaults:\n      run:\n        shell: bash -l {0}\n    env:\n      MODIN_ENGINE: ${{matrix.engine}}\n      # Only test reading from SQL server and postgres on ubuntu for now.\n      # Eventually, we should test on Windows, too, but we will have to set up\n      # the servers differently.\n      MODIN_TEST_READ_FROM_SQL_SERVER: ${{ matrix.os == 'ubuntu' }}\n      MODIN_TEST_READ_FROM_POSTGRES: ${{ matrix.os == 'ubuntu' }}\n    name: test-${{ matrix.os }} (engine ${{matrix.engine}}, python ${{matrix.python-version}}, ${{matrix.test_task}})\n    services:\n      # Using workaround https://github.com/actions/runner/issues/822#issuecomment-1524826092\n      moto:\n        # we 
only need moto service on Ubuntu and for group_4 task, or for native or python engine.\n        image: ${{ (matrix.os == 'ubuntu' && (matrix.engine == 'python' || matrix.engine == 'native' || matrix.test_task == 'group_4')) && 'motoserver/moto:5.0.13' || '' }}\n        ports:\n          - 5000:5000\n        env:\n          AWS_ACCESS_KEY_ID: foobar_key\n          AWS_SECRET_ACCESS_KEY: foobar_secret\n    steps:\n      - name: Set native storage format\n        run: echo \"MODIN_STORAGE_FORMAT=Native\" >> $GITHUB_ENV\n        if: matrix.engine == 'native'\n      - name: Limit ray memory\n        run: echo \"MODIN_MEMORY=1000000000\" >> $GITHUB_ENV\n        if: matrix.os == 'ubuntu' && matrix.engine == 'ray'\n      - name: Tell Modin to use existing ray cluster\n        run: echo \"MODIN_RAY_CLUSTER=True\" >> $GITHUB_ENV\n        if: matrix.os == 'windows' && matrix.engine == 'ray'\n      - uses: actions/checkout@v4\n      - uses: ./.github/actions/mamba-env\n        with:\n          environment-file: environment-dev.yml\n          python-version: ${{matrix.python-version}}\n      - name: Start local ray cluster\n        # Try a few times to start ray to work around\n        # https://github.com/modin-project/modin/issues/4562\n        uses: nick-fields/retry@v3\n        with:\n          timeout_minutes: 5\n          max_attempts: 5\n          command: ray start --head --port=6379 --object-store-memory=1000000000\n        if: matrix.os == 'windows' && matrix.engine == 'ray'\n      - name: Install HDF5\n        run: sudo apt update && sudo apt install -y libhdf5-dev\n        if: matrix.os == 'ubuntu'\n      - name: Set up postgres\n        # Locally, specifying port 2345:5432 works, but 2345:2345 and 5432:5432 do not. 
This solution is from\n        # https://stackoverflow.com/questions/36415654/cant-connect-docker-postgresql-9-3\n        run: |\n          sudo docker pull postgres\n          sudo docker run --name some-postgres -e POSTGRES_USER=sa -e POSTGRES_PASSWORD=Strong.Pwd-123 -e POSTGRES_DB=postgres -d -p 2345:5432 postgres\n        if: matrix.os == 'ubuntu'\n\n    # BEGIN partitioned execution tests. We run these tests along with group 1,\n    # or if we are on the \"python\" engine, which only has a single group. We\n    # skip these tests on the \"native\" engine, which does not use partitions.\n\n      - run: python -m pytest modin/tests/pandas/internals/test_benchmark_mode.py\n        if: matrix.engine != 'native' && (matrix.engine == 'python' || matrix.test_task == 'group_1')\n      - run: python -m pytest modin/tests/test_partition_api.py\n        # Skip this test for python because we do not define unwrap_partitions()\n        # for python execution.\n        if: matrix.engine != 'native' && matrix.engine != 'python' && matrix.test_task == 'group_1'\n      - name: xgboost tests\n        run: |\n          # TODO(https://github.com/modin-project/modin/issues/5194): Uncap xgboost\n          # when we use collective instead of rabit.\n          mamba install \"xgboost>=1.7.1,<2.0.0\" scikit-learn -c conda-forge\n          python -m pytest -n 2 \\\n                  modin/tests/experimental/xgboost/test_default.py \\\n                  modin/tests/experimental/xgboost/test_xgboost.py \\\n                  modin/tests/experimental/xgboost/test_dmatrix.py\n        if: matrix.engine != 'native' && matrix.os != 'windows' && (matrix.engine == 'python' || matrix.test_task == 'group_1')\n      - run: python -m pytest -n 2 modin/tests/experimental/test_pipeline.py\n        if: matrix.engine != 'native'  && (matrix.engine == 'python' || matrix.test_task == 'group_1')\n\n\n    # END partitioned execution tests.\n\n\n    # BEGIN test groups.\n    # Run all the tests in the 
corresponding group for this instance of the\n    # test matrix. For example, if we are in the matrix's 'group_4', run the\n    # tests for 'group_4'. For each of 'native' and 'python' engines,  we run\n    # all tests in a single job, so we ignore the grouping.\n\n      - uses: ./.github/actions/run-core-tests/group_1\n        with:\n          # When running with Ray engine on Windows using 2 pytest workers tests are failing in CI.\n          # See https://github.com/modin-project/modin/issues/7387.\n          parallel: ${{ matrix.engine == 'ray' && matrix.os == 'windows' && '-n 1' || '-n 2' }}\n        if: matrix.engine == 'python' || matrix.engine == 'native' || matrix.test_task == 'group_1'\n      - uses: ./.github/actions/run-core-tests/group_2\n        with:\n          # When running with Ray engine on Windows using 2 pytest workers tests are failing in CI.\n          # See https://github.com/modin-project/modin/issues/7387.\n          parallel: ${{ matrix.engine == 'ray' && matrix.os == 'windows' && '-n 1' || '-n 2' }}\n        if: matrix.engine == 'python' || matrix.engine == 'native' || matrix.test_task == 'group_2'\n      - uses: ./.github/actions/run-core-tests/group_3\n        with:\n          # When running with Ray engine on Windows using 2 pytest workers tests are failing in CI.\n          # See https://github.com/modin-project/modin/issues/7387.\n          parallel: ${{ matrix.engine == 'ray' && matrix.os == 'windows' && '-n 1' || '-n 2' }}\n        if: matrix.engine == 'python' || matrix.engine == 'native' || matrix.test_task == 'group_3'\n      - uses: ./.github/actions/run-core-tests/group_4\n        with:\n          # When running with Ray engine on Windows using 2 pytest workers tests are failing in CI.\n          # See https://github.com/modin-project/modin/issues/7387.\n          parallel: ${{ matrix.engine == 'ray' && matrix.os == 'windows' && '-n 1' || '-n 2' }}\n        if: matrix.engine == 'python' || matrix.engine == 'native' || 
matrix.test_task == 'group_4'\n      - run: python -m pytest -n 2 modin/tests/numpy\n        # Native execution does not support the modin Numpy API.\n        if: matrix.engine == 'python' || matrix.test_task == 'group_4'\n\n    # END test groups.\n\n\n    # BEGIN some tests that we run along with group 4 for engines other than\n    # 'native' and 'python'. 'native' and 'python' jobs will run these tests\n    # along with all other tests in a single group.\n\n      - run: chmod +x ./.github/workflows/sql_server/set_up_sql_server.sh\n        if: matrix.os == 'ubuntu' && (matrix.engine == 'python' || matrix.engine == 'native' || matrix.test_task == 'group_4')\n      - run: ./.github/workflows/sql_server/set_up_sql_server.sh\n        if: matrix.os == 'ubuntu' && (matrix.engine == 'python' || matrix.engine == 'native' || matrix.test_task == 'group_4')\n      # Do not add parallelism (`-n` argument) here - it will cause mock S3 service to fail.\n      - run: python -m pytest modin/tests/pandas/test_io.py --verbose\n        timeout-minutes: 60\n        if: matrix.engine == 'python' || matrix.engine == 'native' || matrix.test_task == 'group_4'\n      - run: python -m pytest modin/tests/experimental/test_io_exp.py\n        if: matrix.engine == 'python' || matrix.engine == 'native' || matrix.test_task == 'group_4'\n      - run: python -m pytest modin/tests/interchange/dataframe_protocol/test_general.py\n        if: matrix.engine == 'python' || matrix.engine == 'native' || matrix.test_task == 'group_4'\n      - run: python -m pytest modin/tests/interchange/dataframe_protocol/pandas/test_protocol.py\n        if: matrix.engine == 'python' || matrix.engine == 'native' || matrix.test_task == 'group_4'\n      - run: python -m pytest modin/tests/polars/test_dataframe.py\n      - run: |\n          python -m pip install lazy_import\n          python -m pytest modin/tests/pandas/integrations/\n        if: matrix.engine == 'python' || matrix.engine == 'native' || matrix.test_task == 
'group_4'\n\n\n    # END tests that run on group 4, or in the single group for 'native' and\n    # 'python' engines.\n\n      - uses: ./.github/actions/upload-coverage\n      - name: Stop local ray cluster\n        run: ray stop\n        if: matrix.os == 'windows' && matrix.engine == 'ray'\n\n  test-sanity:\n    # The \"sanity\" tests run on each pull request to test that a subset of the\n    # full tests work with the slower engines (ray, dask, and unidist-MPI).\n    needs: [lint-flake8, execution-filter, python-filter]\n    # If we don't need to run any sanity tests, the job matrix that we generate\n    # here gives a single job with all the matrix fields empty (that is, os,\n    # execution, etc. are not set, so we treat them as \"\").\n    # So, if the matrix is going to be empty, we need to skip this job\n    # completely. This bizarre behavior is not in the official documentation\n    # of GitHub Actions matrices, but someone does mention it here:\n    # https://stackoverflow.com/a/77118991\n    if: |\n      github.event_name == 'pull_request' &&\n      (\n        needs.execution-filter.outputs.ray != 'true' ||\n        needs.execution-filter.outputs.dask != 'true' ||\n        needs.execution-filter.outputs.unidist != 'true'\n      )\n    strategy:\n      matrix:\n        os:\n          - ubuntu\n          - windows\n        python-version: [ \"${{ needs.python-filter.outputs.python-version }}\" ]\n        running-all-ray-tests: [ \"${{ needs.execution-filter.outputs.ray }}\" ]\n        running-all-dask-tests: [ \"${{needs.execution-filter.outputs.dask}}\" ]\n        running-all-unidist-tests: [ \"${{needs.execution-filter.outputs.unidist}}\" ]\n        execution: [ray, dask, unidist]\n        # If we're going to run all ray tests because we've detected a\n        # change to the ray engine, we don't need to run these sanity tests\n        # on ray. 
Likewise for dask and unidist.\n        exclude:\n          - running-all-ray-tests: 'true'\n            execution: ray\n          - running-all-dask-tests: 'true'\n            execution: dask\n          - running-all-unidist-tests: 'true'\n            execution: unidist\n    runs-on: ${{ matrix.os }}-latest\n    defaults:\n      run:\n        shell: bash -l {0}\n    env:\n      MODIN_ENGINE: ${{ matrix.execution }}\n      UNIDIST_BACKEND: \"mpi\"\n      PARALLEL: ${{ matrix.execution != 'unidist' && matrix.os != 'windows' && '-n 2' || '' }}\n      PYTEST_COMMAND: >-\n        ${{\n          (\n            (matrix.execution == 'ray' || matrix.execution == 'dask') &&\n            'python -m pytest'\n          ) ||\n          (\n            matrix.execution == 'unidist' &&\n            'mpiexec -n 1 -genv AWS_ACCESS_KEY_ID foobar_key -genv AWS_SECRET_ACCESS_KEY foobar_secret python -m pytest'\n          ) ||\n          'UNKNOWN_PYTEST_COMMAND'\n        }}\n    name: test-${{ matrix.os }}-sanity (engine ${{ matrix.execution }}, python ${{matrix.python-version}})\n    services:\n      moto:\n        image: ${{ matrix.os != 'windows' && 'motoserver/moto:5.0.13' || '' }}\n        ports:\n          - 5000:5000\n        env:\n          AWS_ACCESS_KEY_ID: foobar_key\n          AWS_SECRET_ACCESS_KEY: foobar_secret\n    steps:\n      - uses: actions/checkout@v4\n      - uses: ./.github/actions/mamba-env\n        with:\n          environment-file: ${{ matrix.os == 'ubuntu' && matrix.execution == 'unidist' && 'requirements/env_unidist_linux.yml' || matrix.os == 'windows' && matrix.execution == 'unidist' && 'requirements/env_unidist_win.yml' || 'environment-dev.yml' }}\n          activate-environment: ${{ matrix.execution == 'unidist' && 'modin_on_unidist' || 'modin' }}\n          python-version: ${{matrix.python-version}}\n      - name: Install HDF5\n        run: sudo apt update && sudo apt install -y libhdf5-dev\n        if: matrix.os != 'windows'\n      - name: Limit ray 
memory\n        run: echo \"MODIN_MEMORY=1000000000\" >> $GITHUB_ENV\n        if: matrix.os != 'windows' && matrix.execution == 'ray'\n      - name: Tell Modin to use existing ray cluster\n        run: echo \"MODIN_RAY_CLUSTER=True\" >> $GITHUB_ENV\n        if: matrix.os == 'windows' && matrix.execution == 'ray'\n      - name: Start local ray cluster\n        # Try a few times to start ray to work around\n        # https://github.com/modin-project/modin/issues/4562\n        uses: nick-fields/retry@v3\n        with:\n          timeout_minutes: 5\n          max_attempts: 5\n          command: ray start --head --port=6379 --object-store-memory=1000000000\n        if: matrix.os == 'windows' && matrix.execution == 'ray'\n      - run: MODIN_BENCHMARK_MODE=True $PYTEST_COMMAND modin/tests/pandas/internals/test_benchmark_mode.py\n      - run: $PYTEST_COMMAND $PARALLEL modin/tests/test_partition_api.py\n      - run: $PYTEST_COMMAND modin/tests/pandas/extensions\n      - name: xgboost tests\n        run: |\n          # TODO(https://github.com/modin-project/modin/issues/5194): Uncap xgboost\n          # when we use collective instead of rabit.\n          mamba install \"xgboost>=1.7.1,<2.0.0\" scikit-learn -c conda-forge\n          $PYTEST_COMMAND $PARALLEL \\\n                  modin/tests/experimental/xgboost/test_default.py \\\n                  modin/tests/experimental/xgboost/test_xgboost.py \\\n                  modin/tests/experimental/xgboost/test_dmatrix.py\n        if: matrix.os != 'windows' && needs.execution-filter.outputs.experimental == 'true'\n      - run: $PYTEST_COMMAND $PARALLEL modin/tests/experimental/test_pipeline.py\n        if: matrix.os != 'windows' && matrix.execution != 'unidist' && needs.execution-filter.outputs.experimental == 'true'\n      - name: \"test DF: binary, default, iter\"\n        run: |\n          $PYTEST_COMMAND $PARALLEL \\\n                  modin/tests/pandas/dataframe/test_binary.py \\\n                  
modin/tests/pandas/dataframe/test_default.py \\\n                  modin/tests/pandas/dataframe/test_iter.py\n        if: matrix.os != 'windows'\n      - name: \"test DF: reduce, udf, window, pickle\"\n        run: |\n          $PYTEST_COMMAND $PARALLEL \\\n                  modin/tests/pandas/dataframe/test_reduce.py \\\n                  modin/tests/pandas/dataframe/test_udf.py \\\n                  modin/tests/pandas/dataframe/test_window.py \\\n                  modin/tests/pandas/dataframe/test_pickle.py\n        if: matrix.os != 'windows'\n      - run: $PYTEST_COMMAND modin/tests/pandas/test_series.py\n        if: matrix.execution == 'ray'\n      - run: $PYTEST_COMMAND -m \"not exclude_in_sanity\" modin/tests/pandas/test_series.py\n        if: matrix.execution != 'ray'\n      - run: $PYTEST_COMMAND modin/tests/pandas/dataframe/test_map_metadata.py\n        if: matrix.execution == 'ray'\n      - run: $PYTEST_COMMAND -m \"not exclude_in_sanity\" modin/tests/pandas/dataframe/test_map_metadata.py\n        if: matrix.execution != 'ray'\n      - name: \"test rolling, expanding, reshape, general, concat\"\n        run: |\n          $PYTEST_COMMAND $PARALLEL \\\n                  modin/tests/pandas/test_rolling.py \\\n                  modin/tests/pandas/test_expanding.py \\\n                  modin/tests/pandas/test_reshape.py \\\n                  modin/tests/pandas/test_general.py \\\n                  modin/tests/pandas/test_concat.py\n        if: matrix.os != 'windows'\n      - run: $PYTEST_COMMAND $PARALLEL modin/tests/numpy\n      - run: $PYTEST_COMMAND -m \"not exclude_in_sanity\" modin/tests/pandas/test_io.py --verbose\n        if: matrix.execution != 'unidist'\n      - uses: nick-fields/retry@v3\n        # to avoid issues with non-stable `to_csv` tests for unidist on MPI backend.\n        # for details see: https://github.com/modin-project/modin/pull/6776\n        with:\n          timeout_minutes: 15\n          max_attempts: 3\n          command: conda run 
--no-capture-output -n modin_on_unidist $PYTEST_COMMAND -m \"not exclude_in_sanity\" modin/tests/pandas/test_io.py --verbose\n        if: matrix.execution == 'unidist'\n      - run: $PYTEST_COMMAND modin/tests/experimental/test_io_exp.py\n      - run: $PYTEST_COMMAND $PARALLEL modin/tests/interchange/dataframe_protocol/test_general.py\n      - run: $PYTEST_COMMAND $PARALLEL modin/tests/interchange/dataframe_protocol/pandas/test_protocol.py\n      - name: Stop local ray cluster\n        run: ray stop\n        if: matrix.os == 'windows' && matrix.execution == 'ray'\n      - uses: ./.github/actions/upload-coverage\n\n  test-experimental:\n    needs: [lint-flake8, python-filter]\n    runs-on: ubuntu-latest\n    defaults:\n      run:\n        shell: bash -l {0}\n    env:\n      MODIN_ENGINE: \"python\"\n      MODIN_EXPERIMENTAL: \"True\"\n    name: test experimental\n    services:\n      moto:\n        image: motoserver/moto:5.0.13\n        ports:\n          - 5000:5000\n        env:\n          AWS_ACCESS_KEY_ID: foobar_key\n          AWS_SECRET_ACCESS_KEY: foobar_secret\n    steps:\n      - uses: actions/checkout@v4\n      - uses: ./.github/actions/mamba-env\n        with:\n          environment-file: environment-dev.yml\n          python-version: ${{ needs.python-filter.outputs.python-version }}\n      - name: Install HDF5\n        run: sudo apt update && sudo apt install -y libhdf5-dev\n      - run: python -m pytest -n 2 modin/tests/pandas/dataframe/test_map_metadata.py\n      - run: python -m pytest -n 2 modin/tests/pandas/test_series.py\n      # Do not add parallelism (`-n` argument) here - it will cause mock S3 service to fail.\n      - run: python -m pytest modin/tests/pandas/test_io.py --verbose\n      - uses: ./.github/actions/upload-coverage\n\n  test-spreadsheet:\n    needs: [lint-flake8, python-filter]\n    runs-on: ubuntu-latest\n    defaults:\n      run:\n        shell: bash -l {0}\n    strategy:\n      matrix:\n        python-version: [ \"${{ 
needs.python-filter.outputs.python-version }}\" ]\n        engine: [\"ray\", \"dask\"]\n    env:\n      MODIN_EXPERIMENTAL: \"True\"\n      MODIN_ENGINE: ${{matrix.engine}}\n    name: test-spreadsheet (engine ${{matrix.engine}}, python ${{matrix.python-version}})\n    steps:\n      - uses: actions/checkout@v4\n      - uses: ./.github/actions/mamba-env\n        with:\n          environment-file: environment-dev.yml\n          python-version: ${{matrix.python-version}}\n      - run: python -m pytest modin/tests/experimental/spreadsheet/test_general.py\n\n  test-native-dataframe-interoperability:\n    needs: [ lint-flake8]\n    if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' }}\n    runs-on: ubuntu-latest\n    defaults:\n      run:\n        shell: bash -l {0}\n    strategy:\n      matrix:\n        python-version: [\"3.9\"]\n    env:\n      # Test interoperability between PandasOnPython dataframes/series and\n      # native dataframes/series.\n      MODIN_ENGINE: \"Python\"\n    name: test-native-dataframe-interoperability (python ${{matrix.python-version}})\n    steps:\n      - uses: actions/checkout@v4\n      - uses: ./.github/actions/mamba-env\n        with:\n          environment-file: environment-dev.yml\n          python-version: ${{matrix.python-version}}\n      - run: python -m pytest modin/tests/pandas/native_df_interoperability/ -n 2\n      - uses: ./.github/actions/upload-coverage\n\n  merge-coverage-artifacts:\n    needs: [test-internals, test-api-and-no-engine, test-defaults, test-all-unidist, test-all, test-experimental, test-sanity, test-native-dataframe-interoperability]\n    if: always()  # we need to run it regardless of some job being skipped, like in PR\n    runs-on: ubuntu-latest\n    defaults:\n      run:\n        shell: bash -l {0}\n    steps:\n      - name: Merge Artifacts\n        uses: actions/upload-artifact/merge@v4\n        with:\n          name: coverage-data\n          pattern: coverage-data-*\n          
include-hidden-files: true\n          delete-merged: true\n\n  upload-coverage:\n    needs: [merge-coverage-artifacts, python-filter]\n    if: always()  # we need to run it regardless of some job being skipped, like in PR\n    runs-on: ubuntu-latest\n    defaults:\n      run:\n        shell: bash -l {0}\n    steps:\n      - uses: actions/checkout@v4\n      - uses: ./.github/actions/python-only\n        with:\n          python-version: ${{ needs.python-filter.outputs.python-version }}\n      - name: Download coverage data\n        uses: actions/download-artifact@v4\n        with:\n          name: coverage-data\n      - run: pip install coverage\n      - name: Combine coverage\n        run: python -m coverage combine\n      - name: Generate coverage report in xml format\n        run: python -m coverage xml\n      - uses: codecov/codecov-action@v4\n        with:\n          fail_ci_if_error: ${{ github.event_name == 'push' }}  # do not care about uploads in PR\n          token: ${{ secrets.CODECOV_TOKEN }} # this token is available at https://app.codecov.io/account/github/modin-project/\n"
  },
  {
    "path": ".github/workflows/codeql/codeql-config.yml",
    "content": "name: \"Modin CodeQL config\"\n\npaths:\n  - modin/** \n"
  },
  {
    "path": ".github/workflows/codeql.yml",
    "content": "name: \"CodeQL\"\n\non:\n  push:\n    branches: [ \"main\" ]\n  pull_request:\n    branches: [ \"main\" ]\n\nconcurrency:\n  # Cancel other jobs in the same branch. We don't care whether CI passes\n  # on old commits.\n  group: ${{ github.workflow }}-${{ github.ref }}\n  cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }}\nenv:\n  MODIN_GITHUB_CI: true\njobs:\n  analyze:\n    name: Analyze\n    runs-on: ubuntu-latest\n    permissions:\n      actions: read\n      contents: read\n      security-events: write\n\n    strategy:\n      fail-fast: false\n      matrix:\n        language: [ python ]\n\n    steps:\n      - name: Checkout\n        uses: actions/checkout@v4\n\n      - name: Initialize CodeQL\n        uses: github/codeql-action/init@v3\n        with:\n          languages: ${{ matrix.language }}\n          queries: +security-and-quality\n          config-file: ./.github/workflows/codeql/codeql-config.yml\n\n      - name: Autobuild\n        uses: github/codeql-action/autobuild@v3\n\n      - name: Perform CodeQL Analysis\n        uses: github/codeql-action/analyze@v3\n        with:\n          category: \"/language:${{ matrix.language }}\"\n"
  },
  {
    "path": ".github/workflows/fuzzydata-test.yml",
    "content": "name: fuzzy\non:\n  pull_request:\n    paths:\n      # NOTE: keep these paths in sync with the paths that trigger the CI Github\n      # Actions in .github/workflows/ci.yml\n      - .github/workflows/**\n      - '!.github/workflows/push-to-main.yml'\n      - asv_bench/**\n      - modin/**\n      - requirements/**\n      - scripts/**\n      - environment-dev.yml\n      - requirements-dev.txt\n      - setup.cfg\n      - setup.py\n      - versioneer.py\nconcurrency:\n  # Cancel other jobs in the same branch. We don't care whether CI passes\n  # on old commits.\n  group: ${{ github.workflow }}-${{ github.ref }}\n  cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }}\nenv:\n  MODIN_GITHUB_CI: true\njobs:\n  test-fuzzydata:\n    runs-on: ubuntu-latest\n    defaults:\n      run:\n        shell: bash -l {0}\n    strategy:\n      matrix:\n        python-version: [\"3.9\"]\n        engine: [\"ray\", \"dask\"]\n    steps:\n      - uses: actions/checkout@v4\n      - uses: ./.github/actions/mamba-env\n        with:\n          environment-file: environment-dev.yml\n          python-version: ${{matrix.python-version}}\n      - name: test-fuzzydata (engine ${{matrix.engine}}, python ${{matrix.python-version}})\n        run: python -m pytest modin/tests/experimental/test_fuzzydata.py -Wignore::UserWarning --log-file=/tmp/fuzzydata-test-wf-${{matrix.engine}}/run.log --log-file-level=INFO\n        env:\n          MODIN_ENGINE: ${{matrix.engine}}\n      - uses: actions/upload-artifact@v4\n        if: success() || failure()\n        with:\n           name: fuzzydata-test-workflow-${{matrix.engine}}\n           path: /tmp/fuzzydata-test-wf-${{matrix.engine}}/* # Must match output dir in test_fuzzydata.py\n           if-no-files-found: error\n           include-hidden-files: true\n"
  },
  {
    "path": ".github/workflows/publish-to-pypi.yml",
    "content": "name: Publish Modin wheel to PyPI\n\non:\n  schedule:\n    - cron: \"42 0 * * WED\"\n  push:\n    tags:        \n      - '*'\n  workflow_dispatch:\n\njobs:\n  build-n-publish:\n    name: Build and publish Modin wheel to PyPI\n    environment: release\n    runs-on: ubuntu-latest\n    permissions:\n      id-token: write  # IMPORTANT: this permission is mandatory for trusted publishing\n\n    steps:\n    - uses: actions/checkout@v4\n      with:\n        fetch-depth: 0\n        fetch-tags: true\n    - name: Checkout latest git tag\n      run: git checkout $(git describe --tags \"$(git rev-list --tags --max-count=1)\")\n      if: github.event_name == 'push'\n    - name: Set up Python\n      uses: actions/setup-python@v5\n      with:\n        python-version: \"3.9.x\"\n\n    - name: Install/update tools\n      run: python3 -m pip install --upgrade build wheel\n    - name: Build a pure Python wheel\n      run: python3 setup.py sdist bdist_wheel\n\n    - uses: actions/upload-artifact@v4\n      with:\n        name: modin-wheel-and-source-tarball \n        path: ./dist/\n        include-hidden-files: true\n\n    - name: Publish Modin wheel to PyPI\n      if: github.event_name == 'push'\n      uses: pypa/gh-action-pypi-publish@release/v1\n"
  },
  {
    "path": ".github/workflows/push-to-main.yml",
    "content": "name: push-to-main\non:\n  push:\n    branches:\n      - main\nconcurrency:\n  # Cancel other jobs in the same branch. We don't care whether CI passes\n  # on old commits.\n  group: ${{ github.workflow }}-${{ github.ref }}\n  cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }}\nenv:\n  MODIN_GITHUB_CI: true\njobs:\n  test-ray-master:\n    runs-on: ubuntu-latest\n    defaults:\n      run:\n        # `shell: bash -l {0}` - special way to activate modin environment\n        shell: bash -l {0}\n    services:\n      moto:\n        image: motoserver/moto:5.0.13\n        ports:\n          - 5000:5000\n        env:\n          AWS_ACCESS_KEY_ID: foobar_key\n          AWS_SECRET_ACCESS_KEY: foobar_secret\n    steps:\n      - uses: actions/checkout@v4\n      - uses: ./.github/actions/mamba-env\n        with:\n          environment-file: environment-dev.yml\n      - name: install Ray nightly build\n        # Use --force-reinstall to always reinstall ray and its dependencies.\n        # botocore isn't compatible with urllib3>=2; see #6094 for details\n        run: pip install --force-reinstall \"urllib3<2\" https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-3.0.0.dev0-cp39-cp39-manylinux2014_x86_64.whl\n      - name: Conda environment\n        run: |\n          conda info\n          conda list\n      - run: sudo apt update && sudo apt install -y libhdf5-dev\n      - name: Run parallelizable Modin Tests\n        run: >\n          python -m pytest -n 2\n          modin/tests/pandas/dataframe/test_binary.py\n          modin/tests/pandas/dataframe/test_default.py\n          modin/tests/pandas/dataframe/test_indexing.py\n          modin/tests/pandas/dataframe/test_iter.py\n          modin/tests/pandas/dataframe/test_join_sort.py\n          modin/tests/pandas/dataframe/test_map_metadata.py\n          modin/tests/pandas/dataframe/test_reduce.py\n          modin/tests/pandas/dataframe/test_udf.py\n          modin/tests/pandas/dataframe/test_window.py\n 
         modin/tests/pandas/test_series.py\n          modin/tests/numpy/test_array.py\n          modin/tests/numpy/test_array_creation.py\n          modin/tests/numpy/test_array_arithmetic.py\n          modin/tests/numpy/test_array_axis_functions.py\n          modin/tests/numpy/test_array_logic.py\n          modin/tests/numpy/test_array_linalg.py\n          modin/tests/numpy/test_array_indexing.py\n          modin/tests/numpy/test_array_math.py\n          modin/tests/numpy/test_array_shaping.py\n          modin/tests/pandas/test_rolling.py\n          modin/tests/pandas/test_expanding.py\n          modin/tests/pandas/test_concat.py\n          modin/tests/pandas/test_groupby.py\n          modin/tests/pandas/test_reshape.py\n          modin/tests/pandas/test_general.py\n      - name: Run non-parallelizable Modin Tests\n        run: >\n          python -m pytest\n          modin/tests/pandas/test_io.py\n          modin/tests/experimental/test_io_exp.py\n\n  test-docs:\n    runs-on: ubuntu-latest\n    defaults:\n      run:\n        shell: bash -l {0}\n    name: test docs\n    steps:\n      - uses: actions/checkout@v4\n      - uses: ./.github/actions/mamba-env\n        with:\n          environment-file: environment-dev.yml\n      - run: sudo apt update && sudo apt install -y libhdf5-dev\n      - name: Docstring URL validity check\n        run: python -m pytest modin/tests/test_docstring_urls.py\n"
  },
  {
    "path": ".github/workflows/sql_server/set_up_sql_server.sh",
    "content": "# This script sets up a SQL server listening at 0.0.0.0:1433.\n\n# If any step fails, we can't set up a valid SQL server for unit tests.\nset -e\n\n# Pull the 2019 SQL server docker container image by following:\n# https://docs.microsoft.com/en-us/sql/linux/quickstart-install-connect-docker?view=sql-server-ver15&pivots=cs1-powershell#pullandrun2019\nsudo docker pull mcr.microsoft.com/mssql/server:2019-latest\nsudo docker run -d --name example_sql_server -e 'ACCEPT_EULA=Y' -e 'SA_PASSWORD=Strong.Pwd-123' -p 1433:1433 mcr.microsoft.com/mssql/server:2019-latest\n\n\n# Wait 10 seconds because if we don't the server typically will not be ready\n# to accept connections by the time we want to make them.\nsleep 10\n"
  },
  {
    "path": ".gitignore",
    "content": "# Byte-compiled / optimized / DLL files\r\n__pycache__/\r\n*.py[cod]\r\n*$py.class\r\n\r\n# C extensions\r\n*.so\r\n\r\n# Distribution / packaging\r\n.Python\r\nbuild/\r\ndevelop-eggs/\r\ndist/\r\ndownloads/\r\neggs/\r\n.eggs/\r\nlib/\r\nlib64/\r\nparts/\r\nsdist/\r\nvar/\r\nwheels/\r\n*.egg-info/\r\n.installed.cfg\r\n*.egg\r\nMANIFEST\r\nscripts/gh-users-cache.json\r\n\r\n# PyInstaller\r\n#  Usually these files are written by a python script from a template\r\n#  before PyInstaller builds the exe, so as to inject date/other infos into it.\r\n*.manifest\r\n*.spec\r\n\r\n# Installer logs\r\npip-log.txt\r\npip-delete-this-directory.txt\r\n\r\n# Unit test / coverage reports\r\nhtmlcov/\r\n.tox/\r\n.coverage\r\n.coverage.*\r\n.cache\r\nnosetests.xml\r\ncoverage.xml\r\n*.cover\r\n.hypothesis/\r\n.pytest_cache/\r\n\r\n# Translations\r\n*.mo\r\n*.pot\r\n\r\n# Django stuff:\r\n*.log\r\nlocal_settings.py\r\ndb.sqlite3\r\n\r\n# Flask stuff:\r\ninstance/\r\n.webassets-cache\r\n\r\n# Scrapy stuff:\r\n.scrapy\r\n\r\n# Sphinx documentation\r\ndocs/_build/\r\ndocs/flow/modin/configs_help.csv\r\n\r\n# PyBuilder\r\ntarget/\r\n\r\n# Jupyter Notebook\r\n.ipynb_checkpoints\r\n\r\n# pyenv\r\n.python-version\r\n\r\n# celery beat schedule file\r\ncelerybeat-schedule\r\n\r\n# SageMath parsed files\r\n*.sage.py\r\n\r\n# Environments\r\n.env\r\n.venv\r\nenv/\r\nvenv/\r\nENV/\r\nenv.bak/\r\nvenv.bak/\r\n\r\n# Spyder project settings\r\n.spyderproject\r\n.spyproject\r\n\r\n# Rope project settings\r\n.ropeproject\r\n\r\n# mkdocs documentation\r\n/site\r\n\r\n# mypy\r\n.mypy_cache/\r\n\r\n# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm\r\n# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839\r\n\r\n# User-specific stuff\r\n.idea/\r\n.idea/**/workspace.xml\r\n.idea/**/tasks.xml\r\n.idea/**/usage.statistics.xml\r\n.idea/**/dictionaries\r\n.idea/**/shelf\r\n*.DS_Store\r\n\r\n# Sensitive or 
high-churn files\r\n.idea/**/dataSources/\r\n.idea/**/dataSources.ids\r\n.idea/**/dataSources.local.xml\r\n.idea/**/sqlDataSources.xml\r\n.idea/**/dynamic.xml\r\n.idea/**/uiDesigner.xml\r\n.idea/**/dbnavigator.xml\r\n\r\n# Gradle\r\n.idea/**/gradle.xml\r\n.idea/**/libraries\r\n\r\n# vscode settings\r\n.vscode/\r\n\r\n# CMake\r\ncmake-build-*/\r\n\r\n# Mongo Explorer plugin\r\n.idea/**/mongoSettings.xml\r\n\r\n# File-based project format\r\n*.iws\r\n\r\n# IntelliJ\r\nout/\r\n\r\n# mpeltonen/sbt-idea plugin\r\n.idea_modules/\r\n\r\n# JIRA plugin\r\natlassian-ide-plugin.xml\r\n\r\n# Cursive Clojure plugin\r\n.idea/replstate.xml\r\n\r\n# Crashlytics plugin (for Android Studio and IntelliJ)\r\ncom_crashlytics_export_strings.xml\r\ncrashlytics.properties\r\ncrashlytics-build.properties\r\nfabric.properties\r\n\r\n# Editor-based Rest Client\r\n.idea/httpRequests\r\n\r\n# Cscope and Tags\r\ntags\r\ncscope.files\r\ncscope.out\r\n\r\n# PYTest Benchmarks\r\n.benchmarks/\r\n\r\n# Dask workspace\r\ndask-worker-space/\r\nnode_modules\r\n\r\n# Asv stuff\r\nasv_bench/.asv/\r\nasv_bench/modin/\r\n\r\n# Sublime stuff\r\n*.sublime-workspace\r\n*.sublime-project\r\n"
  },
  {
    "path": ".readthedocs.yaml",
    "content": "# .readthedocs.yaml\n# Read the Docs configuration file\n# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details\n\n# Required\nversion: 2\n\n# Set the version of Python and other tools you might need\nbuild:\n  os: ubuntu-20.04\n  tools:\n    python: \"3.9\"\n\n# Build documentation in the docs/ directory with Sphinx\nsphinx:\n   configuration: docs/conf.py\n\nformats: all\n\npython:\n   install:\n   - requirements: docs/requirements-doc.txt\n"
  },
  {
    "path": "CODEOWNERS",
    "content": "# These owners will be the default owners for everything in\n# the repo unless a later match takes precedence.\n*    @modin-project/modin-core @devin-petersohn @mvashishtha @RehanSD @YarShev @vnlitvinov @anmyachev @dchigarev\n"
  },
  {
    "path": "CODE_OF_CONDUCT.md",
    "content": "# Contributor Covenant Code of Conduct\n\n## Our Pledge\n\nIn the interest of fostering an open and welcoming environment, we as\ncontributors and maintainers pledge to making participation in our project and\nour community a harassment-free experience for everyone, regardless of age, body\nsize, disability, ethnicity, sex characteristics, gender identity and expression,\nlevel of experience, education, socio-economic status, nationality, personal\nappearance, race, religion, or sexual identity and orientation.\n\n## Our Standards\n\nExamples of behavior that contributes to creating a positive environment\ninclude:\n\n* Using welcoming and inclusive language\n* Being respectful of differing viewpoints and experiences\n* Gracefully accepting constructive criticism\n* Focusing on what is best for the community\n* Showing empathy towards other community members\n\nExamples of unacceptable behavior by participants include:\n\n* The use of sexualized language or imagery and unwelcome sexual attention or\n advances\n* Trolling, insulting/derogatory comments, and personal or political attacks\n* Public or private harassment\n* Publishing others' private information, such as a physical or electronic\n address, without explicit permission\n* Other conduct which could reasonably be considered inappropriate in a\n professional setting\n\n## Our Responsibilities\n\nProject maintainers are responsible for clarifying the standards of acceptable\nbehavior and are expected to take appropriate and fair corrective action in\nresponse to any instances of unacceptable behavior.\n\nProject maintainers have the right and responsibility to remove, edit, or\nreject comments, commits, code, wiki edits, issues, and other contributions\nthat are not aligned to this Code of Conduct, or to ban temporarily or\npermanently any contributor for other behaviors that they deem inappropriate,\nthreatening, offensive, or harmful.\n\n## Scope\n\nThis Code of Conduct applies both within 
project spaces and in public spaces\nwhen an individual is representing the project or its community. Examples of\nrepresenting a project or community include using an official project e-mail\naddress, posting via an official social media account, or acting as an appointed\nrepresentative at an online or offline event. Representation of a project may be\nfurther defined and clarified by project maintainers.\n\n## Enforcement\n\nInstances of abusive, harassing, or otherwise unacceptable behavior may be\nreported by contacting the project team at conduct@gr-oss.io. All\ncomplaints will be reviewed and investigated and will result in a response that\nis deemed necessary and appropriate to the circumstances. The project team is\nobligated to maintain confidentiality with regard to the reporter of an incident.\nFurther details of specific enforcement policies may be posted separately.\n\nProject maintainers who do not follow or enforce the Code of Conduct in good\nfaith may face temporary or permanent repercussions as determined by other\nmembers of the project's leadership.\n\n## Attribution\n\nThis Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,\navailable at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html\n\n[homepage]: https://www.contributor-covenant.org\n\nFor answers to common questions about this code of conduct, see\nhttps://www.contributor-covenant.org/faq\n"
  },
  {
    "path": "LICENSE",
    "content": "                                 Apache License\n                           Version 2.0, January 2004\n                        http://www.apache.org/licenses/\n\n   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION\n\n   1. Definitions.\n\n      \"License\" shall mean the terms and conditions for use, reproduction,\n      and distribution as defined by Sections 1 through 9 of this document.\n\n      \"Licensor\" shall mean the copyright owner or entity authorized by\n      the copyright owner that is granting the License.\n\n      \"Legal Entity\" shall mean the union of the acting entity and all\n      other entities that control, are controlled by, or are under common\n      control with that entity. For the purposes of this definition,\n      \"control\" means (i) the power, direct or indirect, to cause the\n      direction or management of such entity, whether by contract or\n      otherwise, or (ii) ownership of fifty percent (50%) or more of the\n      outstanding shares, or (iii) beneficial ownership of such entity.\n\n      \"You\" (or \"Your\") shall mean an individual or Legal Entity\n      exercising permissions granted by this License.\n\n      \"Source\" form shall mean the preferred form for making modifications,\n      including but not limited to software source code, documentation\n      source, and configuration files.\n\n      \"Object\" form shall mean any form resulting from mechanical\n      transformation or translation of a Source form, including but\n      not limited to compiled object code, generated documentation,\n      and conversions to other media types.\n\n      \"Work\" shall mean the work of authorship, whether in Source or\n      Object form, made available under the License, as indicated by a\n      copyright notice that is included in or attached to the work\n      (an example is provided in the Appendix below).\n\n      \"Derivative Works\" shall mean any work, whether in Source or Object\n      
form, that is based on (or derived from) the Work and for which the\n      editorial revisions, annotations, elaborations, or other modifications\n      represent, as a whole, an original work of authorship. For the purposes\n      of this License, Derivative Works shall not include works that remain\n      separable from, or merely link (or bind by name) to the interfaces of,\n      the Work and Derivative Works thereof.\n\n      \"Contribution\" shall mean any work of authorship, including\n      the original version of the Work and any modifications or additions\n      to that Work or Derivative Works thereof, that is intentionally\n      submitted to Licensor for inclusion in the Work by the copyright owner\n      or by an individual or Legal Entity authorized to submit on behalf of\n      the copyright owner. For the purposes of this definition, \"submitted\"\n      means any form of electronic, verbal, or written communication sent\n      to the Licensor or its representatives, including but not limited to\n      communication on electronic mailing lists, source code control systems,\n      and issue tracking systems that are managed by, or on behalf of, the\n      Licensor for the purpose of discussing and improving the Work, but\n      excluding communication that is conspicuously marked or otherwise\n      designated in writing by the copyright owner as \"Not a Contribution.\"\n\n      \"Contributor\" shall mean Licensor and any individual or Legal Entity\n      on behalf of whom a Contribution has been received by Licensor and\n      subsequently incorporated within the Work.\n\n   2. Grant of Copyright License. 
Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      copyright license to reproduce, prepare Derivative Works of,\n      publicly display, publicly perform, sublicense, and distribute the\n      Work and such Derivative Works in Source or Object form.\n\n   3. Grant of Patent License. Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      (except as stated in this section) patent license to make, have made,\n      use, offer to sell, sell, import, and otherwise transfer the Work,\n      where such license applies only to those patent claims licensable\n      by such Contributor that are necessarily infringed by their\n      Contribution(s) alone or by combination of their Contribution(s)\n      with the Work to which such Contribution(s) was submitted. If You\n      institute patent litigation against any entity (including a\n      cross-claim or counterclaim in a lawsuit) alleging that the Work\n      or a Contribution incorporated within the Work constitutes direct\n      or contributory patent infringement, then any patent licenses\n      granted to You under this License for that Work shall terminate\n      as of the date such litigation is filed.\n\n   4. Redistribution. 
You may reproduce and distribute copies of the\n      Work or Derivative Works thereof in any medium, with or without\n      modifications, and in Source or Object form, provided that You\n      meet the following conditions:\n\n      (a) You must give any other recipients of the Work or\n          Derivative Works a copy of this License; and\n\n      (b) You must cause any modified files to carry prominent notices\n          stating that You changed the files; and\n\n      (c) You must retain, in the Source form of any Derivative Works\n          that You distribute, all copyright, patent, trademark, and\n          attribution notices from the Source form of the Work,\n          excluding those notices that do not pertain to any part of\n          the Derivative Works; and\n\n      (d) If the Work includes a \"NOTICE\" text file as part of its\n          distribution, then any Derivative Works that You distribute must\n          include a readable copy of the attribution notices contained\n          within such NOTICE file, excluding those notices that do not\n          pertain to any part of the Derivative Works, in at least one\n          of the following places: within a NOTICE text file distributed\n          as part of the Derivative Works; within the Source form or\n          documentation, if provided along with the Derivative Works; or,\n          within a display generated by the Derivative Works, if and\n          wherever such third-party notices normally appear. The contents\n          of the NOTICE file are for informational purposes only and\n          do not modify the License. 
You may add Your own attribution\n          notices within Derivative Works that You distribute, alongside\n          or as an addendum to the NOTICE text from the Work, provided\n          that such additional attribution notices cannot be construed\n          as modifying the License.\n\n      You may add Your own copyright statement to Your modifications and\n      may provide additional or different license terms and conditions\n      for use, reproduction, or distribution of Your modifications, or\n      for any such Derivative Works as a whole, provided Your use,\n      reproduction, and distribution of the Work otherwise complies with\n      the conditions stated in this License.\n\n   5. Submission of Contributions. Unless You explicitly state otherwise,\n      any Contribution intentionally submitted for inclusion in the Work\n      by You to the Licensor shall be under the terms and conditions of\n      this License, without any additional terms or conditions.\n      Notwithstanding the above, nothing herein shall supersede or modify\n      the terms of any separate license agreement you may have executed\n      with Licensor regarding such Contributions.\n\n   6. Trademarks. This License does not grant permission to use the trade\n      names, trademarks, service marks, or product names of the Licensor,\n      except as required for reasonable and customary use in describing the\n      origin of the Work and reproducing the content of the NOTICE file.\n\n   7. Disclaimer of Warranty. Unless required by applicable law or\n      agreed to in writing, Licensor provides the Work (and each\n      Contributor provides its Contributions) on an \"AS IS\" BASIS,\n      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or\n      implied, including, without limitation, any warranties or conditions\n      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A\n      PARTICULAR PURPOSE. 
You are solely responsible for determining the\n      appropriateness of using or redistributing the Work and assume any\n      risks associated with Your exercise of permissions under this License.\n\n   8. Limitation of Liability. In no event and under no legal theory,\n      whether in tort (including negligence), contract, or otherwise,\n      unless required by applicable law (such as deliberate and grossly\n      negligent acts) or agreed to in writing, shall any Contributor be\n      liable to You for damages, including any direct, indirect, special,\n      incidental, or consequential damages of any character arising as a\n      result of this License or out of the use or inability to use the\n      Work (including but not limited to damages for loss of goodwill,\n      work stoppage, computer failure or malfunction, or any and all\n      other commercial damages or losses), even if such Contributor\n      has been advised of the possibility of such damages.\n\n   9. Accepting Warranty or Additional Liability. While redistributing\n      the Work or Derivative Works thereof, You may choose to offer,\n      and charge a fee for, acceptance of support, warranty, indemnity,\n      or other liability obligations and/or rights consistent with this\n      License. However, in accepting such obligations, You may act only\n      on Your own behalf and on Your sole responsibility, not on behalf\n      of any other Contributor, and only if You agree to indemnify,\n      defend, and hold each Contributor harmless for any liability\n      incurred by, or claims asserted against, such Contributor by reason\n      of your accepting any such warranty or additional liability.\n\n   END OF TERMS AND CONDITIONS\n\n   APPENDIX: How to apply the Apache License to your work.\n\n      To apply the Apache License to your work, attach the following\n      boilerplate notice, with the fields enclosed by brackets \"[]\"\n      replaced with your own identifying information. 
(Don't include\n      the brackets!)  The text should be enclosed in the appropriate\n      comment syntax for the file format. We also recommend that a\n      file or class name and description of purpose be included on the\n      same \"printed page\" as the copyright notice for easier\n      identification within third-party archives.\n\n   Copyright [yyyy] [name of copyright owner]\n\n   Licensed under the Apache License, Version 2.0 (the \"License\");\n   you may not use this file except in compliance with the License.\n   You may obtain a copy of the License at\n\n       http://www.apache.org/licenses/LICENSE-2.0\n\n   Unless required by applicable law or agreed to in writing, software\n   distributed under the License is distributed on an \"AS IS\" BASIS,\n   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n   See the License for the specific language governing permissions and\n   limitations under the License.\n\n\n# Certain code used and distributed in this package is forked from pandas\n# (https://github.com/pandas-dev/pandas). The pandas LICENSE\n# below applies to those certain forked components in this project:\n\nBSD 3-Clause License\n\nCopyright (c) 2008-2011, AQR Capital Management, LLC, Lambda Foundry, Inc. 
and PyData Development Team\nAll rights reserved.\n\nCopyright (c) 2011-2025, Open source contributors.\n\nRedistribution and use in source and binary forms, with or without\nmodification, are permitted provided that the following conditions are met:\n\n* Redistributions of source code must retain the above copyright notice, this\n  list of conditions and the following disclaimer.\n\n* Redistributions in binary form must reproduce the above copyright notice,\n  this list of conditions and the following disclaimer in the documentation\n  and/or other materials provided with the distribution.\n\n* Neither the name of the copyright holder nor the names of its\n  contributors may be used to endorse or promote products derived from\n  this software without specific prior written permission.\n\nTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"\nAND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\nIMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\nDISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE\nFOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\nDAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER\nCAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,\nOR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\nOF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE."
  },
  {
    "path": "LICENSE_HEADER",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n"
  },
  {
    "path": "MANIFEST.in",
    "content": "include versioneer.py\ninclude modin/_version.py\ninclude modin/tests/pandas/data/*.csv\n"
  },
  {
    "path": "NOTICE",
    "content": "Modin\n\nCopyright (c) 2018-2024 Modin Developers.\n"
  },
  {
    "path": "README.md",
    "content": "<p align=\"center\"><a href=\"https://modin.readthedocs.io\"><img width=77% alt=\"\" src=\"https://github.com/modin-project/modin/raw/7c009c747caa90554607e30b9ac2bd1b190b8c7d/docs/img/MODIN_ver2_hrz.png?raw=true\"></a></p>\n<h2 align=\"center\">Scale your pandas workflows by changing one line of code</h2>\n\n<div align=\"center\">\n\n| <h3>Dev Community & Support</h3> | <h3>Forums</h3> | <h3>Socials</h3> | <h3>Docs</h3> |\n|:---: | :---: | :---: | :---: |\n| [![Slack](https://img.shields.io/badge/Slack-4A154B?style=for-the-badge&logo=slack&logoColor=white)](https://join.slack.com/t/modin-project/shared_invite/zt-yvk5hr3b-f08p_ulbuRWsAfg9rMY3uA) | [![Stack Overflow](https://img.shields.io/badge/-Stackoverflow-FE7A16?style=for-the-badge&logo=stack-overflow&logoColor=white)](https://stackoverflow.com/questions/tagged/modin) | <a href=\"https://twitter.com/modin_project\"><img alt=\"Twitter Follow\" src=\"https://img.shields.io/twitter/follow/modin_project?style=social\" height=28 align=\"center\"></a> | <a href=\"https://modin.readthedocs.io/en/latest/?badge=latest\"><img alt=\"\" src=\"https://readthedocs.org/projects/modin/badge/?version=latest\" height=28 align=\"center\"></a> |\n\n</div>\n\n<p align=\"center\">\n<a href=\"https://pepy.tech/project/modin\"><img src=\"https://static.pepy.tech/personalized-badge/modin?period=total&units=international_system&left_color=black&right_color=blue&left_text=Downloads\" align=\"center\"></a>\n<a href=\"https://codecov.io/gh/modin-project/modin\"><img src=\"https://codecov.io/gh/modin-project/modin/branch/main/graph/badge.svg\" align=\"center\"/></a>\n<a href=\"https://github.com/modin-project/modin/actions/workflows/push-to-main.yml?query=event%3Apush\"><img src=\"https://github.com/modin-project/modin/actions/workflows/push-to-main.yml/badge.svg?branch=main\" align=\"center\"></a>\n<a href=\"https://github.com/modin-project/modin/actions/workflows/ci.yml?query=event%3Apush\"><img 
src=\"https://github.com/modin-project/modin/actions/workflows/ci.yml/badge.svg?branch=main\" align=\"center\"></a>\n<a href=\"https://pypi.org/project/modin/\"><img src=\"https://badge.fury.io/py/modin.svg\" alt=\"PyPI version\" align=\"center\"></a>\n<a href=\"https://modin.org/modin-bench/#/\"><img src=\"https://img.shields.io/badge/benchmarked%20by-asv-blue.svg\" align=\"center\"></a>\n</p>\n\n### What is Modin?\n\nModin is a drop-in replacement for [pandas](https://github.com/pandas-dev/pandas). While pandas is\nsingle-threaded, Modin lets you instantly speed up your workflows by scaling pandas so it uses all of your\ncores. Modin works especially well on larger datasets, where pandas becomes painfully slow or runs\n[out of memory](https://modin.readthedocs.io/en/latest/getting_started/why_modin/out_of_core.html).\nAlso, Modin comes with the [additional APIs](https://modin.readthedocs.io/en/latest/usage_guide/advanced_usage/index.html#additional-apis)\nto improve user experience.\n\nBy simply replacing the import statement, Modin offers users effortless speed and scale for their pandas workflows:\n\n<img src=\"https://github.com/modin-project/modin/raw/main/docs/img/Import.gif\" style=\"display: block;margin-left: auto;margin-right: auto;\" width=\"100%\"></img>\n\nIn the GIFs below, Modin (left) and pandas (right) perform *the same pandas operations* on a 2GB dataset. The only difference between the two notebook examples is the import statement. 
\n\n<table class=\"tg\">\n<thead>\n  <tr>\n    <th class=\"tg-0lax\" style=\"text-align: center;\"><img src=\"https://github.com/modin-project/modin/raw/7c009c747caa90554607e30b9ac2bd1b190b8c7d/docs/img/MODIN_ver2_hrz.png?raw=True\" height=\"35px\"></th>\n    <th class=\"tg-0lax\" style=\"text-align: center;\"><img src=\"https://pandas.pydata.org/static/img/pandas.svg\" height=\"50px\"></img></th>\n  </tr>\n</thead>\n<tbody>\n  <tr>\n    <td class=\"tg-0lax\"><img src=\"https://github.com/modin-project/modin/raw/7c009c747caa90554607e30b9ac2bd1b190b8c7d/docs/img/Modin.gif\"></img></td>\n    <td class=\"tg-0lax\"><img src=\"https://github.com/modin-project/modin/raw/7c009c747caa90554607e30b9ac2bd1b190b8c7d/docs/img/Pandas.gif\"></img></td>\n  </tr>\n</tbody>\n</table>\n\nThe charts below show the speedup you get by replacing pandas with Modin based on the examples above. The example notebooks can be found [here](examples/jupyter). To learn more about the speedups you could get with Modin, check out our [10-minute quickstart guide](https://modin.readthedocs.io/en/latest/getting_started/quickstart.html) and try out some examples on your own!\n\n<img src=\"https://github.com/modin-project/modin/raw/7c009c747caa90554607e30b9ac2bd1b190b8c7d/docs/img/Modin_Speedup.svg\" style=\"display: block;margin-left: auto;margin-right: auto;\" width=\"100%\"></img>\n\n### Installation\n\n#### From PyPI\n\nModin can be installed with `pip` on Linux, Windows and MacOS:\n\n```bash\npip install \"modin[all]\" # (Recommended) Install Modin with Ray and Dask engines.\n```\n\nIf you want to install Modin with a specific engine, we recommend:\n\n```bash\npip install \"modin[ray]\" # Install Modin dependencies and Ray.\npip install \"modin[dask]\" # Install Modin dependencies and Dask.\npip install \"modin[mpi]\" # Install Modin dependencies and MPI through unidist.\n```\n\nTo get Modin on MPI through unidist (as of unidist 0.5.0) fully working\nit is 
required to have a working MPI implementation installed beforehand.\nOtherwise, installation of `modin[mpi]` may fail. Refer to\n[Installing with pip](https://unidist.readthedocs.io/en/latest/installation.html#installing-with-pip)\nsection of the unidist documentation for more details about installation.\n\n**Note:** Since Modin 0.30.0 we use a reduced set of Ray dependencies: `ray` instead of `ray[default]`.\nThis means that the dashboard and cluster launcher are no longer installed by default.\nIf you need those, consider installing `ray[default]` along with `modin[ray]`.\n\nModin automatically detects which engine(s) you have installed and uses that for scheduling computation.\n\n#### From conda-forge\n\nInstalling from [conda forge](https://github.com/conda-forge/modin-feedstock) using `modin-all`\nwill install Modin and three engines: [Ray](https://github.com/ray-project/ray), [Dask](https://github.com/dask/dask) and\n[MPI through unidist](https://github.com/modin-project/unidist).\n\n```bash\nconda install -c conda-forge modin-all\n```\n\nEach engine can also be installed individually (and also as a combination of several engines):\n\n```bash\nconda install -c conda-forge modin-ray  # Install Modin dependencies and Ray.\nconda install -c conda-forge modin-dask # Install Modin dependencies and Dask.\nconda install -c conda-forge modin-mpi # Install Modin dependencies and MPI through unidist.\n```\n\n**Note:** Since Modin 0.30.0 we use a reduced set of Ray dependencies: `ray-core` instead of `ray-default`.\nThis means that the dashboard and cluster launcher are no longer installed by default.\nIf you need those, consider installing `ray-default` along with `modin-ray`.\n\nRefer to\n[Installing with conda](https://unidist.readthedocs.io/en/latest/installation.html#installing-with-conda)\nsection of the unidist documentation for more details on how to install a specific MPI implementation to run on.\n\nTo speed up conda installation we recommend using libmamba 
solver. To do this install it in a base environment:\n\n```bash\nconda install -n base conda-libmamba-solver\n```\n\nand then use it during installation either like:\n\n```bash\nconda install -c conda-forge modin-ray --experimental-solver=libmamba\n```\n\nor starting from conda 22.11 and libmamba solver 22.12 versions:\n\n```bash\nconda install -c conda-forge modin-ray --solver=libmamba\n```\n\n#### Choosing a Compute Engine\n\nIf you want to choose a specific compute engine to run on, you can set the environment\nvariable `MODIN_ENGINE` and Modin will do computation with that engine:\n\n```bash\nexport MODIN_ENGINE=ray  # Modin will use Ray\nexport MODIN_ENGINE=dask  # Modin will use Dask\nexport MODIN_ENGINE=unidist # Modin will use Unidist\n```\n\nIf you want to choose the Unidist engine, you should set the additional environment \nvariable ``UNIDIST_BACKEND``. Currently, Modin only supports MPI through unidist:\n\n```bash\nexport UNIDIST_BACKEND=mpi # Unidist will use MPI backend\n```\n\nThis can also be done within a notebook/interpreter before you import Modin:\n\n```python\nimport modin.config as modin_cfg\nimport unidist.config as unidist_cfg\n\nmodin_cfg.Engine.put(\"ray\")  # Modin will use Ray\nmodin_cfg.Engine.put(\"dask\")  # Modin will use Dask\n\nmodin_cfg.Engine.put('unidist') # Modin will use Unidist\nunidist_cfg.Backend.put('mpi') # Unidist will use MPI backend\n```\n\n_Note: You should not change the engine after your first operation with Modin as it will result in undefined behavior._\n\n#### Which engine should I use?\n\nOn Linux, MacOS, and Windows you can install and use either Ray, Dask or MPI through unidist. 
No prior knowledge is required\nto use any of these engines, as Modin abstracts away all of the complexity, so feel\nfree to pick any of them!\n\n### Pandas API Coverage\n\n<p align=\"center\">\n\n| pandas Object     | Modin's Ray Engine Coverage                                                          | Modin's Dask Engine Coverage | Modin's Unidist Engine Coverage |\n|-------------------|:------------------------------------------------------------------------------------:|:---------------:|:---------------:|\n| `pd.DataFrame`    | <img src=https://img.shields.io/badge/api%20coverage-90.8%25-hunter.svg> | <img src=https://img.shields.io/badge/api%20coverage-90.8%25-hunter.svg> | <img src=https://img.shields.io/badge/api%20coverage-90.8%25-hunter.svg> |\n| `pd.Series`       | <img src=https://img.shields.io/badge/api%20coverage-88.05%25-green.svg> | <img src=https://img.shields.io/badge/api%20coverage-88.05%25-green.svg> | <img src=https://img.shields.io/badge/api%20coverage-88.05%25-green.svg> |\n| `pd.read_csv`     | ✅                                               | ✅ | ✅ |\n| `pd.read_table`   | ✅                                               | ✅ | ✅ |\n| `pd.read_parquet` | ✅                                               | ✅ | ✅ |\n| `pd.read_sql`     | ✅                                               | ✅ | ✅ |\n| `pd.read_feather` | ✅                                               | ✅ | ✅ |\n| `pd.read_excel`   | ✅                                               | ✅ | ✅ |\n| `pd.read_json`    | [✳️](https://github.com/modin-project/modin/issues/554)                                         | [✳️](https://github.com/modin-project/modin/issues/554) | [✳️](https://github.com/modin-project/modin/issues/554) |\n| `pd.read_<other>` | [✴️](https://modin.readthedocs.io/en/latest/supported_apis/io_supported.html) | [✴️](https://modin.readthedocs.io/en/latest/supported_apis/io_supported.html) | [✴️](https://modin.readthedocs.io/en/latest/supported_apis/io_supported.html) 
|\n\n</p>\nSome pandas APIs are easier to implement than others, so if something is missing feel\nfree to open an issue!\n\n### More about Modin\n\nFor the complete documentation on Modin, visit our [ReadTheDocs](https://modin.readthedocs.io/en/latest/index.html) page.\n\n#### Scale your pandas workflow by changing a single line of code.\n\n_Note: In local mode (without a cluster), Modin will create and manage a local (Dask or Ray) cluster for the execution._\n\nTo use Modin, you do not need to specify how to distribute the data, or even know how many\ncores your system has. In fact, you can continue using your previous\npandas notebooks while experiencing a considerable speedup from Modin, even on a single\nmachine. Once you've changed your import statement, you're ready to use Modin just like\nyou would with pandas!\n\n#### Faster pandas, even on your laptop\n\n<img align=\"right\" style=\"display:inline;\" height=\"350\" width=\"300\" src=\"https://github.com/modin-project/modin/raw/7c009c747caa90554607e30b9ac2bd1b190b8c7d/docs/img/read_csv_benchmark.png?raw=true\"></img>\n\nThe `modin.pandas` DataFrame is an extremely light-weight parallel DataFrame.\nModin transparently distributes the data and computation so that you can continue using the same pandas API\nwhile working with more data faster. Because it is so light-weight,\nModin provides speed-ups of up to 4x on a laptop with 4 physical cores.\n\nIn pandas, you are only able to use one core at a time when you are doing computation of\nany kind. With Modin, you are able to use all of the CPU cores on your machine. Even with a\ntraditionally synchronous task like `read_csv`, we see large speedups by efficiently\ndistributing the work across your entire machine.\n\n```python\nimport modin.pandas as pd\n\ndf = pd.read_csv(\"my_dataset.csv\")\n```\n\n#### Modin can handle the datasets that pandas can't \n\nOften data scientists have to switch between different tools\nfor operating on datasets of different sizes. 
Processing large dataframes with pandas\nis slow, and pandas does not support working with dataframes that are too large to fit\ninto the available memory. As a result, pandas workflows that work well\nfor prototyping on a few MBs of data do not scale to tens or hundreds of GBs (depending on the size\nof your machine). Modin supports operating on data that does not fit in memory, so that you can comfortably\nwork with hundreds of GBs without worrying about substantial slowdown or memory errors.\nWith [cluster](https://modin.readthedocs.io/en/latest/getting_started/using_modin/using_modin_cluster.html)\nand [out of core](https://modin.readthedocs.io/en/latest/getting_started/why_modin/out_of_core.html)\nsupport, Modin is a DataFrame library with both great single-node performance and high\nscalability in a cluster.\n\n#### Modin Architecture\n\nWe designed [Modin's architecture](https://modin.readthedocs.io/en/latest/development/architecture.html)\nto be modular so we can plug in different components as they develop and improve:\n\n<img src=\"https://github.com/modin-project/modin/raw/7c009c747caa90554607e30b9ac2bd1b190b8c7d/docs/img/modin_architecture.png\" alt=\"Modin's architecture\" width=\"75%\"></img>\n\n### Other Resources\n\n#### Getting Started with Modin\n\n- [Documentation](https://modin.readthedocs.io/en/latest/)\n- [10-min Quickstart Guide](https://modin.readthedocs.io/en/latest/getting_started/quickstart.html)\n- [Examples and Tutorials](https://modin.readthedocs.io/en/latest/getting_started/examples.html)\n- [Videos and Blogposts](https://modin.readthedocs.io/en/latest/getting_started/examples.html#talks-podcasts)\n- [Benchmarking Modin](https://modin.readthedocs.io/en/latest/usage_guide/benchmarking.html)\n\n#### Modin Community\n\n- [Slack](https://join.slack.com/t/modin-project/shared_invite/zt-yvk5hr3b-f08p_ulbuRWsAfg9rMY3uA)\n- [Twitter](https://twitter.com/modin_project)\n- [Mailing List](https://groups.google.com/g/modin-dev)\n- [GitHub 
Issues](https://github.com/modin-project/modin/issues)\n- [StackOverflow](https://stackoverflow.com/questions/tagged/modin)\n\n#### Learn More about Modin\n\n- [Frequently Asked Questions (FAQs)](https://modin.readthedocs.io/en/latest/getting_started/faq.html)\n- [Troubleshooting Guide](https://modin.readthedocs.io/en/latest/getting_started/troubleshooting.html)\n- [Development Guide](https://modin.readthedocs.io/en/latest/development/index.html)\n- Modin is built on many years of research and development at UC Berkeley. Check out these selected papers to learn more about how Modin works:\n  - [Flexible Rule-Based Decomposition and Metadata Independence in Modin](https://people.eecs.berkeley.edu/~totemtang/paper/Modin.pdf) (VLDB 2021)\n  - [Dataframe Systems: Theory, Architecture, and Implementation](https://www2.eecs.berkeley.edu/Pubs/TechRpts/2021/EECS-2021-193.pdf) (PhD Dissertation 2021)\n  - [Towards Scalable Dataframe Systems](https://arxiv.org/pdf/2001.00888.pdf) (VLDB 2020)\n\n#### Getting Involved\n\n***`modin.pandas` is currently under active development. Requests and contributions are welcome!***\n\nFor more information on how to contribute to Modin, check out the\n[Modin Contribution Guide](https://modin.readthedocs.io/en/latest/development/contributing.html).\n\n### License\n\n[Apache License 2.0](LICENSE)\n"
  },
  {
    "path": "asv_bench/README.md",
    "content": "# Modin ASV benchmarks\n\n## Here are some scenarios in which [ASV](https://asv.readthedocs.io/en/stable/index.html) can be used:\n\n* Check the impact of the new patch on the performance of a certain set of operations:\n\n  `asv continuous -f 1.05 src/main HEAD -b TimeGroupBy --launch-method=spawn`\n\n* Check for presence of errors inside of benchmarks after changing them or writing new ones:\n\n  `asv run --quick --show-stderr --python=same --launch-method=spawn`\n\n* Run entire benchmark suite to get the current times:\n\n  `asv run --launch-method=spawn`\n\n* Check the range of commits for performance degradation:\n\n  ```\n  asv run [start_hash]..[end_hash] --launch-method=spawn\n  asv publish\n  asv preview\n  ```\n\nFor more consistent results, you may need to use the following parameters which\ndescription is in [ASV docs](https://asv.readthedocs.io/en/stable/benchmarks.html?highlight=sample_time#timing-benchmarks):\n\n* `-a sample_time=1`\n* `-a warmup_time=1`\n\n### Notes about using Modin on Ray with Asv:\n\n* `--launch-method=forkserver` is not working;\n* Each set of parameters for each test is launched in its own process, which brings\n  a large overhead, since for each process redis server and other necessary processes\n  from ray initialization are started and destroyed.\n\n## Adding new benchmark\n\nBasic information on writing benchmarks is present [in ASV documentation](https://asv.readthedocs.io/en/stable/writing_benchmarks.html)\n\nBenchmarks from `benchmarks/benchmarks.py`, `benchmarks/scalability/scalability_benchmarks.py` or `benchmarks/io/csv.py`\ncould be used as a starting point.\n\nRequirements:\n* the benchmark should be able to run both on Modin and on Pandas when the appropriate value\nof the environment variable `MODIN_ASV_USE_IMPL` is selected.\n* the size of the benchmark dataset should depend on the environment variable `MODIN_TEST_DATASET_SIZE`.\n\n## Changing existing benchmark\n\nIt should be remembered that the 
hash calculated from the benchmark source code is used to display the results.\nWhen changing the benchmark, the old results will no longer be displayed in the dashboard. In general, this is the correct\nbehavior so as not to get a situation when incomparable numbers are displayed in the dashboard.\nBut it should be noted that there could be changes in the source code when it is still correct to compare\nthe \"before\" and \"after\" versions, for example, name of a variable changed, comment added, etc.\nIn this case you must either run a new version of the benchmark for all the commits ever accounted for or manually change\nthe hash in the corresponding result files.\n\n## Pipeline for displaying results in a dashboard\n\nStep 1: checking benchmarks for validity, runs in PRs CI.\n  During the test, the benchmarks are run once on small data.\n  The implementation can be found in `test-asv-benchmarks` job of [ci.yml](https://github.com/modin-project/modin/blob/main/.github/workflows/ci.yml)\n\nStep 2: running benchmarks with saving the results in [modin-bench@master](https://github.com/modin-project/modin-bench).\n  The launch takes place on internal server using specific TeamCity configuration.\n  The description of the server can be found in the [\"Benchmark list\"](https://modin.org/modin-bench/#summarylist?sort=0&dir=asc) tab,\n  on the left when you hover the mouse over the machine name. 
\n  This step starts as scheduled (now every half hour), subject to the presence of new commits in the Modin `main` branch.\n  Command to run benchmarks: `asv run HASHFILE:hashfile.txt --show-stderr --machine xeon-e5 --launch-method=spawn`.\n  The file `hashfile.txt` contains the last Modin commit hash.\n  Writing to `modin-bench@master` triggers step 3 of the pipeline.\n\nStep 3: converting the results to html representation, which is saved in [modin-bench@gh-pages](https://github.com/modin-project/modin-bench)\n  The implementation can be found in `deploy-gh-pages` job of [push.yml](https://github.com/modin-project/modin-bench/blob/master/.github/workflows/push.yml)\n\nBasic actions for step 2:\n* set up environment variables:\n  * export MODIN_TEST_DATASET_SIZE=Big\n  * export MODIN_CPUS=44\n* set up git client\n* prepare json file with machine description\n  * This file should be placed in the user's home directory.\n  * ASV does not always automatically create the file with the description of the machine correctly (e.g. due to being run in a container).\n  It is recommended to create a file using [asv machine](https://asv.readthedocs.io/en/stable/commands.html?highlight=machine%20description#asv-machine) command, and manually check the result.\n  [Example](https://github.com/modin-project/modin-bench/blob/master/results/xeon-e5/machine.json)\n* copy old result to folder where new result will appear\n  (conflict resolution will be performed by ASV itself instead of git)\n* push performance result to modin-bench repository\n"
  },
  {
    "path": "asv_bench/asv.conf.dask.json",
    "content": "{\n    // The version of the config file format.  Do not change, unless\n    // you know what you are doing.\n    \"version\": 1,\n\n    // The name of the project being benchmarked\n    \"project\": \"modin\",\n\n    // The project's homepage\n    \"project_url\": \"https://modin.readthedocs.io/\",\n\n    // The URL or local path of the source code repository for the\n    // project being benchmarked\n    \"repo\": \"..\",\n\n    // List of branches to benchmark. If not provided, defaults to \"master\"\n    // (for git) or \"default\" (for mercurial).\n    \"branches\": [\"main\"],\n\n    // Customizable commands for building, installing, and\n    // uninstalling the project. See asv.conf.json documentation.\n    \"install_command\": [\"in-dir={env_dir} python -mpip install {wheel_file}[dask]\"],\n\n    // The tool to use to create environments.  May be \"conda\",\n    // \"virtualenv\" or other value depending on the plugins in use.\n    // If missing or the empty string, the tool will be automatically\n    // determined by looking for tools on the PATH environment\n    // variable.\n    \"environment_type\": \"conda\",\n\n    // timeout in seconds for installing any dependencies in environment\n    // defaults to 10 min\n    \"install_timeout\": 6000,\n\n    // the base URL to show a commit for the project.\n    \"show_commit_url\": \"https://github.com/modin-project/modin/commit/\",\n\n    // The Pythons you'd like to test against.  If not provided, defaults\n    // to the current version of Python used to run `asv`.\n    \"pythons\": [\"3.9\"],\n\n    // The list of conda channel names to be searched for benchmark\n    // dependency packages in the specified order\n    \"conda_channels\": [\"conda-forge\", \"defaults\"],\n\n    // The directory (relative to the current directory) to cache the Python\n    // environments in.  
If not provided, defaults to \"env\"\n    \"env_dir\": \".asv/env\",\n\n    // The directory (relative to the current directory) that raw benchmark\n    // results are stored in.  If not provided, defaults to \"results\".\n    \"results_dir\": \".asv/results\",\n\n    // The directory (relative to the current directory) that the html tree\n    // should be written to.  If not provided, defaults to \"html\".\n    \"html_dir\": \".asv/html\",\n}\n"
  },
  {
    "path": "asv_bench/asv.conf.json",
    "content": "{\n    // The version of the config file format.  Do not change, unless\n    // you know what you are doing.\n    \"version\": 1,\n\n    // The name of the project being benchmarked\n    \"project\": \"modin\",\n\n    // The project's homepage\n    \"project_url\": \"https://modin.readthedocs.io/\",\n\n    // The URL or local path of the source code repository for the\n    // project being benchmarked\n    \"repo\": \"..\",\n\n    // List of branches to benchmark. If not provided, defaults to \"master\"\n    // (for git) or \"default\" (for mercurial).\n    \"branches\": [\"main\"],\n\n    // Customizable commands for building, installing, and\n    // uninstalling the project. See asv.conf.json documentation.\n    \"install_command\": [\"in-dir={env_dir} python -mpip install {wheel_file}[ray]\"],\n\n    // The tool to use to create environments.  May be \"conda\",\n    // \"virtualenv\" or other value depending on the plugins in use.\n    // If missing or the empty string, the tool will be automatically\n    // determined by looking for tools on the PATH environment\n    // variable.\n    \"environment_type\": \"conda\",\n\n    // timeout in seconds for installing any dependencies in environment\n    // defaults to 10 min\n    \"install_timeout\": 6000,\n\n    // the base URL to show a commit for the project.\n    \"show_commit_url\": \"https://github.com/modin-project/modin/commit/\",\n\n    // The Pythons you'd like to test against.  If not provided, defaults\n    // to the current version of Python used to run `asv`.\n    \"pythons\": [\"3.9\"],\n\n    // The list of conda channel names to be searched for benchmark\n    // dependency packages in the specified order\n    \"conda_channels\": [\"conda-forge\", \"defaults\"],\n\n    // The directory (relative to the current directory) to cache the Python\n    // environments in.  
If not provided, defaults to \"env\"\n    \"env_dir\": \".asv/env\",\n\n    // The directory (relative to the current directory) that raw benchmark\n    // results are stored in.  If not provided, defaults to \"results\".\n    \"results_dir\": \".asv/results\",\n\n    // The directory (relative to the current directory) that the html tree\n    // should be written to.  If not provided, defaults to \"html\".\n    \"html_dir\": \".asv/html\",\n}\n"
  },
  {
    "path": "asv_bench/asv.conf.unidist.json",
    "content": "{\n    // The version of the config file format.  Do not change, unless\n    // you know what you are doing.\n    \"version\": 1,\n\n    // The name of the project being benchmarked\n    \"project\": \"modin\",\n\n    // The project's homepage\n    \"project_url\": \"https://modin.readthedocs.io/\",\n\n    // The URL or local path of the source code repository for the\n    // project being benchmarked\n    \"repo\": \"..\",\n\n    // List of branches to benchmark. If not provided, defaults to \"master\"\n    // (for git) or \"default\" (for mercurial).\n    \"branches\": [\"main\"],\n\n    // Customizable commands for building, installing, and\n    // uninstalling the project. See asv.conf.json documentation.\n    \"install_command\": [\"in-dir={env_dir} python -mpip install {wheel_file}[unidist]\"],\n\n    // The tool to use to create environments.  May be \"conda\",\n    // \"virtualenv\" or other value depending on the plugins in use.\n    // If missing or the empty string, the tool will be automatically\n    // determined by looking for tools on the PATH environment\n    // variable.\n    \"environment_type\": \"conda\",\n\n    // timeout in seconds for installing any dependencies in environment\n    // defaults to 10 min\n    \"install_timeout\": 6000,\n\n    // the base URL to show a commit for the project.\n    \"show_commit_url\": \"https://github.com/modin-project/modin/commit/\",\n\n    // The Pythons you'd like to test against.  If not provided, defaults\n    // to the current version of Python used to run `asv`.\n    \"pythons\": [\"3.9\"],\n\n    // The list of conda channel names to be searched for benchmark\n    // dependency packages in the specified order\n    \"conda_channels\": [\"conda-forge\", \"defaults\"],\n\n    // The directory (relative to the current directory) to cache the Python\n    // environments in.  
If not provided, defaults to \"env\"\n    \"env_dir\": \".asv/env\",\n\n    // The directory (relative to the current directory) that raw benchmark\n    // results are stored in.  If not provided, defaults to \"results\".\n    \"results_dir\": \".asv/results\",\n\n    // The directory (relative to the current directory) that the html tree\n    // should be written to.  If not provided, defaults to \"html\".\n    \"html_dir\": \".asv/html\",\n}\n"
  },
  {
    "path": "asv_bench/benchmarks/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Modin benchmarks.\"\"\"\n"
  },
  {
    "path": "asv_bench/benchmarks/benchmarks.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"General Modin benchmarks.\"\"\"\n\n# define `MODIN_CPUS` env var to control the number of partitions\n# it should be defined before modin.pandas import (in case of using os.environ)\n\n# define `MODIN_ASV_USE_IMPL` env var to choose library for using in performance\n# measurements\n\nimport math\n\nimport numpy as np\n\nfrom .utils import (\n    GROUPBY_NGROUPS,\n    IMPL,\n    RAND_HIGH,\n    RAND_LOW,\n    execute,\n    gen_nan_data,\n    generate_dataframe,\n    get_benchmark_shapes,\n    random_booleans,\n    random_columns,\n    random_string,\n    translator_groupby_ngroups,\n)\n\n\nclass BaseTimeGroupBy:\n    def setup(self, shape, ngroups=5, groupby_ncols=1):\n        ngroups = translator_groupby_ngroups(ngroups, shape)\n        self.df, self.groupby_columns = generate_dataframe(\n            \"int\",\n            *shape,\n            RAND_LOW,\n            RAND_HIGH,\n            groupby_ncols,\n            count_groups=ngroups,\n        )\n\n\nclass TimeGroupByMultiColumn(BaseTimeGroupBy):\n    param_names = [\"shape\", \"ngroups\", \"groupby_ncols\"]\n    params = [\n        get_benchmark_shapes(\"TimeGroupByMultiColumn\"),\n        
GROUPBY_NGROUPS,\n        [6],\n    ]\n\n    def time_groupby_agg_quan(self, *args, **kwargs):\n        execute(self.df.groupby(by=self.groupby_columns).agg(\"quantile\"))\n\n    def time_groupby_agg_mean(self, *args, **kwargs):\n        execute(self.df.groupby(by=self.groupby_columns).apply(lambda df: df.mean()))\n\n\nclass TimeGroupByDefaultAggregations(BaseTimeGroupBy):\n    param_names = [\"shape\", \"ngroups\"]\n    params = [\n        get_benchmark_shapes(\"TimeGroupByDefaultAggregations\"),\n        GROUPBY_NGROUPS,\n    ]\n\n    def time_groupby_count(self, *args, **kwargs):\n        execute(self.df.groupby(by=self.groupby_columns).count())\n\n    def time_groupby_size(self, *args, **kwargs):\n        execute(self.df.groupby(by=self.groupby_columns).size())\n\n    def time_groupby_sum(self, *args, **kwargs):\n        execute(self.df.groupby(by=self.groupby_columns).sum())\n\n    def time_groupby_mean(self, *args, **kwargs):\n        execute(self.df.groupby(by=self.groupby_columns).mean())\n\n\nclass TimeGroupByDictionaryAggregation(BaseTimeGroupBy):\n    param_names = [\"shape\", \"ngroups\", \"operation_type\"]\n    params = [\n        get_benchmark_shapes(\"TimeGroupByDictionaryAggregation\"),\n        GROUPBY_NGROUPS,\n        [\"reduce\", \"aggregation\"],\n    ]\n    operations = {\n        \"reduce\": [\"sum\", \"count\", \"prod\"],\n        \"aggregation\": [\"quantile\", \"std\", \"median\"],\n    }\n\n    def setup(self, shape, ngroups, operation_type):\n        super().setup(shape, ngroups)\n        self.cols_to_agg = self.df.columns[1:4]\n        operations = self.operations[operation_type]\n        self.agg_dict = {\n            c: operations[i % len(operations)] for i, c in enumerate(self.cols_to_agg)\n        }\n\n    def time_groupby_dict_agg(self, *args, **kwargs):\n        execute(self.df.groupby(by=self.groupby_columns).agg(self.agg_dict))\n\n\nclass TimeJoin:\n    param_names = [\"shapes\", \"how\", \"sort\"]\n    params = [\n        
get_benchmark_shapes(\"TimeJoin\"),\n        [\"left\", \"inner\"],\n        [False],\n    ]\n\n    def setup(self, shapes, how, sort):\n        self.df1 = generate_dataframe(\"int\", *shapes[0], RAND_LOW, RAND_HIGH)\n        self.df2 = generate_dataframe(\"int\", *shapes[1], RAND_LOW, RAND_HIGH)\n\n    def time_join(self, shapes, how, sort):\n        # join dataframes on index to get the predictable shape\n        execute(self.df1.join(self.df2, how=how, lsuffix=\"left_\", sort=sort))\n\n\nclass TimeJoinStringIndex:\n    param_names = [\"shapes\", \"sort\"]\n    params = [\n        get_benchmark_shapes(\"TimeJoinStringIndex\"),\n        [True, False],\n    ]\n\n    def setup(self, shapes, sort):\n        assert shapes[0] % 100 == 0, \"implementation restriction\"\n        level1 = IMPL.Index([f\"i-{i}\" for i in range(10)], dtype=object).values\n        level2 = IMPL.Index(\n            [f\"i-{i}\" for i in range(shapes[0] // 100)], dtype=object\n        ).values\n        codes1 = np.arange(10).repeat(shapes[0] // 100)\n        codes2 = np.tile(np.arange(shapes[0] // 100), 10)\n        index2 = IMPL.MultiIndex(levels=[level1, level2], codes=[codes1, codes2])\n        self.df_multi = IMPL.DataFrame(\n            np.random.randn(len(index2), 4), index=index2, columns=[\"A\", \"B\", \"C\", \"D\"]\n        )\n\n        self.key1 = np.tile(level1.take(codes1), 10)\n        self.key2 = np.tile(level2.take(codes2), 10)\n        self.df = generate_dataframe(\"int\", *shapes, RAND_LOW, RAND_HIGH)\n        # just to keep source shape\n        self.df = self.df.drop(columns=self.df.columns[-2:])\n        self.df[\"key1\"] = self.key1\n        self.df[\"key2\"] = self.key2\n        execute(self.df)\n\n        self.df_key1 = IMPL.DataFrame(\n            np.random.randn(len(level1), 4), index=level1, columns=[\"A\", \"B\", \"C\", \"D\"]\n        )\n        self.df_key2 = IMPL.DataFrame(\n            np.random.randn(len(level2), 4), index=level2, columns=[\"A\", \"B\", \"C\", 
\"D\"]\n        )\n\n    def time_join_dataframe_index_multi(self, shapes, sort):\n        execute(self.df.join(self.df_multi, on=[\"key1\", \"key2\"], sort=sort))\n\n    def time_join_dataframe_index_single_key_bigger(self, shapes, sort):\n        execute(self.df.join(self.df_key2, on=\"key2\", sort=sort))\n\n    def time_join_dataframe_index_single_key_small(self, shapes, sort):\n        execute(self.df.join(self.df_key1, on=\"key1\", sort=sort))\n\n\nclass TimeMergeDefault:\n    param_names = [\"shapes\", \"how\", \"sort\"]\n    params = [\n        get_benchmark_shapes(\"TimeMergeDefault\"),\n        [\"left\", \"inner\"],\n        [True, False],\n    ]\n\n    def setup(self, shapes, how, sort):\n        self.df1 = generate_dataframe(\"int\", *shapes[0], RAND_LOW, RAND_HIGH)\n        self.df2 = generate_dataframe(\"int\", *shapes[1], RAND_LOW, RAND_HIGH)\n\n    def time_merge(self, shapes, how, sort):\n        execute(IMPL.merge(self.df1, self.df2, how=how, sort=sort))\n\n\nclass TimeMerge:\n    param_names = [\"shapes\", \"how\", \"sort\"]\n    params = [\n        get_benchmark_shapes(\"TimeMerge\"),\n        [\"left\", \"inner\"],\n        [True, False],\n    ]\n\n    def setup(self, shapes, how, sort):\n        self.df1 = generate_dataframe(\"int\", *shapes[0], RAND_LOW, RAND_HIGH)\n        self.df2 = generate_dataframe(\"int\", *shapes[1], RAND_LOW, RAND_HIGH)\n\n    def time_merge(self, shapes, how, sort):\n        # merge dataframes by index to get the predictable shape\n        execute(\n            self.df1.merge(\n                self.df2, left_index=True, right_index=True, how=how, sort=sort\n            )\n        )\n\n    def time_merge_dataframe_empty_right(self, shapes, how, sort):\n        # Getting an empty dataframe using `iloc` should be very fast,\n        # so the impact on the time of the merge operation should be negligible.\n        execute(IMPL.merge(self.df1, self.df2.iloc[:0], how=how, sort=sort))\n\n    def 
time_merge_dataframe_empty_left(self, shapes, how, sort):\n        # Getting an empty dataframe using `iloc` should be very fast,\n        # so the impact on the time of the merge operation should be negligible.\n        execute(IMPL.merge(self.df1.iloc[:0], self.df2, how=how, sort=sort))\n\n\nclass TimeMergeCategoricals:\n    param_names = [\"shapes\", \"data_type\"]\n    params = [\n        get_benchmark_shapes(\"MergeCategoricals\"),\n        [\"object\", \"category\"],\n    ]\n\n    def setup(self, shapes, data_type):\n        assert len(shapes) == 2\n        assert shapes[1] == 2\n        size = (shapes[0],)\n        self.left = IMPL.DataFrame(\n            {\n                \"X\": np.random.choice(range(0, 10), size=size),\n                \"Y\": np.random.choice([\"one\", \"two\", \"three\"], size=size),\n            }\n        )\n\n        self.right = IMPL.DataFrame(\n            {\n                \"X\": np.random.choice(range(0, 10), size=size),\n                \"Z\": np.random.choice([\"jjj\", \"kkk\", \"sss\"], size=size),\n            }\n        )\n\n        if data_type == \"category\":\n            self.left = self.left.assign(Y=self.left[\"Y\"].astype(\"category\"))\n            execute(self.left)\n            self.right = self.right.assign(Z=self.right[\"Z\"].astype(\"category\"))\n            execute(self.right)\n\n    def time_merge_categoricals(self, shapes, data_type):\n        execute(IMPL.merge(self.left, self.right, on=\"X\"))\n\n\nclass TimeConcat:\n    param_names = [\"shapes\", \"how\", \"axis\", \"ignore_index\"]\n    params = [\n        get_benchmark_shapes(\"TimeConcat\"),\n        [\"inner\", \"outer\"],\n        [0, 1],\n        [True, False],\n    ]\n\n    def setup(self, shapes, how, axis, ignore_index):\n        self.df1 = generate_dataframe(\"int\", *shapes[0], RAND_LOW, RAND_HIGH)\n        self.df2 = generate_dataframe(\"int\", *shapes[1], RAND_LOW, RAND_HIGH)\n\n    def time_concat(self, shapes, how, axis, ignore_index):\n   
     execute(\n            IMPL.concat(\n                [self.df1, self.df2], axis=axis, join=how, ignore_index=ignore_index\n            )\n        )\n\n\nclass TimeBinaryOp:\n    param_names = [\"shapes\", \"binary_op\", \"axis\"]\n    params = [\n        get_benchmark_shapes(\"TimeBinaryOp\"),\n        [\"mul\"],\n        [0, 1],\n    ]\n\n    def setup(self, shapes, binary_op, axis):\n        self.df1 = generate_dataframe(\"int\", *shapes[0], RAND_LOW, RAND_HIGH)\n        self.df2 = generate_dataframe(\"int\", *shapes[1], RAND_LOW, RAND_HIGH)\n        self.op = getattr(self.df1, binary_op)\n\n    def time_binary_op(self, shapes, binary_op, axis):\n        execute(self.op(self.df2, axis=axis))\n\n\nclass TimeBinaryOpSeries:\n    param_names = [\"shapes\", \"binary_op\"]\n    params = [\n        get_benchmark_shapes(\"TimeBinaryOpSeries\"),\n        [\"mul\"],\n    ]\n\n    def setup(self, shapes, binary_op):\n        df1 = generate_dataframe(\"int\", *shapes[0], RAND_LOW, RAND_HIGH)\n        df2 = generate_dataframe(\"int\", *shapes[1], RAND_LOW, RAND_HIGH)\n        self.series1 = df1[df1.columns[0]]\n        self.series2 = df2[df2.columns[0]]\n        self.op = getattr(self.series1, binary_op)\n        execute(self.series1)\n        execute(self.series2)\n\n    def time_binary_op_series(self, shapes, binary_op):\n        execute(self.op(self.series2))\n\n\nclass BaseTimeSetItem:\n    param_names = [\"shape\", \"item_length\", \"loc\", \"is_equal_indices\"]\n\n    @staticmethod\n    def get_loc(df, loc, axis, item_length):\n        locs_dict = {\n            \"zero\": 0,\n            \"middle\": len(df.axes[axis]) // 2,\n            \"last\": len(df.axes[axis]) - 1,\n        }\n        base_loc = locs_dict[loc]\n        range_based_loc = np.arange(\n            base_loc, min(len(df.axes[axis]), base_loc + item_length)\n        )\n        return (\n            (df.axes[axis][base_loc], base_loc)\n            if len(range_based_loc) == 1\n            else 
(df.axes[axis][range_based_loc], range_based_loc)\n        )\n\n    def setup(self, shape, item_length, loc, is_equal_indices):\n        self.df = generate_dataframe(\"int\", *shape, RAND_LOW, RAND_HIGH).copy()\n        self.loc, self.iloc = self.get_loc(\n            self.df, loc, item_length=item_length, axis=1\n        )\n\n        self.item = self.df[self.loc] + 1\n        self.item_raw = self.item.to_numpy()\n        if not is_equal_indices:\n            self.item.index = reversed(self.item.index)\n\n\nclass TimeSetItem(BaseTimeSetItem):\n    params = [\n        get_benchmark_shapes(\"TimeSetItem\"),\n        [1],\n        [\"zero\", \"middle\", \"last\"],\n        [True, False],\n    ]\n\n    def time_setitem_qc(self, *args, **kwargs):\n        self.df[self.loc] = self.item\n        execute(self.df)\n\n    def time_setitem_raw(self, *args, **kwargs):\n        self.df[self.loc] = self.item_raw\n        execute(self.df)\n\n\nclass TimeInsert(BaseTimeSetItem):\n    params = [\n        get_benchmark_shapes(\"TimeInsert\"),\n        [1],\n        [\"zero\", \"middle\", \"last\"],\n        [True, False],\n    ]\n\n    def time_insert_qc(self, *args, **kwargs):\n        self.df.insert(loc=self.iloc, column=random_string(), value=self.item)\n        execute(self.df)\n\n    def time_insert_raw(self, *args, **kwargs):\n        self.df.insert(loc=self.iloc, column=random_string(), value=self.item_raw)\n        execute(self.df)\n\n\nclass TimeArithmetic:\n    param_names = [\"shape\", \"axis\"]\n    params = [\n        get_benchmark_shapes(\"TimeArithmetic\"),\n        [0, 1],\n    ]\n\n    def setup(self, shape, axis):\n        self.df = generate_dataframe(\"int\", *shape, RAND_LOW, RAND_HIGH)\n\n    def time_sum(self, shape, axis):\n        execute(self.df.sum(axis=axis))\n\n    def time_count(self, shape, axis):\n        execute(self.df.count(axis=axis))\n\n    def time_median(self, shape, axis):\n        execute(self.df.median(axis=axis))\n\n    def 
time_nunique(self, shape, axis):\n        execute(self.df.nunique(axis=axis))\n\n    def time_apply(self, shape, axis):\n        execute(self.df.apply(lambda df: df.sum(), axis=axis))\n\n    def time_mean(self, shape, axis):\n        execute(self.df.mean(axis=axis))\n\n    def time_mode(self, shape, axis):\n        execute(self.df.mode(axis=axis))\n\n    def time_add(self, shape, axis):\n        execute(self.df.add(2, axis=axis))\n\n    def time_mul(self, shape, axis):\n        execute(self.df.mul(2, axis=axis))\n\n    def time_mod(self, shape, axis):\n        execute(self.df.mod(2, axis=axis))\n\n    def time_abs(self, shape, axis):\n        execute(self.df.abs())\n\n    def time_aggregate(self, shape, axis):\n        execute(self.df.aggregate(lambda df: df.sum(), axis=axis))\n\n    def time_is_in(self, shape, axis):\n        execute(self.df.isin([0, 2]))\n\n    def time_transpose(self, shape, axis):\n        execute(self.df.transpose())\n\n\nclass TimeSortValues:\n    param_names = [\"shape\", \"columns_number\", \"ascending_list\"]\n    params = [\n        get_benchmark_shapes(\"TimeSortValues\"),\n        [1, 2, 10, 100],\n        [False, True],\n    ]\n\n    def setup(self, shape, columns_number, ascending_list):\n        self.df = generate_dataframe(\"int\", *shape, RAND_LOW, RAND_HIGH)\n        self.columns = random_columns(self.df.columns, columns_number)\n        self.ascending = (\n            random_booleans(columns_number)\n            if ascending_list\n            else bool(random_booleans(1)[0])\n        )\n\n    def time_sort_values(self, shape, columns_number, ascending_list):\n        execute(self.df.sort_values(self.columns, ascending=self.ascending))\n\n\nclass TimeDrop:\n    param_names = [\"shape\", \"axis\", \"drop_ncols\"]\n    params = [\n        get_benchmark_shapes(\"TimeDrop\"),\n        [0, 1],\n        [1, 0.8],\n    ]\n\n    def setup(self, shape, axis, drop_ncols):\n        self.df = generate_dataframe(\"int\", *shape, RAND_LOW, 
RAND_HIGH)\n        drop_count = (\n            int(len(self.df.axes[axis]) * drop_ncols)\n            if isinstance(drop_ncols, float)\n            else drop_ncols\n        )\n        self.labels = self.df.axes[axis][:drop_count]\n\n    def time_drop(self, shape, axis, drop_ncols):\n        execute(self.df.drop(self.labels, axis=axis))\n\n\nclass TimeHead:\n    param_names = [\"shape\", \"head_count\"]\n    params = [\n        get_benchmark_shapes(\"TimeHead\"),\n        [5, 0.8],\n    ]\n\n    def setup(self, shape, head_count):\n        self.df = generate_dataframe(\"int\", *shape, RAND_LOW, RAND_HIGH)\n        self.head_count = (\n            int(head_count * len(self.df.index))\n            if isinstance(head_count, float)\n            else head_count\n        )\n\n    def time_head(self, shape, head_count):\n        execute(self.df.head(self.head_count))\n\n\nclass TimeTail:\n    param_names = [\"shape\", \"tail_count\"]\n    params = [\n        get_benchmark_shapes(\"TimeTail\"),\n        [5, 0.8],\n    ]\n\n    def setup(self, shape, tail_count):\n        self.df = generate_dataframe(\"int\", *shape, RAND_LOW, RAND_HIGH)\n        self.tail_count = (\n            int(tail_count * len(self.df.index))\n            if isinstance(tail_count, float)\n            else tail_count\n        )\n\n    def time_tail(self, shape, tail_count):\n        execute(self.df.tail(self.tail_count))\n\n\nclass TimeExplode:\n    param_names = [\"shape\"]\n    params = [\n        get_benchmark_shapes(\"TimeExplode\"),\n    ]\n\n    def setup(self, shape):\n        self.df = generate_dataframe(\n            \"int\", *shape, RAND_LOW, RAND_HIGH, gen_unique_key=True\n        )\n\n    def time_explode(self, shape):\n        execute(self.df.explode(\"col1\"))\n\n\nclass TimeFillnaSeries:\n    param_names = [\"value_type\", \"shape\", \"limit\"]\n    params = [\n        [\"scalar\", \"dict\", \"Series\"],\n        get_benchmark_shapes(\"TimeFillnaSeries\"),\n        [None, 0.8],\n    
]\n\n    def setup(self, value_type, shape, limit):\n        self.series = gen_nan_data(*shape)\n\n        if value_type == \"scalar\":\n            self.value = 18.19\n        elif value_type == \"dict\":\n            self.value = {k: k * 1.23 for k in range(shape[0])}\n        elif value_type == \"Series\":\n            self.value = IMPL.Series(\n                [k * 1.23 for k in range(shape[0])], index=IMPL.RangeIndex(shape[0])\n            )\n        else:\n            assert False\n        limit = int(limit * shape[0]) if limit else None\n        self.kw = {\"value\": self.value, \"limit\": limit}\n\n    def time_fillna(self, value_type, shape, limit):\n        execute(self.series.fillna(**self.kw))\n\n    def time_fillna_inplace(self, value_type, shape, limit):\n        self.series.fillna(inplace=True, **self.kw)\n        execute(self.series)\n\n\nclass TimeFillnaDataFrame:\n    param_names = [\"value_type\", \"shape\", \"limit\"]\n    params = [\n        [\"scalar\", \"dict\", \"DataFrame\", \"Series\"],\n        get_benchmark_shapes(\"TimeFillnaDataFrame\"),\n        [None, 0.8],\n    ]\n\n    def setup(self, value_type, shape, limit):\n        self.df = gen_nan_data(*shape)\n        columns = self.df.columns\n\n        if value_type == \"scalar\":\n            self.value = 18.19\n        elif value_type == \"dict\":\n            self.value = {k: i * 1.23 for i, k in enumerate(columns)}\n        elif value_type == \"Series\":\n            self.value = IMPL.Series(\n                [i * 1.23 for i in range(len(columns))], index=columns\n            )\n        elif value_type == \"DataFrame\":\n            self.value = IMPL.DataFrame(\n                {\n                    k: [i + j * 1.23 for j in range(shape[0])]\n                    for i, k in enumerate(columns)\n                },\n                index=IMPL.RangeIndex(shape[0]),\n                columns=columns,\n            )\n        else:\n            assert False\n        limit = int(limit * 
shape[0]) if limit else None\n        self.kw = {\"value\": self.value, \"limit\": limit}\n\n    def time_fillna(self, value_type, shape, limit):\n        execute(self.df.fillna(**self.kw))\n\n    def time_fillna_inplace(self, value_type, shape, limit):\n        self.df.fillna(inplace=True, **self.kw)\n        execute(self.df)\n\n\nclass BaseTimeValueCounts:\n    def setup(self, shape, ngroups=5, subset=1):\n        ngroups = translator_groupby_ngroups(ngroups, shape)\n        self.df, self.subset = generate_dataframe(\n            \"int\",\n            *shape,\n            RAND_LOW,\n            RAND_HIGH,\n            groupby_ncols=subset,\n            count_groups=ngroups,\n        )\n\n\nclass TimeValueCountsFrame(BaseTimeValueCounts):\n    param_names = [\"shape\", \"ngroups\", \"subset\"]\n    params = [\n        get_benchmark_shapes(\"TimeValueCountsFrame\"),\n        GROUPBY_NGROUPS,\n        [2, 10],\n    ]\n\n    def time_value_counts(self, *args, **kwargs):\n        execute(self.df.value_counts(subset=self.subset))\n\n\nclass TimeValueCountsSeries(BaseTimeValueCounts):\n    param_names = [\"shape\", \"ngroups\", \"bins\"]\n    params = [\n        get_benchmark_shapes(\"TimeValueCountsSeries\"),\n        GROUPBY_NGROUPS,\n        [None, 3],\n    ]\n\n    def setup(self, shape, ngroups, bins):\n        super().setup(ngroups=ngroups, shape=shape)\n        self.df = self.df[self.subset[0]]\n\n    def time_value_counts(self, shape, ngroups, bins):\n        execute(self.df.value_counts(bins=bins))\n\n\nclass TimeIndexing:\n    param_names = [\"shape\", \"indexer_type\"]\n    params = [\n        get_benchmark_shapes(\"TimeIndexing\"),\n        [\n            \"bool_array\",\n            \"bool_series\",\n            \"scalar\",\n            \"slice\",\n            \"continuous_slice\",\n            \"numpy_array_take_all_values\",\n            \"python_list_take_10_values\",\n            \"function\",\n        ],\n    ]\n\n    indexer_getters = {\n        
\"bool_array\": lambda df: np.array([False, True] * (len(df) // 2)),\n        # This boolean-Series is a projection of the source frame, it shouldn't\n        # be reimported or triggered to execute:\n        \"bool_series\": lambda df: df.iloc[:, 0] > 50,\n        \"scalar\": lambda df: len(df) // 2,\n        \"slice\": lambda df: slice(0, len(df), 2),\n        \"continuous_slice\": lambda df: slice(len(df) // 2),\n        \"numpy_array_take_all_values\": lambda df: np.arange(len(df)),\n        \"python_list_take_10_values\": lambda df: list(range(min(10, len(df)))),\n        \"function\": lambda df: (lambda df: df.index[::-2]),\n    }\n\n    def setup(self, shape, indexer_type):\n        self.df = generate_dataframe(\"int\", *shape, RAND_LOW, RAND_HIGH)\n\n        self.indexer = self.indexer_getters[indexer_type](self.df)\n        if isinstance(self.indexer, (IMPL.Series, IMPL.DataFrame)):\n            # HACK: Triggering `dtypes` meta-data computation in advance,\n            # so it won't affect the `loc/iloc` time:\n            self.indexer.dtypes\n\n    def time_iloc(self, shape, indexer_type):\n        # Pandas doesn't implement `df.iloc[series boolean_mask]` and raises an exception on it.\n        # Replacing this with the semantically equivalent construction:\n        if indexer_type != \"bool_series\":\n            execute(self.df.iloc[self.indexer])\n        else:\n            execute(self.df[self.indexer])\n\n    def time_loc(self, shape, indexer_type):\n        execute(self.df.loc[self.indexer])\n\n\nclass TimeIndexingColumns:\n    param_names = [\"shape\"]\n    params = [get_benchmark_shapes(\"TimeIndexing\")]\n\n    def setup(self, shape):\n        self.df = generate_dataframe(\"int\", *shape, RAND_LOW, RAND_HIGH)\n        self.numeric_indexer = [0, 1]\n        self.labels_indexer = self.df.columns[self.numeric_indexer].tolist()\n\n    def time_iloc(self, shape):\n        execute(self.df.iloc[:, self.numeric_indexer])\n\n    def time_loc(self, 
shape):\n        execute(self.df.loc[:, self.labels_indexer])\n\n    def time___getitem__(self, shape):\n        execute(self.df[self.labels_indexer])\n\n\nclass TimeMultiIndexing:\n    param_names = [\"shape\"]\n    params = [get_benchmark_shapes(\"TimeMultiIndexing\")]\n\n    def setup(self, shape):\n        df = generate_dataframe(\"int\", *shape, RAND_LOW, RAND_HIGH)\n\n        index = IMPL.MultiIndex.from_product(\n            [df.index[: shape[0] // 2], [\"bar\", \"foo\"]]\n        )\n        columns = IMPL.MultiIndex.from_product(\n            [df.columns[: shape[1] // 2], [\"buz\", \"fuz\"]]\n        )\n\n        df.index = index\n        df.columns = columns\n\n        self.df = df.sort_index(axis=1)\n\n    def time_multiindex_loc(self, shape):\n        execute(\n            self.df.loc[\n                self.df.index[2] : self.df.index[-2],\n                self.df.columns[2] : self.df.columns[-2],\n            ]\n        )\n\n\nclass TimeResetIndex:\n    param_names = [\"shape\", \"drop\", \"level\"]\n    params = [\n        get_benchmark_shapes(\"TimeResetIndex\"),\n        [False, True],\n        [None, \"level_1\"],\n    ]\n\n    def setup(self, shape, drop, level):\n        self.df = generate_dataframe(\"int\", *shape, RAND_LOW, RAND_HIGH)\n\n        if level:\n            index = IMPL.MultiIndex.from_product(\n                [self.df.index[: shape[0] // 2], [\"bar\", \"foo\"]],\n                names=[\"level_1\", \"level_2\"],\n            )\n            self.df.index = index\n\n    def time_reset_index(self, shape, drop, level):\n        execute(self.df.reset_index(drop=drop, level=level))\n\n\nclass TimeAstype:\n    param_names = [\"shape\", \"dtype\", \"astype_ncolumns\"]\n    params = [\n        get_benchmark_shapes(\"TimeAstype\"),\n        [\"float64\", \"category\"],\n        [\"one\", \"all\"],\n    ]\n\n    def setup(self, shape, dtype, astype_ncolumns):\n        self.df = generate_dataframe(\"int\", *shape, RAND_LOW, RAND_HIGH)\n        
if astype_ncolumns == \"all\":\n            self.astype_arg = dtype\n        elif astype_ncolumns == \"one\":\n            self.astype_arg = {\"col1\": dtype}\n        else:\n            raise ValueError(f\"astype_ncolumns: {astype_ncolumns} isn't supported\")\n\n    def time_astype(self, shape, dtype, astype_ncolumns):\n        execute(self.df.astype(self.astype_arg))\n\n\nclass TimeDescribe:\n    param_names = [\"shape\"]\n    params = [\n        get_benchmark_shapes(\"TimeDescribe\"),\n    ]\n\n    def setup(self, shape):\n        self.df = generate_dataframe(\"int\", *shape, RAND_LOW, RAND_HIGH)\n\n    def time_describe(self, shape):\n        execute(self.df.describe())\n\n\nclass TimeProperties:\n    param_names = [\"shape\"]\n    params = [\n        get_benchmark_shapes(\"TimeProperties\"),\n    ]\n\n    def setup(self, shape):\n        self.df = generate_dataframe(\"int\", *shape, RAND_LOW, RAND_HIGH)\n\n    def time_shape(self, shape):\n        return self.df.shape\n\n    def time_columns(self, shape):\n        return self.df.columns\n\n    def time_index(self, shape):\n        return self.df.index\n\n\nclass TimeIndexingNumericSeries:\n    param_names = [\"shape\", \"dtype\", \"index_structure\"]\n    params = [\n        get_benchmark_shapes(\"TimeIndexingNumericSeries\"),\n        (np.int64, np.uint64, np.float64),\n        (\"unique_monotonic_inc\", \"nonunique_monotonic_inc\"),\n    ]\n\n    def setup(self, shape, dtype, index_structure):\n        N = shape[0]\n        indices = {\n            \"unique_monotonic_inc\": IMPL.Index(range(N), dtype=dtype),\n            \"nonunique_monotonic_inc\": IMPL.Index(\n                list(range(N // 100)) + [(N // 100) - 1] + list(range(N // 100, N - 1)),\n                dtype=dtype,\n            ),\n        }\n        self.data = IMPL.Series(np.random.rand(N), index=indices[index_structure])\n        self.array = np.arange(N // 2)\n        self.index_to_query = N // 2\n        self.array_list = 
self.array.tolist()\n        execute(self.data)\n\n    def time_getitem_scalar(self, shape, index, index_structure):\n        # not calling execute as execute function fails for scalar\n        self.data[self.index_to_query]\n\n    def time_getitem_slice(self, shape, index, index_structure):\n        execute(self.data[: self.index_to_query])\n\n    def time_getitem_list_like(self, shape, index, index_structure):\n        execute(self.data[[self.index_to_query]])\n\n    def time_getitem_array(self, shape, index, index_structure):\n        execute(self.data[self.array])\n\n    def time_getitem_lists(self, shape, index, index_structure):\n        execute(self.data[self.array_list])\n\n    def time_iloc_array(self, shape, index, index_structure):\n        execute(self.data.iloc[self.array])\n\n    def time_iloc_list_like(self, shape, index, index_structure):\n        execute(self.data.iloc[[self.index_to_query]])\n\n    def time_iloc_scalar(self, shape, index, index_structure):\n        # not calling execute as execute function fails for scalar\n        self.data.iloc[self.index_to_query]\n\n    def time_iloc_slice(self, shape, index, index_structure):\n        execute(self.data.iloc[: self.index_to_query])\n\n    def time_loc_array(self, shape, index, index_structure):\n        execute(self.data.loc[self.array])\n\n    def time_loc_list_like(self, shape, index, index_structure):\n        execute(self.data.loc[[self.index_to_query]])\n\n    def time_loc_scalar(self, shape, index, index_structure):\n        self.data.loc[self.index_to_query]\n\n    def time_loc_slice(self, shape, index, index_structure):\n        execute(self.data.loc[: self.index_to_query])\n\n\nclass TimeReindex:\n    param_names = [\"shape\"]\n    params = [get_benchmark_shapes(\"TimeReindex\")]\n\n    def setup(self, shape):\n        rows, cols = shape\n        rng = IMPL.date_range(start=\"1/1/1970\", periods=rows, freq=\"1min\")\n        self.df = IMPL.DataFrame(\n            np.random.rand(rows, 
cols), index=rng, columns=range(cols)\n        )\n        self.df[\"foo\"] = \"bar\"\n        self.rng_subset = IMPL.Index(rng[::2])\n        self.df2 = IMPL.DataFrame(\n            index=range(rows), data=np.random.rand(rows, cols), columns=range(cols)\n        )\n        level1 = IMPL.Index(\n            [f\"i-{i}\" for i in range(rows // 10)], dtype=object\n        ).values.repeat(10)\n        level2 = np.tile(\n            IMPL.Index([f\"i-{i}\" for i in range(10)], dtype=object).values, rows // 10\n        )\n        index = IMPL.MultiIndex.from_arrays([level1, level2])\n        self.s = IMPL.Series(np.random.randn(rows), index=index)\n        self.s_subset = self.s[::2]\n        self.s_subset_no_cache = self.s[::2].copy()\n\n        mi = IMPL.MultiIndex.from_product([rng[: len(rng) // 10], range(10)])\n        self.s2 = IMPL.Series(np.random.randn(len(mi)), index=mi)\n        self.s2_subset = self.s2[::2].copy()\n        execute(self.df), execute(self.df2)\n        execute(self.s), execute(self.s_subset)\n        execute(self.s2), execute(self.s2_subset)\n        execute(self.s_subset_no_cache)\n\n    def time_reindex_dates(self, shape):\n        execute(self.df.reindex(self.rng_subset))\n\n    def time_reindex_columns(self, shape):\n        execute(self.df2.reindex(columns=self.df.columns[1:5]))\n\n    def time_reindex_multiindex_with_cache(self, shape):\n        # MultiIndex._values gets cached (pandas specific)\n        execute(self.s.reindex(self.s_subset.index))\n\n    def time_reindex_multiindex_no_cache(self, shape):\n        # Copy to avoid MultiIndex._values getting cached (pandas specific)\n        execute(self.s.reindex(self.s_subset_no_cache.index.copy()))\n\n    def time_reindex_multiindex_no_cache_dates(self, shape):\n        # Copy to avoid MultiIndex._values getting cached (pandas specific)\n        execute(self.s2_subset.reindex(self.s2.index.copy()))\n\n\nclass TimeReindexMethod:\n    params = [\n        
get_benchmark_shapes(\"TimeReindexMethod\"),\n        [\"pad\", \"backfill\"],\n        [IMPL.date_range, IMPL.period_range],\n    ]\n    param_names = [\"shape\", \"method\", \"constructor\"]\n\n    def setup(self, shape, method, constructor):\n        N = shape[0]\n        self.idx = constructor(\"1/1/2000\", periods=N, freq=\"1min\")\n        self.ts = IMPL.Series(np.random.randn(N), index=self.idx)[::2]\n        execute(self.ts)\n\n    def time_reindex_method(self, shape, method, constructor):\n        execute(self.ts.reindex(self.idx, method=method))\n\n\nclass TimeFillnaMethodSeries:\n    params = [get_benchmark_shapes(\"TimeFillnaMethodSeries\"), [\"pad\", \"backfill\"]]\n    param_names = [\"shape\", \"method\"]\n\n    def setup(self, shape, method):\n        N = shape[0]\n        self.idx = IMPL.date_range(\"1/1/2000\", periods=N, freq=\"1min\")\n        ts = IMPL.Series(np.random.randn(N), index=self.idx)[::2]\n        self.ts_reindexed = ts.reindex(self.idx)\n        self.ts_float32 = self.ts_reindexed.astype(\"float32\")\n        execute(self.ts_reindexed), execute(self.ts_float32)\n\n    def time_reindexed(self, shape, method):\n        execute(self.ts_reindexed.fillna(method=method))\n\n    def time_float_32(self, shape, method):\n        execute(self.ts_float32.fillna(method=method))\n\n\nclass TimeFillnaMethodDataframe:\n    params = [get_benchmark_shapes(\"TimeFillnaMethodDataframe\"), [\"pad\", \"backfill\"]]\n    param_names = [\"shape\", \"method\"]\n\n    def setup(self, shape, method):\n        self.idx = IMPL.date_range(\"1/1/2000\", periods=shape[0], freq=\"1min\")\n        df_ts = IMPL.DataFrame(np.random.randn(*shape), index=self.idx)[::2]\n        self.df_ts_reindexed = df_ts.reindex(self.idx)\n        self.df_ts_float32 = self.df_ts_reindexed.astype(\"float32\")\n        execute(self.df_ts_reindexed), execute(self.df_ts_float32)\n\n    def time_reindexed(self, shape, method):\n        
execute(self.df_ts_reindexed.fillna(method=method))\n\n    def time_float_32(self, shape, method):\n        execute(self.df_ts_float32.fillna(method=method))\n\n\nclass TimeLevelAlign:\n    params = [get_benchmark_shapes(\"TimeLevelAlign\")]\n    param_names = [\"shapes\"]\n\n    def setup(self, shapes):\n        rows, cols = shapes[0]\n        rows_sqrt = round(math.sqrt(rows))\n        # the new number of rows may differ from the requested (slightly, so ok)\n        rows = rows_sqrt * rows_sqrt\n        self.index = IMPL.MultiIndex(\n            levels=[np.arange(10), np.arange(rows_sqrt), np.arange(rows_sqrt)],\n            codes=[\n                np.arange(10).repeat(rows),\n                np.tile(np.arange(rows_sqrt).repeat(rows_sqrt), 10),\n                np.tile(np.tile(np.arange(rows_sqrt), rows_sqrt), 10),\n            ],\n        )\n        self.df1 = IMPL.DataFrame(\n            np.random.randn(len(self.index), cols), index=self.index\n        )\n        self.df2 = IMPL.DataFrame(np.random.randn(*shapes[1]))\n        execute(self.df1), execute(self.df2)\n\n    def time_align_level(self, shapes):\n        left, right = self.df1.align(self.df2, level=1, copy=False)\n        execute(left), execute(right)\n\n    def time_reindex_level(self, shapes):\n        # `reindex` returns the same result here as `align`.\n        # Approximately the same performance is expected.\n        execute(self.df2.reindex(self.index, level=1))\n\n\nclass TimeDropDuplicatesDataframe:\n    params = [get_benchmark_shapes(\"TimeDropDuplicatesDataframe\")]\n    param_names = [\"shape\"]\n\n    def setup(self, shape):\n        rows, cols = shape\n        N = rows // 10\n        K = 10\n        data = {}\n        # dataframe would have cols-1 keys(strings) and one value(int) column\n        for col in range(cols - 1):\n            data[\"key\" + str(col + 1)] = IMPL.Index(\n                [f\"i-{i}\" for i in range(N)], dtype=object\n            ).values.repeat(K)\n        
data[\"value\"] = np.random.randn(N * K)\n        self.df = IMPL.DataFrame(data)\n        execute(self.df)\n\n    def time_drop_dups(self, shape):\n        execute(self.df.drop_duplicates(self.df.columns[:-1]))\n\n    def time_drop_dups_inplace(self, shape):\n        self.df.drop_duplicates(self.df.columns[:-1], inplace=True)\n        execute(self.df)\n\n\nclass TimeDropDuplicatesSeries:\n    params = [get_benchmark_shapes(\"TimeDropDuplicatesSeries\")]\n    param_names = [\"shape\"]\n\n    def setup(self, shape):\n        rows = shape[0]\n        self.series = IMPL.Series(\n            np.tile(\n                IMPL.Index([f\"i-{i}\" for i in range(rows // 10)], dtype=object).values,\n                10,\n            )\n        )\n        execute(self.series)\n\n    def time_drop_dups(self, shape):\n        execute(self.series.drop_duplicates())\n\n    def time_drop_dups_string(self, shape):\n        self.series.drop_duplicates(inplace=True)\n        execute(self.series)\n\n\nclass TimeDatetimeAccessor:\n    params = [get_benchmark_shapes(\"TimeDatetimeAccessor\")]\n    param_names = [\"shape\"]\n\n    def setup(self, shape):\n        self.series = IMPL.Series(\n            IMPL.timedelta_range(\"1 days\", periods=shape[0], freq=\"h\")\n        )\n        execute(self.series)\n\n    def time_dt_accessor(self, shape):\n        execute(self.series.dt)\n\n    def time_timedelta_days(self, shape):\n        execute(self.series.dt.days)\n\n    def time_timedelta_seconds(self, shape):\n        execute(self.series.dt.seconds)\n\n\nclass BaseCategories:\n    def setup(self, shape):\n        rows = shape[0]\n        arr = [f\"s{i:04d}\" for i in np.random.randint(0, rows // 10, size=rows)]\n        self.ts = IMPL.Series(arr).astype(\"category\")\n        execute(self.ts)\n\n\nclass TimeSetCategories(BaseCategories):\n    params = [get_benchmark_shapes(\"TimeSetCategories\")]\n    param_names = [\"shape\"]\n\n    def time_set_categories(self, shape):\n        
execute(self.ts.cat.set_categories(self.ts.cat.categories[::2]))\n\n\nclass TimeRemoveCategories(BaseCategories):\n    params = [get_benchmark_shapes(\"TimeRemoveCategories\")]\n    param_names = [\"shape\"]\n\n    def time_remove_categories(self, shape):\n        execute(self.ts.cat.remove_categories(self.ts.cat.categories[::2]))\n\n\nclass BaseReshape:\n    def setup(self, shape):\n        rows, cols = shape\n        k = 10\n        arrays = [\n            np.arange(rows // k).repeat(k),\n            np.roll(np.tile(np.arange(rows // k), k), 25),\n        ]\n        index = IMPL.MultiIndex.from_arrays(arrays)\n        self.df = IMPL.DataFrame(np.random.randn(rows, cols), index=index)\n        execute(self.df)\n\n\nclass TimeStack(BaseReshape):\n    params = [get_benchmark_shapes(\"TimeStack\")]\n    param_names = [\"shape\"]\n\n    def setup(self, shape):\n        super().setup(shape)\n        self.udf = self.df.unstack(1)\n        execute(self.udf)\n\n    def time_stack(self, shape):\n        execute(self.udf.stack())\n\n\nclass TimeUnstack(BaseReshape):\n    params = [get_benchmark_shapes(\"TimeUnstack\")]\n    param_names = [\"shape\"]\n\n    def time_unstack(self, shape):\n        execute(self.df.unstack(1))\n\n\nclass TimeReplace:\n    params = [get_benchmark_shapes(\"TimeReplace\")]\n    param_names = [\"shape\"]\n\n    def setup(self, shape):\n        rows, cols = shape\n        self.to_replace = {i: getattr(IMPL, \"Timestamp\")(i) for i in range(rows)}\n        self.df = IMPL.DataFrame(np.random.randint(rows, size=(rows, cols)))\n        execute(self.df)\n\n    def time_replace(self, shape):\n        execute(self.df.replace(self.to_replace))\n\n\nclass TimeGroups:\n    params = [get_benchmark_shapes(\"TimeGroups\")]\n    param_names = [\"shape\"]\n\n    def setup(self, shape):\n        self.series = IMPL.Series(np.random.randint(0, 100, size=shape[0]))\n        execute(self.series)\n\n    # returns a pretty dict thus not calling execute\n    def 
time_series_groups(self, shape):\n        self.series.groupby(self.series).groups\n\n    # returns a dict thus not calling execute\n    def time_series_indices(self, shape):\n        self.series.groupby(self.series).indices\n\n\nclass TimeRepr:\n    params = [get_benchmark_shapes(\"TimeRepr\")]\n    param_names = [\"shape\"]\n\n    def setup(self, shape):\n        self.df = IMPL.DataFrame(np.random.randn(*shape))\n        execute(self.df)\n\n    # returns a string thus not calling execute\n    def time_repr(self, shape):\n        repr(self.df)\n\n\nclass TimeMaskBool:\n    params = [get_benchmark_shapes(\"TimeMaskBool\")]\n    param_names = [\"shape\"]\n\n    def setup(self, shape):\n        self.df = IMPL.DataFrame(np.random.randn(*shape))\n        self.mask = self.df < 0\n        execute(self.df), execute(self.mask)\n\n    def time_frame_mask(self, shape):\n        execute(self.df.mask(self.mask))\n\n\nclass TimeIsnull:\n    params = [get_benchmark_shapes(\"TimeIsnull\")]\n    param_names = [\"shape\"]\n\n    def setup(self, shape):\n        sample = np.array([np.nan, 1.0])\n        data = np.random.choice(sample, (shape[0], shape[1]))\n        self.df = IMPL.DataFrame(data)\n        execute(self.df)\n\n    def time_isnull(self, shape):\n        execute(IMPL.isnull(self.df))\n\n\nclass TimeDropna:\n    params = ([\"all\", \"any\"], [0, 1], get_benchmark_shapes(\"TimeDropna\"))\n    param_names = [\"how\", \"axis\", \"shape\"]\n\n    def setup(self, how, axis, shape):\n        row, col = shape\n        self.df = IMPL.DataFrame(np.random.randn(row, col))\n        self.df.iloc[row // 20 : row // 10, col // 3 : col // 2] = np.nan\n        self.df[\"foo\"] = \"bar\"\n        execute(self.df)\n\n    def time_dropna(self, how, axis, shape):\n        execute(self.df.dropna(how=how, axis=axis))\n\n\nclass TimeEquals:\n    params = [get_benchmark_shapes(\"TimeEquals\")]\n    param_names = [\"shape\"]\n\n    def setup(self, shape):\n        self.df = 
IMPL.DataFrame(np.random.randn(*shape))\n        self.df.iloc[-1, -1] = np.nan\n        execute(self.df)\n\n    # returns a boolean thus not calling execute\n    def time_frame_float_equal(self, shape):\n        self.df.equals(self.df)\n\n\nfrom .utils import setup  # noqa: E402, F401\n"
  },
  {
    "path": "asv_bench/benchmarks/io/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"IO Modin benchmarks.\"\"\"\n"
  },
  {
    "path": "asv_bench/benchmarks/io/csv.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport numpy as np\n\nfrom ..utils import (\n    ASV_USE_IMPL,\n    IMPL,\n    RAND_HIGH,\n    RAND_LOW,\n    execute,\n    generate_dataframe,\n    get_benchmark_shapes,\n    get_shape_id,\n    prepare_io_data,\n)\n\n\nclass BaseReadCsv:\n    # test data file should be created only once\n    def setup_cache(self, test_filename=\"io_test_file\"):\n        test_filenames = prepare_io_data(\n            test_filename, self.data_type, get_benchmark_shapes(self.__class__.__name__)\n        )\n        return test_filenames\n\n    def setup(self, test_filenames, shape, *args, **kwargs):\n        # ray init\n        if ASV_USE_IMPL == \"modin\":\n            IMPL.DataFrame([])\n        self.shape_id = get_shape_id(shape)\n\n\nclass TimeReadCsvSkiprows(BaseReadCsv):\n    shapes = get_benchmark_shapes(\"TimeReadCsvSkiprows\")\n    skiprows_mapping = {\n        \"lambda_even_rows\": lambda x: x % 2,\n        \"range_uniform\": np.arange(1, shapes[0][0] // 10),\n        \"range_step2\": np.arange(1, shapes[0][0], 2),\n    }\n    data_type = \"str_int\"\n\n    param_names = [\"shape\", \"skiprows\"]\n    params = [\n        shapes,\n        [None, \"lambda_even_rows\", 
\"range_uniform\", \"range_step2\"],\n    ]\n\n    def setup(self, test_filenames, shape, skiprows):\n        super().setup(test_filenames, shape, skiprows)\n        self.skiprows = self.skiprows_mapping[skiprows] if skiprows else None\n\n    def time_skiprows(self, test_filenames, shape, skiprows):\n        execute(IMPL.read_csv(test_filenames[self.shape_id], skiprows=self.skiprows))\n\n\nclass TimeReadCsvTrueFalseValues(BaseReadCsv):\n    data_type = \"true_false_int\"\n\n    param_names = [\"shape\"]\n    params = [get_benchmark_shapes(\"TimeReadCsvTrueFalseValues\")]\n\n    def time_true_false_values(self, test_filenames, shape):\n        execute(\n            IMPL.read_csv(\n                test_filenames[self.shape_id],\n                true_values=[\"Yes\", \"true\"],\n                false_values=[\"No\", \"false\"],\n            ),\n        )\n\n\nclass TimeReadCsvNamesDtype:\n    shapes = get_benchmark_shapes(\"TimeReadCsvNamesDtype\")\n    _dtypes_params = [\"Int64\", \"Int64_Timestamp\"]\n    _timestamp_columns = [\"col1\", \"col2\"]\n\n    param_names = [\"shape\", \"names\", \"dtype\"]\n    params = [\n        shapes,\n        [\"array-like\"],\n        _dtypes_params,\n    ]\n\n    def _get_file_id(self, shape, dtype):\n        return get_shape_id(shape) + dtype\n\n    def _add_timestamp_columns(self, df):\n        df = df.copy()\n        date_column = IMPL.date_range(\"2000\", periods=df.shape[0], freq=\"ms\")\n        for col in self._timestamp_columns:\n            df[col] = date_column\n        return df\n\n    def setup_cache(self, test_filename=\"io_test_file_csv_names_dtype\"):\n        # filenames with a metadata of saved dataframes\n        cache = {}\n        for shape in self.shapes:\n            for dtype in self._dtypes_params:\n                df = generate_dataframe(\n                    \"int\", *shape, RAND_LOW, RAND_HIGH, impl=\"pandas\"\n                )\n                if dtype == \"Int64_Timestamp\":\n                    df = 
self._add_timestamp_columns(df)\n\n                file_id = self._get_file_id(shape, dtype)\n                cache[file_id] = (\n                    f\"{test_filename}_{file_id}.csv\",\n                    df.columns.to_list(),\n                    df.dtypes.to_dict(),\n                )\n                df.to_csv(cache[file_id][0], index=False)\n        return cache\n\n    def setup(self, cache, shape, names, dtype):\n        # ray init\n        if ASV_USE_IMPL == \"modin\":\n            IMPL.DataFrame([])\n        file_id = self._get_file_id(shape, dtype)\n        self.filename, self.names, self.dtype = cache[file_id]\n\n        self.parse_dates = None\n        if dtype == \"Int64_Timestamp\":\n            # cached version of dtype should not change\n            self.dtype = self.dtype.copy()\n            for col in self._timestamp_columns:\n                del self.dtype[col]\n            self.parse_dates = self._timestamp_columns\n\n    def time_read_csv_names_dtype(self, cache, shape, names, dtype):\n        execute(\n            IMPL.read_csv(\n                self.filename,\n                names=self.names,\n                header=0,\n                dtype=self.dtype,\n                parse_dates=self.parse_dates,\n            )\n        )\n\n\nfrom ..utils import setup  # noqa: E402, F401\n"
  },
  {
    "path": "asv_bench/benchmarks/io/parquet.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nfrom ..utils import (\n    ASV_USE_IMPL,\n    IMPL,\n    execute,\n    get_benchmark_shapes,\n    get_shape_id,\n    prepare_io_data_parquet,\n)\n\n\nclass TimeReadParquet:\n    shapes = get_benchmark_shapes(\"TimeReadParquet\")\n    data_type = \"str_int\"\n\n    param_names = [\"shape\"]\n    params = [\n        shapes,\n    ]\n\n    # test data file should be created only once\n    def setup_cache(self, test_filename=\"io_test_file\"):\n        test_filenames = prepare_io_data_parquet(\n            test_filename, self.data_type, get_benchmark_shapes(self.__class__.__name__)\n        )\n        return test_filenames\n\n    def setup(self, test_filenames, shape):\n        # ray init\n        if ASV_USE_IMPL == \"modin\":\n            IMPL.DataFrame([])\n        self.shape_id = get_shape_id(shape)\n\n    def time_read_parquet(self, test_filenames, shape):\n        execute(\n            IMPL.read_parquet(\n                test_filenames[self.shape_id],\n            )\n        )\n\n\nfrom ..utils import setup  # noqa: E402, F401\n"
  },
  {
    "path": "asv_bench/benchmarks/scalability/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Benchmarks measuring how Modin performance scales when MODIN_CPUS are changed.\"\"\"\n"
  },
  {
    "path": "asv_bench/benchmarks/scalability/scalability_benchmarks.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"These benchmarks are supposed to be run only for modin, since they do not make sense for pandas.\"\"\"\n\nimport modin.pandas as pd\n\ntry:\n    from modin.pandas.io import from_pandas\nexcept ImportError:\n    from modin.pandas.utils import from_pandas\n\ntry:\n    from modin.pandas.io import to_numpy, to_pandas\nexcept ImportError:\n    try:\n        from modin.utils import to_numpy, to_pandas\n    except ImportError:\n        # This provides compatibility with older versions of the Modin, allowing us to test old commits.\n        from modin.pandas.utils import to_pandas\n\nimport pandas\n\nfrom ..utils import (\n    RAND_HIGH,\n    RAND_LOW,\n    execute,\n    gen_data,\n    generate_dataframe,\n    get_benchmark_shapes,\n)\n\n\nclass TimeFromPandas:\n    param_names = [\"shape\", \"cpus\"]\n    params = [\n        get_benchmark_shapes(\"TimeFromPandas\"),\n        [4, 16, 32],\n    ]\n\n    def setup(self, shape, cpus):\n        self.data = pandas.DataFrame(gen_data(\"int\", *shape, RAND_LOW, RAND_HIGH))\n        from modin.config import NPartitions\n\n        NPartitions.get = lambda: cpus\n        # trigger ray init\n        pd.DataFrame([])\n\n    def 
time_from_pandas(self, shape, cpus):\n        execute(from_pandas(self.data))\n\n\nclass TimeToPandas:\n    param_names = [\"shape\", \"cpus\"]\n    params = [\n        get_benchmark_shapes(\"TimeToPandas\"),\n        [4, 16, 32],\n    ]\n\n    def setup(self, shape, cpus):\n        from modin.config import NPartitions\n\n        NPartitions.get = lambda: cpus\n        self.data = generate_dataframe(\"int\", *shape, RAND_LOW, RAND_HIGH, impl=\"modin\")\n\n    def time_to_pandas(self, shape, cpus):\n        # to_pandas is already synchronous\n        to_pandas(self.data)\n\n\nclass TimeToNumPy:\n    param_names = [\"shape\", \"cpus\"]\n    params = [\n        get_benchmark_shapes(\"TimeToNumPy\"),\n        [4, 16, 32],\n    ]\n\n    def setup(self, shape, cpus):\n        from modin.config import NPartitions\n\n        NPartitions.get = lambda: cpus\n        self.data = generate_dataframe(\"int\", *shape, RAND_LOW, RAND_HIGH, impl=\"modin\")\n\n    def time_to_numpy(self, shape, cpus):\n        # to_numpy is already synchronous\n        to_numpy(self.data)\n\n\nfrom ..utils import setup  # noqa: E402, F401\n"
  },
  {
    "path": "asv_bench/benchmarks/utils/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Modin benchmarks utils.\"\"\"\n\nfrom .common import (\n    IMPL,\n    execute,\n    gen_data,\n    gen_nan_data,\n    generate_dataframe,\n    get_shape_id,\n    prepare_io_data,\n    prepare_io_data_parquet,\n    random_booleans,\n    random_columns,\n    random_string,\n    setup,\n    translator_groupby_ngroups,\n)\nfrom .compatibility import ASV_USE_IMPL, ASV_USE_STORAGE_FORMAT\nfrom .data_shapes import GROUPBY_NGROUPS, RAND_HIGH, RAND_LOW, get_benchmark_shapes\n\n__all__ = [\n    \"ASV_USE_IMPL\",\n    \"ASV_USE_STORAGE_FORMAT\",\n    \"RAND_LOW\",\n    \"RAND_HIGH\",\n    \"GROUPBY_NGROUPS\",\n    \"get_benchmark_shapes\",\n    \"IMPL\",\n    \"execute\",\n    \"get_shape_id\",\n    \"gen_data\",\n    \"gen_nan_data\",\n    \"generate_dataframe\",\n    \"prepare_io_data\",\n    \"prepare_io_data_parquet\",\n    \"random_string\",\n    \"random_columns\",\n    \"random_booleans\",\n    \"translator_groupby_ngroups\",\n    \"setup\",\n]\n"
  },
  {
    "path": "asv_bench/benchmarks/utils/common.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"\nThe module contains the functionality that is used when benchmarking Modin commits.\n\nIn the case of using utilities from the main Modin code, there is a chance that when\nbenchmarking old commits, the utilities changed, which in turn can unexpectedly affect\nthe performance results, hence some utility functions are duplicated here.\n\"\"\"\n\nimport logging\nimport uuid\nfrom typing import Optional, Union\n\nimport numpy as np\nimport pandas\n\nimport modin.pandas\n\nfrom .compatibility import ASV_DATASET_SIZE, ASV_USE_ENGINE, ASV_USE_IMPL\nfrom .data_shapes import RAND_HIGH, RAND_LOW\n\nPOSSIBLE_IMPL = {\n    \"modin\": modin.pandas,\n    \"pandas\": pandas,\n}\nIMPL = POSSIBLE_IMPL[ASV_USE_IMPL]\n\n\ndef translator_groupby_ngroups(groupby_ngroups: Union[str, int], shape: tuple) -> int:\n    \"\"\"\n    Translate a string representation of the number of groups, into a number.\n\n    Parameters\n    ----------\n    groupby_ngroups : str or int\n        Number of groups that will be used in `groupby` operation.\n    shape : tuple\n        Same as pandas.Dataframe.shape.\n\n    Returns\n    -------\n    int\n    \"\"\"\n    if ASV_DATASET_SIZE == \"big\":\n 
       if groupby_ngroups == \"huge_amount_groups\":\n            return min(shape[0] // 2, 5000)\n        return groupby_ngroups\n    else:\n        return groupby_ngroups\n\n\nclass weakdict(dict):  # noqa: GL08\n    __slots__ = (\"__weakref__\",)\n\n\ndata_cache = dict()\ndataframes_cache = dict()\n\n\ndef gen_nan_data(nrows: int, ncols: int) -> dict:\n    \"\"\"\n    Generate nan data with caching.\n\n    The generated data are saved in the dictionary and on a subsequent call,\n    if the keys match, saved data will be returned. Therefore, we need\n    to carefully monitor the changing of saved data and make its copy if needed.\n\n    Parameters\n    ----------\n    nrows : int\n        Number of rows.\n    ncols : int\n        Number of columns.\n\n    Returns\n    -------\n    modin.pandas.DataFrame or pandas.DataFrame or modin.pandas.Series or pandas.Series\n        DataFrame or Series with shape (nrows, ncols) or (nrows,), respectively.\n    \"\"\"\n    cache_key = (ASV_USE_IMPL, nrows, ncols)\n    if cache_key in data_cache:\n        return data_cache[cache_key]\n\n    logging.info(\"Generating nan data {} rows and {} columns\".format(nrows, ncols))\n\n    if ncols > 1:\n        columns = [f\"col{x}\" for x in range(ncols)]\n        data = IMPL.DataFrame(np.nan, index=IMPL.RangeIndex(nrows), columns=columns)\n    elif ncols == 1:\n        data = IMPL.Series(np.nan, index=IMPL.RangeIndex(nrows))\n    else:\n        assert False, \"Number of columns (ncols) should be >= 1\"\n\n    data_cache[cache_key] = data\n    return data\n\n\ndef gen_int_data(nrows: int, ncols: int, rand_low: int, rand_high: int) -> dict:\n    \"\"\"\n    Generate int data.\n\n    Parameters\n    ----------\n    nrows : int\n        Number of rows.\n    ncols : int\n        Number of columns.\n    rand_low : int\n        Low bound for random generator.\n    rand_high : int\n        High bound for random generator.\n\n    Returns\n    -------\n    dict\n        Number of keys - `ncols`, 
each of them store np.ndarray of `nrows` length.\n    \"\"\"\n    data = {\n        \"col{}\".format(i): np.random.randint(rand_low, rand_high, size=(nrows))\n        for i in range(ncols)\n    }\n    return data\n\n\ndef gen_str_int_data(nrows: int, ncols: int, rand_low: int, rand_high: int) -> dict:\n    \"\"\"\n    Generate int data and string data.\n\n    Parameters\n    ----------\n    nrows : int\n        Number of rows.\n    ncols : int\n        Number of columns.\n    rand_low : int\n        Low bound for random generator.\n    rand_high : int\n        High bound for random generator.\n\n    Returns\n    -------\n    dict\n        Number of keys - `ncols`, each of them store np.ndarray of `nrows` length.\n        One of the columns with string values.\n    \"\"\"\n    data = gen_int_data(nrows, ncols, rand_low, rand_high).copy()\n    # convert values in arbitary column to string type\n    key = list(data.keys())[0]\n    data[key] = [f\"str_{x}\" for x in data[key]]\n    return data\n\n\ndef gen_true_false_int_data(nrows, ncols, rand_low, rand_high):\n    \"\"\"\n    Generate int data and string data \"true\" and \"false\" values.\n\n    Parameters\n    ----------\n    nrows : int\n        Number of rows.\n    ncols : int\n        Number of columns.\n    rand_low : int\n        Low bound for random generator.\n    rand_high : int\n        High bound for random generator.\n\n    Returns\n    -------\n    dict\n        Number of keys - `ncols`, each of them store np.ndarray of `nrows` length.\n        One half of the columns with integer values, another half - with \"true\" and\n        \"false\" string values.\n    \"\"\"\n    data = gen_int_data(nrows // 2, ncols // 2, rand_low, rand_high)\n\n    data_true_false = {\n        \"tf_col{}\".format(i): np.random.choice(\n            [\"Yes\", \"true\", \"No\", \"false\"], size=(nrows - nrows // 2)\n        )\n        for i in range(ncols - ncols // 2)\n    }\n    data.update(data_true_false)\n    return 
data\n\n\ndef gen_data(\n    data_type: str,\n    nrows: int,\n    ncols: int,\n    rand_low: int,\n    rand_high: int,\n) -> dict:\n    \"\"\"\n    Generate data with caching.\n\n    The generated data are saved in the dictionary and on a subsequent call,\n    if the keys match, saved data will be returned. Therefore, we need\n    to carefully monitor the changing of saved data and make its copy if needed.\n\n    Parameters\n    ----------\n    data_type : {\"int\", \"str_int\", \"true_false_int\"}\n        Type of data generation.\n    nrows : int\n        Number of rows.\n    ncols : int\n        Number of columns.\n    rand_low : int\n        Low bound for random generator.\n    rand_high : int\n        High bound for random generator.\n\n    Returns\n    -------\n    dict\n        Number of keys - `ncols`, each of them store np.ndarray of `nrows` length.\n\n    Notes\n    -----\n    Returned data type depends on the `data_type` parameter in the next way:\n    - `data_type`==\"int\" - all columns will be contain only integer values;\n    - `data_type`==\"str_int\" some of the columns will be of string type;\n    - `data_type`==\"true_false_int\" half of the columns will be filled with\n      string values representing \"true\" and \"false\" values and another half - with\n      integers.\n    \"\"\"\n    type_to_generator = {\n        \"int\": gen_int_data,\n        \"str_int\": gen_str_int_data,\n        \"true_false_int\": gen_true_false_int_data,\n    }\n    cache_key = (data_type, nrows, ncols, rand_low, rand_high)\n    if cache_key in data_cache:\n        return data_cache[cache_key]\n\n    logging.info(\n        \"Generating {} data {} rows and {} columns [{}-{}]\".format(\n            data_type, nrows, ncols, rand_low, rand_high\n        )\n    )\n    assert data_type in type_to_generator\n    data_generator = type_to_generator[data_type]\n\n    data = data_generator(nrows, ncols, rand_low, rand_high)\n    data_cache[cache_key] = weakdict(data)\n\n    
return data\n\n\ndef generate_dataframe(\n    data_type: str,\n    nrows: int,\n    ncols: int,\n    rand_low: int,\n    rand_high: int,\n    groupby_ncols: Optional[int] = None,\n    count_groups: Optional[int] = None,\n    gen_unique_key: bool = False,\n    cache_prefix: str = None,\n    impl: str = None,\n) -> Union[modin.pandas.DataFrame, pandas.DataFrame]:\n    \"\"\"\n    Generate DataFrame with caching.\n\n    The generated dataframes are saved in the dictionary and on a subsequent call,\n    if the keys match, one of the saved dataframes will be returned. Therefore, we need\n    to carefully monitor that operations that change the dataframe work with its copy.\n\n    Parameters\n    ----------\n    data_type : str\n        Type of data generation;\n        supported types: {\"int\", \"str_int\"}.\n    nrows : int\n        Number of rows.\n    ncols : int\n        Number of columns.\n    rand_low : int\n        Low bound for random generator.\n    rand_high : int\n        High bound for random generator.\n    groupby_ncols : int, default: None\n        Number of columns for which `groupby` will be called in the future;\n        to get more stable performance results, we need to have the same number of values\n        in each group every benchmarking time.\n    count_groups : int, default: None\n        Count of groups in groupby columns.\n    gen_unique_key : bool, default: False\n        Generate `col1` column where all elements are unique.\n    cache_prefix : str, optional\n        Prefix to add to the cache key of the requested frame.\n    impl : str, optional\n        Implementation used to create the dataframe;\n        supported implemetations: {\"modin\", \"pandas\"}.\n\n    Returns\n    -------\n    modin.pandas.DataFrame or pandas.DataFrame [and list]\n\n    Notes\n    -----\n    The list of groupby columns names returns when groupby columns are generated.\n    \"\"\"\n    assert not (\n        (groupby_ncols is None) ^ (count_groups is None)\n    
), \"You must either specify both parameters 'groupby_ncols' and 'count_groups' or none of them.\"\n\n    if groupby_ncols and count_groups:\n        ncols -= groupby_ncols\n\n    if impl is None:\n        impl = ASV_USE_IMPL\n\n    cache_key = (\n        impl,\n        data_type,\n        nrows,\n        ncols,\n        rand_low,\n        rand_high,\n        groupby_ncols,\n        count_groups,\n        gen_unique_key,\n    )\n\n    if cache_prefix is not None:\n        cache_key = (cache_prefix, *cache_key)\n\n    if cache_key in dataframes_cache:\n        return dataframes_cache[cache_key]\n\n    logging.info(\n        \"Allocating {} DataFrame {}: {} rows and {} columns [{}-{}]\".format(\n            impl, data_type, nrows, ncols, rand_low, rand_high\n        )\n    )\n    data = gen_data(data_type, nrows, ncols, rand_low, rand_high)\n\n    if groupby_ncols and count_groups:\n        groupby_columns = [f\"groupby_col{x}\" for x in range(groupby_ncols)]\n        for groupby_col in groupby_columns:\n            data[groupby_col] = np.tile(np.arange(count_groups), nrows // count_groups)\n\n    if gen_unique_key:\n        data[\"col1\"] = np.arange(nrows)\n\n    df = POSSIBLE_IMPL[impl].DataFrame(data)\n\n    if groupby_ncols and count_groups:\n        dataframes_cache[cache_key] = df, groupby_columns\n        return df, groupby_columns\n\n    dataframes_cache[cache_key] = df\n    return df\n\n\ndef random_string() -> str:\n    \"\"\"\n    Create a 36-character random string.\n\n    Returns\n    -------\n    str\n    \"\"\"\n    return str(uuid.uuid4())\n\n\ndef random_columns(df_columns: list, columns_number: int) -> list:\n    \"\"\"\n    Pick sublist of random columns from a given sequence.\n\n    Parameters\n    ----------\n    df_columns : list\n        Columns to choose from.\n    columns_number : int\n        How many columns to pick.\n\n    Returns\n    -------\n    list\n    \"\"\"\n    return list(np.random.choice(df_columns, 
size=columns_number))\n\n\ndef random_booleans(number: int) -> list:\n    \"\"\"\n    Create random list of booleans with `number` elements.\n\n    Parameters\n    ----------\n    number : int\n        Count of booleans in result list.\n\n    Returns\n    -------\n    list\n    \"\"\"\n    return list(np.random.choice([True, False], size=number))\n\n\ndef execute(df: Union[modin.pandas.DataFrame, pandas.DataFrame]):\n    \"\"\"\n    Make sure the calculations are finished.\n\n    Parameters\n    ----------\n    df : modin.pandas.DataFrame or pandas.Datarame\n        DataFrame to be executed.\n    \"\"\"\n    if ASV_USE_IMPL == \"modin\":\n        partitions = df._query_compiler._modin_frame._partitions.flatten()\n        mgr_cls = df._query_compiler._modin_frame._partition_mgr_cls\n        if len(partitions) and hasattr(mgr_cls, \"wait_partitions\"):\n            mgr_cls.wait_partitions(partitions)\n            return\n\n        # compatibility with old Modin versions\n        all(\n            map(\n                lambda partition: partition.drain_call_queue() or True,\n                partitions,\n            )\n        )\n        if ASV_USE_ENGINE == \"ray\":\n            from ray import wait\n\n            all(map(lambda partition: wait([partition._data]), partitions))\n        elif ASV_USE_ENGINE == \"dask\":\n            from dask.distributed import wait\n\n            all(map(lambda partition: wait(partition._data), partitions))\n        elif ASV_USE_ENGINE == \"python\":\n            pass\n\n    elif ASV_USE_IMPL == \"pandas\":\n        pass\n\n\ndef get_shape_id(shape: tuple) -> str:\n    \"\"\"\n    Join shape numbers into a string with `_` delimiters.\n\n    Parameters\n    ----------\n    shape : tuple\n        Same as pandas.Dataframe.shape.\n\n    Returns\n    -------\n    str\n    \"\"\"\n    return \"_\".join([str(element) for element in shape])\n\n\ndef prepare_io_data(test_filename: str, data_type: str, shapes: list):\n    \"\"\"\n    Prepare 
data for IO tests with caching.\n\n    Parameters\n    ----------\n    test_filename : str\n        Unique file identifier that is used to distinguish data\n        for different tests.\n    data_type : {\"int\", \"str_int\", \"true_false_int\"}\n        Type of data generation.\n    shapes : list\n        Data shapes to prepare.\n\n    Returns\n    -------\n    test_filenames : dict\n        Dictionary that maps dataset shape to the file on disk.\n    \"\"\"\n    test_filenames = {}\n    for shape in shapes:\n        shape_id = get_shape_id(shape)\n        test_filenames[shape_id] = f\"{test_filename}_{shape_id}_{data_type}.csv\"\n        df = generate_dataframe(data_type, *shape, RAND_LOW, RAND_HIGH, impl=\"pandas\")\n        df.to_csv(test_filenames[shape_id], index=False)\n\n    return test_filenames\n\n\ndef prepare_io_data_parquet(test_filename: str, data_type: str, shapes: list):\n    \"\"\"\n    Prepare data for IO tests with caching.\n\n    Parameters\n    ----------\n    test_filename : str\n        Unique file identifier that is used to distinguish data\n        for different tests.\n    data_type : \"str_int\"\n        Type of data generation.\n    shapes : list\n        Data shapes to prepare.\n\n    Returns\n    -------\n    test_filenames : dict\n        Dictionary that maps dataset shape to the file on disk.\n    \"\"\"\n    test_filenames = {}\n    for shape in shapes:\n        shape_id = get_shape_id(shape)\n        test_filenames[shape_id] = f\"{test_filename}_{shape_id}_{data_type}.parquet\"\n        df = generate_dataframe(data_type, *shape, RAND_LOW, RAND_HIGH, impl=\"pandas\")\n        df.to_parquet(test_filenames[shape_id], index=False)\n\n    return test_filenames\n\n\ndef setup(*args, **kwargs):  # noqa: GL08\n    # This function just needs to be imported into each benchmark file to\n    # set up the random seed before each function. 
ASV runs it automatically.\n    # https://asv.readthedocs.io/en/latest/writing_benchmarks.html\n    np.random.seed(42)\n"
  },
  {
    "path": "asv_bench/benchmarks/utils/compatibility.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Compatibility layer for parameters used by ASV.\"\"\"\n\nimport os\n\nimport modin.pandas as pd\n\ntry:\n    from modin.config import NPartitions\n\n    NPARTITIONS = NPartitions.get()\nexcept ImportError:\n    NPARTITIONS = pd.DEFAULT_NPARTITIONS\n\ntry:\n    from modin.config import AsvImplementation, Engine, StorageFormat, TestDatasetSize\n\n    ASV_USE_IMPL = AsvImplementation.get()\n    ASV_DATASET_SIZE = TestDatasetSize.get() or \"Small\"\n    ASV_USE_ENGINE = Engine.get()\n    ASV_USE_STORAGE_FORMAT = StorageFormat.get()\nexcept ImportError:\n    # The same benchmarking code can be run for different versions of Modin, so in\n    # case of an error importing important variables, we'll just use predefined values\n    ASV_USE_IMPL = os.environ.get(\"MODIN_ASV_USE_IMPL\", \"modin\")\n    ASV_DATASET_SIZE = os.environ.get(\"MODIN_TEST_DATASET_SIZE\", \"Small\")\n    ASV_USE_ENGINE = os.environ.get(\"MODIN_ENGINE\", \"Ray\")\n    ASV_USE_STORAGE_FORMAT = os.environ.get(\"MODIN_STORAGE_FORMAT\", \"Pandas\")\n\nASV_USE_IMPL = ASV_USE_IMPL.lower()\nASV_DATASET_SIZE = ASV_DATASET_SIZE.lower()\nASV_USE_ENGINE = ASV_USE_ENGINE.lower()\nASV_USE_STORAGE_FORMAT = 
ASV_USE_STORAGE_FORMAT.lower()\n\nassert ASV_USE_IMPL in (\"modin\", \"pandas\")\nassert ASV_DATASET_SIZE in (\"big\", \"small\")\nassert ASV_USE_ENGINE in (\"ray\", \"dask\", \"python\", \"unidist\")\nassert ASV_USE_STORAGE_FORMAT in (\"pandas\")\n"
  },
  {
    "path": "asv_bench/benchmarks/utils/data_shapes.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Define data shapes.\"\"\"\n\nimport json\nimport os\n\nfrom .compatibility import ASV_DATASET_SIZE\n\nRAND_LOW = 0\nRAND_HIGH = 100\n\nBINARY_OP_DATA_SIZE = {\n    \"big\": [\n        [[5000, 5000], [5000, 5000]],\n        # the case extremely inefficient\n        # [[20, 500_000], [10, 1_000_000]],\n        [[500_000, 20], [1_000_000, 10]],\n    ],\n    \"small\": [[[250, 250], [250, 250]], [[10_000, 20], [25_000, 10]]],\n}\nUNARY_OP_DATA_SIZE = {\n    \"big\": [\n        [5000, 5000],\n        # the case extremely inefficient\n        # [10, 1_000_000],\n        [1_000_000, 10],\n    ],\n    \"small\": [[250, 250], [10_000, 10]],\n}\nSERIES_DATA_SIZE = {\n    \"big\": [[100_000, 1]],\n    \"small\": [[10_000, 1]],\n}\nBINARY_OP_SERIES_DATA_SIZE = {\n    \"big\": [\n        [[500_000, 1], [1_000_000, 1]],\n        [[500_000, 1], [500_000, 1]],\n    ],\n    \"small\": [[[5_000, 1], [10_000, 1]]],\n}\n\n\nDEFAULT_GROUPBY_NGROUPS = {\n    \"big\": [100, \"huge_amount_groups\"],\n    \"small\": [5],\n}\nGROUPBY_NGROUPS = DEFAULT_GROUPBY_NGROUPS[ASV_DATASET_SIZE]\n\n_DEFAULT_CONFIG_T = [\n    (\n        UNARY_OP_DATA_SIZE[ASV_DATASET_SIZE],\n        [\n           
 # Pandas storage format benchmarks\n            \"TimeGroupByMultiColumn\",\n            \"TimeGroupByDefaultAggregations\",\n            \"TimeGroupByDictionaryAggregation\",\n            \"TimeSetItem\",\n            \"TimeInsert\",\n            \"TimeArithmetic\",\n            \"TimeSortValues\",\n            \"TimeDrop\",\n            \"TimeHead\",\n            \"TimeTail\",\n            \"TimeExplode\",\n            \"TimeFillna\",\n            \"TimeFillnaDataFrame\",\n            \"TimeValueCountsFrame\",\n            \"TimeValueCountsSeries\",\n            \"TimeIndexing\",\n            \"TimeMultiIndexing\",\n            \"TimeResetIndex\",\n            \"TimeAstype\",\n            \"TimeDescribe\",\n            \"TimeProperties\",\n            \"TimeReindex\",\n            \"TimeReindexMethod\",\n            \"TimeFillnaMethodDataframe\",\n            \"TimeDropDuplicatesDataframe\",\n            \"TimeStack\",\n            \"TimeUnstack\",\n            \"TimeRepr\",\n            \"TimeMaskBool\",\n            \"TimeIsnull\",\n            \"TimeDropna\",\n            \"TimeEquals\",\n            # IO benchmarks\n            \"TimeReadCsvSkiprows\",\n            \"TimeReadCsvTrueFalseValues\",\n            \"TimeReadCsvNamesDtype\",\n            \"TimeReadParquet\",\n            # Scalability benchmarks\n            \"TimeFromPandas\",\n            \"TimeToPandas\",\n            \"TimeToNumPy\",\n        ],\n    ),\n    (\n        BINARY_OP_DATA_SIZE[ASV_DATASET_SIZE],\n        [\n            # Pandas storage format benchmarks\n            \"TimeJoin\",\n            \"TimeMerge\",\n            \"TimeMergeDefault\",\n            \"TimeConcat\",\n            \"TimeAppend\",\n            \"TimeBinaryOp\",\n            \"TimeLevelAlign\",\n        ],\n    ),\n    (\n        SERIES_DATA_SIZE[ASV_DATASET_SIZE],\n        [\n            # Pandas storage format benchmarks\n            \"TimeFillnaSeries\",\n            \"TimeGroups\",\n            
\"TimeIndexingNumericSeries\",\n            \"TimeFillnaMethodSeries\",\n            \"TimeDatetimeAccessor\",\n            \"TimeSetCategories\",\n            \"TimeRemoveCategories\",\n            \"TimeDropDuplicatesSeries\",\n        ],\n    ),\n    (\n        BINARY_OP_SERIES_DATA_SIZE[ASV_DATASET_SIZE],\n        [\n            # Pandas storage format benchmarks\n            \"TimeBinaryOpSeries\",\n        ],\n    ),\n]\n\nDEFAULT_CONFIG = {}\nDEFAULT_CONFIG[\"MergeCategoricals\"] = (\n    [[10_000, 2]] if ASV_DATASET_SIZE == \"big\" else [[1_000, 2]]\n)\nDEFAULT_CONFIG[\"TimeJoinStringIndex\"] = (\n    [[100_000, 64]] if ASV_DATASET_SIZE == \"big\" else [[1_000, 4]]\n)\nDEFAULT_CONFIG[\"TimeReplace\"] = (\n    [[10_000, 2]] if ASV_DATASET_SIZE == \"big\" else [[1_000, 2]]\n)\nfor config in (_DEFAULT_CONFIG_T,):\n    for _shape, _names in config:\n        DEFAULT_CONFIG.update({_name: _shape for _name in _names})\n\n# Correct shapes in the case when the operation ended with a timeout error\nif ASV_DATASET_SIZE == \"big\":\n    DEFAULT_CONFIG[\"TimeMergeDefault\"] = [\n        [[1000, 1000], [1000, 1000]],\n        [[500_000, 20], [1_000_000, 10]],\n    ]\n    DEFAULT_CONFIG[\"TimeLevelAlign\"] = [\n        [[2500, 2500], [2500, 2500]],\n        [[250_000, 20], [500_000, 10]],\n    ]\n    DEFAULT_CONFIG[\"TimeStack\"] = [\n        [1500, 1500],\n        [100_000, 10],\n    ]\n    DEFAULT_CONFIG[\"TimeUnstack\"] = DEFAULT_CONFIG[\"TimeStack\"]\n\nCONFIG_FROM_FILE = None\n\n\ndef get_benchmark_shapes(bench_id: str):\n    \"\"\"\n    Get custom benchmark shapes from a json file stored in MODIN_ASV_DATASIZE_CONFIG.\n\n    If `bench_id` benchmark is not found in the file, then the default value will\n    be used.\n\n    Parameters\n    ----------\n    bench_id : str\n        Unique benchmark identifier that is used to get shapes.\n\n    Returns\n    -------\n    list\n        Benchmark shapes.\n    \"\"\"\n    global CONFIG_FROM_FILE\n    if not CONFIG_FROM_FILE:\n 
       try:\n            from modin.config import AsvDataSizeConfig\n\n            filename = AsvDataSizeConfig.get()\n        except ImportError:\n            filename = os.environ.get(\"MODIN_ASV_DATASIZE_CONFIG\", None)\n        if filename:\n            # should be json\n            with open(filename) as _f:\n                CONFIG_FROM_FILE = json.load(_f)\n\n    if CONFIG_FROM_FILE and bench_id in CONFIG_FROM_FILE:\n        return CONFIG_FROM_FILE[bench_id]\n    return DEFAULT_CONFIG[bench_id]\n"
  },
  {
    "path": "asv_bench/test/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n"
  },
  {
    "path": "asv_bench/test/test_utils.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nfrom unittest.mock import Mock, mock_open, patch\n\nimport numpy as np\nimport pytest\nfrom benchmarks.utils import data_shapes, execute, get_benchmark_shapes\n\nimport modin.pandas as pd\nfrom modin.config import AsvDataSizeConfig\n\n\n@pytest.mark.parametrize(\n    \"asv_config_content, result\",\n    [\n        (\n            '{\"TimeJoin\": [[[10, 10], [15, 15]], [[11, 11], [13, 13]]], \\\n                \"TimeGroupBy\": [[11, 11], [13, 13]]}',\n            [\n                [\n                    # binary shapes\n                    [[10, 10], [15, 15]],\n                    [[11, 11], [13, 13]],\n                ],\n                [\n                    # unary shapes\n                    [11, 11],\n                    [13, 13],\n                ],\n            ],\n        ),\n    ],\n)\n@patch.object(data_shapes, \"CONFIG_FROM_FILE\", new=None)\ndef test_get_benchmark_shapes(asv_config_content, result):\n    AsvDataSizeConfig.put(\"mock_filename\")\n    with patch(\"builtins.open\", mock_open(read_data=asv_config_content)):\n        assert result[0] == get_benchmark_shapes(\"TimeJoin\")\n        assert result[1] == 
get_benchmark_shapes(\"TimeGroupBy\")\n\n\n@pytest.mark.parametrize(\n    \"asv_config_content, result\",\n    [\n        (\n            '{\"TimeJoin\": [[[10, 10], [15, 15]]]',\n            [[100, 100]],\n        ),\n    ],\n)\n@patch.object(data_shapes, \"CONFIG_FROM_FILE\", new=None)\ndef test_get_benchmark_shapes_default(asv_config_content, result):\n    AsvDataSizeConfig.put(None)\n    with patch.object(data_shapes, \"DEFAULT_CONFIG\", new={\"TimeJoin\": result}):\n        assert result == get_benchmark_shapes(\"TimeJoin\")\n\n\ndef test_execute():\n    df = pd.DataFrame(np.random.rand(100, 64))\n    partitions = df._query_compiler._modin_frame._partitions.flatten()\n    mgr_cls = df._query_compiler._modin_frame._partition_mgr_cls\n    with patch.object(mgr_cls, \"wait_partitions\", new=Mock()):\n        execute(df)\n        mgr_cls.wait_partitions.assert_called_once()\n        assert (mgr_cls.wait_partitions.call_args[0] == partitions).all()\n"
  },
  {
    "path": "ci/teamcity/Dockerfile.teamcity-ci",
    "content": "# Create images from this container like this (in modin repo root):\n#\n# git rev-parse HEAD > ci/teamcity/git-rev\n#\n# tar cf ci/teamcity/modin.tar .\n#\n# docker build --build-arg ENVIRONMENT=environment-dev.yml -t modin-project/teamcity-ci:${BUILD_NUMBER} -f ci/teamcity/Dockerfile.teamcity-ci ci/teamcity\n\nFROM rayproject/ray:latest\n\nARG ENVIRONMENT=environment-dev.yml\n\nADD modin.tar /modin\nADD git-rev /modin/git-rev\n\nWORKDIR /modin\nRUN sudo chown -R ray /modin\n\n# Make RUN commands use `bash --login`:\nSHELL [\"/bin/bash\", \"--login\", \"-c\"]\n\n# Initialize conda in bash config files:\nRUN conda init bash\nENV PATH /home/ray/anaconda3/envs/modin/bin:$PATH\n\nRUN conda config --set channel_priority strict\nRUN conda update python -y\nRUN conda env create -f ${ENVIRONMENT}\nRUN conda install curl PyGithub\n\n# Activate the environment, and make sure it's activated:\n# The following line also removed conda initialization from\n# ~/.bashrc so conda starts complaining that it should be\n# initialized for bash. But it is necessary to do it because\n# activation is not always executed when \"docker exec\" is used\n# and then conda initialization overwrites PATH with its base\n# environment where python doesn't have any packages installed.\nRUN echo \"conda activate modin\" > ~/.bashrc\nRUN echo \"Make sure environment is activated\"\nRUN conda list -n modin\n"
  },
  {
    "path": "ci/teamcity/build-docker.py",
    "content": "import os\nimport sys\n\n\ndef execute_command(cmd):\n    status = os.system(cmd)\n    ec = os.WEXITSTATUS(status)\n    if ec != 0:\n        raise SystemExit('Command \"{}\" failed'.format(cmd))\n\n\nif sys.platform.startswith(\"linux\"):\n    execute_command(\"git rev-parse HEAD > git-rev\")\n    execute_command(\n        \"(cd ../.. && git archive -o ci/teamcity/modin.tar $(cat ci/teamcity/git-rev))\"\n    )\n    base_image = \"ray-project/deploy\"\n    requirements = \"requirements-dev.txt\"\n    execute_command(\n        \"docker build -f Dockerfile.modin-base --build-arg BASE_IMAGE={} -t modin-project/modin-base .\".format(\n            base_image\n        )\n    )\nelse:\n    raise SystemExit(\n        \"TeamCity CI in Docker containers is supported only on Linux at the moment.\"\n    )\n\nexecute_command(\n    \"docker build -f Dockerfile.teamcity-ci --build-arg REQUIREMENTS={} -t modin-project/teamcity-ci .\".format(\n        requirements\n    )\n)\n\nif sys.platform.startswith(\"linux\"):\n    execute_command(\"rm ./modin.tar ./git-rev\")\n"
  },
  {
    "path": "ci/teamcity/comment_on_pr.py",
    "content": "\"\"\"\nPost the comment like the following to the PR:\n```\n:robot: TeamCity test results bot :robot:\n\n<Logs from pytest>\n```\n\"\"\"\n\nimport os\nimport sys\n\nfrom github import Github\n\n# Check if this is a pull request or not based on the environment variable\ntry:\n    pr_id = int(os.environ[\"GITHUB_PR_NUMBER\"].split(\"/\")[-1])\nexcept Exception:\n    sys.exit(0)\n\nengine = os.environ[\"MODIN_ENGINE\"]\n\nheader = \"\"\"<h1 align=\"center\"><img width=7% alt=\"\" src=\"https://upload.wikimedia.org/wikipedia/commons/thumb/8/86/Teamcity_Logo.png/600px-Teamcity_Logo.png\">\n    TeamCity {} test results bot</h1>\\n\\n\"\"\".format(\n    engine.title()\n)\nif engine == \"ray\":\n    pytest_outputs = [\"ray_tests.log\"]\nelif engine == \"dask\":\n    pytest_outputs = [\"dask_tests.log\"]\nelif engine == \"python\":\n    pytest_outputs = [\"python_tests.log\"]\nelse:\n    raise Exception(\"Unknown Engine, set `MODIN_ENGINE` environment variable\")\n\nfull_comment = \"\"\n# Do not include coverage info in PR comment\nsplit_by_first = (\n    \"----------- coverage: platform linux, python 3.7.5-final-0 -----------\"\n)\nsplit_by_second = \"--------------------------------------------------------------------------------------\"\n\ntests_failed = False\nfor out in pytest_outputs:\n    content = open(out, \"r\").read()\n    full_comment += \"\".join(\n        \"\".join(\n            [\n                i.split(split_by_first)[0],\n                i.split(split_by_first)[-1].split(split_by_second)[-1],\n            ]\n        )\n        for i in content.split(\"+ python3 -m pytest \")\n    )\n    tests_failed = tests_failed or (\"FAILURES\" in full_comment)\n    if len(full_comment) > 65_000:\n        full_comment = (\n            full_comment[-65_000:] + \"\\n\\n<b>Remaining output truncated<b>\\n\\n\"\n        )\n    full_comment = \"<details><summary>Tests Logs</summary>\\n\\n\\n```\\n\" + full_comment\n    full_comment += 
\"\\n```\\n\\n</details>\\n\"\n\nif not tests_failed:\n    header += '<h3 align=\"center\">Tests PASSed</h3>\\n\\n'\nelse:\n    header += '<h3 align=\"center\">Tests FAILed</h3>\\n\\n'\n\nfull_comment = header + full_comment\n\ntoken = os.environ[\"GITHUB_TOKEN\"]\ng = Github(token)\nrepo = g.get_repo(\"modin-project/modin\")\n\npr = repo.get_pull(pr_id)\nif any(\n    i.user.login == \"modin-bot\"\n    and \"TeamCity {} test results bot\".format(engine).lower() in i.body.lower()\n    for i in pr.get_issue_comments()\n):\n    pr_comment_list = [\n        i\n        for i in list(pr.get_issue_comments())\n        if i.user.login == \"modin-bot\"\n        and \"TeamCity {} test results bot\".format(engine).lower() in i.body.lower()\n    ]\n    assert len(pr_comment_list) == 1, \"Too many comments from modin-bot already\"\n    pr_comment_list[0].edit(full_comment)\nelse:\n    pr.create_issue_comment(full_comment)\n"
  },
  {
    "path": "codecov.yml",
    "content": "comment: false\ncoverage:\n  status:\n    project:\n      default:\n        branches:\n          - main\n        target: 85%\n    patch:\n      default:\n        target: 30%\n"
  },
  {
    "path": "contributing/contributing.md",
    "content": "# Modin dev onboarding\n\n1. [Set up git](https://docs.github.com/en/get-started/quickstart/set-up-git)\n1. [install anaconda](https://www.anaconda.com/products/individual#macos). Once installed,\nyou should reopen your terminal to find \"(base)\" next to your prompt: ![](conda_prompt.png)\n1. [Generate an SSH key](https://docs.github.com/en/authentication/connecting-to-github-with-ssh/generating-a-new-ssh-key-and-adding-it-to-the-ssh-agent) for GitHub\n1. Fork the [modin repo](https://github.com/modin-project/modin) on GitHub\n1. Clone the forked repo in a local directory of your choice: \n    ```\n    git clone ${PATH_TO_REPO}\n    ```\n    where the path can be found here: ![](clone_my_modin.png)\n4. Inside the cloned \"modin\" directory, add a remote branch called \"upstream\":\n   ```\n   git remote add upstream git@github.com:modin-project/modin.git\n   ```\n   where the upstream link comes from here: ![](clone_upstream_modin.png)\n1. Fetch the upstream branch:\n    ```\n    git fetch upstream\n    ```\n1. Set the default remote branch for your local main branch. \n    ```\n     git branch --set-upstream-to=upstream/main main\n    ```\n1. Install modin from local source code, and install all its dependencies:\n    ```\n     pip install -e \".[all]\"\n    ```\n1. Install ipython:\n    ```\n    pip install ipython\n    ```\n1. If you ever want to install modin at a release version (not the editable version from your machine): \n    ```\n    pip install modin\n    ```\n1. If you want a specific version:\n    ```\n    pip install modin==0.11\n    ```\n1. To upgrade modin to the newest available version:\n    ```\n    pip install -U modin\n    ```\n1. Now go back to local modin.\n    ```\n    pip install -e .\n    ```\n1. Try out modin in ipython:\n    ```\n    ipython\n    import modin\n    modin.__version__\n    ```\n    You should see the Modin version, which consists of the version, the last commit number, and the last commit hash.\n\n1. 
List Modin versions:\n    ```\n    git tag\n    ```\n\n1. Get a summary of a particular release:\n    ```\n    git tag -l --format='%(contents)' 0.11.0\n    ```\n\n1. Check out the developer requirements in `requirements-dev.txt`. Install them with:\n    ```\n    pip install -r requirements-dev.txt\n    ```\n\n1. Try a unit test:\n    ```\n    pytest modin/tests/pandas/test_concat.py\n    ```\n\n1. [Add a GPG key](https://docs.github.com/en/authentication/managing-commit-signature-verification/adding-a-new-gpg-key-to-your-github-account) to your GitHub account. Your commits need to be signed with a GPG key. On macOS, you can use [Mac GPG](https://gpgtools.org/).\n\n\n1. (Optional) We recommend a few workflow settings:\n\n    1. If you use Visual Studio Code, auto-format with [black](https://black.readthedocs.io/en/stable/) every time you save changes:\n        1. Install [Microsoft's Python extension](https://marketplace.visualstudio.com/items?itemName=ms-python.python)\n        1. Open your VSCode settings, in `Code -> Preferences -> Settings`.\n        1. Search for \"python formatting provider\" and select \"black\" from the dropdown menu.\n        1. Again in settings, search for \"format on save\" and enable the \"Editor: Format on Save\" option.\n    2. Add a pre-commit hook:\n        1. In your modin repository, copy [this pre-commit file](pre-commit) to `.git/hooks/pre-commit`\n        1. Every time you try to commit, git will try to run black, isort, flake8 and mypy, and abort the commit if any of them fails. This lets you make sure your commits pass these checks before you push to GitHub.\n        1. To bypass the pre-commit hook (e.g. if you don't want to create a pull request, or you already know your code will pass the tests), commit with the flag `--no-verify`."
  },
  {
    "path": "contributing/pre-commit",
    "content": "#!/bin/sh\n#\n# Called by \"git commit\" with no arguments.  The hook should\n# exit with non-zero status after issuing an appropriate message if\n# it wants to stop the commit.\n#\n\nset -e\n\nprintf \"running black. This script will preempt the commit if black fails.\\n\"\nblack --check --diff modin/ asv_bench/benchmarks scripts/doc_checker.py\nprintf 'black passed!\\n'\n\nprintf \"running isort. This script will preempt the commit if isort fails.\\n\"\nisort . --check-only\nprintf 'isort passed!\\n'\n\nprintf \"running flake8. This script will preempt the commit if flake8 fails.\\n\"\nflake8 modin/ asv_bench/benchmarks scripts/doc_checker.py\nprintf \"flake8 passed!\\n\"\n\nprintf \"running mypy. This script will preempt the commit if mypy fails.\\n\"\nmypy --config-file mypy.ini\nprintf \"mypy passed!\\n\"\nprintf \"pre-commit hook finished!\\n\"\n"
  },
  {
    "path": "docker/Dockerfile",
    "content": "FROM continuumio/miniconda3\n\nRUN conda install -c conda-forge psutil setproctitle\nRUN pip install modin"
  },
  {
    "path": "docs/_static/custom.js",
    "content": "document.addEventListener(\"DOMContentLoaded\", function () {\n  var script = document.createElement(\"script\");\n  script.type = \"module\";\n  script.id = \"runllm-widget-script\"\n\n  script.src = \"https://widget.runllm.com\";\n\n  script.setAttribute(\"runllm-keyboard-shortcut\", \"Mod+j\"); // cmd-j or ctrl-j to open the widget.\n  script.setAttribute(\"runllm-name\", \"Modin\");\n  script.setAttribute(\"runllm-position\", \"BOTTOM_RIGHT\");\n  script.setAttribute(\"runllm-assistant-id\", \"164\");\n\n  script.async = true;\n  document.head.appendChild(script);\n});\n"
  },
  {
    "path": "docs/_templates/layout.html",
    "content": "{% extends \"!layout.html\" %}\n  {% block footer %} {{ super() }}\n\n  <style>\n         .wy-nav-content { max-width: 65em; }\n  </style>\n\n{% endblock %}"
  },
  {
    "path": "docs/conf.py",
    "content": "# -*- coding: utf-8 -*-\n#\n# Configuration file for the Sphinx documentation builder.\n#\n# This file does only contain a selection of the most common options. For a\n# full list see the documentation:\n# http://www.sphinx-doc.org/en/stable/config\n\nimport os\n\n# -- Project information -----------------------------------------------------\nimport sys\nimport types\n\nimport ray\n\n\n# stub ray.remote to be a no-op so it doesn't shadow docstrings\ndef noop_decorator(*args, **kwargs):\n    if len(args) == 1 and len(kwargs) == 0 and callable(args[0]):\n        # This is the case where the decorator is just @ray.remote without parameters.\n        return args[0]\n    return lambda cls_or_func: cls_or_func\n\n\nray.remote = noop_decorator\n\n# fake modules if they're missing\nfor mod_name in (\n    \"xgboost\",\n    \"unidist\",\n    \"unidist.config\",\n):\n    try:\n        __import__(mod_name)\n    except ImportError:\n        sys.modules[mod_name] = types.ModuleType(\n            mod_name, f\"fake {mod_name} for building docs\"\n        )\nif not hasattr(sys.modules[\"xgboost\"], \"Booster\"):\n    sys.modules[\"xgboost\"].Booster = type(\"Booster\", (object,), {})\nif not hasattr(sys.modules[\"unidist\"], \"remote\"):\n    sys.modules[\"unidist\"].remote = noop_decorator\nif not hasattr(sys.modules[\"unidist\"], \"core\"):\n    sys.modules[\"unidist\"].core = type(\"core\", (object,), {})\nif not hasattr(sys.modules[\"unidist\"].core, \"base\"):\n    sys.modules[\"unidist\"].core.base = type(\"base\", (object,), {})\nif not hasattr(sys.modules[\"unidist\"].core.base, \"object_ref\"):\n    sys.modules[\"unidist\"].core.base.object_ref = type(\"object_ref\", (object,), {})\nif not hasattr(sys.modules[\"unidist\"].core.base.object_ref, \"ObjectRef\"):\n    sys.modules[\"unidist\"].core.base.object_ref.ObjectRef = type(\"ObjectRef\", (object,), {})\n\nsys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), \"..\")))\nimport 
modin\nfrom modin.config.__main__ import export_config_help\n\nconfigs_file_path = os.path.abspath(\n    os.path.join(os.path.dirname(__file__), \"flow/modin/configs_help.csv\")\n)\n# Export configs help to create configs table in the docs/flow/modin/config.rst\nexport_config_help(configs_file_path)\n\nproject = \"Modin\"\ncopyright = \"2018-2024, Modin Developers.\"\nauthor = \"Modin contributors\"\n\n# The short X.Y version\nversion = \"{}\".format(modin.__version__)\n# The full version, including alpha/beta/rc tags\nrelease = version\n\n\n# -- General configuration ---------------------------------------------------\n\n# If your documentation needs a minimal Sphinx version, state it here.\n#\n# needs_sphinx = '1.0'\n\n# Add any Sphinx extension module names here, as strings. They can be\n# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom\n# ones.\nextensions = [\n    \"sphinx.ext.autodoc\",\n    \"sphinx.ext.napoleon\",\n    \"sphinx.ext.intersphinx\",\n    \"sphinx.ext.todo\",\n    \"sphinx.ext.mathjax\",\n    \"sphinx.ext.githubpages\",\n    \"sphinx.ext.graphviz\",\n    \"sphinxcontrib.plantuml\",\n    \"sphinx_issues\",\n]\n\n\n# Add any paths that contain templates here, relative to this directory.\ntemplates_path = [\"_templates\"]\n\n# The suffix(es) of source filenames.\n# You can specify multiple suffix as a list of string:\n#\n# source_suffix = ['.rst', '.md']\nsource_suffix = \".rst\"\n\n# The master toctree document.\nmaster_doc = \"index\"\n\n# The language for content autogenerated by Sphinx. 
Refer to documentation\n# for a list of supported languages.\n#\n# This is also used if you do content translation via gettext catalogs.\n# Usually you set \"language\" from the command line for these cases.\nlanguage = \"en\"\n\n# List of patterns, relative to source directory, that match files and\n# directories to ignore when looking for source files.\n# This pattern also affects html_static_path and html_extra_path .\nexclude_patterns = [\"_build\", \"Thumbs.db\", \".DS_Store\"]\nhtml_static_path = [\"_static\"]\nhtml_js_files = [\"custom.js\"]\n\n# The name of the Pygments (syntax highlighting) style to use.\npygments_style = \"sphinx\"\n\n\n# -- Options for HTML output -------------------------------------------------\n\n# Maps git branches to Sphinx themes\ndefault_html_theme = \"pydata_sphinx_theme\"\ncurrent_branch = \"nature\"\n\n# The theme to use for HTML and HTML Help pages.  See the documentation for\n# a list of builtin themes.\n#\nhtml_theme = \"pydata_sphinx_theme\"\n\nhtml_favicon = \"img/MODIN_ver2.ico\"\n\nhtml_logo = \"img/MODIN_ver2.png\"\n\nhtml_context = {\"default_mode\": \"light\"}\n\n# Theme options are theme-specific and customize the look and feel of a theme\n# further.  
For a list of options available for each theme, see the\n# documentation.\n#\nhtml_theme_options = {\n    \"navbar_end\": [\"navbar-icon-links\"],\n    \"sidebarwidth\": 270,\n    \"collapse_navigation\": False,\n    \"navigation_depth\": 4,\n    \"show_toc_level\": 2,\n    \"github_url\": \"https://github.com/modin-project/modin\",\n    \"icon_links\": [\n        {\n            \"name\": \"PyPI\",\n            \"url\": \"https://pypi.org/project/modin\",\n            \"icon\": \"fab fa-python\",\n        },\n        {\n            \"name\": \"conda-forge\",\n            \"url\": \"https://anaconda.org/conda-forge/modin\",\n            \"icon\": \"fas fa-circle-notch\",\n        },\n        {\n            \"name\": \"Join the Slack\",\n            \"url\": \"https://modin.org/slack.html\",\n            \"icon\": \"fab fa-slack\",\n        },\n        {\n            \"name\": \"Mailing List\",\n            \"url\": \"https://groups.google.com/forum/#!forum/modin-dev\",\n            \"icon\": \"fas fa-envelope-square\",\n        },\n    ],\n    \"navigation_with_keys\": True,\n}\n\n# Custom sidebar templates, must be a dictionary that maps document names\n# to template names.\n#\n# The default sidebars (for documents that don't match any pattern) are\n# defined by theme itself.  Builtin themes are using these templates by\n# default: ``['localtoc.html', 'relations.html', 'sourcelink.html',\n# 'searchbox.html']``.\n#\n# The default pydata_sphinx_theme sidebar templates are\n# sidebar-nav-bs.html and search-field.html.\nhtml_sidebars = {}\n\nissues_github_path = \"modin-project/modin\"\n"
  },
  {
    "path": "docs/contact.rst",
    "content": "Contact\r\n=======\r\n\r\nSlack\r\n-----\r\n\r\nJoin our `Slack`_ community to connect with Modin users and contributors,\r\ndiscuss, and ask questions about all things Modin-related.\r\n\r\nMailing List\r\n------------\r\n\r\nGeneral questions, potential contributors, and ideas can be directed to the\r\n`developer mailing list`_. It is an open Google Group, so feel free to join anytime! If\r\nyou are unsure about where to ask or post something, the mailing list is a good place to\r\nask as well.\r\n\r\nIssues\r\n------\r\n\r\nBug reports and feature requests can be directed to the issues_ page of the Modin\r\nGitHub repo.\r\n\r\n.. _Slack: https://modin.org/slack.html\r\n.. _developer mailing list: https://groups.google.com/forum/#!forum/modin-dev\r\n.. _issues: https://github.com/modin-project/modin/issues\r\n"
  },
  {
    "path": "docs/development/architecture.rst",
    "content": "System Architecture\n===================\n\nIn this section, we will lay out the overall system architecture for\nModin, as well as go into detail about the component design, implementation and\nother important details. This document also contains important reference\ninformation for those interested in contributing new functionality, bugfixes\nand enhancements.\n\nHigh-Level Architectural View\n-----------------------------\nThe diagram below outlines the general layered view of the components of Modin\nwith a short description of each major section of the documentation following.\n\n\n.. image:: /img/modin_architecture.png\n   :align: center\n\nModin is logically separated into different layers that represent the hierarchy of a\ntypical Database Management System. Abstracting out each component allows us to\nindividually optimize and swap out components without affecting the rest of the system.\nWe can implement, for example, new compute kernels that are optimized for a certain type\nof data and can simply plug them in to the existing infrastructure by implementing a small\ninterface. It can still be distributed by our choice of compute engine with the\nlogic internally.\n\nSystem View\n-----------\nA top-down view of Modin’s architecture is detailed below:\n\n.. image:: /img/10000_meter.png\n   :align: center\n\nThe user (a data scientist) interacts with the Modin system by sending interactive or\nbatch commands through the API, and Modin executes them using various execution\nengines: Ray, Dask and MPI are currently supported.\n\nSubsystem/Container View\n------------------------\nIf we click down to the next level of detail, we will see that inside Modin the layered\narchitecture is implemented using several interacting components:\n\n.. 
image:: /img/component_view.png\n   :align: center\n\nFor simplicity, the other execution systems (Dask and MPI) are omitted and only Ray execution is shown.\n\n* Dataframe subsystem is the backbone of the dataframe holding and query compilation. It is responsible for\n  dispatching the ingress/egress to the appropriate module, getting the pandas API and calling the query\n  compiler to convert calls to the internal intermediate Dataframe Algebra.\n* Data Ingress/Egress Module is working in conjunction with Dataframe and Partitions subsystem to read data\n  split into partitions and send data into the appropriate node for storing.\n* Query Planner is the subsystem that translates the pandas API to intermediate Dataframe Algebra representation\n  DAG and performs an initial set of optimizations.\n* Query Executor is responsible for getting the Dataframe Algebra DAG, performing further optimizations based\n  on a selected storage format and mapping or compiling the Dataframe Algebra DAG to an actual\n  execution sequence.\n* Storage formats module is responsible for mapping the abstract operation to an actual executor call, e.g. pandas,\n  custom format.\n* Orchestration subsystem is responsible for spawning and controlling the actual execution environment for the\n  selected execution. It spawns the actual nodes, fires up the execution environment, e.g. Ray, monitors the state\n  of executors and provides telemetry.\n\nComponent View\n--------------\n\nUser queries which perform data transformation, data ingress or data egress pass through the Modin components\ndetailed below. The path the query takes is mostly similar across execution systems.\n\nData Transformation\n'''''''''''''''''''\n\n.. image:: /img/generic_data_transform.svg\n   :align: center\n\nQuery Compiler\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nThe :ref:`Query Compiler <query_compiler_def>` receives queries from the pandas API layer. 
The API layer is\nresponsible for ensuring a clean input to the Query Compiler. The Query Compiler must\nhave knowledge of the compute kernels and in-memory format of the data in order to\nefficiently compile the query.\n\nThe Query Compiler is responsible for sending the compiled query to the Core Modin Dataframe.\nIn this design, the Query Compiler does not have information about where or when the\nquery will be executed, and gives the control of the partition layout to the Modin\nDataframe.\n\nIn the interest of reducing the pandas API, the Query Compiler layer closely follows the\npandas API, but cuts out a large majority of the repetition.\n\n.. _auto-switch architecture:\n\nAutomatic Engine Switching and Casting\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nQueryCompilers which are derived from QueryCompilerCaster can participate in automatic casting when\ndifferent query compilers, representing different underlying engines, are used together in a\nfunction. A relative \"cost\" of casting is used to determine which query compiler everything should\nbe moved to. Each query compiler must implement the functions `move_to_cost`, `move_to_me_cost`,\n`max_cost` and `stay_cost` to provide information and query costs associated with different decision\npoints in cost optimization. With the exception of `max_cost` these methods need to return a\nQCCoercionCost in the range of 0-1000.\n\nThese functions have precise meanings:\n\n* `move_to_cost` is the transmission cost of moving the data, including known serialization costs\n  from the perspective of that particular compiler. Colloquially, the question being asked of the\n  query compiler is, \"What is the normalized cost of moving my data to the other engine?\"\n* `move_to_me_cost` is the execution cost for the data and operation on the proposed *destination*\n  query compiler. 
Since this method is called before the data has been migrated, this is a class\n  method and the destination query_compiler may have very limited information on the possible cost\n  after migration. Factors that may be considered here include available memory, CPU, and the\n  unique characteristics of the engine. The question being asked is, \"If this data were moved to\n  me, what would be the normalized execution cost to perform that operation?\"\n* `stay_cost` is the execution cost on the current query compiler (where the data is). The question\n  asked of the query compiler is, \"If I were to keep this data on my engine, what would be the normalized\n  execution cost?\"\n* `max_cost` is the maximum cost allowed by this query compiler across all data movements. This method\n  sets a normalized upper bound for situations where multiple data frames from different engines all\n  need to move to the same engine. The value returned by this method can exceed\n  QCCoercionCost.COST_IMPOSSIBLE.\n\nThere are generally two places where automatic casting is considered: When two or more DataFrames on\ndifferent engines are participating in an operation (such as pd.concat) or at registered functions\nfor particular engines through the `register_function_for_pre_op_switch` and\n`register_function_for_post_op_switch` methods.\n\nCore Modin Dataframe\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nAt this layer, operations can be performed lazily. Currently, Modin executes most\noperations eagerly in an attempt to behave as pandas does. Some operations, e.g.\n``transpose`` are expensive and create full copies of the data in-memory. In these\ncases, we can wait until another operation triggers computation. 
In the future, we plan\nto add additional query planning and laziness to Modin to ensure that queries are\nperformed efficiently.\n\nThe structure of the Core Modin Dataframe is extensible, such that any operation that could\nbe better optimized for a given execution can be overridden and optimized in that way.\n\nThis layer has a significantly reduced API from the QueryCompiler and the user-facing\nAPI. Each of these APIs represents a single way of performing a given operation or\nbehavior.\n\nCore Modin Dataframe API\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nMore documentation can be found internally in the code_. This API is not complete, but\nrepresents an overwhelming majority of operations and behaviors.\n\nThis API can be implemented by other distributed/parallel DataFrame libraries and\nplugged in to Modin as well. Create an issue_ or discuss\non our `Slack <https://modin.org/slack.html>`_ for more information!\n\nThe :doc:`Core Modin Dataframe </flow/modin/core/dataframe/base/index>` is responsible for the data layout and shuffling, partitioning,\nand serializing the tasks that get sent to each partition. Other implementations of the\nModin Dataframe interface will have to handle these as well.\n\nPartition Manager\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nThe Partition Manager can change the size and shape of the partitions based on the type\nof operation. For example, certain operations are complex and require access to an\nentire column or row. The Partition Manager can convert the block partitions to row\npartitions or column partitions. This gives Modin the flexibility to perform operations\nthat are difficult in row-only or column-only partitioning schemas.\n\nAnother important component of the Partition Manager is the serialization and shipment\nof compiled queries to the Partitions. 
It maintains metadata for the length and width of\neach partition, so when operations only need to operate on or extract a subset of the\ndata, it can ship those queries directly to the correct partition. This is particularly\nimportant for some operations in pandas which can accept different arguments and\noperations for different columns, e.g. ``fillna`` with a dictionary.\n\nThis abstraction separates the actual data movement and function application from the\nDataframe layer to keep the Core Dataframe API small and separately optimize the data\nmovement and metadata management.\n\nPartitions\n\"\"\"\"\"\"\"\"\"\"\n\nPartitions are responsible for managing a subset of the Dataframe. As mentioned\nbelow, the Dataframe is partitioned both row and column-wise. This gives Modin\nscalability in both directions and flexibility in data layout. There are a number of\noptimizations in Modin that are implemented in the partitions. Partitions are specific\nto the execution framework and in-memory format of the data, allowing Modin to\nexploit potential optimizations across both. These optimizations are explained\nfurther on the pages specific to the execution framework.\n\nExecution Engine\n''''''''''''''''\n\nThis layer performs computation on partitions of the data. The\nModin Dataframe is designed to work with `task parallel`_ frameworks, but integration with\ndata parallel frameworks should be possible with some effort.\n\nStorage Format\n''''''''''''''\n\nThe :doc:`storage format </flow/modin/core/storage_formats/index>` describes the in-memory partition type.\nThe base storage format in Modin is pandas. In the default case, the Modin Dataframe operates on partitions that contain ``pandas.DataFrame`` objects.\n\nData Ingress\n''''''''''''\n\n.. note::\n   Data ingress operations (e.g. ``read_csv``) in Modin load data from the source into\n   partitions and vice versa for data egress (e.g. 
``to_csv``) operation.\n   Improved performance is achieved by reading/writing in partitions in parallel.\n\nData ingress starts with a function in the pandas API layer (e.g. ``read_csv``). Then the user's\nquery is passed to the :doc:`Factory Dispatcher </flow/modin/core/execution/dispatching>`,\nwhich defines a factory specific for the execution. The factory for execution contains an IO class\n(e.g. ``PandasOnRayIO``) whose responsibility is to perform a parallel read/write from/to a file.\nThis IO class contains class methods with interfaces and names that are similar to pandas IO functions\n(e.g. ``PandasOnRayIO.read_csv``). The IO class declares the Modin Dataframe and Query Compiler\nclasses specific for the execution engine and storage format to ensure the correct object is constructed.\nIt also declares IO methods that are mix-ins containing a combination of the engine-specific class for\ndeploying remote tasks, the class for parsing the given file format and the class handling the chunking\nof the format-specific file on the head node (see dispatcher classes implementation\n:doc:`details </flow/modin/core/io/index>`). The output from the IO class data ingress function is\na :doc:`Modin Dataframe </flow/modin/core/dataframe/pandas/dataframe>`.\n\n.. image:: /img/generic_data_ingress.svg\n   :align: center\n\nData Egress\n'''''''''''\n\nData egress operations (e.g. ``to_csv``) are similar to data ingress operations up to\nexecution-specific IO class functions construction. Data egress functions of the IO class\nare defined slightly different from data ingress functions and created only\nspecifically for the engine since partitions already have information about its storage\nformat. Using the IO class, data is exported from partitions to the target file.\n\n.. 
image:: /img/generic_data_egress.svg\n   :align: center\n\nSupported Execution Engines and Storage Formats\n'''''''''''''''''''''''''''''''''''''''''''''''\n\nThis is a list of execution engines and in-memory formats supported in Modin. If you\nwould like to contribute a new execution engine or in-memory format, please see the\ndocumentation page on :doc:`contributing </development/contributing>`.\n\n- :doc:`pandas on Ray </development/using_pandas_on_ray>`\n    - Uses the Ray_ execution framework.\n    - The storage format is `pandas` and the in-memory partition type is a pandas DataFrame.\n    - For more information on the execution path, see the :doc:`pandas on Ray </flow/modin/core/execution/ray/implementations/pandas_on_ray/index>` page.\n- :doc:`pandas on Dask </development/using_pandas_on_dask>`\n    - Uses the `Dask Futures`_ execution framework.\n    - The storage format is `pandas` and the in-memory partition type is a pandas DataFrame.\n    - For more information on the execution path, see the :doc:`pandas on Dask </flow/modin/core/execution/dask/implementations/pandas_on_dask/index>` page.\n- :doc:`pandas on MPI </development/using_pandas_on_mpi>`\n    - Uses MPI_ through the Unidist_ execution framework.\n    - The storage format is `pandas` and the in-memory partition type is a pandas DataFrame.\n    - For more information on the execution path, see the :doc:`pandas on Unidist </flow/modin/core/execution/unidist/implementations/pandas_on_unidist/index>` page.\n- :doc:`pandas on Python </development/using_pandas_on_python>`\n    - Uses native python execution - mainly used for debugging.\n    - The storage format is `pandas` and the in-memory partition type is a pandas DataFrame.\n    - For more information on the execution path, see the :doc:`pandas on Python </flow/modin/core/execution/python/implementations/pandas_on_python/index>` page.\n- pandas on Snowflake\n    - Uses the Snowpark Python library to transpile pandas API calls to SQL queries.\n    
- The storage format is the custom-defined `Snowflake` format; data remains within Snowflake warehouses until retrieved by pandas API calls.\n    - For more information on pandas on Snowflake, refer to Snowflake's `documentation <https://docs.snowflake.com/en/developer-guide/snowpark/python/pandas-on-snowflake>`_ (external link).\n\n.. _directory-tree:\n\nDataFrame Partitioning\n----------------------\n\nThe Modin DataFrame architecture follows in the footsteps of modern architectures for\ndatabase and high performance matrix systems. We chose a partitioning schema that\npartitions along both columns and rows because it gives Modin flexibility and\nscalability in both the number of columns and the number of rows. The\nfollowing figure illustrates this concept.\n\n.. image:: /img/block_partitions_diagram.png\n   :align: center\n\nCurrently, the main in-memory format of each partition is a\n`pandas DataFrame`_ (:doc:`pandas storage format </flow/modin/core/storage_formats/pandas/index>`).\n\nIndex\n-----\n\nWe currently use the ``pandas.Index`` object for indexing both columns and rows. In the\nfuture, we will implement a distributed, pandas-compatible Index object in order to remove\nthis scaling limitation from the system. Most workloads will not be affected by this scalability limit\nsince it only appears when operating on more than 10's of billions of columns or rows.\n**Important note**: If you are using the\ndefault index (``pandas.RangeIndex``) there is a fixed memory overhead (~200 bytes) and\nthere will be no scalability issues with the index.\n\nAPI\n---\n\nThe API is the outer-most layer that faces users. The following classes contain Modin's implementation of the pandas API:\n\n.. toctree::\n   /flow/modin/pandas/base\n   /flow/modin/pandas/dataframe\n   /flow/modin/pandas/series\n\nModule/Class View\n-----------------\n\nModin's modules layout is shown below. Click on the links to deep dive into Modin's internal implementation\ndetails. 
The documentation covers most modules, with more docs being added everyday!\n\n.. parsed-literal::\n   ├───.github\n   ├───asv_bench\n   ├───ci\n   ├───docker\n   ├───docs\n   ├───examples\n   ├───modin\n   │   ├─── :doc:`config </flow/modin/config>`\n   |   ├─── :doc:`utils </flow/modin/utils>`\n   │   ├───core\n   │   │   ├─── :doc:`dataframe </flow/modin/core/dataframe/index>`\n   │   │   │   ├─── :doc:`algebra </flow/modin/core/dataframe/algebra>`\n   │   │   │   ├─── :doc:`base </flow/modin/core/dataframe/base/index>`\n   │   │   │   └─── :doc:`pandas </flow/modin/core/dataframe/pandas/index>`\n   │   │   ├───execution\n   │   │   │   ├───dask\n   │   │   │   │   ├───common\n   │   │   │   │   └───implementations\n   │   │   │   │       └─── :doc:`pandas_on_dask </flow/modin/core/execution/dask/implementations/pandas_on_dask/index>`\n   │   │   │   ├─── :doc:`dispatching </flow/modin/core/execution/dispatching>`\n   │   │   │   ├───python\n   │   │   │   │   └───implementations\n   │   │   │   │       └─── :doc:`pandas_on_python </flow/modin/core/execution/python/implementations/pandas_on_python/index>`\n   │   │   │   ├───ray\n   │   │   │   │   ├───common\n   │   │   │   │   ├─── :doc:`generic </flow/modin/core/execution/ray/generic>`\n   │   │   │   │   └───implementations\n   │   │   │   │       └─── :doc:`pandas_on_ray </flow/modin/core/execution/ray/implementations/pandas_on_ray/index>`\n   │   │   │   └───unidist\n   │   │   │       ├───common\n   │   │   │       ├─── :doc:`generic </flow/modin/core/execution/unidist/generic>`\n   │   │   │       └───implementations\n   │   │   │           └─── :doc:`pandas_on_unidist </flow/modin/core/execution/unidist/implementations/pandas_on_unidist/index>`\n   │   │   ├─── :doc:`io </flow/modin/core/io/index>`\n   │   │   └─── :doc:`storage_formats </flow/modin/core/storage_formats/index>`\n   │   │       ├─── :doc:`base </flow/modin/core/storage_formats/base/query_compiler>`\n   │   │       └─── :doc:`pandas 
</flow/modin/core/storage_formats/pandas/index>`\n   │   ├───distributed\n   │   │   ├───dataframe\n   │   │   │   └─── :doc:`pandas </flow/modin/distributed/dataframe/pandas>`\n   │   ├─── :doc:`experimental </flow/modin/experimental/index>`\n   │   │   ├───core\n   |   |   |   └─── :doc:`io </flow/modin/experimental/core/io/index>`\n   │   │   ├─── :doc:`pandas </flow/modin/experimental/pandas>`\n   │   │   ├─── :doc:`sklearn </flow/modin/experimental/sklearn>`\n   │   │   ├───spreadsheet\n   │   │   ├─── :doc:`xgboost </flow/modin/experimental/xgboost>`\n   │   │   └─── :doc:`batch </flow/modin/experimental/batch>`\n   │   └───pandas\n   │       ├─── :doc:`dataframe </flow/modin/pandas/dataframe>`\n   │       └─── :doc:`series </flow/modin/pandas/series>`\n   ├───requirements\n   ├───scripts\n   └───stress_tests\n\n.. _pandas Dataframe: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html\n.. _Ray: https://github.com/ray-project/ray\n.. _Unidist: https://github.com/modin-project/unidist\n.. _MPI: https://www.mpi-forum.org/\n.. _code: https://github.com/modin-project/modin/blob/main/modin/core/dataframe\n.. _Dask: https://github.com/dask/dask\n.. _Dask Futures: https://docs.dask.org/en/latest/futures.html\n.. _issue: https://github.com/modin-project/modin/issues\n.. _task parallel: https://en.wikipedia.org/wiki/Task_parallelism\n.. _experimental features: /usage_guide/advanced_usage/index.html\n"
  },
  {
    "path": "docs/development/contributing.rst",
    "content": "Contributing\n============\n\nGetting Started\n---------------\n\nIf you're interested in getting involved in the development of Modin, but aren't sure\nwhere start, take a look at the issues tagged `Good first issue`_ or Documentation_.\nThese are issues that would be good for getting familiar with the codebase and better\nunderstanding some of the more complex components of the architecture. There is\ndocumentation here about the :doc:`architecture </development/architecture>` that you will\nwant to review in order to get started.\n\nAlso, feel free to join the discussions on the `developer mailing list`_.\n\nIf you want a quick guide to getting your development environment setup, please\nuse `the contributing instructions on GitHub`_.\n\nCertificate of Origin\n---------------------\n\nTo keep a clear track of who did what, we use a `sign-off` procedure (same requirements\nfor using the signed-off-by process as the Linux kernel has\nhttps://www.kernel.org/doc/html/v4.17/process/submitting-patches.html) on patches or pull\nrequests that are being sent. The sign-off is a simple line at the end of the explanation\nfor the patch, which certifies that you wrote it or otherwise have the right to pass it\non as an open-source patch. The rules are pretty simple: if you can certify the below:\n\nCERTIFICATE OF ORIGIN V 1.1\n^^^^^^^^^^^^^^^^^^^^^^^^^^^\n\"By making a contribution to this project, I certify that:\n\n1.) The contribution was created in whole or in part by me and I have the right to\nsubmit it under the open source license indicated in the file; or\n2.) The contribution is based upon previous work that, to the best of my knowledge, is\ncovered under an appropriate open source license and I have the right under that license\nto submit that work with modifications, whether created in whole or in part by me, under\nthe same open source license (unless I am permitted to submit under a different\nlicense), as indicated in the file; or\n3.) 
The contribution was provided directly to me by some other person who certified (1),\n(2) or (3) and I have not modified it.\n4.) I understand and agree that this project and the contribution are public and that a\nrecord of the contribution (including all personal information I submit with it,\nincluding my sign-off) is maintained indefinitely and may be redistributed consistent\nwith this project or the open source license(s) involved.\"\n\n\n.. code-block:: bash\n\n   This is my commit message\n\n   Signed-off-by: Awesome Developer <developer@example.org>\n\n\nCode without a proper signoff cannot be merged into the\nmain branch. Note: You must use your real name (sorry, no pseudonyms or anonymous\ncontributions.)\n\nThe text can either be manually added to your commit body, or you can add either ``-s``\nor ``--signoff`` to your usual ``git commit`` commands:\n\n\n\n.. code-block:: bash\n\n   git commit --signoff -m \"This is my commit message\"\n   git commit -s -m \"This is my commit message\"\n\nThis will use your default git configuration which is found in .git/config. To change\nthis, you can use the following commands:\n\n.. code-block:: bash\n\n   git config --global user.name \"Awesome Developer\"\n   git config --global user.email \"awesome.developer@example.org\"\n\nIf you have authored a commit that is missing the signed-off-by line, you can amend your\ncommits and push them to GitHub.\n\n.. code-block:: bash\n\n   git commit --amend --signoff\n\nIf you've pushed your changes to GitHub already you'll need to force push your branch\nafter this with ``git push -f``.\n\nCommit Message formatting\n-------------------------\nWe request that your first commit follow a particular format, and we\n**require** that your PR title follow the format. The format is:\n\n.. code-block:: bash\n\n    FEAT-#9999: Add `DataFrame.rolling` functionality, to enable rolling window operations\n\nThe ``FEAT`` component represents the type of commit. 
This component of the commit\nmessage can be one of the following:\n\n* FEAT: A new feature that is added\n* DOCS: Documentation improvements or updates\n* FIX: A bugfix contribution\n* REFACTOR: Moving or removing code without change in functionality\n* TEST: Test updates or improvements\n* PERF: Performance enhancements\n\nThe ``#9999`` component of the commit message should be the issue number in the Modin\nGitHub issue tracker: https://github.com/modin-project/modin/issues. This is important\nbecause it links commits to their issues.\n\nThe commit message should follow a colon (:) and be descriptive and succinct.\n\nA Modin CI job on GitHub will enforce that your pull request title follows the\nformat we suggest. Note that if you update the PR title, you have to push\nanother commit (even if it's empty) or amend your last commit for the job to\npick up the new PR title. Re-running the job in Github Actions won't work.\n\nGeneral Rules for committers\n----------------------------\n\n- Try to write a PR name as descriptive as possible.\n- Try to keep PRs as small as possible. One PR should be making one semantically atomic change.\n- Don't merge your own PRs even if you are technically able to do it.\n\nDevelopment Dependencies\n------------------------\n\nWe recommend doing development in a virtualenv or conda environment, though this decision\nis ultimately yours. You will want to run the following in order to install all of the required\ndependencies for running the tests and formatting the code:\n\n.. code-block:: bash\n\n  conda env create --file environment-dev.yml\n  # or\n  pip install -r requirements-dev.txt\n\nCode Formatting and Lint\n------------------------\n\nWe use black_ for code formatting. Before you submit a pull request, please make sure\nthat you run the following from the project root:\n\n.. code-block:: bash\n\n  black modin/ asv_bench/benchmarks scripts/doc_checker.py\n\nWe also use flake8_ to check linting errors. 
Running the following from the project root\nwill ensure that it passes the lint checks on Github Actions:\n\n.. code-block:: bash\n\n  flake8 modin/ asv_bench/benchmarks scripts/doc_checker.py\n\nWe test that this has been run on our `Github Actions`_ test suite. If you do this and find\nthat the tests are still failing, try updating your version of black and flake8.\n\nAdding a test\n-------------\n\nIf you find yourself fixing a bug or adding a new feature, don't forget to add a test to\nthe test suite to verify its correctness! More on testing and the layout of the tests\ncan be found in our testing documentation. We ask that you follow the existing\nstructure of the tests for ease of maintenance.\n\nRunning the tests\n-----------------\n\nTo run the entire test suite, run the following from the project root:\n\n.. code-block:: bash\n\n  pytest modin/pandas/test\n\nThe test suite is very large, and may take a long time if you run every test. If you've\nonly modified a small amount of code, it may be sufficient to run a single test or some\nsubset of the test suite. In order to run a specific test run:\n\n.. code-block:: bash\n\n  pytest modin/pandas/test::test_new_functionality\n\nThe entire test suite is automatically run for each pull request.\n\nPerformance measurement\n-----------------------\n\nWe use Asv_ tool for performance tracking of various Modin functionality. The results\ncan be viewed here: `Asv dashboard`_.\n\nMore information can be found in the `Asv readme`_.\n\n\nBuilding documentation\n----------------------\n\nTo build the documentation, please follow the steps below from the project root:\n\n.. code-block:: bash\n\n    pip install -r docs/requirements-doc.txt\n    sphinx-build -b html docs docs/build\n\nTo visualize the documentation locally, run the following from `build` folder:\n\n.. code-block:: bash\n\n    python -m http.server <port>\n    # python -m http.server 1234\n\nthen open the browser at `0.0.0.0:<port>` (e.g. 
`0.0.0.0:1234`).\n\nContributing a new execution framework or in-memory format\n----------------------------------------------------------\n\nIf you are interested in contributing support for a new execution framework or in-memory\nformat, please make sure you understand the :doc:`architecture </development/architecture>` of Modin.\n\nThe best place to start the discussion for adding a new execution framework or in-memory\nformat is the `developer mailing list`_.\n\nMore docs on this coming soon...\n\n.. _Good first issue: https://github.com/modin-project/modin/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue+%3Abeginner%3A%22\n.. _Documentation: https://github.com/modin-project/modin/issues?q=is%3Aissue+is%3Aopen+label%3A%22documentation+%3Abookmark_tabs%3A%22\n.. _black: https://github.com/ambv/black\n.. _flake8: http://flake8.pycqa.org/en/latest/\n.. _Github Actions: https://github.com/features/actions\n.. _Asv: https://github.com/airspeed-velocity/asv#airspeed-velocity\n.. _developer mailing list: https://groups.google.com/forum/#!forum/modin-dev\n.. _Asv dashboard: https://modin.org/modin-bench/#/\n.. _Asv readme: https://github.com/modin-project/modin/blob/main/asv_bench/README.md\n.. _the contributing instructions on GitHub: https://github.com/modin-project/modin/blob/main/contributing/contributing.md"
  },
  {
    "path": "docs/development/index.rst",
    "content": "Development\n===========\n\n.. toctree::\n    :maxdepth: 4\n\n    contributing\n    architecture\n    partition_api\n    using_pandas_on_ray\n    using_pandas_on_dask\n    using_pandas_on_python\n    using_pandas_on_mpi\n\n.. meta::\n    :description lang=en:\n        Development-specific documentation.\n"
  },
  {
    "path": "docs/development/partition_api.rst",
    "content": "Partition API in Modin\n======================\n\nWhen you are working with a :py:class:`~modin.pandas.dataframe.DataFrame`, you can unwrap its remote partitions\nto get the raw futures objects compatible with the execution engine (e.g. ``ray.ObjectRef`` for Ray).\nIn addition to unwrapping of the remote partitions we also provide an API to construct a ``modin.pandas.DataFrame``\nfrom raw futures objects.\n\nPartition IPs\n-------------\nFor finer grained placement control, Modin also provides an API to get the IP addresses of the nodes that hold each partition.\nYou can pass the partitions having needed IPs to your function. It can help with minimizing of data movement between nodes.\n\nPartition API implementations\n-----------------------------\nBy default, a :py:class:`~modin.pandas.dataframe.DataFrame` stores underlying partitions as ``pandas.DataFrame`` objects.\nYou can find the specific implementation of Modin's Partition Interface in :doc:`pandas Partition API </flow/modin/distributed/dataframe/pandas>`.\n\n.. toctree::\n  :hidden:\n\n  /flow/modin/distributed/dataframe/pandas\n\nRay engine\n----------\nHowever, it is worth noting that for Modin on ``Ray`` engine with ``pandas`` in-memory format IPs of the remote partitions may not match\nactual locations if the partitions are lower than 100 kB. Ray saves such objects (<= 100 kB, by default) in in-process store\nof the calling process (please, refer to `Ray documentation`_ for more information). We can't get IPs for such objects while maintaining good performance.\nSo, you should keep in mind this for unwrapping of the remote partitions with their IPs. 
Several options are provided to handle the case in the\n``How to handle Ray objects that are lower than 100 kB`` section.\n\nDask engine\n-----------\nThe issue mentioned above does not occur for Modin on ``Dask`` engine with ``pandas`` in-memory format because ``Dask`` saves any objects\nin the worker process that processes a function (please, refer to `Dask documentation`_ for more information).\n\nUnidist engine\n--------------\nCurrently, Modin only supports MPI through unidist. The issue mentioned above does not occur for\nModin on ``Unidist`` engine using ``MPI`` backend with ``pandas`` in-memory format\nbecause ``Unidist`` saves any objects in the MPI worker process that processes a function\n(please, refer to `Unidist documentation`_ for more information).\n\nHow to handle Ray objects that are lower than 100 kB\n----------------------------------------------------\n\n* If you are sure that each of the remote partitions being unwrapped is higher than 100 kB, you can just import Modin or perform ``ray.init()`` manually.\n\n* If you don't know partition sizes you can pass the option ``_system_config={\"max_direct_call_object_size\": <nbytes>,}``, where ``nbytes`` is the threshold for objects that will be stored in the in-process store, to ``ray.init()``.\n\n* You can also start Ray as follows: ``ray start --head --system-config='{\"max_direct_call_object_size\":<nbytes>}'``.\n\nNote that when specifying the threshold the performance of some Modin operations may change.\n\n.. _`Ray documentation`: https://docs.ray.io/en/master/index.html#\n.. _`Dask documentation`: https://distributed.dask.org/en/latest/index.html\n.. _`Unidist documentation`: https://unidist.readthedocs.io/en/latest/index.html\n"
  },
  {
    "path": "docs/development/using_pandas_on_dask.rst",
    "content": "pandas on Dask\n==============\n\nThis section describes usage related documents for the pandas on Dask component of Modin.\n\nModin uses pandas as a primary memory format of the underlying partitions and optimizes queries\ningested from the API layer in a specific way to this format. Thus, there is no need to care of choosing it\nbut you can explicitly specify it anyway as shown below.\n\nOne of the execution engines that Modin uses is Dask. To enable the pandas on Dask execution you should set the following environment variables:\n\n.. code-block:: bash\n\n   export MODIN_ENGINE=dask\n   export MODIN_STORAGE_FORMAT=pandas\n\nor turn them on in source code:\n\n.. code-block:: python\n\n   import modin.config as cfg\n   cfg.Engine.put('dask')\n   cfg.StorageFormat.put('pandas')\n\nUsing Modin on Dask locally\n---------------------------\n\nIf you want to run Modin on Dask locally using a single node, just set Modin engine to ``Dask`` and \ncontinue working with a Modin DataFrame as if it was a pandas DataFrame.\nYou can either initialize a Dask client on your own and Modin connects to the existing Dask cluster or\nallow Modin itself to initialize a Dask client.\n\n.. code-block:: python\n\n  import modin.pandas as pd\n  import modin.config as modin_cfg\n\n  modin_cfg.Engine.put(\"dask\")\n  df = pd.DataFrame(...)\n\nUsing Modin on Dask in a Cluster\n--------------------------------\n\nIf you want to run Modin on Dask in a cluster, you should set up a Dask cluster and initialize a Dask client.\nOnce the Dask client is initialized, Modin will be able to connect to it and use the Dask cluster.\n\n.. 
code-block:: python\n\n  from distributed import Client\n  import modin.pandas as pd\n  import modin.config as modin_cfg\n  \n  # Define your cluster here\n  cluster = ...\n  client = Client(cluster)\n\n  modin_cfg.Engine.put(\"dask\")\n  df = pd.DataFrame(...)\n\nTo get more information on how to deploy and run a Dask cluster, visit the `Deploy Dask Clusters`_ page.\n\nConversion between Modin DataFrame and Dask DataFrame\n-----------------------------------------------------\n\nModin DataFrame can be converted to/from Dask DataFrame with no-copy partition conversion.\nThis allows you to take advantage of both Modin and Dask libraries for maximum performance.\n\n.. code-block:: python\n\n  import modin.pandas as pd\n  import modin.config as modin_cfg\n  from modin.pandas.io import to_dask, from_dask\n\n  modin_cfg.Engine.put(\"dask\")\n  df = pd.DataFrame(...)\n\n  # Convert Modin to Dask DataFrame\n  dask_df = to_dask(df)\n  \n  # Convert Dask to Modin DataFrame\n  modin_df = from_dask(dask_df)\n\n.. _Deploy Dask Clusters: https://docs.dask.org/en/stable/deploying.html\n"
  },
  {
    "path": "docs/development/using_pandas_on_mpi.rst",
    "content": "pandas on MPI through unidist\n=============================\n\nThis section describes usage related documents for the pandas on MPI through unidist component of Modin.\n\nModin uses pandas as a primary memory format of the underlying partitions and optimizes queries\ningested from the API layer in a specific way to this format. Thus, there is no need to care of choosing it\nbut you can explicitly specify it anyway as shown below.\n\nOne of the execution engines that Modin uses is MPI through unidist.\nTo enable the pandas on MPI through unidist execution you should set the following environment variables:\n\n.. code-block:: bash\n\n   export MODIN_ENGINE=unidist\n   export MODIN_STORAGE_FORMAT=pandas\n   export UNIDIST_BACKEND=mpi\n\nor turn it on in source code:\n\n.. code-block:: python\n\n   import modin.config as modin_cfg\n   import unidist.config as unidist_cfg\n\n   modin_cfg.Engine.put('unidist')\n   modin_cfg.StorageFormat.put('pandas')\n   unidist_cfg.Backend.put('mpi')\n\nTo run a python application you should use ``mpiexec -n 1 python <script.py>`` command.\n\n.. code-block:: bash\n\n   mpiexec -n 1 python script.py\n\nFor more information on how to run a python application with unidist on MPI backend\nplease refer to `Unidist on MPI`_ section of the unidist documentation.\n\nAs of unidist 0.5.0 there is support for a shared object store for MPI backend.\nThe feature allows to improve performance in the workloads,\nwhere workers use same data multiple times by reducing data copies.\nYou can enable the feature by setting the following environment variable:\n\n.. code-block:: bash\n\n   export UNIDIST_MPI_SHARED_OBJECT_STORE=True\n\nor turn it on in source code:\n\n.. code-block:: python\n\n   import unidist.config as unidist_cfg\n\n   unidist_cfg.MpiSharedObjectStore.put(True)\n\n.. _`Unidist on MPI`: https://unidist.readthedocs.io/en/latest/using_unidist/unidist_on_mpi.html"
  },
  {
    "path": "docs/development/using_pandas_on_python.rst",
    "content": "pandas on Python\n================\n\nThis section describes usage related documents for the pandas on Python component of Modin.\n\nModin uses pandas as the primary memory format of the underlying partitions and optimizes queries\nfrom the API layer in a specific way to this format. Since it is a default, you do not need to specify\nthe pandas memory format, but we show how to explicitly set it below.\n\nOne of the execution engines that Modin uses is Python. This engine is sequential and used for debugging.\nTo enable the pandas on Python execution you should set the following environment variables:\n\n.. code-block:: bash\n\n   export MODIN_ENGINE=python\n   export MODIN_STORAGE_FORMAT=pandas\n\nor turn a debug mode on:\n\n.. code-block:: bash\n\n   export MODIN_DEBUG=True\n   export MODIN_STORAGE_FORMAT=pandas\n\nor do the same in source code:\n\n.. code-block:: python\n\n   import modin.config as cfg\n   cfg.Engine.put('python')\n   cfg.StorageFormat.put('pandas')\n\n.. code-block:: python\n\n   import modin.config as cfg\n   cfg.IsDebug.put(True)\n   cfg.StorageFormat.put('pandas')"
  },
  {
    "path": "docs/development/using_pandas_on_ray.rst",
    "content": "pandas on Ray\n=============\n\nThis section describes usage related documents for the pandas on Ray component of Modin.\n\nModin uses pandas as a primary memory format of the underlying partitions and optimizes queries\ningested from the API layer in a specific way to this format. Thus, there is no need to care of choosing it\nbut you can explicitly specify it anyway as shown below.\n\nOne of the execution engines that Modin uses is Ray. If you have Ray installed in your system,\nModin also uses it by default to distribute computations.\n\nIf you want to be explicit, you could set the following environment variables:\n\n.. code-block:: bash\n\n   export MODIN_ENGINE=ray\n   export MODIN_STORAGE_FORMAT=pandas\n\nor turn it on in source code:\n\n.. code-block:: python\n\n   import modin.config as cfg\n   cfg.Engine.put('ray')\n   cfg.StorageFormat.put('pandas')\n"
  },
  {
    "path": "docs/ecosystem.rst",
    "content": "Ecosystem\n=========\n\nThere is a constantly growing number of users and packages using pandas\nto address their specific needs in data preparation, analysis and visualization.\npandas is being used ubiquitously and is a good choise to handle small-sized data.\nHowever, pandas scales poorly and is non-interactive on moderate to large datasets.\nModin provides a drop-in replacement API for pandas and scales computation across nodes and\nCPUs available. What you need to do to switch to Modin is just replace a single line of code.\n\n.. code-block:: python\n\n    # import pandas as pd\n    import modin.pandas as pd\n\nWhile most packages can consume a pandas DataFrame and operate it efficiently,\nthis is not the case with a Modin DataFrame due to its distributed nature.\nThus, some packages may lack support for handling Modin DataFrame(s) correctly and,\nmoreover, efficiently. Modin implements such methods as ``__array__``, ``__dataframe__``, etc.\nto facilitate other libraries to consume a Modin DataFrame. If you feel that a certain library\ncan operate efficiently with a specific format of data, it is possible to convert a Modin DataFrame\nto the format preferred.\n\nto_pandas\n---------\n\nYou can refer to `pandas ecosystem`_ page to get more details on\nwhere pandas can be used and what libraries it powers.\n\n.. code-block:: python\n\n    from modin.pandas.io import to_pandas\n\n    pandas_df = to_pandas(modin_df)\n\nto_numpy\n--------\n\nYou can refer to `NumPy ecosystem`_ section of NumPy documentation to get more details on\nwhere NumPy can be used and what libraries it powers.\n\n.. code-block:: python\n\n    from modin.pandas.io import to_numpy\n\n    numpy_arr = to_numpy(modin_df)\n\nto_ray\n------\n\nYou can refer to `Ray Data`_ page to get more details on\nwhere Ray Dataset can be used and what libraries it powers.\n\n.. 
code-block:: python\n\n    from modin.pandas.io import to_ray\n\n    ray_dataset = to_ray(modin_df)\n\nto_dask\n-------\n\nYou can refer to `Dask DataFrame`_ page to get more details on\nwhere Dask DataFrame can be used and what libraries it powers.\n\n.. code-block:: python\n\n    from modin.pandas.io import to_dask\n\n    dask_df = to_dask(modin_df)\n\n.. _pandas ecosystem: https://pandas.pydata.org/community/ecosystem.html\n.. _NumPy ecosystem: https://numpy.org\n.. _Ray Data: https://docs.ray.io/en/latest/data/data.html\n.. _Dask DataFrame: https://docs.dask.org/en/stable/dataframe.html\n\n"
  },
  {
    "path": "docs/flow/modin/config.rst",
    "content": ":orphan:\n\nModin Configuration Settings\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nTo adjust Modin's default behavior, you can set the value of Modin\nconfigs by setting an environment variable or by using the\n``modin.config`` API. To list all available configs in Modin, please\nrun ``python -m modin.config`` to print all\nModin configs with descriptions.\n\nPublic API\n''''''''''\n\nPotentially, the source of configs can be any, but for now only environment\nvariables are implemented. Any environment variable originate from\n:class:`~modin.config.envvars.EnvironmentVariable`, which contains most of\nthe config API implementation.\n\n.. autoclass:: modin.config.envvars.EnvironmentVariable\n  :members: get, put, get_help, get_value_source, once, subscribe\n\nModin Configs List\n''''''''''''''''''\n\n.. csv-table::\n   :file: configs_help.csv\n   :header-rows: 1\n\nUsage Guide\n'''''''''''\n\nSee example of interaction with Modin configs below, as it can be seen config\nvalue can be set either by setting the environment variable or by using config\nAPI.\n\n.. code-block:: python\n\n    import os\n\n    # Setting `MODIN_ENGINE` environment variable.\n    # Also can be set outside the script.\n    os.environ[\"MODIN_ENGINE\"] = \"Dask\"\n\n    import modin.config\n    import modin.pandas as pd\n\n    # Checking initially set `Engine` config,\n    # which corresponds to `MODIN_ENGINE` environment\n    # variable\n    print(modin.config.Engine.get()) # prints 'Dask'\n\n    # Checking default value of `NPartitions`\n    print(modin.config.NPartitions.get()) # prints '8'\n\n    # Changing value of `NPartitions`\n    modin.config.NPartitions.put(16)\n    print(modin.config.NPartitions.get()) # prints '16'\n\nOne can also use config variables with a context manager in order to use\nsome config only for a certain part of the code:\n\n.. 
code-block:: python\n\n    import modin.config as cfg\n\n    # Default value for this config is 'False'\n    print(cfg.RangePartitioning.get()) # False\n\n    # Set the config to 'True' inside of the context-manager\n    with cfg.context(RangePartitioning=True):\n        print(cfg.RangePartitioning.get()) # True\n        df.merge(...) # will use range-partitioning impl\n\n    # Once the context is over, the config gets back to its previous value\n    print(cfg.RangePartitioning.get()) # False\n\n    # You can also set multiple config at once when you pass a dictionary to 'cfg.context'\n    print(cfg.AsyncReadMode.get()) # False\n\n    with cfg.context(RangePartitioning=True, AsyncReadMode=True):\n        print(cfg.RangePartitioning.get()) # True\n        print(cfg.AsyncReadMode.get()) # True\n    print(cfg.RangePartitioning.get()) # False\n    print(cfg.AsyncReadMode.get()) # False\n"
  },
  {
    "path": "docs/flow/modin/core/dataframe/algebra.rst",
    "content": ":orphan:\n\nOperators Module Description\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nBrief description\n'''''''''''''''''\nMost of the functions that are evaluated by `QueryCompiler` can be categorized into\none of the patterns: Map, TreeReduce, Binary, Reduce, etc., called core operators. The ``modin.core.dataframe.algebra``\nmodule provides templates to easily build such types of functions. These templates\nare supposed to be used at the `QueryCompiler` level since each built function accepts\nand returns `QueryCompiler`.\n\nHigh-Level Module Overview\n''''''''''''''''''''''''''\nEach template class implements a\n``register`` method, which takes functions to apply and\ninstantiate the related template. Functions that are passed to ``register`` will be executed\nagainst converted to pandas and preprocessed in a template-specific way partition, so the function\nwould take one of the pandas object: ``pandas.DataFrame``, ``pandas.Series`` or ``pandas.DataFrameGroupbyObject``.\n\n.. note:: \n    Currently, functions that are built in that way are supported only in a pandas\n    storage format (i.e. can be used only in `PandasQueryCompiler`).\n\nAlgebra module provides templates for this type of function:\n\nMap operator\n-------------\nUniformly apply a function argument to each partition in parallel. \n**Note**: map function should not change the shape of the partitions.\n\n.. figure:: /img/map_evaluation.svg\n    :align: center\n\nThis operator performs best when the number of partitions equals to the number of CPUs\nso that each single partition gets processed in parallel. 
When the number of partitions is 1.5x greater than\nthe number of CPUs, Modin applies a heuristic to join some partitions to get \"ideal\" partitioning so that\neach new partition gets processed in parallel.\n\nReduce operator\n---------------\nApplies an argument function that reduces each column or row on the specified axis into a scalar, but requires knowledge about the whole axis.\nBe aware that providing this knowledge may be expensive because the execution engine has to\nconcatenate partitions along the specified axis. Also, note that the execution engine expects\nthat the reduce function returns a one dimensional frame.\n\n.. figure:: /img/reduce_evaluation.svg\n    :align: center\n\nThis operator performs best when the number of partitions (row or column partitions in depend on the specified axis)\nequals to the number of CPUs so that each single axis partition gets processed in parallel.\n\nTreeReduce operator\n-------------------\nApplies an argument function that reduces specified axis into a scalar. First applies map function to each partition\nin parallel, then concatenates resulted partitions along the specified axis and applies reduce\nfunction. In contrast with `Map function` template, here you're allowed to change partition shape\nin the map phase. Note that the execution engine expects that the reduce function returns a one dimensional frame.\n\nThis operator performs best when the number of partitions (including the initial and intermediate stages)\nequals to the number of CPUs so that each single axis partition gets processed in parallel.\n\nBinary operator\n---------------\nApplies an argument function, that takes exactly two operands (first is always `QueryCompiler`).\nIf both operands are query compilers then the execution engine broadcasts partitions of\nthe right operand to the left.\n\n.. figure:: /img/binary_evaluation.svg\n    :align: center\n\n.. 
warning::\n    To be able to do frame broadcasting, partitioning along the index axis of both frames\n    has to be equal, otherwise they need to be aligned first. The execution engine will do\n    it automatically but note that this requires repartitioning, which is a much \n    more expensive operation than the binary function itself.\n\nThis operator performs best when both operands have identical partitioning and the number of partitions of an operand\nequals to the number of CPUs so that each single partition gets processed in parallel.\n\nFold operator\n-------------\nApplies an argument function that requires knowledge of the whole axis. Be aware that providing this knowledge may be\nexpensive because the execution engine has to concatenate partitions along the specified axis.\n\nThis operator performs best when the number of partitions (row or column partitions in depend on the specified axis)\nequals to the number of CPUs so that each single axis partition gets processed in parallel.\n\nGroupBy operator\n----------------\nEvaluates GroupBy aggregation for that type of functions that can be executed via TreeReduce approach.\nTo be able to form groups engine broadcasts ``by`` partitions to each partition of the source frame.\n\nThis operator performs best when the cardinality of ``by`` columns is low (small number of output groups).\nAt the ``Map`` stage, the operator computes the aggregation for each row partition individually, meaning,\nthat the ``Reduce`` stage takes a dataframe with the following number of rows:\n``num_groups * n_row_parts``. 
If the number of groups is too high, there's a risk of getting a dataframe\nwith even bigger than the initial shape at the ``Reduce`` stage.\n\nDefault-to-pandas operator\n--------------------------\nDo :doc:`fallback to pandas </supported_apis/defaulting_to_pandas>` for passed function.\n\nThis operator has a performance penalty for going from a partitioned Modin DataFrame to pandas because of\nthe communication cost and single-threaded nature of pandas.\n\n\nHow to register your own function\n'''''''''''''''''''''''''''''''''\nLet's examine an example of how to use the algebra module to create your own\nnew functions.\n\nImagine you have a complex aggregation that can be implemented into a single query but\ndoesn't have any implementation in pandas API. If you know how to implement this\naggregation efficiently in a distributed frame, you may want to use one of the above described\npatterns (e.g. ``TreeReduce``).\n\nLet's implement a function that counts non-NA values for each column or row\n(``pandas.DataFrame.count``). First, we need to determine the function type.\nTreeReduce approach would be great: in a map phase, we'll count non-NA cells in each\npartition in parallel and then just sum its results in the reduce phase.\n\nTo define the TreeReduce function that does `count` + `sum` we just need to register the\nappropriate functions and then assign the result to the picked `QueryCompiler`\n(`PandasQueryCompiler` in our case):\n\n.. code-block:: python\n\n    from modin.core.storage_formats import PandasQueryCompiler\n    from modin.core.dataframe.algebra import TreeReduce\n\n    PandasQueryCompiler.custom_count = TreeReduce.register(pandas.DataFrame.count, pandas.DataFrame.sum)\n\nThen, we want to handle it from the :py:class:`~modin.pandas.dataframe.DataFrame`, so we need to create a way to do that:\n\n.. 
code-block:: python\n\n    import modin.pandas as pd\n\n    def count_func(self, **kwargs):\n        # The constructor allows you to pass in a query compiler as a keyword argument\n        return self.__constructor__(query_compiler=self._query_compiler.custom_count(**kwargs))\n\n    pd.DataFrame.count_custom = count_func\n\nAnd then you can use it like you usually would:\n\n.. code-block:: python\n\n    df.count_custom(axis=1)\n\nMany of the `pandas` API functions can be easily implemented this way, so if you find\nout that one of your favorite function is still defaulted to pandas and decide to\ncontribute to Modin to add its implementation, you may use this example as a reference.\n"
  },
  {
    "path": "docs/flow/modin/core/dataframe/base/dataframe.rst",
    "content": "ModinDataframe\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nThe :py:class:`~modin.core.dataframe.base.dataframe.dataframe.ModinDataframe` is the parent class for all dataframes - regardless of what storage format they are backed by. Its purpose is to define the algebra operators that must be exposed by a dataframe.\n\nThis class exposes the dataframe algebra and is meant to be subclassed by all dataframe implementations.\nDescendants of this class implement the algebra, and act as the intermediate level\nbetween the query compiler and the underlying execution details (e.g. the conforming partition manager). The class provides\na significantly reduced set of operations that can be composed to form any pandas query.\n\nThe :py:class:`~modin.core.dataframe.pandas.dataframe.dataframe.PandasDataframe` is an example of a descendant of this class. It currently has implementations for some of the operators\nexposed in this class, and is currently being refactored to include implementations for all of the algebra operators. Please\nrefer to the :doc:`PandasDataframe documentation </flow/modin/core/dataframe/pandas/dataframe>` for more information.\n\nThe :py:class:`~modin.core.dataframe.base.dataframe.dataframe.ModinDataframe` is independent of implementation specific details such as partitioning, storage format, or execution engine.\n\nPublic API\n----------\n\n.. autoclass:: modin.core.dataframe.base.dataframe.dataframe.ModinDataframe\n  :members:\n"
  },
  {
    "path": "docs/flow/modin/core/dataframe/base/index.rst",
    "content": "Purpose\n=======\n\nThe :py:class:`~modin.core.dataframe.base.dataframe.dataframe.ModinDataframe` serves the purpose of describing and defining the :doc:`Core Dataframe Algebra </flow/modin/core/dataframe/algebra>`.\n\nIt is the core construction element and serves as the client for the :doc:`Modin Query Compiler</flow/modin/core/storage_formats/base/query_compiler>`. Descendants that offer implementations execute the queries from the compiler by invoking functions over partitions via a partition manager.\n\nThe partitions and partition manager interfaces are currently implementation-specific, but may\nbe standardized in the future.\n\nThe :py:class:`~modin.core.dataframe.base.dataframe.dataframe.ModinDataframe` and axis partitions are the interfaces that must be implemented by any :doc:`execution backend</flow/modin/core/execution/dispatching>` in order for it to be plugged in to Modin.\nThese classes are mostly abstract, however very simple and generic enough methods like\n:py:meth:`~modin.core.dataframe.base.partitioning.BaseDataframeAxisPartition.force_materialization` can be implemented at the base level because for now we do not expect them to differ in any implementation.\n\nModinDataframe Interface\n========================\n\n* :doc:`ModinDataframe <dataframe>` is an abstract class which represents the algebra operators a dataframe must expose.\n* :doc:`BaseDataframeAxisPartition <partitioning/axis_partition>` is an abstract class, representing a joined group of partitions along some axis (either rows or labels).\n\n.. toctree::\n    :hidden:\n\n    dataframe\n    partitioning/axis_partition\n"
  },
  {
    "path": "docs/flow/modin/core/dataframe/base/partitioning/axis_partition.rst",
    "content": "BaseDataframeAxisPartition\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nThe class is base for any axis partition class and serves as the last level on which\noperations that were conveyed from the partition manager are being performed on an entire column or row.\n\n**Note**: ``modin.core.dataframe.base`` intentionally does not describe any particular partition interface,\nas it is the partition manager responsibility (if said partition manager is implemented), i.e. it is\ntoo low-level to be present on the base, abstract level.\n\nThe class provides an API that has to be overridden by the child classes in order to manipulate\non a list of block partitions (making up column or row partition) they store.\n\nThe procedures that use this class and its methods assume that they have some global knowledge\nabout the entire axis. This may require the implementation to use concatenation or append on the\nlist of block partitions.\n\nPublic API\n----------\n\n.. autoclass:: modin.core.dataframe.base.partitioning.axis_partition.BaseDataframeAxisPartition\n  :members:\n"
  },
  {
    "path": "docs/flow/modin/core/dataframe/index.rst",
    "content": ":orphan:\n\nCore Modin Dataframe Objects\n============================\n\nModin partitions data to scale efficiently.\nTo keep track of everything a few key classes are introduced: ``Dataframe``, ``Partition``, ``AxisPartiton`` and ``PartitionManager``.\n\n* ``Dataframe`` is the class conforming to Dataframe Algebra.\n* ``Partition`` is an element of a NxM grid which, when combined, represents the ``Dataframe``\n* ``AxisPartition`` is a joined group of ``Partition``-s along some axis (either rows or columns)\n* ``PartitionManager`` is the manager that implements the primitives used for Dataframe Algebra operations over ``Partition``-s\n\nEach :doc:`storage format </flow/modin/core/storage_formats/index>`, execution engine, and each execution system (storage format + execution engine)\nmay have its own implementations of these Core Dataframe's entities.\nCurrent stable implementations are the following:\n\n* :doc:`Base ModinDataframe <base/index>` defines a common interface and algebra operators for `Dataframe` implementations.\n\nStorage format specific:\n\n* :doc:`Modin PandasDataframe <pandas/index>` is an implementation for any frame class of :doc:`pandas storage format </flow/modin/core/storage_formats/pandas/index>`.\n\nEngine specific:\n\n* :doc:`Modin GenericRayDataframe </flow/modin/core/execution/ray/generic>` is an implementation for any frame class that works on Ray execution engine.\n* :doc:`Modin GenericUnidistDataframe </flow/modin/core/execution/unidist/generic>` is an implementation for any frame class that works on Unidist execution engine.\n\nExecution system specific:\n\n* :doc:`Modin PandasOnRayDataframe </flow/modin/core/execution/ray/implementations/pandas_on_ray/index>` is a specialization of the Core Modin Dataframe for ``PandasOnRay`` execution.\n* :doc:`Modin PandasOnDaskDataframe </flow/modin/core/execution/dask/implementations/pandas_on_dask/index>` is specialization of the Core Modin Dataframe for ``PandasOnDask`` 
execution.\n* :doc:`Modin PandasOnPythonDataframe </flow/modin/core/execution/python/implementations/pandas_on_python/index>` is a specialization of the Core Modin Dataframe for ``PandasOnPython`` execution.\n* :doc:`Modin PandasOnUnidistDataframe </flow/modin/core/execution/unidist/implementations/pandas_on_unidist/index>` is a specialization of the Core Modin Dataframe for ``PandasOnUnidist`` execution.\n\n.. note::\n    At the current stage of Modin development, the base interfaces of the Dataframe objects are not defined yet.\n    So for now the origin of all changes in the Dataframe interfaces is the :doc:`Dataframe for pandas storage format<pandas/index>`.\n\n.. toctree::\n    :hidden:\n\n    base/index\n    pandas/index\n"
  },
  {
    "path": "docs/flow/modin/core/dataframe/pandas/dataframe.rst",
    "content": "PandasDataframe\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\n:py:class:`~modin.core.dataframe.pandas.dataframe.dataframe.PandasDataframe` is a direct descendant of :py:class:`~modin.core.dataframe.base.dataframe.dataframe.ModinDataframe`. Its purpose is to implement the abstract interfaces for usage with all ``pandas``-based :doc:`storage formats</flow/modin/core/storage_formats/index>`.\n:py:class:`~modin.core.dataframe.pandas.dataframe.dataframe.PandasDataframe` could be inherited and augmented further by any specific implementation which needs it to take special care of some behavior or to improve performance for certain execution engine.\n\nThe class serves as the intermediate level\nbetween ``pandas`` query compiler and conforming partition manager. All queries formed\nat the query compiler layer are ingested by this class and then conveyed jointly with the stored partitions\ninto the partition manager for processing. Direct partitions manipulation by this class is prohibited except\ncases if an operation is strictly private or protected and called inside of the class only. The class provides\nsignificantly reduced set of operations that fit plenty of pandas operations.\n\nMain tasks of :py:class:`~modin.core.dataframe.pandas.dataframe.dataframe.PandasDataframe` are storage of partitions, manipulation with labels of axes and\nproviding set of methods to perform operations on the internal data.\n\nAs mentioned above, ``PandasDataframe`` shouldn't work with stored partitions directly and\nthe responsibility for modifying partitions array has to lay on :doc:`partitioning/partition_manager`. 
For example, method\n:meth:`~modin.core.dataframe.pandas.dataframe.dataframe.PandasDataframe.broadcast_apply_full_axis` redirects applying\nfunction to :meth:`~PandasDataframePartitionManager.broadcast_axis_partitions` method.\n\n``Modin PandasDataframe`` can be created from ``pandas.DataFrame``, ``pyarrow.Table``\n(methods :meth:`~modin.core.dataframe.pandas.dataframe.dataframe.PandasDataframe.from_pandas`,\n:meth:`~modin.core.dataframe.pandas.dataframe.dataframe.PandasDataframe.from_arrow` are used respectively). Also,\n``PandasDataframe`` can be converted to ``np.array``, ``pandas.DataFrame``\n(methods :meth:`~modin.core.dataframe.pandas.dataframe.dataframe.PandasDataframe.to_numpy`,\n:meth:`~modin.core.dataframe.pandas.dataframe.dataframe.PandasDataframe.to_pandas` are used respectively).\n\nManipulation with labels of axes happens using internal methods for changing labels on the new,\nadding prefixes/suffixes etc.\n\nPublic API\n----------\n\n.. autoclass:: modin.core.dataframe.pandas.dataframe.dataframe.PandasDataframe\n  :members:\n"
  },
  {
    "path": "docs/flow/modin/core/dataframe/pandas/index.rst",
    "content": "Modin PandasDataframe Objects\n=============================\n\n``modin.core.dataframe.pandas`` is the package which houses common implementations\nof different Modin internal classes used by most `pandas`-based :doc:`storage formats</flow/modin/core/storage_formats/index>`.\n\nIt also double-serves as the full example of how to implement Modin execution backend pieces (sans the :doc:`execution part</flow/modin/core/execution/dispatching>` which is absent here),\nas it implements everything an execution backend needs to be fully conformant to Modin expectations.\n\n* :doc:`PandasDataframe <dataframe>` is the class conforming to Dataframe Algebra.\n* :doc:`PandasDataframePartition <partitioning/partition>` implements ``Partition`` interface holding ``pandas.DataFrame``.\n* :doc:`PandasDataframeAxisPartition <partitioning/axis_partition>` is a joined group of ``PandasDataframePartition``-s along some axis (either rows or labels)\n* :doc:`PandasDataframePartitionManager <partitioning/partition_manager>` is the manager that implements the primitives used for Dataframe Algebra operations over ``PandasDataframePartition``-s\n* :doc:`ModinDtypes <metadata/dtypes>`\n* :doc:`ModinIndex <metadata/index>`\n\n.. toctree::\n    :hidden:\n\n    dataframe\n    partitioning/partition\n    partitioning/axis_partition\n    partitioning/partition_manager\n    metadata/dtypes\n    metadata/index\n"
  },
  {
    "path": "docs/flow/modin/core/dataframe/pandas/metadata/dtypes.rst",
    "content": "ModinDtypes\n\"\"\"\"\"\"\"\"\"\"\"\n\nPublic API\n----------\n\n.. autoclass:: modin.core.dataframe.pandas.metadata.dtypes.ModinDtypes\n  :members:"
  },
  {
    "path": "docs/flow/modin/core/dataframe/pandas/metadata/index.rst",
    "content": "ModinIndex\n\"\"\"\"\"\"\"\"\"\"\n\nPublic API\n----------\n\n.. autoclass:: modin.core.dataframe.pandas.metadata.index.ModinIndex\n  :members:"
  },
  {
    "path": "docs/flow/modin/core/dataframe/pandas/partitioning/axis_partition.rst",
    "content": "PandasDataframeAxisPartition\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nThe class implements abstract interface methods from :py:class:`~modin.core.dataframe.base.partitioning.axis_partition.BaseDataframeAxisPartition`\ngiving the means for a sibling :doc:`partition manager<partition_manager>` to actually work with the axis-wide partitions.\n\nThe class is base for any axis partition class of ``pandas`` storage format.\n\nSubclasses must implement ``list_of_blocks`` which represents data wrapped by the :py:class:`~modin.core.dataframe.pandas.partitioning.partition.PandasDataframePartition`\nobjects and creates something interpretable as a ``pandas.DataFrame``.\n\nSee :py:class:`~modin.core.execution.ray.implementations.pandas_on_ray.partitioning.axis_partition.PandasOnRayDataframeAxisPartition`\nfor an example on how to override/use this class when the implementation needs to be augmented.\n\nThe :py:class:`~modin.core.dataframe.pandas.partitioning.axis_partition.PandasDataframeAxisPartition` object has an invariant that requires that this\nobject is never returned from a function. It assumes that there will always be\n``PandasDataframeAxisPartition`` object stored and structures itself accordingly.\n\nPublic API\n----------\n\n.. autoclass:: modin.core.dataframe.pandas.partitioning.axis_partition.PandasDataframeAxisPartition\n  :members:\n"
  },
  {
    "path": "docs/flow/modin/core/dataframe/pandas/partitioning/partition.rst",
    "content": "PandasDataframePartition\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nThe class is base for any partition class of ``pandas`` storage format and serves as the last level\non which operations that were conveyed from the partition manager are being performed on an\nindividual block partition.\n\nThe class provides an API that has to be overridden by child classes in order to manipulate\non data and metadata they store.\n\nThe public API exposed by the children of this class is used in :py:class:`~modin.core.dataframe.pandas.partitioning.partition_manager.PandasDataframePartitionManager`.\n\nThe objects wrapped by the child classes are treated as immutable by ``PandasDataframePartitionManager`` subclasses\nand no logic for updating inplace.\n\nPublic API\n----------\n\n.. autoclass:: modin.core.dataframe.pandas.partitioning.partition.PandasDataframePartition\n  :members:\n"
  },
  {
    "path": "docs/flow/modin/core/dataframe/pandas/partitioning/partition_manager.rst",
    "content": "PandasDataframePartitionManager\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nThe class is base for any partition manager class of ``pandas`` storage format and serves as\nintermediate level between :doc:`Modin PandasDataframe <../dataframe>` and conforming :doc:`partition <partition>` class.\nThe class is responsible for partitions manipulation and applying a function to individual partitions:\nblock partitions, row partitions or column partitions, i.e. the class can form axis partitions from\nblock partitions to apply a function if an operation requires access to an entire column or row.\nThe class translates frame API into partition API and also can have some preprocessing operations\ndepending on the partition type for improving performance (for example,\n:meth:`~modin.core.dataframe.pandas.partitioning.partition_manager.PandasDataframePartitionManager.preprocess_func`).\n\nMain task of partition manager is to keep knowledge of how partitions are stored and managed\ninternal to itself, so surrounding code could use it via lean enough API without worrying about\nimplementation details.\n\nPartition manager can apply user-passed (arbitrary) function in different modes:\n\n* block-wise (apply a function to individual block partitions):\n\n  * optionally accepting partition indices along each axis\n  * optionally accepting an item to be split so parts of it would be sent to each partition\n\n* along a full axis (apply a function to an entire column or row made up of block partitions when user function needs information about the whole axis)\n\nIt can also broadcast partitions from `right` to `left` when executing certain operations making\n`right` partitions available for functions executed where `left` live.\n\n..\n  TODO: insert more text explaining \"broadcast\" term\n\nPartition manager also is used to create \"logical\" partitions, or :doc:`axis partitions <axis_partition>`\nby joining existing partitions along specified 
axis (either rows or labels),\nand to concatenate different partition sets along given axis.\n\nIt also maintains mapping from \"external\" (end user-visible) indices along all axes to internal\nindices which are actually pairs of indices of partitions and indices inside the partitions,\nas well as manages conversion to numpy and pandas representations.\n\n\nPublic API\n----------\n\n.. autoclass:: modin.core.dataframe.pandas.partitioning.partition_manager.PandasDataframePartitionManager\n  :members:\n"
  },
  {
    "path": "docs/flow/modin/core/execution/dask/implementations/pandas_on_dask/dataframe.rst",
    "content": "PandasOnDaskDataframe\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nThe class is the specific implementation of the dataframe algebra for the `Dask` execution engine.\nIt serves as an intermediate level between ``pandas`` query compiler and\n:py:class:`~modin.core.execution.dask.implementations.pandas_on_dask.partitioning.PandasOnDaskDataframePartitionManager`.\n\nPublic API\n----------\n\n.. autoclass:: modin.core.execution.dask.implementations.pandas_on_dask.dataframe.PandasOnDaskDataframe\n  :members:\n"
  },
  {
    "path": "docs/flow/modin/core/execution/dask/implementations/pandas_on_dask/index.rst",
    "content": ":orphan:\n\nPandasOnDask Execution\n======================\n\nQueries that perform data transformation, data ingress or data egress using the `pandas on Dask` execution\npass through the Modin components detailed below.\n\nTo enable `pandas on Dask` execution, please refer to the usage section in :doc:`pandas on Dask </development/using_pandas_on_dask>`.\n\nData Transformation\n'''''''''''''''''''\n\n.. image:: /img/pandas_on_dask_data_transform.svg\n   :align: center\n\nWhen a user calls any :py:class:`~modin.pandas.dataframe.DataFrame` API, a query starts forming at the `API` layer\nto be executed at the `Execution` layer. The `API` layer is responsible for processing the query appropriately,\nfor example, determining whether the final result should be a ``DataFrame`` or ``Series`` object. This layer is also responsible for sanitizing the input to the\n:py:class:`~modin.core.storage_formats.pandas.query_compiler.PandasQueryCompiler`, e.g. validating a parameter from the query\nand defining specific intermediate values to provide more context to the query compiler.\nThe :py:class:`~modin.core.storage_formats.pandas.query_compiler.PandasQueryCompiler` is responsible for\nprocessing the query, received from the :py:class:`~modin.pandas.dataframe.DataFrame` `API` layer,\nto determine how to apply it to a subset of the data - either cell-wise or along an axis-wise partition backed by the `pandas`\nstorage format. 
The :py:class:`~modin.core.storage_formats.pandas.query_compiler.PandasQueryCompiler` maps the query to one of the :doc:`Core Algebra Operators </flow/modin/core/dataframe/algebra>` of\nthe :py:class:`~modin.core.execution.dask.implementations.pandas_on_dask.dataframe.PandasOnDaskDataframe` which inherits\ngeneric functionality from the :py:class:`~modin.core.dataframe.pandas.dataframe.dataframe.PandasDataframe`.\n\nPandasOnDask Dataframe implementation\n-------------------------------------\n\nModin implements ``Dataframe``, ``PartitionManager``, ``AxisPartition`` and ``Partition`` classes\nspecifically for the `PandasOnDask` execution.\n\n* :doc:`PandasOnDaskDataframe <dataframe>`\n* :doc:`PandasOnDaskDataframePartition <partitioning/partition>`\n* :doc:`PandasOnDaskDataframeVirtualPartition <partitioning/virtual_partition>`\n* :doc:`PandasOnDaskDataframePartitionManager <partitioning/partition_manager>`\n\n.. toctree::\n    :hidden:\n\n    dataframe\n    partitioning/partition\n    partitioning/virtual_partition\n    partitioning/partition_manager\n\n\nData Ingress\n''''''''''''\n\n.. image:: /img/pandas_on_dask_data_ingress.svg\n   :align: center\n\nData Egress\n'''''''''''\n\n.. image:: /img/pandas_on_dask_data_egress.svg\n   :align: center\n\n\nWhen a user calls any IO function from the ``modin.pandas.io`` module, the `API` layer queries the\n:py:class:`~modin.core.execution.dispatching.factories.dispatcher.FactoryDispatcher` which defines a factory specific for\nthe execution, namely, the :py:class:`~modin.core.execution.dispatching.factories.factories.PandasOnDaskFactory`. 
The factory, in turn,\nexposes the :py:class:`~modin.core.execution.dask.implementations.pandas_on_dask.io.PandasOnDaskIO` class\nwhose responsibility is to perform a parallel read/write from/to a file.\n\nWhen reading data from a CSV file, for example, the :py:class:`~modin.core.execution.dask.implementations.pandas_on_dask.io.PandasOnDaskIO` class forwards\nthe user query to the :meth:`~modin.core.io.text.CSVDispatcher._read` method of :py:class:`~modin.core.io.text.CSVDispatcher`, where the query's parameters are preprocessed\nto check if they are supported by the execution (defaulting to pandas if they are not) and computes some metadata\ncommon for all partitions to be read. Then, the file is split into row chunks, and this data is used to launch remote tasks on the Dask workers\nvia the :meth:`~modin.core.execution.dask.common.engine_wrapper.DaskWrapper.deploy` method of :py:class:`~modin.core.execution.dask.common.engine_wrapper.DaskWrapper`.\nOn each Dask worker, the :py:class:`~modin.core.storage_formats.pandas.parsers.PandasCSVParser` parses data.\nAfter the remote tasks are finished, additional result postprocessing is performed,\nand a new query compiler with the data read is returned.\n\nWhen writing data to a CSV file, for example, the :py:class:`~modin.core.execution.dask.implementations.pandas_on_dask.io.PandasOnDaskIO` processes\nthe user query to execute it on Dask workers. Then, the :py:class:`~modin.core.execution.dask.implementations.pandas_on_dask.io.PandasOnDaskIO` asks the\n:py:class:`~modin.core.execution.dask.implementations.pandas_on_dask.dataframe.PandasOnDaskDataframe` to decompose the data into row-wise partitions\nthat will be written into the file in parallel in Dask workers.\n\n.. note::\n   Currently, data egress uses default `pandas` implementation for `pandas on Dask` execution.\n"
  },
  {
    "path": "docs/flow/modin/core/execution/dask/implementations/pandas_on_dask/partitioning/partition.rst",
    "content": "PandasOnDaskDataframePartition\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nThe class is the specific implementation of :py:class:`~modin.core.dataframe.pandas.partitioning.partition.PandasDataframePartition`,\nproviding the API to perform operations on a block partition, namely, ``pandas.DataFrame``, using Dask as the execution engine.\n\nIn addition to wrapping a ``pandas.DataFrame``, the class also holds the following metadata:\n\n* ``length`` - length of ``pandas.DataFrame`` wrapped\n* ``width`` - width of ``pandas.DataFrame`` wrapped\n* ``ip`` - node IP address that holds ``pandas.DataFrame`` wrapped\n\nAn operation on a block partition can be performed in two modes:\n\n* asynchronously_ - via :meth:`~modin.core.execution.dask.implementations.pandas_on_dask.partitioning.PandasOnDaskDataframePartition.apply`\n* lazily_ - via :meth:`~modin.core.execution.dask.implementations.pandas_on_dask.partitioning.PandasOnDaskDataframePartition.add_to_apply_calls`\n\nPublic API\n----------\n\n.. autoclass:: modin.core.execution.dask.implementations.pandas_on_dask.partitioning.PandasOnDaskDataframePartition\n  :members:\n\n  .. _asynchronously: https://en.wikipedia.org/wiki/Asynchrony_(computer_programming)\n  .. _lazily: https://en.wikipedia.org/wiki/Lazy_evaluation\n"
  },
  {
    "path": "docs/flow/modin/core/execution/dask/implementations/pandas_on_dask/partitioning/partition_manager.rst",
    "content": "PandasOnDaskDataframePartitionManager\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nThis class is the specific implementation of :py:class:`~modin.core.dataframe.pandas.partitioning.partition_manager.PandasDataframePartitionManager`\nusing Dask as the execution engine. This class is responsible for partition manipulation and applying a function to\nblock/row/column partitions.\n\nPublic API\n----------\n\n.. autoclass:: modin.core.execution.dask.implementations.pandas_on_dask.partitioning.PandasOnDaskDataframePartitionManager\n  :members:\n"
  },
  {
    "path": "docs/flow/modin/core/execution/dask/implementations/pandas_on_dask/partitioning/virtual_partition.rst",
    "content": "PandasOnDaskDataframeVirtualPartition\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nThe class is the specific implementation of :py:class:`~modin.core.dataframe.pandas.partitioning.virtual_partition.PandasOnDaskDataframeVirtualPartition`,\nproviding the API to perform operations on an axis (column or row) partition using Dask as the execution engine.\nThe axis partition is a wrapper over a list of block partitions that are stored in this class.\n\nPublic API\n----------\n\n.. autoclass:: modin.core.execution.dask.implementations.pandas_on_dask.partitioning.PandasOnDaskDataframeVirtualPartition\n  :members:\n\nPandasOnDaskDataframeColumnPartition\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nPublic API\n----------\n\n.. autoclass:: modin.core.execution.dask.implementations.pandas_on_dask.partitioning.PandasOnDaskDataframeColumnPartition\n  :members:\n\nPandasOnDaskDataframeRowPartition\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nPublic API\n----------\n\n.. autoclass:: modin.core.execution.dask.implementations.pandas_on_dask.partitioning.PandasOnDaskDataframeRowPartition\n  :members:\n"
  },
  {
    "path": "docs/flow/modin/core/execution/dispatching.rst",
    "content": ":orphan:\n\n..\n    TODO: add links to documentation for mentioned modules.\n\nFactories Module Description\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nBrief description\n'''''''''''''''''\nModin has several execution engines and storage formats, combining them together forms certain executions. \nCalling any :py:class:`~modin.pandas.dataframe.DataFrame` API function will end up in some execution-specific method. The responsibility of dispatching high-level API calls to\nexecution-specific function belongs to the :ref:`QueryCompiler <query_compiler_def>`, which is determined at the time of the dataframe's creation by the factory of\nthe corresponding execution. The mission of this module is to route IO function calls from\nthe API level to its actual execution-specific implementations, which builds the\n`QueryCompiler` of the appropriate execution.\n\nExecution representation via Factories\n''''''''''''''''''''''''''''''''''''''\nExecution is a combination of the :doc:`storage format </flow/modin/core/storage_formats/index>` and an actual execution engine.\nFor example, ``PandasOnRay`` execution means the combination of the `pandas storage format` and `Ray` engine.\n\nEach storage format has its own :ref:`Query Compiler <query_compiler_def>` which compiles the most efficient queries\nfor the corresponding :doc:`Core Modin Dataframe </flow/modin/core/dataframe/index>` implementation. Speaking about ``PandasOnRay``\nexecution, its Query Compiler is :doc:`PandasQueryCompiler </flow/modin/core/storage_formats/pandas/query_compiler>` and the\nDataframe implementation is :doc:`PandasDataframe </flow/modin/core/dataframe/pandas/dataframe>`,\nwhich is general implementation for every execution of the pandas storage format. 
The actual implementation of ``PandasOnRay`` dataframe\nis defined by the :doc:`PandasOnRayDataframe </flow/modin/core/execution/ray/implementations/pandas_on_ray/dataframe>` class that\nextends ``PandasDataframe``.\n\nIn the scope of this module, each execution is represented with a factory class located in\n``modin/core/execution/dispatching/factories/factories.py``. Each factory contains a field that identifies the IO module of the corresponding execution. This IO module is\nresponsible for dispatching calls of IO functions to their actual implementations in the\nunderlying IO module. For more information about IO module visit :doc:`IO </flow/modin/core/io/index>` page.\n\nFactory Dispatcher\n''''''''''''''''''\nThe :py:class:`~modin.core.execution.dispatching.factories.dispatcher.FactoryDispatcher` class provides \npublic methods whose interface corresponds to pandas IO functions, the only difference is that they return `QueryCompiler` of the\nselected storage format instead of high-level :py:class:`~modin.pandas.dataframe.DataFrame`. ``FactoryDispatcher`` is responsible for routing\nthese IO calls to the factory which represents the selected execution.\n\nSo when you call ``read_csv()`` function and your execution is ``PandasOnRay`` then the\ntrace would be the following:\n\n.. figure:: /img/factory_dispatching.svg\n    :align: center\n\n``modin.pandas.read_csv`` calls ``FactoryDispatcher.read_csv``, which calls ``._read_csv``\nfunction of the factory of the selected execution, in our case it's ``PandasOnRayFactory._read_csv``,\nwhich in turn forwards this call to the actual implementation of ``read_csv`` — to the\n``PandasOnRayIO.read_csv``. The result of ``modin.pandas.read_csv`` will return a high-level Modin\nDataFrame with the appropriate `QueryCompiler` bound to it, which is responsible for\ndispatching all of the further function calls.\n\nPublic API\n''''''''''\n\n.. automodule:: modin.core.execution.dispatching.factories.factories\n    :members:\n"
  },
  {
    "path": "docs/flow/modin/core/execution/python/implementations/pandas_on_python/dataframe.rst",
    "content": "PandasOnPythonDataframe\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nThe class is specific implementation of :py:class:`~modin.core.dataframe.pandas.dataframe.dataframe.PandasDataframe`\nfor `Python` execution engine. It serves as an intermediate level between\n:py:class:`~modin.core.storage_formats.pandas.query_compiler.PandasQueryCompiler` and\n:py:class:`~modin.core.execution.python.implementations.pandas_on_python.partitioning.partition_manager.PandasOnPythonDataframePartitionManager`.\n\nPublic API\n----------\n\n.. autoclass:: modin.core.execution.python.implementations.pandas_on_python.dataframe.dataframe.PandasOnPythonDataframe\n  :members:"
  },
  {
    "path": "docs/flow/modin/core/execution/python/implementations/pandas_on_python/index.rst",
    "content": ":orphan:\n\nPandasOnPython Execution\n========================\n\nQueries that perform data transformation, data ingress or data egress using the `pandas on Python` execution\npass through the Modin components detailed below.\n\n`pandas on Python` execution is sequential and it's used for the debug purposes. To enable `pandas on Python` execution,\nplease refer to the usage section in :doc:`pandas on Python </development/using_pandas_on_python>`.\n\nData Transformation\n'''''''''''''''''''\n\n.. image:: /img/pandas_on_python_data_transform.svg\n   :align: center\n\nWhen a user calls any :py:class:`~modin.pandas.dataframe.DataFrame` API, a query starts forming at the `API` layer\nto be executed at the `Execution` layer. The `API` layer is responsible for processing the query appropriately,\nfor example, determining whether the final result should be a ``DataFrame`` or ``Series`` object. This layer is also responsible for sanitizing the input to the\n:py:class:`~modin.core.storage_formats.pandas.query_compiler.PandasQueryCompiler`, e.g. validating a parameter from the query\nand defining specific intermediate values to provide more context to the query compiler.\nThe :py:class:`~modin.core.storage_formats.pandas.query_compiler.PandasQueryCompiler` is responsible for\nprocessing the query, received from the :py:class:`~modin.pandas.dataframe.DataFrame` `API` layer,\nto determine how to apply it to a subset of the data - either cell-wise or along an axis-wise partition backed by the `pandas`\nstorage format. 
The :py:class:`~modin.core.storage_formats.pandas.query_compiler.PandasQueryCompiler` maps the query to one of the :doc:`Core Algebra Operators </flow/modin/core/dataframe/algebra>` of\nthe :py:class:`~modin.core.execution.python.implementations.pandas_on_python.dataframe.dataframe.PandasOnPythonDataframe` which inherits\ngeneric functionality from the :py:class:`~modin.core.dataframe.pandas.dataframe.dataframe.PandasDataframe`.\n\nPandasOnPython Dataframe implementation\n---------------------------------------\n\nThis page describes implementation of :doc:`Modin PandasDataframe Objects </flow/modin/core/dataframe/pandas/index>`\nspecific for `PandasOnPython` execution. Since Python engine doesn't allow computation parallelization,\noperations on partitions are performed sequentially. The absence of parallelization doesn't give any\nperformance speed-up, so ``PandasOnPython`` is used for testing purposes only.\n\n* :doc:`PandasOnPythonDataframe <dataframe>`\n* :doc:`PandasOnPythonDataframePartition <partitioning/partition>`\n* :doc:`PandasOnPythonDataframeAxisPartition <partitioning/axis_partition>`\n* :doc:`PandasOnPythonDataframePartitionManager <partitioning/partition_manager>`\n\n.. toctree::\n    :hidden:\n\n    dataframe\n    partitioning/partition\n    partitioning/axis_partition\n    partitioning/partition_manager\n\n\nData Ingress\n''''''''''''\n\n.. image:: /img/pandas_on_python_data_ingress.svg\n   :align: center\n\nData Egress\n'''''''''''\n\n.. image:: /img/pandas_on_python_data_egress.svg\n   :align: center\n\n\nWhen a user calls any IO function from the ``modin.pandas.io`` module, the `API` layer queries the\n:py:class:`~modin.core.execution.dispatching.factories.dispatcher.FactoryDispatcher` which defines a factory specific for\nthe execution, namely, the :py:class:`~modin.core.execution.dispatching.factories.factories.PandasOnPythonFactory`. 
The factory, in turn,\nexposes the :py:class:`~modin.core.execution.python.implementations.pandas_on_python.io.PandasOnPythonIO` class\nwhose responsibility is to read/write from/to a file.\n\nWhen reading data from a CSV file, for example, the :py:class:`~modin.core.execution.python.implementations.pandas_on_python.io.io.PandasOnPythonIO` class\nreads the data using the corresponding `pandas` function (``pandas.read_csv()`` in this case). After the reading is complete, a new query compiler is created from the `pandas` object\nusing :py:meth:`~modin.core.execution.python.implementations.pandas_on_python.io.io.PandasOnPythonIO.from_pandas` and returned.\n\nWhen writing data to a CSV file, for example, the :py:class:`~modin.core.execution.python.implementations.pandas_on_python.io.PandasOnPythonIO` converts a query compiler\nto a `pandas` object using :py:meth:`~modin.core.storage_formats.base.query_compiler.BaseQueryCompiler.to_pandas`. After that, `pandas` writes the data to the file using\nthe corresponding function (``pandas.DataFrame.to_csv()`` in this case)."
  },
  {
    "path": "docs/flow/modin/core/execution/python/implementations/pandas_on_python/partitioning/axis_partition.rst",
    "content": "PandasOnPythonDataframeAxisPartition\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nThe class is specific implementation of :py:class:`~modin.core.dataframe.pandas.partitioning.axis_partition.PandasDataframeAxisPartition`,\nproviding the API to perform operations on an axis partition, using Python\nas the execution engine. The axis partition is made up of list of block\npartitions that are stored in this class.\n\nPublic API\n----------\n\n.. autoclass:: modin.core.execution.python.implementations.pandas_on_python.partitioning.virtual_partition.PandasOnPythonDataframeAxisPartition\n\nPandasOnPythonFrameColumnPartition\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nPublic API\n----------\n\n.. autoclass:: modin.core.execution.python.implementations.pandas_on_python.partitioning.virtual_partition.PandasOnPythonDataframeColumnPartition\n  :members:\n\nPandasOnPythonFrameRowPartition\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nPublic API\n----------\n\n.. autoclass:: modin.core.execution.python.implementations.pandas_on_python.partitioning.virtual_partition.PandasOnPythonDataframeRowPartition\n  :members:"
  },
  {
    "path": "docs/flow/modin/core/execution/python/implementations/pandas_on_python/partitioning/partition.rst",
    "content": "PandasOnPythonDataframePartition\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nThe class is specific implementation of :py:class:`~modin.core.dataframe.pandas.partitioning.partition.PandasDataframePartition`,\nproviding the API to perform operations on a block partition using Python as the execution engine.\n\nIn addition to wrapping a ``pandas.DataFrame``, the class also holds the following metadata:\n\n* ``length`` - length of ``pandas.DataFrame`` wrapped\n* ``width`` - width of ``pandas.DataFrame`` wrapped\n\nAn operation on a block partition can be performed in two modes:\n\n* immediately via :meth:`~modin.core.execution.python.implementations.pandas_on_python.partitioning.partition.PandasOnPythonDataframePartition.apply` - \n  in this case accumulated call queue and new function will be executed\n  immediately.\n* lazily_ via :meth:`~modin.core.execution.python.implementations.pandas_on_python.partitioning.partition.PandasOnPythonDataframePartition.add_to_apply_calls` -\n  in this case function will be added to the call queue and no computations\n  will be done at the moment.\n\nPublic API\n----------\n\n.. autoclass:: modin.core.execution.python.implementations.pandas_on_python.partitioning.partition.PandasOnPythonDataframePartition\n  :members:\n\n  .. _lazily: https://en.wikipedia.org/wiki/Lazy_evaluation"
  },
  {
    "path": "docs/flow/modin/core/execution/python/implementations/pandas_on_python/partitioning/partition_manager.rst",
    "content": "PandasOnPythonDataframePartition\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nThe class is specific implementation of :py:class:`~modin.core.dataframe.pandas.partitioning.partition_manager.PandasDataframePartitionManager`\nusing Python as the execution engine. This class is responsible for partitions manipulation and applying\na function to block/row/column partitions.\n\nPublic API\n----------\n\n.. autoclass:: modin.core.execution.python.implementations.pandas_on_python.partitioning.partition_manager.PandasOnPythonDataframePartitionManager\n  :members:"
  },
  {
    "path": "docs/flow/modin/core/execution/ray/generic.rst",
    "content": ":orphan:\n\nGeneric Ray-based members\n=========================\n\nObjects which are storage format agnostic but require specific Ray implementation\nare placed in ``modin.core.execution.ray.generic``.\n\nTheir purpose is to implement certain parallel I/O operations and to serve\nas a foundation for building storage format specific objects:\n\n.. autoclass:: modin.core.execution.ray.generic.io.RayIO\n  :members:\n\n.. autoclass:: modin.core.execution.ray.generic.partitioning.GenericRayDataframePartitionManager\n  :members:\n"
  },
  {
    "path": "docs/flow/modin/core/execution/ray/implementations/pandas_on_ray/dataframe.rst",
    "content": "PandasOnRayDataframe\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nThe class is specific implementation of :py:class:`~modin.core.dataframe.pandas.dataframe.dataframe.PandasDataframe`\nclass using Ray distributed engine. It serves as an intermediate level between\n:py:class:`~modin.core.storage_formats.pandas.query_compiler.PandasQueryCompiler` and\n:py:class:`~modin.core.execution.ray.implementations.pandas_on_ray.partitioning.PandasOnRayDataframePartitionManager`.\n\nPublic API\n----------\n\n.. autoclass:: modin.core.execution.ray.implementations.pandas_on_ray.dataframe.PandasOnRayDataframe\n  :members:"
  },
  {
    "path": "docs/flow/modin/core/execution/ray/implementations/pandas_on_ray/index.rst",
    "content": ":orphan:\n\nPandasOnRay Execution\n=====================\n\nQueries that perform data transformation, data ingress or data egress using the `pandas on Ray` execution\npass through the Modin components detailed below.\n\nTo enable `pandas on Ray` execution, please refer to the usage section in :doc:`pandas on Ray </development/using_pandas_on_ray>`.\n\nData Transformation\n'''''''''''''''''''\n\n.. image:: /img/pandas_on_ray_data_transform.svg\n   :align: center\n\nWhen a user calls any :py:class:`~modin.pandas.dataframe.DataFrame` API, a query starts forming at the `API` layer\nto be executed at the `Execution` layer. The `API` layer is responsible for processing the query appropriately,\nfor example, determining whether the final result should be a ``DataFrame`` or ``Series`` object. This layer is also responsible for sanitizing the input to the\n:py:class:`~modin.core.storage_formats.pandas.query_compiler.PandasQueryCompiler`, e.g. validating a parameter from the query\nand defining specific intermediate values to provide more context to the query compiler.\nThe :py:class:`~modin.core.storage_formats.pandas.query_compiler.PandasQueryCompiler` is responsible for\nprocessing the query, received from the :py:class:`~modin.pandas.dataframe.DataFrame` `API` layer,\nto determine how to apply it to a subset of the data - either cell-wise or along an axis-wise partition backed by the `pandas`\nstorage format. 
The :py:class:`~modin.core.storage_formats.pandas.query_compiler.PandasQueryCompiler` maps the query to one of the :doc:`Core Algebra Operators </flow/modin/core/dataframe/algebra>` of\nthe :py:class:`~modin.core.execution.ray.implementations.pandas_on_ray.dataframe.PandasOnRayDataframe` which inherits\ngeneric functionality from the ``GenericRayDataframe`` and the :py:class:`~modin.core.dataframe.pandas.dataframe.dataframe.PandasDataframe`.\n\n..\n  TODO: insert a link to ``GenericRayDataframe`` once we add an implementatiton of the class\n\nPandasOnRay Dataframe implementation\n------------------------------------\n\nModin implements ``Dataframe``, ``PartitionManager``, ``VirtualPartition`` (a specific kind of ``AxisPartition`` with the capability\nto combine smaller partitions into the one \"virtual\") and ``Partition`` classes specifically for the ``PandasOnRay`` execution:\n\n* :doc:`PandasOnRayDataframe <dataframe>`\n* :doc:`PandasOnRayDataframePartition <partitioning/partition>`\n* :doc:`PandasOnRayDataframeVirtualPartition <partitioning/axis_partition>`\n* :doc:`PandasOnRayDataframePartitionManager <partitioning/partition_manager>`\n\n.. toctree::\n    :hidden:\n\n    dataframe\n    partitioning/partition\n    partitioning/axis_partition\n    partitioning/partition_manager\n\nData Ingress\n''''''''''''\n\n.. image:: /img/pandas_on_ray_data_ingress.svg\n   :align: center\n\nData Egress\n'''''''''''\n\n.. image:: /img/pandas_on_ray_data_egress.svg\n   :align: center\n\n\nWhen a user calls any IO function from the ``modin.pandas.io`` module, the `API` layer queries the\n:py:class:`~modin.core.execution.dispatching.factories.dispatcher.FactoryDispatcher` which defines a factory specific for\nthe execution, namely, the :py:class:`~modin.core.execution.dispatching.factories.factories.PandasOnRayFactory`. 
The factory, in turn,\nexposes the :py:class:`~modin.core.execution.ray.implementations.pandas_on_ray.io.PandasOnRayIO` class\nwhose responsibility is to perform a parallel read/write from/to a file.\n\nWhen reading data from a CSV file, for example, the :py:class:`~modin.core.execution.ray.implementations.pandas_on_ray.io.PandasOnRayIO` class forwards\nthe user query to the :meth:`~modin.core.io.text.CSVDispatcher._read` method of :py:class:`~modin.core.io.text.CSVDispatcher`, where the query's parameters are preprocessed\nto check if they are supported by the execution (defaulting to pandas if they are not) and computes some metadata\ncommon for all partitions to be read. Then, the file is split into row chunks, and this data is used to launch remote tasks on the Ray workers\nvia the :meth:`~modin.core.execution.ray.common.RayWrapper.deploy` method of :py:class:`~modin.core.execution.ray.common.RayWrapper`.\nOn each Ray worker, the :py:class:`~modin.core.storage_formats.pandas.parsers.PandasCSVParser` parses data.\nAfter the remote tasks are finished, additional result postprocessing is performed,\nand a new query compiler with the data read is returned.\n\nWhen writing data to a CSV file, for example, the :py:class:`~modin.core.execution.ray.implementations.pandas_on_ray.io.PandasOnRayIO` processes\nthe user query to execute it on Ray workers. Then, the :py:class:`~modin.core.execution.ray.implementations.pandas_on_ray.io.PandasOnRayIO` asks the\n:py:class:`~modin.core.execution.ray.implementations.pandas_on_ray.dataframe.PandasOnRayDataframe` to decompose the data into row-wise partitions\nthat will be written into the file in parallel in Ray workers."
  },
  {
    "path": "docs/flow/modin/core/execution/ray/implementations/pandas_on_ray/partitioning/axis_partition.rst",
    "content": "PandasOnRayDataframeVirtualPartition\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nThis class is the specific implementation of :py:class:`~modin.core.dataframe.pandas.partitioning.axis_partition.PandasDataframeAxisPartition`,\nproviding the API to perform operations on an axis partition, using Ray as an execution engine. The virtual partition is\na wrapper over a list of block partitions, which are stored in this class, with the capability to combine the smaller partitions into the one \"virtual\".\n\nPublic API\n----------\n\n.. autoclass:: modin.core.execution.ray.implementations.pandas_on_ray.partitioning.PandasOnRayDataframeVirtualPartition\n  :members:\n\nPandasOnRayDataframeColumnPartition\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nPublic API\n----------\n\n.. autoclass:: modin.core.execution.ray.implementations.pandas_on_ray.partitioning.PandasOnRayDataframeColumnPartition\n  :members:\n\nPandasOnRayDataframeRowPartition\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nPublic API\n----------\n\n.. autoclass:: modin.core.execution.ray.implementations.pandas_on_ray.partitioning.PandasOnRayDataframeRowPartition\n  :members:\n"
  },
  {
    "path": "docs/flow/modin/core/execution/ray/implementations/pandas_on_ray/partitioning/partition.rst",
    "content": "PandasOnRayDataframePartition\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nThe class is the specific implementation of :py:class:`~modin.core.dataframe.pandas.partitioning.partition.PandasDataframePartition`,\nproviding the API to perform operations on a block partition, namely, ``pandas.DataFrame``, using Ray as an execution engine.\n\nIn addition to wrapping a ``pandas.DataFrame``, the class also holds the following metadata:\n\n* ``length`` - length of ``pandas.DataFrame`` wrapped\n* ``width`` - width of ``pandas.DataFrame`` wrapped\n* ``ip`` - node IP address that holds ``pandas.DataFrame`` wrapped\n\nAn operation on a block partition can be performed in two modes:\n\n* asynchronously_ - via :meth:`~modin.core.execution.ray.implementations.pandas_on_ray.partitioning.PandasOnRayDataframePartition.apply`\n* lazily_ - via :meth:`~modin.core.execution.ray.implementations.pandas_on_ray.partitioning.PandasOnRayDataframePartition.add_to_apply_calls`\n\nPublic API\n----------\n\n.. autoclass:: modin.core.execution.ray.implementations.pandas_on_ray.partitioning.PandasOnRayDataframePartition\n  :members:\n\n.. _asynchronously: https://en.wikipedia.org/wiki/Asynchrony_(computer_programming)\n.. _lazily: https://en.wikipedia.org/wiki/Lazy_evaluation\n"
  },
  {
    "path": "docs/flow/modin/core/execution/ray/implementations/pandas_on_ray/partitioning/partition_manager.rst",
    "content": "PandasOnRayDataframePartitionManager\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nThis class is the specific implementation of :py:class:`~modin.core.execution.ray.generic.partitioning.GenericRayDataframePartitionManager`\nusing Ray distributed engine. This class is responsible for partition manipulation and applying a function to\nblock/row/column partitions.\n\nPublic API\n----------\n\n.. autoclass:: modin.core.execution.ray.implementations.pandas_on_ray.partitioning.PandasOnRayDataframePartitionManager\n  :members:\n"
  },
  {
    "path": "docs/flow/modin/core/execution/unidist/generic.rst",
    "content": ":orphan:\n\nGeneric Unidist-based members\n=============================\n\nObjects which are storage format agnostic but require specific Unidist implementation\nare placed in ``modin.core.execution.unidist.generic``.\n\nTheir purpose is to implement certain parallel I/O operations and to serve\nas a foundation for building storage format specific objects:\n\n.. autoclass:: modin.core.execution.unidist.generic.io.UnidistIO\n  :members:\n\n.. autoclass:: modin.core.execution.unidist.generic.partitioning.GenericUnidistDataframePartitionManager\n  :members:\n"
  },
  {
    "path": "docs/flow/modin/core/execution/unidist/implementations/pandas_on_unidist/dataframe.rst",
    "content": "PandasOnUnidistDataframe\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nThe class is specific implementation of :py:class:`~modin.core.dataframe.pandas.dataframe.dataframe.PandasDataframe`\nclass using Unidist distributed engine. It serves as an intermediate level between\n:py:class:`~modin.core.storage_formats.pandas.query_compiler.PandasQueryCompiler` and\n:py:class:`~modin.core.execution.unidist.implementations.pandas_on_unidist.partitioning.PandasOnUnidistDataframePartitionManager`.\n\nPublic API\n----------\n\n.. autoclass:: modin.core.execution.unidist.implementations.pandas_on_unidist.dataframe.PandasOnUnidistDataframe\n  :members:"
  },
  {
    "path": "docs/flow/modin/core/execution/unidist/implementations/pandas_on_unidist/index.rst",
    "content": ":orphan:\n\nPandasOnUnidist Execution\n=========================\n\nQueries that perform data transformation, data ingress or data egress using the `pandas on Unidist` execution\npass through the Modin components detailed below.\n\nTo enable `pandas on MPI through unidist` execution,\nplease refer to the usage section in :doc:`pandas on MPI through unidist </development/using_pandas_on_mpi>`.\n\nData Transformation\n'''''''''''''''''''\n\n.. image:: /img/pandas_on_unidist_data_transform.svg\n   :align: center\n\nWhen a user calls any :py:class:`~modin.pandas.dataframe.DataFrame` API, a query starts forming at the `API` layer\nto be executed at the `Execution` layer. The `API` layer is responsible for processing the query appropriately,\nfor example, determining whether the final result should be a ``DataFrame`` or ``Series`` object. This layer is also responsible for sanitizing the input to the\n:py:class:`~modin.core.storage_formats.pandas.query_compiler.PandasQueryCompiler`, e.g. validating a parameter from the query\nand defining specific intermediate values to provide more context to the query compiler.\nThe :py:class:`~modin.core.storage_formats.pandas.query_compiler.PandasQueryCompiler` is responsible for\nprocessing the query, received from the :py:class:`~modin.pandas.dataframe.DataFrame` `API` layer,\nto determine how to apply it to a subset of the data - either cell-wise or along an axis-wise partition backed by the `pandas`\nstorage format. 
The :py:class:`~modin.core.storage_formats.pandas.query_compiler.PandasQueryCompiler` maps the query to one of the :doc:`Core Algebra Operators </flow/modin/core/dataframe/algebra>` of\nthe :py:class:`~modin.core.execution.unidist.implementations.pandas_on_unidist.dataframe.PandasOnUnidistDataframe` which inherits\ngeneric functionality from the ``GenericUnidistDataframe`` and the :py:class:`~modin.core.dataframe.pandas.dataframe.dataframe.PandasDataframe`.\n\n..\n  TODO: insert a link to ``GenericUnidistDataframe`` once we add an implementation of the class\n\nPandasOnUnidist Dataframe implementation\n----------------------------------------\n\nModin implements ``Dataframe``, ``PartitionManager``, ``VirtualPartition`` (a specific kind of ``AxisPartition`` with the capability\nto combine smaller partitions into the one \"virtual\") and ``Partition`` classes specifically for the ``PandasOnUnidist`` execution:\n\n* :doc:`PandasOnUnidistDataframe <dataframe>`\n* :doc:`PandasOnUnidistDataframePartition <partitioning/partition>`\n* :doc:`PandasOnUnidistDataframeVirtualPartition <partitioning/axis_partition>`\n* :doc:`PandasOnUnidistDataframePartitionManager <partitioning/partition_manager>`\n\n.. toctree::\n    :hidden:\n\n    dataframe\n    partitioning/partition\n    partitioning/axis_partition\n    partitioning/partition_manager\n\nData Ingress\n''''''''''''\n\n.. image:: /img/pandas_on_unidist_data_ingress.svg\n   :align: center\n\nData Egress\n'''''''''''\n\n.. image:: /img/pandas_on_unidist_data_egress.svg\n   :align: center\n\n\nWhen a user calls any IO function from the ``modin.pandas.io`` module, the `API` layer queries the\n:py:class:`~modin.core.execution.dispatching.factories.dispatcher.FactoryDispatcher` which defines a factory specific for\nthe execution, namely, the :py:class:`~modin.core.execution.dispatching.factories.factories.PandasOnUnidistFactory`. 
The factory, in turn,\nexposes the :py:class:`~modin.core.execution.unidist.implementations.pandas_on_unidist.io.PandasOnUnidistIO` class\nwhose responsibility is to perform a parallel read/write from/to a file.\n\nWhen reading data from a CSV file, for example, the :py:class:`~modin.core.execution.unidist.implementations.pandas_on_unidist.io.PandasOnUnidistIO` class forwards\nthe user query to the :meth:`~modin.core.io.text.CSVDispatcher._read` method of :py:class:`~modin.core.io.text.CSVDispatcher`, where the query's parameters are preprocessed\nto check if they are supported by the execution (defaulting to pandas if they are not) and computes some metadata\ncommon for all partitions to be read. Then, the file is split into row chunks, and this data is used to launch remote tasks on the Unidist workers\nvia the :meth:`~modin.core.execution.unidist.common.UnidistWrapper.deploy` method of :py:class:`~modin.core.execution.unidist.common.UnidistWrapper`.\nOn each Unidist worker, the :py:class:`~modin.core.storage_formats.pandas.parsers.PandasCSVParser` parses data.\nAfter the remote tasks are finished, additional result postprocessing is performed,\nand a new query compiler with the data read is returned.\n\nWhen writing data to a CSV file, for example, the :py:class:`~modin.core.execution.unidist.implementations.pandas_on_unidist.io.PandasOnUnidistIO` processes\nthe user query to execute it on Unidist workers. Then, the :py:class:`~modin.core.execution.unidist.implementations.pandas_on_unidist.io.PandasOnUnidistIO` asks the\n:py:class:`~modin.core.execution.unidist.implementations.pandas_on_unidist.dataframe.PandasOnUnidistDataframe` to decompose the data into row-wise partitions\nthat will be written into the file in parallel in Unidist workers."
  },
  {
    "path": "docs/flow/modin/core/execution/unidist/implementations/pandas_on_unidist/partitioning/axis_partition.rst",
    "content": "PandasOnUnidistDataframeVirtualPartition\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nThis class is the specific implementation of :py:class:`~modin.core.dataframe.pandas.partitioning.axis_partition.PandasDataframeAxisPartition`,\nproviding the API to perform operations on an axis partition, using Unidist as an execution engine. The virtual partition is\na wrapper over a list of block partitions, which are stored in this class, with the capability to combine the smaller partitions into the one \"virtual\".\n\nPublic API\n----------\n\n.. autoclass:: modin.core.execution.unidist.implementations.pandas_on_unidist.partitioning.PandasOnUnidistDataframeVirtualPartition\n  :members:\n\nPandasOnUnidistDataframeColumnPartition\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nPublic API\n----------\n\n.. autoclass:: modin.core.execution.unidist.implementations.pandas_on_unidist.partitioning.PandasOnUnidistDataframeColumnPartition\n  :members:\n\nPandasOnUnidistDataframeRowPartition\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nPublic API\n----------\n\n.. autoclass:: modin.core.execution.unidist.implementations.pandas_on_unidist.partitioning.PandasOnUnidistDataframeRowPartition\n  :members:\n"
  },
  {
    "path": "docs/flow/modin/core/execution/unidist/implementations/pandas_on_unidist/partitioning/partition.rst",
    "content": "PandasOnUnidistDataframePartition\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nThe class is the specific implementation of :py:class:`~modin.core.dataframe.pandas.partitioning.partition.PandasDataframePartition`,\nproviding the API to perform operations on a block partition, namely, ``pandas.DataFrame``, using Unidist as an execution engine.\n\nIn addition to wrapping a ``pandas.DataFrame``, the class also holds the following metadata:\n\n* ``length`` - length of ``pandas.DataFrame`` wrapped\n* ``width`` - width of ``pandas.DataFrame`` wrapped\n* ``ip`` - node IP address that holds ``pandas.DataFrame`` wrapped\n\nAn operation on a block partition can be performed in two modes:\n\n* asynchronously_ - via :meth:`~modin.core.execution.unidist.implementations.pandas_on_unidist.partitioning.PandasOnUnidistDataframePartition.apply`\n* lazily_ - via :meth:`~modin.core.execution.unidist.implementations.pandas_on_unidist.partitioning.PandasOnUnidistDataframePartition.add_to_apply_calls`\n\nPublic API\n----------\n\n.. autoclass:: modin.core.execution.unidist.implementations.pandas_on_unidist.partitioning.PandasOnUnidistDataframePartition\n  :members:\n\n.. _asynchronously: https://en.wikipedia.org/wiki/Asynchrony_(computer_programming)\n.. _lazily: https://en.wikipedia.org/wiki/Lazy_evaluation\n"
  },
  {
    "path": "docs/flow/modin/core/execution/unidist/implementations/pandas_on_unidist/partitioning/partition_manager.rst",
    "content": "PandasOnUnidistDataframePartitionManager\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nThis class is the specific implementation of :py:class:`~modin.core.execution.unidist.generic.partitioning.GenericUnidistDataframePartitionManager`\nusing Unidist distributed engine. This class is responsible for partition manipulation and applying a function to\nblock/row/column partitions.\n\nPublic API\n----------\n\n.. autoclass:: modin.core.execution.unidist.implementations.pandas_on_unidist.partitioning.PandasOnUnidistDataframePartitionManager\n  :members:\n"
  },
  {
    "path": "docs/flow/modin/core/io/index.rst",
    "content": ":orphan:\n\nIO Module Description\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nDispatcher Classes Workflow Overview\n''''''''''''''''''''''''''''''''''''\n\nCalls from ``read_*`` functions of execution-specific IO classes (for example, ``PandasOnRayIO`` for\nRay engine and pandas storage format) are forwarded to the ``_read`` function of the file\nformat-specific class (for example ``CSVDispatcher`` for CSV files), where function parameters are\npreprocessed to check if they are supported (defaulting to pandas if not)\nand common metadata is computed for all partitions. The file is then split\ninto chunks (splitting mechanism described below) and the data is used to launch tasks\non the remote workers. After the remote tasks finish, additional\npostprocessing is performed on the results, and a new query compiler with the imported data will\nbe returned.\n\nData File Splitting Mechanism\n'''''''''''''''''''''''''''''\n\nModin's file splitting mechanism differs depending on the data format type:\n\n* text format type - the file is split into bytes according to user specified arguments.\n  In the simplest case, when no row related parameters (such as ``nrows`` or\n  ``skiprows``) are passed, data chunk limits (start and end bytes) are derived\n  by dividing the file size by the number of partitions (chunks can\n  slightly differ between each other because usually end byte may occurs inside a\n  line and in that case the last byte of the line should be used instead of initial\n  value). 
In other cases the same splitting mechanism is used, but chunks sizes are\n  defined according to the number of lines that each partition should contain.\n\n* columnar store type - the file is split so that each chunk contains approximately the same number of columns.\n\n* SQL type - chunking is obtained by wrapping initial SQL query with a query that\n  specifies initial row offset and number of rows in the chunk.\n\nAfter file splitting is complete, chunks data is passed to the parser functions\n(``PandasCSVParser.parse`` for ``read_csv`` function with pandas storage format) for\nfurther processing on each worker.\n\nSubmodules Description\n''''''''''''''''''''''\n\n``modin.core.io`` module is used mostly for storing utils and dispatcher\nclasses for reading files of different formats.\n\n* ``io.py`` - class containing basic utils and default implementation of IO functions.\n\n* ``file_dispatcher.py`` - class reading data from different kinds of files and\n  handling some util functions common for all formats. Also this class contains ``read``\n  function which is entry point function for all dispatchers ``_read`` functions.\n\n* text - directory for storing all text file format dispatcher classes  \n  \n  * ``text_file_dispatcher.py`` - class for reading text formats files. This class\n    holds ``partitioned_file`` function for splitting text format files into chunks,\n    ``offset`` function for moving file offset at the specified amount of bytes,\n    ``_read_rows`` function for moving file offset at the specified amount of rows\n    and many other functions.\n  \n  * format/feature specific dispatchers: ``csv_dispatcher.py``, ``excel_dispatcher.py``,\n    ``fwf_dispatcher.py`` and ``json_dispatcher.py``.\n\n* column_stores - directory for storing all columnar store file format dispatcher classes\n  \n  * ``column_store_dispatcher.py`` - class for reading columnar type files. 
This class\n    holds ``build_query_compiler`` function that performs file splitting, deploying remote\n    tasks and results postprocessing and many other functions.\n  \n  * format/feature specific dispatchers: ``feather_dispatcher.py``, ``hdf_dispatcher.py``\n    and ``parquet_dispatcher.py``.\n\n* sql - directory for storing SQL dispatcher class\n  \n  * ``sql_dispatcher.py`` -  class for reading SQL queries or database tables.\n\nPublic API\n''''''''''\n\n.. automodule:: modin.core.io\n    :members:\n\nHandling ``skiprows`` Parameter\n'''''''''''''''''''''''''''''''\n\nHandling ``skiprows`` parameter by pandas import functions can be very tricky, especially\nfor ``read_csv`` function because of interconnection with ``header`` parameter. In this section\nthe techniques of ``skiprows`` processing by both pandas and Modin are covered.\n\nProcessing ``skiprows`` by pandas\n=================================\n\nLet's consider a simple snippet with ``pandas.read_csv`` in order to understand interconnection\nof ``header`` and ``skiprows`` parameters:\n\n.. code-block:: python\n\n  import pandas\n  from io import StringIO\n\n  data = \"\"\"0\n  1\n  2\n  3\n  4\n  5\n  6\n  7\n  8\n  \"\"\"\n\n  # `header` parameter absence is equivalent to `header=\"infer\"` or `header=0`\n  # rows 1, 5, 6, 7, 8 are read with header \"0\"\n  df = pandas.read_csv(StringIO(data), skiprows=[2, 3, 4])\n  # rows 5, 6, 7, 8 are read with header \"1\", row 0 is skipped additionally\n  df = pandas.read_csv(StringIO(data), skiprows=[2, 3, 4], header=1)\n  # rows 6, 7, 8 are read with header \"5\", rows 0, 1 are skipped additionally\n  df = pandas.read_csv(StringIO(data), skiprows=[2, 3, 4], header=2)\n\nIn the examples above list-like ``skiprows`` values are fixed and ``header`` is varied. 
In the first\nexample with no ``header`` provided, rows 2, 3, 4 are skipped and row 0 is considered as the header.\nIn the second example ``header == 1``, so the zeroth row is skipped and the next available row is\nconsidered the header. The third example illustrates when the ``header`` and ``skiprows`` parameters\nvalues are both present - in this case ``skiprows`` rows are dropped first and then the ``header`` is derived\nfrom the remaining rows (rows before header are skipped too).\n\nIn the examples above only list-like ``skiprows`` and integer ``header`` parameters are considered,\nbut the same logic is applicable for other types of the parameters.\n\nProcessing ``skiprows`` by Modin\n================================\n\nAs it can be seen, skipping rows in the pandas import functions is complicated and distributing\nthis logic across multiple workers can complicate it even more. Thus in some rare corner cases\ndefault pandas implementation is used in Modin to avoid excessive Modin code complication.\n\nModin uses two techniques for skipping rows:\n\n1) During file partitioning (setting file limits that should be read by each partition)\nexact rows can be excluded from partitioning scope, thus they won't be read at all and can be\nconsidered as skipped. This is the most effective way of skipping rows since it doesn't require\nany actual data reading and postprocessing, but in this case ``skiprows`` parameter can be an\ninteger only. When it is possible Modin always uses this approach.\n\n2) Rows for skipping can be dropped after full dataset import. This is more expensive way since\nit requires extra IO work and postprocessing afterwards, but ``skiprows`` parameter can be of any\nnon-integer type supported by ``pandas.read_csv``.\n\nIn some cases, if ``skiprows`` is uniformly distributed array (e.g. 
[1, 2, 3]), ``skiprows`` can be\n\"squashed\" and represented as an integer to make a fastpath by skipping these rows during file partitioning\n(using the first option). But if there is a gap between the first row for skipping\nand the last line of the header (that will be skipped too since header is read by each partition\nto ensure metadata is defined properly), then this gap should be assigned for reading first\nby assigning the first partition to read these rows by setting ``pre_reading`` parameter.\n\nLet's consider an example of skipping rows during partitioning when ``header=\"infer\"`` and\n``skiprows=[3, 4, 5]``. In this specific case fastpath can be done since ``skiprows`` is a uniformly\ndistributed array, so we can \"squash\" it to an integer and set \"partitioning\" skiprows to 3. But\nif no additional action is done, these three rows will be skipped right after header line,\nthat corresponds to ``skiprows=[1, 2, 3]``. To avoid this discrepancy, we need to assign the first\npartition to read data between header line and the first row for skipping by setting special\n``pre_reading`` parameter to 2. Then, after the skipping of rows considered to be skipped during\npartitioning, the rest of the data will be divided between the rest of the partitions, see rows assignment\nbelow:\n\n.. code-block::\n\n  0 - header line (skip during partitioning)\n  1 - pre reading (assign to read by the first partition)\n  2 - pre reading (assign to read by the first partition)\n  3 - \"partitioning\" skiprows (skip during partitioning)\n  4 - \"partitioning\" skiprows (skip during partitioning)\n  5 - \"partitioning\" skiprows (skip during partitioning)\n  6 - data to partition (divide between the rest of partitions)\n  7 - data to partition (divide between the rest of partitions)\n"
  },
  {
    "path": "docs/flow/modin/core/storage_formats/base/query_compiler.rst",
    "content": "BaseQueryCompiler\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nBrief description\n'''''''''''''''''\n:py:class:`~modin.core.storage_formats.base.query_compiler.BaseQueryCompiler` is an abstract class of query compiler, and sets a common interface\nthat every other query compiler implementation in Modin must follow. The Base class contains a basic\nimplementations for most of the interface methods, all of which\n:doc:`fallback to pandas </supported_apis/defaulting_to_pandas>`.\n\nSubclassing :py:class:`~modin.core.storage_formats.base.query_compiler.BaseQueryCompiler`\n'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''\nIf you want to add new type of query compiler to Modin the new class needs to inherit\nfrom :py:class:`~modin.core.storage_formats.base.query_compiler.BaseQueryCompiler` and implement the abstract methods:\n\n- :py:meth:`~modin.core.storage_formats.base.query_compiler.BaseQueryCompiler.from_pandas` build query compiler from pandas DataFrame.\n- :py:meth:`~modin.core.storage_formats.base.query_compiler.BaseQueryCompiler.from_arrow` build query compiler from Arrow Table.\n- :py:meth:`~modin.core.storage_formats.base.query_compiler.BaseQueryCompiler.to_pandas` get query compiler representation as pandas DataFrame.\n- :py:meth:`~modin.core.storage_formats.base.query_compiler.BaseQueryCompiler.default_to_pandas` do :doc:`fallback to pandas </supported_apis/defaulting_to_pandas>` for the passed function. 
\n- :py:meth:`~modin.core.storage_formats.base.query_compiler.BaseQueryCompiler.finalize` finalize object constructing.\n- :py:meth:`~modin.core.storage_formats.base.query_compiler.BaseQueryCompiler.free` trigger memory cleaning.\n\n(Please refer to the code documentation to see the full documentation for these functions).\n\nThis is a minimum set of operations to ensure a new query compiler will function in the Modin architecture,\nand the rest of the API can safely default to the pandas implementation via the base class implementation. \nTo add a storage format specific implementation for some of the query compiler operations, just override \nthe corresponding method in your query compiler class.\n\nExample\n'''''''\nAs an exercise let's define a new query compiler in `Modin`, just to see how easy it is.\nUsually, the query compiler routes formed queries to the underlying :doc:`frame </flow/modin/core/dataframe/index>` class,\nwhich submits operators to an execution engine. For the sake\nof simplicity and independence of this example, our execution engine will be the pandas itself.\n\nWe need to inherit a new class from :py:class:`~modin.core.storage_formats.base.query_compiler.BaseQueryCompiler` and implement all of the abstract methods.\nIn this case, with `pandas` as an execution engine, it's trivial:\n\n.. 
code-block:: python\n\n    from modin.core.storage_formats import BaseQueryCompiler\n\n    class DefaultToPandasQueryCompiler(BaseQueryCompiler):\n        def __init__(self, pandas_df):\n            self._pandas_df = pandas_df\n\n        @classmethod\n        def from_pandas(cls, df, *args, **kwargs):\n            return cls(df)\n\n        @classmethod\n        def from_arrow(cls, at, *args, **kwargs):\n            return cls(at.to_pandas())\n\n        def to_pandas(self):\n            return self._pandas_df.copy()\n\n        def default_to_pandas(self, pandas_op, *args, **kwargs):\n            return type(self)(pandas_op(self.to_pandas(), *args, **kwargs))\n        \n        def finalize(self):\n            pass\n\n        def free(self):\n            pass\n\nAll done! Now you've got a fully functional query compiler, which is ready for extensions\nand already can be used in Modin DataFrame:\n\n.. code-block:: python\n\n    import pandas\n    pandas_df = pandas.DataFrame({\"col1\": [1, 2, 2, 1], \"col2\": [10, 2, 3, 40]})\n    # Building our query compiler from pandas object\n    qc = DefaultToPandasQueryCompiler.from_pandas(pandas_df)\n\n    import modin.pandas as pd\n    # Building Modin DataFrame from newly created query compiler\n    modin_df = pd.DataFrame(query_compiler=qc)\n\n    # Got fully functional Modin DataFrame\n    >>> print(modin_df.groupby(\"col1\").sum().reset_index())\n       col1  col2\n    0     1    50\n    1     2     5\n\nTo be able to select this query compiler as default via ``modin.config`` you also need\nto define the combination of your query compiler and pandas engine as an execution\nby adding the corresponding factory. To find more information about factories,\nvisit :doc:`dispatching </flow/modin/core/execution/dispatching>` page.\n\nQuery Compiler API\n''''''''''''''''''\n\n.. autoclass:: modin.core.storage_formats.base.query_compiler.BaseQueryCompiler\n    :members:\n"
  },
  {
    "path": "docs/flow/modin/core/storage_formats/index.rst",
    "content": ":orphan:\n\nStorage Formats\n===============\nStorage format is one of the components that form Modin's execution, it describes the type(s)\nof objects that are stored in the partitions of the selected Core Modin Dataframe implementation.\n\nThe base storage format in Modin is pandas. In that format, Modin Dataframe operates with\npartitions that hold ``pandas.DataFrame`` objects. Pandas is the most natural storage format\nsince high-level DataFrame objects mirror its API.\n\nThe storage format + execution engine (Ray, Dask, etc.) form the execution backend. \nThe Query Compiler (QC) converts high-level pandas API calls to queries that are understood \nby the execution backend.\n\n.. _query_compiler_def:\n\nQuery Compiler\n==============\n\n.. toctree::\n    :hidden:\n\n    base/query_compiler\n    pandas/index\n\nModin supports several execution backends (storage format + execution engine). Calling any\nDataFrame API function will end up in some execution-specific method. The query compiler is\na bridge between pandas DataFrame API and the actual Core Modin Dataframe implementation for the\ncorresponding execution.\n\n.. image:: /img/simplified_query_flow.svg\n    :align: right\n    :width: 300px\n\nEach storage format has its own Query Compiler class that implements the most optimal\nquery routing for the selected format.\n\nQuery compilers of all storage formats implement a common API, which is used by the high-level Modin DataFrame\nto support dataframe queries. The role of the query compiler is to translate its API into\na pairing of known user-defined functions and dataframe algebra operators. Each query compiler instance contains a\n:doc:`Core Modin Dataframe </flow/modin/core/dataframe/base/index>` of the selected execution implementation and queries\nit with the compiled queries to get the result. 
The query compiler object is immutable,\nso the result of every method is a new query compiler.\n\nThe query compilers API is defined by the :py:class:`~modin.core.storage_formats.base.query_compiler.BaseQueryCompiler` class\nand may resemble the pandas API, however, they're not equal. The query compilers API\nis significantly reduced in comparison with pandas, since many corner cases or even the\nwhole methods can be handled at the API layer with the existing API.\n\nThe query compiler is the level where Modin stops distinguishing DataFrame and Series (or column) objects.\nA Series is represented by a `1xN` query compiler, where the Series name is the column label.\nIf the Series is unnamed, then the label is ``MODIN_UNNAMED_SERIES_LABEL``, which is equal to ``\"__reduced__\"``. The high-level DataFrame API layer\ninterprets a one-column query compiler as Series or DataFrame depending on the operation context.\n\n.. note::\n    Although we're declaring that there is no difference between DataFrame and Series at the query compiler,\n    you still may find methods like ``method_ser`` and ``method_df`` which are implemented differently because they're\n    emulating either Series or DataFrame logic, or you may find parameters which indicate whether this one-column\n    query compiler is representing Series or not. All of these are hacks, and we're working on getting rid of them.\n\nHigh-level module overview\n''''''''''''''''''''''''''\n\nThis module houses submodules of all of the stable storage formats:\n\n- :doc:`Base module <base/query_compiler>` contains an abstract query compiler class which defines common API.\n- :doc:`Pandas module <pandas/index>` contains query compiler and text parsers for pandas storage format.\n"
  },
  {
    "path": "docs/flow/modin/core/storage_formats/pandas/index.rst",
    "content": ":orphan:\n\nPandas storage format\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\n.. toctree::\n    :hidden:\n\n    query_compiler\n    parsers\n\nHigh-Level Module Overview\n''''''''''''''''''''''''''\nThis module houses submodules which are responsible for communication between\nthe query compiler level and execution implementation level for pandas storage format:\n\n- :doc:`Query compiler <query_compiler>` is responsible for compiling efficient queries for :doc:`PandasDataframe </flow/modin/core/dataframe/pandas/dataframe>`.\n- :doc:`Parsers <parsers>` are responsible for parsing data on workers during IO operations.\n"
  },
  {
    "path": "docs/flow/modin/core/storage_formats/pandas/parsers.rst",
    "content": "Pandas Parsers Module Description\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\nHigh-Level Module Overview\n''''''''''''''''''''''''''\n\nThis module houses parser classes (classes that are used for data parsing on the workers)\nand util functions for handling parsing results. ``PandasParser`` is base class for parser\nclasses with pandas storage format, that contains methods common for all child classes. Other\nmodule classes implement ``parse`` function that performs parsing of specific format data\nbasing on the chunk information computed in the ``modin.core.io`` module. After\nthe chunk is parsed, the resulting ``DataFrame``-s will be split into smaller\n``DataFrame``-s according to the ``num_splits`` parameter, data type, or number of\nrows/columns in the parsed chunk. These frames, along with some additional metadata, are then returned.\n\n.. note:: \n    If you are interested in the data parsing mechanism implementation details, please refer\n    to the source code documentation.\n\nPublic API\n''''''''''\n\n.. automodule:: modin.core.storage_formats.pandas.parsers\n    :members:\n"
  },
  {
    "path": "docs/flow/modin/core/storage_formats/pandas/query_compiler.rst",
    "content": "PandasQueryCompiler\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n:py:class:`~modin.core.storage_formats.pandas.query_compiler.PandasQueryCompiler` is responsible for compiling\na set of known predefined functions and pairing those with dataframe algebra operators in the\n:doc:`PandasDataframe </flow/modin/core/dataframe/pandas/dataframe>`, specifically for dataframes backed by\n``pandas.DataFrame`` objects.\n\nEach :py:class:`~modin.core.storage_formats.pandas.query_compiler.PandasQueryCompiler` contains an instance of\n:py:class:`~modin.core.dataframe.pandas.dataframe.dataframe.PandasDataframe` which it queries to get the result.\n\n:py:class:`~modin.core.storage_formats.pandas.query_compiler.PandasQueryCompiler` supports methods built by \nthe :doc:`algebra module </flow/modin/core/dataframe/algebra>`.\nIf you want to add an implementation for a query compiler method, visit the algebra module documentation\nto see whether the new operation fits one of the existing function templates and can be easily implemented\nwith them.\n\nPublic API\n''''''''''\n:py:class:`~modin.core.storage_formats.pandas.query_compiler.PandasQueryCompiler` implements common query compilers API\ndefined by the :py:class:`~modin.core.storage_formats.base.query_compiler.BaseQueryCompiler`. Some functionalities\nare inherited from the base class, in the following section only overridden methods are presented.\n\n.. autoclass:: modin.core.storage_formats.pandas.query_compiler.PandasQueryCompiler\n  :members:\n"
  },
  {
    "path": "docs/flow/modin/distributed/dataframe/pandas.rst",
    "content": "Pandas partitioning API\n=======================\n\nThis page contains a description of the API to extract partitions from and build Modin Dataframes.\n\nunwrap_partitions\n-----------------\n\n.. autofunction:: modin.distributed.dataframe.pandas.unwrap_partitions\n\nfrom_partitions\n---------------\n.. autofunction:: modin.distributed.dataframe.pandas.from_partitions\n\nExample\n-------\n\n.. code-block:: python\n\n  import modin.pandas as pd\n  from modin.distributed.dataframe.pandas import unwrap_partitions, from_partitions\n  import numpy as np\n  data = np.random.randint(0, 100, size=(2 ** 10, 2 ** 8))\n  df = pd.DataFrame(data)\n  partitions = unwrap_partitions(df, axis=0, get_ip=True)\n  print(partitions)\n  new_df = from_partitions(partitions, axis=0)\n  print(new_df)\n"
  },
  {
    "path": "docs/flow/modin/experimental/batch.rst",
    "content": "Batch Pipeline API \n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nThis API exposes the ability to pipeline row-parallel batch queries on a Modin DataFrame. Currently,\nthis feature is only supported for the ``PandasOnRay`` execution.\n\nAPI\n'''\n\n.. automodule:: modin.experimental.batch.pipeline\n    :members:\n\n"
  },
  {
    "path": "docs/flow/modin/experimental/core/io/index.rst",
    "content": ":orphan:\n\nExperimental IO Module Description\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nThe module is used mostly for storing experimental utils and\ndispatcher classes for reading/writing files of different formats.\n\nSubmodules Description\n''''''''''''''''''''''\n\n* text - directory for storing all text file format dispatcher classes\n\n  * format/feature specific dispatchers: ``csv_glob_dispatcher.py``,\n    ``custom_text_dispatcher.py``.\n\n* sql - directory for storing SQL dispatcher class\n\n  * format/feature specific dispatchers: ``sql_dispatcher.py``\n\n* pickle - directory for storing Pickle dispatcher class\n\n  * format/feature specific dispatchers: ``pickle_dispatcher.py``\n\nPublic API\n''''''''''\n\n.. automodule:: modin.experimental.core.io\n    :members:\n"
  },
  {
    "path": "docs/flow/modin/experimental/index.rst",
    "content": ":orphan:\n\nExperimental Modules Overview\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nIn some cases Modin can give the user the opportunity to extend (not modify) typical pandas\nAPI or to try new functionality in order to get more flexibility. Depending on the exact\nexperimental feature user may need to install additional packages, change configurations or\nreplace the standard Modin import statement ``import modin.pandas as pd`` with modified version\n``import modin.experimental.pandas as pd``.\n\n``modin.experimental`` holds experimental functionality that is under development right now\nand provides a limited set of functionality:\n\n* :doc:`xgboost <xgboost>`\n* :doc:`sklearn <sklearn>`\n* :doc:`batch <batch>`\n\n\n.. toctree::\n    :hidden:\n\n    sklearn\n    xgboost\n    batch\n"
  },
  {
    "path": "docs/flow/modin/experimental/pandas.rst",
    "content": ":orphan:\n\nExperimental Pandas API\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\n.. automodule:: modin.experimental.pandas\n  :noindex:\n\nExperimental API Reference\n''''''''''''''''''''''''''\n\n.. autofunction:: read_sql\n.. autofunction:: read_csv_glob\n.. autofunction:: read_custom_text\n.. autofunction:: read_pickle_glob\n.. autofunction:: read_parquet_glob\n.. autofunction:: read_json_glob\n.. autofunction:: read_xml_glob\n.. automethod:: modin.pandas.DataFrame.modin::to_pandas\n.. automethod:: modin.pandas.DataFrame.modin::to_ray\n.. automethod:: modin.pandas.DataFrame.modin::to_pickle_glob\n.. automethod:: modin.pandas.DataFrame.modin::to_parquet_glob\n.. automethod:: modin.pandas.DataFrame.modin::to_json_glob\n.. automethod:: modin.pandas.DataFrame.modin::to_xml_glob\n"
  },
  {
    "path": "docs/flow/modin/experimental/range_partitioning_groupby.rst",
    "content": ":orphan:\n\n.. redirect to the new page\n.. raw:: html\n\n    <script type=\"text/javascript\">\n        window.location.href = '../../../usage_guide/optimization_notes/index.html#range-partitioning-in-modin';\n    </script>"
  },
  {
    "path": "docs/flow/modin/experimental/reshuffling_groupby.rst",
    "content": ":orphan:\n\n.. redirect to the new page\n.. raw:: html\n\n    <script type=\"text/javascript\">\n        window.location.href = '../../../usage_guide/optimization_notes/index.html#range-partitioning-in-modin';\n    </script>\n"
  },
  {
    "path": "docs/flow/modin/experimental/sklearn.rst",
    "content": "Scikit-learn module description\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nThis module holds experimental scikit-learn-specific functionality for Modin.\n\nAPI\n'''\n.. automodule:: modin.experimental.sklearn.model_selection\n    :members:\n\n"
  },
  {
    "path": "docs/flow/modin/experimental/xgboost.rst",
    "content": "Modin XGBoost module description\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\nHigh-level Module Overview\n''''''''''''''''''''''''''\n\nThis module holds classes, public interface and internal functions for distributed XGBoost in Modin.\n\nPublic classes :py:class:`~modin.experimental.xgboost.Booster`, :py:class:`~modin.experimental.xgboost.DMatrix`\nand function :py:func:`~modin.experimental.xgboost.train` provide the user with familiar XGBoost interfaces.\nThey are located in the ``modin.experimental.xgboost.xgboost`` module.\n\nThe internal module ``modin.experimental.xgboost.xgboost.xgboost_ray`` contains the implementation of Modin XGBoost\nfor the Ray execution engine. This module mainly consists of the Ray actor-class :py:class:`~modin.experimental.xgboost.xgboost_ray.ModinXGBoostActor`,\na function to distribute Modin's partitions between actors :py:func:`~modin.experimental.xgboost.xgboost_ray._assign_row_partitions_to_actors`,\nan internal :py:func:`~modin.experimental.xgboost.xgboost_ray._train`/:py:func:`~modin.experimental.xgboost.xgboost_ray._predict`\nfunction used from the public interfaces and additional util functions for computing cluster resources, actor creations etc.\n\nPublic interfaces\n'''''''''''''''''\n\n:py:class:`~modin.experimental.xgboost.DMatrix` inherits original class ``xgboost.DMatrix`` and overrides\nits constructor, which currently supports only `data` and `label` parameters. Both of the parameters must\nbe ``modin.pandas.DataFrame``, which will be internally unwrapped to lists of delayed objects of Modin's\nrow partitions using the function :py:func:`~modin.distributed.dataframe.pandas.unwrap_partitions`.\n\n.. autoclass:: modin.experimental.xgboost.DMatrix\n  :members:\n\n:py:class:`~modin.experimental.xgboost.Booster` inherits original class ``xgboost.Booster`` and\noverrides method ``predict``. 
The difference from original class interface for ``predict``\nmethod is changing the type of the `data` parameter to :py:class:`~modin.experimental.xgboost.DMatrix`.\n\n.. autoclass:: modin.experimental.xgboost.Booster\n    :members:\n\n:py:func:`~modin.experimental.xgboost.train` function has 2 differences from the original ``train`` function - (1) the\ndata type of `dtrain` parameter is :py:class:`~modin.experimental.xgboost.DMatrix` and (2) a new parameter `num_actors`.\n\n.. autofunction:: modin.experimental.xgboost.train\n\nInternal execution flow on Ray engine\n'''''''''''''''''''''''''''''''''''''\n\nInternal functions :py:func:`~modin.experimental.xgboost.xgboost_ray._train` and\n:py:func:`~modin.experimental.xgboost.xgboost_ray._predict` work similar to xgboost.\n\n\nTraining\n********\n\n1. The data is passed to the :py:func:`~modin.experimental.xgboost.xgboost_ray._train`\n   function as a :py:class:`~modin.experimental.xgboost.DMatrix` object. Lists of ``ray.ObjectRef``\n   corresponding to row partitions of Modin DataFrames are extracted by iterating over the \n   :py:class:`~modin.experimental.xgboost.DMatrix`. Example:\n\n   .. code-block:: python\n\n     # Extract lists of row partitions from dtrain (DMatrix object)\n     X_row_parts, y_row_parts = dtrain\n   ..\n\n2. On this step, the parameter `num_actors` is processed. The internal function :py:func:`~modin.experimental.xgboost.xgboost_ray._get_num_actors`\n   examines the value provided by the user. In case the value isn't provided, the `num_actors` will be computed using condition that 1 actor should use maximum 2 CPUs.\n   This condition was chosen for using maximum parallel workers with multithreaded XGBoost training (2 threads\n   per worker will be used in this case).\n\n.. note:: `num_actors` parameter is made available for public function :py:func:`~modin.experimental.xgboost.train` to allow\n  fine-tuning for obtaining the best performance in specific use cases.\n\n3. 
:py:class:`~modin.experimental.xgboost.xgboost_ray.ModinXGBoostActor` objects are created.\n\n4. Data `dtrain` is split between actors evenly. The internal function\n   :py:func:`~modin.experimental.xgboost.xgboost_ray._split_data_across_actors` runs assigning row partitions to actors\n   using internal function :py:func:`~modin.experimental.xgboost.xgboost_ray._assign_row_partitions_to_actors`.\n   This function creates a dictionary in the form: `{actor_rank: ([part_i0, part_i3, ..], [0, 3, ..]), ..}`.\n\n.. note:: :py:func:`~modin.experimental.xgboost.xgboost_ray._assign_row_partitions_to_actors` takes into account IP\n  addresses of row partitions of `dtrain` data to minimize excess data transfer.\n\n5. For each :py:class:`~modin.experimental.xgboost.xgboost_ray.ModinXGBoostActor` object ``set_train_data`` method is\n   called remotely. This method runs loading row partitions in actor according to the dictionary with partitions\n   distribution from previous step. When data is passed to the actor, the row partitions are automatically materialized\n   (``ray.ObjectRef`` -> ``pandas.DataFrame``).\n\n6. ``train`` method of :py:class:`~modin.experimental.xgboost.xgboost_ray.ModinXGBoostActor` class object is called remotely. This method\n   runs XGBoost training on local data of actor, connects to ``Rabit Tracker`` for sharing training state between\n   actors and returns dictionary with `booster` and `evaluation results`.\n\n7. At the final stage results from actors are returned. `booster` and `evals_result` are returned using ``ray.get``\n   function from remote actor.\n\n\nPrediction\n**********\n\n1. The data is passed to :py:func:`~modin.experimental.xgboost.xgboost_ray._predict`\n   function as a :py:class:`~modin.experimental.xgboost.DMatrix` object.\n\n2. :py:func:`~modin.experimental.xgboost.xgboost_ray._map_predict` function is applied remotely for each partition\n   of the data to make a partial prediction.\n\n3. 
Result ``modin.pandas.DataFrame`` is created from ``ray.ObjectRef`` objects, obtained in the previous step.\n\n\nInternal API\n''''''''''''\n.. autoclass:: modin.experimental.xgboost.xgboost_ray.ModinXGBoostActor\n  :members:\n  :private-members:\n\n.. autofunction:: modin.experimental.xgboost.xgboost_ray._assign_row_partitions_to_actors\n.. autofunction:: modin.experimental.xgboost.xgboost_ray._train\n.. autofunction:: modin.experimental.xgboost.xgboost_ray._predict\n.. autofunction:: modin.experimental.xgboost.xgboost_ray._get_num_actors\n.. autofunction:: modin.experimental.xgboost.xgboost_ray._split_data_across_actors\n.. autofunction:: modin.experimental.xgboost.xgboost_ray._map_predict\n"
  },
  {
    "path": "docs/flow/modin/pandas/base.rst",
    "content": "Base pandas Dataset API\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nThe class implements functionality that is common to Modin's pandas API for both ``DataFrame`` and ``Series`` classes.\n\nPublic API\n----------\n\n.. autoclass:: modin.pandas.base.BasePandasDataset\n  :noindex:\n  :members:\n"
  },
  {
    "path": "docs/flow/modin/pandas/dataframe.rst",
    "content": ":orphan:\n\nDataFrame Module Overview\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nModin's ``pandas.DataFrame`` API\n''''''''''''''''''''''''''''''''\n\nModin's ``pandas.DataFrame`` API is backed by a distributed object providing an identical\nAPI to pandas. After the user calls some ``DataFrame`` function, this call is internally\nrewritten into a representation that can be processed in parallel by the partitions. These\nresults can be e.g., reduced to single output, identical to the single threaded\npandas ``DataFrame`` method output.\n\n..\n    TODO: add link to the docs with detailed description of queries compilation\n    and execution ater DOCS-#2996 is merged.\n\nPublic API\n----------\n\n.. autoclass:: modin.pandas.dataframe.DataFrame\n\nUsage Guide\n'''''''''''\n\nThe most efficient way to create Modin ``DataFrame`` is to import data from external\nstorage using the highly efficient Modin IO methods (for example using ``pd.read_csv``,\nsee details for Modin IO methods in the :doc:`IO </flow/modin/core/io/index>` page),\nbut even if the data does not originate from a file, any pandas supported data type or\n``pandas.DataFrame`` can be used. Internally, the ``DataFrame`` data is divided into\npartitions, which number along an axis usually corresponds to the number of the user's hardware CPUs. If needed,\nthe number of partitions can be changed by setting ``modin.config.NPartitions``.\n\nLet's consider simple example of creation and interacting with Modin ``DataFrame``:\n\n.. 
code-block:: python\n\n    import modin.config\n\n    # This explicitly sets the number of partitions\n    modin.config.NPartitions.put(4)\n\n    import modin.pandas as pd\n    import pandas\n\n    # Create Modin DataFrame from the external file\n    pd_dataframe = pd.read_csv(\"test_data.csv\")\n    # Create Modin DataFrame from the python object\n    # data = {f'col{x}': [f'col{x}_{y}' for y in range(100, 356)] for x in range(4)}\n    # pd_dataframe = pd.DataFrame(data)\n    # Create Modin DataFrame from the pandas object\n    # pd_dataframe = pd.DataFrame(pandas.DataFrame(data))\n\n    # Show created DataFrame\n    print(pd_dataframe)\n\n    # List DataFrame partitions. Note, that internal API is intended for\n    # developers needs and was used here for presentation purposes\n    # only.\n    partitions = pd_dataframe._query_compiler._modin_frame._partitions\n    print(partitions)\n\n    # Show the first DataFrame partition\n    print(partitions[0][0].get())\n\n    Output:\n\n    # created DataFrame\n\n            col0      col1      col2      col3\n    0    col0_100  col1_100  col2_100  col3_100\n    1    col0_101  col1_101  col2_101  col3_101\n    2    col0_102  col1_102  col2_102  col3_102\n    3    col0_103  col1_103  col2_103  col3_103\n    4    col0_104  col1_104  col2_104  col3_104\n    ..        ...       ...       ...       
...\n    251  col0_351  col1_351  col2_351  col3_351\n    252  col0_352  col1_352  col2_352  col3_352\n    253  col0_353  col1_353  col2_353  col3_353\n    254  col0_354  col1_354  col2_354  col3_354\n    255  col0_355  col1_355  col2_355  col3_355\n\n    [256 rows x 4 columns]\n\n    # List of DataFrame partitions\n\n    [[<modin.core.execution.ray.implementations.pandas_on_ray.partitioning.partition.PandasOnRayDataframePartition object at 0x7fc554e607f0>]\n    [<modin.core.execution.ray.implementations.pandas_on_ray.partitioning.partition.PandasOnRayDataframePartition object at 0x7fc554e9a4f0>]\n    [<modin.core.execution.ray.implementations.pandas_on_ray.partitioning.partition.PandasOnRayDataframePartition object at 0x7fc554e60820>]\n    [<modin.core.execution.ray.implementations.pandas_on_ray.partitioning.partition.PandasOnRayDataframePartition object at 0x7fc554e609d0>]]\n\n    # The first DataFrame partition\n    \n            col0      col1      col2      col3\n    0   col0_100  col1_100  col2_100  col3_100\n    1   col0_101  col1_101  col2_101  col3_101\n    2   col0_102  col1_102  col2_102  col3_102\n    3   col0_103  col1_103  col2_103  col3_103\n    4   col0_104  col1_104  col2_104  col3_104\n    ..       ...       ...       ...       ...\n    60  col0_160  col1_160  col2_160  col3_160\n    61  col0_161  col1_161  col2_161  col3_161\n    62  col0_162  col1_162  col2_162  col3_162\n    63  col0_163  col1_163  col2_163  col3_163\n    64  col0_164  col1_164  col2_164  col3_164\n\n    [65 rows x 4 columns]\n\nAs we show in the example above, Modin ``DataFrame`` can be easily created, and supports any input that pandas ``DataFrame`` supports.\nAlso note that tuning of the ``DataFrame`` partitioning can be done by just setting a single config.\n"
  },
  {
    "path": "docs/flow/modin/pandas/series.rst",
    "content": ":orphan:\n\nSeries Module Overview\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nModin's ``pandas.Series`` API\n'''''''''''''''''''''''''''''\n\nModin's ``pandas.Series`` API is backed by a distributed object providing an identical\nAPI to pandas. After the user calls some ``Series`` function, this call is internally rewritten\ninto a representation that can be processed in parallel by the partitions. These\nresults can be e.g., reduced to single output, identical to the single threaded\npandas ``Series`` method output.\n\n..\n    TODO: add link to the docs with detailed description of queries compilation\n    and execution ater DOCS-#2996 is merged.\n\nPublic API\n----------\n\n.. autoclass:: modin.pandas.series.Series\n\nUsage Guide\n'''''''''''\n\nThe most efficient way to create Modin ``Series`` is to import data from external\nstorage using the highly efficient Modin IO methods (for example using ``pd.read_csv``,\nsee details for Modin IO methods in the :doc:`IO </flow/modin/core/io/index>` page),\nbut even if the data does not originate from a file, any pandas supported data type or\n``pandas.Series`` can be used. Internally, the ``Series`` data is divided into\npartitions, which number along an axis usually corresponds to the number of the user's hardware CPUs. If needed,\nthe number of partitions can be changed by setting ``modin.config.NPartitions``.\n\nLet's consider simple example of creation and interacting with Modin ``Series``:\n\n.. 
code-block:: python\n\n    import modin.config\n\n    # This explicitly sets the number of partitions\n    modin.config.NPartitions.put(4)\n\n    import modin.pandas as pd\n    import pandas\n\n    # Create Modin Series from the external file\n    pd_series = pd.read_csv(\"test_data.csv\", header=None).squeeze()\n    # Create Modin Series from the python object\n    # pd_series = pd.Series([x for x in range(256)])\n    # Create Modin Series from the pandas object\n    # pd_series = pd.Series(pandas.Series([x for x in range(256)]))\n\n    # Show created `Series`\n    print(pd_series)\n\n    # List `Series` partitions. Note, that internal API is intended for\n    # developers needs and was used here for presentation purposes\n    # only.\n    partitions = pd_series._query_compiler._modin_frame._partitions\n    print(partitions)\n\n    # Show the first `Series` partition\n    print(partitions[0][0].get())\n\n    Output:\n\n    # created `Series`\n\n    0      100\n    1      101\n    2      102\n    3      103\n    4      104\n        ...\n    251    351\n    252    352\n    253    353\n    254    354\n    255    355\n    Name: 0, Length: 256, dtype: int64\n\n    # List of `Series` partitions\n\n    [[<modin.core.execution.ray.implementations.pandas_on_ray.partitioning.partition.PandasOnRayDataframePartition object at 0x7fc554e607f0>]\n    [<modin.core.execution.ray.implementations.pandas_on_ray.partitioning.partition.PandasOnRayDataframePartition object at 0x7fc554e9a4f0>]\n    [<modin.core.execution.ray.implementations.pandas_on_ray.partitioning.partition.PandasOnRayDataframePartition object at 0x7fc554e60820>]\n    [<modin.core.execution.ray.implementations.pandas_on_ray.partitioning.partition.PandasOnRayDataframePartition object at 0x7fc554e609d0>]]\n\n    # The first `Series` partition\n    \n        0\n    0   100\n    1   101\n    2   102\n    3   103\n    4   104\n    ..  
...\n    60  160\n    61  161\n    62  162\n    63  163\n    64  164\n\n    [65 rows x 1 columns]\n\nAs we show in the example above, Modin ``Series`` can be easily created, and supports any input that pandas ``Series`` supports.\nAlso note that tuning of the ``Series`` partitioning can be done by just setting a single config.\n"
  },
  {
    "path": "docs/flow/modin/utils.rst",
    "content": ":orphan:\n\nModin Utils\n\"\"\"\"\"\"\"\"\"\"\"\n\nHere are utilities that can be useful when working with Modin.\n\nPublic API\n''''''''''\n\n.. autofunction:: modin.utils.try_cast_to_pandas\n.. autofunction:: modin.utils.execute\n"
  },
  {
    "path": "docs/getting_started/examples.rst",
    "content": "Examples and Resources\n======================\n\nHere you can find additional resources to learn about Modin. To learn more about \nadvanced usage for Modin, please refer to :doc:`Usage Guide </usage_guide/index>` section..\n\nUsage Examples\n''''''''''''''\n\nThe following notebooks demonstrate how Modin can be used for scalable data science:\n\n- Quickstart Guide to Modin [`Source <https://github.com/modin-project/modin/tree/main/examples/quickstart.ipynb>`__]\n- Using Modin with the NYC Taxi Dataset [`Source <https://github.com/modin-project/modin/blob/main/examples/jupyter/Modin_Taxi.ipynb>`__]\n- Modin for Machine Learning with scikit-learn [`Source <https://github.com/modin-project/modin/blob/main/examples/modin-scikit-learn-example.ipynb>`__]\n\nTutorials\n'''''''''\n\nThe following tutorials cover the basic usage of Modin. `Here <https://www.youtube.com/watch?v=NglkafEmbhE>`__ is a one hour video tutorial that walks through these basic exercises.\n\n- Exercise 1: Introduction to Modin [`Source PandasOnRay <https://github.com/modin-project/modin/blob/main/examples/tutorial/jupyter/execution/pandas_on_ray/local/exercise_1.ipynb>`__, `Source PandasOnDask <https://github.com/modin-project/modin/blob/main/examples/tutorial/jupyter/execution/pandas_on_dask/local/exercise_1.ipynb>`__]\n- Exercise 2: Speed Improvements with Modin [`Source PandasOnRay <https://github.com/modin-project/modin/blob/main/examples/tutorial/jupyter/execution/pandas_on_ray/local/exercise_2.ipynb>`__, `Source PandasOnDask <https://github.com/modin-project/modin/blob/main/examples/tutorial/jupyter/execution/pandas_on_dask/local/exercise_2.ipynb>`__]\n- Exercise 3: Defaulting to pandas with Modin [`Source PandasOnRay <https://github.com/modin-project/modin/blob/main/examples/tutorial/jupyter/execution/pandas_on_ray/local/exercise_3.ipynb>`__, `Source PandasOnDask 
<https://github.com/modin-project/modin/blob/main/examples/tutorial/jupyter/execution/pandas_on_dask/local/exercise_3.ipynb>`__]\n\nThe following tutorials cover more advanced features in Modin:\n\n- Exercise 4: Experimental Features in Modin (Spreadsheet, Progress Bar) [`Source PandasOnRay <https://github.com/modin-project/modin/blob/main/examples/tutorial/jupyter/execution/pandas_on_ray/local/exercise_4.ipynb>`__, `Source PandasOnDask <https://github.com/modin-project/modin/blob/main/examples/tutorial/jupyter/execution/pandas_on_dask/local/exercise_4.ipynb>`__]\n- Exercise 5: Setting up Modin in a Cluster Environment [`Source PandasOnRay <https://github.com/modin-project/modin/blob/main/examples/tutorial/jupyter/execution/pandas_on_ray/cluster/exercise_5.ipynb>`__]\n- Exercise 6: Running Modin in a Cluster Environment [`Source PandasOnRay <https://github.com/modin-project/modin/blob/main/examples/tutorial/jupyter/execution/pandas_on_ray/cluster/exercise_6.ipynb>`__]\n\nFor instructions on how to get the required dependencies for the tutorial notebooks and how to run them, please refer to the respective `README.md <https://github.com/modin-project/modin/tree/main/examples/tutorial/jupyter/README.md>`__ file.\n\nTalks & Podcasts\n''''''''''''''''\n\n- `Scaling Interactive Data Science with Modin and Ray <https://www.youtube.com/watch?v=ycSf1IbBGWk>`_ (20 minute, Ray Summit 2021)\n- `Unleash The Power Of Dataframes At Any Scale With Modin <https://www.pythonpodcast.com/modin-parallel-dataframe-episode-324/>`_  (40 minute, Python Podcast 2021)\n- `[Russian] Distributed Data Processing and XGBoost Training and Prediction with Modin <https://www.youtube.com/watch?v=oo_lxUjsFTM&t=1s>`_ (30 minute, PyCon Russia 2021)\n- `[Russian] Efficient Data Science with Modin <https://www.youtube.com/watch?v=cOM82kHRwkM&t=6568s>`_ (30 minute, ISP RAS Open 2021)\n- `Modin: Scaling the Capabilities of the Data Scientist, not the Machine <https://www.youtube.com/watch?v=NglkafEmbhE>`_ (1 hour, RISE Camp 2020)\n- 
`Modin: Pandas Scalability with Devin Petersohn <https://softwareengineeringdaily.com/2020/07/23/modin-pandas-scalability-with-devin-petersohn/>`_ (1 hour, Software Engineering Daily Podcast 2020)\n- `Introduction to the DataFrame and Modin <https://www.youtube.com/watch?v=_0eVVLXrtfY>`_ (20 minute, RISECamp 2019)\n- `Scaling Interactive Pandas Workflows with Modin <https://www.youtube.com/watch?v=-HjLd_3ahCw>`_ (40 minute, PyData NYC 2018)\n\nCommunity contributions\n'''''''''''''''''''''''\n\nHere are some blogposts and articles about Modin:\n\n- `Anaconda Blog: Scale your pandas workflow with Modin by Vasilij Litvinov <https://www.anaconda.com/blog/scale-your-pandas-workflow-with-modin>`_\n- `The Modin view of Scaling Pandas by Devin Petersohn <https://towardsdatascience.com/the-modin-view-of-scaling-pandas-825215533122>`_\n- `Data Science at Scale with Modin by Areg Melik-Adamyan <https://medium.com/intel-analytics-software/data-science-at-scale-with-modin-5319175e6b9a>`_\n- `Speed up Pandas using Modin by Eric D. Brown, D.Sc. 
<https://pythondata.com/quick-tip-speed-up-pandas-using-modin/>`_\n- `Explore Python Libraries: Make Your DataFrames Parallel With Modin by Zachary Bennett <https://www.pluralsight.com/guides/explore-python-libraries:-make-your-dataframes-parallel-with-modin>`_\n- `Get faster pandas with Modin, even on your laptops by Parul Pandey <https://towardsdatascience.com/get-faster-pandas-with-modin-even-on-your-laptops-b527a2eeda74>`_\n- `How to speedup pandas by changing one line of code by Shrivarsheni <https://www.machinelearningplus.com/python/modin-speedup-pandas/>`_\n- `How To Accelerate Pandas With Just One Line Of Code by Analytics India <https://analyticsindiamag.com/how-to-accelerate-pandas-with-just-one-line-of-code-modin/>`_\n- `An Easy Introduction to Modin: A Step-by-Step Guide to Accelerating Pandas by Intel <https://www.intel.com/content/www/us/en/developer/articles/technical/modin-step-by-step-guide-to-accelerating-pandas.html#gs.c69er5>`_\n\n\nHere are some articles contributed by the international community:\n\n- `[Chinese] 用 Modin 来提速 pandas 工作流程 by Python Chinese Community <https://blog.csdn.net/BF02jgtRS00XKtCx/article/details/90709222>`_\n- `[German] Was ist Modin? by Dipl.-Ing. (FH) Stefan Luber <https://www.bigdata-insider.de/was-ist-modin-a-982826/>`_\n- `[Russian] Ускоряем Pandas при помощи модуля modin by Разработка <https://vc.ru/dev/187095-uskoryaem-pandas-pri-pomoshchi-modulya-modin>`_\n- `[Korean] modin 으로 pandas 더 빠르게 사용하기 by 분석뉴비 <https://data-newbie.tistory.com/279>`_\n\nIf you would like your articles to be featured here, please `submit a pull request <https://github.com/modin-project/modin/pulls>`_ to let us know!\n"
  },
  {
    "path": "docs/getting_started/faq.rst",
    "content": "Frequently Asked Questions (FAQs)\n=================================\n\nBelow, you will find answers to the most commonly asked questions about\nModin. If you still cannot find the answer you are looking for, please post your\nquestion on the #support channel on our Slack_ community or open a Github issue_.\n\nFAQs: Why choose Modin?\n-----------------------\n\nWhat’s wrong with pandas and why should I use Modin?\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nWhile pandas works extremely well on small datasets, as soon as you start working with\nmedium to large datasets that are more than a few GBs, pandas can become painfully\nslow or run out of memory. This is because pandas is single-threaded. In other words,\nyou can only process your data with one core at a time. This approach does not scale to\nlarger data sets and adding more hardware does not lead to more performance gain.\n\nThe :py:class:`~modin.pandas.dataframe.DataFrame` is a highly\nscalable, parallel DataFrame. Modin transparently distributes the data and computation so\nthat you can continue using the same pandas API while being able to work with more data faster.\nModin lets you use all the CPU cores on your machine, and because it is lightweight, it\noften has less memory overhead than pandas. See :doc:` Why Modin? </getting_started/why_modin/pandas>`\npage to learn more about how Modin is different from pandas.\n\nWhy not just improve pandas?\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\npandas is a massive community and well established codebase. Many of the issues\nwe have identified and resolved with pandas are fundamental to its current\nimplementation. While we would be happy to donate parts of Modin that\nmake sense in pandas, many of these components would require significant (or\ntotal) redesign of the pandas architecture. 
Modin's architecture goes beyond\npandas, which is why the pandas API is just a thin layer at the user level. To learn\nmore about Modin's architecture, see the :doc:`architecture </development/architecture>` documentation.\n\nHow much faster can I go with Modin compared to pandas?\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nModin is designed to scale with the amount of hardware available.\nEven in a traditionally serial task like ``read_csv``, we see large gains by efficiently\ndistributing the work across your entire machine. Because it is so light-weight,\nModin provides speed-ups of up to 4x on a laptop with 4 physical cores. This speedup scales\nefficiently to larger machines with more cores. We have several published papers_ that\ninclude performance results and comparisons against pandas.\n\nHow much more data would I be able to process with Modin?\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nOften data scientists have to use different tools for operating on datasets of different sizes.\nThis is not only because processing large dataframes is slow, but also pandas does not support working\nwith dataframes that don't fit into the available memory. As a result, pandas workflows that work well\nfor prototyping on a few MBs of data do not scale to tens or hundreds of GBs (depending on the size\nof your machine). Modin supports operating on data that does not fit in memory, so that you can comfortably\nwork with hundreds of GBs without worrying about substantial slowdown or memory errors. 
For more information,\nsee :doc:`out-of-memory support </getting_started/why_modin/out_of_core>` for Modin.\n\nHow does Modin compare to Dask DataFrame and Koalas?\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nTLDR: Modin has better coverage of the pandas API, has a flexible backend, better ordering semantics,\nand supports both row and column-parallel operations.\nCheck out :doc:`Modin vs Dask vs Koalas </getting_started/why_modin/modin_vs_dask_vs_koalas>` page detailing\nthe differences!\n\nHow does Modin work under the hood?\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nModin is logically separated into different layers that represent the hierarchy of a\ntypical Database Management System. User queries which perform data transformation,\ndata ingress or data egress pass through the Modin Query Compiler which translates\nqueries from the top-level pandas API Layer that users interact with to the Modin Core\nDataframe layer.\nThe Modin Core DataFrame is our efficient DataFrame implementation that utilizes a partitioning schema\nwhich allows for distributing tasks and queries. From here, the Modin DataFrame works with engines like\nRay, Dask or Unidist to execute computation, and then return the results to the user.\n\nFor more details, take a look at our system :doc:`architecture </development/architecture>`.\n\nFAQs: How to use Modin?\n-----------------------\n\nIf I’m only using my laptop, can I still get the benefits of Modin?\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nAbsolutely! Unlike other parallel DataFrame systems, Modin is an extremely\nlight-weight, robust DataFrame. 
Because it is so light-weight, Modin provides\nspeed-ups of up to 4x on a laptop with 4 physical cores\nand allows you to work on data that doesn't fit in your laptop's RAM.\n\nHow do I use Jupyter or Colab notebooks with Modin?\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nYou can take a look at this Google Colab installation guide_ and\nthis notebook tutorial_. Once Modin is installed, simply replace your pandas\nimport with Modin import:\n\n.. code-block:: python\n\n    # import pandas as pd\n    import modin.pandas as pd\n\nWhich execution engine (Ray, Dask or Unidist) should I use for Modin?\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nModin lets you effortlessly speed up your pandas workflows with either Ray_'s, Dask_'s or Unidist_'s execution engine.\nYou don't need to know anything about either engine in order to use it with Modin. If you only have one engine\ninstalled, Modin will automatically detect which engine you have installed and use that for scheduling computation.\nIf you don't have a preference, we recommend starting with Modin's default Ray engine.\nIf you want to use a specific compute engine, you can set the environment variable ``MODIN_ENGINE``\nand Modin will do computation with that engine:\n\n.. code-block:: bash\n\n    pip install \"modin[ray]\" # Install Modin dependencies and Ray to run on Ray\n    export MODIN_ENGINE=ray  # Modin will use Ray\n\n    pip install \"modin[dask]\" # Install Modin dependencies and Dask to run on Dask\n    export MODIN_ENGINE=dask  # Modin will use Dask\n\n    pip install \"modin[mpi]\" # Install Modin dependencies and MPI to run on MPI through unidist.\n    export MODIN_ENGINE=unidist  # Modin will use Unidist\n    export UNIDIST_BACKEND=mpi   # Unidist will use MPI backend.\n\nThis can also be done with:\n\n.. 
code-block:: python\n\n    import modin.config as modin_cfg\n    import unidist.config as unidist_cfg\n\n    modin_cfg.Engine.put(\"ray\")  # Modin will use Ray\n    modin_cfg.Engine.put(\"dask\")  # Modin will use Dask\n\n    modin_cfg.Engine.put('unidist') # Modin will use Unidist\n    unidist_cfg.Backend.put('mpi') # Unidist will use MPI backend\n\nWe plan to support more execution engines in future. If you have a specific request,\nplease post on the #feature-requests channel on our Slack_ community.\n\nHow do I connect Modin to a database via `read_sql`?\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nTo read from a SQL database, you have two options:\n\n1) Pass a connection string, e.g. ``postgresql://reader:NWDMCE5xdipIjRrp@hh-pgsql-public.ebi.ac.uk:5432/pfmegrnargs``\n2) Pass an open database connection, e.g. for psycopg2, ``psycopg2.connect(\"dbname=pfmegrnargs user=reader password=NWDMCE5xdipIjRrp host=hh-pgsql-public.ebi.ac.uk\")``\n\nThe first option works with both Modin and pandas. If you try the second option\nin Modin, Modin will default to pandas because open database connections cannot be pickled.\nPickling is required to send connection details to remote workers.\nTo handle the unique requirements of distributed database access, Modin has a distributed\ndatabase connection called ``ModinDatabaseConnection``:\n\n.. 
code-block:: python\n\n    import modin.pandas as pd\n    from modin.db_conn import ModinDatabaseConnection\n    con = ModinDatabaseConnection(\n        'psycopg2',\n        host='hh-pgsql-public.ebi.ac.uk',\n        dbname='pfmegrnargs',\n        user='reader',\n        password='NWDMCE5xdipIjRrp')\n    df = pd.read_sql(\"SELECT * FROM rnc_database\",\n            con,\n            index_col=None,\n            coerce_float=True,\n            params=None,\n            parse_dates=None,\n            chunksize=None)\n\n\nThe ``ModinDatabaseConnection`` will save any arguments you supply it and forward\nthem to the workers to make their own connections.\n\nHow can I contribute to Modin?\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\n**Modin is currently under active development. Requests and contributions are welcome!**\n\nIf you are interested in contributing please check out the :doc:`Contributing Guide</development/contributing>`\nand then refer to the :doc:`Development Documentation</development/index>`,\nwhere you can find system architecture, internal implementation details, and other useful information.\nAlso check out the `Github`_ to view open issues and make contributions.\n\n.. _issue: https://github.com/modin-project/modin/issues\n.. _Slack: https://join.slack.com/t/modin-project/shared_invite/zt-yvk5hr3b-f08p_ulbuRWsAfg9rMY3uA\n.. _Github: https://github.com/modin-project/modin\n.. _Ray: https://github.com/ray-project/ray/\n.. _Dask: https://github.com/dask/dask\n.. _Unidist: https://github.com/modin-project/unidist\n.. _papers: https://people.eecs.berkeley.edu/~totemtang/paper/Modin.pdf\n.. _guide: https://modin.readthedocs.io/en/latest/getting_started/installation.html#installing-on-google-colab\n.. _tutorial: https://github.com/modin-project/modin/tree/main/examples/tutorial\n"
  },
  {
    "path": "docs/getting_started/installation.rst",
    "content": "=============\nInstallation\n=============\n\n.. note:: \n  | *Estimated Reading Time: 15 minutes*\n  | If you already installed Modin on your machine, you can skip this section.\n\nThere are several ways to install Modin. Most users will want to install with\n``pip`` or using ``conda`` tool, but some users may want to build from the main branch\non the `GitHub repo`_. The main branch has the most recent patches, but may be less\nstable than a release installed from ``pip`` or ``conda``.\n\nInstalling with pip\n-------------------\n\nStable version\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nModin can be installed with ``pip`` on Linux, Windows and MacOS. \nTo install the most recent stable release run the following:\n\n.. code-block:: bash\n\n  pip install -U modin # -U for upgrade in case you have an older version\n\nModin can be used with :doc:`Ray</development/using_pandas_on_ray>`, :doc:`Dask</development/using_pandas_on_dask>`,\n:doc:`Unidist</development/using_pandas_on_mpi>` engines.\nIf you don't have Ray_, Dask_ or Unidist_ installed, you will need to install Modin with one of the targets:\n\n.. code-block:: bash\n\n  pip install \"modin[ray]\" # Install Modin dependencies and Ray to run on Ray\n  pip install \"modin[dask]\" # Install Modin dependencies and Dask to run on Dask\n  pip install \"modin[mpi]\" # Install Modin dependencies and MPI to run on MPI through unidist\n  pip install \"modin[all]\" # Install Ray and Dask\n\nTo get Modin on MPI through unidist (as of unidist 0.5.0) fully working\nit is required to have a working MPI implementation installed beforehand.\nOtherwise, installation of ``modin[mpi]`` may fail. 
Refer to\n`Installing with pip`_ section of the unidist documentation for more details about installation.\n\n**Note:** Since Modin 0.30.0 we use a reduced set of Ray dependencies: ``ray`` instead of ``ray[default]``.\nThis means that the dashboard and cluster launcher are no longer installed by default.\nIf you need those, consider installing ``ray[default]`` along with ``modin[ray]``.\n\nModin will automatically detect which engine you have installed and use that for\nscheduling computation!\n\nRelease candidates\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nBefore most major releases, we will upload a release candidate to test and check if there are any problems. If you would like to install a pre-release of Modin, run the following:\n\n.. code-block:: bash\n\n  pip install --pre modin\n\nThese pre-releases are uploaded for dependencies and users to test their existing code\nto ensure that it still works. If you find something wrong, please raise an issue_ or\nemail the bug reporter: bug_reports@modin.org.\n\nInstalling specific dependency sets\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nModin has a number of specific dependency sets for running Modin on different execution engines and\nstorage formats or for different functionalities of Modin. Here is a list of dependency sets for Modin:\n\n.. code-block:: bash\n\n  pip install \"modin[ray]\" # If you want to use the Ray execution engine\n\n.. code-block:: bash\n\n  pip install \"modin[dask]\" # If you want to use the Dask execution engine\n\n.. code-block:: bash\n\n  pip install \"modin[mpi]\" # If you want to use MPI through unidist execution engine\n\n\nConsortium Standard-compatible implementation based on Modin\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\n.. 
code-block:: bash\n\n  pip install \"modin[consortium-standard]\"\n\n\nInstalling on Google Colab\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nModin can be used with Google Colab_ via the ``pip`` command, by running the following code in a new cell:\n\n.. code-block:: bash\n\n  !pip install \"modin[all]\"\n\nSince Colab preloads several of Modin's dependencies by default, we need to restart the Colab environment once Modin is installed by either clicking on the :code:`\"RESTART RUNTIME\"` button in the installation output or by running the following code:\n\n.. code-block:: python\n\n  # Post-install automatically kill and restart Colab environment\n  import os\n  os.kill(os.getpid(), 9)\n\nOnce you have restarted the Colab environment, you can use Modin in Colab in subsequent sessions.\n\nNote that on the free version of Colab, there is a `limit on the compute resource <https://research.google.com/colaboratory/faq.html>`_. To leverage the full power of Modin, you may have to upgrade to Colab Pro to get access to more compute resources.\n\nInstalling with conda\n---------------------\n\nUsing conda-forge channel\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nModin releases can be installed using ``conda`` from conda-forge channel. Starting from 0.10.1\nit is possible to install modin with chosen engine(s) alongside. 
Current options are:\n\n+---------------------------------+---------------------------+-----------------------------+\n| **Package name in conda-forge** | **Engine(s)**             | **Supported OSs**           |\n+---------------------------------+---------------------------+-----------------------------+\n| modin                           | Dask_                     |   Linux, Windows, MacOS     |\n+---------------------------------+---------------------------+-----------------------------+\n| modin-dask                      | Dask                      |   Linux, Windows, MacOS     |\n+---------------------------------+---------------------------+-----------------------------+\n| modin-ray                       | Ray_                      |       Linux, Windows        |\n+---------------------------------+---------------------------+-----------------------------+\n| modin-mpi                       | MPI_ through unidist_     |   Linux, Windows, MacOS     |\n+---------------------------------+---------------------------+-----------------------------+\n| modin-all                       | Dask, Ray, Unidist        |          Linux              |\n+---------------------------------+---------------------------+-----------------------------+\n\n**Note:** Since Modin 0.30.0 we use a reduced set of Ray dependencies: ``ray-core`` instead of ``ray-default``.\nThis means that the dashboard and cluster launcher are no longer installed by default.\nIf you need those, consider installing ``ray-default`` along with ``modin-ray``.\n\nFor installing Dask, Ray and MPI through unidist engines into conda environment following command should be used:\n\n.. code-block:: bash\n\n  conda install -c conda-forge modin-ray modin-dask modin-mpi\n\nAll set of engines could be available in conda environment by specifying:\n\n.. code-block:: bash\n\n  conda install -c conda-forge modin-all\n\nor explicitly:\n\n.. 
code-block:: bash\n\n  conda install -c conda-forge modin-ray modin-dask modin-mpi\n\nRefer to `Installing with conda`_ section of the unidist documentation\nfor more details on how to install a specific MPI implementation to run on.\n\n``conda`` may be slow installing ``modin-all`` or combinations of execution engines so we currently recommend using libmamba solver for the installation process.\nTo do this install it in a base environment:\n\n.. code-block:: bash\n\n  conda install -n base conda-libmamba-solver\n\nThen it can be used during installation either like\n\n.. code-block:: bash\n\n  conda install -c conda-forge modin-ray modin-dask --experimental-solver=libmamba\n\nor starting from conda 22.11 and libmamba solver 22.12 versions\n\n.. code-block:: bash\n\n  conda install -c conda-forge modin-ray --solver=libmamba\n\n\nInstalling from the GitHub main branch\n--------------------------------------\n\nIf you'd like to try Modin using the most recent updates from the main branch, you can\nalso use ``pip``.\n\n.. code-block:: bash\n\n  pip install \"modin[all] @ git+https://github.com/modin-project/modin\"\n\nThis will install directly from the repo without you having to manually clone it! Please be aware\nthat these changes have not made it into a release and may not be completely stable.\n\nIf you would like to install Modin with a specific engine, you can use ``modin[ray]`` or ``modin[dask]`` or ``modin[mpi]`` instead of ``modin[all]`` in the command above.\n\nWindows\n-------\n\nAll Modin engines are available both on Windows and Linux as mentioned above.\nDefault engine on Windows is :doc:`Ray</development/using_pandas_on_ray>`.\nIt is also possible to use Windows Subsystem For Linux (WSL_), but this is generally \nnot recommended due to the limitations and poor performance of Ray on WSL, roughly \n2-3x worse than on native Windows. 
\n\nBuilding Modin from Source\n--------------------------\n\nIf you're planning on :doc:`contributing </development/contributing>` to Modin, you will need to ensure that you are\nbuilding Modin from the local repository that you are working off of. Occasionally,\nthere are issues in overlapping Modin installs from pypi and from source. To avoid these\nissues, we recommend uninstalling Modin before you install from source:\n\n.. code-block:: bash\n\n  pip uninstall modin\n\nTo build from source, you first must clone the repo. We recommend forking the repository first\nthrough the GitHub interface, then cloning as follows:\n\n.. code-block:: bash\n\n  git clone https://github.com/<your-github-username>/modin.git\n\nOnce cloned, ``cd`` into the ``modin`` directory and use ``pip`` to install:\n\n.. code-block:: bash\n\n  cd modin\n  pip install -e .\n  pip install -e \".[all]\"  # will install dependencies for all engines\n\n.. _`GitHub repo`: https://github.com/modin-project/modin/tree/main\n.. _issue: https://github.com/modin-project/modin/issues\n.. _WSL: https://docs.microsoft.com/en-us/windows/wsl/install-win10\n.. _Ray: http://ray.readthedocs.io\n.. _Dask: https://github.com/dask/dask\n.. _MPI: https://www.mpi-forum.org/\n.. _Unidist: https://github.com/modin-project/unidist\n.. _`Installing with pip`: https://unidist.readthedocs.io/en/latest/installation.html#installing-with-pip\n.. _`Installing with conda`: https://unidist.readthedocs.io/en/latest/installation.html#installing-with-conda\n.. _`Intel Distribution of Modin`: https://software.intel.com/content/www/us/en/develop/tools/oneapi/components/distribution-of-modin.html#gs.86stqv\n.. _`Intel Distribution of Modin Getting Started`: https://www.intel.com/content/www/us/en/developer/articles/technical/intel-distribution-of-modin-getting-started-guide.html\n.. |reg|    unicode:: U+000AE .. REGISTERED SIGN\n.. _Colab: https://colab.research.google.com/\n"
  },
  {
    "path": "docs/getting_started/quickstart.rst",
    "content": "Getting Started\n===============\n\n.. note:: \n  | *Estimated Reading Time: 10 minutes*\n  | You can follow along this tutorial in a Jupyter notebook `here <https://github.com/modin-project/modin/tree/main/examples/quickstart.ipynb>`_. \n\n.. toctree::\n    :hidden:\n    :maxdepth: 4\n    \n    10-min Quickstart Guide <self>\n    installation\n    using_modin/using_modin\n    why_modin/why_modin\n    examples\n    faq\n    troubleshooting\n\n.. meta::\n    :description lang=en:\n        Introduction to Modin.\n\nQuick Start Guide\n-----------------\n\nTo install the most recent stable release for Modin run the following:\n\n.. code-block:: bash\n\n  pip install \"modin[all]\" \n\nFor further instructions on how to install Modin with conda or for specific platforms \nor engines, see our detailed `installation guide <../getting_started/installation.html>`_.\n\nModin acts as a drop-in replacement for pandas so you simply have to replace the import \nof pandas with the import of Modin as follows to speed up your pandas workflows:\n\n.. code-block:: bash\n\n  # import pandas as pd\n  import modin.pandas as pd\n\nExample: Instant Scalability with No Extra Effort\n-------------------------------------------------\n\nWhen working on large datasets, pandas becomes painfully slow or :doc:`runs out of memory</getting_started/why_modin/out_of_core>`. Modin automatically scales up your \npandas workflows by parallelizing the dataframe operations, so that you can more \neffectively leverage the compute resources available.\n\nFor the purpose of demonstration, we will load in modin as ``pd`` and pandas as \n``pandas``.\n\n.. 
code-block:: python\n\n  import modin.pandas as pd\n  import pandas\n\n  #############################################\n  ### For the purpose of timing comparisons ###\n  #############################################\n  import time\n  import ray\n  # Look at the Ray documentation with respect to the Ray configuration suited to you most.\n  ray.init()\n  #############################################\n\nIn this toy example, we look at the NYC taxi dataset, which is around 200MB in size. You can download `this dataset <https://modin-datasets.intel.com/testing/yellow_tripdata_2015-01.csv>`_ to run the example locally.\n\n.. code-block:: python\n\n  # This may take a few minutes to download\n  import urllib.request\n  dataset_url = \"https://modin-datasets.intel.com/testing/yellow_tripdata_2015-01.csv\"\n  urllib.request.urlretrieve(dataset_url, \"taxi.csv\")  \n\nFaster Data Loading with ``read_csv``\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\n.. code-block:: python\n   \n  start = time.time()\n\n  pandas_df = pandas.read_csv(dataset_url, parse_dates=[\"tpep_pickup_datetime\", \"tpep_dropoff_datetime\"], quoting=3)\n\n  end = time.time()\n  pandas_duration = end - start\n  print(\"Time to read with pandas: {} seconds\".format(round(pandas_duration, 3)))\n\nBy running the same command ``read_csv`` with Modin, we generally get around 4X speedup \nfor loading in the data in parallel. \n\n.. code-block:: python\n\n  start = time.time()\n\n  modin_df = pd.read_csv(dataset_url, parse_dates=[\"tpep_pickup_datetime\", \"tpep_dropoff_datetime\"], quoting=3)\n\n  end = time.time()\n  modin_duration = end - start\n  print(\"Time to read with Modin: {} seconds\".format(round(modin_duration, 3)))\n\n  print(\"Modin is {}x faster than pandas at `read_csv`!\".format(round(pandas_duration / modin_duration, 2)))\n\nFaster ``concat`` across multiple dataframes\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\nOur previous ``read_csv`` example operated on a relatively small dataframe. 
In the \nfollowing example, we duplicate the same taxi dataset 100 times and then concatenate \nthem together, resulting in a dataset around 19GB in size.\n\n.. code-block:: python\n\n  start = time.time()\n\n  big_pandas_df = pandas.concat([pandas_df for _ in range(25)])\n\n  end = time.time()\n  pandas_duration = end - start\n  print(\"Time to concat with pandas: {} seconds\".format(round(pandas_duration, 3)))\n\n.. code-block:: python\n\n  start = time.time()\n\n  big_modin_df = pd.concat([modin_df for _ in range(25)])\n\n  end = time.time()\n  modin_duration = end - start\n  print(\"Time to concat with Modin: {} seconds\".format(round(modin_duration, 3)))\n\n  print(\"Modin is {}x faster than pandas at `concat`!\".format(round(pandas_duration / modin_duration, 2)))\n\nModin speeds up the ``concat`` operation by more than 60X, taking less than a second to \ncreate the large dataframe, while pandas took close to a minute.\n\n\nFaster ``apply`` over a single column\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\nThe performance benefits of Modin become apparent when we operate on large \ngigabyte-scale datasets. Let's say we want to round up values \nacross a single column via the ``apply`` operation. \n\n.. code-block:: python\n\n  start = time.time()\n  rounded_trip_distance_pandas = big_pandas_df[\"trip_distance\"].apply(round)\n\n  end = time.time()\n  pandas_duration = end - start\n  print(\"Time to apply with pandas: {} seconds\".format(round(pandas_duration, 3)))\n\n.. 
code-block:: python\n  \n  start = time.time()\n\n  rounded_trip_distance_modin = big_modin_df[\"trip_distance\"].apply(round)\n\n  end = time.time()\n  modin_duration = end - start\n  print(\"Time to apply with Modin: {} seconds\".format(round(modin_duration, 3)))\n\n  print(\"Modin is {}x faster than pandas at `apply` on one column!\".format(round(pandas_duration / modin_duration, 2)))\n\nModin is more than 30X faster at applying a function to a single column of data, operating on 130+ \nmillion rows in a second.\n\nIn short, Modin provides orders of magnitude speed up over pandas for a variety of operations out of the box. \n\n.. figure:: ../img/quickstart_speedup.svg\n   :align: center\n\nSummary\n-------\n\nHopefully, this tutorial demonstrated how Modin delivers significant speedup on pandas \noperations without the need for any extra effort. Throughout the example, we moved from \nworking with 100MBs of data to 20GBs of data all without having to change anything or \nmanually optimize our code to achieve the level of scalable performance that Modin \nprovides.\n\nNote that in this quickstart example, we've only shown ``read_csv``, ``concat``, and \n``apply``, but these are not the only pandas operations that Modin optimizes for. In \nfact, Modin covers `more than 90\\% of the pandas API <https://github.com/modin-project/modin/blob/main/README.md#pandas-api-coverage>`_, yielding considerable speedups for \nmany common operations.\n"
  },
  {
    "path": "docs/getting_started/troubleshooting.rst",
    "content": "Troubleshooting\n===============\n\nWe hope your experience with Modin is bug-free, but there are some quirks about Modin\nthat may require troubleshooting. If you are still having issues, please post on\nthe #support channel on our Slack_ community or open a Github issue_.\n\nFrequently encountered issues\n-----------------------------\n\nThis is a list of the most frequently encountered issues when using Modin. Some of these\nare working as intended, while others are known bugs that are being actively worked on.\n\nWarning during execution: ``defaulting to pandas``\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nPlease note, that while Modin covers a large portion of the pandas API, not all functionality is implemented. For methods that are not yet implemented, such as ``asfreq``, you may see the following:\n\n.. code-block:: text\n\n  UserWarning: `DataFrame.asfreq` defaulting to pandas implementation.\n\nTo understand which functions will lead to this warning, we have compiled a list of :doc:`currently supported methods </supported_apis/index>`. When you see this warning, Modin defaults to pandas by converting the Modin dataframe to pandas to perform the operation. Once the operation is complete in pandas, it is converted back to a Modin dataframe. These operations will have a high overhead due to the communication involved and will take longer than pandas. When this is happening, a warning will be given to the user to inform them that this operation will take longer than usual. You can learn more about this :doc:`here </supported_apis/defaulting_to_pandas>`.\n\nIf you would like to request a particular method be implemented, feel free to open an\n`issue`_. 
Before you open an issue please make sure that someone else has not already\nrequested that functionality.\n\nHanging on ``import modin.pandas as pd``\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nThis can happen when Ray fails to start. It will keep retrying, but often it is faster\nto just restart the notebook or interpreter. Generally, this should not happen. Most\ncommonly this is encountered when starting multiple notebooks or interpreters in quick\nsuccession.\n\n**Solution**\n\nRestart your interpreter or notebook kernel.\n\n**Avoiding this Error**\n\nAvoid starting many Modin notebooks or interpreters in quick succession. Wait 2-3\nseconds before starting the next one.\n\nImporting heterogeneous data using ``read_csv``\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nSince Modin's ``read_csv`` imports data in parallel, it is possible for data across\npartitions to be heterogeneously typed (this can happen when columns contain\nheterogeneous data, i.e. values in the same column are of different types). An example\nof how this is handled is shown below.\n\n.. 
code-block:: python\n\n  import os\n  import pandas\n  import modin.pandas as pd\n  from modin.config import NPartitions\n\n  NPartitions.put(2)\n\n  test_filename = \"test.csv\"\n  # data with heterogeneous values in the first column\n  data = \"\"\"one,2\n  3,4\n  5,6\n  7,8\n  9.0,10\n  \"\"\"\n  kwargs = {\n      # names of the columns to set, if `names` parameter is set,\n      # header inferring from the first data row/rows will be disabled\n      \"names\": [\"col1\", \"col2\"],\n\n      # explicit setting of data type of column/columns with heterogeneous\n      # data will force partitions to read data with correct dtype\n      # \"dtype\": {\"col1\": str},\n  }\n\n\n  try:\n      with open(test_filename, \"w\") as f:\n          f.write(data)\n\n      pandas_df = pandas.read_csv(test_filename, **kwargs)\n      pd_df = pd.read_csv(test_filename, **kwargs)\n\n      print(pandas_df)\n      print(pd_df)\n  finally:\n      os.remove(test_filename)\n\n  Output:\n\n  pandas_df:\n    col1  col2\n  0  one     2\n  1    3     4\n  2    5     6\n  3    7     8\n  4  9.0    10\n\n  pd_df:\n    col1  col2\n  0  one     2\n  1    3     4\n  2    5     6\n  3  7.0     8\n  4  9.0    10\n\n\nIn this case, ``col1`` of the `DataFrame` read by pandas contains only ``str`` data\nbecause the first value (\"one\") is inferred to have type ``str``, which forces pandas to handle the rest of the values in the column\nas strings. The first Modin partition (the first three rows) handles the data as pandas does,\nbut the second partition (the last two rows) reads the data as floats. This is because, in the\nsecond partition, ``col1`` contains only an int and a float, and thus the column type is inferred to be float. 
As a\nresult, `7` is interpreted as `7.0`, which differs from the pandas output.\n\nThe above example demonstrates heterogeneous data import with str, int, and float types,\nbut heterogeneous data consisting of other data/parameter combinations can also result in \ndata type mismatches with pandas.\n\n**Solution**\n\nWhen heterogeneous data is detected, a warning will be raised.\nCurrently, these discrepancies aren't properly handled\nby Modin, so to avoid this issue, you need to set the ``dtype`` parameter of ``read_csv``\nmanually to force the correct data type coercion during data import. Note that \nto avoid excessive performance degradation, the ``dtype`` value should only be set for columns that may contain heterogeneous data.\n\nSpecifying the ``dtype`` parameter will work well in most cases. If the file\ncontains a column that should be interpreted as the index\n(the ``index_col`` parameter is specified) there may still be type discrepancies in the index, since the ``dtype`` parameter is only responsible for data\nfields. If in the above example, ``kwargs`` was set like so:\n\n.. code-block:: python\n\n  kwargs = {\n      \"names\": [\"col1\", \"col2\"],\n      \"dtype\": {\"col1\": str},\n      \"index_col\": \"col1\",\n  }\n\nThe resulting Modin DataFrame will contain incorrect values - just as if ``dtype``\nhad not been specified:\n\n.. code-block:: python\n\n  col1\n  one      2\n  3        4\n  5        6\n  7.0      8\n  9.0     10\n\nOne workaround is to import the data without setting the ``index_col`` parameter, and then \nset the index column using the ``DataFrame.set_index`` function as shown in\nthe example below:\n\n.. 
code-block:: python\n\n  pd_df = pd.read_csv(filename, dtype=data_dtype, index_col=None)\n  pd_df = pd_df.set_index(index_col_name)\n  pd_df.index.name = None\n\n\nUsing Modin with python multiprocessing\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nWe strongly recommend against using a distributed execution engine (e.g. Ray or Dask)\nin conjunction with Python multiprocessing because that can lead to undefined behavior.\nOne such example is shown below:\n\n.. code-block:: python\n\n  import modin.pandas as pd\n\n  # Ray engine is used by default\n  df = pd.DataFrame([1, 2, 3])\n\n  def f(arg):\n    return df + arg\n\n  if __name__ == '__main__':\n    from multiprocessing import Pool\n\n    with Pool(5) as p:\n        print(p.map(f, [1]))\n\nAlthough this example may work on your machine, we do not recommend it, because\nthe Python multiprocessing library will duplicate Ray clusters, causing both\nexcessive resource usage and conflict over the available resources.\n\nPoor performance of the first operation with Modin on Ray engine\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nThere might be cases when the first operation with Modin on Ray engine is much slower than the subsequent calls of the operation.\nThat happens because Ray workers may not be fully set up yet to perform computation after initialization of the engine\nwith ``ray.init(runtime_env={'env_vars': {'__MODIN_AUTOIMPORT_PANDAS__': '1'}})``, which is the default behavior of Modin on Ray engine\nif Ray has not been initialised yet. 
Modin intentionally initializes Ray this way to import ``pandas`` in workers\nonce the Python interpreter is started in them, so as to avoid a race condition in Ray between the import thread and the thread executing the code.\n\n..\n      See more details on why we started using ``ray.init(runtime_env={'env_vars': {'__MODIN_AUTOIMPORT_PANDAS__': '1'}})`` in\n      https://github.com/modin-project/modin/pull/4603.\n\n.. code-block:: python\n\n  import time\n  import pandas\n  import numpy as np\n  import ray\n  import modin.pandas as pd\n  import modin.config as cfg\n\n  # Look at the Ray documentation with respect to the Ray configuration suited to you most.\n  ray.init(runtime_env={'env_vars': {'__MODIN_AUTOIMPORT_PANDAS__': '1'}})\n\n  pandas_df = pandas.DataFrame(\n    np.random.randint(0, 100, size=(1000000, 13))\n  )\n  pandas_df.to_csv(\"foo.csv\", index=False)\n\n  def read_csv_with_pandas():\n    start_time = time.time()\n    pandas_df = pandas.read_csv(\"foo.csv\", index_col=0)\n    end_time = time.time()\n    pandas_duration = end_time - start_time\n    print(\"Time to read_csv with pandas: {} seconds\".format(round(pandas_duration, 3)))\n    return pandas_df\n\n  def read_csv_with_modin():\n    start_time = time.time()\n    modin_df = pd.read_csv(\"foo.csv\", index_col=0)\n    end_time = time.time()\n    modin_duration = end_time - start_time\n    print(\"Time to read_csv with Modin: {} seconds\".format(round(modin_duration, 3))) \n    return modin_df\n\n  for i in range(5):\n    pandas_df = read_csv_with_pandas()\n    modin_df = read_csv_with_modin()\n\n  Time to read_csv with pandas: 0.708 seconds\n  Time to read_csv with Modin: 4.132 seconds\n  Time to read_csv with pandas: 0.735 seconds\n  Time to read_csv with Modin: 0.37 seconds\n  Time to read_csv with pandas: 0.646 seconds\n  Time to read_csv with Modin: 0.377 seconds\n  Time to read_csv with pandas: 0.673 seconds\n  Time to read_csv with Modin: 0.371 seconds\n  Time to read_csv with pandas: 0.672 
seconds\n  Time to read_csv with Modin: 0.379 seconds\n\n**Solution**\n\nSo far there is no solution to fix or work around the problem other than not passing a non-empty runtime_env to ``ray.init()``.\nHowever, this may lead to another problem regarding a race condition in Ray between the import thread and the thread executing the code.\nSo for now we just highlight the problem in hope of a future fix in Ray itself.\n\nAlso, it is worth noting that every distributed engine by its nature has a little overhead for the first operation being called,\nwhich may be important for microbenchmarks. What you likely want to do is warm up worker processes\neither by excluding the time of the first iteration from your measurements or by executing a simple function in workers to fully set them up.\n\nCommon errors\n-------------\n\nError when using Dask engine: ``RuntimeError: if __name__ == '__main__':``\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nThe following `script.py` uses Modin with Dask as an execution engine and produces errors:\n\n.. code-block:: python\n\n  # script.py\n  import modin.pandas as pd\n  import modin.config as cfg\n\n  cfg.Engine.put(\"dask\")\n\n  df = pd.DataFrame([0,1,2,3])\n  print(df)\n\nA part of the errors produced by the script above would be the following:\n\n.. 
code-block::\n\n  File \"/path/python3.9/multiprocessing/spawn.py\", line 134, in _check_not_importing_main\n    raise RuntimeError('''\n    RuntimeError: \n        An attempt has been made to start a new process before the\n        current process has finished its bootstrapping phase.\n\n        This probably means that you are not using fork to start your\n        child processes and you have forgotten to use the proper idiom\n        in the main module:\n\n            if __name__ == '__main__':\n                freeze_support()\n                ...\n\n        The \"freeze_support()\" line can be omitted if the program\n        is not going to be frozen to produce an executable.\n\nThis happens because Dask Client uses `fork <https://docs.python.org/3/library/multiprocessing.html#contexts-and-start-methods>`_\nto start processes.\n\n**Solution**\n\nTo avoid the problem the Dask Client creation code needs to be moved into the ``__main__`` scope of the module.\n\nThe corrected `script.py` would look like:\n\n.. code-block:: python\n\n  # script.py\n  import modin.pandas as pd\n  import modin.config as cfg\n\n  cfg.Engine.put(\"dask\")\n\n  if __name__ == \"__main__\":\n    df = pd.DataFrame([0, 1, 2, 3]) # Dask Client creation is hidden in the first call of Modin functionality.\n    print(df)\n\nor\n\n.. 
code-block:: python\n\n  # script.py\n  from distributed import Client\n  import modin.pandas as pd\n  import modin.config as cfg\n\n  cfg.Engine.put(\"dask\")\n\n  if __name__ == \"__main__\":\n    # Explicit Dask Client creation.\n    # Look at the Dask Distributed documentation with respect to the Client configuration suited to you most.\n    client = Client()\n    df = pd.DataFrame([0, 1, 2, 3])\n    print(df)\n\nSpurious error \"cannot import partially initialised pandas module\" on custom Ray cluster\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nIf you're using some pre-configured Ray cluster to run Modin, it's possible you would\nbe seeing spurious errors like\n\n.. code-block::\n\n  ray.exceptions.RaySystemError: System error: partially initialized module 'pandas' has no attribute 'core' (most likely due to a circular import)\n  traceback: Traceback (most recent call last):\n    File \"/usr/share/miniconda/envs/modin/lib/python3.8/site-packages/ray/serialization.py\", line 340, in deserialize_objects\n      obj = self._deserialize_object(data, metadata, object_ref)\n    File \"/usr/share/miniconda/envs/modin/lib/python3.8/site-packages/ray/serialization.py\", line 237, in _deserialize_object\n      return self._deserialize_msgpack_data(data, metadata_fields)\n    File \"/usr/share/miniconda/envs/modin/lib/python3.8/site-packages/ray/serialization.py\", line 192, in _deserialize_msgpack_data\n      python_objects = self._deserialize_pickle5_data(pickle5_data)\n    File \"/usr/share/miniconda/envs/modin/lib/python3.8/site-packages/ray/serialization.py\", line 180, in _deserialize_pickle5_data\n      obj = pickle.loads(in_band, buffers=buffers)\n    File \"/usr/share/miniconda/envs/modin/lib/python3.8/site-packages/pandas/__init__.py\", line 135, in <module>\n      from pandas import api, arrays, errors, io, plotting, testing, 
tseries\n    File \"/usr/share/miniconda/envs/modin/lib/python3.8/site-packages/pandas/testing.py\", line 6, in <module>\n      from pandas._testing import (\n    File \"/usr/share/miniconda/envs/modin/lib/python3.8/site-packages/pandas/_testing/__init__.py\", line 979, in <module>\n      cython_table = pd.core.common._cython_table.items()\n  AttributeError: partially initialized module 'pandas' has no attribute 'core' (most likely due to a circular import)\n\n**Solution**\n\nModin contains a workaround that should automatically do ``import pandas`` upon worker process starts.\n\nIt is triggered by the presence of non-empty ``__MODIN_AUTOIMPORT_PANDAS__`` environment variable which\nModin sets up automatically on the Ray clusters it spawns, but it might be missing on pre-configured clusters.\n\nSo if you're seeing the issue like shown above, please make sure you set this environment variable on all\nworker nodes of your cluster before actually spawning the workers.\n\n.. _issue: https://github.com/modin-project/modin/issues\n.. _Slack: https://modin.org/slack.html\n"
  },
  {
    "path": "docs/getting_started/using_modin/using_modin.rst",
    "content": "Using Modin\n===========\n\nIn this section, we show how Modin can be used to accelerate your pandas workflows on a \nsingle machine up to multiple machines in a cluster setting.\n\n.. toctree::\n    :maxdepth: 4\n    \n    using_modin_locally\n    using_modin_cluster\n    "
  },
  {
    "path": "docs/getting_started/using_modin/using_modin_cluster.rst",
    "content": "Using Modin in a Cluster\n========================\n\n.. note::\n  | *Estimated Reading Time: 15 minutes*\n\nOften in practice we have a need to exceed the capabilities of a single machine.\nModin works and performs well in both local mode and in a cluster environment.\nThe key advantage of Modin is that your python code does not change between\nlocal development and cluster execution. Users are not required to think about\nhow many workers exist or how to distribute and partition their data;\nModin handles all of this seamlessly and transparently.\n\n.. note::\n   It is possible to use a Jupyter notebook, but you will have to deploy a Jupyter server \n   on the remote cluster head node and connect to it.\n\n.. image:: ../../img/modin_cluster.png\n   :alt: Modin cluster\n   :align: center\n\nExtra requirements for AWS authentication\n-----------------------------------------\n\nFirst of all, install the necessary dependencies in your environment:\n\n.. code-block:: bash\n\n   pip install boto3\n\nThe next step is to setup your AWS credentials. One can set  ``AWS_ACCESS_KEY_ID``, \n``AWS_SECRET_ACCESS_KEY`` and ``AWS_SESSION_TOKEN`` (Optional)\n(refer to `AWS CLI environment variables`_ to get more insight on this) or  \njust run the following command:\n\n.. 
code-block:: bash\n\n   aws configure\n\nStarting and connecting to the cluster\n--------------------------------------\n\nThis example starts 1 head node (m5.24xlarge) and 5 worker nodes (m5.24xlarge), 576 total CPUs.\nYou can check the `Amazon EC2 pricing`_ page.\n\nIt is possible to manually create AWS EC2 instances and configure them or just use the `Ray CLI`_ to \ncreate and initialize a Ray cluster on AWS using `Modin's Ray cluster setup config`_,\nwhich we are going to utilize in this example.\nRefer to `Ray's autoscaler options`_ page on how to modify the file.\n\nMore details on how to launch a Ray cluster can be found on `Ray's cluster docs`_.\n\nTo start up the Ray cluster, run the following command in your terminal:\n\n.. code-block:: bash\n\n   ray up modin-cluster.yaml\n\nOnce the head node has completed initialization, you can optionally connect to it by running the following command.\n\n.. code-block:: bash\n\n   ray attach modin-cluster.yaml\n\nTo exit the ssh session and return back into your local shell session, type:\n\n.. code-block:: bash\n\n   exit\n\nExecuting in a cluster environment\n----------------------------------\n\n.. note::\n   Be careful when using the `Ray client`_ to connect to a remote cluster.\n   We don't recommend this connection mode, because it may not work. Known bugs:\n\n   - https://github.com/ray-project/ray/issues/38713,\n   - https://github.com/modin-project/modin/issues/6641.\n\nModin lets you instantly speed up your workflows with large data by scaling pandas\non a cluster. In this tutorial, we will use a 12.5 GB ``big_yellow.csv`` file that was\ncreated by concatenating a 200MB `NYC Taxi dataset`_ file 64 times. Preparing this\nfile was provided as part of our `Modin's Ray cluster setup config`_.\n\nIf you want to use the other dataset, you should provide it to each of\nthe cluster nodes with the same path. 
We recommend doing this by customizing the\n``setup_commands`` section of the `Modin's Ray cluster setup config`_.\n\nTo run any script in a remote cluster, you need to submit it to Ray. In this way,\nthe script file is sent to the remote cluster head node and executed there. \n\nIn this tutorial, we provide the `exercise_5.py`_ script, which reads the data from the\nCSV file and executes such pandas operations as count, groupby and map.\nAs the result, you will see the size of the file being read and the execution time of the entire script.\n\nYou can submit this script to the existing remote cluster by running the following command.\n\n.. code-block:: bash\n\n   ray submit modin-cluster.yaml exercise_5.py\n\nTo download or upload files to the cluster head node, use ``ray rsync_down`` or ``ray rsync_up``.\nIt may help if you want to use some other Python modules that should be available to\nexecute your own script or download a result file after executing the script.\n\n.. code-block:: bash\n\n   # download a file from the cluster to the local machine:\n   ray rsync_down modin-cluster.yaml '/path/on/cluster' '/local/path'\n   # upload a file from the local machine to the cluster:\n   ray rsync_up modin-cluster.yaml '/local/path' '/path/on/cluster'\n\nShutting down the cluster\n--------------------------\n\nNow that we have finished the computation, we need to shut down the cluster with `ray down` command.\n\n.. code-block:: bash\n\n   ray down modin-cluster.yaml\n\n.. _`Ray's autoscaler options`: https://docs.ray.io/en/latest/cluster/vms/references/ray-cluster-configuration.html#cluster-config\n.. _`Ray's cluster docs`: https://docs.ray.io/en/latest/cluster/getting-started.html\n.. _`NYC Taxi dataset`: https://modin-datasets.intel.com/testing/yellow_tripdata_2015-01.csv\n.. _`Modin's Ray cluster setup config`: https://github.com/modin-project/modin/blob/main/examples/tutorial/jupyter/execution/pandas_on_ray/cluster/modin-cluster.yaml\n.. 
_`Amazon EC2 pricing`: https://aws.amazon.com/ec2/pricing/on-demand/\n.. _`exercise_5.py`: https://github.com/modin-project/modin/blob/main/examples/tutorial/jupyter/execution/pandas_on_ray/cluster/exercise_5.py\n.. _`Ray client`: https://docs.ray.io/en/latest/cluster/running-applications/job-submission/ray-client.html\n.. _`Ray CLI`: https://docs.ray.io/en/latest/cluster/vms/getting-started.html#running-applications-on-a-ray-cluster\n.. _`AWS CLI environment variables`: https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-envvars.html"
  },
  {
    "path": "docs/getting_started/using_modin/using_modin_locally.rst",
    "content": "===================\nUsing Modin Locally\n===================\n\n.. note::\n  | *Estimated Reading Time: 5 minutes*\n  | You can follow along this tutorial in the `Jupyter notebook`_.\n\nIn our quickstart example, we have already seen how you can achieve considerable\nspeedup from Modin, even on a single machine. Users do not need to know how many\ncores their system has, nor do they need to specify how to distribute the data. In fact,\nusers can **continue using their existing pandas code** while experiencing a\nconsiderable speedup from Modin, even on a single machine.\n\nTo use Modin on a single machine, only a modification of the import statement is needed.\nOnce you've changed your import statement, you're ready to use Modin\njust like you would pandas, since the API is identical to pandas.\n\n.. code-block:: python\n\n  # import pandas as pd\n  import modin.pandas as pd\n\n**That's it. You're ready to use Modin on your previous pandas workflows!**\n\nAdvanced: Configuring the resources Modin uses\n----------------------------------------------\n\nModin automatically checks the number of CPUs available on your machine and sets the\nnumber of partitions to be equal to the number of CPUs. You can verify this by running\nthe following code:\n\n.. code-block:: python\n\n   import modin\n   print(modin.config.NPartitions.get()) #prints 16 on a laptop with 16 physical cores\n\nModin fully utilizes the resources on your machine. To read more about how this works,\nsee :doc:`Why Modin? </getting_started/why_modin/pandas/>` page for more details.\n\nSince Modin will use all of the resources available on your machine by default, at\ntimes, it is possible that you may like to limit the amount of resources Modin uses to\nfree resources for another task or user. Here is how you would limit the number of CPUs\nModin uses in your bash environment variables:\n\n.. 
code-block:: bash\n\n   export MODIN_CPUS=4\n\n\nYou can also specify this in your python script with ``os.environ``:\n\n.. code-block:: python\n\n   import os\n   os.environ[\"MODIN_CPUS\"] = \"4\"\n   import modin.pandas as pd\n\nIf you're using a specific engine and want more control over the environment Modin\nuses, you can start Ray or Dask in your environment and Modin will connect to it.\n\n.. code-block:: python\n\n   import ray\n   ray.init(num_cpus=4)\n   import modin.pandas as pd\n\nSpecifying ``num_cpus`` limits the number of processors that Modin uses. You may also\nspecify more processors than you have available on your machine; however this will not\nimprove the performance (and might end up hurting the performance of the system).\n\n.. note::\n   Make sure to update the ``MODIN_CPUS`` configuration and initialize your preferred\n   engine before you start working with the first operation using Modin! Otherwise,\n   Modin will opt for the default setting.\n\n\n.. _`Jupyter notebook`: https://github.com/modin-project/modin/tree/main/examples/quickstart.ipynb\n"
  },
  {
    "path": "docs/getting_started/why_modin/modin_vs_dask_vs_koalas.rst",
    "content": "Modin vs. Dask DataFrame vs. Koalas\n===================================\n\nLibraries such as `Dask DataFrame <https://docs.dask.org/en/stable/dataframe.html>`_ (DaskDF for short) and `Koalas <https://koalas.readthedocs.io/en/latest/>`_ aim to support the pandas API on top of distributed computing frameworks, Dask and Spark respectively. Instead, Modin aims to preserve the pandas API and behavior as is, while abstracting away the details of the distributed computing framework underneath. Thus, the aims of these libraries are fundamentally different.\n\nSpecifically, Modin enables pandas-like\n\n* row and column-parallel operations, unlike DaskDF and Koalas that only support row-parallel operations\n* indexing & ordering semantics, unlike DaskDF and Koalas that deviate from these semantics\n* eager execution, unlike DaskDF and Koalas that provide lazy execution\n\nAs a result, Modin's coverage is `more than 90% <https://github.com/modin-project/modin#pandas-api-coverage>`_ of the pandas API, while DaskDF and Koalas' coverage is about 55%. \n\n.. figure:: ../../img/api_coverage_comparison.svg\n   :align: center\n   :alt: Percentage coverage of the pandas API after deduplication\n\nFor more technical details please see our VLDB 2022 research paper, referenced `here <https://people.eecs.berkeley.edu/~totemtang/paper/Modin.pdf>`_. \n\nBrief Overview of DaskDF and Koalas\n-----------------------------------\n\nDask's `DataFrame <https://docs.dask.org/en/stable/dataframe.html>`_ (DaskDF) is effectively a meta-DataFrame, partitioning and scheduling many smaller ``pandas.DataFrame`` objects. Users construct a task graph of dataframe computation step by step and then trigger computation using the ``compute`` function.\n\nSpark's `Koalas <https://koalas.readthedocs.io/en/latest/>`_ provides the pandas API on Spark, leveraging the preexisting Spark SQL optimizer to execute select pandas commands. 
Like DaskDF, Koalas also employs lazy computation, only triggering computation when the user requests to see the results.\n\nPartitioning and Parallelization\n--------------------------------\n\nModin, DaskDF, Koalas are all examples of parallel dataframe systems. Parallelism is achieved by partitioning a large dataframe into smaller ones that can be operated on in parallel. As a result, the partitioning scheme chosen by the system dictates the pandas functions that can or can not be supported.\n\n**DaskDF and Koalas only support row-oriented partitioning and parallelism.** This approach is analogous to relational databases. The dataframe is conceptually broken down into horizontal partitions along rows, where each partition is independently processed if possible. When DaskDF or Koalas are required to perform column-parallel operations that need to be done on columns independently (e.g., dropping columns with null values via ``dropna`` on the column ``axis``), they either perform very poorly with no parallelism or do not support that operation.\n\n**Modin supports both row, column, and cell-oriented partitioning and parallelism**. That is, the dataframe can be conceptually broken down as groups of rows, groups of columns, or both groups of rows and groups of columns (effectively a block or sub-matrix). Modin will transparently reshape the partitioning as necessary for the corresponding operation, based on whether the operation is row-parallel, column-parallel, or cell-parallel (independently applied to each unit cell). This allows Modin to support more of the pandas API and do so efficiently. Due to the finer-grained control over the partitioning, Modin can support a number of operations that are very challenging to parallelize in row-oriented systems (e.g., ``transpose``, ``median``, ``quantile``). 
This flexibility in partitioning also gives Modin tremendous power to implement efficient straggler mitigation and improve utilization over the entire cluster.\n\nAPI Coverage\n------------\n\nOne of the key benefits of pandas is its versatility, due to the wide array of operations, with more than 600+ API operations for data cleaning, feature engineering, data transformation, data summarization, data exploration, and machine learning. However, it is not trivial to develop scalable implementations of each of these operations in a dataframe system.\n**DaskDF and Koalas only implements about** `55%  <https://arxiv.org/abs/2001.00888>`_ **of the pandas API**; they do not implement certain APIs that would deviate from the row-wise partitioning approach, or would be inefficient with the row-wise parallelization. For example, Dask does not implement ``iloc``, ``MultiIndex``, ``apply(axis=0)``, ``quantile`` (only approximate quantile is available), ``median``, and more. Given DaskDF's row-oriented architecture, ``iloc``, for example, can technically be implemented, but it would be inefficient, and column-wise operations such as ``apply(axis=0)`` would be impossible to implement. Similarly, Koalas does not implement ``apply(axis=0)`` (it only applies the function per row partition, giving a different result), ``quantile``, ``median`` (only approximate quantile/median is available), ``MultiIndex``, ``combine``, ``compare`` and more.\n\n**Modin supports all of the above pandas API functions, as well as others, with** `more than 90% <https://github.com/modin-project/modin#pandas-api-coverage>`_ **coverage of the pandas API.**  Modin additionally acts as a drop-in replacement for pandas, such that even if the API is not yet supported, it still works by falling back to running vanilla pandas. 
One of the key features of being a drop-in replacement is that not only will it work for existing code, if a user wishes to go back to running pandas directly, they are not locked in to using Modin and can switch between Modin and pandas at no cost. In other words, scripts and notebooks written in Modin can be converted to and from pandas as the user desires by simply replacing the import statement.\n\nExecution Semantics\n---------------------\n\n**DaskDF and Koalas make use of lazy evaluation, which means that the computation is delayed until users explicitly evaluate the results.** This mode of evaluation places a lot of optimization responsibility on the user, forcing them to think about when it would be useful to inspect the intermediate results or delay doing so. Specifically, DaskDF's API differs from pandas in that it requires users to explicitly call ``.compute()`` to materialize the result of the computation. Often if that computation corresponds to a long chain of operators, this call can take a very long time to execute. Overall, the need to explicitly trigger computation makes the API less convenient to work with, but gives DaskDF and Koalas the opportunity to perform holistic optimizations over the entire dataflow graph. However, to the best of our knowledge, neither DaskDF nor Koalas actually leverage holistic optimizations.\n\n**Modin employs eager evaluation, like pandas.** Eager evaluation is the default mode of operation for data scientists when working with pandas in an interactive environment, such as Jupyter Notebooks. Modin reproduces this familiar behavior by performing all computations eagerly as soon as it is issued, so that users can inspect intermediate results and quickly see the results of their computations without having to wait or explicitly trigger computation. 
This is especially useful during interactive data analysis, where users often iterate on their dataframe workflows or build up their dataframe queries in an incremental fashion. We also have developed techniques for `opportunistic evaluation <https://arxiv.org/pdf/2103.02145.pdf>`_ that bridges the gap between lazy and eager evaluation that will be incorporated in Modin in the future.\n\nOrdering Semantics\n------------------\n\nBy default, pandas preserves the order of the dataframe, so that users can expect a consistent, ordered view as they are operating on their dataframe. \n\n**Both DaskDF and Koalas make no guarantees about the order of rows in the DataFrame.**  This is because DaskDF sorts the ``index`` for optimization purposes to speed up computations that involve the row index; and as a result, it does not support user-specified order. Likewise, Koalas `does not support ordering <https://koalas.readthedocs.io/en/latest/whatsnew/v0.27.0.html#head-ordering>`_ by default because it will lead to a performance overhead when operating on distributed datasets. \n\n**DaskDF additionally does not support multi-indexing or sorting.** \nDaskDF sorts the data based on a single set of row labels for fast row lookups, and builds an indexing structure based on these row labels. Data is both logically and physically stored in the same order. As a result, DaskDF does not support a `sort` function.\n\n**Modin reproduces the intuitive behavior in pandas where the order of the DataFrame is preserved, and supports multi-indexing.** Enforcing ordering on a parallel dataframe system like Modin requires non-trivial effort that involves decoupling of the logical and physical representation of the data, enabling the order to be lazily kept up-to-date, but eagerly computed based on user needs (See Section 4.2 in `our recent paper <https://people.eecs.berkeley.edu/~totemtang/paper/Modin.pdf>`_). 
Modin abstracts away the physical representation of the data and provides an ordered view that is consistent with user's expectations.\n\nCompatibility with Computational Frameworks\n-------------------------------------------\n\n**DaskDF and Koalas are meant to be run on Dask and Spark respectively.** They are highly tuned to the corresponding frameworks, and cannot be ported to other computational frameworks.\n\n**Modin's highly modular design is architected to run on a variety of systems, and support a variety of APIs.** The goal for the extensible design is that users can take the same notebook or script and seamlessly move between different clusters and environments, with Modin being able to support the pandas API on your preexisting infrastructure. Currently, Modin supports running on Dask's compute engine in addition to Ray. The modular design makes it easier for developers to add different execution engines or compile to different memory formats. Modin can run on a Dask cluster in the same way that DaskDF can, but they differ in the ways described above. In addition, Modin is continually expanding to support popular data processing APIs (SQL in addition to pandas, among other DSLs for data processing) while leveraging the same underlying execution framework. Modin's flexible architecture also means that as the `pandas API continues to evolve <https://data-apis.org/blog/announcing_the_consortium/>`_, Modin can quickly move towards supporting new versions of the pandas API.\n\n.. figure:: ../../img/performance-all-supported.svg\n   :align: center\n   :alt: Scalability of operators supported by Modin and other systems\n   :width: 95%\n\nPerformance Comparison\n----------------------\n\n**On operations supported by all systems, Modin provides substantial speedups.** Thanks to its optimized design, Modin is able to take advantage of multiple cores relative to both Koalas and DaskDF to efficiently execute pandas operations. 
It is notable that Koalas is often slower than pandas, due to the overhead of Spark. \n\n.. figure:: ../../img/performance-not-all-supported.svg\n   :align: center\n   :alt: Scalability of operators supported by Modin but not by other systems\n\n**Modin provides substantial speedups even on operators not supported by other systems.** Thanks to its flexible partitioning schemes that enable it to support the vast majority of pandas operations — be it row, column, or cell-oriented — Modin provides benefits on operations such as ``join``, ``median``, and ``infer_types``. While Koalas performs ``join`` slower than Pandas, Dask failed to support ``join`` on more than 20M rows, likely due to poor support for `shuffles <https://coiled.io/blog/better-shuffling-in-dask-a-proof-of-concept/>`_. Details of the benchmark and additional join experiments can be found in `our paper <https://people.eecs.berkeley.edu/~totemtang/paper/Modin.pdf>`_.\n\n.. _documentation: http://docs.dask.org/en/latest/DataFrame.html#design.\n.. _Modin's documentation: https://modin.readthedocs.io/en/latest/development/architecture.html\n"
  },
  {
    "path": "docs/getting_started/why_modin/out_of_core.rst",
    "content": "Out-of-memory data with Modin\n=============================\n\n.. note::\n  | *Estimated Reading Time: 10 minutes*\n  \nWhen using pandas, you might run into a memory error if you are working with large datasets that cannot fit in memory or perform certain memory-intensive operations (e.g., joins). \n\nModin solves this problem by spilling over to disk, in other words, it uses your disk as an overflow for memory so that you can work with datasets that are too large to fit in memory. By default, Modin leverages out-of-core methods to handle datasets that don't fit in memory for both Ray and Dask engines.\n\n.. note::\n  Object spilling is disabled in a multi-node Ray cluster by default. To enable object spilling\n  use `Ray instruction <https://docs.ray.io/en/latest/ray-core/objects/object-spilling.html#cluster-mode>`_.\n\n\nMotivating Example: Memory error with pandas\n--------------------------------------------\n\npandas makes use of in-memory data structures to store and operate on data, which means that if you have a dataset that is too large to fit in memory, it will cause an error on pandas. As an example, let's create an 80GB DataFrame by appending together 40 different 2GB DataFrames. \n\n.. code-block:: python\n\n  import pandas\n  import numpy as np\n  df = pandas.concat([pandas.DataFrame(np.random.randint(0, 100, size=(2**20, 2**8))) for _ in range(40)]) # Memory Error!\n\nWhen we run this on a laptop with 32GB of RAM, pandas will run out of memory and throw an error (e.g., :code:`MemoryError` , :code:`Killed: 9`). \n\nThe `pandas documentation <https://pandas.pydata.org/pandas-docs/stable/user_guide/scale.html>`_ has a great section on recommendations for scaling your analysis to these larger datasets. However, this generally involves loading in less data or rewriting your pandas code to process the data in smaller chunks. 
\n\nOperating on out-of-memory data with Modin\n------------------------------------------\n\nIn order to work with data that exceeds memory constraints, you can use Modin to handle these large datasets.\n\n.. code-block:: python\n\n  import modin.pandas as pd\n  import numpy as np\n  df = pd.concat([pd.DataFrame(np.random.randint(0, 100, size=(2**20, 2**8))) for _ in range(40)]) # 40x2GB frames -- Working!\n  df.info()\n\nNot only does Modin let you work with datasets that are too large to fit in memory, we can perform various operations on them without worrying about memory constraints. \n\nAdvanced: Configuring out-of-core settings\n------------------------------------------\n\n.. why would you want to disable out of core?\n\nBy default, out-of-core functionality is enabled by the compute engine selected. \nTo disable it, start your preferred compute engine with the appropriate arguments. For example:\n\n.. code-block:: python\n\n  import modin.pandas as pd\n  import ray\n\n  ray.init(_plasma_directory=\"/tmp\")  # setting to disable out of core in Ray\n  df = pd.read_csv(\"some.csv\")\n\nIf you are using Dask, you have to modify local configuration files. Visit the\nDask documentation_ on object spilling for more details.\n\n\n.. _documentation: https://distributed.dask.org/en/latest/worker.html#memory-management\n"
  },
  {
    "path": "docs/getting_started/why_modin/pandas.rst",
    "content": "How does Modin differ from pandas?\n==================================\n\n.. note:: \n  | *Estimated Reading Time: 10 minutes*\n\nIn the earlier tutorials, we have seen how Modin can be used to speed up pandas workflows. Here, we discuss at a high level how Modin works, in particular, how Modin's dataframe implementation differs from pandas. \n\nScalability of implementation\n-----------------------------\n\nModin exposes the pandas API through ``modin.pandas``, but it does not inherit the same pitfalls and design decisions that make it difficult to scale. \nThe pandas implementation is inherently single-threaded. This means that only one of\nyour CPU cores can be utilized at any given time. In a laptop, it would look something\nlike this with pandas:\n\n.. image:: /img/pandas_multicore.png\n   :alt: pandas is single threaded!\n   :align: center\n   :scale: 80%\n\nHowever, Modin's implementation enables you to use all of the cores on your machine, or\nall of the cores in an entire cluster. On a laptop, it will look something like this:\n\n.. image:: /img/modin_multicore.png\n   :alt: modin uses all of the cores!\n   :align: center\n   :scale: 80%\n\nThe additional utilization leads to improved performance, however if you want to scale\nto an entire cluster, Modin suddenly looks something like this:\n\n.. image:: /img/modin_cluster.png\n   :alt: modin works on a cluster too!\n   :align: center\n   :scale: 30%\n\nModin is able to efficiently make use of all of the hardware available to it!\n\nMemory usage and immutability\n-----------------------------\n\nThe pandas API contains many cases of \"inplace\" updates, which are known to be\ncontroversial. 
This is due in part to the way pandas manages memory:  the user may\nthink they are saving memory, but pandas is usually copying the data whether an\noperation was inplace or not.\n\nModin allows for inplace semantics, but the underlying data structures within Modin's\nimplementation are immutable, unlike pandas. This immutability gives Modin the ability\nto internally chain operators and better manage memory layouts, because they will not\nbe changed. This leads to improvements over pandas in memory usage in many common cases,\ndue to the ability to share common memory blocks among all dataframes.\n\nModin provides the inplace semantics by having a mutable pointer to the immutable\ninternal Modin dataframe. This pointer can change, but the underlying data cannot, so\nwhen an inplace update is triggered, Modin will treat it as if it were not inplace and\njust update the pointer to the resulting Modin dataframe.\n\nAPI vs implementation\n---------------------\n\nIt is well known that the pandas API contains many duplicate ways of performing the same\noperation. Modin instead enforces that any one behavior have one and only one\nimplementation internally. This guarantee enables Modin to focus on and optimize a\nsmaller code footprint while still guaranteeing that it covers the entire pandas API.\nModin has an internal algebra, which is roughly 15 operators, narrowed down from the\noriginal >200 that exist in pandas. The algebra is grounded in both practical and\ntheoretical work. Learn more in our `VLDB 2020 paper`_. More information about this\nalgebra can be found in the :doc:`architecture </development/architecture>` documentation.\n\n.. _VLDB 2020 paper: https://arxiv.org/abs/2001.00888\n"
  },
  {
    "path": "docs/getting_started/why_modin/why_modin.rst",
    "content": "Why Modin?\n==========\n\nIn this section, we explain the design and motivation behind Modin and why you should use Modin to scale up your pandas workflows. We first describe the architectural differences between pandas and Modin. Then we describe how Modin can also help resolve out-of-memory issues common to pandas. Finally, we look at the key differences between Modin and other distributed dataframe libraries. \n\n.. toctree::\n    :maxdepth: 4\n    \n    pandas\n    out_of_core\n    modin_vs_dask_vs_koalas\n\nModin is built on many years of research and development at UC Berkeley. For more information on how this works underneath the hoods, check out our publications in this space:\n\n- `Flexible Rule-Based Decomposition and Metadata Independence in Modin <https://people.eecs.berkeley.edu/~totemtang/paper/Modin.pdf>`_ (VLDB 2021)\n- `Enhancing the Interactivity of Dataframe Queries by Leveraging Think Time <https://arxiv.org/pdf/2103.02145.pdf>`_ (IEEE Data Eng 2021)\n- `Dataframe Systems: Theory, Architecture, and Implementation <https://www2.eecs.berkeley.edu/Pubs/TechRpts/2021/EECS-2021-193.pdf>`_ (PhD Dissertation 2021)\n- `Scaling Data Science does not mean Scaling Machines <http://cidrdb.org/cidr2021/papers/cidr2021_abstract11.pdf>`_ (CIDR 2021)\n- `Towards Scalable Dataframe Systems <https://arxiv.org/pdf/2001.00888.pdf>`_ (VLDB 2020)\n"
  },
  {
    "path": "docs/index.rst",
    "content": ".. image:: img/MODIN_ver2_hrz.png\n   :width: 400px\n   :alt: modin logo\n   :align: center\n\n====\n\n.. toctree::\n   :hidden:\n\n   getting_started/quickstart\n   usage_guide/index\n   supported_apis/index\n   development/index\n   ecosystem\n   contact\n\n.. raw:: html\n\n    <p align=\"center\"><b>To use Modin, replace the pandas import:</b></p>\n\n.. figure:: img/Modin_Pandas_Import.gif\n   :align: center\n\nScale your pandas workflow by changing a single line of code\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nModin uses Ray_, Dask_ or Unidist_ to provide an effortless way to speed up your pandas notebooks,\nscripts, and libraries. Unlike other distributed DataFrame libraries, Modin provides\nseamless integration and compatibility with existing pandas code. Even using the\nDataFrame constructor is identical.\n\n.. code-block:: python\n\n  import modin.pandas as pd\n  import numpy as np\n\n  frame_data = np.random.randint(0, 100, size=(2**10, 2**8))\n  df = pd.DataFrame(frame_data)\n\nIt is not necessary to know in advance the available hardware resources in order to use Modin.\nAdditionally, it is not necessary to specify how to distribute or place data.\nModin acts as a drop-in replacement for pandas, which means that you can continue using your previous\npandas notebooks, *unchanged*, while experiencing a considerable speedup thanks to Modin, even on a single\nmachine. Once you've changed your import statement, you’re ready to use Modin just like\nyou would pandas.\n\nInstallation and choosing your compute engine\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nModin can be installed from PyPI:\n\n.. code-block:: bash\n\n   pip install modin\n\n\nIf you don't have Ray_, Dask_ or Unidist_ installed, you will need to install Modin with one\nof the targets:\n\n.. 
code-block:: bash\n\n   pip install \"modin[ray]\" # Install Modin dependencies and Ray to run on Ray\n   pip install \"modin[dask]\" # Install Modin dependencies and Dask to run on Dask\n   pip install \"modin[mpi]\" # Install Modin dependencies and MPI to run on MPI through unidist\n   pip install \"modin[all]\" # Install all of the above\n\nModin will automatically detect which engine you have installed and use that for\nscheduling computation!\n\nIf you want to choose a specific compute engine to run on, you can set the environment\nvariable ``MODIN_ENGINE`` and Modin will do computation with that engine:\n\n.. code-block:: bash\n\n   export MODIN_ENGINE=ray  # Modin will use Ray\n   export MODIN_ENGINE=dask  # Modin will use Dask\n   export MODIN_ENGINE=unidist # Modin will use Unidist\n\nIf you want to choose the Unidist engine, you should set the additional environment \nvariable ``UNIDIST_BACKEND``, because currently Modin only supports MPI through unidist:\n\n.. code-block:: bash\n\n   export UNIDIST_BACKEND=mpi # Unidist will use MPI backend\n\nThis can also be done within a notebook/interpreter before you import Modin:\n\n.. code-block:: python\n\n   import os\n\n   os.environ[\"MODIN_ENGINE\"] = \"ray\"  # Modin will use Ray\n   os.environ[\"MODIN_ENGINE\"] = \"dask\"  # Modin will use Dask\n\n   os.environ[\"MODIN_ENGINE\"] = \"unidist\" # Modin will use Unidist\n   os.environ[\"UNIDIST_BACKEND\"] = \"mpi\" # Unidist will use MPI backend\n\n   import modin.pandas as pd\n\nFaster pandas, even on your laptop\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\n.. image:: img/read_csv_benchmark.png\n   :height: 350px\n   :width: 300px\n   :alt: Plot of read_csv\n   :align: right\n\nThe ``modin.pandas`` `DataFrame`_ is an extremely light-weight parallel DataFrame. Modin\ntransparently distributes the data and computation so that all you need to do is\ncontinue using the pandas API as you were before installing Modin. 
Unlike other parallel\nDataFrame systems, Modin is an extremely light-weight, robust DataFrame. Because it is so\nlight-weight, Modin provides speed-ups of up to 4x on a laptop with 4 physical cores.\n\nIn pandas, you are only able to use one core at a time when you are doing computation of\nany kind. With Modin, you are able to use all of the CPU cores on your machine. Even in\n``read_csv``, we see large gains by efficiently distributing the work across your entire\nmachine.\n\n.. code-block:: python\n\n  import modin.pandas as pd\n\n  df = pd.read_csv(\"my_dataset.csv\")\n\nModin is a DataFrame for datasets from 1MB to 1TB+\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nWe have focused heavily on bridging the solutions between DataFrames for small data\n(e.g. pandas) and large data. Often data scientists require different tools for doing\nthe same thing on different sizes of data. The DataFrame solutions that exist for 1MB do\nnot scale to 1TB+, and the overheads of the solutions for 1TB+ are too costly for\ndatasets in the 1KB range. With Modin, because of its light-weight, robust, and scalable\nnature, you get a fast DataFrame at 1MB and 1TB+.\n\n**Modin is currently under active development. Requests and contributions are welcome!**\n\nIf you are interested in learning more about Modin, please check out the :doc:`Getting Started</getting_started/quickstart>`\nguide then refer to the :doc:`Developer Documentation</development/index>` section,\nwhere you can find system architecture, internal implementation details, and other useful information.\nAlso check out the `Github`_ to view open issues and make contributions.\n\n.. _Dataframe: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html\n.. _Ray: https://github.com/ray-project/ray/\n.. _Dask: https://dask.org/\n.. _Unidist: https://github.com/modin-project/unidist/\n.. _Github: https://github.com/modin-project/modin\n"
  },
  {
    "path": "docs/release-procedure.md",
    "content": "## Versioning\n\n### Patch release\n\nModin uses semantic versioning. So when doing a patch release, please make a separate branch\noff the previous release tag, and `git cherry-pick` **only** the commits we would like to have in our\npatch release (assuming previous release was versioned `X.Y.Z`):\n\n        git checkout -b release-X.Y.Z+1 X.Y.Z\n\n### Major and Minor releases\n\nA major (`xx.0.0`) or minor (`0.xx.0`) release could be done by branching from `main`:\n\n        git checkout -b release-X.Y.0 main\n\n## Preparing the release\n\nBefore continuing with the release process, make sure that automated CI which runs on each commit passed successfully with the commit you deem as a \"release candidate\".\n\nModin follows the \"no push\" logic, which is _only_ circumvented for cherry-picked commits,\nas reviewing them again would not add a lot of value but would add lots of excess work.\n\nHence non-cherry-pick commits should happen in a separate branch in your own fork, and\nbe delivered to the release branch by using a PR.\n\nNote that Modin uses fully signed commits, so you have to have GPG keys set up. See [onboarding instructions](https://github.com/modin-project/modin/blob/main/contributing/contributing.md) on where to get started.\n\nTo update Modin version, follow the instructions below.\n\n### Preparing the repo for a Major or Minor Version\n\n**Note**: this should be done in your fork of Modin.\n\nFirst, update your fork of Modin's main with the main repo's main. From your main, create a new\nbranch called `release-X.Y.0` off of main. Create an empty commit in your new branch with the message\n`Release version X.Y.0`. Make sure to sign this commit with both your GPG key\nand with the conventional `git commit -s` (so `git commit -s -S`). 
Open a PR against modin-project/modin with just this commit.\n\n### Preparing the repo for a Patch Version\n\n**Note**: this should be done in the original Modin repository (in `upstream`) .\n\nFirst, you must create a new branch in the upstream (main modin-project/modin) repo for the new release.\nThis branch must be named `release-X.Y.Z`, and should be made off of the tag for the last release. To\ndo this, use `git checkout -b release-X.Y.Z+1 X.Y.Z` to create the branch for the new release. Once\nthis branch has been created, cherry-pick the commits that will go into this release, and push this\nbranch to `upstream`.\n\n**Note**: now you must switch to your fork of Modin.\n\nFrom your fork of Modin, fetch the upstream repo, and checkout the release branch you made above.\nFrom this release branch, create a new branch.\n\nFrom your new branch, edit the `README.md` so that the PyPi badge will\npoint to the badge for this specific version (instead of latest) and so that the docs link will point\nto the docs for this specific version (rather than latest).\n\nOnce the badges have been edited, create a commit, the same as for a major or minor version,\nwith the message `Release version X.Y.Z`, and make sure to sign it with both your GPG key, and the\ntraditional git sign-off. 
Create a PR using your branch against the `release-X.Y.Z` branch in the\noriginal Modin repo.\n\n### Tag commit\n\nAfter the PR has been merged, clone a clean copy of the Modin repo from the modin-project organization.\nYou now need to tag the commit that corresponds to the above PR with the appropriate tag for this release.\n\n**Note**: from now on you work on the `main` branch (in `upstream`) for a major or minor release,\nor the `release-X.Y.Z` branch (in `upstream`) for a patch release.\n\n        git tag -as X.Y.Z\n\n  * Use `scripts/release.py` to draft the release notes (might be as simple as `python scripts/release.py notes > draft.txt`)\n    * If you're experiencing [rate limiting by GitHub](https://docs.github.com/en/rest/overview/resources-in-the-rest-api#rate-limiting) during username resolving, pass a token via `--token` option to the script\n    * Fill in the placeholder for summary of the release\n    * Please look into PR sections and split them if necessary into smaller but better fitting ones, as the script only categorizes by prefix (`FIX-`, `TEST-`, etc.)\n    * Make sure to correctly resolve contributors whom script failed to transform to GitHub usernames if there are any!\n  * Include release documentation in the annotation and make sure it is signed.\n  * Push the tag to `main` or `release-X.Y.Z` branch: `git push upstream X.Y.Z`\n    * If you're re-pushing a tag (beware! you shouldn't be doing that, no, _really_!), you can remove remote tag and push a local one by `git push upstream :refs/tags/X.Y.Z`\n\n\n### Build wheel:\n\n**Note**: This should be done from your clean clone of the `upstream` Modin\nrepository from the modin-project organization, where you made the release tag.\n\n```bash\n# Install/update tools\npip install --upgrade build twine\n# Build a pure Python wheel.\npython3 setup.py sdist bdist_wheel\n```\n\nYou may see the wheel in the `dist` folder: `ls -l dist`. 
Make sure the version is correct.\nAlso make sure there is a `tar` file that contains the source.\n\n### Upload wheels:\n\nMake sure you have an active PyPI account which has write access to Modin pypi repo, and make sure you have a pypi token set up.\n\nUse `twine` to upload wheels:\n\n```bash\ntwine upload dist/*\n```\n\nWhen asked for account, provide `__token__` (exactly as stated), when asked for password, present your token from pypi.\n\n### Check with `pip install`:\n\nRun `pip install -U \"modin[all]\"` on Linux, Mac, and Windows systems in a new environment\nto test that the wheels were uploaded correctly.\n\n## Make Github and conda-forge release\n\n### Github\n\nOnce the tag has been published, we need to make the release on GitHub. Go to the\n[Release page](https://github.com/modin-project/modin/releases), and click on `Draft a new release`.\nChoose the tag you made above from the dropdown menu, and copy paste the name of the release \nin the `Release title` box. Next, copy paste the release notes from above into the box labelled\n`Describe this release`. This will ensure that the release notes on GitHub are Markdown formatted.\n\nDouble check that everything looks good by clicking `Preview`, and then hit the green `Publish release`\nbutton!\n\n### Conda-forge\n\nConda-forge has a bot which watches for new releases of software packaged through it,\nand in case of Modin it waits either for Github releases or for tags and then makes\na new automatic PR with version increment.\n\nYou should watch for that PR and, fixing any issues if there are some, merge it\nto make new Modin release appear in `conda-forge` channel. 
For detailed instructions\non how to ensure the PR passes CI and is merge-able, check out [the how-to page in the modin-feedstock repo](https://github.com/conda-forge/modin-feedstock/blob/main/HOWTO.md)!\n\n## Publicize Release\nOnce the release has been finalized, make sure to post an announcement\nin the #general channel of the public Modin Slack!\n"
  },
  {
    "path": "docs/release_notes/release_notes-0.14.0.rst",
    "content": ":orphan:\n\nModin 0.14.0\n\nKey Features and Updates\n------------------------\n\n* Stability and Bugfixes\n  * FIX-#4058: Allow pickling empty dataframes and series (#4095)\n  * FIX-#4136: Fix exercise_3.ipynb example notebook (#4137)\n  * FIX-#4105: Fix names of pandas options to avoid `OptionError` (#4109)\n  * FIX-#3417: Fix read_csv with skiprows and header parameters (#3419)\n  * FIX-#4142: Fix OmniSci enabling (#4146)\n  * FIX-#4162: Use `skipif` instead of `skip` for compatibility with pytest 7.0 (#4163)\n  * FIX-#4158: Do not print OmniSci logs to stdout by default (#4159)\n  * FIX-#4177: Support read_feather from pathlike objects (#4177)\n  * FIX-#4234: Upgrade pandas to 1.4.1 (#4235)\n  * FIX-#3368: support unsigned integers in OmniSci backend (#4256)\n  * FIX-#4057: Allow reading an empty parquet file (#4075)\n  * FIX-#3884: Fix read_excel() dropping empty rows (#4161)\n  * FIX-#4257: Fix Categorical() for scalar categories (#4258)\n  * FIX-#4300: Fix Modin Categorical column dtype categories (#4276)\n  * FIX-#4208: Fix lazy metadata update for `PandasDataFrame.from_labels` (#4209)\n  * FIX-#3981, FIX-#3801, FIX-#4149: Stop broadcasting scalars to set items (#4160)\n  * FIX-#4185: Fix rolling across column partitions (#4262)\n  * FIX-#4303: Fix the syntax error in reading from postgres (#4304)\n  * FIX-#4308: Add proper error handling in df.set_index (#4309)\n  * FIX-#4056: Allow an empty parse_date list in `read_csv_glob` (#4074)\n  * FIX-#4312: Fix constructing categorical frame with duplicate column names (#4313).\n  * FIX-#4314: Allow passing a series of dtypes to astype (#4318)\n  * FIX-#4310: Handle lists of lists of ints in read_csv_glob (#4319)\n* Performance enhancements\n  * FIX-#4138, FIX-#4009: remove redundant sorting in the internal '.mask()' flow (#4140)\n  * FIX-#4183: Stop shallow copies from creating global shared state. 
(#4184)\n* Benchmarking enhancements\n  * FIX-#4221: add `wait` method for `PandasOnRayDataframeColumnPartition` class (#4231)\n* Refactor Codebase\n  * REFACTOR-#3990: remove code duplication in `PandasDataframePartition` hierarchy (#3991)\n  * REFACTOR-#4229: remove unused `dask_client` global variable in `modin\\pandas\\__init__.py` (#4230)\n  * REFACTOR-#3997: remove code duplication for `broadcast_apply` method (#3996)\n  * REFACTOR-#3994: remove code duplication for `get_indices` function (#3995)\n  * REFACTOR-#4331: remove code duplication for `to_pandas`, `to_numpy` functions in `QueryCompiler` hierarchy (#4332)\n  * REFACTOR-#4213: Refactor `modin/examples/tutorial/` directory (#4214)\n  * REFACTOR-#4206: add assert check into `__init__` method of `PandasOnDaskDataframePartition` class (#4207)\n  * REFACTOR-#3900: add flake8-no-implicit-concat plugin and refactor flake8 error codes (#3901)\n  * REFACTOR-#4093: Refactor base to be smaller (#4220)\n  * REFACTOR-#4047: Rename `cluster` directory to `cloud` in examples (#4212)\n  * REFACTOR-#3853: interacting with Dask interface through `DaskWrapper` class (#3854)\n  * REFACTOR-#4322: Move is_reduce_fn outside of groupby_agg (#4323)\n* Pandas API implementations and improvements\n  * FEAT-#3603: add experimental `read_custom_text` function that can read custom line-by-line text files (#3441)\n  * FEAT-#979: Enable reading from SQL server (#4279)\n* OmniSci enhancements\n  *\n* XGBoost enhancements\n  *\n* Developer API enhancements\n  * FEAT-#4245: Define base interface for dataframe exchange protocol (#4246)\n  * FEAT-#4244: Implement dataframe exchange protocol for HdkOnNative execution (#4269)\n  * FEAT-#4144: Implement dataframe exchange protocol for pandas storage format (#4150)\n  * FEAT-#4342: Support `from_dataframe` for pandas storage format (#4343)\n* Update testing suite\n  * TEST-#3628: Report coverage data for `test-internals` CI job (#4198)\n  * TEST-#3938: Test tutorial notebooks in CI 
(#4145)\n  * TEST-#4153: Fix condition of running lint-commit and set of CI triggers (#4156)\n  * TEST-#4201: Add read_parquet, explode, tail, and various arithmetic functions to asv_bench (#4203)\n* Documentation improvements\n  * DOCS-#4077: Add release notes template to docs folder (#4078)\n  * DOCS-#4082: Add pdf/epub/htmlzip formats for doc builds (#4083)\n  * DOCS-#4168: Fix rendering the examples on troubleshooting page (#4169)\n  * DOCS-#4151: Add info in troubleshooting page related to Dask engine usage (#4152)\n  * DOCS-#4172: Refresh Intel Distribution of Modin paragraph (#4175)\n  * DOCS-#4173: Mention strict channel priority in conda install section (#4178)\n  * DOCS-#4176: Update OmniSci usage section (#4192)\n  * DOCS-#4027: Add GIF images and chart to Modin README demonstrating speedups (#4232)\n  * DOCS-#3954: Add Dask example notebooks (#4139)\n  * DOCS-#4272: Add bar chart comparisons to quick start guide (#4277)\n  * DOCS-#3953: Add docs and notebook examples on running Modin with OmniSci (#4001)\n  * DOCS-#4280: Change links in jupyter notebooks (#4281)\n  * DOCS-#4290: Add changes for OmniSci notebooks (#4291)\n  * DOCS-#4241: Update warnings and docs regarding defaulting to pandas (#4242)\n  * DOCS-#3099: Fix `BasePandasDataSet` docstrings warnings (#4333)\n  * DOCS-#4339: Reformat I/O functions docstrings (#4341)\n  * DOCS-#4336: Reformat general utilities docstrings (#4338)\n* Dependencies\n  * FIX-#4113, FIX-#4116, FIX-#4115: Apply new `black` formatting, fix pydocstyle check and readthedocs build (#4114)\n  * TEST-#3227: Use codecov github action instead of bash form in GA workflows (#3226)\n  * FIX-#4115: Unpin `pip` in readthedocs deps list (#4170)\n  * TEST-#4217: Pin `Dask<2022.2.0` as a temporary fix of CI (#4218)\n\nContributors\n------------\n\n@prutskov\n@amyskov\n@paulovn\n@anmyachev\n@YarShev\n@RehanSD\n@devin-petersohn\n@dchigarev\n@Garra1980\n@mvashishtha\n@naren-ponder\n@jeffreykennethli\n@dorisjlee\n@Rubtsowa\n"
  },
  {
    "path": "docs/release_notes/release_notes-0.15.0.rst",
    "content": ":orphan:\n\nModin 0.15.0\n\nKey Features and Updates\n------------------------\n\n* Stability and Bugfixes\n  * FIX-#4376: Upgrade pandas to 1.4.2 (#4377)\n  * FIX-#3615: Relax some deps in development env (#4365)\n  * FIX-#4370: Fix broken docstring links (#4375)\n  * FIX-#4392: Align Modin XGBoost with xgb>=1.6 (#4393)\n  * FIX-#4385: Get rid of `use-deprecated` option in `pip` (#4386)\n  * FIX-#3527: Fix parquet partitioning issue causing negative row length partitions (#4368)\n  * FIX-#4330: Override the memory limit to start ray 1.11.0 on Macs (#4335)\n  * FIX-#4407: Align `insert` function with pandas in case of numpy array with several columns (#4408)\n  * FIX-#4373: Fix invalid file path when trying `read_csv_glob` with `usecols` parameter (#4405)\n  * FIX-#4394: Fix issue with multiindex metadata desync (#4395)\n  * FIX-#4438: Fix `reindex` function that doesn't preserve initial index metadata (#4442)\n  * FIX-#4425: Add parameters to groupby pct_change (#4429)\n  * FIX-#4457: Fix `loc` in case when need reindex item (#4457)\n  * FIX-#4414: Add missing f prefix on f-strings found at https://codereview.doctor (#4415)\n  * FIX-#4461: Fix S3 CSV data path (#4462)\n  * FIX-#4467: `drop_duplicates` no longer removes items based on index values (#4468)\n  * FIX-#4449: Drain the call queue before waiting on result in benchmark mode (#4472)\n  * FIX-#4518: Fix Modin Logging to report specific Modin warnings/errors (#4519)\n  * FIX-#4481: Allow clipping with a Modin Series of bounds (#4486)  \n  * FIX-#4504: Support na_action in applymap (#4505)\n  * FIX-#4503: Stop the memory logging thread after session exit (#4515)\n  * FIX-#4531: Fix a makedirs race condition in to_parquet (#4533)\n  * FIX-#4464: Refactor Ray utils and quick fix groupby.count failing on virtual partitions (#4490)\n  * FIX-#4436: Fix to_pydatetime dtype for timezone None (#4437)\n  * FIX-#4541: Fix merge_asof with non-unique right index (#4542)\n* Performance enhancements\n  * 
FEAT-#4320: Add connectorx as an alternative engine for read_sql (#4346)\n  * PERF-#4493: Use partition size caches more in Modin dataframe (#4495)\n* Benchmarking enhancements\n  * FEAT-#4371: Add logging to Modin (#4372)\n  * FEAT-#4501: Add RSS Memory Profiling to Modin Logging (#4502)\n  * FEAT-#4524: Split Modin API and Memory log files (#4526)\n* Refactor Codebase\n  * REFACTOR-#4284: use variable length unpacking when getting results from `deploy` function (#4285)\n  * REFACTOR-#3642: Move PyArrow storage format usage from main feature to experimental ones (#4374)\n  * REFACTOR-#4003: Delete the deprecated cloud mortgage example (#4406)\n  * REFACTOR-#4513: Fix spelling mistakes in docs and docstrings (#4514)\n  * REFACTOR-#4510: Align experimental and regular IO modules initializations (#4511)\n* Pandas API implementations and improvements\n  *\n* OmniSci enhancements\n  *\n* XGBoost enhancements\n  *\n* Developer API enhancements\n  * FEAT-#4359: Add __dataframe__ method to the protocol dataframe (#4360)\n* Update testing suite\n  * TEST-#4363: Use Ray from pypi in CI (#4364)\n  * FIX-#4422: get rid of case sensitivity for `warns_that_defaulting_to_pandas` (#4423)\n  * TEST-#4426: Stop passing is_default kwarg to Modin and pandas (#4428)\n  * FIX-#4439: Fix flake8 CI fail (#4440)\n  * FIX-#4409: Fix `eval_insert` utility that doesn't actually check results of `insert` function (#4410)\n  * TEST-#4482: Fix getitem and loc with series of bools (#4483).\n* Documentation improvements\n  * DOCS-#4296: Fix docs warnings (#4297)\n  * DOCS-#4388: Turn off fail_on_warning option for docs build (#4389)\n  * DOCS-#4469: Say that commit messages can start with PERF (#4470).\n  * DOCS-#4466: Recommend GitHub issues over bug_reports@modin.org (#4474).  
\n  * DOCS-#4487: Recommend GitHub issues over feature_requests@modin.org (#4489).\n  * DOCS-#4545: Add socials to README (#4555).\n* Dependencies\n  * FIX-#4327: Update min pin for xgboost version (#4328)\n  * FIX-#4383: Remove `pathlib` from deps (#4384)\n  * FIX-#4390: Add `redis` to Modin dependencies (#4396)\n  * FIX-#3689: Add black and flake8 into development environment files (#4480)\n  * TEST-#4516: Add numpydoc to developer requirements (#4517)\n* New Features\n  * FEAT-#4412: Add Batch Pipeline API to Modin (#4452)\n\nContributors\n------------\n@YarShev\n@Garra1980\n@prutskov\n@alexander3774\n@amyskov\n@wangxiaoying\n@jeffreykennethli\n@mvashishtha\n@anmyachev\n@dchigarev\n@devin-petersohn\n@jrsacher\n@orcahmlee\n@naren-ponder\n@RehanSD\n"
  },
  {
    "path": "docs/release_notes/release_notes-0.16.0.rst",
    "content": ":orphan:\n\nModin 0.16.0\n\nKey Features and Updates\n------------------------\n\n* Stability and Bugfixes\n  * FIX-#4570: Replace ``np.bool`` -> ``np.bool_`` (#4571)\n  * FIX-#4543: Fix `read_csv` in case skiprows=<0, []> (#4544)\n  * FIX-#4059: Add cell-wise execution for binary ops, fix bin ops for empty dataframes (#4391)\n  * FIX-#4589: Pin protobuf<4.0.0 to fix ray (#4590)\n  * FIX-#4577: Set attribute of Modin dataframe to updated value (#4588)\n  * FIX-#4411: Fix binary_op between datetime64 Series and pandas timedelta (#4592)\n  * FIX-#4604: Fix `groupby` + `agg` in case when multicolumn can arise (#4642)\n  * FIX-#4582: Inherit custom log layer (#4583)\n  * FIX-#4639: Fix `storage_options` usage for `read_csv` and `read_csv_glob` (#4644)\n  * FIX-#4593: Ensure Modin warns when setting columns via attributes (#4621)\n  * FIX-#4584: Enable pdb debug when running cloud tests (#4585)\n  * FIX-#4564: Workaround import issues in Ray: auto-import pandas on python start if env var is set (#4603)\n  * FIX-#4641: Reindex pandas partitions in `df.describe()` (#4651)\n  * FIX-#2064: Fix `iloc`/`loc` assignment when dataframe is empty (#4677)\n  * FIX-#4634: Check for FrozenList as `by` in `df.groupby()` (#4667)\n  * FIX-#4680: Fix `read_csv` that started defaulting to pandas again in case of reading from a buffer and when a buffer has a non-zero starting position (#4681)\n  * FIX-#4491: Wait for all partitions in parallel in benchmark mode (#4656)\n  * FIX-#4358: MultiIndex `loc` shouldn't drop levels for full-key lookups (#4608)\n  * FIX-#4658: Expand exception handling for `read_*` functions from s3 storages (#4659)\n  * FIX-#4672: Fix incorrect warning when setting `frame.index` or `frame.columns` (#4721)\n  * FIX-#4686: Propagate metadata and drain call queue in unwrap_partitions (#4697)\n  * FIX-#4652: Support categorical data in `from_dataframe` (#4737)\n  * FIX-#4756: Correctly propagate `storage_options` in `read_parquet` (#4764)\n  * 
FIX-#4657: Use `fsspec` for handling s3/http-like paths instead of `s3fs` (#4710)\n  * FIX-#4676: drain sub-virtual-partition call queues (#4695)\n  * FIX-#4782: Exclude certain non-parquet files in `read_parquet` (#4783)\n  * FIX-#4808: Set dtypes correctly after column rename (#4809)\n  * FIX-#4811: Apply dataframe -> not_dataframe functions to virtual partitions (#4812)\n  * FIX-#4099: Use mangled column names but keep the original when building frames from arrow (#4767)\n  * FIX-#4838: Bump up modin-spreadsheet to latest master (#4839)\n  * FIX-#4840: Change modin-spreadsheet version for notebook requirements (#4841)\n  * FIX-#4835: Handle Pathlike paths in `read_parquet` (#4837)\n  * FIX-#4872: Stop checking the private ray mac memory limit (#4873)\n  * FIX-#4914: `base_lengths` should be computed from `base_frame` instead of `self` in `copartition` (#4915)\n  * FIX-#4848: Fix rebalancing partitions when NPartitions == 1 (#4874)\n  * FIX-#4927: Fix `dtypes` computation in `dataframe.filter` (#4928)\n  * FIX-#4907: Implement `radd` for Series and DataFrame (#4908)\n  * FIX-#4945: Fix `_take_2d_positional` that loses indexes due to filtering empty dataframes (#4951)\n  * FIX-#4818, PERF-#4825: Fix where by using the new n-ary operator (#4820)\n  * FIX-#3983, FIX-#4107: Materialize 'rowid' columns when selecting rows by position (#4834)\n  * FIX-#4845: Fix KeyError from `__getitem_bool` for single row dataframes (#4845)\n  * FIX-#4734: Handle Series.apply when return type is a DataFrame (#4830)\n  * FIX-#4983: Set `frac` to `None` in _sample when `n=0` (#4984)\n  * FIX-#4993: Return `_default_to_pandas` in `df.attrs` (#4995)\n  * FIX-#5043: Fix `execute` function in ASV utils failed if `len(partitions) == 0` (#5044)\n  * FIX-#4597: Refactor Partition handling of func, args, kwargs (#4715)\n  * FIX-#4996: Evaluate BenchmarkMode at each function call (#4997)\n  * FIX-#4022: Fixed empty data frame with index (#4910)\n  * FIX-#4090: Fixed check if the index is 
trivial (#4936)\n  * FIX-#4966: Fix `to_timedelta` to return Series instead of TimedeltaIndex (#5028)\n  * FIX-#5042: Fix series __getitem__ with invalid strings (#5048)\n  * FIX-#4691: Fix binary operations between virtual partitions (#5049)  \n  * FIX-#5045: Fix ray virtual_partition.wait with duplicate object refs (#5058)\n* Performance enhancements\n  * PERF-#4182: Add cell-wise execution for binary ops, fix bin ops for empty dataframes (#4391)\n  * PERF-#4288: Improve perf of `groupby.mean` for narrow data (#4591)\n  * PERF-#4772: Remove `df.copy` call from `from_pandas` since it is not needed for Ray and Dask (#4781)\n  * PERF-#4325: Improve perf of multi-column assignment in `__setitem__` when no new column names are assigning (#4455)\n  * PERF-#3844: Improve perf of `drop` operation (#4694)\n  * PERF-#4727: Improve perf of `concat` operation (#4728)\n  * PERF-#4705: Improve perf of arithmetic operations between `Series` objects with shared `.index` (#4689)\n  * PERF-#4703: Improve performance in accessing `ser.cat.categories`, `ser.cat.ordered`, and `ser.__array_priority__` (#4704)\n  * PERF-#4305: Parallelize `read_parquet` over row groups (#4700)\n  * PERF-#4773: Compute `lengths` and `widths` in `put` method of Dask partition like Ray do (#4780)\n  * PERF-#4732: Avoid overwriting already-evaluated `PandasOnRayDataframePartition._length_cache` and `PandasOnRayDataframePartition._width_cache` (#4754)\n  * PERF-#4862: Don't call `compute_sliced_len.remote` when `row_labels/col_labels == slice(None)` (#4863)\n  * PERF-#4713: Stop overriding the ray MacOS object store size limit (#4792)\n  * PERF-#4851: Compute `dtypes` for binary operations that can only return bool type and the right operand is not a Modin object (#4852)\n  * PERF-#4842: `copy` should not trigger any previous computations (#4843)\n  * PERF-#4849: Compute `dtypes` in `concat` also for ROW_WISE case when possible (#4850)\n  * PERF-#4929: Compute `dtype` when using `Series.dt` accessor 
(#4930)\n  * PERF-#4892: Compute `lengths` in `rebalance_partitions` when possible (#4893)\n  * PERF-#4794: Compute caches in `_propagate_index_objs` (#4888)\n  * PERF-#4860: `PandasDataframeAxisPartition.deploy_axis_func` should be serialized only once (#4861)\n  * PERF-#4890: `PandasDataframeAxisPartition.drain` should be serialized only once (#4891)\n  * PERF-#4870: Avoid index materialization in `__getattribute__` and `__getitem__` (#4911)\n  * PERF-#4886: Use lazy index and columns evaluation in `query` method (#4887)\n  * PERF-#4866: `iloc` function that used in `partition.mask` should be serialized only once (#4901)\n  * PERF-#4920: Avoid index and cache computations in `take_2d_labels_or_positional` unless they are needed (#4921)\n  * PERF-#4999: don't call `apply` in virtual partition's `drain_call_queue` if `call_queue` is empty (#4975)\n  * PERF-#4268: Implement partition-parallel __getitem__ for bool Series masks (#4753)\n  * PERF-#5017: `reset_index` shouldn't trigger index materialization if possible (#5018)\n  * PERF-#4963: Use partition `width/length` methods instead of `_compute_axis_labels_and_lengths` if index is already known (#4964)\n  * PERF-#4940: Optimize categorical dtype check in `concatenate` (#4953)\n* Benchmarking enhancements\n  * TEST-#5066: Add outer join case for `TimeConcat` benchmark (#5067)\n  * TEST-#5083: Add `merge` op with categorical data (#5084)\n  * FEAT-#4706: Add Modin ClassLogger to PandasDataframePartitionManager (#4707)\n  * TEST-#5014: Simplify adding new ASV benchmarks (#5015)\n  * TEST-#5064: Update `TimeConcat` benchmark with new parameter `ignore_index` (#5065)\n  * PERF-#4944: Avoid default_to_pandas in ``Series.cat.codes``, ``Series.dt.tz``, and ``Series.dt.to_pytimedelta`` (#4833)\n  * TEST-#5068: Add binary op benchmark for Series (#5069)\n* Refactor Codebase\n  * REFACTOR-#4530: Standardize access to physical data in partitions (#4563)\n  * REFACTOR-#4534: Replace logging meta class with class decorator 
(#4535)\n  * REFACTOR-#4708: Delete combine dtypes (#4709)\n  * REFACTOR-#4629: Add type annotations to modin/config (#4685)\n  * REFACTOR-#4717: Improve PartitionMgr.get_indices() usage (#4718)\n  * REFACTOR-#4730: make Indexer immutable (#4731)\n  * REFACTOR-#4774: remove `_build_treereduce_func` call from `_compute_dtypes` (#4775)\n  * REFACTOR-#4750: Delete BaseDataframeAxisPartition.shuffle (#4751)\n  * REFACTOR-#4722: Stop suppressing undefined name lint (#4723)\n  * REFACTOR-#4832: unify `split_result_of_axis_func_pandas` (#4831)\n  * REFACTOR-#4796: Introduce constant for __reduced__ column name (#4799)\n  * REFACTOR-#4000: Remove code duplication for `PandasOnRayDataframePartitionManager` (#4895)\n  * REFACTOR-#3780: Remove code duplication for `PandasOnDaskDataframe` (#3781)\n  * REFACTOR-#4530: Unify access to physical data for any partition type (#4829)\n  * REFACTOR-#4978: Align `modin/core/execution/dask/common/__init__.py` with `modin/core/execution/ray/common/__init__.py` (#4979)\n  * REFACTOR-#4949: Remove code duplication in `default2pandas/dataframe.py` and `default2pandas/any.py` (#4950)\n  * REFACTOR-#4976: Rename `RayTask` to `RayWrapper` in accordance with Dask (#4977)\n  * REFACTOR-#4885: De-duplicated take_2d_labels_or_positional methods (#4883)\n  * REFACTOR-#5005: Use `finalize` method instead of list comprehension + `drain_call_queue` (#5006)\n  * REFACTOR-#5001: Remove `jenkins` stuff (#5002)\n  * REFACTOR-#5026: Change exception names to simplify grepping (#5027)\n  * REFACTOR-#4970: Rewrite base implementations of a partition's `width/length` (#4971)  \n  * REFACTOR-#4942: Remove `call` method in favor of `register` due to duplication (#4943)\n  * REFACTOR-#4922: Helpers for take_2d_labels_or_positional (#4865)\n  * REFACTOR-#5024: Make `_row_lengths` and `_column_widths` public (#5025)\n  * REFACTOR-#5009: Use `RayWrapper.materialize` instead of `ray.get` (#5010)\n  * REFACTOR-#4755: Rewrite Pandas version mismatch warning (#4965)\n  * 
REFACTOR-#5012: Add mypy checks for singleton files in base modin directory (#5013)\n  * REFACTOR-#5038: Remove unnecessary _method argument from resamplers (#5039)\n  * REFACTOR-#5081: Remove `c323f7fe385011ed849300155de07645.db` file (#5082)\n* Pandas API implementations and improvements\n  * FEAT-#4670: Implement convert_dtypes by mapping across partitions (#4671)\n* OmniSci enhancements\n  * FEAT-#4913: Enabling pyhdk\n* XGBoost enhancements\n  *\n* Developer API enhancements\n  *\n* Update testing suite\n  * TEST-#4508: Reduce test_partition_api pytest threads to deflake it (#4551)\n  * TEST-#4550: Use much less data in test_partition_api (#4554)\n  * TEST-#4610: Remove explicit installation of `black`/`flake8` for omnisci ci-notebooks (#4609)\n  * TEST-#2564: Add caching and use mamba for conda setups in GH (#4607)\n  * TEST-#4557: Delete multiindex sorts instead of xfailing (#4559)\n  * TEST-#4698: Stop passing invalid storage_options param (#4699)\n  * TEST-#4745: Pin flake8 to <5 to workaround installation conflict (#4752)\n  * TEST-#4875: XFail tests failing due to file gone missing (#4876)\n  * TEST-#4879: Use pandas `ensure_clean()` in place of `io_tests_data` (#4881)\n  * TEST-#4562: Use local Ray cluster in CI to resolve flaky `test-compat-win` (#5007)\n  * TEST-#5040: Rework test_series using eval_general() (#5041)\n  * TEST-#5050: Add black to pre-commit hook (#5051)\n* Documentation improvements\n  * DOCS-#4552: Change default sphinx language to en to fix sphinx >= 5.0.0 build (#4553)\n  * DOCS-#4628: Add to_parquet partial support notes (#4648)\n  * DOCS-#4668: Set light theme for readthedocs page, remove theme switcher (#4669)\n  * DOCS-#4748: Apply the Triage label to new issues (#4749)\n  * DOCS-#4790: Give all templates issue type and triage labels (#4791)\n  * DOCS-#4521: Document how to benchmark modin (#5020)\n* Dependencies\n  * FEAT-#4598: Add support for pandas 1.4.3 (#4599)\n  * FEAT-#4619: Integrate mypy static type checking (#4620)\n  
* FEAT-#4202: Allow dask past 2022.2.0 (#4769)\n  * FEAT-#4925: Upgrade pandas to 1.4.4 (#4926)\n  * TEST-#4998: Add flake8 plugins to dev requirements (#5000)\n* New Features\n  * FEAT-#4463: Add experimental fuzzydata integration for testing against a randomized dataframe workflow (#4556)\n  * FEAT-#4419: Extend virtual partitioning API to pandas on Dask (#4420)\n  * FEAT-#4147: Add partial compatibility with Python 3.6 and pandas 1.1 (#4301)\n  * FEAT-#4569: Add error message when `read_` function defaults to pandas (#4647)\n  * FEAT-#4725: Make index and columns lazy in Modin DataFrame (#4726)\n  * FEAT-#4664: Finalize compatibility support for Python 3.6 (#4800)\n  * FEAT-#4746: Sync interchange protocol with recent API changes (#4763)\n  * FEAT-#4733: Support fastparquet as engine for `read_parquet` (#4807)\n  * FEAT-#4766: Support fsspec URLs in `read_csv` and `read_csv_glob` (#4898)\n  * FEAT-#4827: Implement `infer_types` dataframe algebra operator (#4871)\n  * FEAT-#4989: Switch pandas version to 1.5 (#5037)\n\nContributors\n------------\n@mvashishtha\n@NickCrews\n@prutskov\n@vnlitvinov\n@pyrito\n@suhailrehman\n@RehanSD\n@helmeleegy\n@anmyachev\n@d33bs\n@noloerino\n@devin-petersohn\n@YarShev\n@naren-ponder\n@jbrockmendel\n@ienkovich\n@Garra1980\n@Billy2551\n"
  },
  {
    "path": "docs/release_notes/release_notes-template.rst",
    "content": ":orphan:\n\nModin X.X.X\n\nKey Features and Updates\n------------------------\n\n* Stability and Bugfixes\n  *\n* Performance enhancements\n  *\n* Benchmarking enhancements\n  *\n* Refactor Codebase\n  *\n* Pandas API implementations and improvements\n  *\n* HDK enhancements\n  *\n* XGBoost enhancements\n  *\n* Developer API enhancements\n  *\n* Update testing suite\n  *\n* Documentation improvements\n  *\n* Dependencies\n  *\n* New Features\n\nContributors\n------------\n\n"
  },
  {
    "path": "docs/requirements-doc.txt",
    "content": "# install current modin checkout to bring all required dependencies\n.[all]\n# now install some more optional dependencies\ncolorama\nclick\nflatbuffers\nfuncsigs\nmock\nopencv-python\npydata_sphinx_theme\npyyaml\nrecommonmark\nsphinx<6.0.0\nsphinx-click\nray>=2.10.0,<3\n# Override to latest version of modin-spreadsheet\ngit+https://github.com/modin-project/modin-spreadsheet.git@49ffd89f683f54c311867d602c55443fb11bf2a5\nsphinxcontrib_plantuml\nsphinx-issues\n"
  },
  {
    "path": "docs/supported_apis/dataframe_supported.rst",
    "content": "``pd.DataFrame`` supported APIs\n===================================\n\nThe following table lists both implemented and not implemented methods. If you have need\nof an operation that is listed as not implemented, feel free to open an issue on the\n`GitHub repository`_, or give a thumbs up to already created issues. Contributions are\nalso welcome!\n\nThe following table is structured as follows: The first column contains the method name.\nThe second column contains a link to a description of the corresponding pandas method.\nThe third column is a flag for whether or not there is an implementation in Modin for\nthe method in the left column. ``Y`` stands for yes, ``N`` stands for no, ``P`` stands\nfor partial (meaning some parameters may not be supported yet), and ``D`` stands for\ndefault to pandas.\n\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| DataFrame method           | pandas Doc link           | Implemented? 
(Y/N/P/D) | Notes for Current implementation                   |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``T``                      | `T`_                      | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``abs``                    | `abs`_                    | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``add``                    | `add`_                    | Y                      | **Ray** and **Dask**: Shuffles data in operations  |\n|                            |                           |                        | between DataFrames.                                |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``add_prefix``             | `add_prefix`_             | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``add_suffix``             | `add_suffix`_             | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``agg`` / ``aggregate``    | `agg`_ / `aggregate`_     | P                      | - Dictionary ``func`` parameter defaults to pandas |\n|                            |                           |                        | - Numpy operations default to pandas               |\n| 
                           |                           |                        |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``align``                  | `align`_                  | D                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``all``                    | `all`_                    | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``any``                    | `any`_                    | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``apply``                  | `apply`_                  | Y                      | See ``agg``                                        |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``applymap``               | `applymap`_               | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``asfreq``                 | `asfreq`_                 | D                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``asof``                   | `asof`_                   | Y          
            |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``assign``                 | `assign`_                 | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``astype``                 | `astype`_                 | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``at``                     | `at`_                     | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``at_time``                | `at_time`_                | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``axes``                   | `axes`_                   | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``between_time``           | `between_time`_           | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``bfill``                  | `bfill`_                  | Y                      |                                                    
|\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``bool``                   | `bool`_                   | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``boxplot``                | `boxplot`_                | D                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``clip``                   | `clip`_                   | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``combine``                | `combine`_                | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``combine_first``          | `combine_first`_          | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``compare``                | `compare`_                | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``copy``                   | `copy`_                   | Y                      |                                                    
|\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``corr``                   | `corr`_                   | P                      | Correlation floating point precision may slightly  |\n|                            |                           |                        | differ from pandas. For now pearson method is      |\n|                            |                           |                        | available only. For other methods and for          |\n|                            |                           |                        | ``numeric_only`` defaults to pandas.               |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``corrwith``               | `corrwith`_               | D                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``count``                  | `count`_                  | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``cov``                    | `cov`_                    | P                      | Covariance floating point precision may slightly   |\n|                            |                           |                        | differ from pandas. For ``numeric_only``           |\n|                            |                           |                        | defaults to pandas.                                
|\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``cummax``                 | `cummax`_                 | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``cummin``                 | `cummin`_                 | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``cumprod``                | `cumprod`_                | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``cumsum``                 | `cumsum`_                 | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``describe``               | `describe`_               | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``diff``                   | `diff`_                   | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``div``                    | `div`_                    | Y                      | See ``add``                                        
|\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``divide``                 | `divide`_                 | Y                      | See ``add``                                        |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``dot``                    | `dot`_                    | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``drop``                   | `drop`_                   | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``droplevel``              | `droplevel`_              | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``drop_duplicates``        | `drop_duplicates`_        | D                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``dropna``                 | `dropna`_                 | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``dtypes``                 | `dtypes`_                 | Y                      |                                                    
|\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``duplicated``             | `duplicated`_             | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``empty``                  | `empty`_                  | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``eq``                     | `eq`_                     | Y                      | See ``add``                                        |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``equals``                 | `equals`_                 | Y                      | Requires shuffle, can be further optimized         |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``eval``                   | `eval`_                   | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``ewm``                    | `ewm`_                    | D                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``expanding``              | `expanding`_              | D                      |                                                    
|\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``explode``                | `explode`_                | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``ffill``                  | `ffill`_                  | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``fillna``                 | `fillna`_                 | P                      | ``value`` parameter of type DataFrame defaults to  |\n|                            |                           |                        | pandas.                                            |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``filter``                 | `filter`_                 | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``first``                  | `first`_                  | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``first_valid_index``      | `first_valid_index`_      | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``floordiv``               | `floordiv`_               | Y     
                 | See ``add``                                        |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``from_dict``              | `from_dict`_              | D                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``from_records``           | `from_records`_           | D                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``ge``                     | `ge`_                     | Y                      | See ``add``                                        |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``get``                    | `get`_                    | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``groupby``                | `groupby`_                | Y                      | Not yet optimized for all operations.              
|\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``gt``                     | `gt`_                     | Y                      | See ``add``                                        |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``head``                   | `head`_                   | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``hist``                   | `hist`_                   | D                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``iat``                    | `iat`_                    | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``idxmax``                 | `idxmax`_                 | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``idxmin``                 | `idxmin`_                 | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``iloc``                   | `iloc`_                   | Y                      |                                                    
|\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``infer_objects``          | `infer_objects`_          | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``info``                   | `info`_                   | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``insert``                 | `insert`_                 | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``interpolate``            | `interpolate`_            | D                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``isetitem``               | `isetitem`_               | D                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``isin``                   | `isin`_                   | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``isna``                   | `isna`_                   | Y                      |                                                    
|\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``isnull``                 | `isnull`_                 | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``items``                  | `items`_                  | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``iterrows``               | `iterrows`_               | P                      | Modin does not parallelize iteration in Python     |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``itertuples``             | `itertuples`_             | P                      | Modin does not parallelize iteration in Python     |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``join``                   | `join`_                   | P                      | When ``on`` is set to ``right`` or ``outer`` or    |\n|                            |                           |                        | when ``validate`` is given defaults to pandas      |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``keys``                   | `keys`_                   | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``kurt``                   | `kurt`_                   | Y     
                 |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``kurtosis``               | `kurtosis`_               | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``last``                   | `last`_                   | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``last_valid_index``       | `last_valid_index`_       | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``le``                     | `le`_                     | Y                      | See ``add``                                        |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``loc``                    | `loc`_                    | P                      | We do not support: boolean array, callable.        
|\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``lt``                     | `lt`_                     | Y                      | See ``add``                                        |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``mask``                   | `mask`_                   | D                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``max``                    | `max`_                    | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``mean``                   | `mean`_                   | P                      | Modin defaults to pandas if given the ``level``    |\n|                            |                           |                        | param.                                             |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``median``                 | `median`_                 | P                      | Modin defaults to pandas if given the ``level``    |\n|                            |                           |                        | param.                                             
|\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``melt``                   | `melt`_                   | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``memory_usage``           | `memory_usage`_           | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n|                            |                           |                        | Implemented the following cases:                   |\n|                            |                           |                        | ``left_index=True`` and ``right_index=True``,      |\n|                            |                           |                        | ``how=left`` and ``how=inner`` for all values      |\n| ``merge``                  | `merge`_                  | P                      | of parameters except ``left_index=True`` and       |\n|                            |                           |                        | ``right_index=False`` or ``left_index=False``      |\n|                            |                           |                        | and ``right_index=True``.                          |\n|                            |                           |                        | Defaults to pandas otherwise.                      
|\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``min``                    | `min`_                    | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``mod``                    | `mod`_                    | Y                      | See ``add``                                        |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``mode``                   | `mode`_                   | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``mul``                    | `mul`_                    | Y                      | See ``add``                                        |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``multiply``               | `multiply`_               | Y                      | See ``add``                                        |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``ndim``                   | `ndim`_                   | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``ne``                     | `ne`_                     | Y                      | See ``add``                                        
|\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``nlargest``               | `nlargest`_               | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``notna``                  | `notna`_                  | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``notnull``                | `notnull`_                | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``nsmallest``              | `nsmallest`_              | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``nunique``                | `nunique`_                | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``pct_change``             | `pct_change`_             | D                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``pipe``                   | `pipe`_                   | Y                      |                                                    
|\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``pivot``                  | `pivot`_                  | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``pivot_table``            | `pivot_table`_            | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``plot``                   | `plot`_                   | D                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``pop``                    | `pop`_                    | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``pow``                    | `pow`_                    | Y                      | See ``add``                                        |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``prod``                   | `prod`_                   | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``product``                | `product`_                | Y                      |                                                    
|\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``quantile``               | `quantile`_               | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``query``                  | `query`_                  | P                      | Local variables not yet supported                  |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``radd``                   | `radd`_                   | Y                      | See ``add``                                        |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``rank``                   | `rank`_                   | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``rdiv``                   | `rdiv`_                   | Y                      | See ``add``                                        |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``reindex``                | `reindex`_                | Y                      | Shuffles data                                      |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``reindex_like``           | `reindex_like`_           | D                      |                                                    
|\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``rename``                 | `rename`_                 | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``rename_axis``            | `rename_axis`_            | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``reorder_levels``         | `reorder_levels`_         | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``replace``                | `replace`_                | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``resample``               | `resample`_               | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``reset_index``            | `reset_index`_            | P                      | **Ray** and **Dask**: ``D`` when ``names`` or      |\n|                            |                           |                        | ``allow_duplicates`` is non-default                |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``rfloordiv``              | `rfloordiv`_              | Y     
                 | See ``add``                                        |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``rmod``                   | `rmod`_                   | Y                      | See ``add``                                        |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``rmul``                   | `rmul`_                   | Y                      | See ``add``                                        |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``rolling``                | `rolling`_                | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``round``                  | `round`_                  | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``rpow``                   | `rpow`_                   | Y                      | See ``add``                                        |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``rsub``                   | `rsub`_                   | Y                      | See ``add``                                        |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``rtruediv``               | `rtruediv`_               | Y                      | See ``add``                                      
  |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``sample``                 | `sample`_                 | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``select_dtypes``          | `select_dtypes`_          | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``sem``                    | `sem`_                    | P                      | Modin defaults to pandas if given the ``level``    |\n|                            |                           |                        | param.                                             |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``set_axis``               | `set_axis`_               | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``set_index``              | `set_index`_              | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``shape``                  | `shape`_                  | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``shift``                  | `shift`_                  | Y   
                   |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``size``                   | `size`_                   | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``skew``                   | `skew`_                   | P                      | Modin defaults to pandas if given the ``level``    |\n|                            |                           |                        | param.                                             |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``sort_index``             | `sort_index`_             | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``sort_values``            | `sort_values`_            | Y                      | Shuffles data. 
Order of indexes that have the      |\n|                            |                           |                        | same sort key is not guaranteed to be the same     |\n|                            |                           |                        | across sorts                                       |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``sparse``                 | `sparse`_                 | N                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``squeeze``                | `squeeze`_                | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``stack``                  | `stack`_                  | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``std``                    | `std`_                    | P                      | Modin defaults to pandas if given the ``level``    |\n|                            |                           |                        | param.                                             
|\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``style``                  | `style`_                  | D                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``sub``                    | `sub`_                    | Y                      | See ``add``                                        |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``subtract``               | `subtract`_               | Y                      | See ``add``                                        |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``sum``                    | `sum`_                    | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``swapaxes``               | `swapaxes`_               | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``swaplevel``              | `swaplevel`_              | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``tail``                   | `tail`_                   | Y                      |                                                    
|\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``take``                   | `take`_                   | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``to_clipboard``           | `to_clipboard`_           | D                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``to_csv``                 | `to_csv`_                 | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``to_dict``                | `to_dict`_                | D                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``to_excel``               | `to_excel`_               | D                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``to_feather``             | `to_feather`_             | D                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``to_gbq``                 | `to_gbq`_                 | D                      |                                                    
|\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``to_hdf``                 | `to_hdf`_                 | D                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``to_html``                | `to_html`_                | D                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``to_json``                | `to_json`_                | D                      |                                                    |\n|                            |                           |                        | Experimental implementation:                       |\n|                            |                           |                        | DataFrame.modin.to_json_glob                       |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``to_xml``                 | `to_xml`_                 | D                      |                                                    |\n|                            |                           |                        | Experimental implementation:                       |\n|                            |                           |                        | DataFrame.modin.to_xml_glob                        |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``to_latex``               | `to_latex`_               | D                      |                                                    
|\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``to_orc``                 | `to_orc`_                 | D                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``to_parquet``             | `to_parquet`_             | P                      | **Ray/Dask/Unidist**: Parallel implementation only |\n|                            |                           |                        | if path parameter is a string. In that case, the   |\n|                            |                           |                        | ``path`` parameter specifies a directory where one |\n|                            |                           |                        | file is written per row partition of the Modin     |\n|                            |                           |                        | dataframe.                                         
|\n|                            |                           |                        | Experimental implementation:                       |\n|                            |                           |                        | DataFrame.modin.to_parquet_glob                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``to_period``              | `to_period`_              | D                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``to_pickle``              | `to_pickle`_              | D                      | Experimental implementation:                       |\n|                            |                           |                        | DataFrame.modin.to_pickle_glob                     |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``to_records``             | `to_records`_             | D                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``to_sql``                 | `to_sql`_                 | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``to_stata``               | `to_stata`_               | D                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``to_string``              | `to_string`_              | D     
                 |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``to_timestamp``           | `to_timestamp`_           | D                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``to_xarray``              | `to_xarray`_              | D                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``transform``              | `transform`_              | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``transpose``              | `transpose`_              | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``truediv``                | `truediv`_                | Y                      | See ``add``                                        |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``truncate``               | `truncate`_               | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``tz_convert``             | `tz_convert`_             | Y                      |                                                  
  |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``tz_localize``            | `tz_localize`_            | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``unstack``                | `unstack`_                | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``update``                 | `update`_                 | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``values``                 | `values`_                 | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``value_counts``           | `value_counts`_           | D                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``var``                    | `var`_                    | P                      | Modin defaults to pandas if given the ``level``    |\n|                            |                           |                        | param.                                             
|\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n| ``where``                  | `where`_                  | Y                      |                                                    |\n+----------------------------+---------------------------+------------------------+----------------------------------------------------+\n\n.. _`GitHub repository`: https://github.com/modin-project/modin/issues\n.. _`T`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.T.html#pandas.DataFrame.T\n.. _`abs`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.abs.html#pandas.DataFrame.abs\n.. _`add`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.add.html#pandas.DataFrame.add\n.. _`add_prefix`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.add_prefix.html#pandas.DataFrame.add_prefix\n.. _`add_suffix`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.add_suffix.html#pandas.DataFrame.add_suffix\n.. _`agg`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.agg.html#pandas.DataFrame.agg\n.. _`aggregate`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.aggregate.html#pandas.DataFrame.aggregate\n.. _`align`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.align.html#pandas.DataFrame.align\n.. _`all`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.all.html#pandas.DataFrame.all\n.. _`any`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.any.html#pandas.DataFrame.any\n.. _`apply`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.apply.html#pandas.DataFrame.apply\n.. _`applymap`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.applymap.html#pandas.DataFrame.applymap\n.. 
_`asfreq`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.asfreq.html#pandas.DataFrame.asfreq\n.. _`asof`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.asof.html#pandas.DataFrame.asof\n.. _`assign`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.assign.html#pandas.DataFrame.assign\n.. _`astype`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.astype.html#pandas.DataFrame.astype\n.. _`at`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.at.html#pandas.DataFrame.at\n.. _`at_time`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.at_time.html#pandas.DataFrame.at_time\n.. _`axes`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.axes.html#pandas.DataFrame.axes\n.. _`between_time`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.between_time.html#pandas.DataFrame.between_time\n.. _`bfill`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.bfill.html#pandas.DataFrame.bfill\n.. _`bool`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.bool.html#pandas.DataFrame.bool\n.. _`boxplot`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.boxplot.html#pandas.DataFrame.boxplot\n.. _`clip`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.clip.html#pandas.DataFrame.clip\n.. _`combine`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.combine.html#pandas.DataFrame.combine\n.. _`combine_first`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.combine_first.html#pandas.DataFrame.combine_first\n.. _`compare`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.compare.html#pandas.DataFrame.compare\n.. 
_`compound`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.compound.html#pandas.DataFrame.compound\n.. _`copy`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.copy.html#pandas.DataFrame.copy\n.. _`corr`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.corr.html#pandas.DataFrame.corr\n.. _`corrwith`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.corrwith.html#pandas.DataFrame.corrwith\n.. _`count`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.count.html#pandas.DataFrame.count\n.. _`cov`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.cov.html#pandas.DataFrame.cov\n.. _`cummax`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.cummax.html#pandas.DataFrame.cummax\n.. _`cummin`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.cummin.html#pandas.DataFrame.cummin\n.. _`cumprod`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.cumprod.html#pandas.DataFrame.cumprod\n.. _`cumsum`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.cumsum.html#pandas.DataFrame.cumsum\n.. _`describe`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.describe.html#pandas.DataFrame.describe\n.. _`diff`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.diff.html#pandas.DataFrame.diff\n.. _`div`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.div.html#pandas.DataFrame.div\n.. _`divide`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.divide.html#pandas.DataFrame.divide\n.. _`dot`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.dot.html#pandas.DataFrame.dot\n.. _`drop`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.drop.html#pandas.DataFrame.drop\n.. 
_`droplevel`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.droplevel.html\n.. _`drop_duplicates`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.drop_duplicates.html#pandas.DataFrame.drop_duplicates\n.. _`dropna`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.dropna.html#pandas.DataFrame.dropna\n.. _`dtypes`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.dtypes.html#pandas.DataFrame.dtypes\n.. _`duplicated`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.duplicated.html#pandas.DataFrame.duplicated\n.. _`empty`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.empty.html#pandas.DataFrame.empty\n.. _`eq`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.eq.html#pandas.DataFrame.eq\n.. _`equals`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.equals.html#pandas.DataFrame.equals\n.. _`eval`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.eval.html#pandas.DataFrame.eval\n.. _`ewm`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.ewm.html#pandas.DataFrame.ewm\n.. _`expanding`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.expanding.html#pandas.DataFrame.expanding\n.. _`explode`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.explode.html#pandas-dataframe-explode\n.. _`ffill`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.ffill.html#pandas.DataFrame.ffill\n.. _`fillna`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.fillna.html#pandas.DataFrame.fillna\n.. _`filter`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.filter.html#pandas.DataFrame.filter\n.. 
_`first`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.first.html#pandas.DataFrame.first\n.. _`first_valid_index`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.first_valid_index.html#pandas.DataFrame.first_valid_index\n.. _`floordiv`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.floordiv.html#pandas.DataFrame.floordiv\n.. _`from_dict`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.from_dict.html#pandas.DataFrame.from_dict\n.. _`from_records`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.from_records.html#pandas.DataFrame.from_records\n.. _`ge`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.ge.html#pandas.DataFrame.ge\n.. _`get`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.get.html#pandas.DataFrame.get\n.. _`get_dtype_counts`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.get_dtype_counts.html#pandas.DataFrame.get_dtype_counts\n.. _`get_ftype_counts`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.get_ftype_counts.html#pandas.DataFrame.get_ftype_counts\n.. _`get_value`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.get_value.html#pandas.DataFrame.get_value\n.. _`get_values`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.get_values.html#pandas.DataFrame.get_values\n.. _`groupby`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.groupby.html#pandas.DataFrame.groupby\n.. _`gt`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.gt.html#pandas.DataFrame.gt\n.. _`head`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.head.html#pandas.DataFrame.head\n.. 
_`hist`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.hist.html#pandas.DataFrame.hist\n.. _`iat`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.iat.html#pandas.DataFrame.iat\n.. _`idxmax`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.idxmax.html#pandas.DataFrame.idxmax\n.. _`idxmin`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.idxmin.html#pandas.DataFrame.idxmin\n.. _`iloc`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.iloc.html#pandas.DataFrame.iloc\n.. _`infer_objects`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.infer_objects.html#pandas.DataFrame.infer_objects\n.. _`info`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.info.html#pandas.DataFrame.info\n.. _`insert`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.insert.html#pandas.DataFrame.insert\n.. _`interpolate`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.interpolate.html#pandas.DataFrame.interpolate\n.. _`is_copy`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.is_copy.html#pandas.DataFrame.is_copy\n.. _`isetitem`: https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.isetitem.html?#pandas-dataframe-isetitem\n.. _`isin`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.isin.html#pandas.DataFrame.isin\n.. _`isna`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.isna.html#pandas.DataFrame.isna\n.. _`isnull`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.isnull.html#pandas.DataFrame.isnull\n.. _`items`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.items.html#pandas.DataFrame.items\n.. 
_`iterrows`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.iterrows.html#pandas.DataFrame.iterrows\n.. _`itertuples`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.itertuples.html#pandas.DataFrame.itertuples\n.. _`ix`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.ix.html#pandas.DataFrame.ix\n.. _`join`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.join.html#pandas.DataFrame.join\n.. _`keys`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.keys.html#pandas.DataFrame.keys\n.. _`kurt`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.kurt.html#pandas.DataFrame.kurt\n.. _`kurtosis`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.kurtosis.html#pandas.DataFrame.kurtosis\n.. _`last`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.last.html#pandas.DataFrame.last\n.. _`last_valid_index`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.last_valid_index.html#pandas.DataFrame.last_valid_index\n.. _`le`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.le.html#pandas.DataFrame.le\n.. _`loc`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.loc.html#pandas.DataFrame.loc\n.. _`lt`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.lt.html#pandas.DataFrame.lt\n.. _`mask`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.mask.html#pandas.DataFrame.mask\n.. _`max`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.max.html#pandas.DataFrame.max\n.. _`mean`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.mean.html#pandas.DataFrame.mean\n.. _`median`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.median.html#pandas.DataFrame.median\n.. 
_`melt`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.melt.html#pandas.DataFrame.melt\n.. _`memory_usage`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.memory_usage.html#pandas.DataFrame.memory_usage\n.. _`merge`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.merge.html#pandas.DataFrame.merge\n.. _`min`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.min.html#pandas.DataFrame.min\n.. _`mod`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.mod.html#pandas.DataFrame.mod\n.. _`mode`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.mode.html#pandas.DataFrame.mode\n.. _`mul`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.mul.html#pandas.DataFrame.mul\n.. _`multiply`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.multiply.html#pandas.DataFrame.multiply\n.. _`ndim`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.ndim.html#pandas.DataFrame.ndim\n.. _`ne`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.ne.html#pandas.DataFrame.ne\n.. _`nlargest`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.nlargest.html#pandas.DataFrame.nlargest\n.. _`notna`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.notna.html#pandas.DataFrame.notna\n.. _`notnull`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.notnull.html#pandas.DataFrame.notnull\n.. _`nsmallest`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.nsmallest.html#pandas.DataFrame.nsmallest\n.. _`nunique`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.nunique.html#pandas.DataFrame.nunique\n.. 
_`pct_change`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.pct_change.html#pandas.DataFrame.pct_change\n.. _`pipe`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.pipe.html#pandas.DataFrame.pipe\n.. _`pivot`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.pivot.html#pandas.DataFrame.pivot\n.. _`pivot_table`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.pivot_table.html#pandas.DataFrame.pivot_table\n.. _`plot`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.plot.html#pandas.DataFrame.plot\n.. _`pop`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.pop.html#pandas.DataFrame.pop\n.. _`pow`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.pow.html#pandas.DataFrame.pow\n.. _`prod`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.prod.html#pandas.DataFrame.prod\n.. _`product`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.product.html#pandas.DataFrame.product\n.. _`quantile`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.quantile.html#pandas.DataFrame.quantile\n.. _`query`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.query.html#pandas.DataFrame.query\n.. _`radd`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.radd.html#pandas.DataFrame.radd\n.. _`rank`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.rank.html#pandas.DataFrame.rank\n.. _`rdiv`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.rdiv.html#pandas.DataFrame.rdiv\n.. _`reindex`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.reindex.html#pandas.DataFrame.reindex\n.. 
_`reindex_like`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.reindex_like.html#pandas.DataFrame.reindex_like\n.. _`rename`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.rename.html#pandas.DataFrame.rename\n.. _`rename_axis`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.rename_axis.html#pandas.DataFrame.rename_axis\n.. _`reorder_levels`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.reorder_levels.html#pandas.DataFrame.reorder_levels\n.. _`replace`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.replace.html#pandas.DataFrame.replace\n.. _`resample`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.resample.html#pandas.DataFrame.resample\n.. _`reset_index`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.reset_index.html#pandas.DataFrame.reset_index\n.. _`rfloordiv`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.rfloordiv.html#pandas.DataFrame.rfloordiv\n.. _`rmod`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.rmod.html#pandas.DataFrame.rmod\n.. _`rmul`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.rmul.html#pandas.DataFrame.rmul\n.. _`rolling`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.rolling.html#pandas.DataFrame.rolling\n.. _`round`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.round.html#pandas.DataFrame.round\n.. _`rpow`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.rpow.html#pandas.DataFrame.rpow\n.. _`rsub`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.rsub.html#pandas.DataFrame.rsub\n.. _`rtruediv`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.rtruediv.html#pandas.DataFrame.rtruediv\n.. 
_`sample`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.sample.html#pandas.DataFrame.sample\n.. _`select_dtypes`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.select_dtypes.html#pandas.DataFrame.select_dtypes\n.. _`sem`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.sem.html#pandas.DataFrame.sem\n.. _`set_axis`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.set_axis.html#pandas.DataFrame.set_axis\n.. _`set_index`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.set_index.html#pandas.DataFrame.set_index\n.. _`set_value`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.set_value.html#pandas.DataFrame.set_value\n.. _`shape`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.shape.html#pandas.DataFrame.shape\n.. _`shift`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.shift.html#pandas.DataFrame.shift\n.. _`size`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.size.html#pandas.DataFrame.size\n.. _`skew`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.skew.html#pandas.DataFrame.skew\n.. _`sort_index`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.sort_index.html#pandas.DataFrame.sort_index\n.. _`sort_values`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.sort_values.html#pandas.DataFrame.sort_values\n.. _`sparse`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.sparse.html#pandas-dataframe-sparse\n.. _`squeeze`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.squeeze.html#pandas.DataFrame.squeeze\n.. _`stack`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.stack.html#pandas.DataFrame.stack\n.. 
_`std`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.std.html#pandas.DataFrame.std\n.. _`style`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.style.html#pandas.DataFrame.style\n.. _`sub`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.sub.html#pandas.DataFrame.sub\n.. _`subtract`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.subtract.html#pandas.DataFrame.subtract\n.. _`sum`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.sum.html#pandas.DataFrame.sum\n.. _`swapaxes`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.swapaxes.html#pandas.DataFrame.swapaxes\n.. _`swaplevel`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.swaplevel.html#pandas.DataFrame.swaplevel\n.. _`tail`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.tail.html#pandas.DataFrame.tail\n.. _`take`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.take.html#pandas.DataFrame.take\n.. _`to_clipboard`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_clipboard.html#pandas.DataFrame.to_clipboard\n.. _`to_csv`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_csv.html#pandas.DataFrame.to_csv\n.. _`to_dict`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_dict.html#pandas.DataFrame.to_dict\n.. _`to_excel`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_excel.html#pandas.DataFrame.to_excel\n.. _`to_feather`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_feather.html#pandas.DataFrame.to_feather\n.. _`to_gbq`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_gbq.html#pandas.DataFrame.to_gbq\n.. 
_`to_hdf`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_hdf.html#pandas.DataFrame.to_hdf\n.. _`to_html`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_html.html#pandas.DataFrame.to_html\n.. _`to_json`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_json.html#pandas.DataFrame.to_json\n.. _`to_xml`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_xml.html#pandas.DataFrame.to_xml\n.. _`to_latex`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_latex.html#pandas.DataFrame.to_latex\n.. _`to_orc`: https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_orc.html#pandas.DataFrame.to_orc\n.. _`to_parquet`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_parquet.html#pandas.DataFrame.to_parquet\n.. _`to_period`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_period.html#pandas.DataFrame.to_period\n.. _`to_pickle`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_pickle.html#pandas.DataFrame.to_pickle\n.. _`to_records`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_records.html#pandas.DataFrame.to_records\n.. _`to_sql`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_sql.html#pandas.DataFrame.to_sql\n.. _`to_stata`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_stata.html#pandas.DataFrame.to_stata\n.. _`to_string`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_string.html#pandas.DataFrame.to_string\n.. _`to_timestamp`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_timestamp.html#pandas.DataFrame.to_timestamp\n.. _`to_xarray`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_xarray.html#pandas.DataFrame.to_xarray\n.. 
_`transform`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.transform.html#pandas.DataFrame.transform\n.. _`transpose`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.transpose.html#pandas.DataFrame.transpose\n.. _`truediv`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.truediv.html#pandas.DataFrame.truediv\n.. _`truncate`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.truncate.html#pandas.DataFrame.truncate\n.. _`tz_convert`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.tz_convert.html#pandas.DataFrame.tz_convert\n.. _`tz_localize`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.tz_localize.html#pandas.DataFrame.tz_localize\n.. _`unstack`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.unstack.html#pandas.DataFrame.unstack\n.. _`update`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.update.html#pandas.DataFrame.update\n.. _`value_counts`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.value_counts.html#pandas.DataFrame.value_counts\n.. _`values`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.values.html#pandas.DataFrame.values\n.. _`var`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.var.html#pandas.DataFrame.var\n.. _`where`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.where.html#pandas.DataFrame.where\n.. _`xs`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.xs.html#pandas.DataFrame.xs\n"
  },
  {
    "path": "docs/supported_apis/defaulting_to_pandas.rst",
    "content": "Defaulting to pandas\n====================\n\nCurrently, Modin does not support distributed execution for all methods of the pandas API.\nThe remaining unimplemented methods are executed in a mode called \"default to pandas\".\nThis allows users to continue using Modin even though their workloads contain functions not yet implemented in Modin.\nHere is a diagram of how we convert to pandas and perform the operation:\n\n.. image:: /img/convert_to_pandas.png\n   :align: center\n\nWe first convert to a pandas DataFrame, then perform the operation. **There is a\nperformance penalty for going from a partitioned Modin DataFrame to pandas because of\nthe communication cost and single-threaded nature of pandas.** Once the pandas operation\nhas completed, we convert the DataFrame back into a partitioned Modin DataFrame. This\nway, operations performed after something defaults to pandas will be optimized with\nModin.\n\nThe exact methods we have implemented are listed in the respective subsections:\n\n* :doc:`DataFrame </supported_apis/dataframe_supported>`\n* :doc:`Series </supported_apis/series_supported>`\n* :doc:`utilities </supported_apis/utilities_supported>`\n* :doc:`I/O </supported_apis/io_supported>`\n\nWe have taken a community-driven approach to implementing new methods. We did a `study\non pandas usage`_ to learn what the most-used APIs are. Modin currently supports **93%**\nof the pandas API based on our study of pandas usage, and we are actively expanding the\nAPI.\n**To request implementation, file an issue at https://github.com/modin-project/modin/issues\nor send an email to feature_requests@modin.org.**\n\n.. _`study on pandas usage`: https://github.com/modin-project/study_kaggle_usage\n"
  },
  {
    "path": "docs/supported_apis/index.rst",
    "content": "Supported APIs\n==============\n\nFor your convenience, we have compiled a list of currently implemented APIs and methods\navailable in Modin. This documentation is updated as new methods and APIs are merged\ninto the main branch, and is not necessarily correct as of the most recent release.\n\nTo view the docs for the most recent release, check that you're viewing the\n`stable version`_ of the docs.\n\nIn order to install the latest version of Modin, follow the directions found on the\n:doc:`installation page </getting_started/installation>`.\n\nQuestions on implementation details\n-----------------------------------\n\nIf you have a question about the implementation details or would like more information\nabout an API or method in Modin, please contact the Modin `developer mailing list`_.\n\n.. toctree::\n   :titlesonly:\n   :hidden:\n\n   defaulting_to_pandas\n   dataframe_supported\n   series_supported\n   utilities_supported\n   io_supported\n   older_pandas_compat\n\n.. meta::\n    :description lang=en:\n        Compilation of implemented pandas APIs in Modin.\n\n.. _developer mailing list: https://groups.google.com/forum/#!forum/modin-dev\n.. _stable version: https://modin.readthedocs.io/en/stable/supported_apis/index.html\n"
  },
  {
    "path": "docs/supported_apis/io_supported.rst",
    "content": "``pd.read_<file>`` and I/O APIs\r\n=================================\r\n\r\nA number of IO methods default to pandas. We have parallelized ``read_csv``,\r\n``read_parquet`` and some more (see table), though many of the remaining methods\r\ncan be relatively easily parallelized. Some of the operations default to the\r\npandas implementation, meaning it will read in serially as a single, non-distributed\r\nDataFrame and distribute it. Performance will be affected by this.\r\n\r\nThe following table is structured as follows: The first column contains the method name.\r\nThe second column is a flag for whether or not there is an implementation in Modin for\r\nthe method in the left column. ``Y`` stands for yes, ``N`` stands for no, ``P`` stands\r\nfor partial (meaning some parameters may not be supported yet), and ``D`` stands for\r\ndefault to pandas.\r\n\r\n.. note::\r\n    Support for fully asynchronous reading has been added for the following functions:\r\n    ``read_csv``, ``read_fwf``, ``read_table``, ``read_custom_text``.\r\n    This mode is disabled by default, one can enable it using ``MODIN_ASYNC_READ_MODE=True``\r\n    environment variable. Some parameter combinations are not supported and the function\r\n    will be executed in synchronous mode.\r\n\r\n+-------------------+---------------------------------+--------------------------------------------------------+\r\n| IO method         | Modin Implementation? 
(Y/N/P/D) | Notes for Current implementation                       |\r\n+-------------------+---------------------------------+--------------------------------------------------------+\r\n| `read_csv`_       | Y                               |                                                        |\r\n+-------------------+---------------------------------+--------------------------------------------------------+\r\n| `read_fwf`_       | Y                               |                                                        |\r\n+-------------------+---------------------------------+--------------------------------------------------------+\r\n| `read_table`_     | Y                               |                                                        |\r\n+-------------------+---------------------------------+--------------------------------------------------------+\r\n| `read_parquet`_   | P                               | Parameters besides ``filters`` and ``storage_options`` |\r\n|                   |                                 | passed via ``**kwargs`` are not supported.             |\r\n|                   |                                 | ``use_nullable_dtypes`` == True is not supported.      
|\r\n|                   |                                 |                                                        |\r\n|                   |                                 | Experimental implementation: read_parquet_glob         |\r\n+-------------------+---------------------------------+--------------------------------------------------------+\r\n| `read_json`_      | P                               | Implemented for ``lines=True``                         |\r\n|                   |                                 | Experimental implementation: read_json_glob            |\r\n+-------------------+---------------------------------+--------------------------------------------------------+\r\n| `read_xml`_       | D                               | Experimental implementation: read_xml_glob             |\r\n+-------------------+---------------------------------+--------------------------------------------------------+\r\n| `read_html`_      | D                               |                                                        |\r\n+-------------------+---------------------------------+--------------------------------------------------------+\r\n| `read_clipboard`_ | D                               |                                                        |\r\n+-------------------+---------------------------------+--------------------------------------------------------+\r\n| `read_excel`_     | D                               |                                                        |\r\n+-------------------+---------------------------------+--------------------------------------------------------+\r\n| `read_hdf`_       | D                               |                                                        |\r\n+-------------------+---------------------------------+--------------------------------------------------------+\r\n| `read_feather`_   | Y                               |                                                        
|\r\n+-------------------+---------------------------------+--------------------------------------------------------+\r\n| `read_stata`_     | D                               |                                                        |\r\n+-------------------+---------------------------------+--------------------------------------------------------+\r\n| `read_sas`_       | D                               |                                                        |\r\n+-------------------+---------------------------------+--------------------------------------------------------+\r\n| `read_pickle`_    | D                               | Experimental implementation:                           |\r\n|                   |                                 | read_pickle_glob                                       |\r\n+-------------------+---------------------------------+--------------------------------------------------------+\r\n| `read_sql`_       | Y                               |                                                        |\r\n+-------------------+---------------------------------+--------------------------------------------------------+\r\n\r\n.. _`read_csv`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_csv.html#pandas.read_csv\r\n.. _`read_fwf`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_fwf.html#pandas.read_fwf\r\n.. _`read_table`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_table.html#pandas.read_table\r\n.. _`read_parquet`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_parquet.html#pandas.read_parquet\r\n.. _`read_json`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_json.html#pandas.read_json\r\n.. _`read_xml`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_xml.html#pandas.read_xml\r\n.. _`read_html`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_html.html#pandas.read_html\r\n.. 
_`read_clipboard`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_clipboard.html#pandas.read_clipboard\r\n.. _`read_excel`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_excel.html#pandas.read_excel\r\n.. _`read_hdf`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_hdf.html#pandas.read_hdf\r\n.. _`read_feather`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_feather.html#pandas.read_feather\r\n.. _`read_stata`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_stata.html#pandas.read_stata\r\n.. _`read_sas`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_sas.html#pandas.read_sas\r\n.. _`read_pickle`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_pickle.html#pandas.read_pickle\r\n.. _`read_sql`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_sql.html#pandas.read_sql\r\n"
  },
  {
    "path": "docs/supported_apis/older_pandas_compat.rst",
    "content": "===================================\nPandas backwards compatibility mode\n===================================\n\nModin verions 0.16 and 0.17, but no later minor versions, had limited support\nfor running with legacy pandas versions. The latest version of Modin no longer\nhas such support.\n\nMotivation for compatibility mode\n---------------------------------\n\nModin aims to keep compatibility with latest pandas release, hopefully catching up each release\nwithin a few days.\n\nHowever, due to certain restrictions like need to use Python 3.6 it forces some users to\nuse older pandas (1.1.x for Python 3.6, specifically), which normally would mean they're\nbound to be using ancient Modin as well.\n\nTo overcome this, Modin has special \"compatibility mode\" where some basic functionality\nworks, but please note that the support is \"best possible effort\" (e.g. not all older bugs\nare worth fixing).\n\nKnown issues with pandas 1.1.x\n------------------------------\n\n* ``pd.append()`` does not preserve the order of columns in older pandas while Modin does\n* ``.astype()`` produces different error type on incompatible dtypes\n* ``read_csv()`` does not support reading from ZIP file *with compression* in parallel mode\n* ``read_*`` do not support ``storage_option`` named argument\n* ``to_csv()`` does not support binary mode for output file\n* ``read_excel()`` does not support ``.xlsx`` files\n* ``read_fwf()`` has a bug with list of skiprows and non-None nrows: `pandas-dev#10261`_\n* ``.agg(int-value)`` produces TypeError in older pandas but Modin raises AssertionError\n* ``Series.reset_index(drop=True)`` does not ignore ``name`` in older pandas while Modin ignores it\n* ``.sort_index(ascending=None)`` does not raise ValueError in older pandas while Modin raises it\n\nPlease keep in mind that there are probably more issues which are not yet uncovered!\n\n.. _`pandas-dev#10261`: https://github.com/pandas-dev/pandas/issues/10261\n"
  },
  {
    "path": "docs/supported_apis/series_supported.rst",
    "content": "``pd.Series`` supported APIs\r\n============================\r\n\r\nThe following table lists both implemented and not implemented methods. If you have need\r\nof an operation that is listed as not implemented, feel free to open an issue on the\r\n`GitHub repository`_, or give a thumbs up to already created issues. Contributions are\r\nalso welcome!\r\n\r\nThe following table is structured as follows: The first column contains the method name.\r\nThe second column is a flag for whether or not there is an implementation in Modin for\r\nthe method in the left column. ``Y`` stands for yes, ``N`` stands for no, ``P`` stands\r\nfor partial (meaning some parameters may not be supported yet), and ``D`` stands for\r\ndefault to pandas. To learn more about the implementations that default to pandas, see\r\nthe related section on :doc:`Defaulting to pandas </supported_apis/index>`.\r\n\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| Series method               | Modin Implementation? 
(Y/N/P/D) | Notes for Current implementation                   |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``abs``                     | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``add``                     | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``add_prefix``              | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``add_suffix``              | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``agg``                     | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``aggregate``               | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``align``                   | D                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``all``                     | Y                               |                                     
               |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``any``                     | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``apply``                   | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``argmax``                  | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``argmin``                  | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``argsort``                 | D                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``array``                   | D                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``asfreq``                  | D                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``asobject``                | D                               |                                                    
|\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``asof``                    | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``astype``                  | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``at``                      | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``at_time``                 | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``autocorr``                | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``axes``                    | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``base``                    | D                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``between``                 | D                               |                                                    
|\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``between_time``            | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``bfill``                   | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``bool``                    | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``cat``                     | D                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``clip``                    | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``combine``                 | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``combine_first``           | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``compare``                 | Y                               |                                                    
|\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``compress``                | D                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``copy``                    | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``corr``                    | Y                               | Correlation floating point precision may slightly  |\r\n|                             |                                 | differ from pandas. For now pearson method is      |\r\n|                             |                                 | available only. For other methods defaults to      |\r\n|                             |                                 | pandas.                                            |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``count``                   | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``cov``                     | Y                               | Covariance floating point precision may slightly   |\r\n|                             |                                 | differ from pandas.                                
|\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``cummax``                  | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``cummin``                  | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``cumprod``                 | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``cumsum``                  | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``data``                    | D                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``describe``                | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``diff``                    | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``div``                     | Y                               | See ``add``                                        
|\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``divide``                  | Y                               | See ``add``                                        |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``divmod``                  | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``dot``                     | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``drop``                    | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``drop_duplicates``         | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``droplevel``               | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``dropna``                  | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``dt``                      | Y                               |                                                    
|\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``dtype``                   | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``dtypes``                  | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``duplicated``              | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``empty``                   | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``eq``                      | Y                               | See ``add``                                        |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``equals``                  | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``ewm``                     | D                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``expanding``               | D                               |                                                    
|\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``explode``                 | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``factorize``               | D                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``ffill``                   | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``fillna``                  | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``filter``                  | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``first``                   | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``first_valid_index``       | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``flags``                   | D                               |                                                    
|\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``floordiv``                | Y                               | See ``add``                                        |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``from_array``              | D                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``ftype``                   | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``ge``                      | Y                               | See ``add``                                        |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``get``                     | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``get_dtype_counts``        | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``get_ftype_counts``        | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``get_value``               | D                               |                                                    
|\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``get_values``              | D                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``groupby``                 | D                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``gt``                      | Y                               | See ``add``                                        |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``hasnans``                 | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``head``                    | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``hist``                    | D                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``iat``                     | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``idxmax``                  | Y                               |                                                    
|\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``idxmin``                  | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``iloc``                    | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``imag``                    | D                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``index``                   | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``infer_objects``           | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``interpolate``             | D                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``is_monotonic_decreasing`` | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``is_monotonic_increasing`` | Y                               |                                                    
|\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``is_unique``               | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``isin``                    | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``isna``                    | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``isnull``                  | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``item``                    | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``items``                   | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``itemsize``                | D                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``keys``                    | Y                               |                                                    
|\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``kurt``                    | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``kurtosis``                | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``last``                    | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``last_valid_index``        | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``le``                      | Y                               | See ``add``                                        |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``loc``                     | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``lt``                      | Y                               | See ``add``                                        |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``map``                     | Y                               |                                                    
|\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``mask``                    | D                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``max``                     | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``mean``                    | P                               | Modin defaults to pandas if given the ``level``    |\r\n|                             |                                 | param.                                             |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``median``                  | P                               | Modin defaults to pandas if given the ``level``    |\r\n|                             |                                 | param.                                             
|\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``memory_usage``            | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``min``                     | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``mod``                     | Y                               | See ``add``                                        |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``mode``                    | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``mul``                     | Y                               | See ``add``                                        |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``multiply``                | Y                               | See ``add``                                        |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``name``                    | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``nbytes``                  | D                               |                                                    
|\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``ndim``                    | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``ne``                      | Y                               | See ``add``                                        |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``nlargest``                | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``nonzero``                 | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``notna``                   | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``notnull``                 | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``nsmallest``               | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``nunique``                 | Y                               |                                                    
|\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``pct_change``              | D                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``pipe``                    | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``plot``                    | D                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``pop``                     | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``pow``                     | Y                               | See ``add``                                        |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``prod``                    | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``product``                 | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``ptp``                     | D                               |                                                    
|\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``put``                     | D                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``quantile``                | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``radd``                    | Y                               | See ``add``                                        |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``rank``                    | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``ravel``                   | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``rdiv``                    | Y                               | See ``add``                                        |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``rdivmod``                 | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``real``                    | D                               |                                                    
|\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``reindex``                 | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``reindex_like``            | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``rename``                  | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``rename_axis``             | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``reorder_levels``          | D                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``repeat``                  | D                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``replace``                 | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``resample``                | Y                               |                                                    
|\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``reset_index``             | P                               | **Ray** and **Dask**: ``D`` when ``names`` or      |\r\n|                             |                                 | ``allow_duplicates`` is non-default                |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``rfloordiv``               | Y                               | See ``add``                                        |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``rmod``                    | Y                               | See ``add``                                        |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``rmul``                    | Y                               | See ``add``                                        |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``rolling``                 | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``round``                   | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``rpow``                    | Y                               | See ``add``                                        |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``rsub``                    | Y          
                     | See ``add``                                        |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``rtruediv``                | Y                               | See ``add``                                        |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``sample``                  | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``searchsorted``            | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``sem``                     | P                               | Modin defaults to pandas if given the ``level``    |\r\n|                             |                                 | param.                                             
|\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``set_axis``                | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``set_value``               | D                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``shape``                   | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``shift``                   | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``size``                    | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``skew``                    | P                               | Modin defaults to pandas if given the ``level``    |\r\n|                             |                                 | param.                                             
|\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``sort_index``              | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``sort_values``             | Y                               | Order of indexes that have the same sort key       |\r\n|                             |                                 | is not guaranteed to be the same across sorts.     |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``sparse``                  | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``squeeze``                 | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``std``                     | P                               | Modin defaults to pandas if given the ``level``    |\r\n|                             |                                 | param.                                             
|\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``str``                     | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``strides``                 | D                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``sub``                     | Y                               | See ``add``                                        |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``subtract``                | Y                               | See ``add``                                        |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``sum``                     | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``swapaxes``                | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``swaplevel``               | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``tail``                    | Y                               |                                                    
|\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``take``                    | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``to_clipboard``            | D                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``to_csv``                  | D                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``to_dict``                 | D                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``to_excel``                | D                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``to_frame``                | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``to_hdf``                  | D                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``to_json``                 | D                               |                                                    
|\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``to_latex``                | D                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``to_list``                 | D                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``to_numpy``                | D                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``to_period``               | D                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``to_pickle``               | D                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``to_sql``                  | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``to_string``               | D                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``to_timestamp``            | D                               |                                                    
|\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``to_xarray``               | D                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``tolist``                  | D                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``transform``               | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``transpose``               | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``truediv``                 | Y                               | See ``add``                                        |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``truncate``                | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``tz_convert``              | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``tz_localize``             | Y                               |                                                    
|\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``unique``                  | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``unstack``                 | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``update``                  | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``valid``                   | D                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``value_counts``            | Y                               | The indices order of resulting object may differ   |\r\n|                             |                                 | from pandas.                                       |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``values``                  | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``var``                     | P                               | Modin defaults to pandas if given the ``level``    |\r\n|                             |                                 | param.                                             
|\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``view``                    | D                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n| ``where``                   | Y                               |                                                    |\r\n+-----------------------------+---------------------------------+----------------------------------------------------+\r\n\r\n.. _`GitHub repository`: https://github.com/modin-project/modin/issues\r\n"
  },
  {
    "path": "docs/supported_apis/utilities_supported.rst",
    "content": "pandas Utilities Supported\r\n==========================\r\n\r\nIf you run ``import modin.pandas as pd``, the following operations are available from\r\n``pd.<op>``, e.g. ``pd.concat``. If you do not see an operation that pandas enables and\r\nwould like to request it, feel free to `open an issue`_. Make sure you tell us your\r\nprimary use-case so we can make it happen faster!\r\n\r\nThe following table is structured as follows: The first column contains the method name.\r\nThe second column is a flag for whether or not there is an implementation in Modin for\r\nthe method in the left column. ``Y`` stands for yes, ``N`` stands for no, ``P`` stands\r\nfor partial (meaning some parameters may not be supported yet), and ``D`` stands for\r\ndefault to pandas.\r\n\r\n+---------------------------+---------------------------------+----------------------------------------------------+\r\n| Utility method            | Modin Implementation? (Y/N/P/D) | Notes for Current implementation                   |\r\n+---------------------------+---------------------------------+----------------------------------------------------+\r\n| `pd.concat`_              | Y                               |                                                    |\r\n+---------------------------+---------------------------------+----------------------------------------------------+\r\n| `pd.eval`_                | Y                               |                                                    |\r\n+---------------------------+---------------------------------+----------------------------------------------------+\r\n| `pd.unique`_              | Y                               |                                                    |\r\n+---------------------------+---------------------------------+----------------------------------------------------+\r\n| ``pd.value_counts``       | Y                               | The indices order of resulting object may differ   |\r\n|         
                  |                                 | from pandas.                                       |\r\n+---------------------------+---------------------------------+----------------------------------------------------+\r\n| `pd.cut`_                 | D                               |                                                    |\r\n+---------------------------+---------------------------------+----------------------------------------------------+\r\n| `pd.to_numeric`_          | D                               |                                                    |\r\n+---------------------------+---------------------------------+----------------------------------------------------+\r\n| `pd.factorize`_           | D                               |                                                    |\r\n+---------------------------+---------------------------------+----------------------------------------------------+\r\n| `pd.from_dummies`_        | D                               |                                                    |\r\n+---------------------------+---------------------------------+----------------------------------------------------+\r\n| `pd.qcut`_                | D                               |                                                    |\r\n+---------------------------+---------------------------------+----------------------------------------------------+\r\n| ``pd.match``              | D                               |                                                    |\r\n+---------------------------+---------------------------------+----------------------------------------------------+\r\n| `pd.to_datetime`_         | D                               |                                                    |\r\n+---------------------------+---------------------------------+----------------------------------------------------+\r\n| `pd.get_dummies`_         | Y                               |                           
                         |\r\n+---------------------------+---------------------------------+----------------------------------------------------+\r\n| `pd.date_range`_          | D                               |                                                    |\r\n+---------------------------+---------------------------------+----------------------------------------------------+\r\n| `pd.bdate_range`_         | D                               |                                                    |\r\n+---------------------------+---------------------------------+----------------------------------------------------+\r\n| `pd.to_timedelta`_        | D                               |                                                    |\r\n+---------------------------+---------------------------------+----------------------------------------------------+\r\n| ``pd.options``            | Y                               |                                                    |\r\n+---------------------------+---------------------------------+----------------------------------------------------+\r\n\r\nOther objects & structures\r\n--------------------------\r\n\r\nThis list is a list of objects not currently distributed by Modin. All of these objects\r\nare compatible with the distributed components of Modin. 
If you are interested in\r\ncontributing a distributed version of any of these objects, feel free to open a\r\n`pull request`_.\r\n\r\n* Panel\r\n* Index\r\n* MultiIndex\r\n* CategoricalIndex\r\n* DatetimeIndex\r\n* Timedelta\r\n* Timestamp\r\n* NaT\r\n* PeriodIndex\r\n* Categorical\r\n* Interval\r\n* UInt8Dtype\r\n* UInt16Dtype\r\n* UInt32Dtype\r\n* UInt64Dtype\r\n* SparseDtype\r\n* Int8Dtype\r\n* Int16Dtype\r\n* Int32Dtype\r\n* Int64Dtype\r\n* CategoricalDtype\r\n* DatetimeTZDtype\r\n* IntervalDtype\r\n* PeriodDtype\r\n* RangeIndex\r\n* TimedeltaIndex\r\n* IntervalIndex\r\n* IndexSlice\r\n* TimeGrouper\r\n* Grouper\r\n* array\r\n* Period\r\n* DateOffset\r\n* ExcelWriter\r\n* SparseArray\r\n\r\n.. _open an issue: https://github.com/modin-project/modin/issues\r\n.. _pull request: https://github.com/modin-project/modin/pulls\r\n.. _`pd.concat`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.concat.html#pandas.concat\r\n.. _`pd.eval`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.eval.html#pandas.eval\r\n.. _`pd.unique`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.unique.html#pandas.unique\r\n.. _`pd.cut`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.cut.html#pandas.cut\r\n.. _`pd.to_numeric`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.to_numeric.html#pandas.to_numeric\r\n.. _`pd.factorize`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.factorize.html#pandas.factorize\r\n.. _`pd.from_dummies`: https://pandas.pydata.org/docs/reference/api/pandas.from_dummies.html#pandas-from-dummies\r\n.. _`pd.qcut`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.qcut.html#pandas.qcut\r\n.. _`pd.to_datetime`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.to_datetime.html#pandas.to_datetime\r\n.. _`pd.get_dummies`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.get_dummies.html#pandas.get_dummies\r\n.. 
_`pd.date_range`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.date_range.html#pandas.date_range\r\n.. _`pd.bdate_range`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.bdate_range.html#pandas.bdate_range\r\n.. _`pd.to_timedelta`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.to_timedelta.html#pandas.to_timedelta\r\n"
  },
  {
    "path": "docs/usage_guide/advanced_usage/batch.rst",
    "content": "Batch Pipline API Usage Guide\n=============================\n\nModin provides an experimental batching feature that pipelines row-parallel queries. This feature \nis currently only supported for the ``PandasOnRay`` engine. Please note that this feature is experimental\nand behavior or interfaces could be changed.\n\nUsage examples\n--------------\n\nIn examples below we build and run some pipelines. It is important to note that the queries passed to\nthe pipeline operate on Modin DataFrame partitions, which are backed by ``pandas``. When using ``pandas``-\nmodule level functions, please make sure to import and use ``pandas`` rather than ``modin.pandas``.\n\nSimple Batch Pipelining\n^^^^^^^^^^^^^^^^^^^^^^^\n\nThis example walks through a simple batch pipeline in order to familiarize the user with the API.\n\n.. code-block:: python\n\n    from modin.experimental.batch import PandasQueryPipeline\n    import modin.pandas as pd\n    import numpy as np\n\n    df = pd.DataFrame(\n        np.random.randint(0, 100, (100, 100)),\n        columns=[f\"col {i}\" for i in range(1, 101)],\n    ) # Build the dataframe we will pipeline.\n    pipeline = PandasQueryPipeline(df) # Build the pipeline.\n    pipeline.add_query(lambda df: df + 1, is_output=True) # Add the first query and specify that\n                                                          # it is an output query.\n    pipeline.add_query(\n        lambda df: df.rename(columns={f\"col {i}\":f\"col {i-1}\" for i in range(1, 101)})\n    ) # Add a second query.\n    pipeline.add_query(\n        lambda df: df.drop(columns=['col 99']),\n        is_output=True,\n    ) # Add a third query and specify that it is an output query.\n    new_df = pd.DataFrame(\n        np.ones((100, 100)),\n        columns=[f\"col {i}\" for i in range(1, 101)],\n    ) # Build a second dataframe that we will pipeline now instead.\n    pipeline.update_df(new_df) # Update the dataframe that we will pipeline to be `new_df`\n             
                  # instead of `df`.\n    result_dfs = pipeline.compute_batch() # Begin batch processing.\n\n    # Print pipeline results\n    print(f\"Result of Query 1:\\n{result_dfs[0]}\")\n    print(f\"Result of Query 2:\\n{result_dfs[1]}\")\n    # Output IDs can also be specified\n    pipeline = PandasQueryPipeline(df) # Build the pipeline.\n    pipeline.add_query(\n        lambda df: df + 1,\n        is_output=True,\n        output_id=1,\n    ) # Add the first query, specify that it is an output query, as well as specify an output id.\n    pipeline.add_query(\n        lambda df: df.rename(columns={f\"col {i}\":f\"col {i-1}\" for i in range(1, 101)})\n    ) # Add a second query.\n    pipeline.add_query(\n        lambda df: df.drop(columns=['col 99']),\n        is_output=True,\n        output_id=2,\n    ) # Add a third query, specify that it is an output query, and specify an output_id.\n    result_dfs = pipeline.compute_batch() # Begin batch processing.\n\n    # Print pipeline results - should be a dictionary mapping Output IDs to resulting dataframes:\n    print(f\"Mapping of Output ID to dataframe:\\n{result_dfs}\")\n    # Print results\n    for query_id, res_df in result_dfs.items():\n        print(f\"Query {query_id} resulted in\\n{res_df}\")\n\nBatch Pipelining with Postprocessing\n^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n\nA postprocessing function can also be provided when calling ``pipeline.compute_batch``. The example\nbelow runs a similar pipeline as above, but the postprocessing function writes the output dfs to \na parquet file.\n\n.. 
code-block:: python\n\n    from modin.experimental.batch import PandasQueryPipeline\n    import modin.pandas as pd\n    import numpy as np\n    import os\n    import shutil\n\n    df = pd.DataFrame(\n        np.random.randint(0, 100, (100, 100)),\n        columns=[f\"col {i}\" for i in range(1, 101)],\n    ) # Build the dataframe we will pipeline.\n    pipeline = PandasQueryPipeline(df) # Build the pipeline.\n    pipeline.add_query(\n        lambda df: df + 1,\n        is_output=True,\n        output_id=1,\n    ) # Add the first query, specify that it is an output query, as well as specify an output id.\n    pipeline.add_query(\n        lambda df: df.rename(columns={f\"col {i}\":f\"col {i-1}\" for i in range(1, 101)})\n    ) # Add a second query.\n    pipeline.add_query(\n        lambda df: df.drop(columns=['col 99']),\n        is_output=True,\n        output_id=2,\n    ) # Add a third query, specify that it is an output query, and specify an output_id.\n    def postprocessing_func(df, output_id, partition_id):\n        filepath = f\"query_{output_id}/\"\n        os.makedirs(filepath, exist_ok=True)\n        filepath += f\"part-{partition_id:04d}.parquet\"\n        df.to_parquet(filepath)\n        return df\n    result_dfs = pipeline.compute_batch(\n        postprocessor=postprocessing_func,\n        pass_partition_id=True,\n        pass_output_id=True,\n    ) # Begin computation, pass in a postprocessing function, and specify that partition ID and \n      # output ID should be passed to that postprocessing function.\n\n    print(os.system(\"ls query_1/\")) # Should show `NPartitions.get()` parquet files - which\n                                    # correspond to partitions of the output of query 1.\n    print(os.system(\"ls query_2/\")) # Should show `NPartitions.get()` parquet files - which\n                                    # correspond to partitions of the output of query 2.\n\n    for query_id, res_df in result_dfs.items():\n        written_df = 
pd.read_parquet(f\"query_{query_id}/\")\n        shutil.rmtree(f\"query_{query_id}/\") # Clean up\n        print(f\"Written and Computed DF are \" +\n              f\"{'equal' if res_df.equals(written_df) else 'not equal'} for query {query_id}\")\n\nBatch Pipelining with Fan Out\n^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n\nIf the input dataframe to a query is small (consisting of only one partition), it is possible to \ninduce additional parallelism using the ``fan_out`` argument. The ``fan_out`` argument replicates\nthe input partition, applies the query to each replica, and then coalesces all of the replicas back\nto one partition using the ``reduce_fn`` that must also be specified when ``fan_out`` is ``True``.\n\nIt is possible to control the parallelism via the ``num_partitions`` parameter passed to the\nconstructor of the ``PandasQueryPipeline``. This parameter designates the desired number of partitions,\nand defaults to ``NPartitions.get()`` when not specified. During fan out, the input partition is replicated\n``num_partitions`` times. In the previous examples, ``num_partitions`` was not specified, and so defaulted\nto ``NPartitions.get()``.\n\nThe example below demonstrates the usage of ``fan_out`` and ``num_partitions``. We first demonstrate\nan example of a function that would benefit from this computation pattern:\n\n.. 
code-block:: python\n\n    import glob\n    from PIL import Image\n    import torchvision.transforms as T\n    import torchvision\n\n    transforms = T.Compose([T.ToTensor()])\n    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)\n    model.eval()\n    COCO_INSTANCE_CATEGORY_NAMES = [\n        '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',\n        'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign',\n        'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',\n        'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A', 'N/A',\n        'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',\n        'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',\n        'bottle', 'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',\n        'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',\n        'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table',\n        'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',\n        'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book',\n        'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'\n    ]\n\n    def contains_cat(image, model):\n        image = transforms(image)\n        labels = [COCO_INSTANCE_CATEGORY_NAMES[i] for i in model([image])[0]['labels']]\n        return 'cat' in labels\n\n    def serial_query(df):\n        \"\"\"\n        This function takes as input a dataframe with a single row corresponding to a folder\n        containing images to parse. 
Each image in the folder is passed through a neural network\n        that detects whether it contains a cat, in serial, and a new column is computed for the\n        dataframe that counts the number of images containing cats.\n\n        Parameters\n        ----------\n        df : a dataframe\n            The dataframe to process\n        \n        Returns\n        -------\n        The same dataframe as before, with an additional column containing the count of images \n        containing cats.\n        \"\"\"\n        model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)\n        model.eval()\n        img_folder = df['images'][0]\n        images = sorted(glob.glob(f\"{img_folder}/*.jpg\"))\n        cats = 0\n        for img in images:\n            cats = cats + 1 if contains_cat(Image.open(img), model) else cats\n        df['cat_count'] = cats\n        return df\n    \nTo download the image files to test out this code, run the following bash script, which downloads\nthe images from the fast-ai-coco S3 bucket to a folder called ``images`` in your current working\ndirectory:\n\n.. code-block:: shell\n\n    aws s3 cp s3://fast-ai-coco/coco_tiny.tgz . --no-sign-request; tar -xf coco_tiny.tgz; mkdir \\\n        images; mv coco_tiny/train/* images/; rm -rf coco_tiny; rm -rf coco_tiny.tgz\n\nWe can pipeline that code like so:\n\n.. code-block:: python\n\n    import modin.pandas as pd\n    from modin.experimental.batch import PandasQueryPipeline\n    from time import time\n    df = pd.DataFrame([['images']], columns=['images'])\n    pipeline = PandasQueryPipeline(df)\n    pipeline.add_query(serial_query, is_output=True)\n    serial_start = time()\n    df_with_cat_count = pipeline.compute_batch()[0]\n    serial_end = time()\n    print(f\"Result of pipeline:\\n{df_with_cat_count}\")\n\nWe can induce `8x` parallelism into the pipeline above by combining the ``fan_out`` and ``num_partitions`` parameters like so:\n\n.. 
code-block:: python\n\n    import modin.pandas as pd\n    from modin.experimental.batch import PandasQueryPipeline\n    import shutil\n    from time import time\n    df = pd.DataFrame([['images']], columns=['images'])\n    desired_num_partitions = 8\n    def parallel_query(df, partition_id):\n        \"\"\"\n        This function takes as input a dataframe with a single row corresponding to a folder\n        containing images to parse. It parses `total_images/desired_num_partitions` images every\n        time it is called. A new column is computed for the dataframe that counts the number of\n        images containing cats.\n\n        Parameters\n        ----------\n        df : a dataframe\n            The dataframe to process\n        partition_id : int\n            The partition id of the dataframe that this function runs on.\n        \n        Returns\n        -------\n        The same dataframe as before, with an additional column containing the count of images\n        containing cats.\n        \"\"\"\n        model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)\n        model.eval()\n        img_folder = df['images'][0]\n        images = sorted(glob.glob(f\"{img_folder}/*.jpg\"))\n        total_images = len(images)\n        cats = 0\n        start_index = partition_id * (total_images // desired_num_partitions)\n        if partition_id == desired_num_partitions - 1: # Last partition must parse to end of list\n            images = images[start_index:]\n        else:\n            end_index = (partition_id + 1) * (total_images // desired_num_partitions)\n            images = images[start_index:end_index]\n        for img in images:\n            cats = cats + 1 if contains_cat(Image.open(img), model) else cats\n        df['cat_count'] = cats\n        return df\n\n    def reduce_fn(dfs):\n        \"\"\"\n        Coalesce the results of fanning out the `parallel_query` query.\n\n        Parameters\n        ----------\n        dfs : a list of 
dataframes\n            The resulting dataframes from fanning out `parallel_query`\n        \n        Returns\n        -------\n        A new dataframe whose `cat_count` column is the sum of the `cat_count` column of all\n        dataframes in `dfs`\n        \"\"\"\n        df = dfs[0]\n        cat_count = df['cat_count'][0]\n        for dataframe in dfs[1:]:\n            cat_count += dataframe['cat_count'][0]\n        df['cat_count'] = cat_count\n        return df\n    pipeline = PandasQueryPipeline(df, desired_num_partitions)\n    pipeline.add_query(\n        parallel_query,\n        fan_out=True,\n        reduce_fn=reduce_fn,\n        is_output=True,\n        pass_partition_id=True\n    )\n    parallel_start = time()\n    df_with_cat_count = pipeline.compute_batch()[0]\n    parallel_end = time()\n    print(f\"Result of pipeline:\\n{df_with_cat_count}\")\n    print(f\"Total Time in Serial: {serial_end - serial_start}\")\n    print(f\"Total time with induced parallelism: {parallel_end - parallel_start}\")\n    shutil.rmtree(\"images/\") # Clean up\n\nBatch Pipelining with Dynamic Repartitioning\n^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n\nSimilarly, it is also possible to hint to the Pipeline API to repartition after a node completes\ncomputation. This is currently only supported if the input dataframe consists of only one partition.\nThe number of partitions after repartitioning is controlled by the ``num_partitions`` parameter\npassed to the constructor of the ``PandasQueryPipeline``.\n\nThe following example demonstrates how to use the ``repartition_after`` parameter.\n\n.. 
code-block:: python\n\n    import modin.pandas as pd\n    from modin.experimental.batch import PandasQueryPipeline\n    import numpy as np\n\n    small_df = pd.DataFrame([[1, 2, 3]]) # Create a small dataframe\n    \n    def increase_dataframe_size(df):\n        import pandas\n        new_df = pandas.concat([df] * 1000)\n        new_df = new_df.reset_index(drop=True) # Get a new range index that isn't duplicated\n        return new_df\n    \n    desired_num_partitions = 24 # We will repartition to 24 partitions\n\n    def add_partition_id_to_df(df, partition_id):\n        import pandas\n        new_col = pandas.Series([partition_id]*len(df), name=\"partition_id\", index=df.index)\n        return pandas.concat([df, new_col], axis=1)\n    \n    pipeline = PandasQueryPipeline(small_df, desired_num_partitions)\n    pipeline.add_query(increase_dataframe_size, repartition_after=True)\n    pipeline.add_query(add_partition_id_to_df, pass_partition_id=True, is_output=True)\n    result_df = pipeline.compute_batch()[0]\n    print(f\"Number of partitions passed to second query: \" + \n          f\"{len(np.unique(result_df['partition_id'].values))}\")\n    print(f\"Result of pipeline:\\n{result_df}\")\n\n"
  },
  {
    "path": "docs/usage_guide/advanced_usage/index.rst",
    "content": "Advanced Usage\n==============\n\n.. toctree::\n   :titlesonly:\n   :hidden:\n\n   /flow/modin/distributed/dataframe/pandas\n   spreadsheets_api\n   progress_bar\n   modin_xgboost\n   modin_logging\n   modin_metrics\n   batch\n   modin_engines\n\n.. meta::\n    :description lang=en:\n        Description of Modin's advanced features.\n\nModin aims to not only optimize pandas, but also provide a comprehensive,\nintegrated toolkit for data scientists. We are actively developing data science tools\nsuch as DataFrame spreadsheet integration, DataFrame algebra, progress bars, SQL queries\non DataFrames, and more. Join us on `Slack`_ for the latest updates!\n\nModin engines\n-------------\n\nModin supports a series of execution engines such as Ray_, Dask_, `MPI through unidist`_,\neach of which might be a more beneficial choice for a specific scenario. When doing the first operation\nwith Modin it automatically initializes one of the engines to further perform distributed/parallel computation.\nIf you are familiar with a concrete execution engine, it is possible to initialize the engine on your own and\nModin will automatically attach to it. Refer to :doc:`Modin engines </usage_guide/advanced_usage/modin_engines>` page\nfor more details.\n\nAdditional APIs\n---------------\n\nModin also supports these additional APIs on top of pandas to improve user experience.\n\n- :py:meth:`~modin.pandas.DataFrame.modin.to_pandas` -- convert a Modin DataFrame/Series to a pandas DataFrame/Series.\n- :py:meth:`~modin.pandas.DataFrame.get_backend` -- Get the ``Backend`` :doc:`configuration variable </flow/modin/config>` of this ``DataFrame``.\n- :py:meth:`~modin.pandas.DataFrame.move_to` -- Move data and execution for this ``DataFrame`` to the given ``Backend`` :doc:`configuration variable </flow/modin/config>`. 
This method is an alias for ``DataFrame.set_backend``.\n- :py:meth:`~modin.pandas.DataFrame.set_backend` -- Move data and execution for this ``DataFrame`` to the given ``Backend`` :doc:`configuration variable </flow/modin/config>`. This method is an alias for ``DataFrame.move_to``.\n- :py:func:`~modin.pandas.io.from_pandas` -- convert a pandas DataFrame to a Modin DataFrame.\n- :py:meth:`~modin.pandas.DataFrame.modin.to_ray` -- convert a Modin DataFrame/Series to a Ray Dataset.\n- :py:func:`~modin.pandas.io.from_ray` -- convert a Ray Dataset to a Modin DataFrame.\n- :py:meth:`~modin.pandas.DataFrame.modin.to_dask` -- convert a Modin DataFrame/Series to a Dask DataFrame/Series.\n- :py:func:`~modin.pandas.io.from_dask` -- convert a Dask DataFrame to a Modin DataFrame.\n- :py:func:`~modin.pandas.io.from_map` -- create a Modin DataFrame from a map function applied to an iterable object.\n- :py:func:`~modin.pandas.io.from_arrow` -- convert an Arrow Table to a Modin DataFrame.\n- :py:func:`~modin.experimental.pandas.read_csv_glob` -- read multiple files in a directory.\n- :py:func:`~modin.experimental.pandas.read_sql` -- add optional parameters for the database connection.\n- :py:func:`~modin.experimental.pandas.read_custom_text` -- read custom text data from file.\n- :py:func:`~modin.experimental.pandas.read_pickle_glob`  -- read multiple pickle files in a directory.\n- :py:func:`~modin.experimental.pandas.read_parquet_glob`  -- read multiple parquet files in a directory.\n- :py:func:`~modin.experimental.pandas.read_json_glob`  -- read multiple json files in a directory.\n- :py:func:`~modin.experimental.pandas.read_xml_glob`  -- read multiple xml files in a directory.\n- :py:meth:`~modin.pandas.DataFrame.modin.to_pickle_glob` -- write to multiple pickle files in a directory.\n- :py:meth:`~modin.pandas.DataFrame.modin.to_parquet_glob` -- write to multiple parquet files in a directory.\n- :py:meth:`~modin.pandas.DataFrame.modin.to_json_glob` -- write to multiple json 
files in a directory.\n- :py:meth:`~modin.pandas.DataFrame.modin.to_xml_glob` -- write to multiple xml files in a directory.\n\nDataFrame partitioning API\n--------------------------\n\nModin DataFrame provides an API to directly access partitions: you can extract physical partitions from\na :py:class:`~modin.pandas.dataframe.DataFrame`, modify their structure by reshuffling or applying some\nfunctions, and create a DataFrame from those modified partitions. Visit\n:doc:`pandas partitioning API </flow/modin/distributed/dataframe/pandas>` documentation to learn more.\n\nModin Spreadsheet API\n---------------------\n\nThe Spreadsheet API for Modin allows you to render the dataframe as a spreadsheet to easily explore\nyour data and perform operations on a graphical user interface. The API also includes features for recording\nthe changes made to the dataframe and exporting them as reproducible code. Built on top of Modin and SlickGrid,\nthe spreadsheet interface is able to provide interactive response times even at a scale of billions of rows.\nSee our `Modin Spreadsheet API documentation`_ for more details.\n\n.. figure:: /img/modin_spreadsheet_mini_demo.gif\n   :align: center\n   :width: 650px\n   :height: 350px\n\nProgress Bar\n------------\n\nVisual progress bar for Dataframe operations such as groupby and fillna, as well as for file reading operations such as\nread_csv. Built using the `tqdm`_ library and Ray execution engine. See `Progress Bar documentation`_ for more details.\n\n.. figure:: /img/progress_bar_example.png\n   :align: center\n\nDataframe Algebra\n-----------------\n\nA minimal set of operators that can be composed to express any dataframe query for use in query planning and optimization.\nSee our `paper`_ for more information, and full documentation is coming soon!\n\nDistributed XGBoost on Modin\n----------------------------\n\nModin provides an implementation of `distributed XGBoost`_ machine learning algorithm on Modin DataFrames. 
See our\n:doc:`Distributed XGBoost on Modin documentation <modin_xgboost>` for details about installation and usage, as well as\n:doc:`Modin XGBoost architecture documentation </flow/modin/experimental/xgboost>` for information about implementation and\ninternal execution flow.\n\nLogging with Modin\n------------------\n\nModin logging offers users greater insight into their queries by logging internal Modin API calls, partition metadata,\nand system memory. Logging is disabled by default, but when it is enabled, log files are written to a local ``.modin`` directory\nat the same directory level as the notebook/script used to run Modin. See our :doc:`Logging with Modin documentation <modin_logging>`\nfor usage information.\n\nBatch Pipeline API\n------------------\nModin provides an experimental batched API that pipelines row-parallel queries. See our :doc:`Batch Pipeline API Usage Guide <batch>`\nfor a walkthrough on how to use this feature, as well as :doc:`Batch Pipeline API documentation </flow/modin/experimental/batch>`\nfor more information about the API.\n\nFuzzydata Testing\n-----------------\n\nAn experimental GitHub Action on pull request has been added to Modin, which automatically runs the Modin codebase against\n`fuzzydata`, a random dataframe workflow generator. The resulting workflow that was used to test the Modin codebase can be\ndownloaded as an artifact from the GitHub Actions tab for further inspection. See `fuzzydata`_ for more details.\n\n.. _`Modin Spreadsheet API documentation`: spreadsheets_api.html\n.. _`Progress Bar documentation`: progress_bar.html\n.. _`Paper`: https://arxiv.org/pdf/2001.00888.pdf\n.. _`Slack`: https://modin.org/slack.html\n.. _`tqdm`: https://github.com/tqdm/tqdm\n.. _`distributed XGBoost`: https://medium.com/intel-analytics-software/distributed-xgboost-with-modin-on-ray-fc17edef7720\n.. _`fuzzydata`: https://github.com/suhailrehman/fuzzydata\n.. _Ray: https://github.com/ray-project/ray\n.. 
_Dask: https://github.com/dask/distributed\n.. _`MPI through unidist`: https://github.com/modin-project/unidist\n"
  },
  {
    "path": "docs/usage_guide/advanced_usage/modin_engines.rst",
    "content": "Modin engines\n=============\n\nAs a rule, you don't have to worry about initialization of an execution engine as\nModin itself automatically initializes one when performing the first operation.\nAlso, Modin has a broad range of :doc:`configuration settings </flow/modin/config>`, which\nyou can use to configure an execution engine. If there is a reason to initialize an execution engine\non your own and you are sure what to do, Modin will automatically attach to whichever engine is available.\nBelow, you can find some examples on how to initialize a specific execution engine on your own.\n\nRay\n---\n\nYou can initialize Ray engine with a specific number of CPUs (worker processes) to perform computation.\n\n.. code-block:: python\n\n  import ray\n  import modin.config as modin_cfg\n\n  ray.init(num_cpus=<N>)\n  modin_cfg.Engine.put(\"ray\") # Modin will use Ray engine\n  modin_cfg.CpuCount.put(<N>)\n\nTo get more details on all possible parameters for initialization refer to `Ray documentation`_.\n\nDask\n----\n\nYou can initialize Dask engine with a specific number of worker processes and threads per worker to perform computation.\n\n.. code-block:: python\n\n  from distributed import Client\n  import modin.config as modin_cfg\n\n  client = Client(n_workers=<N1>, threads_per_worker=<N2>)\n  modin_cfg.Engine.put(\"dask\") # # Modin will use Dask engine\n  modin_cfg.CpuCount.put(<N1>)\n\nTo get more details on all possible parameters for initialization refer to `Dask Distributed documentation`_.\n\nMPI through unidist\n-------------------\n\nYou can initialize MPI through unidist engine with a specific number of CPUs (worker processes) to perform computation.\n\n.. 
code-block:: python\n\n  import unidist\n  import unidist.config as unidist_cfg\n  import modin.config as modin_cfg\n\n  unidist_cfg.Backend.put(\"mpi\")\n  unidist_cfg.CpuCount.put(<N>)\n  unidist.init()\n\n  modin_cfg.Engine.put(\"unidist\") # Modin will use MPI through unidist engine\n  modin_cfg.CpuCount.put(<N>)\n\nTo get more details on all possible parameters for initialization refer to `unidist documentation`_.\n\n.. _`Ray documentation`: https://docs.ray.io/en/latest\n.. _Dask Distributed documentation: https://distributed.dask.org/en/latest\n.. _`unidist documentation`: https://unidist.readthedocs.io/en/latest\n"
  },
  {
    "path": "docs/usage_guide/advanced_usage/modin_logging.rst",
    "content": "Modin Logging\n=============\n\nModin logging offers users greater insight into their queries by logging internal Modin API calls, partition metadata,\nand profiling system memory. When Modin logging is enabled (default disabled), log files are written to a local ``.modin`` directory at the same\ndirectory level as the notebook/script used to run Modin.\n\nThe logs generated by Modin Logging will be written to a ``.modin/logs/job_<uuid>`` directory, uniquely named after the job uuid.\nThe logs that contain the Modin API stack traces are named ``trace.log``. The logs that contain the memory utilization metrics are\nnamed ``memory.log``. By default, if any log file exceeds 10MB (configurable with ``LogFileSize``), that file will be saved and a \nseparate log file will be created. For instance, if users have 20MB worth of Modin API logs, they can expect to find ``trace.log.1`` \nand ``trace.log.2`` in the ``.modin/logs/job_<uuid>`` directory. After ``10 * LogFileSize`` MB or by default 100MB of logs, the logs will \nrollover and the original log files beginning with ``trace.log.1`` will be overwritten with the new log lines.\n\n**Developer Warning:** In some cases, running services like JupyterLab in the ``modin/modin`` directory may result in circular dependency issues.\nThis is due to a naming conflict between the ``modin/logging`` directory and the Python ``logging`` module, which may be used as a default in\nsuch environments. To resolve this, please run Jupyterlab or other similar services from directories other than ``modin/modin``.\n\nUsage examples\n--------------\n\nIn the example below, we enable logging for internal Modin API calls, partition metadata and memory profiling.\nWe can set the granularity (in seconds) at which the system memory utilization is logged using ``LogMemoryInterval``.\nWe can also set the maximum size of the logs (in MBs) using ``LogFileSize``.\n\n.. 
code-block:: python\n\n  import modin.pandas as pd\n  from modin.config import LogMode, LogMemoryInterval, LogFileSize\n  LogMode.enable()\n  LogMemoryInterval.put(2) # Defaults to 5 seconds, new interval is 2 seconds\n  LogFileSize.put(5) # Defaults to 10 MB per log file, new size is 5 MB\n\n  # User code goes here\n\nDisable Modin logging like so:\n\n.. code-block:: python\n\n  import modin.pandas as pd\n  from modin.config import LogMode\n  LogMode.disable()\n\n  # User code goes here\n\nIn Modin the lower-level functionality is logged in debug level, and higher level functionality in info level.\nBy default when logging is enabled in Modin, both high level and low level functionality are logged.\nThe below example script could be used to switch between logging all functions vs only logging higher level functions.\nSetting logger level to ``logging.INFO`` logs only higher level functions.\n\n.. code-block:: python\n\n  import modin.pandas as pd\n  from modin.logging.config import get_logger\n  from modin.config import LogMode\n  import logging\n  LogMode.enable()\n  logger = get_logger()\n  logger.setLevel(logging.INFO) # Replace with logger.setLevel(logging.DEBUG)  for lower level logs\n  df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})\n  df = pd.concat([df, df])\n\n\nDebugging from user defined functions:\n\n.. warning:: \n    When attempting to use Modin logging in user defined functions that execute in workers for logging lower-level operators\n    as in example below, multiple log directories ``.modin/logs/job_**`` would be created for each worker executing the UDF.\n\n.. 
code-block:: python\n\n  import modin.pandas as pd\n\n  def udf(x):\n      from modin.config import LogMode\n      \n      LogMode.enable()\n      \n      return x + 1\n  \n  modin_df = pd.DataFrame([0, 1, 2, 3])\n  print(modin_df.map(udf))\n\nSo the **recommended** approach would be to use a different logger, as in the snippet below,\nto log from user defined functions that execute on workers.\nBelow is an example of logging from a UDF. For this, the logger config has to be specified inside the UDF that would execute on a remote worker.\n\n.. code-block:: python\n\n  import logging\n  import modin.pandas as pd\n  \n  def udf(x):\n      logging.basicConfig(filename='modin_udf.log', level=logging.INFO)\n      logging.info(\"This log message will be written to modin_udf.log \")\n\n      # User code goes here\n      return x + 1\n\n  modin_df = pd.DataFrame([0, 1, 2, 3])\n  print(modin_df.map(udf))\n"
  },
  {
    "path": "docs/usage_guide/advanced_usage/modin_metrics.rst",
    "content": "Modin Metrics\n=============\n\nModin allows for third-party systems to register a metrics handler to collect specific API statistics.\nMetrics have a name and a value, can be aggregated, discarded, or emitted without impact to the program.\n\nCPU load, memory usage, and disk usage are all typical metrics, but Modin currently only emits metrics on API timings which can be used to optimize end-user interactive performance. New metrics may \nbe added in the future.\n\nIt is the responsibility of the handler to process or forward these metrics. The name of the metric will \nbe in \"dot format\" and all lowercase, similar to graphite or rrd. The value is an integer or float.\n\nExample metric names include:\n\n* 'modin.core-dataframe.pandasdataframe.copy_index_cache'\n* 'modin.core-dataframe.pandasdataframe.transpose'\n* 'modin.query-compiler.pandasquerycompiler.transpose'\n* 'modin.query-compiler.basequerycompiler.columnarize'\n* 'modin.pandas-api.series.__init__'\n* 'modin.pandas-api.dataframe._reduce_dimension'\n* 'modin.pandas-api.dataframe.sum'\n\nHandlers are functions of the form: `fn(str, int|float)` and can be registered with:\n\n.. code-block:: python\n\n  import modin.pandas as pd\n  from modin.logging.metrics import add_metric_handler\n\n  def func(name: str, value: int | float):\n    print(f\"Got metric {name} value {value}\")\n\n  add_metric_handler(func)\n\n.. warning:: \n  A metric handler should be non-blocking, returning within 100ms, although this is not enforced. It must not throw exceptions or it will\n  be deregistered. These restrictions are to help guard against the implementation of a metrics collector which would impact\n  interactive performance significantly. The data from metrics should generally be offloaded to another system for processing\n  and not involve any blocking network calls.\n\nMetrics are enabled by default. Modin metrics can be disabled like so:\n\n.. 
code-block:: python\n\n  import modin.pandas as pd\n  from modin.config import MetricsMode\n  MetricsMode.disable()\n"
  },
  {
    "path": "docs/usage_guide/advanced_usage/modin_xgboost.rst",
    "content": "Distributed XGBoost on Modin\n============================\n\nModin provides an implementation of `distributed XGBoost`_ machine learning\nalgorithm on Modin DataFrames. Please note that this feature is experimental and behavior or\ninterfaces could be changed.\n\nInstall XGBoost on Modin\n------------------------\n\nModin comes with all the dependencies except ``xgboost`` package by default.\nCurrently, distributed XGBoost on Modin is only supported on the Ray execution engine, therefore, see\nthe :doc:`installation page </getting_started/installation>` for more information on installing Modin with the Ray engine.\nTo install ``xgboost`` package you can use ``pip``:\n\n.. code-block:: bash\n\n  pip install xgboost\n\n\nXGBoost Train and Predict\n-------------------------\n\nDistributed XGBoost functionality is placed in ``modin.experimental.xgboost`` module.\n``modin.experimental.xgboost`` provides a drop-in replacement API for ``train`` and ``Booster.predict`` xgboost functions.\n\n.. automodule:: modin.experimental.xgboost\n  :noindex:\n  :members: train\n\n.. autoclass:: modin.experimental.xgboost.Booster\n  :noindex:\n  :members: predict\n\n\nModinDMatrix\n------------\n\nData is passed to ``modin.experimental.xgboost`` functions via a Modin ``DMatrix`` object.\n\n.. automodule:: modin.experimental.xgboost\n  :noindex:\n  :members: DMatrix\n\nCurrently, the Modin ``DMatrix`` supports ``modin.pandas.DataFrame`` only as an input.\n\n\nA Single Node / Cluster setup\n-----------------------------\n\nThe XGBoost part of Modin uses a Ray resources by similar way as all Modin functions.\n\nTo start the Ray runtime on a single node:\n\n.. code-block:: python\n\n  import ray\n  # Look at the Ray documentation with respect to the Ray configuration suited to you most.\n  ray.init()\n\nIf you already had the Ray cluster you can connect to it by next way:\n\n.. 
code-block:: python\n\n  import ray\n  ray.init(address='auto')\n\nDetailed information about initializing the Ray runtime can be found on the `starting ray`_ page.\n\n\nUsage example\n-------------\n\nIn the example below, we train an XGBoost model using `the Iris Dataset`_ and get predictions on the same data.\nAll processing will be in `single node` mode.\n\n.. code-block:: python\n\n  from sklearn import datasets\n\n  import ray\n  # Look at the Ray documentation with respect to the Ray configuration suited to you most.\n  ray.init() # Start the Ray runtime for single-node\n\n  import modin.pandas as pd\n  import modin.experimental.xgboost as xgb\n\n  # Load iris dataset from sklearn\n  iris = datasets.load_iris()\n\n  # Create Modin DataFrames\n  X = pd.DataFrame(iris.data)\n  y = pd.DataFrame(iris.target)\n\n  # Create DMatrix\n  dtrain = xgb.DMatrix(X, y)\n  dtest = xgb.DMatrix(X, y)\n\n  # Set training parameters\n  xgb_params = {\n      \"eta\": 0.3,\n      \"max_depth\": 3,\n      \"objective\": \"multi:softprob\",\n      \"num_class\": 3,\n      \"eval_metric\": \"mlogloss\",\n  }\n  steps = 20\n\n  # Create dict for evaluation results\n  evals_result = dict()\n\n  # Run training\n  model = xgb.train(\n      xgb_params,\n      dtrain,\n      steps,\n      evals=[(dtrain, \"train\")],\n      evals_result=evals_result\n  )\n\n  # Print evaluation results\n  print(f'Evals results:\\n{evals_result}')\n\n  # Predict results\n  prediction = model.predict(dtest)\n\n  # Print prediction results\n  print(f'Prediction results:\\n{prediction}')\n\n\n\n.. _Dataframe: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html\n.. _`starting ray`: https://docs.ray.io/en/master/starting-ray.html\n.. _`the Iris Dataset`: https://scikit-learn.org/stable/auto_examples/datasets/plot_iris_dataset.html\n.. _`distributed XGBoost`: https://medium.com/intel-analytics-software/distributed-xgboost-with-modin-on-ray-fc17edef7720\n"
  },
  {
    "path": "docs/usage_guide/advanced_usage/progress_bar.rst",
    "content": "Progress Bar\n============\n\nThe progress bar allows users to see the estimated progress and completion time of each line they run, \nin environments such as a shell or Jupyter notebook.\n\n.. figure:: /img/progress_bar.gif\n   :align: center\n\nQuickstart\n\"\"\"\"\"\"\"\"\"\"\n\nThe progress bar uses the `tqdm` library to visualize displays:\n\n.. code-block:: bash\n\n   pip install tqdm\n\n\nImport the progress bar into your notebook by running the following:\n\n\n.. code-block:: python\n\n    from modin.config import ProgressBar\n    ProgressBar.enable()\n"
  },
  {
    "path": "docs/usage_guide/advanced_usage/spreadsheets_api.rst",
    "content": "Modin Spreadsheets API\n======================\n\nGetting started\n---------------\nInstall Modin-spreadsheet using pip:\n\n.. code-block:: bash\n\n    pip install \"modin[spreadsheet]\"\n\n\nThe following code snippet creates a spreadsheet using the FiveThirtyEight dataset on labor force information by college majors (licensed under CC BY 4.0):\n\n.. code-block:: python\n\n    import modin.pandas as pd\n    import modin.experimental.spreadsheet as mss\n    df = pd.read_csv('https://raw.githubusercontent.com/fivethirtyeight/data/master/college-majors/all-ages.csv')\n    spreadsheet = mss.from_dataframe(df)\n    spreadsheet\n\n\n.. figure:: /img/modin_spreadsheets_installation.png\n    :align: center\n\nBasic Manipulations through User Interface\n------------------------------------------\n\nThe Spreadsheet API allows users to manipulate the DataFrame with simple graphical controls for sorting, filtering, and editing. \n\nHere are the instructions for each operation:\n    * **Sort**: Click on the column header of the column to sort on.\n    * **Filter**: Click on the filter button on the column header and apply the desired filter to the column. The filter dropdown changes depending on the type of the column. Multiple filters are automatically combined.\n    * **Edit Cell**: Double click on a cell and enter the new value.\n    * **Add Rows**: Click on the “Add Row” button in the toolbar to duplicate the last row in the DataFrame. The duplicated values provide a convenient default and can be edited as necessary.\n    * **Remove Rows**: Select row(s) and click the “Remove Row” button. Select a single row by clicking on it. Multiple rows can be selected with Cmd+Click (Windows: Ctrl+Click) on the desired rows or with Shift+Click to specify a range of rows. \n\nSome of these operations can also be done through the spreadsheet’s programmatic interface. Sorts and filters can be reset using the toolbar buttons. 
Edits and adding/removing rows can only be undone manually.\n\nVirtual Rendering\n-----------------\n\nThe spreadsheet will only render data based on the user’s viewport. This allows for quick rendering \neven on very large DataFrames because only a handful of rows are loaded at any given time. As a result, scrolling and viewing your data is smooth and responsive.\n\nTransformation History and Exporting Code\n-----------------------------------------\n\nAll operations on the spreadsheet are recorded and are easily exported as code for sharing or reproducibility. \nThis history is automatically displayed in the history cell, which is generated below the spreadsheet whenever the spreadsheet widget is displayed. \nThe history cell is displayed on default, but this can be turned off. Modin Spreadsheet API provides a few methods for interacting with the history:\n\n    * `SpreadsheetWidget.get_history()`: Retrieves the transformation history in the form of reproducible code. \n    * `SpreadsheetWidget.filter_relevant_history(persist=True)`: Returns the transformation history that contains only code relevant to the final state of the spreadsheet. The `persist` parameter determines whether the internal state and the displayed history is also filtered.\n    * `SpreadsheetWidget.reset_history()`: Clears the history of transformation.\n\nCustomizable Interface\n----------------------\n\nThe spreadsheet widget provides a number of options that allows the user to change the appearance and the interactivity of the spreadsheet. Options include:\n\n    * Row height/Column width\n    * Preventing edits, sorts, or filters on the whole spreadsheet or on a per-column basis\n    * Hiding the toolbar and history cell\n    * Float precision\n    * Highlighting of cells and rows\n    * Viewport size\n\nConverting Spreadsheets To and From Dataframes\n----------------------------------------------\n\n.. 
automodule:: modin.experimental.spreadsheet.general\n    :noindex:\n    :members: from_dataframe\n\n    \n.. automodule:: modin.experimental.spreadsheet.general\n    :noindex:\n    :members: to_dataframe\n\n\nFurther API Documentation\n-------------------------\n\n.. automodule:: modin_spreadsheet.grid\n    :noindex:\n    :members: SpreadsheetWidget"
  },
  {
    "path": "docs/usage_guide/benchmarking.rst",
    "content": "Benchmarking Modin\n==================\n\nSummary\n-------\nTo benchmark a single Modin function, often turning on the\n:doc:`configuration variable </flow/modin/config>`\n:code:`BenchmarkMode` will suffice.\n\nThere is no simple way to benchmark more complex Modin workflows, though\nbenchmark mode or calling ``modin.utils.execute`` on Modin objects may be useful.\nThe :doc:`Modin logs </usage_guide/advanced_usage/modin_logging>` may help you\nidentify bottlenecks in your code, and they may also help profile the execution\nof each Modin function.\n\nModin's execution and benchmark mode\n------------------------------------\n\nMost of Modin's execution happens asynchronously, i.e. in separate processes that run\nindependently of the main program flow. Some execution is also lazy, meaning that it\ndoesn't start immediately once the user calls a Modin function. While Modin provides\nthe same API as pandas, lazy and asynchronous execution can often make it hard to\ntell how much time each Modin function call takes, as well as to compare Modin's\nperformance to pandas and other similar libraries.\n\n.. note::\n    All examples in this doc use the system specified at the bottom of this page.\n\nConsider the following ipython script:\n\n.. code-block:: python\n\n    import modin.pandas as pd\n    from modin.config import MinRowPartitionSize\n    import time\n    import ray\n\n    # Look at the Ray documentation with respect to the Ray configuration suited to you most.\n    ray.init()\n    df = pd.DataFrame(list(range(MinRowPartitionSize.get() * 2)))\n    %time result = df.map(lambda x: time.sleep(0.1) or x)\n    %time print(result)\n\n\nModin takes just 2.68 milliseconds for the ``map``, and 3.78 seconds to print\nthe result. 
However, if we run this script in pandas by replacing\n:code:`import modin.pandas as pd` with :code:`import pandas as pd`, the ``map``\ntakes 6.63 seconds, and printing the result takes just 5.53 milliseconds.\n\nBoth pandas and Modin start executing the ``map`` as soon as the interpreter\nevaluates it. While pandas blocks until the ``map`` has finished, Modin just kicks\noff asynchronous functions in remote ray processes. Printing the function result\nis fairly fast in pandas and Modin, but before Modin can print the data, it has to\nwait until all the remote functions complete.\n\nTo time how long Modin takes for a single operation, you should typically use\nbenchmark mode. Benchmark mode will wait for all asynchronous remote execution\nto complete. You can turn benchmark mode on at any point as follows:\n\n.. code-block:: python\n\n    from modin.config import BenchmarkMode\n    BenchmarkMode.put(True)\n\nRerunning the script above with benchmark mode on, the Modin ``map`` takes\n3.59 seconds, and the ``print`` takes 183 milliseconds. These timings better\nreflect where Modin is spending its execution time.\n\nA caveat about benchmark mode\n-----------------------------\n\nWhile benchmark mode is often good for measuring the performance of a single\nModin function call, it can underestimate Modin's performance in cases where\nModin's asynchronous execution improves Modin's performance. Consider the\nfollowing script with benchmark mode on:\n\n.. 
code-block:: python\n\n    import numpy as np\n    import time\n    import ray\n    from io import BytesIO\n\n    import modin.pandas as pd\n    from modin.config import BenchmarkMode, MinRowPartitionSize\n\n    BenchmarkMode.put(True)\n\n    start = time.time()\n    df = pd.DataFrame(list(range(MinRowPartitionSize.get())), columns=['A'])\n    result1 = df.map(lambda x: time.sleep(0.2) or x + 1)\n    result2 = df.map(lambda x: time.sleep(0.2) or x + 2)\n    result1.to_parquet(BytesIO())\n    result2.to_parquet(BytesIO())\n    end = time.time()\n    print(f'map and write to parquet took {end - start} seconds.')\n\nThe script runs two slow ``map`` operations on a dataframe and then writes each result\nto a buffer. The whole script takes 13 seconds with benchmark mode on, but\njust 7 seconds with benchmark mode off. Because Modin can run the ``map``\nasynchronously, it can start writing the first result to its buffer while\nit's still computing the second result. With benchmark mode on, Modin has to\nexecute every function synchronously instead.\n\nHow to benchmark complex workflows\n----------------------------------\n\nTypically, to benchmark Modin's overall performance on your workflow, you\nshould start by looking at end-to-end performance with benchmark mode off.\nIt's common for Modin workflows to end with writing results to one or more\nfiles, or with printing some Modin objects to an interactive console. Such\nend points are natural ways to make sure that all of the Modin execution that\nyou require is complete.\n\nTo measure more fine-grained performance, it can be helpful to turn\nbenchmark mode on, but beware that doing so may reduce your script's overall\nperformance and thus may not reflect where Modin is normally spending execution\ntime, as pointed out above.\n\nTurning on :doc:`Modin logging </usage_guide/advanced_usage/modin_logging>` and\nusing the Modin logs can also help you profile your workflow. 
The Modin logs\ncan also give a detailed breakdown of the performance of each Modin function\nat each Modin :doc:`layer </development/architecture>`. Log mode is more\nuseful when used in conjunction with benchmark mode.\n\nSometimes, if you don't have a natural end-point to your workflow, you can\njust call ``modin.utils.execute`` on the workflow's final Modin objects.\nThat will typically block on any asynchronous computation:\n\n.. code-block:: python\n\n    import time\n    import ray\n    from io import BytesIO\n\n    import modin.pandas as pd\n    from modin.config import MinRowPartitionSize, NPartitions\n    import modin.utils\n\n    MinRowPartitionSize.put(32)\n    NPartitions.put(16)\n\n    def slow_add_one(x):\n      if x == 5000:\n        time.sleep(10)\n      return x + 1\n\n    # Look at the Ray documentation with respect to the Ray configuration suited to you most.\n    ray.init()\n    df1 = pd.DataFrame(list(range(10_000)), columns=['A'])\n    result = df1.map(slow_add_one)\n    # %time modin.utils.execute(result)\n    %time result.to_parquet(BytesIO())\n\nWriting the result to a buffer takes 9.84 seconds. However, if you uncomment\nthe :code:`%time modin.utils.execute(result)` before the :code:`to_parquet`\ncall, the :code:`to_parquet` takes just 23.8 milliseconds!\n\n.. note::\n    If you see any Modin documentation touting Modin's speed without using\n    benchmark mode or otherwise guaranteeing that Modin is finishing all asynchronous\n    and deferred computation, you should file an issue on the Modin GitHub. 
It's\n    not fair to compare the speed of an async Modin function call to an equivalent\n    synchronous call using another library.\n\nAppendix: System Information\n----------------------------\nThe example scripts here were run on the following system:\n\n- **OS Platform and Distribution (e.g., Linux Ubuntu 16.04)**: macOS Monterey 12.4\n- **Modin version**: d6d503ac7c3028d871c34d9e99e925ddb0746df6\n- **Ray version**: 2.0.0\n- **Python version**: 3.10.4\n- **Machine**: MacBook Pro (16-inch, 2019)\n- **Processor**: 2.3 GHz 8-core Intel Core i9 processor\n- **Memory**: 16 GB 2667 MHz DDR4\n"
  },
  {
    "path": "docs/usage_guide/examples/index.rst",
    "content": "Modin Usage Examples\n====================\n\nThis section shows Modin usage examples in different scenarios like Modin on a local/remote cluster,\nthe use of Modin spreadsheet.\n\nTutorials\n'''''''''\n\nThe following tutorials cover the basic usage of Modin. `Here <https://www.youtube.com/watch?v=NglkafEmbhE>`_ is a one hour video tutorial that walks through these basic exercises.\n\n- Exercise 1: Introduction to Modin [`Source PandasOnRay <https://github.com/modin-project/modin/blob/main/examples/tutorial/jupyter/execution/pandas_on_ray/local/exercise_1.ipynb>`__, `Source PandasOnDask <https://github.com/modin-project/modin/blob/main/examples/tutorial/jupyter/execution/pandas_on_dask/local/exercise_1.ipynb>`__]\n- Exercise 2: Speed Improvements with Modin [`Source PandasOnRay <https://github.com/modin-project/modin/blob/main/examples/tutorial/jupyter/execution/pandas_on_ray/local/exercise_2.ipynb>`__, `Source PandasOnDask <https://github.com/modin-project/modin/blob/main/examples/tutorial/jupyter/execution/pandas_on_dask/local/exercise_2.ipynb>`__]\n- Exercise 3: Defaulting to pandas with Modin [`Source PandasOnRay <https://github.com/modin-project/modin/blob/main/examples/tutorial/jupyter/execution/pandas_on_ray/local/exercise_3.ipynb>`__, `Source PandasOnDask <https://github.com/modin-project/modin/blob/main/examples/tutorial/jupyter/execution/pandas_on_dask/local/exercise_3.ipynb>`__]\n\nThe following tutorials covers more advanced features in Modin:\n\n- Exercise 4: Experimental Features in Modin (Spreadsheet, Progress Bar) [`Source PandasOnRay <https://github.com/modin-project/modin/blob/main/examples/tutorial/jupyter/execution/pandas_on_ray/local/exercise_4.ipynb>`__, `Source PandasOnDask <https://github.com/modin-project/modin/blob/main/examples/tutorial/jupyter/execution/pandas_on_dask/local/exercise_4.ipynb>`__]\n- Exercise 5: Setting up Modin in a Cluster Environment [`Source PandasOnRay 
<https://github.com/modin-project/modin/blob/main/examples/tutorial/jupyter/execution/pandas_on_ray/cluster/exercise_5.py>`__]\n\nHow to get required dependencies for the tutorial notebooks and to run them please refer to the respective `README.md <https://github.com/modin-project/modin/tree/main/examples/tutorial/jupyter/README.md>`__ file.\n\n\nData Science Benchmarks\n'''''''''''''''''''''''\n\n- Using Modin with the NYC Taxi Dataset [`Source <https://github.com/modin-project/modin/blob/main/examples/jupyter/Modin_Taxi.ipynb>`__]\n- Using Modin with the Census Dataset (coming soon...)\n- Using Modin with the Plasticc Dataset (coming soon...)\n\nModin Spreadsheets\n''''''''''''''''''\n\n- Using Modin along with the Spreadsheets API [`Source <https://github.com/modin-project/modin/blob/main/examples/spreadsheet/tutorial.ipynb>`__]\n\nModin with scikit-learn\n'''''''''''''''''''''''\n\n- Modin for Machine Learning with scikit-learn [`Source <https://github.com/modin-project/modin/blob/main/examples/modin-scikit-learn-example.ipynb>`__]\n"
  },
  {
    "path": "docs/usage_guide/index.rst",
    "content": "Usage Guide\n===========\n\nThis guide describes both basic and advanced Modin usage, including usage examples, \ndetails regarding Modin configuration settings, as well as tips and tricks on \nhow you can further optimize the performance of your workload with Modin.\n\n.. toctree::\n    :maxdepth: 4\n\n    /flow/modin/config\n    examples/index\n    advanced_usage/index\n    optimization_notes/index\n    benchmarking\n    integrations\n\n.. meta::\n    :description lang=en:\n        Usage-specific documentation."
  },
  {
    "path": "docs/usage_guide/integrations.rst",
    "content": "Third Party Library Integrations\n================================\n\nModin is a drop-in replacement for Pandas, so we want it to interoperate with third-party libraries just as Pandas does. \nTo see where Modin performs well and where it needs to improve, we've selected a number of important machine learning + visualization + statistics libraries, \nand then looked at examples (from their documentation, if possible) about how they work with Pandas. Then we ran those same workflows with Modin, and \ntracked what worked, and what failed.\n\nIn the table below, you'll see, for each third-party library we tested, the number of successful test calls / total test calls, and a qualitative description of how both Pandas and Modin integrate with that library.\n\nIn the deeper dive, you can view the Jupyter notebook we have used to test API calls and the corresponding Github issues filed. If you come across other issues/ examples \nin your own workflows we encourage you to file an `issue <https://github.com/modin-project/modin/issues/new/choose>`_ or contribute a `PR <https://github.com/modin-project/modin/pulls>`_!\n\n\n.. note::\n    These interoperability metrics are preliminary and not all APIs for each library have been tested. Feel free to add more!\n\n\nModin Interoperability by Library\n'''''''''''''''''''''''''''''''''\n.. 
list-table::\n   :widths: 5 5 20\n   :header-rows: 1\n\n   * - Library\n     - API successes / calls\n     - Interoperability\n     \n   * - seaborn\n     - 73% (11/15)\n     - **Pandas**: Accepts Pandas DataFrames as inputs for producing plot |br|\n       **Modin**: Mostly accepts Modin DataFrames as inputs for producing plots, but fails completely in some cases (pairplot, lmplot), and in others (catplot, objects.Plot) only works for some parameter combinations\n\n   * - plotly\n     - 78% (7 / 9)\n     - **Pandas**: Accepts Pandas DataFrames as inputs for producing plots, including specifying X and Y parameters as df columns |br|\n       **Modin**: Mostly accepts Modin DataFrames as inputs for producing plots (the exception is choropleth), but fails when specifying X and Y parameters as df columns\n   \n   * - matplotlib\n     - 100% (5 / 5)\n     - **Pandas**: Accepts Pandas DataFrames as inputs for producing plots like scatter, barh, etc. |br|\n       **Modin**: Accepts Modin DataFrames as inputs for producing plots like scatter, barh, etc.\n  \n   * - altair\n     - 0% (0 / 1)\n     - **Pandas**: Accepts Pandas DataFrames as inputs for producing charts through Chart |br|\n       **Modin**: Does not accept Modin DataFrames as inputs for producing charts through Chart\n\n   * - bokeh\n     - 0% (0 / 1)\n     - **Pandas**: Loads Pandas DataFrames through ColumnDataSource |br|\n       **Modin**: Does not load Modin DataFrames through ColumnDataSource\n     \n   * - sklearn\n     - 100% (6 / 6)\n     - **Pandas**: Many functions take Pandas DataFrames as inputs |br|\n       **Modin**: Many functions take Modin DataFrames as inputs\n    \n   * - Hugging Face (Transformers, Datasets)\n     - 100% (2 / 2) \n     - **Pandas**: Loads Pandas DataFrames into Datasets, and processes Pandas DataFrame rows as inputs using Transformers.InputExample (deprecated) |br|\n       **Modin**: Loads Modin DataFrames into Datasets (though slowly), and processes Modin DataFrame rows as 
inputs through Transformers.InputExample (deprecated)\n     \n   * - Tensorflow\n     - 75% (3 / 4)\n     - **Pandas**: Converts Pandas DataFrames to tensors |br|\n       **Modin**: Converts Modin DataFrames to tensors, but specialized APIs like Keras might not work yet\n     \n   * - NLTK\n     - 100% (1 / 1)\n     - **Pandas**: Performs transformations like tokenization on Pandas DataFrames |br|\n       **Modin**: Performs transformations like tokenization on Modin DataFrames\n    \n   * - XGBoost\n     - 100% (1 / 1)\n     - **Pandas**: Loads Pandas DataFrames through the DMatrix function |br|\n       **Modin**: Loads Modin DataFrames through the DMatrix function\n    \n   * - statsmodels\n     - 50% (1 / 2)\n     - **Pandas**: Can accept Pandas DataFrames when fitting models |br|\n       **Modin**: Sometimes accepts Modin DataFrames when fitting models (e.g., formula.api.ols), but does not in others (e.g., api.OLS)\n     \n.. |br| raw:: html\n\n     <br>\n\nA Deeper Dive\n''''''''''''''\n\n**seaborn**\n-----------\n\n`Jupyter Notebook <https://github.com/modin-project/modin/blob/main/examples/jupyter/integrations/seaborn.ipynb>`__\n\nGithub Issues\n    * https://github.com/modin-project/modin/issues/5435 \n    * https://github.com/modin-project/modin/issues/5433\n\n**plotly**\n----------\n\n`Jupyter Notebook <https://github.com/modin-project/modin/blob/main/examples/jupyter/integrations/plotly.ipynb>`__\n\nGithub Issues\n    * https://github.com/modin-project/modin/issues/5447 \n    * https://github.com/modin-project/modin/issues/5445\n\n**matplotlib**\n--------------\n\n`Jupyter Notebook <https://github.com/modin-project/modin/blob/main/examples/jupyter/integrations/matplotlib.ipynb>`__\n\n\n**altair**\n----------\n\n`Jupyter Notebook <https://github.com/modin-project/modin/blob/main/examples/jupyter/integrations/altair.ipynb>`__\n\nGithub Issues\n    * https://github.com/modin-project/modin/issues/5438\n\n**bokeh**\n---------\n\n`Jupyter Notebook 
<https://github.com/modin-project/modin/blob/main/examples/jupyter/integrations/bokeh.ipynb>`__\n\nGithub Issues\n    * https://github.com/modin-project/modin/issues/5437\n\n**sklearn**\n-----------\n\n`Jupyter Notebook <https://github.com/modin-project/modin/blob/main/examples/jupyter/integrations/sklearn.ipynb>`__\n\n**Hugging Face**\n----------------\n\n`Jupyter Notebook <https://github.com/modin-project/modin/blob/main/examples/jupyter/integrations/huggingface.ipynb>`__\n\n**Tensorflow**\n--------------\n\n`Jupyter Notebook <https://github.com/modin-project/modin/blob/main/examples/jupyter/integrations/tensorflow.ipynb>`__\n\nGithub Issues\n    * https://github.com/modin-project/modin/issues/5439\n\n**NLTK**\n---------\n\n`Jupyter Notebook <https://github.com/modin-project/modin/blob/main/examples/jupyter/integrations/NLTK.ipynb>`__\n\n**XGBoost**\n-----------\n\n`Jupyter Notebook <https://github.com/modin-project/modin/blob/main/examples/jupyter/integrations/xgboost.ipynb>`__\n\n**statsmodels**\n---------------\n\n`Jupyter Notebook <https://github.com/modin-project/modin/blob/main/examples/jupyter/integrations/statsmodels.ipynb>`__\n\nGithub Issues\n    * https://github.com/modin-project/modin/issues/5440\n\nAppendix: System Information\n'''''''''''''''''''''''''''''\nThe example scripts here were run on the following system:\n\n- **OS Platform and Distribution (e.g., Linux Ubuntu 16.04)**: macOS Big Sur 11.5.2\n- **Modin version**: 0.18.0+3.g4114183f\n- **Ray version**: 2.0.1\n- **Python version**: 3.9.7.final.0\n- **Machine**: MacBook Pro (16-inch, 2019)\n- **Processor**: 2.3 GHz 8-core Intel Core i9 processor\n- **Memory**: 16 GB 2667 MHz DDR4\n"
  },
  {
    "path": "docs/usage_guide/optimization_notes/index.rst",
    "content": "Optimization Notes\n==================\n\nModin has chosen default values for a lot of the configurations here that provide excellent performance in most\ncases. This page is for those who love to optimize their code and those who are curious about existing optimizations\nwithin Modin. Here you can find more information about Modin's optimizations both for a pipeline of operations as\nwell as for specific operations. If you want to go ahead and tune the Modin behavior on your own, refer to\n:doc:`Modin Configuration Settings </flow/modin/config>` page for the full set of configurations available in Modin.\n\nRange-partitioning in Modin\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nModin utilizes a range-partitioning approach for specific operations, significantly enhancing\nparallelism and reducing memory consumption in certain scenarios. Range-partitioning is typically\nengaged for operations that have key columns (to group on, to merge on, etc).\n\nYou can enable `range-partitioning`_ by specifying the ``cfg.RangePartitioning`` :doc:`configuration variable </flow/modin/config>`:\n\n.. code-block:: python\n\n    import modin.pandas as pd\n    import modin.config as cfg\n\n    cfg.RangePartitioning.put(True) # past this point methods that support range-partitioning\n                                    # will use it\n\n    pd.DataFrame(...).groupby(...).mean() # use range-partitioning for groupby.mean()\n\n    cfg.RangePartitioning.put(False)\n\n    pd.DataFrame(...).groupby(...).mean() # use MapReduce implementation for groupby.mean()\n\nBuilding range-partitioning assumes data reshuffling, which may result in breaking the original\norder of rows; for some operations, this means that the result will be different from Pandas.\n\nRange-partitioning is not a silver bullet, meaning that enabling it is not always beneficial. 
Below you can find\na link to the list of operations that have support for range-partitioning and practical advice on when one should\nenable it: :doc:`operations that support range-partitioning </usage_guide/optimization_notes/range_partitioning_ops>`.\n\nDynamic-partitioning in Modin\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nRay engine experiences slowdowns when running a large number of small remote tasks at the same time. Ray Core recommends to `avoid tiny task`_.\nWhen a Modin DataFrame has a large number of partitions, some functions produce a large number of remote tasks, which can cause slowdowns. \nTo solve this problem, Modin suggests using dynamic partitioning. This approach reduces the number of remote tasks \nby combining multiple partitions into a single virtual partition and performing a common remote task on them.\n\nDynamic partitioning is typically used for operations that are fully or partially executed on all partitions separately.\n\n.. code-block:: python\n\n    import modin.pandas as pd\n    from modin.config import context\n\n    df = pd.DataFrame(...)\n\n    with context(DynamicPartitioning=True):\n        df.abs()\n\nDynamic partitioning is also not always useful, and this approach is usually used for medium-sized DataFrames with a large number of columns.\nIf the number of columns is small, the number of partitions will be close to the number of CPUs, and Ray will not have this problem.\nIf the DataFrame has too many rows, this is also not a good case for using Dynamic-partitioning, since each task is no longer tiny and performing \nthe combined tasks carries more overhead than assigning them separately.\n\nUnfortunately, the use of Dynamic-partitioning depends on various factors such as data size, number of CPUs, operations performed, \nand it is up to the user to determine whether Dynamic-partitioning will give a boost in their case or not.\n\n..\n  TODO: Define heuristics to automatically enable dynamic partitioning without 
performance penalty.\n  `Issue #7370 <https://github.com/modin-project/modin/issues/7370>`_\n\nUnderstanding Modin's partitioning mechanism\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nModin's partitioning is crucial for performance; so we recommend expert users to understand Modin's\npartitioning mechanism and how to tune it in order to achieve better performance.\n\nHow Modin partitions a dataframe\n--------------------------------\n\nModin uses a partitioning scheme that partitions a dataframe along both axes, resulting in a matrix\nof partitions. The row and column chunk sizes are computed independently based\non the length of the appropriate axis and Modin's special :doc:`configuration variables </flow/modin/config>`\n(``NPartitions``, ``MinRowPartitionSize`` and ``MinColumnPartitionSize``):\n\n- ``NPartitions`` is the maximum number of splits along an axis; by default, it is equal to the number of cores\n  on your local machine or cluster of nodes.\n- ``MinRowPartitionSize`` is the minimum number of rows to do a split. For instance, if ``MinRowPartitionSize``\n  is 32, the row axis will not be split unless the amount of rows is greater than 32. If it is greater, for example, 34,\n  then the row axis is sliced into two partitions: containing 32 and 2 rows accordingly.\n- ``MinColumnPartitionSize`` is the minimum number of columns to do a split. For instance, if ``MinColumnPartitionSize``\n  is 32, the column axis will not be split unless the amount of columns is greater than 32. If it is greater, for example, 34,\n  then the column axis is sliced into two partitions: containing 32 and 2 columns accordingly.\n\nBeware that ``NPartitions`` specifies a limit for the number of partitions `along a single axis`, which means, that\nthe actual limit for the entire dataframe itself is the square of ``NPartitions``.\n\n.. 
figure:: /img/partitioning_mechanism/partitioning_examples.svg\n   :align: center\n\nFull-axis functions\n-------------------\n\nSome of the aggregation functions require knowledge about the entire axis, for example at ``.apply(foo, axis=0)``\nthe passed function ``foo`` expects to receive data for the whole column at once.\n\nWhen a full-axis function is applied, the partitions along this axis are collected at a single worker\nthat processes the function. After the function is done, the partitioning of the data is back to normal.\n\n.. figure:: /img/partitioning_mechanism/full_axis_function.svg\n   :align: center\n\nNote that the amount of remote calls is equal to the number of partitions, which means that since the number\nof partitions is decreased for full-axis functions it also decreases the potential for parallelism.\n\nAlso note, that reduce functions such as ``.sum()``, ``.mean()``, ``.max()``, etc, are not considered\nto be full-axis, so they do not suffer from the decreasing level of parallelism.\n\nHow to tune partitioning\n------------------------\n\nConfigure Modin's default partitioning scheme\n^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n\nAs you can see from the examples above, the more the dataframe's shape is closer to a square, the closer the number of\npartitions to the square of ``NPartitions``. In the case of ``NPartitions`` equals to the number of workers,\nthat means that a single worker is going to process multiple partitions at once, which slows down overall performance.\n\nIf your workflow mainly operates with wide dataframes and non-full-axis functions, it makes sense to reduce the\n``NPartitions`` value so a single worker would process a single partition.\n\n.. figure:: /img/partitioning_mechanism/repartition_square_frames.svg\n   :align: center\n\nCopy-pastable example, showing how tuning ``NPartitions`` value for wide frames may improve performance on your machine:\n\n.. 
code-block:: python\n\n  from multiprocessing import cpu_count\n  from modin.distributed.dataframe.pandas import unwrap_partitions\n  import modin.config as cfg\n  import modin.pandas as pd\n  import numpy as np\n  import timeit\n\n  # Generating data for a square-like dataframe\n  data = np.random.randint(0, 100, size=(5000, 5000))\n\n  # Explicitly setting `NPartitions` to its default value\n  cfg.NPartitions.put(cpu_count())\n\n  # Each worker processes `cpu_count()` amount of partitions\n  df = pd.DataFrame(data)\n  print(f\"NPartitions: {cfg.NPartitions.get()}\")\n  # Getting raw partitions to count them\n  partitions_shape = np.array(unwrap_partitions(df)).shape\n  print(\n      f\"The frame has {partitions_shape[0]}x{partitions_shape[1]}={np.prod(partitions_shape)} partitions \"\n      f\"when the CPU has only {cpu_count()} cores.\"\n  )\n  print(f\"10 times of .abs(): {timeit.timeit(lambda: df.abs(), number=10)}s.\")\n  # Possible output:\n  #   NPartitions: 112\n  #   The frame has 112x112=12544 partitions when the CPU has only 112 cores.\n  #   10 times of .abs(): 23.64s.\n\n  # Taking a square root of the current `cpu_count` to make more even partitioning\n  cfg.NPartitions.put(int(cpu_count() ** 0.5))\n\n  # Each worker processes a single partition\n  df = pd.DataFrame(data)\n  print(f\"NPartitions: {cfg.NPartitions.get()}\")\n  # Getting raw partitions to count them\n  partitions_shape = np.array(unwrap_partitions(df)).shape\n  print(\n      f\"The frame has {partitions_shape[0]}x{partitions_shape[1]}={np.prod(partitions_shape)} partitions \"\n      f\"when the CPU has {cpu_count()} cores.\"\n  )\n  print(f\"10 times of .abs(): {timeit.timeit(lambda: df.abs(), number=10)}s.\")\n  # Possible output:\n  #   NPartitions: 10\n  #   The frame has 10x10=100 partitions when the CPU has 112 cores.\n  #   10 times of .abs(): 0.25s.\n\nManually trigger repartitioning\n^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n\nIf you're getting unexpectedly poor performance, although you 
configured ``MODIN_NPARTITIONS``\ncorrectly, then this might be caused by unbalanced partitioning that occurred during the\nworkflow's execution.\n\nModin's ideology is to handle partitioning internally and not let users worry about the possible\nconsequences of applying a lot of \"bad\" operations that may affect DataFrame's partitioning.\nWe're constantly making efforts to find and fix cases where partitioning may cause a headache\nfor users.\n\nHowever, if you feel that you're dealing with unbalanced partitioning you may try to call an\ninternal :py:meth:`modin.pandas.dataframe.DataFrame._repartition` method on your :py:class:`~modin.pandas.dataframe.DataFrame` in order to manually\ntrigger partitions rebalancing and see whether it improves performance for your case.\n\n.. automethod:: modin.pandas.dataframe.DataFrame._repartition\n\nAn actual use-case for this method may be the following:\n\n.. code-block:: python\n\n  import modin.pandas as pd\n  import timeit\n\n  df = pd.DataFrame({\"col0\": [1, 2, 3, 4]})\n\n  # Appending a lot of columns may result in unbalanced partitioning\n  for i in range(1, 128):\n      df[f\"col{i}\"] = pd.Series([1, 2, 3, 4])\n\n  print(\n      \"DataFrame with unbalanced partitioning:\",\n      timeit.timeit(lambda: df.sum(), number=10)\n  ) # 1.44s\n\n  df = df._repartition()\n  print(\n      \"DataFrame after '._repartition()':\",\n      timeit.timeit(lambda: df.sum(), number=10)\n  ) # 0.21s.\n\nAvoid iterating over Modin DataFrame\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nUse ``df.apply()`` or other aggregation methods when possible instead of iterating over a dataframe.\nFor-loops don't scale and force the distributed data to be collected back at the driver.\n\nCopy-pastable example, showing how replacing a for-loop with the equivalent ``.apply()`` may improve performance:\n\n.. 
code-block:: python\n\n  import modin.pandas as pd\n  import numpy as np\n  from timeit import default_timer as timer\n\n  data = np.random.randint(1, 100, (2 ** 10, 2 ** 2))\n\n  md_df = pd.DataFrame(data)\n\n  result = []\n  t1 = timer()\n  # Iterating over a dataframe forces to collect distributed data to the driver and doesn't scale\n  for idx, row in md_df.iterrows():\n      result.append((row[1] + row[2]) / row[3])\n  print(f\"Filling a list by iterating a Modin frame: {timer() - t1:.2f}s.\")\n  # Possible output: 36.15s.\n\n  t1 = timer()\n  # Using `.apply()` perfectly scales to all axis-partitions\n  result = md_df.apply(lambda row: (row[1] + row[2]) / row[3], axis=1).to_numpy().tolist()\n  print(f\"Filling a list by using '.apply()' and converting the result to a list: {timer() - t1:.2f}s.\")\n  # Possible output: 0.22s.\n\nUse Modin's Dataframe Algebra API to implement custom parallel functions\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nModin provides a set of low-level parallel-implemented operators which can be used to build most of the\naggregation functions. These operators are present in the :doc:`algebra module </flow/modin/core/dataframe/algebra>`.\nModin DataFrame allows users to use their own aggregations built with this module. Visit the\n:doc:`DataFrame's algebra </flow/modin/core/dataframe/algebra>` page of the documentation for the steps to do it.\n\nAvoid mixing pandas and Modin DataFrames\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nAlthough Modin is considered to be a drop-in replacement for pandas, Modin and pandas are not intended to be used together\nin a single flow. Passing a pandas DataFrame as an argument for a Modin's DataFrame method may either slowdown\nthe function (because it has to process non-distributed object) or raise an error. 
You would also get an undefined\nbehavior if you pass a Modin DataFrame as an input to pandas methods, since pandas identifies Modin's objects as a simple iterable,\nand so can't leverage its benefits as a distributed dataframe.\n\nCopy-pastable example, showing how mixing pandas and Modin DataFrames in a single flow may bottleneck performance:\n\n.. code-block:: python\n\n  import modin.pandas as pd\n  import numpy as np\n  import timeit\n  import pandas\n\n  data = np.random.randint(0, 100, (2 ** 20, 2 ** 2))\n\n  md_df, md_df_copy = pd.DataFrame(data), pd.DataFrame(data)\n  pd_df, pd_df_copy = pandas.DataFrame(data), pandas.DataFrame(data)\n\n  print(\"concat modin frame + pandas frame:\")\n  # Concatenating modin frame + pandas frame using modin '.concat()'\n  # This case is bad because Modin have to process non-distributed pandas object\n  time = timeit.timeit(lambda: pd.concat([md_df, pd_df]), number=10)\n  print(f\"\\t{time}s.\\n\")\n  # Possible output: 0.44s.\n\n  print(\"concat modin frame + modin frame:\")\n  # Concatenating modin frame + modin frame using modin '.concat()'\n  # This is an ideal case, Modin is being used as intended\n  time = timeit.timeit(lambda: pd.concat([md_df, md_df_copy]), number=10)\n  print(f\"\\t{time}s.\\n\")\n  # Possible output: 0.05s.\n\n  print(\"concat pandas frame + pandas frame:\")\n  # Concatenating pandas frame + pandas frame using pandas '.concat()'\n  time = timeit.timeit(lambda: pandas.concat([pd_df, pd_df_copy]), number=10)\n  print(f\"\\t{time}s.\\n\")\n  # Possible output: 0.31s.\n\n  print(\"concat pandas frame + modin frame:\")\n  # Concatenating pandas frame + modin frame using pandas '.concat()'\n  time = timeit.timeit(lambda: pandas.concat([pd_df, md_df]), number=10)\n  print(f\"\\t{time}s.\\n\")\n  # Possible output: TypeError\n\n\nUsing pandas to execute queries with Modin's ``\"Pandas\"`` 
backend\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nBy default, Modin distributes the data in a dataframe (or series) and attempts\nto process data for different partitions in parallel.\n\nHowever, for certain scenarios, such as handling small datasets, Modin's\nparallel execution may introduce unnecessary overhead. In such cases, it's more\nefficient to use serial execution with a single, unpartitioned pandas dataframe.\nYou can enable this kind of local pandas execution by setting Modin's\n``Backend``\n:doc:`configuration variable </flow/modin/config>` to ``\"Pandas\"``.\n\nDataFrames created while Modin's global backend is set to ``\"Pandas\"``\nwill continue to use native execution even if you switch the global backend\nlater. Modin supports interoperability between distributed Modin DataFrames\nand those using the pandas backend.\n\nHere is an example of using the pandas backend.\n\n.. code-block:: python\n\n  import modin.pandas as pd\n  from modin.config import Backend\n\n  # This dataframe will use Modin's default, distributed execution.\n  original_backend = Backend.get()\n  assert original_backend != \"Pandas\"\n  distributed_df_1 = pd.DataFrame([0])\n\n  # Set backend to \"Pandas\" for local pandas execution.\n  Backend.put(\"Pandas\")\n  modin_on_pandas_df = pd.DataFrame([1])\n  assert modin_on_pandas_df.get_backend() == \"Pandas\"\n\n  # Revert to default settings for distributed execution\n  Backend.put(original_backend)\n  distributed_df_2 = pd.DataFrame([2])\n  assert distributed_df_2.get_backend() == original_backend\n\nYou can also use the pandas backend for some dataframes while using different\nbackends for other dataframes. You can switch the backend of an individual\ndataframe or series with ``set_backend()`` or its synonym ``move_to()``.\nHere's an example of switching the backend for an individual dataframe.\n\n.. 
code-block:: python\n\n  import modin.pandas as pd\n  from modin.config import Backend\n\n  # This dataframe will use Modin's default, distributed execution.\n  original_backend = Backend.get()\n  assert original_backend != \"Pandas\"\n  distributed_df_1 = pd.DataFrame([0])\n\n  pandas_df_1 = distributed_df_1.move_to(\"Pandas\")\n  assert pandas_df_1.get_backend() == \"Pandas\"\n  pandas_df_1 = pandas_df_1.sort_values(0)\n  assert pandas_df_1.get_backend() == \"Pandas\"\n\n  new_df = pandas_df_1.move_to(original_backend)\n  assert new_df.get_backend() == original_backend\n\n  new_df.set_backend(\"Pandas\", inplace=True)\n  assert new_df.get_backend() == \"Pandas\"\n\n\nAutomatic backend switching\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\n*This feature is under active development, and the API is subject to change.*\n\nModin's backends may define heuristics for whether to automatically move data to another backend\nfor more efficient computation of certain operations. Modin does not currently define these heuristics\nfor any of its default backends, but any backends that wish to do so should implement the query\ncompiler methods discussed in\n:ref:`the architecture document<auto-switch architecture>`.\n\nAfter implementing the relevant query compiler methods, the following APIs can be used to control\nwhen automatic switching occurs:\n\n.. 
code-block:: python\n\n  import modin.pandas as pd\n  from modin.core.storage_formats.pandas.query_compiler_caster import (\n    register_function_for_post_op_switch,\n    register_function_for_pre_op_switch,\n  )\n  from modin.config import AutoSwitchBackend\n\n  # Enable automatic switching BEFORE computation for DataFrame.apply\n  # when the DataFrame's backend is Pandas\n  register_function_for_pre_op_switch(\n    class_name=\"DataFrame\",\n    method=\"apply\",\n    backend=\"Pandas\",\n  )\n\n  # Enable automatic switching AFTER computation for Series.max\n  # when the Series's backend is Pandas\n  register_function_for_post_op_switch(\n    class_name=\"Series\",\n    method=\"max\",\n    backend=\"Pandas\",\n  )\n\n  # Enable automatic switching globally (use .disable() to turn off)\n  AutoSwitchBackend.enable()\n\n  df = pd.DataFrame([[1, 2, 3]])\n  # \"pin\" a single DataFrame/Series, preventing it from\n  # automatically switching backends\n  df.pin_backend(inplace=True)\n  # \"unpin\" it to re-enable automatic switching\n  df.unpin_backend(inplace=True)\n\n\nOperation-specific optimizations\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n\nmerge\n-----\n\n``merge`` operation in Modin uses the broadcast join algorithm: combining a right Modin DataFrame into a pandas DataFrame and\nbroadcasting it to the row partitions of the left Modin DataFrame. In order to minimize interprocess communication cost when doing\nan inner join you may want to swap left and right DataFrames.\n\n.. code-block:: python\n\n  import modin.pandas as pd\n  import numpy as np\n\n  left_data = np.random.randint(0, 100, size=(2**8, 2**8))\n  right_data = np.random.randint(0, 100, size=(2**12, 2**12))\n\n  left_df = pd.DataFrame(left_data)\n  right_df = pd.DataFrame(right_data)\n  %timeit left_df.merge(right_df, how=\"inner\", on=10)\n  3.59 s ± 107 ms per loop (mean ± std. dev. 
of 7 runs, 1 loop each)\n\n  %timeit right_df.merge(left_df, how=\"inner\", on=10)\n  1.22 s ± 40.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n\nNote that result columns order may differ for first and second ``merge``.\n\n.. _range-partitioning: https://www.techopedia.com/definition/31994/range-partitioning\n.. _`avoid tiny task`: https://docs.ray.io/en/latest/ray-core/tips-for-first-time.html#tip-2-avoid-tiny-tasks\n"
  },
  {
    "path": "docs/usage_guide/optimization_notes/range_partitioning_ops.rst",
    "content": ":orphan:\n\nOperations that support range-partitioning in Modin\n###################################################\n\nThe following operations change their behavior once ``cfg.RangePartitioning`` variable is set to ``True``.\nGo through the list to find out when it could be beneficial to engage range-partitioning for a certain method.\n\nGroupBy\n=======\n\n.. note::\n    When grouping on multiple columns using range-partitioning implementation, the result\n    may not be sorted even if ``groupby(sort=True, ...)`` was passed: https://github.com/modin-project/modin/issues/6875.\n\nRange-partitioning groupby implementation is automatically engaged for ``groupby.apply()``, ``groupby.transform()``,\n``groupby.rolling()``. For groupby aggregations from `this list`_, MapReduce implementation is used by default.\nMapReduce tends to show better performance for groupby with low-cardinality. If the cardinality of your columns\nto group is expected to be high, it's recommended to engage range-partitioning implementation.\n\n\nMerge\n=====\n\n.. note::\n    Range-partitioning approach is implemented only for \"left\" and \"inner\" merge and only\n    when merging on a single column using `on` argument.\n\nRange-partitioning merge replaces broadcast merge. It is recommended to use range-partitioning implementation\nif the right dataframe in merge is as big as the left dataframe. In this case, range-partitioning\nimplementation works faster and consumes less RAM.\n\nUnder the spoiler you can find performance comparison of range-partitioning and broadcast merge in different\nscenarios:\n\n.. raw:: html\n\n   <details>\n   <summary><a>Performance measurements for merge</a></summary>\n\nThe performance was measured on `h2o join queries`_ using Intel(R) Xeon(R) Gold 6238R CPU @ 2.20GHz (56 cores),\nwith the number of cores allocated for Modin limited to 44 (``MODIN_CPUS=44``).\n\nMeasurements for small 500mb data:\n\n.. 
image:: /img/range_partitioning_measurements/merge_h2o_500mb.jpg\n   :align: center\n\nMeasurements for medium 5gb data:\n\n.. image:: /img/range_partitioning_measurements/merge_h2o_5gb.png\n   :align: center\n\n.. raw:: html\n\n   </details>\n\n\n``.unique()`` and ``.drop_duplicates()``\n========================================\n\n.. note::\n    When range-partitioning is enabled, both ``.unique()`` and ``.drop_duplicates()`` will\n    yield results that are sorted along rows. If range-partitioning is disabled,\n    the original order will be maintained.\n\nRange-partitioning implementation of ``.unique()`` / ``.drop_duplicates()`` works best when the input data size is big (more than\n5_000_000 rows) and when the output size is also expected to be big (no more than 80% values are duplicates).\n\nUnder the spoiler you can find performance comparisons in different scenarios:\n\n.. raw:: html\n\n   <details>\n   <summary><a>Performance measurements for ``.unique()``</a></summary>\n\nThe performance was measured on randomly generated data using Intel(R) Xeon(R) Gold 6238R CPU @ 2.20GHz (56 cores).\nThe `duplicate rate` shows the percentage of duplicated rows in the dataset. You can learn more about this micro-benchmark\nby reading its source code:\n\n.. raw:: html\n\n   <details>\n   <summary><a>Micro-benchmark's source code</a></summary>\n\n.. 
code-block:: python\n\n    import modin.pandas as pd\n    import numpy as np\n    import modin.config as cfg\n\n    from modin.utils import execute\n    from timeit import default_timer as timer\n    import pandas\n\n    cfg.CpuCount.put(16)\n\n    def get_data(nrows, dtype):\n        if dtype == int:\n            return np.arange(nrows)\n        elif dtype == float:\n            return np.arange(nrows).astype(float)\n        elif dtype == str:\n            return np.array([f\"value{i}\" for i in range(nrows)])\n        else:\n            raise NotImplementedError(dtype)\n\n    pd.DataFrame(np.arange(cfg.NPartitions.get() * cfg.MinRowPartitionSize.get())).to_numpy()\n\n    nrows = [1_000_000, 5_000_000, 10_000_000, 25_000_000, 50_000_000, 100_000_000]\n    duplicate_rate = [0, 0.1, 0.5, 0.95]\n    dtypes = [int, str]\n    use_range_part = [True, False]\n\n    columns = pandas.MultiIndex.from_product([dtypes, duplicate_rate, use_range_part], names=[\"dtype\", \"duplicate rate\", \"use range-part\"])\n    result = pandas.DataFrame(index=nrows, columns=columns)\n\n    i = 0\n    total_its = len(nrows) * len(duplicate_rate) * len(dtypes) * len(use_range_part)\n\n    for dt in dtypes:\n        for nrow in nrows:\n            data = get_data(nrow, dt)\n            np.random.shuffle(data)\n            for dpr in duplicate_rate:\n                data_c = data.copy()\n                dupl_val = data_c[0]\n\n                num_duplicates = int(dpr * nrow)\n                dupl_indices = np.random.choice(np.arange(nrow), num_duplicates, replace=False)\n                data_c[dupl_indices] = dupl_val\n\n                for impl in use_range_part:\n                    print(f\"{round((i / total_its) * 100, 2)}%\")\n                    i += 1\n                    cfg.RangePartitioning.put(impl)\n\n                    sr = pd.Series(data_c)\n                    execute(sr)\n\n                    t1 = timer()\n                    # returns a list, so no need for materialization\n 
                   sr.unique()\n                    tm = timer() - t1\n                    print(nrow, dpr, dt, impl, tm)\n                    result.loc[nrow, (dt, dpr, impl)] = tm\n                    result.to_excel(\"unique.xlsx\")\n\n.. raw:: html\n\n   </details>\n\nMeasurements with 16 cores being allocated for Modin (``MODIN_CPUS=16``):\n\n.. image:: /img/range_partitioning_measurements/unique_16cpus.jpg\n   :align: center\n\nMeasurements with 44 cores being allocated for Modin (``MODIN_CPUS=44``):\n\n.. image:: /img/range_partitioning_measurements/unique_44cpus.jpg\n   :align: center\n\n.. raw:: html\n\n   </details>\n\n\n.. raw:: html\n\n   <details>\n   <summary><a>Performance measurements for ``.drop_duplicates()``</a></summary>\n\nThe performance was measured on randomly generated data using Intel(R) Xeon(R) Gold 6238R CPU @ 2.20GHz (56 cores).\nThe `duplicate rate` shows the percentage of duplicated rows in the dataset. The `subset size` shows the number of\ncolumns being specified as a ``subset`` parameter for ``df.drop_duplicates()``. You can learn more about this\nmicro-benchmark by reading its source code:\n\n.. raw:: html\n\n   <details>\n   <summary><a>Micro-benchmark's source code</a></summary>\n\n.. 
code-block:: python\n\n    import modin.pandas as pd\n    import numpy as np\n    import modin.config as cfg\n\n    from modin.utils import execute\n    from timeit import default_timer as timer\n    import pandas\n\n    cfg.CpuCount.put(16)\n\n    pd.DataFrame(np.arange(cfg.NPartitions.get() * cfg.MinRowPartitionSize.get())).to_numpy()\n\n    nrows = [1_000_000, 5_000_000, 10_000_000, 25_000_000]\n    duplicate_rate = [0, 0.1, 0.5, 0.95]\n    subset = [[\"col0\"], [\"col1\", \"col2\", \"col3\", \"col4\"], None]\n    ncols = 15\n    use_range_part = [True, False]\n\n    columns = pandas.MultiIndex.from_product(\n        [\n            [len(sbs) if sbs is not None else ncols for sbs in subset],\n            duplicate_rate,\n            use_range_part\n        ],\n        names=[\"subset size\", \"duplicate rate\", \"use range-part\"]\n    )\n    result = pandas.DataFrame(index=nrows, columns=columns)\n\n    i = 0\n    total_its = len(nrows) * len(duplicate_rate) * len(subset) * len(use_range_part)\n\n    for sbs in subset:\n        for nrow in nrows:\n            data = {f\"col{i}\": np.arange(nrow) for i in range(ncols)}\n            pandas_df = pandas.DataFrame(data)\n\n            for dpr in duplicate_rate:\n                pandas_df_c = pandas_df.copy()\n                dupl_val = pandas_df_c.iloc[0]\n\n                num_duplicates = int(dpr * nrow)\n                dupl_indices = np.random.choice(np.arange(nrow), num_duplicates, replace=False)\n                pandas_df_c.iloc[dupl_indices] = dupl_val\n\n                for impl in use_range_part:\n                    print(f\"{round((i / total_its) * 100, 2)}%\")\n                    i += 1\n                    cfg.RangePartitioning.put(impl)\n\n                    md_df = pd.DataFrame(pandas_df_c)\n                    execute(md_df)\n\n                    t1 = timer()\n                    res = md_df.drop_duplicates(subset=sbs)\n                    execute(res)\n                    tm = timer() - t1\n\n     
               sbs_s = len(sbs) if sbs is not None else ncols\n                    print(\"len()\", res.shape, nrow, dpr, sbs_s, impl, tm)\n                    result.loc[nrow, (sbs_s, dpr, impl)] = tm\n                    result.to_excel(\"drop_dupl.xlsx\")\n\n.. raw:: html\n\n   </details>\n\nMeasurements with 16 cores being allocated for Modin (``MODIN_CPUS=16``):\n\n.. image:: /img/range_partitioning_measurements/drop_duplicates_16cpus.jpg\n   :align: center\n\nMeasurements with 44 cores being allocated for Modin (``MODIN_CPUS=44``):\n\n.. image:: /img/range_partitioning_measurements/drop_duplicates_44cpus.jpg\n   :align: center\n\n.. raw:: html\n\n   </details>\n\n\n``.nunique()``\n==============\n\n.. note::\n\n    Range-partitioning approach is implemented only for ``pd.Series.nunique()`` and 1-column dataframes.\n    For multi-column dataframes ``.nunique()`` can only use full-axis reduce implementation.\n\nRange-partitioning implementation of ``.nunique()`` works best when the input data size is big (more than\n5_000_000 rows) and when the output size is also expected to be big (no more than 80% values are duplicates).\n\nUnder the spoiler you can find performance comparisons in different scenarios:\n\n.. raw:: html\n\n   <details>\n   <summary><a>Performance measurements for ``.nunique()``</a></summary>\n\nThe performance was measured on randomly generated data using Intel(R) Xeon(R) Gold 6238R CPU @ 2.20GHz (56 cores).\nThe `duplicate rate` shows the percentage of duplicated rows in the dataset. You can learn more about this micro-benchmark\nby reading its source code:\n\n.. raw:: html\n\n   <details>\n   <summary><a>Micro-benchmark's source code</a></summary>\n\n.. 
code-block:: python\n\n    import modin.pandas as pd\n    import numpy as np\n    import modin.config as cfg\n\n    from modin.utils import execute\n    from timeit import default_timer as timer\n    import pandas\n\n    cfg.CpuCount.put(16)\n\n    def get_data(nrows, dtype):\n        if dtype == int:\n            return np.arange(nrows)\n        elif dtype == float:\n            return np.arange(nrows).astype(float)\n        elif dtype == str:\n            return np.array([f\"value{i}\" for i in range(nrows)])\n        else:\n            raise NotImplementedError(dtype)\n\n    pd.DataFrame(np.arange(cfg.NPartitions.get() * cfg.MinRowPartitionSize.get())).to_numpy()\n\n    nrows = [1_000_000, 5_000_000, 10_000_000, 25_000_000, 50_000_000, 100_000_000]\n    duplicate_rate = [0, 0.1, 0.5, 0.95]\n    dtypes = [int, str]\n    use_range_part = [True, False]\n\n    columns = pandas.MultiIndex.from_product([dtypes, duplicate_rate, use_range_part], names=[\"dtype\", \"duplicate rate\", \"use range-part\"])\n    result = pandas.DataFrame(index=nrows, columns=columns)\n\n    i = 0\n    total_its = len(nrows) * len(duplicate_rate) * len(dtypes) * len(use_range_part)\n\n    for dt in dtypes:\n        for nrow in nrows:\n            data = get_data(nrow, dt)\n            np.random.shuffle(data)\n            for dpr in duplicate_rate:\n                data_c = data.copy()\n                dupl_val = data_c[0]\n\n                num_duplicates = int(dpr * nrow)\n                dupl_indices = np.random.choice(np.arange(nrow), num_duplicates, replace=False)\n                data_c[dupl_indices] = dupl_val\n\n                for impl in use_range_part:\n                    print(f\"{round((i / total_its) * 100, 2)}%\")\n                    i += 1\n                    cfg.RangePartitioning.put(impl)\n\n                    sr = pd.Series(data_c)\n                    execute(sr)\n\n                    t1 = timer()\n                    # returns a scalar, so no need for 
materialization\n                    res = sr.nunique()\n                    tm = timer() - t1\n                    print(nrow, dpr, dt, impl, tm)\n                    result.loc[nrow, (dt, dpr, impl)] = tm\n                    result.to_excel(\"nunique.xlsx\")\n\n.. raw:: html\n\n   </details>\n\nMeasurements with 16 cores being allocated for Modin (``MODIN_CPUS=16``):\n\n.. image:: /img/range_partitioning_measurements/nunique_16cpus.jpg\n   :align: center\n\n\n.. raw:: html\n\n   </details>\n\nResample\n========\n\n.. note::\n\n    Range-partitioning approach doesn't support transform-like functions (like `.interpolate()`, `.ffill()`, `.bfill()`, ...)\n\nIt is recommended to use range-partitioning for resampling if you're dealing with a dataframe that has more than\n5_000_000 rows and the expected output is also expected to be big (more than 500_000 rows).\n\nUnder the spoiler you can find performance comparisons in different scenarios:\n\n.. raw:: html\n\n   <details>\n   <summary><a>Performance measurements for ``.resample()``</a></summary>\n\nThe script below measures performance of ``df.resample(rule).sum()`` using Intel(R) Xeon(R) Gold 6238R CPU @ 2.20GHz (56 cores).\nYou can learn more about this micro-benchmark by reading its source code:\n\n.. raw:: html\n\n   <details>\n   <summary><a>Micro-benchmark's source code</a></summary>\n\n.. 
code-block:: python\n\n    import pandas\n    import numpy as np\n    import modin.pandas as pd\n    import modin.config as cfg\n\n    from timeit import default_timer as timer\n\n    from modin.utils import execute\n\n    cfg.CpuCount.put(16)\n\n    nrows = [1_000_000, 5_000_000, 10_000_000]\n    ncols = [5, 33]\n    rules = [\n        \"500ms\", # doubles nrows\n        \"30s\", # decreases nrows in 30 times\n        \"5min\", # decreases nrows in 300\n    ]\n    use_rparts = [True, False]\n\n    cols = pandas.MultiIndex.from_product([rules, ncols, use_rparts], names=[\"rule\", \"ncols\", \"USE RANGE PART\"])\n    rres = pandas.DataFrame(index=nrows, columns=cols)\n\n    total_nits = len(nrows) * len(ncols) * len(rules) * len(use_rparts)\n    i = 0\n\n    for nrow in nrows:\n        for ncol in ncols:\n            index = pandas.date_range(\"31/12/2000\", periods=nrow, freq=\"s\")\n            data = {f\"col{i}\": np.arange(nrow) for i in range(ncol)}\n            pd_df = pandas.DataFrame(data, index=index)\n            for rule in rules:\n                for rparts in use_rparts:\n                    print(f\"{round((i / total_nits) * 100, 2)}%\")\n                    i += 1\n                    cfg.RangePartitioning.put(rparts)\n\n                    df = pd.DataFrame(data, index=index)\n                    execute(df)\n\n                    t1 = timer()\n                    res = df.resample(rule).sum()\n                    execute(res)\n                    ts = timer() - t1\n                    print(nrow, ncol, rule, rparts, ts)\n\n                    rres.loc[nrow, (rule, ncol, rparts)] = ts\n                    rres.to_excel(\"resample.xlsx\")\n\n.. raw:: html\n\n   </details>\n\nMeasurements with 16 cores being allocated for Modin (``MODIN_CPUS=16``):\n\n.. image:: /img/range_partitioning_measurements/resample_16cpus.jpg\n   :align: center\n\n\n.. 
raw:: html\n\n   </details>\n\npivot_table\n===========\n\nThe range-partitioning implementation is automatically applied for ``df.pivot_table``\nwhenever possible; users can't control this.\n\nsort_values\n===========\n\nThe range-partitioning implementation is automatically applied for ``df.sort_values``\nwhenever possible; users can't control this.\n\n\n.. _h2o join queries: https://h2oai.github.io/db-benchmark/\n.. _this list: https://github.com/modin-project/modin/blob/7b233e4a920d5f03dce7a82847847b92ae7ad617/modin/core/storage_formats/pandas/groupby.py#L236-L247\n"
  },
  {
    "path": "environment-dev.yml",
    "content": "name: modin\nchannels:\n  - conda-forge\ndependencies:\n  - pip\n\n  # required dependencies\n  - pandas>=2.2,<2.4\n  - numpy>=1.22.4\n  - fsspec>=2022.11.0\n  - packaging>=21.0\n  - psutil>=5.8.0\n\n  # optional dependencies\n  # NOTE Keep the ray and dask dependencies in sync with the Linux and Windows\n  # Unidist environment dependencies.\n  - ray-core>=2.10.0,<3\n  - pyarrow>=10.0.1\n  # workaround for https://github.com/conda/conda/issues/11744\n  - grpcio!=1.45.*\n  - grpcio!=1.46.*\n  - dask>=2.22.0\n  - distributed>=2.22.0\n  - xarray>=2022.12.0\n  - jinja2>=3.1.2\n  - scipy>=1.10.0\n  - s3fs>=2022.11.0\n  - lxml>=4.9.2\n  - openpyxl>=3.1.0\n  - xlrd>=2.0.1\n  - matplotlib>=3.6.3\n  - sqlalchemy>=2.0.0\n  - pandas-gbq>=0.19.0\n  - pytables>=3.8.0\n  # pymssql==2.2.8 broken: https://github.com/modin-project/modin/issues/6429\n  - pymssql>=2.1.5,!=2.2.8\n  - psycopg2>=2.9.6\n  - fastparquet>=2022.12.0\n  - tqdm>=4.60.0\n  - numexpr>=2.8.4\n\n  # dependencies for making release\n  - pygithub>=v1.58.0\n  - pygit2>=1.9.2\n\n  # test dependencies\n  - coverage>=7.1.0\n  - moto>=4.1.0\n  - pytest>=7.3.2\n  - pytest-benchmark>=4.0.0\n  - pytest-cov>=4.0.0\n  - pytest-xdist>=3.2.0\n  - typing_extensions\n\n  # code linters\n  - black>=24.1.0\n  - flake8>=6.0.0\n  - flake8-no-implicit-concat>=0.3.4\n  - flake8-print>=5.0.0\n  - mypy>=1.0.0\n  - pandas-stubs>=2.0.0\n  - isort>=5.12\n\n  - pip:\n      - dataframe-api-compat>=0.2.7\n      - asv==0.5.1\n      # no conda package for windows so we install it with pip\n      - connectorx>=0.2.6a4\n      - fuzzydata>=0.0.11\n      # Fixes breaking ipywidgets changes, but didn't release yet.\n      - git+https://github.com/modin-project/modin-spreadsheet.git@49ffd89f683f54c311867d602c55443fb11bf2a5\n      # The `numpydoc` version should match the version installed in the `lint-pydocstyle` job of the CI.\n      - numpydoc==1.6.0\n      - polars\n"
  },
  {
    "path": "examples/data/boston_housing.csv",
    "content": ",CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,PRICE\n0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0\n1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6\n2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7\n3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4\n4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2\n5,0.02985,0.0,2.18,0.0,0.458,6.43,58.7,6.0622,3.0,222.0,18.7,394.12,5.21,28.7\n6,0.08829,12.5,7.87,0.0,0.524,6.012,66.6,5.5605,5.0,311.0,15.2,395.6,12.43,22.9\n7,0.14455,12.5,7.87,0.0,0.524,6.172,96.1,5.9505,5.0,311.0,15.2,396.9,19.15,27.1\n8,0.21124,12.5,7.87,0.0,0.524,5.631,100.0,6.0821,5.0,311.0,15.2,386.63,29.93,16.5\n9,0.17004,12.5,7.87,0.0,0.524,6.004,85.9,6.5921,5.0,311.0,15.2,386.71,17.1,18.9\n10,0.22489,12.5,7.87,0.0,0.524,6.377,94.3,6.3467,5.0,311.0,15.2,392.52,20.45,15.0\n11,0.11747,12.5,7.87,0.0,0.524,6.009,82.9,6.2267,5.0,311.0,15.2,396.9,13.27,18.9\n12,0.09378,12.5,7.87,0.0,0.524,5.889,39.0,5.4509,5.0,311.0,15.2,390.5,15.71,21.7\n13,0.62976,0.0,8.14,0.0,0.538,5.949,61.8,4.7075,4.0,307.0,21.0,396.9,8.26,20.4\n14,0.63796,0.0,8.14,0.0,0.538,6.096,84.5,4.4619,4.0,307.0,21.0,380.02,10.26,18.2\n15,0.62739,0.0,8.14,0.0,0.538,5.834,56.5,4.4986,4.0,307.0,21.0,395.62,8.47,19.9\n16,1.05393,0.0,8.14,0.0,0.538,5.935,29.3,4.4986,4.0,307.0,21.0,386.85,6.58,23.1\n17,0.7842,0.0,8.14,0.0,0.538,5.99,81.7,4.2579,4.0,307.0,21.0,386.75,14.67,17.5\n18,0.80271,0.0,8.14,0.0,0.538,5.456,36.6,3.7965,4.0,307.0,21.0,288.99,11.69,20.2\n19,0.7258,0.0,8.14,0.0,0.538,5.727,69.5,3.7965,4.0,307.0,21.0,390.95,11.28,18.2\n20,1.25179,0.0,8.14,0.0,0.538,5.57,98.1,3.7979,4.0,307.0,21.0,376.57,21.02,13.6\n21,0.85204,0.0,8.14,0.0,0.538,5.965,89.2,4.0123,4.0,307.0,21.0,392.53,13.83,19.6\n22,1.23247,0.0,8.14,0.0,0.538,6.142,91.7,3.9769,4.0,307.0,21.0,396.9,18.72,15.2\n23,0.98843,0.0,8.14,0.0,0.538,5.813,100.0,4.0952,4.0,307.0,
21.0,394.54,19.88,14.5\n24,0.75026,0.0,8.14,0.0,0.538,5.924,94.1,4.3996,4.0,307.0,21.0,394.33,16.3,15.6\n25,0.84054,0.0,8.14,0.0,0.538,5.599,85.7,4.4546,4.0,307.0,21.0,303.42,16.51,13.9\n26,0.67191,0.0,8.14,0.0,0.538,5.813,90.3,4.682,4.0,307.0,21.0,376.88,14.81,16.6\n27,0.95577,0.0,8.14,0.0,0.538,6.047,88.8,4.4534,4.0,307.0,21.0,306.38,17.28,14.8\n28,0.77299,0.0,8.14,0.0,0.538,6.495,94.4,4.4547,4.0,307.0,21.0,387.94,12.8,18.4\n29,1.00245,0.0,8.14,0.0,0.538,6.674,87.3,4.239,4.0,307.0,21.0,380.23,11.98,21.0\n30,1.13081,0.0,8.14,0.0,0.538,5.713,94.1,4.233,4.0,307.0,21.0,360.17,22.6,12.7\n31,1.35472,0.0,8.14,0.0,0.538,6.072,100.0,4.175,4.0,307.0,21.0,376.73,13.04,14.5\n32,1.38799,0.0,8.14,0.0,0.538,5.95,82.0,3.99,4.0,307.0,21.0,232.6,27.71,13.2\n33,1.15172,0.0,8.14,0.0,0.538,5.701,95.0,3.7872,4.0,307.0,21.0,358.77,18.35,13.1\n34,1.61282,0.0,8.14,0.0,0.538,6.096,96.9,3.7598,4.0,307.0,21.0,248.31,20.34,13.5\n35,0.06417,0.0,5.96,0.0,0.499,5.933,68.2,3.3603,5.0,279.0,19.2,396.9,9.68,18.9\n36,0.09744,0.0,5.96,0.0,0.499,5.841,61.4,3.3779,5.0,279.0,19.2,377.56,11.41,20.0\n37,0.08014,0.0,5.96,0.0,0.499,5.85,41.5,3.9342,5.0,279.0,19.2,396.9,8.77,21.0\n38,0.17505,0.0,5.96,0.0,0.499,5.966,30.2,3.8473,5.0,279.0,19.2,393.43,10.13,24.7\n39,0.02763,75.0,2.95,0.0,0.428,6.595,21.8,5.4011,3.0,252.0,18.3,395.63,4.32,30.8\n40,0.03359,75.0,2.95,0.0,0.428,7.024,15.8,5.4011,3.0,252.0,18.3,395.62,1.98,34.9\n41,0.12744,0.0,6.91,0.0,0.448,6.77,2.9,5.7209,3.0,233.0,17.9,385.41,4.84,26.6\n42,0.1415,0.0,6.91,0.0,0.448,6.169,6.6,5.7209,3.0,233.0,17.9,383.37,5.81,25.3\n43,0.15936,0.0,6.91,0.0,0.448,6.211,6.5,5.7209,3.0,233.0,17.9,394.46,7.44,24.7\n44,0.12269,0.0,6.91,0.0,0.448,6.069,40.0,5.7209,3.0,233.0,17.9,389.39,9.55,21.2\n45,0.17142,0.0,6.91,0.0,0.448,5.682,33.8,5.1004,3.0,233.0,17.9,396.9,10.21,19.3\n46,0.18836,0.0,6.91,0.0,0.448,5.786,33.3,5.1004,3.0,233.0,17.9,396.9,14.15,20.0\n47,0.22927,0.0,6.91,0.0,0.448,6.03,85.5,5.6894,3.0,233.0,17.9,392.74,18.8,16.6\n48,0.25387,0.0,6.91,0.0,0.448,5.399,
95.3,5.87,3.0,233.0,17.9,396.9,30.81,14.4\n49,0.21977,0.0,6.91,0.0,0.448,5.602,62.0,6.0877,3.0,233.0,17.9,396.9,16.2,19.4\n50,0.08873,21.0,5.64,0.0,0.439,5.963,45.7,6.8147,4.0,243.0,16.8,395.56,13.45,19.7\n51,0.04337,21.0,5.64,0.0,0.439,6.115,63.0,6.8147,4.0,243.0,16.8,393.97,9.43,20.5\n52,0.0536,21.0,5.64,0.0,0.439,6.511,21.1,6.8147,4.0,243.0,16.8,396.9,5.28,25.0\n53,0.04981,21.0,5.64,0.0,0.439,5.998,21.4,6.8147,4.0,243.0,16.8,396.9,8.43,23.4\n54,0.0136,75.0,4.0,0.0,0.41,5.888,47.6,7.3197,3.0,469.0,21.1,396.9,14.8,18.9\n55,0.01311,90.0,1.22,0.0,0.403,7.249,21.9,8.6966,5.0,226.0,17.9,395.93,4.81,35.4\n56,0.02055,85.0,0.74,0.0,0.41,6.383,35.7,9.1876,2.0,313.0,17.3,396.9,5.77,24.7\n57,0.01432,100.0,1.32,0.0,0.411,6.816,40.5,8.3248,5.0,256.0,15.1,392.9,3.95,31.6\n58,0.15445,25.0,5.13,0.0,0.453,6.145,29.2,7.8148,8.0,284.0,19.7,390.68,6.86,23.3\n59,0.10328,25.0,5.13,0.0,0.453,5.927,47.2,6.932,8.0,284.0,19.7,396.9,9.22,19.6\n60,0.14932,25.0,5.13,0.0,0.453,5.741,66.2,7.2254,8.0,284.0,19.7,395.11,13.15,18.7\n61,0.17171,25.0,5.13,0.0,0.453,5.966,93.4,6.8185,8.0,284.0,19.7,378.08,14.44,16.0\n62,0.11027,25.0,5.13,0.0,0.453,6.456,67.8,7.2255,8.0,284.0,19.7,396.9,6.73,22.2\n63,0.1265,25.0,5.13,0.0,0.453,6.762,43.4,7.9809,8.0,284.0,19.7,395.58,9.5,25.0\n64,0.01951,17.5,1.38,0.0,0.4161,7.104,59.5,9.2229,3.0,216.0,18.6,393.24,8.05,33.0\n65,0.03584,80.0,3.37,0.0,0.398,6.29,17.8,6.6115,4.0,337.0,16.1,396.9,4.67,23.5\n66,0.04379,80.0,3.37,0.0,0.398,5.787,31.1,6.6115,4.0,337.0,16.1,396.9,10.24,19.4\n67,0.05789,12.5,6.07,0.0,0.409,5.878,21.4,6.498,4.0,345.0,18.9,396.21,8.1,22.0\n68,0.13554,12.5,6.07,0.0,0.409,5.594,36.8,6.498,4.0,345.0,18.9,396.9,13.09,17.4\n69,0.12816,12.5,6.07,0.0,0.409,5.885,33.0,6.498,4.0,345.0,18.9,396.9,8.79,20.9\n70,0.08826,0.0,10.81,0.0,0.413,6.417,6.6,5.2873,4.0,305.0,19.2,383.73,6.72,24.2\n71,0.15876,0.0,10.81,0.0,0.413,5.961,17.5,5.2873,4.0,305.0,19.2,376.94,9.88,21.7\n72,0.09164,0.0,10.81,0.0,0.413,6.065,7.8,5.2873,4.0,305.0,19.2,390.91,5.52,22.8\n73,0.19539
,0.0,10.81,0.0,0.413,6.245,6.2,5.2873,4.0,305.0,19.2,377.17,7.54,23.4\n74,0.07896,0.0,12.83,0.0,0.437,6.273,6.0,4.2515,5.0,398.0,18.7,394.92,6.78,24.1\n75,0.09512,0.0,12.83,0.0,0.437,6.286,45.0,4.5026,5.0,398.0,18.7,383.23,8.94,21.4\n76,0.10153,0.0,12.83,0.0,0.437,6.279,74.5,4.0522,5.0,398.0,18.7,373.66,11.97,20.0\n77,0.08707,0.0,12.83,0.0,0.437,6.14,45.8,4.0905,5.0,398.0,18.7,386.96,10.27,20.8\n78,0.05646,0.0,12.83,0.0,0.437,6.232,53.7,5.0141,5.0,398.0,18.7,386.4,12.34,21.2\n79,0.08387,0.0,12.83,0.0,0.437,5.874,36.6,4.5026,5.0,398.0,18.7,396.06,9.1,20.3\n80,0.04113,25.0,4.86,0.0,0.426,6.727,33.5,5.4007,4.0,281.0,19.0,396.9,5.29,28.0\n81,0.04462,25.0,4.86,0.0,0.426,6.619,70.4,5.4007,4.0,281.0,19.0,395.63,7.22,23.9\n82,0.03659,25.0,4.86,0.0,0.426,6.302,32.2,5.4007,4.0,281.0,19.0,396.9,6.72,24.8\n83,0.03551,25.0,4.86,0.0,0.426,6.167,46.7,5.4007,4.0,281.0,19.0,390.64,7.51,22.9\n84,0.05059,0.0,4.49,0.0,0.449,6.389,48.0,4.7794,3.0,247.0,18.5,396.9,9.62,23.9\n85,0.05735,0.0,4.49,0.0,0.449,6.63,56.1,4.4377,3.0,247.0,18.5,392.3,6.53,26.6\n86,0.05188,0.0,4.49,0.0,0.449,6.015,45.1,4.4272,3.0,247.0,18.5,395.99,12.86,22.5\n87,0.07151,0.0,4.49,0.0,0.449,6.121,56.8,3.7476,3.0,247.0,18.5,395.15,8.44,22.2\n88,0.0566,0.0,3.41,0.0,0.489,7.007,86.3,3.4217,2.0,270.0,17.8,396.9,5.5,23.6\n89,0.05302,0.0,3.41,0.0,0.489,7.079,63.1,3.4145,2.0,270.0,17.8,396.06,5.7,28.7\n90,0.04684,0.0,3.41,0.0,0.489,6.417,66.1,3.0923,2.0,270.0,17.8,392.18,8.81,22.6\n91,0.03932,0.0,3.41,0.0,0.489,6.405,73.9,3.0921,2.0,270.0,17.8,393.55,8.2,22.0\n92,0.04203,28.0,15.04,0.0,0.464,6.442,53.6,3.6659,4.0,270.0,18.2,395.01,8.16,22.9\n93,0.02875,28.0,15.04,0.0,0.464,6.211,28.9,3.6659,4.0,270.0,18.2,396.33,6.21,25.0\n94,0.04294,28.0,15.04,0.0,0.464,6.249,77.3,3.615,4.0,270.0,18.2,396.9,10.59,20.6\n95,0.12204,0.0,2.89,0.0,0.445,6.625,57.8,3.4952,2.0,276.0,18.0,357.98,6.65,28.4\n96,0.11504,0.0,2.89,0.0,0.445,6.163,69.6,3.4952,2.0,276.0,18.0,391.83,11.34,21.4\n97,0.12083,0.0,2.89,0.0,0.445,8.069,76.0,3.4952,2.0,276.0,18
.0,396.9,4.21,38.7\n98,0.08187,0.0,2.89,0.0,0.445,7.82,36.9,3.4952,2.0,276.0,18.0,393.53,3.57,43.8\n99,0.0686,0.0,2.89,0.0,0.445,7.416,62.5,3.4952,2.0,276.0,18.0,396.9,6.19,33.2\n100,0.14866,0.0,8.56,0.0,0.52,6.727,79.9,2.7778,5.0,384.0,20.9,394.76,9.42,27.5\n101,0.11432,0.0,8.56,0.0,0.52,6.781,71.3,2.8561,5.0,384.0,20.9,395.58,7.67,26.5\n102,0.22876,0.0,8.56,0.0,0.52,6.405,85.4,2.7147,5.0,384.0,20.9,70.8,10.63,18.6\n103,0.21161,0.0,8.56,0.0,0.52,6.137,87.4,2.7147,5.0,384.0,20.9,394.47,13.44,19.3\n104,0.1396,0.0,8.56,0.0,0.52,6.167,90.0,2.421,5.0,384.0,20.9,392.69,12.33,20.1\n105,0.13262,0.0,8.56,0.0,0.52,5.851,96.7,2.1069,5.0,384.0,20.9,394.05,16.47,19.5\n106,0.1712,0.0,8.56,0.0,0.52,5.836,91.9,2.211,5.0,384.0,20.9,395.67,18.66,19.5\n107,0.13117,0.0,8.56,0.0,0.52,6.127,85.2,2.1224,5.0,384.0,20.9,387.69,14.09,20.4\n108,0.12802,0.0,8.56,0.0,0.52,6.474,97.1,2.4329,5.0,384.0,20.9,395.24,12.27,19.8\n109,0.26363,0.0,8.56,0.0,0.52,6.229,91.2,2.5451,5.0,384.0,20.9,391.23,15.55,19.4\n110,0.10793,0.0,8.56,0.0,0.52,6.195,54.4,2.7778,5.0,384.0,20.9,393.49,13.0,21.7\n111,0.10084,0.0,10.01,0.0,0.547,6.715,81.6,2.6775,6.0,432.0,17.8,395.59,10.16,22.8\n112,0.12329,0.0,10.01,0.0,0.547,5.913,92.9,2.3534,6.0,432.0,17.8,394.95,16.21,18.8\n113,0.22212,0.0,10.01,0.0,0.547,6.092,95.4,2.548,6.0,432.0,17.8,396.9,17.09,18.7\n114,0.14231,0.0,10.01,0.0,0.547,6.254,84.2,2.2565,6.0,432.0,17.8,388.74,10.45,18.5\n115,0.17134,0.0,10.01,0.0,0.547,5.928,88.2,2.4631,6.0,432.0,17.8,344.91,15.76,18.3\n116,0.13158,0.0,10.01,0.0,0.547,6.176,72.5,2.7301,6.0,432.0,17.8,393.3,12.04,21.2\n117,0.15098,0.0,10.01,0.0,0.547,6.021,82.6,2.7474,6.0,432.0,17.8,394.51,10.3,19.2\n118,0.13058,0.0,10.01,0.0,0.547,5.872,73.1,2.4775,6.0,432.0,17.8,338.63,15.37,20.4\n119,0.14476,0.0,10.01,0.0,0.547,5.731,65.2,2.7592,6.0,432.0,17.8,391.5,13.61,19.3\n120,0.06899,0.0,25.65,0.0,0.581,5.87,69.7,2.2577,2.0,188.0,19.1,389.15,14.37,22.0\n121,0.07165,0.0,25.65,0.0,0.581,6.004,84.1,2.1974,2.0,188.0,19.1,377.67,14.27,20.3\n122,0.0929
9,0.0,25.65,0.0,0.581,5.961,92.9,2.0869,2.0,188.0,19.1,378.09,17.93,20.5\n123,0.15038,0.0,25.65,0.0,0.581,5.856,97.0,1.9444,2.0,188.0,19.1,370.31,25.41,17.3\n124,0.09849,0.0,25.65,0.0,0.581,5.879,95.8,2.0063,2.0,188.0,19.1,379.38,17.58,18.8\n125,0.16902,0.0,25.65,0.0,0.581,5.986,88.4,1.9929,2.0,188.0,19.1,385.02,14.81,21.4\n126,0.38735,0.0,25.65,0.0,0.581,5.613,95.6,1.7572,2.0,188.0,19.1,359.29,27.26,15.7\n127,0.25915,0.0,21.89,0.0,0.624,5.693,96.0,1.7883,4.0,437.0,21.2,392.11,17.19,16.2\n128,0.32543,0.0,21.89,0.0,0.624,6.431,98.8,1.8125,4.0,437.0,21.2,396.9,15.39,18.0\n129,0.88125,0.0,21.89,0.0,0.624,5.637,94.7,1.9799,4.0,437.0,21.2,396.9,18.34,14.3\n130,0.34006,0.0,21.89,0.0,0.624,6.458,98.9,2.1185,4.0,437.0,21.2,395.04,12.6,19.2\n131,1.19294,0.0,21.89,0.0,0.624,6.326,97.7,2.271,4.0,437.0,21.2,396.9,12.26,19.6\n132,0.59005,0.0,21.89,0.0,0.624,6.372,97.9,2.3274,4.0,437.0,21.2,385.76,11.12,23.0\n133,0.32982,0.0,21.89,0.0,0.624,5.822,95.4,2.4699,4.0,437.0,21.2,388.69,15.03,18.4\n134,0.97617,0.0,21.89,0.0,0.624,5.757,98.4,2.346,4.0,437.0,21.2,262.76,17.31,15.6\n135,0.55778,0.0,21.89,0.0,0.624,6.335,98.2,2.1107,4.0,437.0,21.2,394.67,16.96,18.1\n136,0.32264,0.0,21.89,0.0,0.624,5.942,93.5,1.9669,4.0,437.0,21.2,378.25,16.9,17.4\n137,0.35233,0.0,21.89,0.0,0.624,6.454,98.4,1.8498,4.0,437.0,21.2,394.08,14.59,17.1\n138,0.2498,0.0,21.89,0.0,0.624,5.857,98.2,1.6686,4.0,437.0,21.2,392.04,21.32,13.3\n139,0.54452,0.0,21.89,0.0,0.624,6.151,97.9,1.6687,4.0,437.0,21.2,396.9,18.46,17.8\n140,0.2909,0.0,21.89,0.0,0.624,6.174,93.6,1.6119,4.0,437.0,21.2,388.08,24.16,14.0\n141,1.62864,0.0,21.89,0.0,0.624,5.019,100.0,1.4394,4.0,437.0,21.2,396.9,34.41,14.4\n142,3.32105,0.0,19.58,1.0,0.871,5.403,100.0,1.3216,5.0,403.0,14.7,396.9,26.82,13.4\n143,4.0974,0.0,19.58,0.0,0.871,5.468,100.0,1.4118,5.0,403.0,14.7,396.9,26.42,15.6\n144,2.77974,0.0,19.58,0.0,0.871,4.903,97.8,1.3459,5.0,403.0,14.7,396.9,29.29,11.8\n145,2.37934,0.0,19.58,0.0,0.871,6.13,100.0,1.4191,5.0,403.0,14.7,172.91,27.8,13.8\n146,2.1
5505,0.0,19.58,0.0,0.871,5.628,100.0,1.5166,5.0,403.0,14.7,169.27,16.65,15.6\n147,2.36862,0.0,19.58,0.0,0.871,4.926,95.7,1.4608,5.0,403.0,14.7,391.71,29.53,14.6\n148,2.33099,0.0,19.58,0.0,0.871,5.186,93.8,1.5296,5.0,403.0,14.7,356.99,28.32,17.8\n149,2.73397,0.0,19.58,0.0,0.871,5.597,94.9,1.5257,5.0,403.0,14.7,351.85,21.45,15.4\n150,1.6566,0.0,19.58,0.0,0.871,6.122,97.3,1.618,5.0,403.0,14.7,372.8,14.1,21.5\n151,1.49632,0.0,19.58,0.0,0.871,5.404,100.0,1.5916,5.0,403.0,14.7,341.6,13.28,19.6\n152,1.12658,0.0,19.58,1.0,0.871,5.012,88.0,1.6102,5.0,403.0,14.7,343.28,12.12,15.3\n153,2.14918,0.0,19.58,0.0,0.871,5.709,98.5,1.6232,5.0,403.0,14.7,261.95,15.79,19.4\n154,1.41385,0.0,19.58,1.0,0.871,6.129,96.0,1.7494,5.0,403.0,14.7,321.02,15.12,17.0\n155,3.53501,0.0,19.58,1.0,0.871,6.152,82.6,1.7455,5.0,403.0,14.7,88.01,15.02,15.6\n156,2.44668,0.0,19.58,0.0,0.871,5.272,94.0,1.7364,5.0,403.0,14.7,88.63,16.14,13.1\n157,1.22358,0.0,19.58,0.0,0.605,6.943,97.4,1.8773,5.0,403.0,14.7,363.43,4.59,41.3\n158,1.34284,0.0,19.58,0.0,0.605,6.066,100.0,1.7573,5.0,403.0,14.7,353.89,6.43,24.3\n159,1.42502,0.0,19.58,0.0,0.871,6.51,100.0,1.7659,5.0,403.0,14.7,364.31,7.39,23.3\n160,1.27346,0.0,19.58,1.0,0.605,6.25,92.6,1.7984,5.0,403.0,14.7,338.92,5.5,27.0\n161,1.46336,0.0,19.58,0.0,0.605,7.489,90.8,1.9709,5.0,403.0,14.7,374.43,1.73,50.0\n162,1.83377,0.0,19.58,1.0,0.605,7.802,98.2,2.0407,5.0,403.0,14.7,389.61,1.92,50.0\n163,1.51902,0.0,19.58,1.0,0.605,8.375,93.9,2.162,5.0,403.0,14.7,388.45,3.32,50.0\n164,2.24236,0.0,19.58,0.0,0.605,5.854,91.8,2.422,5.0,403.0,14.7,395.11,11.64,22.7\n165,2.924,0.0,19.58,0.0,0.605,6.101,93.0,2.2834,5.0,403.0,14.7,240.16,9.81,25.0\n166,2.01019,0.0,19.58,0.0,0.605,7.929,96.2,2.0459,5.0,403.0,14.7,369.3,3.7,50.0\n167,1.80028,0.0,19.58,0.0,0.605,5.877,79.2,2.4259,5.0,403.0,14.7,227.61,12.14,23.8\n168,2.3004,0.0,19.58,0.0,0.605,6.319,96.1,2.1,5.0,403.0,14.7,297.09,11.1,23.8\n169,2.44953,0.0,19.58,0.0,0.605,6.402,95.2,2.2625,5.0,403.0,14.7,330.04,11.32,22.3\n170,1.20742,0.0,1
9.58,0.0,0.605,5.875,94.6,2.4259,5.0,403.0,14.7,292.29,14.43,17.4\n171,2.3139,0.0,19.58,0.0,0.605,5.88,97.3,2.3887,5.0,403.0,14.7,348.13,12.03,19.1\n172,0.13914,0.0,4.05,0.0,0.51,5.572,88.5,2.5961,5.0,296.0,16.6,396.9,14.69,23.1\n173,0.09178,0.0,4.05,0.0,0.51,6.416,84.1,2.6463,5.0,296.0,16.6,395.5,9.04,23.6\n174,0.08447,0.0,4.05,0.0,0.51,5.859,68.7,2.7019,5.0,296.0,16.6,393.23,9.64,22.6\n175,0.06664,0.0,4.05,0.0,0.51,6.546,33.1,3.1323,5.0,296.0,16.6,390.96,5.33,29.4\n176,0.07022,0.0,4.05,0.0,0.51,6.02,47.2,3.5549,5.0,296.0,16.6,393.23,10.11,23.2\n177,0.05425,0.0,4.05,0.0,0.51,6.315,73.4,3.3175,5.0,296.0,16.6,395.6,6.29,24.6\n178,0.06642,0.0,4.05,0.0,0.51,6.86,74.4,2.9153,5.0,296.0,16.6,391.27,6.92,29.9\n179,0.0578,0.0,2.46,0.0,0.488,6.98,58.4,2.829,3.0,193.0,17.8,396.9,5.04,37.2\n180,0.06588,0.0,2.46,0.0,0.488,7.765,83.3,2.741,3.0,193.0,17.8,395.56,7.56,39.8\n181,0.06888,0.0,2.46,0.0,0.488,6.144,62.2,2.5979,3.0,193.0,17.8,396.9,9.45,36.2\n182,0.09103,0.0,2.46,0.0,0.488,7.155,92.2,2.7006,3.0,193.0,17.8,394.12,4.82,37.9\n183,0.10008,0.0,2.46,0.0,0.488,6.563,95.6,2.847,3.0,193.0,17.8,396.9,5.68,32.5\n184,0.08308,0.0,2.46,0.0,0.488,5.604,89.8,2.9879,3.0,193.0,17.8,391.0,13.98,26.4\n185,0.06047,0.0,2.46,0.0,0.488,6.153,68.8,3.2797,3.0,193.0,17.8,387.11,13.15,29.6\n186,0.05602,0.0,2.46,0.0,0.488,7.831,53.6,3.1992,3.0,193.0,17.8,392.63,4.45,50.0\n187,0.07875,45.0,3.44,0.0,0.437,6.782,41.1,3.7886,5.0,398.0,15.2,393.87,6.68,32.0\n188,0.12579,45.0,3.44,0.0,0.437,6.556,29.1,4.5667,5.0,398.0,15.2,382.84,4.56,29.8\n189,0.0837,45.0,3.44,0.0,0.437,7.185,38.9,4.5667,5.0,398.0,15.2,396.9,5.39,34.9\n190,0.09068,45.0,3.44,0.0,0.437,6.951,21.5,6.4798,5.0,398.0,15.2,377.68,5.1,37.0\n191,0.06911,45.0,3.44,0.0,0.437,6.739,30.8,6.4798,5.0,398.0,15.2,389.71,4.69,30.5\n192,0.08664,45.0,3.44,0.0,0.437,7.178,26.3,6.4798,5.0,398.0,15.2,390.49,2.87,36.4\n193,0.02187,60.0,2.93,0.0,0.401,6.8,9.9,6.2196,1.0,265.0,15.6,393.37,5.03,31.1\n194,0.01439,60.0,2.93,0.0,0.401,6.604,18.8,6.2196,1.0,265.0,15.
6,376.7,4.38,29.1\n195,0.01381,80.0,0.46,0.0,0.422,7.875,32.0,5.6484,4.0,255.0,14.4,394.23,2.97,50.0\n196,0.04011,80.0,1.52,0.0,0.404,7.287,34.1,7.309,2.0,329.0,12.6,396.9,4.08,33.3\n197,0.04666,80.0,1.52,0.0,0.404,7.107,36.6,7.309,2.0,329.0,12.6,354.31,8.61,30.3\n198,0.03768,80.0,1.52,0.0,0.404,7.274,38.3,7.309,2.0,329.0,12.6,392.2,6.62,34.6\n199,0.0315,95.0,1.47,0.0,0.403,6.975,15.3,7.6534,3.0,402.0,17.0,396.9,4.56,34.9\n200,0.01778,95.0,1.47,0.0,0.403,7.135,13.9,7.6534,3.0,402.0,17.0,384.3,4.45,32.9\n201,0.03445,82.5,2.03,0.0,0.415,6.162,38.4,6.27,2.0,348.0,14.7,393.77,7.43,24.1\n202,0.02177,82.5,2.03,0.0,0.415,7.61,15.7,6.27,2.0,348.0,14.7,395.38,3.11,42.3\n203,0.0351,95.0,2.68,0.0,0.4161,7.853,33.2,5.118,4.0,224.0,14.7,392.78,3.81,48.5\n204,0.02009,95.0,2.68,0.0,0.4161,8.034,31.9,5.118,4.0,224.0,14.7,390.55,2.88,50.0\n205,0.13642,0.0,10.59,0.0,0.489,5.891,22.3,3.9454,4.0,277.0,18.6,396.9,10.87,22.6\n206,0.22969,0.0,10.59,0.0,0.489,6.326,52.5,4.3549,4.0,277.0,18.6,394.87,10.97,24.4\n207,0.25199,0.0,10.59,0.0,0.489,5.783,72.7,4.3549,4.0,277.0,18.6,389.43,18.06,22.5\n208,0.13587,0.0,10.59,1.0,0.489,6.064,59.1,4.2392,4.0,277.0,18.6,381.32,14.66,24.4\n209,0.43571,0.0,10.59,1.0,0.489,5.344,100.0,3.875,4.0,277.0,18.6,396.9,23.09,20.0\n210,0.17446,0.0,10.59,1.0,0.489,5.96,92.1,3.8771,4.0,277.0,18.6,393.25,17.27,21.7\n211,0.37578,0.0,10.59,1.0,0.489,5.404,88.6,3.665,4.0,277.0,18.6,395.24,23.98,19.3\n212,0.21719,0.0,10.59,1.0,0.489,5.807,53.8,3.6526,4.0,277.0,18.6,390.94,16.03,22.4\n213,0.14052,0.0,10.59,0.0,0.489,6.375,32.3,3.9454,4.0,277.0,18.6,385.81,9.38,28.1\n214,0.28955,0.0,10.59,0.0,0.489,5.412,9.8,3.5875,4.0,277.0,18.6,348.93,29.55,23.7\n215,0.19802,0.0,10.59,0.0,0.489,6.182,42.4,3.9454,4.0,277.0,18.6,393.63,9.47,25.0\n216,0.0456,0.0,13.89,1.0,0.55,5.888,56.0,3.1121,5.0,276.0,16.4,392.8,13.51,23.3\n217,0.07013,0.0,13.89,0.0,0.55,6.642,85.1,3.4211,5.0,276.0,16.4,392.78,9.69,28.7\n218,0.11069,0.0,13.89,1.0,0.55,5.951,93.8,2.8893,5.0,276.0,16.4,396.9,17.92,21.5\n219
,0.11425,0.0,13.89,1.0,0.55,6.373,92.4,3.3633,5.0,276.0,16.4,393.74,10.5,23.0\n220,0.35809,0.0,6.2,1.0,0.507,6.951,88.5,2.8617,8.0,307.0,17.4,391.7,9.71,26.7\n221,0.40771,0.0,6.2,1.0,0.507,6.164,91.3,3.048,8.0,307.0,17.4,395.24,21.46,21.7\n222,0.62356,0.0,6.2,1.0,0.507,6.879,77.7,3.2721,8.0,307.0,17.4,390.39,9.93,27.5\n223,0.6147,0.0,6.2,0.0,0.507,6.618,80.8,3.2721,8.0,307.0,17.4,396.9,7.6,30.1\n224,0.31533,0.0,6.2,0.0,0.504,8.266,78.3,2.8944,8.0,307.0,17.4,385.05,4.14,44.8\n225,0.52693,0.0,6.2,0.0,0.504,8.725,83.0,2.8944,8.0,307.0,17.4,382.0,4.63,50.0\n226,0.38214,0.0,6.2,0.0,0.504,8.04,86.5,3.2157,8.0,307.0,17.4,387.38,3.13,37.6\n227,0.41238,0.0,6.2,0.0,0.504,7.163,79.9,3.2157,8.0,307.0,17.4,372.08,6.36,31.6\n228,0.29819,0.0,6.2,0.0,0.504,7.686,17.0,3.3751,8.0,307.0,17.4,377.51,3.92,46.7\n229,0.44178,0.0,6.2,0.0,0.504,6.552,21.4,3.3751,8.0,307.0,17.4,380.34,3.76,31.5\n230,0.537,0.0,6.2,0.0,0.504,5.981,68.1,3.6715,8.0,307.0,17.4,378.35,11.65,24.3\n231,0.46296,0.0,6.2,0.0,0.504,7.412,76.9,3.6715,8.0,307.0,17.4,376.14,5.25,31.7\n232,0.57529,0.0,6.2,0.0,0.507,8.337,73.3,3.8384,8.0,307.0,17.4,385.91,2.47,41.7\n233,0.33147,0.0,6.2,0.0,0.507,8.247,70.4,3.6519,8.0,307.0,17.4,378.95,3.95,48.3\n234,0.44791,0.0,6.2,1.0,0.507,6.726,66.5,3.6519,8.0,307.0,17.4,360.2,8.05,29.0\n235,0.33045,0.0,6.2,0.0,0.507,6.086,61.5,3.6519,8.0,307.0,17.4,376.75,10.88,24.0\n236,0.52058,0.0,6.2,1.0,0.507,6.631,76.5,4.148,8.0,307.0,17.4,388.45,9.54,25.1\n237,0.51183,0.0,6.2,0.0,0.507,7.358,71.6,4.148,8.0,307.0,17.4,390.07,4.73,31.5\n238,0.08244,30.0,4.93,0.0,0.428,6.481,18.5,6.1899,6.0,300.0,16.6,379.41,6.36,23.7\n239,0.09252,30.0,4.93,0.0,0.428,6.606,42.2,6.1899,6.0,300.0,16.6,383.78,7.37,23.3\n240,0.11329,30.0,4.93,0.0,0.428,6.897,54.3,6.3361,6.0,300.0,16.6,391.25,11.38,22.0\n241,0.10612,30.0,4.93,0.0,0.428,6.095,65.1,6.3361,6.0,300.0,16.6,394.62,12.4,20.1\n242,0.1029,30.0,4.93,0.0,0.428,6.358,52.9,7.0355,6.0,300.0,16.6,372.75,11.22,22.2\n243,0.12757,30.0,4.93,0.0,0.428,6.393,7.8,7.0355,6.0,300
.0,16.6,374.71,5.19,23.7\n244,0.20608,22.0,5.86,0.0,0.431,5.593,76.5,7.9549,7.0,330.0,19.1,372.49,12.5,17.6\n245,0.19133,22.0,5.86,0.0,0.431,5.605,70.2,7.9549,7.0,330.0,19.1,389.13,18.46,18.5\n246,0.33983,22.0,5.86,0.0,0.431,6.108,34.9,8.0555,7.0,330.0,19.1,390.18,9.16,24.3\n247,0.19657,22.0,5.86,0.0,0.431,6.226,79.2,8.0555,7.0,330.0,19.1,376.14,10.15,20.5\n248,0.16439,22.0,5.86,0.0,0.431,6.433,49.1,7.8265,7.0,330.0,19.1,374.71,9.52,24.5\n249,0.19073,22.0,5.86,0.0,0.431,6.718,17.5,7.8265,7.0,330.0,19.1,393.74,6.56,26.2\n250,0.1403,22.0,5.86,0.0,0.431,6.487,13.0,7.3967,7.0,330.0,19.1,396.28,5.9,24.4\n251,0.21409,22.0,5.86,0.0,0.431,6.438,8.9,7.3967,7.0,330.0,19.1,377.07,3.59,24.8\n252,0.08221,22.0,5.86,0.0,0.431,6.957,6.8,8.9067,7.0,330.0,19.1,386.09,3.53,29.6\n253,0.36894,22.0,5.86,0.0,0.431,8.259,8.4,8.9067,7.0,330.0,19.1,396.9,3.54,42.8\n254,0.04819,80.0,3.64,0.0,0.392,6.108,32.0,9.2203,1.0,315.0,16.4,392.89,6.57,21.9\n255,0.03548,80.0,3.64,0.0,0.392,5.876,19.1,9.2203,1.0,315.0,16.4,395.18,9.25,20.9\n256,0.01538,90.0,3.75,0.0,0.394,7.454,34.2,6.3361,3.0,244.0,15.9,386.34,3.11,44.0\n257,0.61154,20.0,3.97,0.0,0.647,8.704,86.9,1.801,5.0,264.0,13.0,389.7,5.12,50.0\n258,0.66351,20.0,3.97,0.0,0.647,7.333,100.0,1.8946,5.0,264.0,13.0,383.29,7.79,36.0\n259,0.65665,20.0,3.97,0.0,0.647,6.842,100.0,2.0107,5.0,264.0,13.0,391.93,6.9,30.1\n260,0.54011,20.0,3.97,0.0,0.647,7.203,81.8,2.1121,5.0,264.0,13.0,392.8,9.59,33.8\n261,0.53412,20.0,3.97,0.0,0.647,7.52,89.4,2.1398,5.0,264.0,13.0,388.37,7.26,43.1\n262,0.52014,20.0,3.97,0.0,0.647,8.398,91.5,2.2885,5.0,264.0,13.0,386.86,5.91,48.8\n263,0.82526,20.0,3.97,0.0,0.647,7.327,94.5,2.0788,5.0,264.0,13.0,393.42,11.25,31.0\n264,0.55007,20.0,3.97,0.0,0.647,7.206,91.6,1.9301,5.0,264.0,13.0,387.89,8.1,36.5\n265,0.76162,20.0,3.97,0.0,0.647,5.56,62.8,1.9865,5.0,264.0,13.0,392.4,10.45,22.8\n266,0.7857,20.0,3.97,0.0,0.647,7.014,84.6,2.1329,5.0,264.0,13.0,384.07,14.79,30.7\n267,0.57834,20.0,3.97,0.0,0.575,8.297,67.0,2.4216,5.0,264.0,13.0,384.54,7
.44,50.0\n268,0.5405,20.0,3.97,0.0,0.575,7.47,52.6,2.872,5.0,264.0,13.0,390.3,3.16,43.5\n269,0.09065,20.0,6.96,1.0,0.464,5.92,61.5,3.9175,3.0,223.0,18.6,391.34,13.65,20.7\n270,0.29916,20.0,6.96,0.0,0.464,5.856,42.1,4.429,3.0,223.0,18.6,388.65,13.0,21.1\n271,0.16211,20.0,6.96,0.0,0.464,6.24,16.3,4.429,3.0,223.0,18.6,396.9,6.59,25.2\n272,0.1146,20.0,6.96,0.0,0.464,6.538,58.7,3.9175,3.0,223.0,18.6,394.96,7.73,24.4\n273,0.22188,20.0,6.96,1.0,0.464,7.691,51.8,4.3665,3.0,223.0,18.6,390.77,6.58,35.2\n274,0.05644,40.0,6.41,1.0,0.447,6.758,32.9,4.0776,4.0,254.0,17.6,396.9,3.53,32.4\n275,0.09604,40.0,6.41,0.0,0.447,6.854,42.8,4.2673,4.0,254.0,17.6,396.9,2.98,32.0\n276,0.10469,40.0,6.41,1.0,0.447,7.267,49.0,4.7872,4.0,254.0,17.6,389.25,6.05,33.2\n277,0.06127,40.0,6.41,1.0,0.447,6.826,27.6,4.8628,4.0,254.0,17.6,393.45,4.16,33.1\n278,0.07978,40.0,6.41,0.0,0.447,6.482,32.1,4.1403,4.0,254.0,17.6,396.9,7.19,29.1\n279,0.21038,20.0,3.33,0.0,0.4429,6.812,32.2,4.1007,5.0,216.0,14.9,396.9,4.85,35.1\n280,0.03578,20.0,3.33,0.0,0.4429,7.82,64.5,4.6947,5.0,216.0,14.9,387.31,3.76,45.4\n281,0.03705,20.0,3.33,0.0,0.4429,6.968,37.2,5.2447,5.0,216.0,14.9,392.23,4.59,35.4\n282,0.06129,20.0,3.33,1.0,0.4429,7.645,49.7,5.2119,5.0,216.0,14.9,377.07,3.01,46.0\n283,0.01501,90.0,1.21,1.0,0.401,7.923,24.8,5.885,1.0,198.0,13.6,395.52,3.16,50.0\n284,0.00906,90.0,2.97,0.0,0.4,7.088,20.8,7.3073,1.0,285.0,15.3,394.72,7.85,32.2\n285,0.01096,55.0,2.25,0.0,0.389,6.453,31.9,7.3073,1.0,300.0,15.3,394.72,8.23,22.0\n286,0.01965,80.0,1.76,0.0,0.385,6.23,31.5,9.0892,1.0,241.0,18.2,341.6,12.93,20.1\n287,0.03871,52.5,5.32,0.0,0.405,6.209,31.3,7.3172,6.0,293.0,16.6,396.9,7.14,23.2\n288,0.0459,52.5,5.32,0.0,0.405,6.315,45.6,7.3172,6.0,293.0,16.6,396.9,7.6,22.3\n289,0.04297,52.5,5.32,0.0,0.405,6.565,22.9,7.3172,6.0,293.0,16.6,371.72,9.51,24.8\n290,0.03502,80.0,4.95,0.0,0.411,6.861,27.9,5.1167,4.0,245.0,19.2,396.9,3.33,28.5\n291,0.07886,80.0,4.95,0.0,0.411,7.148,27.7,5.1167,4.0,245.0,19.2,396.9,3.56,37.3\n292,0.03615,80.0,4
.95,0.0,0.411,6.63,23.4,5.1167,4.0,245.0,19.2,396.9,4.7,27.9\n293,0.08265,0.0,13.92,0.0,0.437,6.127,18.4,5.5027,4.0,289.0,16.0,396.9,8.58,23.9\n294,0.08199,0.0,13.92,0.0,0.437,6.009,42.3,5.5027,4.0,289.0,16.0,396.9,10.4,21.7\n295,0.12932,0.0,13.92,0.0,0.437,6.678,31.1,5.9604,4.0,289.0,16.0,396.9,6.27,28.6\n296,0.05372,0.0,13.92,0.0,0.437,6.549,51.0,5.9604,4.0,289.0,16.0,392.85,7.39,27.1\n297,0.14103,0.0,13.92,0.0,0.437,5.79,58.0,6.32,4.0,289.0,16.0,396.9,15.84,20.3\n298,0.06466,70.0,2.24,0.0,0.4,6.345,20.1,7.8278,5.0,358.0,14.8,368.24,4.97,22.5\n299,0.05561,70.0,2.24,0.0,0.4,7.041,10.0,7.8278,5.0,358.0,14.8,371.58,4.74,29.0\n300,0.04417,70.0,2.24,0.0,0.4,6.871,47.4,7.8278,5.0,358.0,14.8,390.86,6.07,24.8\n301,0.03537,34.0,6.09,0.0,0.433,6.59,40.4,5.4917,7.0,329.0,16.1,395.75,9.5,22.0\n302,0.09266,34.0,6.09,0.0,0.433,6.495,18.4,5.4917,7.0,329.0,16.1,383.61,8.67,26.4\n303,0.1,34.0,6.09,0.0,0.433,6.982,17.7,5.4917,7.0,329.0,16.1,390.43,4.86,33.1\n304,0.05515,33.0,2.18,0.0,0.472,7.236,41.1,4.022,7.0,222.0,18.4,393.68,6.93,36.1\n305,0.05479,33.0,2.18,0.0,0.472,6.616,58.1,3.37,7.0,222.0,18.4,393.36,8.93,28.4\n306,0.07503,33.0,2.18,0.0,0.472,7.42,71.9,3.0992,7.0,222.0,18.4,396.9,6.47,33.4\n307,0.04932,33.0,2.18,0.0,0.472,6.849,70.3,3.1827,7.0,222.0,18.4,396.9,7.53,28.2\n308,0.49298,0.0,9.9,0.0,0.544,6.635,82.5,3.3175,4.0,304.0,18.4,396.9,4.54,22.8\n309,0.3494,0.0,9.9,0.0,0.544,5.972,76.7,3.1025,4.0,304.0,18.4,396.24,9.97,20.3\n310,2.63548,0.0,9.9,0.0,0.544,4.973,37.8,2.5194,4.0,304.0,18.4,350.45,12.64,16.1\n311,0.79041,0.0,9.9,0.0,0.544,6.122,52.8,2.6403,4.0,304.0,18.4,396.9,5.98,22.1\n312,0.26169,0.0,9.9,0.0,0.544,6.023,90.4,2.834,4.0,304.0,18.4,396.3,11.72,19.4\n313,0.26938,0.0,9.9,0.0,0.544,6.266,82.8,3.2628,4.0,304.0,18.4,393.39,7.9,21.6\n314,0.3692,0.0,9.9,0.0,0.544,6.567,87.3,3.6023,4.0,304.0,18.4,395.69,9.28,23.8\n315,0.25356,0.0,9.9,0.0,0.544,5.705,77.7,3.945,4.0,304.0,18.4,396.42,11.5,16.2\n316,0.31827,0.0,9.9,0.0,0.544,5.914,83.2,3.9986,4.0,304.0,18.4,390.7,18.33,
17.8\n317,0.24522,0.0,9.9,0.0,0.544,5.782,71.7,4.0317,4.0,304.0,18.4,396.9,15.94,19.8\n318,0.40202,0.0,9.9,0.0,0.544,6.382,67.2,3.5325,4.0,304.0,18.4,395.21,10.36,23.1\n319,0.47547,0.0,9.9,0.0,0.544,6.113,58.8,4.0019,4.0,304.0,18.4,396.23,12.73,21.0\n320,0.1676,0.0,7.38,0.0,0.493,6.426,52.3,4.5404,5.0,287.0,19.6,396.9,7.2,23.8\n321,0.18159,0.0,7.38,0.0,0.493,6.376,54.3,4.5404,5.0,287.0,19.6,396.9,6.87,23.1\n322,0.35114,0.0,7.38,0.0,0.493,6.041,49.9,4.7211,5.0,287.0,19.6,396.9,7.7,20.4\n323,0.28392,0.0,7.38,0.0,0.493,5.708,74.3,4.7211,5.0,287.0,19.6,391.13,11.74,18.5\n324,0.34109,0.0,7.38,0.0,0.493,6.415,40.1,4.7211,5.0,287.0,19.6,396.9,6.12,25.0\n325,0.19186,0.0,7.38,0.0,0.493,6.431,14.7,5.4159,5.0,287.0,19.6,393.68,5.08,24.6\n326,0.30347,0.0,7.38,0.0,0.493,6.312,28.9,5.4159,5.0,287.0,19.6,396.9,6.15,23.0\n327,0.24103,0.0,7.38,0.0,0.493,6.083,43.7,5.4159,5.0,287.0,19.6,396.9,12.79,22.2\n328,0.06617,0.0,3.24,0.0,0.46,5.868,25.8,5.2146,4.0,430.0,16.9,382.44,9.97,19.3\n329,0.06724,0.0,3.24,0.0,0.46,6.333,17.2,5.2146,4.0,430.0,16.9,375.21,7.34,22.6\n330,0.04544,0.0,3.24,0.0,0.46,6.144,32.2,5.8736,4.0,430.0,16.9,368.57,9.09,19.8\n331,0.05023,35.0,6.06,0.0,0.4379,5.706,28.4,6.6407,1.0,304.0,16.9,394.02,12.43,17.1\n332,0.03466,35.0,6.06,0.0,0.4379,6.031,23.3,6.6407,1.0,304.0,16.9,362.25,7.83,19.4\n333,0.05083,0.0,5.19,0.0,0.515,6.316,38.1,6.4584,5.0,224.0,20.2,389.71,5.68,22.2\n334,0.03738,0.0,5.19,0.0,0.515,6.31,38.5,6.4584,5.0,224.0,20.2,389.4,6.75,20.7\n335,0.03961,0.0,5.19,0.0,0.515,6.037,34.5,5.9853,5.0,224.0,20.2,396.9,8.01,21.1\n336,0.03427,0.0,5.19,0.0,0.515,5.869,46.3,5.2311,5.0,224.0,20.2,396.9,9.8,19.5\n337,0.03041,0.0,5.19,0.0,0.515,5.895,59.6,5.615,5.0,224.0,20.2,394.81,10.56,18.5\n338,0.03306,0.0,5.19,0.0,0.515,6.059,37.3,4.8122,5.0,224.0,20.2,396.14,8.51,20.6\n339,0.05497,0.0,5.19,0.0,0.515,5.985,45.4,4.8122,5.0,224.0,20.2,396.9,9.74,19.0\n340,0.06151,0.0,5.19,0.0,0.515,5.968,58.5,4.8122,5.0,224.0,20.2,396.9,9.29,18.7\n341,0.01301,35.0,1.52,0.0,0.442,7.241,4
9.3,7.0379,1.0,284.0,15.5,394.74,5.49,32.7\n342,0.02498,0.0,1.89,0.0,0.518,6.54,59.7,6.2669,1.0,422.0,15.9,389.96,8.65,16.5\n343,0.02543,55.0,3.78,0.0,0.484,6.696,56.4,5.7321,5.0,370.0,17.6,396.9,7.18,23.9\n344,0.03049,55.0,3.78,0.0,0.484,6.874,28.1,6.4654,5.0,370.0,17.6,387.97,4.61,31.2\n345,0.03113,0.0,4.39,0.0,0.442,6.014,48.5,8.0136,3.0,352.0,18.8,385.64,10.53,17.5\n346,0.06162,0.0,4.39,0.0,0.442,5.898,52.3,8.0136,3.0,352.0,18.8,364.61,12.67,17.2\n347,0.0187,85.0,4.15,0.0,0.429,6.516,27.7,8.5353,4.0,351.0,17.9,392.43,6.36,23.1\n348,0.01501,80.0,2.01,0.0,0.435,6.635,29.7,8.344,4.0,280.0,17.0,390.94,5.99,24.5\n349,0.02899,40.0,1.25,0.0,0.429,6.939,34.5,8.7921,1.0,335.0,19.7,389.85,5.89,26.6\n350,0.06211,40.0,1.25,0.0,0.429,6.49,44.4,8.7921,1.0,335.0,19.7,396.9,5.98,22.9\n351,0.0795,60.0,1.69,0.0,0.411,6.579,35.9,10.7103,4.0,411.0,18.3,370.78,5.49,24.1\n352,0.07244,60.0,1.69,0.0,0.411,5.884,18.5,10.7103,4.0,411.0,18.3,392.33,7.79,18.6\n353,0.01709,90.0,2.02,0.0,0.41,6.728,36.1,12.1265,5.0,187.0,17.0,384.46,4.5,30.1\n354,0.04301,80.0,1.91,0.0,0.413,5.663,21.9,10.5857,4.0,334.0,22.0,382.8,8.05,18.2\n355,0.10659,80.0,1.91,0.0,0.413,5.936,19.5,10.5857,4.0,334.0,22.0,376.04,5.57,20.6\n356,8.98296,0.0,18.1,1.0,0.77,6.212,97.4,2.1222,24.0,666.0,20.2,377.73,17.6,17.8\n357,3.8497,0.0,18.1,1.0,0.77,6.395,91.0,2.5052,24.0,666.0,20.2,391.34,13.27,21.7\n358,5.20177,0.0,18.1,1.0,0.77,6.127,83.4,2.7227,24.0,666.0,20.2,395.43,11.48,22.7\n359,4.26131,0.0,18.1,0.0,0.77,6.112,81.3,2.5091,24.0,666.0,20.2,390.74,12.67,22.6\n360,4.54192,0.0,18.1,0.0,0.77,6.398,88.0,2.5182,24.0,666.0,20.2,374.56,7.79,25.0\n361,3.83684,0.0,18.1,0.0,0.77,6.251,91.1,2.2955,24.0,666.0,20.2,350.65,14.19,19.9\n362,3.67822,0.0,18.1,0.0,0.77,5.362,96.2,2.1036,24.0,666.0,20.2,380.79,10.19,20.8\n363,4.22239,0.0,18.1,1.0,0.77,5.803,89.0,1.9047,24.0,666.0,20.2,353.04,14.64,16.8\n364,3.47428,0.0,18.1,1.0,0.718,8.78,82.9,1.9047,24.0,666.0,20.2,354.55,5.29,21.9\n365,4.55587,0.0,18.1,0.0,0.718,3.561,87.9,1.6132,24.0,666
.0,20.2,354.7,7.12,27.5\n366,3.69695,0.0,18.1,0.0,0.718,4.963,91.4,1.7523,24.0,666.0,20.2,316.03,14.0,21.9\n367,13.5222,0.0,18.1,0.0,0.631,3.863,100.0,1.5106,24.0,666.0,20.2,131.42,13.33,23.1\n368,4.89822,0.0,18.1,0.0,0.631,4.97,100.0,1.3325,24.0,666.0,20.2,375.52,3.26,50.0\n369,5.66998,0.0,18.1,1.0,0.631,6.683,96.8,1.3567,24.0,666.0,20.2,375.33,3.73,50.0\n370,6.53876,0.0,18.1,1.0,0.631,7.016,97.5,1.2024,24.0,666.0,20.2,392.05,2.96,50.0\n371,9.2323,0.0,18.1,0.0,0.631,6.216,100.0,1.1691,24.0,666.0,20.2,366.15,9.53,50.0\n372,8.26725,0.0,18.1,1.0,0.668,5.875,89.6,1.1296,24.0,666.0,20.2,347.88,8.88,50.0\n373,11.1081,0.0,18.1,0.0,0.668,4.906,100.0,1.1742,24.0,666.0,20.2,396.9,34.77,13.8\n374,18.4982,0.0,18.1,0.0,0.668,4.138,100.0,1.137,24.0,666.0,20.2,396.9,37.97,13.8\n375,19.6091,0.0,18.1,0.0,0.671,7.313,97.9,1.3163,24.0,666.0,20.2,396.9,13.44,15.0\n376,15.288,0.0,18.1,0.0,0.671,6.649,93.3,1.3449,24.0,666.0,20.2,363.02,23.24,13.9\n377,9.82349,0.0,18.1,0.0,0.671,6.794,98.8,1.358,24.0,666.0,20.2,396.9,21.24,13.3\n378,23.6482,0.0,18.1,0.0,0.671,6.38,96.2,1.3861,24.0,666.0,20.2,396.9,23.69,13.1\n379,17.8667,0.0,18.1,0.0,0.671,6.223,100.0,1.3861,24.0,666.0,20.2,393.74,21.78,10.2\n380,88.9762,0.0,18.1,0.0,0.671,6.968,91.9,1.4165,24.0,666.0,20.2,396.9,17.21,10.4\n381,15.8744,0.0,18.1,0.0,0.671,6.545,99.1,1.5192,24.0,666.0,20.2,396.9,21.08,10.9\n382,9.18702,0.0,18.1,0.0,0.7,5.536,100.0,1.5804,24.0,666.0,20.2,396.9,23.6,11.3\n383,7.99248,0.0,18.1,0.0,0.7,5.52,100.0,1.5331,24.0,666.0,20.2,396.9,24.56,12.3\n384,20.0849,0.0,18.1,0.0,0.7,4.368,91.2,1.4395,24.0,666.0,20.2,285.83,30.63,8.8\n385,16.8118,0.0,18.1,0.0,0.7,5.277,98.1,1.4261,24.0,666.0,20.2,396.9,30.81,7.2\n386,24.3938,0.0,18.1,0.0,0.7,4.652,100.0,1.4672,24.0,666.0,20.2,396.9,28.28,10.5\n387,22.5971,0.0,18.1,0.0,0.7,5.0,89.5,1.5184,24.0,666.0,20.2,396.9,31.99,7.4\n388,14.3337,0.0,18.1,0.0,0.7,4.88,100.0,1.5895,24.0,666.0,20.2,372.92,30.62,10.2\n389,8.15174,0.0,18.1,0.0,0.7,5.39,98.9,1.7281,24.0,666.0,20.2,396.9,20.85,11.5\
n390,6.96215,0.0,18.1,0.0,0.7,5.713,97.0,1.9265,24.0,666.0,20.2,394.43,17.11,15.1\n391,5.29305,0.0,18.1,0.0,0.7,6.051,82.5,2.1678,24.0,666.0,20.2,378.38,18.76,23.2\n392,11.5779,0.0,18.1,0.0,0.7,5.036,97.0,1.77,24.0,666.0,20.2,396.9,25.68,9.7\n393,8.64476,0.0,18.1,0.0,0.693,6.193,92.6,1.7912,24.0,666.0,20.2,396.9,15.17,13.8\n394,13.3598,0.0,18.1,0.0,0.693,5.887,94.7,1.7821,24.0,666.0,20.2,396.9,16.35,12.7\n395,8.71675,0.0,18.1,0.0,0.693,6.471,98.8,1.7257,24.0,666.0,20.2,391.98,17.12,13.1\n396,5.87205,0.0,18.1,0.0,0.693,6.405,96.0,1.6768,24.0,666.0,20.2,396.9,19.37,12.5\n397,7.67202,0.0,18.1,0.0,0.693,5.747,98.9,1.6334,24.0,666.0,20.2,393.1,19.92,8.5\n398,38.3518,0.0,18.1,0.0,0.693,5.453,100.0,1.4896,24.0,666.0,20.2,396.9,30.59,5.0\n399,9.91655,0.0,18.1,0.0,0.693,5.852,77.8,1.5004,24.0,666.0,20.2,338.16,29.97,6.3\n400,25.0461,0.0,18.1,0.0,0.693,5.987,100.0,1.5888,24.0,666.0,20.2,396.9,26.77,5.6\n401,14.2362,0.0,18.1,0.0,0.693,6.343,100.0,1.5741,24.0,666.0,20.2,396.9,20.32,7.2\n402,9.59571,0.0,18.1,0.0,0.693,6.404,100.0,1.639,24.0,666.0,20.2,376.11,20.31,12.1\n403,24.8017,0.0,18.1,0.0,0.693,5.349,96.0,1.7028,24.0,666.0,20.2,396.9,19.77,8.3\n404,41.5292,0.0,18.1,0.0,0.693,5.531,85.4,1.6074,24.0,666.0,20.2,329.46,27.38,8.5\n405,67.9208,0.0,18.1,0.0,0.693,5.683,100.0,1.4254,24.0,666.0,20.2,384.97,22.98,5.0\n406,20.7162,0.0,18.1,0.0,0.659,4.138,100.0,1.1781,24.0,666.0,20.2,370.22,23.34,11.9\n407,11.9511,0.0,18.1,0.0,0.659,5.608,100.0,1.2852,24.0,666.0,20.2,332.09,12.13,27.9\n408,7.40389,0.0,18.1,0.0,0.597,5.617,97.9,1.4547,24.0,666.0,20.2,314.64,26.4,17.2\n409,14.4383,0.0,18.1,0.0,0.597,6.852,100.0,1.4655,24.0,666.0,20.2,179.36,19.78,27.5\n410,51.1358,0.0,18.1,0.0,0.597,5.757,100.0,1.413,24.0,666.0,20.2,2.6,10.11,15.0\n411,14.0507,0.0,18.1,0.0,0.597,6.657,100.0,1.5275,24.0,666.0,20.2,35.05,21.22,17.2\n412,18.811,0.0,18.1,0.0,0.597,4.628,100.0,1.5539,24.0,666.0,20.2,28.79,34.37,17.9\n413,28.6558,0.0,18.1,0.0,0.597,5.155,100.0,1.5894,24.0,666.0,20.2,210.97,20.08,16.3\n414,45
.7461,0.0,18.1,0.0,0.693,4.519,100.0,1.6582,24.0,666.0,20.2,88.27,36.98,7.0\n415,18.0846,0.0,18.1,0.0,0.679,6.434,100.0,1.8347,24.0,666.0,20.2,27.25,29.05,7.2\n416,10.8342,0.0,18.1,0.0,0.679,6.782,90.8,1.8195,24.0,666.0,20.2,21.57,25.79,7.5\n417,25.9406,0.0,18.1,0.0,0.679,5.304,89.1,1.6475,24.0,666.0,20.2,127.36,26.64,10.4\n418,73.5341,0.0,18.1,0.0,0.679,5.957,100.0,1.8026,24.0,666.0,20.2,16.45,20.62,8.8\n419,11.8123,0.0,18.1,0.0,0.718,6.824,76.5,1.794,24.0,666.0,20.2,48.45,22.74,8.4\n420,11.0874,0.0,18.1,0.0,0.718,6.411,100.0,1.8589,24.0,666.0,20.2,318.75,15.02,16.7\n421,7.02259,0.0,18.1,0.0,0.718,6.006,95.3,1.8746,24.0,666.0,20.2,319.98,15.7,14.2\n422,12.0482,0.0,18.1,0.0,0.614,5.648,87.6,1.9512,24.0,666.0,20.2,291.55,14.1,20.8\n423,7.05042,0.0,18.1,0.0,0.614,6.103,85.1,2.0218,24.0,666.0,20.2,2.52,23.29,13.4\n424,8.79212,0.0,18.1,0.0,0.584,5.565,70.6,2.0635,24.0,666.0,20.2,3.65,17.16,11.7\n425,15.8603,0.0,18.1,0.0,0.679,5.896,95.4,1.9096,24.0,666.0,20.2,7.68,24.39,8.3\n426,12.2472,0.0,18.1,0.0,0.584,5.837,59.7,1.9976,24.0,666.0,20.2,24.65,15.69,10.2\n427,37.6619,0.0,18.1,0.0,0.679,6.202,78.7,1.8629,24.0,666.0,20.2,18.82,14.52,10.9\n428,7.36711,0.0,18.1,0.0,0.679,6.193,78.1,1.9356,24.0,666.0,20.2,96.73,21.52,11.0\n429,9.33889,0.0,18.1,0.0,0.679,6.38,95.6,1.9682,24.0,666.0,20.2,60.72,24.08,9.5\n430,8.49213,0.0,18.1,0.0,0.584,6.348,86.1,2.0527,24.0,666.0,20.2,83.45,17.64,14.5\n431,10.0623,0.0,18.1,0.0,0.584,6.833,94.3,2.0882,24.0,666.0,20.2,81.33,19.69,14.1\n432,6.44405,0.0,18.1,0.0,0.584,6.425,74.8,2.2004,24.0,666.0,20.2,97.95,12.03,16.1\n433,5.58107,0.0,18.1,0.0,0.713,6.436,87.9,2.3158,24.0,666.0,20.2,100.19,16.22,14.3\n434,13.9134,0.0,18.1,0.0,0.713,6.208,95.0,2.2222,24.0,666.0,20.2,100.63,15.17,11.7\n435,11.1604,0.0,18.1,0.0,0.74,6.629,94.6,2.1247,24.0,666.0,20.2,109.85,23.27,13.4\n436,14.4208,0.0,18.1,0.0,0.74,6.461,93.3,2.0026,24.0,666.0,20.2,27.49,18.05,9.6\n437,15.1772,0.0,18.1,0.0,0.74,6.152,100.0,1.9142,24.0,666.0,20.2,9.32,26.45,8.7\n438,13.6781,0.0,18.1,0
.0,0.74,5.935,87.9,1.8206,24.0,666.0,20.2,68.95,34.02,8.4\n439,9.39063,0.0,18.1,0.0,0.74,5.627,93.9,1.8172,24.0,666.0,20.2,396.9,22.88,12.8\n440,22.0511,0.0,18.1,0.0,0.74,5.818,92.4,1.8662,24.0,666.0,20.2,391.45,22.11,10.5\n441,9.72418,0.0,18.1,0.0,0.74,6.406,97.2,2.0651,24.0,666.0,20.2,385.96,19.52,17.1\n442,5.66637,0.0,18.1,0.0,0.74,6.219,100.0,2.0048,24.0,666.0,20.2,395.69,16.59,18.4\n443,9.96654,0.0,18.1,0.0,0.74,6.485,100.0,1.9784,24.0,666.0,20.2,386.73,18.85,15.4\n444,12.8023,0.0,18.1,0.0,0.74,5.854,96.6,1.8956,24.0,666.0,20.2,240.52,23.79,10.8\n445,0.6718,0.0,18.1,0.0,0.74,6.459,94.8,1.9879,24.0,666.0,20.2,43.06,23.98,11.8\n446,6.28807,0.0,18.1,0.0,0.74,6.341,96.4,2.072,24.0,666.0,20.2,318.01,17.79,14.9\n447,9.92485,0.0,18.1,0.0,0.74,6.251,96.6,2.198,24.0,666.0,20.2,388.52,16.44,12.6\n448,9.32909,0.0,18.1,0.0,0.713,6.185,98.7,2.2616,24.0,666.0,20.2,396.9,18.13,14.1\n449,7.52601,0.0,18.1,0.0,0.713,6.417,98.3,2.185,24.0,666.0,20.2,304.21,19.31,13.0\n450,6.71772,0.0,18.1,0.0,0.713,6.749,92.6,2.3236,24.0,666.0,20.2,0.32,17.44,13.4\n451,5.44114,0.0,18.1,0.0,0.713,6.655,98.2,2.3552,24.0,666.0,20.2,355.29,17.73,15.2\n452,5.09017,0.0,18.1,0.0,0.713,6.297,91.8,2.3682,24.0,666.0,20.2,385.09,17.27,16.1\n453,8.24809,0.0,18.1,0.0,0.713,7.393,99.3,2.4527,24.0,666.0,20.2,375.87,16.74,17.8\n454,9.51363,0.0,18.1,0.0,0.713,6.728,94.1,2.4961,24.0,666.0,20.2,6.68,18.71,14.9\n455,4.75237,0.0,18.1,0.0,0.713,6.525,86.5,2.4358,24.0,666.0,20.2,50.92,18.13,14.1\n456,4.66883,0.0,18.1,0.0,0.713,5.976,87.9,2.5806,24.0,666.0,20.2,10.48,19.01,12.7\n457,8.20058,0.0,18.1,0.0,0.713,5.936,80.3,2.7792,24.0,666.0,20.2,3.5,16.94,13.5\n458,7.75223,0.0,18.1,0.0,0.713,6.301,83.7,2.7831,24.0,666.0,20.2,272.21,16.23,14.9\n459,6.80117,0.0,18.1,0.0,0.713,6.081,84.4,2.7175,24.0,666.0,20.2,396.9,14.7,20.0\n460,4.81213,0.0,18.1,0.0,0.713,6.701,90.0,2.5975,24.0,666.0,20.2,255.23,16.42,16.4\n461,3.69311,0.0,18.1,0.0,0.713,6.376,88.4,2.5671,24.0,666.0,20.2,391.43,14.65,17.7\n462,6.65492,0.0,18.1,0.0,0.713,6.3
17,83.0,2.7344,24.0,666.0,20.2,396.9,13.99,19.5\n463,5.82115,0.0,18.1,0.0,0.713,6.513,89.9,2.8016,24.0,666.0,20.2,393.82,10.29,20.2\n464,7.83932,0.0,18.1,0.0,0.655,6.209,65.4,2.9634,24.0,666.0,20.2,396.9,13.22,21.4\n465,3.1636,0.0,18.1,0.0,0.655,5.759,48.2,3.0665,24.0,666.0,20.2,334.4,14.13,19.9\n466,3.77498,0.0,18.1,0.0,0.655,5.952,84.7,2.8715,24.0,666.0,20.2,22.01,17.15,19.0\n467,4.42228,0.0,18.1,0.0,0.584,6.003,94.5,2.5403,24.0,666.0,20.2,331.29,21.32,19.1\n468,15.5757,0.0,18.1,0.0,0.58,5.926,71.0,2.9084,24.0,666.0,20.2,368.74,18.13,19.1\n469,13.0751,0.0,18.1,0.0,0.58,5.713,56.7,2.8237,24.0,666.0,20.2,396.9,14.76,20.1\n470,4.34879,0.0,18.1,0.0,0.58,6.167,84.0,3.0334,24.0,666.0,20.2,396.9,16.29,19.9\n471,4.03841,0.0,18.1,0.0,0.532,6.229,90.7,3.0993,24.0,666.0,20.2,395.33,12.87,19.6\n472,3.56868,0.0,18.1,0.0,0.58,6.437,75.0,2.8965,24.0,666.0,20.2,393.37,14.36,23.2\n473,4.64689,0.0,18.1,0.0,0.614,6.98,67.6,2.5329,24.0,666.0,20.2,374.68,11.66,29.8\n474,8.05579,0.0,18.1,0.0,0.584,5.427,95.4,2.4298,24.0,666.0,20.2,352.58,18.14,13.8\n475,6.39312,0.0,18.1,0.0,0.584,6.162,97.4,2.206,24.0,666.0,20.2,302.76,24.1,13.3\n476,4.87141,0.0,18.1,0.0,0.614,6.484,93.6,2.3053,24.0,666.0,20.2,396.21,18.68,16.7\n477,15.0234,0.0,18.1,0.0,0.614,5.304,97.3,2.1007,24.0,666.0,20.2,349.48,24.91,12.0\n478,10.233,0.0,18.1,0.0,0.614,6.185,96.7,2.1705,24.0,666.0,20.2,379.7,18.03,14.6\n479,14.3337,0.0,18.1,0.0,0.614,6.229,88.0,1.9512,24.0,666.0,20.2,383.32,13.11,21.4\n480,5.82401,0.0,18.1,0.0,0.532,6.242,64.7,3.4242,24.0,666.0,20.2,396.9,10.74,23.0\n481,5.70818,0.0,18.1,0.0,0.532,6.75,74.9,3.3317,24.0,666.0,20.2,393.07,7.74,23.7\n482,5.73116,0.0,18.1,0.0,0.532,7.061,77.0,3.4106,24.0,666.0,20.2,395.28,7.01,25.0\n483,2.81838,0.0,18.1,0.0,0.532,5.762,40.3,4.0983,24.0,666.0,20.2,392.92,10.42,21.8\n484,2.37857,0.0,18.1,0.0,0.583,5.871,41.9,3.724,24.0,666.0,20.2,370.73,13.34,20.6\n485,3.67367,0.0,18.1,0.0,0.583,6.312,51.9,3.9917,24.0,666.0,20.2,388.62,10.58,21.2\n486,5.69175,0.0,18.1,0.0,0.583,6.114,79
.8,3.5459,24.0,666.0,20.2,392.68,14.98,19.1\n487,4.83567,0.0,18.1,0.0,0.583,5.905,53.2,3.1523,24.0,666.0,20.2,388.22,11.45,20.6\n488,0.15086,0.0,27.74,0.0,0.609,5.454,92.7,1.8209,4.0,711.0,20.1,395.09,18.06,15.2\n489,0.18337,0.0,27.74,0.0,0.609,5.414,98.3,1.7554,4.0,711.0,20.1,344.05,23.97,7.0\n490,0.20746,0.0,27.74,0.0,0.609,5.093,98.0,1.8226,4.0,711.0,20.1,318.43,29.68,8.1\n491,0.10574,0.0,27.74,0.0,0.609,5.983,98.8,1.8681,4.0,711.0,20.1,390.11,18.07,13.6\n492,0.11132,0.0,27.74,0.0,0.609,5.983,83.5,2.1099,4.0,711.0,20.1,396.9,13.35,20.1\n493,0.17331,0.0,9.69,0.0,0.585,5.707,54.0,2.3817,6.0,391.0,19.2,396.9,12.01,21.8\n494,0.27957,0.0,9.69,0.0,0.585,5.926,42.6,2.3817,6.0,391.0,19.2,396.9,13.59,24.5\n495,0.17899,0.0,9.69,0.0,0.585,5.67,28.8,2.7986,6.0,391.0,19.2,393.29,17.6,23.1\n496,0.2896,0.0,9.69,0.0,0.585,5.39,72.9,2.7986,6.0,391.0,19.2,396.9,21.14,19.7\n497,0.26838,0.0,9.69,0.0,0.585,5.794,70.6,2.8927,6.0,391.0,19.2,396.9,14.1,18.3\n498,0.23912,0.0,9.69,0.0,0.585,6.019,65.3,2.4091,6.0,391.0,19.2,396.9,12.92,21.2\n499,0.17783,0.0,9.69,0.0,0.585,5.569,73.5,2.3999,6.0,391.0,19.2,395.77,15.1,17.5\n500,0.22438,0.0,9.69,0.0,0.585,6.027,79.7,2.4982,6.0,391.0,19.2,396.9,14.33,16.8\n501,0.06263,0.0,11.93,0.0,0.573,6.593,69.1,2.4786,1.0,273.0,21.0,391.99,9.67,22.4\n502,0.04527,0.0,11.93,0.0,0.573,6.12,76.7,2.2875,1.0,273.0,21.0,396.9,9.08,20.6\n503,0.06076,0.0,11.93,0.0,0.573,6.976,91.0,2.1675,1.0,273.0,21.0,396.9,5.64,23.9\n504,0.10959,0.0,11.93,0.0,0.573,6.794,89.3,2.3889,1.0,273.0,21.0,393.45,6.48,22.0\n505,0.04741,0.0,11.93,0.0,0.573,6.03,80.8,2.505,1.0,273.0,21.0,396.9,7.88,11.9\n"
  },
  {
    "path": "examples/data/census_1k.csv",
    "content": "\"YEAR\",\"DATANUM\",\"SERIAL\",\"CBSERIAL\",\"HHWT\",\"CPI99\",\"GQ\",\"QGQ\",\"PERNUM\",\"PERWT\",\"SEX\",\"AGE\",\"EDUC\",\"EDUCD\",\"INCTOT\",\"SEX_HEAD\",\"SEX_MOM\",\"SEX_POP\",\"SEX_SP\",\"SEX_MOM2\",\"SEX_POP2\",\"AGE_HEAD\",\"AGE_MOM\",\"AGE_POP\",\"AGE_SP\",\"AGE_MOM2\",\"AGE_POP2\",\"EDUC_HEAD\",\"EDUC_MOM\",\"EDUC_POP\",\"EDUC_SP\",\"EDUC_MOM2\",\"EDUC_POP2\",\"EDUCD_HEAD\",\"EDUCD_MOM\",\"EDUCD_POP\",\"EDUCD_SP\",\"EDUCD_MOM2\",\"EDUCD_POP2\",\"INCTOT_HEAD\",\"INCTOT_MOM\",\"INCTOT_POP\",\"INCTOT_SP\",\"INCTOT_MOM2\",\"INCTOT_POP2\"\n1970,2,1,,100,4.54,1,0,1,100,1,39,6,60,12450,1,,,2,,,39,,,36,,,6,,,3,,,60,,,30,,,12450,,,3450,,\n1970,2,1,,100,4.54,1,0,2,100,2,36,3,30,3450,1,,,1,,,39,,,39,,,6,,,6,,,60,,,60,,,12450,,,12450,,\n1970,2,2,,100,4.54,1,0,1,100,1,56,7,70,9050,1,,,2,,,56,,,54,,,7,,,6,,,70,,,60,,,9050,,,0,,\n1970,2,2,,100,4.54,1,0,2,100,2,54,6,60,0,1,,,1,,,56,,,56,,,7,,,7,,,70,,,70,,,9050,,,9050,,\n1970,2,4,,100,4.54,1,0,1,100,1,82,1,17,7450,1,,,2,,,82,,,74,,,1,,,2,,,17,,,23,,,7450,,,650,,\n1970,2,4,,100,4.54,1,0,2,100,2,74,2,23,650,1,,,1,,,82,,,82,,,1,,,1,,,17,,,17,,,7450,,,7450,,\n1970,2,5,,100,4.54,1,0,1,100,1,66,10,100,6950,1,,,2,,,66,,,62,,,10,,,6,,,100,,,60,,,6950,,,250,,\n1970,2,5,,100,4.54,1,0,2,100,2,62,6,60,250,1,,,1,,,66,,,66,,,10,,,10,,,100,,,100,,,6950,,,6950,,\n1970,2,6,,100,4.54,1,0,1,100,2,70,4,40,1250,2,,,,,,70,,,,,,4,,,,,,40,,,,,,1250,,,,,\n1970,2,7,,100,4.54,1,0,1,100,1,25,6,60,11150,1,,,2,,,25,,,22,,,6,,,6,,,60,,,60,,,11150,,,4050,,\n1970,2,7,,100,4.54,1,0,2,100,2,22,6,60,4050,1,,,1,,,25,,,25,,,6,,,6,,,60,,,60,,,11150,,,11150,,\n1970,2,7,,100,4.54,1,0,3,100,1,1,0,1,9999999,1,2,1,,,,25,22,25,,,,6,6,6,,,,60,60,60,,,,11150,4050,11150,,,\n1970,2,8,,100,4.54,3,0,1,100,2,98,2,26,550,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,\n1970,2,9,,100,4.54,1,0,1,100,1,25,10,100,6150,1,,,2,,,25,,,27,,,10,,,9,,,100,,,90,,,6150,,,1050,,\n1970,2,9,,100,4.54,1,0,2,100,2,27,9,90,1050,1,,,1,,,25,,,25,,,10,,,10,,,100,,,100,,,6150,,,6150,,\n1970,2,10,
,100,4.54,1,0,1,100,1,41,11,111,8050,1,2,,,,,41,78,,,,,11,6,,,,,111,60,,,,,8050,0,,,,\n1970,2,10,,100,4.54,1,0,2,100,2,78,6,60,0,1,,,,,,41,,,,,,11,,,,,,111,,,,,,8050,,,,,\n1970,2,10,,100,4.54,1,0,3,100,2,38,6,60,7150,1,2,,,,,41,78,,,,,11,6,,,,,111,60,,,,,8050,0,,,,\n1970,2,11,,100,4.54,1,0,1,100,1,20,6,60,2050,1,,,,,,20,,,,,,6,,,,,,60,,,,,,2050,,,,,\n1970,2,13,,100,4.54,1,0,1,100,1,37,6,65,16850,1,,,2,,,37,,,30,,,6,,,8,,,65,,,80,,,16850,,,350,,\n1970,2,13,,100,4.54,1,0,2,100,2,30,8,80,350,1,,,1,,,37,,,37,,,6,,,6,,,65,,,65,,,16850,,,16850,,\n1970,2,13,,100,4.54,1,0,3,100,1,5,0,2,9999999,1,2,1,,,,37,30,37,,,,6,8,6,,,,65,80,65,,,,16850,350,16850,,,\n1970,2,13,,100,4.54,1,0,4,100,2,1,0,1,9999999,1,2,1,,,,37,30,37,,,,6,8,6,,,,65,80,65,,,,16850,350,16850,,,\n1970,2,14,,100,4.54,1,0,1,100,1,49,2,23,8850,1,,,2,,,49,,,35,,,2,,,3,,,23,,,30,,,8850,,,4850,,\n1970,2,14,,100,4.54,1,0,2,100,2,35,3,30,4850,1,,,1,,,49,,,49,,,2,,,2,,,23,,,23,,,8850,,,8850,,\n1970,2,14,,100,4.54,1,0,3,100,2,17,3,30,250,1,2,1,,,,49,35,49,,,,2,3,2,,,,23,30,23,,,,8850,4850,8850,,,\n1970,2,14,,100,4.54,1,0,4,100,2,14,2,25,0,1,2,1,,,,49,35,49,,,,2,3,2,,,,23,30,23,,,,8850,4850,8850,,,\n1970,2,14,,100,4.54,1,0,5,100,1,10,1,15,9999999,1,2,1,,,,49,35,49,,,,2,3,2,,,,23,30,23,,,,8850,4850,8850,,,\n1970,2,14,,100,4.54,1,0,6,100,2,8,1,14,9999999,1,2,1,,,,49,35,49,,,,2,3,2,,,,23,30,23,,,,8850,4850,8850,,,\n1970,2,14,,100,4.54,1,0,7,100,1,0,0,1,9999999,1,2,1,,,,49,35,49,,,,2,3,2,,,,23,30,23,,,,8850,4850,8850,,,\n1970,2,15,,100,4.54,1,0,1,100,2,62,7,70,7750,2,,,,,,62,,,,,,7,,,,,,70,,,,,,7750,,,,,\n1970,2,15,,100,4.54,1,0,2,100,1,35,11,111,5350,2,2,,,,,62,62,,,,,7,7,,,,,70,70,,,,,7750,7750,,,,\n1970,2,16,,100,4.54,1,0,1,100,1,57,4,40,11250,1,,,2,,,57,,,54,,,4,,,2,,,40,,,26,,,11250,,,150,,\n1970,2,16,,100,4.54,1,0,2,100,2,54,2,26,150,1,2,,1,,,57,86,,57,,,4,2,,4,,,40,26,,40,,,11250,1250,,11250,,\n1970,2,16,,100,4.54,1,0,3,100,2,86,2,26,1250,1,,,,,,57,,,,,,4,,,,,,40,,,,,,11250,,,,,\n1970,2,17,,100,4.54,1,0,1,100,1,54,6,6
0,6050,1,,,,,,54,,,,,,6,,,,,,60,,,,,,6050,,,,,\n1970,2,17,,100,4.54,1,0,2,100,2,64,2,26,0,1,,,,,,54,,,,,,6,,,,,,60,,,,,,6050,,,,,\n1970,2,18,,100,4.54,1,0,1,100,1,52,7,70,12050,1,,,2,,,52,,,44,,,7,,,6,,,70,,,60,,,12050,,,650,,\n1970,2,18,,100,4.54,1,0,2,100,2,44,6,60,650,1,,,1,,,52,,,52,,,7,,,7,,,70,,,70,,,12050,,,12050,,\n1970,2,18,,100,4.54,1,0,3,100,2,16,4,40,950,1,2,1,,,,52,44,52,,,,7,6,7,,,,70,60,70,,,,12050,650,12050,,,\n1970,2,18,,100,4.54,1,0,4,100,2,15,3,30,350,1,2,1,,,,52,44,52,,,,7,6,7,,,,70,60,70,,,,12050,650,12050,,,\n1970,2,18,,100,4.54,1,0,5,100,1,14,2,25,350,1,2,1,,,,52,44,52,,,,7,6,7,,,,70,60,70,,,,12050,650,12050,,,\n1970,2,18,,100,4.54,1,0,6,100,1,12,2,22,9999999,1,2,1,,,,52,44,52,,,,7,6,7,,,,70,60,70,,,,12050,650,12050,,,\n1970,2,18,,100,4.54,1,0,7,100,1,6,1,12,9999999,1,2,1,,,,52,44,52,,,,7,6,7,,,,70,60,70,,,,12050,650,12050,,,\n1970,2,19,,100,4.54,1,0,1,100,1,77,2,26,250,1,,,2,,,77,,,79,,,2,,,2,,,26,,,26,,,250,,,0,,\n1970,2,19,,100,4.54,1,0,2,100,2,79,2,26,0,1,,,1,,,77,,,77,,,2,,,2,,,26,,,26,,,250,,,250,,\n1970,2,20,,100,4.54,1,0,1,100,1,36,6,60,11450,1,,,2,,,36,,,32,,,6,,,6,,,60,,,60,,,11450,,,5550,,\n1970,2,20,,100,4.54,1,0,2,100,2,32,6,60,5550,1,,,1,,,36,,,36,,,6,,,6,,,60,,,60,,,11450,,,11450,,\n1970,2,20,,100,4.54,1,0,3,100,2,9,1,16,9999999,1,2,1,,,,36,32,36,,,,6,6,6,,,,60,60,60,,,,11450,5550,11450,,,\n1970,2,21,,100,4.54,1,0,1,100,1,21,4,40,2450,1,,,2,,,21,,,20,,,4,,,4,,,40,,,40,,,2450,,,4550,,\n1970,2,21,,100,4.54,1,0,2,100,2,20,4,40,4550,1,,,1,,,21,,,21,,,4,,,4,,,40,,,40,,,2450,,,2450,,\n1970,2,21,,100,4.54,1,0,3,100,1,5,1,12,9999999,1,2,1,,,,21,20,21,,,,4,4,4,,,,40,40,40,,,,2450,4550,2450,,,\n1970,2,21,,100,4.54,1,0,4,100,1,4,1,11,9999999,1,2,1,,,,21,20,21,,,,4,4,4,,,,40,40,40,,,,2450,4550,2450,,,\n1970,2,22,,100,4.54,1,0,1,100,1,23,2,26,5050,1,,,,,,23,,,,,,2,,,,,,26,,,,,,5050,,,,,\n1970,2,22,,100,4.54,1,0,2,100,2,23,3,30,1850,1,,,,,,23,,,,,,2,,,,,,26,,,,,,5050,,,,,\n1970,2,23,,100,4.54,1,0,1,100,1,63,6,60,5050,1,,,,,,63,,,,,,6,,,,,,60,
,,,,,5050,,,,,\n1970,2,24,,100,4.54,1,0,1,100,2,68,3,30,2150,2,,,,,,68,,,,,,3,,,,,,30,,,,,,2150,,,,,\n1970,2,25,,100,4.54,1,0,1,100,1,65,2,22,4850,1,,,2,,,65,,,61,,,2,,,4,,,22,,,40,,,4850,,,4350,,\n1970,2,25,,100,4.54,1,0,2,100,2,61,4,40,4350,1,,,1,,,65,,,65,,,2,,,2,,,22,,,22,,,4850,,,4850,,\n1970,2,26,,100,4.54,1,0,1,100,1,61,8,80,2150,1,,,2,,,61,,,66,,,8,,,6,,,80,,,60,,,2150,,,5650,,\n1970,2,26,,100,4.54,1,0,2,100,2,66,6,60,5650,1,,,1,,,61,,,61,,,8,,,8,,,80,,,80,,,2150,,,2150,,\n1970,2,27,,100,4.54,1,0,1,100,2,77,1,14,4050,2,,,,,,77,,,,,,1,,,,,,14,,,,,,4050,,,,,\n1970,2,27,,100,4.54,1,0,2,100,1,75,1,14,2050,2,,,,,,77,,,,,,1,,,,,,14,,,,,,4050,,,,,\n1970,2,28,,100,4.54,1,0,1,100,1,32,8,80,5050,1,,,,,,32,,,,,,8,,,,,,80,,,,,,5050,,,,,\n1970,2,29,,100,4.54,1,0,1,100,1,59,5,50,15050,1,,,2,,,59,,,55,,,5,,,6,,,50,,,60,,,15050,,,0,,\n1970,2,29,,100,4.54,1,0,2,100,2,55,6,60,0,1,,,1,,,59,,,59,,,5,,,5,,,50,,,50,,,15050,,,15050,,\n1970,2,30,,100,4.54,1,0,1,100,2,47,6,60,0,2,,,,,,47,,,,,,6,,,,,,60,,,,,,0,,,,,\n1970,2,31,,100,4.54,1,0,1,100,1,43,8,80,7050,1,,,2,,,43,,,41,,,8,,,6,,,80,,,60,,,7050,,,2050,,\n1970,2,31,,100,4.54,1,0,2,100,2,41,6,60,2050,1,,,1,,,43,,,43,,,8,,,8,,,80,,,80,,,7050,,,7050,,\n1970,2,31,,100,4.54,1,0,3,100,2,18,6,65,4050,1,2,1,,,,43,41,43,,,,8,6,8,,,,80,60,80,,,,7050,2050,7050,,,\n1970,2,31,,100,4.54,1,0,4,100,2,15,2,26,0,1,2,1,,,,43,41,43,,,,8,6,8,,,,80,60,80,,,,7050,2050,7050,,,\n1970,2,31,,100,4.54,1,0,5,100,2,10,1,17,9999999,1,2,1,,,,43,41,43,,,,8,6,8,,,,80,60,80,,,,7050,2050,7050,,,\n1970,2,32,,100,4.54,1,0,1,100,1,40,10,100,13350,1,,,2,,,40,,,36,,,10,,,10,,,100,,,100,,,13350,,,0,,\n1970,2,32,,100,4.54,1,0,2,100,2,36,10,100,0,1,,,1,,,40,,,40,,,10,,,10,,,100,,,100,,,13350,,,13350,,\n1970,2,32,,100,4.54,1,0,3,100,1,14,2,25,0,1,2,1,,,,40,36,40,,,,10,10,10,,,,100,100,100,,,,13350,0,13350,,,\n1970,2,32,,100,4.54,1,0,4,100,1,10,1,16,9999999,1,2,1,,,,40,36,40,,,,10,10,10,,,,100,100,100,,,,13350,0,13350,,,\n1970,2,32,,100,4.54,1,0,5,100,2,4,1,11,9999999,1,2,1
,,,,40,36,40,,,,10,10,10,,,,100,100,100,,,,13350,0,13350,,,\n1970,2,33,,100,4.54,1,0,1,100,1,40,11,111,25050,1,,,2,,,40,,,32,,,11,,,10,,,111,,,100,,,25050,,,0,,\n1970,2,33,,100,4.54,1,0,2,100,2,32,10,100,0,1,,,1,,,40,,,40,,,11,,,11,,,111,,,111,,,25050,,,25050,,\n1970,2,33,,100,4.54,1,0,3,100,1,5,0,2,9999999,1,2,1,,,,40,32,40,,,,11,10,11,,,,111,100,111,,,,25050,0,25050,,,\n1970,2,33,,100,4.54,1,0,4,100,1,3,0,2,9999999,1,2,1,,,,40,32,40,,,,11,10,11,,,,111,100,111,,,,25050,0,25050,,,\n1970,2,34,,100,4.54,1,0,1,100,1,31,11,111,19350,1,,,2,,,31,,,31,,,11,,,10,,,111,,,100,,,19350,,,0,,\n1970,2,34,,100,4.54,1,0,2,100,2,31,10,100,0,1,,,1,,,31,,,31,,,11,,,11,,,111,,,111,,,19350,,,19350,,\n1970,2,34,,100,4.54,1,0,3,100,2,3,0,2,9999999,1,2,1,,,,31,31,31,,,,11,10,11,,,,111,100,111,,,,19350,0,19350,,,\n1970,2,35,,100,4.54,1,0,1,100,1,64,11,111,17150,1,,,,,,64,,,,,,11,,,,,,111,,,,,,17150,,,,,\n1970,2,36,,100,4.54,1,0,1,100,1,55,3,30,9050,1,,,2,,,55,,,51,,,3,,,6,,,30,,,60,,,9050,,,2950,,\n1970,2,36,,100,4.54,1,0,2,100,2,51,6,60,2950,1,,,1,,,55,,,55,,,3,,,3,,,30,,,30,,,9050,,,9050,,\n1970,2,37,,100,4.54,1,0,1,100,1,43,11,111,50000,1,,,2,,,43,,,40,,,11,,,10,,,111,,,100,,,50000,,,1150,,\n1970,2,37,,100,4.54,1,0,2,100,2,40,10,100,1150,1,,,1,,,43,,,43,,,11,,,11,,,111,,,111,,,50000,,,50000,,\n1970,2,37,,100,4.54,1,0,3,100,1,16,4,40,250,1,2,1,,,,43,40,43,,,,11,10,11,,,,111,100,111,,,,50000,1150,50000,,,\n1970,2,37,,100,4.54,1,0,4,100,2,15,2,26,50,1,2,1,,,,43,40,43,,,,11,10,11,,,,111,100,111,,,,50000,1150,50000,,,\n1970,2,37,,100,4.54,1,0,5,100,1,12,2,22,9999999,1,2,1,,,,43,40,43,,,,11,10,11,,,,111,100,111,,,,50000,1150,50000,,,\n1970,2,38,,100,4.54,1,0,1,100,1,32,10,100,22150,1,,,2,,,32,,,31,,,10,,,7,,,100,,,70,,,22150,,,0,,\n1970,2,38,,100,4.54,1,0,2,100,2,31,7,70,0,1,,,1,,,32,,,32,,,10,,,10,,,100,,,100,,,22150,,,22150,,\n1970,2,38,,100,4.54,1,0,3,100,2,5,0,2,9999999,1,2,1,,,,32,31,32,,,,10,7,10,,,,100,70,100,,,,22150,0,22150,,,\n1970,2,38,,100,4.54,1,0,4,100,1,4,0,2,9999999,1,2,1,,,,32
,31,32,,,,10,7,10,,,,100,70,100,,,,22150,0,22150,,,\n1970,2,38,,100,4.54,1,0,5,100,2,2,0,1,9999999,1,2,1,,,,32,31,32,,,,10,7,10,,,,100,70,100,,,,22150,0,22150,,,\n1970,2,39,,100,4.54,1,0,1,100,1,54,8,80,17850,1,,,2,,,54,,,47,,,8,,,6,,,80,,,60,,,17850,,,0,,\n1970,2,39,,100,4.54,1,0,2,100,2,47,6,60,0,1,,,1,,,54,,,54,,,8,,,8,,,80,,,80,,,17850,,,17850,,\n1970,2,39,,100,4.54,1,0,3,100,1,19,6,60,2750,1,2,1,,,,54,47,54,,,,8,6,8,,,,80,60,80,,,,17850,0,17850,,,\n1970,2,39,,100,4.54,1,0,4,100,1,12,2,23,9999999,1,2,1,,,,54,47,54,,,,8,6,8,,,,80,60,80,,,,17850,0,17850,,,\n1970,2,40,,100,4.54,1,0,1,100,1,46,6,60,17150,1,,,2,,,46,,,46,,,6,,,6,,,60,,,60,,,17150,,,5050,,\n1970,2,40,,100,4.54,1,0,2,100,2,46,6,60,5050,1,,,1,,,46,,,46,,,6,,,6,,,60,,,60,,,17150,,,17150,,\n1970,2,41,,100,4.54,1,0,1,100,1,70,6,60,11450,1,,,,,,70,,,,,,6,,,,,,60,,,,,,11450,,,,,\n1970,2,41,,100,4.54,1,0,2,100,2,28,5,50,2150,1,,,,,,70,,,,,,6,,,,,,60,,,,,,11450,,,,,\n1970,2,41,,100,4.54,1,0,3,100,1,33,8,80,9850,1,,1,,,,70,,70,,,,6,,6,,,,60,,60,,,,11450,,11450,,,\n1970,2,41,,100,4.54,1,0,4,100,1,0,0,1,9999999,1,,1,,,,70,,33,,,,6,,8,,,,60,,80,,,,11450,,9850,,,\n1970,2,41,,100,4.54,1,0,5,100,2,5,0,2,9999999,1,,1,,,,70,,33,,,,6,,8,,,,60,,80,,,,11450,,9850,,,\n1970,2,42,,100,4.54,1,0,1,100,1,53,3,30,10050,1,,,2,,,53,,,51,,,3,,,6,,,30,,,60,,,10050,,,0,,\n1970,2,42,,100,4.54,1,0,2,100,2,51,6,60,0,1,,,1,,,53,,,53,,,3,,,3,,,30,,,30,,,10050,,,10050,,\n1970,2,42,,100,4.54,1,0,3,100,2,14,2,26,0,1,2,1,,,,53,51,53,,,,3,6,3,,,,30,60,30,,,,10050,0,10050,,,\n1970,2,43,,100,4.54,1,0,1,100,1,41,11,111,32050,1,,,2,,,41,,,40,,,11,,,11,,,111,,,111,,,32050,,,250,,\n1970,2,43,,100,4.54,1,0,2,100,2,40,11,111,250,1,,,1,,,41,,,41,,,11,,,11,,,111,,,111,,,32050,,,32050,,\n1970,2,43,,100,4.54,1,0,3,100,2,10,1,17,9999999,1,2,1,,,,41,40,41,,,,11,11,11,,,,111,111,111,,,,32050,250,32050,,,\n1970,2,43,,100,4.54,1,0,4,100,2,6,1,12,9999999,1,2,1,,,,41,40,41,,,,11,11,11,,,,111,111,111,,,,32050,250,32050,,,\n1970,2,44,,100,4.54,1,0,1,100,1,47,2,26,
14050,1,,,2,,,47,,,44,,,2,,,2,,,26,,,26,,,14050,,,0,,\n1970,2,44,,100,4.54,1,0,2,100,2,44,2,26,0,1,,,1,,,47,,,47,,,2,,,2,,,26,,,26,,,14050,,,14050,,\n1970,2,44,,100,4.54,1,0,3,100,2,21,8,80,2050,1,2,1,,,,47,44,47,,,,2,2,2,,,,26,26,26,,,,14050,0,14050,,,\n1970,2,44,,100,4.54,1,0,4,100,1,18,5,50,1750,1,2,1,,,,47,44,47,,,,2,2,2,,,,26,26,26,,,,14050,0,14050,,,\n1970,2,44,,100,4.54,1,0,5,100,1,4,0,2,9999999,1,2,1,,,,47,44,47,,,,2,2,2,,,,26,26,26,,,,14050,0,14050,,,\n1970,2,45,,100,4.54,1,0,1,100,1,60,8,80,16550,1,,,2,,,60,,,50,,,8,,,6,,,80,,,60,,,16550,,,2950,,\n1970,2,45,,100,4.54,1,0,2,100,2,50,6,60,2950,1,,,1,,,60,,,60,,,8,,,8,,,80,,,80,,,16550,,,16550,,\n1970,2,46,,100,4.54,1,0,1,100,1,47,10,100,16250,1,2,,,,,47,83,,,,,10,2,,,,,100,22,,,,,16250,1250,,,,\n1970,2,46,,100,4.54,1,0,2,100,2,83,2,22,1250,1,,,,,,47,,,,,,10,,,,,,100,,,,,,16250,,,,,\n1970,2,46,,100,4.54,1,0,3,100,2,83,2,22,1450,1,,,,,,47,,,,,,10,,,,,,100,,,,,,16250,,,,,\n1970,2,48,,100,4.54,1,0,1,100,1,49,6,60,8850,1,2,,2,,,49,89,,48,,,6,2,,6,,,60,26,,60,,,8850,450,,6050,,\n1970,2,48,,100,4.54,1,0,2,100,2,48,6,60,6050,1,,,1,,,49,,,49,,,6,,,6,,,60,,,60,,,8850,,,8850,,\n1970,2,48,,100,4.54,1,0,3,100,2,89,2,26,450,1,,,,,,49,,,,,,6,,,,,,60,,,,,,8850,,,,,\n1970,2,49,,100,4.54,1,0,1,100,1,30,10,100,12050,1,,,2,,,30,,,30,,,10,,,10,,,100,,,100,,,12050,,,5650,,\n1970,2,49,,100,4.54,1,0,2,100,2,30,10,100,5650,1,,,1,,,30,,,30,,,10,,,10,,,100,,,100,,,12050,,,12050,,\n1970,2,49,,100,4.54,1,0,3,100,1,3,0,2,9999999,1,2,1,,,,30,30,30,,,,10,10,10,,,,100,100,100,,,,12050,5650,12050,,,\n1970,2,49,,100,4.54,1,0,4,100,1,0,0,1,9999999,1,2,1,,,,30,30,30,,,,10,10,10,,,,100,100,100,,,,12050,5650,12050,,,\n1970,2,50,,100,4.54,1,0,1,100,1,54,8,80,10150,1,,,,,,54,,,,,,8,,,,,,80,,,,,,10150,,,,,\n1970,2,51,,100,4.54,1,0,1,100,2,64,10,100,8650,2,,,,,,64,,,,,,10,,,,,,100,,,,,,8650,,,,,\n1970,2,52,,100,4.54,1,0,1,100,2,37,7,70,11350,2,,,,,,37,,,,,,7,,,,,,70,,,,,,11350,,,,,\n1970,2,52,,100,4.54,1,0,2,100,2,6,1,12,9999999,2,2,,,,,37,37,,,,,7,7
,,,,,70,70,,,,,11350,11350,,,,\n1970,2,53,,100,4.54,1,0,1,100,2,34,6,60,8050,2,,,,,,34,,,,,,6,,,,,,60,,,,,,8050,,,,,\n1970,2,53,,100,4.54,1,0,2,100,1,11,2,22,9999999,2,2,,,,,34,34,,,,,6,6,,,,,60,60,,,,,8050,8050,,,,\n1970,2,53,,100,4.54,1,0,3,100,2,9,1,15,9999999,2,2,,,,,34,34,,,,,6,6,,,,,60,60,,,,,8050,8050,,,,\n1970,2,54,,100,4.54,1,0,1,100,1,48,0,2,0,1,,,2,,,48,,,51,,,0,,,2,,,2,,,26,,,0,,,0,,\n1970,2,54,,100,4.54,1,0,2,100,2,51,2,26,0,1,,,1,,,48,,,48,,,0,,,0,,,2,,,2,,,0,,,0,,\n1970,2,55,,100,4.54,1,0,1,100,2,27,2,25,150,2,,,,,,27,,,,,,2,,,,,,25,,,,,,150,,,,,\n1970,2,55,,100,4.54,1,0,2,100,2,12,2,23,9999999,2,2,,,,,27,27,,,,,2,2,,,,,25,25,,,,,150,150,,,,\n1970,2,55,,100,4.54,1,0,3,100,2,7,1,14,9999999,2,2,,,,,27,27,,,,,2,2,,,,,25,25,,,,,150,150,,,,\n1970,2,55,,100,4.54,1,0,4,100,2,6,1,12,9999999,2,2,,,,,27,27,,,,,2,2,,,,,25,25,,,,,150,150,,,,\n1970,2,55,,100,4.54,1,0,5,100,2,5,1,11,9999999,2,2,,,,,27,27,,,,,2,2,,,,,25,25,,,,,150,150,,,,\n1970,2,56,,100,4.54,1,0,1,100,1,58,2,22,8050,1,,,2,,,58,,,52,,,2,,,6,,,22,,,60,,,8050,,,0,,\n1970,2,56,,100,4.54,1,0,2,100,2,52,6,60,0,1,,,1,,,58,,,58,,,2,,,2,,,22,,,22,,,8050,,,8050,,\n1970,2,56,,100,4.54,1,0,3,100,2,23,9,90,0,1,2,1,,,,58,52,58,,,,2,6,2,,,,22,60,22,,,,8050,0,8050,,,\n1970,2,57,,100,4.54,1,0,1,100,1,32,2,26,7050,1,,,2,,,32,,,32,,,2,,,5,,,26,,,50,,,7050,,,5050,,\n1970,2,57,,100,4.54,1,0,2,100,2,32,5,50,5050,1,,,1,,,32,,,32,,,2,,,2,,,26,,,26,,,7050,,,7050,,\n1970,2,57,,100,4.54,1,0,3,100,2,5,0,2,9999999,1,2,1,,,,32,32,32,,,,2,5,2,,,,26,50,26,,,,7050,5050,7050,,,\n1970,2,57,,100,4.54,1,0,4,100,2,3,0,2,9999999,1,2,1,,,,32,32,32,,,,2,5,2,,,,26,50,26,,,,7050,5050,7050,,,\n1970,2,57,,100,4.54,1,0,5,100,2,13,2,23,9999999,1,2,1,,,,32,32,32,,,,2,5,2,,,,26,50,26,,,,7050,5050,7050,,,\n1970,2,57,,100,4.54,1,0,6,100,1,10,1,17,9999999,1,2,1,,,,32,32,32,,,,2,5,2,,,,26,50,26,,,,7050,5050,7050,,,\n1970,2,57,,100,4.54,1,0,7,100,2,9,1,16,9999999,1,2,1,,,,32,32,32,,,,2,5,2,,,,26,50,26,,,,7050,5050,7050,,,\n1970,2,57,,100,4.54,1,0,8,10
0,2,8,1,15,9999999,1,2,1,,,,32,32,32,,,,2,5,2,,,,26,50,26,,,,7050,5050,7050,,,\n1970,2,57,,100,4.54,1,0,9,100,1,6,1,12,9999999,1,2,1,,,,32,32,32,,,,2,5,2,,,,26,50,26,,,,7050,5050,7050,,,\n1970,2,58,,100,4.54,1,0,1,100,1,24,11,110,1350,1,,,2,,,24,,,25,,,11,,,11,,,110,,,110,,,1350,,,8150,,\n1970,2,58,,100,4.54,1,0,2,100,2,25,11,110,8150,1,,,1,,,24,,,24,,,11,,,11,,,110,,,110,,,1350,,,1350,,\n1970,2,58,,100,4.54,1,0,3,100,1,1,0,1,9999999,1,2,1,,,,24,25,24,,,,11,11,11,,,,110,110,110,,,,1350,8150,1350,,,\n1970,2,59,,100,4.54,1,0,1,100,1,34,5,50,14150,1,,,2,,,34,,,31,,,5,,,6,,,50,,,60,,,14150,,,5050,,\n1970,2,59,,100,4.54,1,0,2,100,2,31,6,60,5050,1,,,1,,,34,,,34,,,5,,,5,,,50,,,50,,,14150,,,14150,,\n1970,2,59,,100,4.54,1,0,3,100,2,12,2,22,9999999,1,2,1,,,,34,31,34,,,,5,6,5,,,,50,60,50,,,,14150,5050,14150,,,\n1970,2,59,,100,4.54,1,0,4,100,1,11,2,22,9999999,1,2,1,,,,34,31,34,,,,5,6,5,,,,50,60,50,,,,14150,5050,14150,,,\n1970,2,59,,100,4.54,1,0,5,100,2,10,1,17,9999999,1,2,1,,,,34,31,34,,,,5,6,5,,,,50,60,50,,,,14150,5050,14150,,,\n1970,2,59,,100,4.54,1,0,6,100,1,6,1,12,9999999,1,2,1,,,,34,31,34,,,,5,6,5,,,,50,60,50,,,,14150,5050,14150,,,\n1970,2,60,,100,4.54,1,0,1,100,1,35,10,100,14050,1,,,,,,35,,,,,,10,,,,,,100,,,,,,14050,,,,,\n1970,2,62,,100,4.54,1,0,1,100,1,53,6,60,10650,1,,,2,,,53,,,49,,,6,,,6,,,60,,,60,,,10650,,,6650,,\n1970,2,62,,100,4.54,1,0,2,100,2,49,6,60,6650,1,,,1,,,53,,,53,,,6,,,6,,,60,,,60,,,10650,,,10650,,\n1970,2,63,,100,4.54,1,0,1,100,1,78,2,26,1250,1,,,,,,78,,,,,,2,,,,,,26,,,,,,1250,,,,,\n1970,2,63,,100,4.54,1,0,2,100,2,38,7,70,7050,1,,1,,,,78,,78,,,,2,,2,,,,26,,26,,,,1250,,1250,,,\n1970,2,64,,100,4.54,1,0,1,100,1,37,6,60,7050,1,,,2,,,37,,,36,,,6,,,6,,,60,,,60,,,7050,,,0,,\n1970,2,64,,100,4.54,1,0,2,100,2,36,6,60,0,1,,,1,,,37,,,37,,,6,,,6,,,60,,,60,,,7050,,,7050,,\n1970,2,64,,100,4.54,1,0,3,100,1,13,2,25,9999999,1,2,1,,,,37,36,37,,,,6,6,6,,,,60,60,60,,,,7050,0,7050,,,\n1970,2,64,,100,4.54,1,0,4,100,1,12,2,22,9999999,1,2,1,,,,37,36,37,,,,6,6,6,,,,60,60,60,,,,7050
,0,7050,,,\n1970,2,64,,100,4.54,1,0,5,100,1,5,0,2,9999999,1,2,1,,,,37,36,37,,,,6,6,6,,,,60,60,60,,,,7050,0,7050,,,\n1970,2,65,,100,4.54,1,0,1,100,2,33,8,80,8050,2,,,,,,33,,,,,,8,,,,,,80,,,,,,8050,,,,,\n1970,2,66,,100,4.54,1,0,1,100,1,25,4,40,8050,1,,,2,,,25,,,23,,,4,,,6,,,40,,,60,,,8050,,,6050,,\n1970,2,66,,100,4.54,1,0,2,100,2,23,6,60,6050,1,,,1,,,25,,,25,,,4,,,4,,,40,,,40,,,8050,,,8050,,\n1970,2,66,,100,4.54,1,0,3,100,2,4,1,11,9999999,1,2,1,,,,25,23,25,,,,4,6,4,,,,40,60,40,,,,8050,6050,8050,,,\n1970,2,66,,100,4.54,1,0,4,100,1,2,0,1,9999999,1,2,1,,,,25,23,25,,,,4,6,4,,,,40,60,40,,,,8050,6050,8050,,,\n1970,2,68,,100,4.54,1,0,1,100,1,40,5,50,3850,1,,,2,,,40,,,34,,,5,,,10,,,50,,,100,,,3850,,,850,,\n1970,2,68,,100,4.54,1,0,2,100,2,34,10,100,850,1,,,1,,,40,,,40,,,5,,,5,,,50,,,50,,,3850,,,3850,,\n1970,2,68,,100,4.54,1,0,3,100,1,1,0,1,9999999,1,2,1,,,,40,34,40,,,,5,10,5,,,,50,100,50,,,,3850,850,3850,,,\n1970,2,68,,100,4.54,1,0,4,100,2,0,0,1,9999999,1,2,1,,,,40,34,40,,,,5,10,5,,,,50,100,50,,,,3850,850,3850,,,\n1970,2,68,,100,4.54,1,0,5,100,2,5,1,11,9999999,1,2,1,,,,40,34,40,,,,5,10,5,,,,50,100,50,,,,3850,850,3850,,,\n1970,2,69,,100,4.54,1,0,1,100,2,35,4,40,3850,2,,,,,,35,,,,,,4,,,,,,40,,,,,,3850,,,,,\n1970,2,69,,100,4.54,1,0,2,100,2,8,1,15,9999999,2,2,,,,,35,35,,,,,4,4,,,,,40,40,,,,,3850,3850,,,,\n1970,2,69,,100,4.54,1,0,3,100,2,7,1,15,9999999,2,2,,,,,35,35,,,,,4,4,,,,,40,40,,,,,3850,3850,,,,\n1970,2,69,,100,4.54,1,0,4,100,2,6,1,14,9999999,2,2,,,,,35,35,,,,,4,4,,,,,40,40,,,,,3850,3850,,,,\n1970,2,69,,100,4.54,1,0,5,100,2,2,0,1,9999999,2,2,,,,,35,35,,,,,4,4,,,,,40,40,,,,,3850,3850,,,,\n1970,2,69,,100,4.54,1,0,6,100,1,13,2,23,9999999,2,2,,,,,35,35,,,,,4,4,,,,,40,40,,,,,3850,3850,,,,\n1970,2,70,,100,4.54,1,0,1,100,1,29,2,26,1750,1,,,,,,29,,,,,,2,,,,,,26,,,,,,1750,,,,,\n1970,2,71,,100,4.54,1,0,1,100,1,35,7,70,12050,1,,,,,,35,,,,,,7,,,,,,70,,,,,,12050,,,,,\n1970,2,71,,100,4.54,1,0,2,100,2,31,10,100,5450,1,,,,,,35,,,,,,7,,,,,,70,,,,,,12050,,,,,\n1970,2,72,,100,4.54,1,0,1,100,2,5
2,4,40,1550,2,,,,,,52,,,,,,4,,,,,,40,,,,,,1550,,,,,\n1970,2,72,,100,4.54,1,0,2,100,2,26,5,50,2450,2,2,,,,,52,52,,,,,4,4,,,,,40,40,,,,,1550,1550,,,,\n1970,2,72,,100,4.54,1,0,3,100,2,6,1,14,9999999,2,2,,,,,52,52,,,,,4,4,,,,,40,40,,,,,1550,1550,,,,\n1970,2,72,,100,4.54,1,0,4,100,2,2,0,1,9999999,2,2,,,,,52,52,,,,,4,4,,,,,40,40,,,,,1550,1550,,,,\n1970,2,72,,100,4.54,1,0,5,100,1,1,0,1,9999999,2,2,,,,,52,52,,,,,4,4,,,,,40,40,,,,,1550,1550,,,,\n1970,2,74,,100,4.54,1,0,1,100,2,33,6,60,10050,2,,,,,,33,,,,,,6,,,,,,60,,,,,,10050,,,,,\n1970,2,74,,100,4.54,1,0,2,100,1,13,2,22,9999999,2,2,,,,,33,33,,,,,6,6,,,,,60,60,,,,,10050,10050,,,,\n1970,2,74,,100,4.54,1,0,3,100,1,11,1,17,9999999,2,2,,,,,33,33,,,,,6,6,,,,,60,60,,,,,10050,10050,,,,\n1970,2,75,,100,4.54,1,0,1,100,2,26,9,90,10550,2,,,,,,26,,,,,,9,,,,,,90,,,,,,10550,,,,,\n1970,2,76,,100,4.54,1,0,1,100,1,35,11,111,17250,1,,,,,,35,,,,,,11,,,,,,111,,,,,,17250,,,,,\n1970,2,77,,100,4.54,1,0,1,100,1,49,6,60,11050,1,,,2,,,49,,,49,,,6,,,6,,,60,,,60,,,11050,,,0,,\n1970,2,77,,100,4.54,1,0,2,100,2,49,6,60,0,1,,,1,,,49,,,49,,,6,,,6,,,60,,,60,,,11050,,,11050,,\n1970,2,77,,100,4.54,1,0,3,100,2,16,4,40,3550,1,2,1,,,,49,49,49,,,,6,6,6,,,,60,60,60,,,,11050,0,11050,,,\n1970,2,77,,100,4.54,1,0,4,100,1,12,2,23,9999999,1,2,1,,,,49,49,49,,,,6,6,6,,,,60,60,60,,,,11050,0,11050,,,\n1970,2,77,,100,4.54,1,0,5,100,2,20,6,60,6050,1,2,1,,,,49,49,49,,,,6,6,6,,,,60,60,60,,,,11050,0,11050,,,\n1970,2,78,,100,4.54,1,0,1,100,1,72,2,26,2050,1,,,2,,,72,,,65,,,2,,,2,,,26,,,26,,,2050,,,750,,\n1970,2,78,,100,4.54,1,0,2,100,2,65,2,26,750,1,,,1,,,72,,,72,,,2,,,2,,,26,,,26,,,2050,,,2050,,\n1970,2,79,,100,4.54,1,0,1,100,1,37,6,60,7550,1,,,2,,,37,,,35,,,6,,,9,,,60,,,90,,,7550,,,5550,,\n1970,2,79,,100,4.54,1,0,2,100,2,35,9,90,5550,1,,,1,,,37,,,37,,,6,,,6,,,60,,,60,,,7550,,,7550,,\n1970,2,80,,100,4.54,1,0,1,100,1,44,6,60,6050,1,,,2,,,44,,,45,,,6,,,6,,,60,,,60,,,6050,,,0,,\n1970,2,80,,100,4.54,1,0,2,100,2,45,6,60,0,1,,,1,,,44,,,44,,,6,,,6,,,60,,,60,,,6050,,,6050,,\n1970,2,80,,10
0,4.54,1,0,3,100,1,25,6,60,5050,1,2,1,,,,44,45,44,,,,6,6,6,,,,60,60,60,,,,6050,0,6050,,,\n1970,2,81,,100,4.54,1,0,1,100,1,35,2,22,4050,1,,,2,,,35,,,31,,,2,,,2,,,22,,,23,,,4050,,,0,,\n1970,2,81,,100,4.54,1,0,2,100,2,31,2,23,0,1,,,1,,,35,,,35,,,2,,,2,,,22,,,22,,,4050,,,4050,,\n1970,2,81,,100,4.54,1,0,3,100,2,13,2,23,9999999,1,2,1,,,,35,31,35,,,,2,2,2,,,,22,23,22,,,,4050,0,4050,,,\n1970,2,81,,100,4.54,1,0,4,100,2,4,0,2,9999999,1,2,1,,,,35,31,35,,,,2,2,2,,,,22,23,22,,,,4050,0,4050,,,\n1970,2,81,,100,4.54,1,0,5,100,2,1,0,1,9999999,1,2,1,,,,35,31,35,,,,2,2,2,,,,22,23,22,,,,4050,0,4050,,,\n1970,2,82,,100,4.54,1,0,1,100,1,45,6,60,10050,1,,,2,,,45,,,46,,,6,,,7,,,60,,,70,,,10050,,,4050,,\n1970,2,82,,100,4.54,1,0,2,100,2,46,7,70,4050,1,,,1,,,45,,,45,,,6,,,6,,,60,,,60,,,10050,,,10050,,\n1970,2,82,,100,4.54,1,0,3,100,1,19,6,60,2050,1,2,1,,,,45,46,45,,,,6,7,6,,,,60,70,60,,,,10050,4050,10050,,,\n1970,2,82,,100,4.54,1,0,4,100,2,13,2,23,9999999,1,2,1,,,,45,46,45,,,,6,7,6,,,,60,70,60,,,,10050,4050,10050,,,\n1970,2,83,,100,4.54,1,0,1,100,1,58,2,26,9650,1,,,2,,,58,,,59,,,2,,,4,,,26,,,40,,,9650,,,0,,\n1970,2,83,,100,4.54,1,0,2,100,2,59,4,40,0,1,,,1,,,58,,,58,,,2,,,2,,,26,,,26,,,9650,,,9650,,\n1970,2,83,,100,4.54,1,0,3,100,1,23,10,100,2050,1,2,1,,,,58,59,58,,,,2,4,2,,,,26,40,26,,,,9650,0,9650,,,\n1970,2,84,,100,4.54,1,0,1,100,1,68,2,26,5050,1,,,,,,68,,,,,,2,,,,,,26,,,,,,5050,,,,,\n1970,2,85,,100,4.54,1,0,1,100,1,40,2,22,8050,1,,,2,,,40,,,35,,,2,,,1,,,22,,,17,,,8050,,,0,,\n1970,2,85,,100,4.54,1,0,2,100,2,35,1,17,0,1,,,1,,,40,,,40,,,2,,,2,,,22,,,22,,,8050,,,8050,,\n1970,2,85,,100,4.54,1,0,3,100,2,1,0,1,9999999,1,2,1,,,,40,35,40,,,,2,1,2,,,,22,17,22,,,,8050,0,8050,,,\n1970,2,85,,100,4.54,1,0,4,100,1,0,0,1,9999999,1,2,1,,,,40,35,40,,,,2,1,2,,,,22,17,22,,,,8050,0,8050,,,\n1970,2,86,,100,4.54,1,0,1,100,1,56,2,23,7950,1,,,,,,56,,,,,,2,,,,,,23,,,,,,7950,,,,,\n1970,2,86,,100,4.54,1,0,2,100,2,82,2,26,1150,1,,1,,,,56,,56,,,,2,,2,,,,23,,23,,,,7950,,7950,,,\n1970,2,87,,100,4.54,1,0,1,100,1,28,6,60,12
150,1,,,2,,,28,,,28,,,6,,,6,,,60,,,60,,,12150,,,0,,\n1970,2,87,,100,4.54,1,0,2,100,2,28,6,60,0,1,,,1,,,28,,,28,,,6,,,6,,,60,,,60,,,12150,,,12150,,\n1970,2,87,,100,4.54,1,0,3,100,1,1,0,1,9999999,1,2,1,,,,28,28,28,,,,6,6,6,,,,60,60,60,,,,12150,0,12150,,,\n1970,2,88,,100,4.54,1,0,1,100,2,70,2,26,3550,2,,,,,,70,,,,,,2,,,,,,26,,,,,,3550,,,,,\n1970,2,89,,100,4.54,1,0,1,100,1,54,2,26,7150,1,,,2,,,54,,,55,,,2,,,4,,,26,,,40,,,7150,,,0,,\n1970,2,89,,100,4.54,1,0,2,100,2,55,4,40,0,1,,,1,,,54,,,54,,,2,,,2,,,26,,,26,,,7150,,,7150,,\n1970,2,90,,100,4.54,1,0,1,100,2,42,6,60,0,2,,,,,,42,,,,,,6,,,,,,60,,,,,,0,,,,,\n1970,2,90,,100,4.54,1,0,2,100,1,14,2,26,0,2,2,,,,,42,42,,,,,6,6,,,,,60,60,,,,,0,0,,,,\n1970,2,90,,100,4.54,1,0,3,100,1,13,2,23,9999999,2,2,,,,,42,42,,,,,6,6,,,,,60,60,,,,,0,0,,,,\n1970,2,90,,100,4.54,1,0,4,100,1,11,2,22,9999999,2,2,,,,,42,42,,,,,6,6,,,,,60,60,,,,,0,0,,,,\n1970,2,90,,100,4.54,1,0,5,100,2,8,1,14,9999999,2,2,,,,,42,42,,,,,6,6,,,,,60,60,,,,,0,0,,,,\n1970,2,91,,100,4.54,1,0,1,100,2,36,6,60,3250,2,,,,,,36,,,,,,6,,,,,,60,,,,,,3250,,,,,\n1970,2,92,,100,4.54,1,0,1,100,1,42,4,40,6250,1,,,2,,,42,,,33,,,4,,,4,,,40,,,40,,,6250,,,750,,\n1970,2,92,,100,4.54,1,0,2,100,2,33,4,40,750,1,,,1,,,42,,,42,,,4,,,4,,,40,,,40,,,6250,,,6250,,\n1970,2,92,,100,4.54,1,0,3,100,2,13,2,25,9999999,1,2,1,,,,42,33,42,,,,4,4,4,,,,40,40,40,,,,6250,750,6250,,,\n1970,2,92,,100,4.54,1,0,4,100,1,12,1,17,9999999,1,2,1,,,,42,33,42,,,,4,4,4,,,,40,40,40,,,,6250,750,6250,,,\n1970,2,92,,100,4.54,1,0,5,100,1,10,1,15,9999999,1,2,1,,,,42,33,42,,,,4,4,4,,,,40,40,40,,,,6250,750,6250,,,\n1970,2,92,,100,4.54,1,0,6,100,2,6,1,12,9999999,1,2,1,,,,42,33,42,,,,4,4,4,,,,40,40,40,,,,6250,750,6250,,,\n1970,2,92,,100,4.54,1,0,7,100,2,95,2,23,1750,1,2,1,,,,42,33,42,,,,4,4,4,,,,40,40,40,,,,6250,750,6250,,,\n1970,2,93,,100,4.54,1,0,1,100,1,35,10,100,12050,1,,,2,,,35,,,31,,,10,,,6,,,100,,,60,,,12050,,,3250,,\n1970,2,93,,100,4.54,1,0,2,100,2,31,6,60,3250,1,,,1,,,35,,,35,,,10,,,10,,,100,,,100,,,12050,,,12050,,\n1970,2,93,,10
0,4.54,1,0,3,100,1,14,2,26,0,1,2,1,,,,35,31,35,,,,10,6,10,,,,100,60,100,,,,12050,3250,12050,,,\n1970,2,93,,100,4.54,1,0,4,100,2,12,2,23,9999999,1,2,1,,,,35,31,35,,,,10,6,10,,,,100,60,100,,,,12050,3250,12050,,,\n1970,2,93,,100,4.54,1,0,5,100,2,10,1,16,9999999,1,2,1,,,,35,31,35,,,,10,6,10,,,,100,60,100,,,,12050,3250,12050,,,\n1970,2,94,,100,4.54,1,0,1,100,2,72,9,90,6350,2,,,,,,72,,,,,,9,,,,,,90,,,,,,6350,,,,,\n1970,2,94,,100,4.54,1,0,2,100,2,32,11,111,8650,2,2,,,,,72,72,,,,,9,9,,,,,90,90,,,,,6350,6350,,,,\n1970,2,94,,100,4.54,1,0,3,100,2,42,6,60,7050,2,2,,,,,72,72,,,,,9,9,,,,,90,90,,,,,6350,6350,,,,\n1970,2,95,,100,4.54,1,0,1,100,1,50,5,50,16150,1,,,2,,,50,,,48,,,5,,,6,,,50,,,60,,,16150,,,50,,\n1970,2,95,,100,4.54,1,0,2,100,2,48,6,60,50,1,,,1,,,50,,,50,,,5,,,5,,,50,,,50,,,16150,,,16150,,\n1970,2,95,,100,4.54,1,0,3,100,2,15,3,30,0,1,2,1,,,,50,48,50,,,,5,6,5,,,,50,60,50,,,,16150,50,16150,,,\n1970,2,95,,100,4.54,1,0,4,100,1,13,2,23,9999999,1,2,1,,,,50,48,50,,,,5,6,5,,,,50,60,50,,,,16150,50,16150,,,\n1970,2,96,,100,4.54,1,0,1,100,1,21,4,40,12050,1,,,2,,,21,,,19,,,4,,,6,,,40,,,60,,,12050,,,12050,,\n1970,2,96,,100,4.54,1,0,2,100,2,19,6,60,12050,1,,,1,,,21,,,21,,,4,,,4,,,40,,,40,,,12050,,,12050,,\n1970,2,97,,100,4.54,1,0,1,100,1,66,4,40,7150,1,,,2,,,66,,,64,,,4,,,2,,,40,,,23,,,7150,,,550,,\n1970,2,97,,100,4.54,1,0,2,100,2,64,2,23,550,1,,,1,,,66,,,66,,,4,,,4,,,40,,,40,,,7150,,,7150,,\n1970,2,98,,100,4.54,1,0,1,100,1,56,6,60,11050,1,,,2,,,56,,,53,,,6,,,6,,,60,,,60,,,11050,,,0,,\n1970,2,98,,100,4.54,1,0,2,100,2,53,6,60,0,1,,,1,,,56,,,56,,,6,,,6,,,60,,,60,,,11050,,,11050,,\n1970,2,98,,100,4.54,1,0,3,100,1,29,7,70,5050,1,2,1,,,,56,53,56,,,,6,6,6,,,,60,60,60,,,,11050,0,11050,,,\n1970,2,98,,100,4.54,1,0,4,100,1,18,5,50,0,1,2,1,,,,56,53,56,,,,6,6,6,,,,60,60,60,,,,11050,0,11050,,,\n1970,2,99,,100,4.54,1,0,1,100,1,51,8,80,12050,1,,,2,,,51,,,55,,,8,,,4,,,80,,,40,,,12050,,,0,,\n1970,2,99,,100,4.54,1,0,2,100,2,55,4,40,0,1,,,1,,,51,,,51,,,8,,,8,,,80,,,80,,,12050,,,12050,,\n1970,2,99,,100,
4.54,1,0,3,100,2,11,2,22,9999999,1,2,1,,,,51,55,51,,,,8,4,8,,,,80,40,80,,,,12050,0,12050,,,\n1970,2,100,,100,4.54,1,0,1,100,2,56,4,40,6250,2,,,,,,56,,,,,,4,,,,,,40,,,,,,6250,,,,,\n1970,2,101,,100,4.54,1,0,1,100,1,42,9,90,21850,1,,,2,,,42,,,39,,,9,,,6,,,90,,,60,,,21850,,,650,,\n1970,2,101,,100,4.54,1,0,2,100,2,39,6,60,650,1,,,1,,,42,,,42,,,9,,,9,,,90,,,90,,,21850,,,21850,,\n1970,2,101,,100,4.54,1,0,3,100,1,18,5,50,650,1,2,1,,,,42,39,42,,,,9,6,9,,,,90,60,90,,,,21850,650,21850,,,\n1970,2,101,,100,4.54,1,0,4,100,2,11,2,22,9999999,1,2,1,,,,42,39,42,,,,9,6,9,,,,90,60,90,,,,21850,650,21850,,,\n1970,2,102,,100,4.54,1,0,1,100,1,49,6,60,19150,1,,,2,,,49,,,46,,,6,,,6,,,60,,,60,,,19150,,,0,,\n1970,2,102,,100,4.54,1,0,2,100,2,46,6,60,0,1,,,1,,,49,,,49,,,6,,,6,,,60,,,60,,,19150,,,19150,,\n1970,2,102,,100,4.54,1,0,3,100,2,9,1,16,9999999,1,2,1,,,,49,46,49,,,,6,6,6,,,,60,60,60,,,,19150,0,19150,,,\n1970,2,102,,100,4.54,1,0,4,100,1,6,1,12,9999999,1,2,1,,,,49,46,49,,,,6,6,6,,,,60,60,60,,,,19150,0,19150,,,\n1970,2,102,,100,4.54,1,0,5,100,1,17,5,50,2050,1,2,1,,,,49,46,49,,,,6,6,6,,,,60,60,60,,,,19150,0,19150,,,\n1970,2,102,,100,4.54,1,0,6,100,1,16,3,30,1450,1,2,1,,,,49,46,49,,,,6,6,6,,,,60,60,60,,,,19150,0,19150,,,\n1970,2,102,,100,4.54,1,0,7,100,1,16,3,30,1450,1,2,1,,,,49,46,49,,,,6,6,6,,,,60,60,60,,,,19150,0,19150,,,\n1970,2,102,,100,4.54,1,0,8,100,2,11,2,22,9999999,1,2,1,,,,49,46,49,,,,6,6,6,,,,60,60,60,,,,19150,0,19150,,,\n1970,2,103,,100,4.54,1,0,1,100,1,59,7,70,8850,1,,,,,,59,,,,,,7,,,,,,70,,,,,,8850,,,,,\n1970,2,103,,100,4.54,1,0,2,100,2,69,6,60,450,1,,,,,,59,,,,,,7,,,,,,70,,,,,,8850,,,,,\n1970,2,104,,100,4.54,1,0,1,100,1,59,5,50,6750,1,,,2,,,59,,,59,,,5,,,2,,,50,,,26,,,6750,,,4850,,\n1970,2,104,,100,4.54,1,0,2,100,2,59,2,26,4850,1,,,1,,,59,,,59,,,5,,,5,,,50,,,50,,,6750,,,6750,,\n1970,2,104,,100,4.54,1,0,3,100,1,20,6,60,1650,1,2,1,,,,59,59,59,,,,5,2,5,,,,50,26,50,,,,6750,4850,6750,,,\n1970,2,105,,100,4.54,1,0,1,100,2,55,7,70,9450,2,,,,,,55,,,,,,7,,,,,,70,,,,,,9450,,,,,\n1970,2,106
,,100,4.54,1,0,1,100,1,63,2,23,7950,1,,,2,,,63,,,57,,,2,,,4,,,23,,,40,,,7950,,,4650,,\n1970,2,106,,100,4.54,1,0,2,100,2,57,4,40,4650,1,,,1,,,63,,,63,,,2,,,2,,,23,,,23,,,7950,,,7950,,\n1970,2,106,,100,4.54,1,0,3,100,1,21,8,80,1450,1,2,1,,,,63,57,63,,,,2,4,2,,,,23,40,23,,,,7950,4650,7950,,,\n1970,2,107,,100,4.54,1,0,1,100,1,40,3,30,18650,1,,,2,,,40,,,37,,,3,,,6,,,30,,,60,,,18650,,,0,,\n1970,2,107,,100,4.54,1,0,2,100,2,37,6,60,0,1,,,1,,,40,,,40,,,3,,,3,,,30,,,30,,,18650,,,18650,,\n1970,2,107,,100,4.54,1,0,3,100,1,14,2,26,0,1,2,1,,,,40,37,40,,,,3,6,3,,,,30,60,30,,,,18650,0,18650,,,\n1970,2,107,,100,4.54,1,0,4,100,1,8,1,15,9999999,1,2,1,,,,40,37,40,,,,3,6,3,,,,30,60,30,,,,18650,0,18650,,,\n1970,2,108,,100,4.54,1,0,1,100,1,36,6,65,4050,1,,,2,,,36,,,36,,,6,,,2,,,65,,,26,,,4050,,,1550,,\n1970,2,108,,100,4.54,1,0,2,100,2,36,2,26,1550,1,,,1,,,36,,,36,,,6,,,6,,,65,,,65,,,4050,,,4050,,\n1970,2,108,,100,4.54,1,0,3,100,2,17,5,50,950,1,2,1,,,,36,36,36,,,,6,2,6,,,,65,26,65,,,,4050,1550,4050,,,\n1970,2,108,,100,4.54,1,0,4,100,2,14,2,26,0,1,2,1,,,,36,36,36,,,,6,2,6,,,,65,26,65,,,,4050,1550,4050,,,\n1970,2,108,,100,4.54,1,0,5,100,1,10,1,17,9999999,1,2,1,,,,36,36,36,,,,6,2,6,,,,65,26,65,,,,4050,1550,4050,,,\n1970,2,108,,100,4.54,1,0,6,100,2,8,1,15,9999999,1,2,1,,,,36,36,36,,,,6,2,6,,,,65,26,65,,,,4050,1550,4050,,,\n1970,2,109,,100,4.54,1,0,1,100,1,37,10,100,1550,1,,,,,,37,,,,,,10,,,,,,100,,,,,,1550,,,,,\n1970,2,109,,100,4.54,1,0,2,100,1,48,10,100,11050,1,,,,,,37,,,,,,10,,,,,,100,,,,,,1550,,,,,\n1970,2,110,,100,4.54,1,0,1,100,2,78,6,60,3950,2,,,,,,78,,,,,,6,,,,,,60,,,,,,3950,,,,,\n1970,2,111,,100,4.54,1,0,1,100,1,32,4,40,6050,1,,,,,,32,,,,,,4,,,,,,40,,,,,,6050,,,,,\n1970,2,112,,100,4.54,1,0,1,100,2,63,2,25,250,2,,,,,,63,,,,,,2,,,,,,25,,,,,,250,,,,,\n1970,2,113,,100,4.54,1,0,1,100,1,42,5,50,10050,1,,,2,,,42,,,34,,,5,,,6,,,50,,,60,,,10050,,,550,,\n1970,2,113,,100,4.54,1,0,2,100,2,34,6,60,550,1,,,1,,,42,,,42,,,5,,,5,,,50,,,50,,,10050,,,10050,,\n1970,2,113,,100,4.54,1,0,3,100,1,13,2,23,9999
999,1,2,1,,,,42,34,42,,,,5,6,5,,,,50,60,50,,,,10050,550,10050,,,\n1970,2,113,,100,4.54,1,0,4,100,1,12,2,22,9999999,1,2,1,,,,42,34,42,,,,5,6,5,,,,50,60,50,,,,10050,550,10050,,,\n1970,2,113,,100,4.54,1,0,5,100,2,11,1,17,9999999,1,2,1,,,,42,34,42,,,,5,6,5,,,,50,60,50,,,,10050,550,10050,,,\n1970,2,114,,100,4.54,1,0,1,100,1,82,2,26,0,1,,,2,,,82,,,75,,,2,,,2,,,26,,,26,,,0,,,1450,,\n1970,2,114,,100,4.54,1,0,2,100,2,75,2,26,1450,1,,,1,,,82,,,82,,,2,,,2,,,26,,,26,,,0,,,0,,\n1970,2,114,,100,4.54,1,0,3,100,1,47,0,2,0,1,2,1,,,,82,75,82,,,,2,2,2,,,,26,26,26,,,,0,1450,0,,,\n1970,2,115,,100,4.54,1,0,1,100,2,70,2,23,1250,2,,,,,,70,,,,,,2,,,,,,23,,,,,,1250,,,,,\n1970,2,116,,100,4.54,1,0,1,100,1,65,3,30,3450,1,,,2,,,65,,,65,,,3,,,6,,,30,,,60,,,3450,,,350,,\n1970,2,116,,100,4.54,1,0,2,100,2,65,6,60,350,1,,,1,,,65,,,65,,,3,,,3,,,30,,,30,,,3450,,,3450,,\n1970,2,116,,100,4.54,1,0,3,100,1,42,7,70,6750,1,2,1,,,,65,65,65,,,,3,6,3,,,,30,60,30,,,,3450,350,3450,,,\n1970,2,117,,100,4.54,1,0,1,100,1,34,6,60,14050,1,,,2,,,34,,,31,,,6,,,7,,,60,,,70,,,14050,,,0,,\n1970,2,117,,100,4.54,1,0,2,100,2,31,7,70,0,1,,,1,,,34,,,34,,,6,,,6,,,60,,,60,,,14050,,,14050,,\n1970,2,117,,100,4.54,1,0,3,100,1,8,1,15,9999999,1,2,1,,,,34,31,34,,,,6,7,6,,,,60,70,60,,,,14050,0,14050,,,\n1970,2,117,,100,4.54,1,0,4,100,2,7,1,14,9999999,1,2,1,,,,34,31,34,,,,6,7,6,,,,60,70,60,,,,14050,0,14050,,,\n1970,2,117,,100,4.54,1,0,5,100,2,5,1,11,9999999,1,2,1,,,,34,31,34,,,,6,7,6,,,,60,70,60,,,,14050,0,14050,,,\n1970,2,117,,100,4.54,1,0,6,100,2,1,0,1,9999999,1,2,1,,,,34,31,34,,,,6,7,6,,,,60,70,60,,,,14050,0,14050,,,\n1970,2,118,,100,4.54,1,0,1,100,1,23,6,60,6050,1,,,2,,,23,,,20,,,6,,,7,,,60,,,70,,,6050,,,6050,,\n1970,2,118,,100,4.54,1,0,2,100,2,20,7,70,6050,1,,,1,,,23,,,23,,,6,,,6,,,60,,,60,,,6050,,,6050,,\n1970,2,119,,100,4.54,1,0,1,100,1,24,11,110,8650,1,,,2,,,24,,,25,,,11,,,6,,,110,,,60,,,8650,,,4050,,\n1970,2,119,,100,4.54,1,0,2,100,2,25,6,60,4050,1,,,1,,,24,,,24,,,11,,,11,,,110,,,110,,,8650,,,8650,,\n1970,2,119,,100,4.54,1,0,3,10
0,2,0,0,1,9999999,1,2,1,,,,24,25,24,,,,11,6,11,,,,110,60,110,,,,8650,4050,8650,,,\n1970,2,120,,100,4.54,1,0,1,100,1,35,2,26,10150,1,,,2,,,35,,,29,,,2,,,6,,,26,,,60,,,10150,,,2550,,\n1970,2,120,,100,4.54,1,0,2,100,2,29,6,60,2550,1,,,1,,,35,,,35,,,2,,,2,,,26,,,26,,,10150,,,10150,,\n1970,2,120,,100,4.54,1,0,3,100,2,2,0,1,9999999,1,2,1,,,,35,29,35,,,,2,6,2,,,,26,60,26,,,,10150,2550,10150,,,\n1970,2,120,,100,4.54,1,0,4,100,1,4,0,2,9999999,1,2,1,,,,35,29,35,,,,2,6,2,,,,26,60,26,,,,10150,2550,10150,,,\n1970,2,121,,100,4.54,1,0,1,100,1,45,4,40,12550,1,,,,,,45,,,,,,4,,,,,,40,,,,,,12550,,,,,\n1970,2,122,,100,4.54,1,0,1,100,1,64,6,60,5550,1,,,2,,,64,,,62,,,6,,,2,,,60,,,26,,,5550,,,0,,\n1970,2,122,,100,4.54,1,0,2,100,2,62,2,26,0,1,,,1,,,64,,,64,,,6,,,6,,,60,,,60,,,5550,,,5550,,\n1970,2,123,,100,4.54,1,0,1,100,1,26,1,12,1950,1,,,2,,,26,,,27,,,1,,,8,,,12,,,80,,,1950,,,550,,\n1970,2,123,,100,4.54,1,0,2,100,2,27,8,80,550,1,,,1,,,26,,,26,,,1,,,1,,,12,,,12,,,1950,,,1950,,\n1970,2,123,,100,4.54,1,0,3,100,1,4,0,2,9999999,1,2,1,,,,26,27,26,,,,1,8,1,,,,12,80,12,,,,1950,550,1950,,,\n1970,2,123,,100,4.54,1,0,4,100,1,4,0,2,9999999,1,2,1,,,,26,27,26,,,,1,8,1,,,,12,80,12,,,,1950,550,1950,,,\n1970,2,123,,100,4.54,1,0,5,100,2,2,0,1,9999999,1,2,1,,,,26,27,26,,,,1,8,1,,,,12,80,12,,,,1950,550,1950,,,\n1970,2,124,,100,4.54,1,0,1,100,1,44,2,23,10750,1,,,2,,,44,,,48,,,2,,,2,,,23,,,26,,,10750,,,0,,\n1970,2,124,,100,4.54,1,0,2,100,2,48,2,26,0,1,,,1,,,44,,,44,,,2,,,2,,,23,,,23,,,10750,,,10750,,\n1970,2,124,,100,4.54,1,0,3,100,2,10,1,17,9999999,1,2,1,,,,44,48,44,,,,2,2,2,,,,23,26,23,,,,10750,0,10750,,,\n1970,2,125,,100,4.54,1,0,1,100,1,55,2,26,11450,1,,,2,,,55,,,50,,,2,,,7,,,26,,,70,,,11450,,,7050,,\n1970,2,125,,100,4.54,1,0,2,100,2,50,7,70,7050,1,,,1,,,55,,,55,,,2,,,2,,,26,,,26,,,11450,,,11450,,\n1970,2,126,,100,4.54,1,0,1,100,1,53,2,26,11150,1,,,2,,,53,,,49,,,2,,,2,,,26,,,26,,,11150,,,0,,\n1970,2,126,,100,4.54,1,0,2,100,2,49,2,26,0,1,,,1,,,53,,,53,,,2,,,2,,,26,,,26,,,11150,,,11150,,\n1970,2,126,,100,4.
54,1,0,3,100,1,21,5,50,750,1,2,1,,,,53,49,53,,,,2,2,2,,,,26,26,26,,,,11150,0,11150,,,\n1970,2,126,,100,4.54,1,0,4,100,2,15,3,30,0,1,2,1,,,,53,49,53,,,,2,2,2,,,,26,26,26,,,,11150,0,11150,,,\n1970,2,127,,100,4.54,1,0,1,100,1,40,8,80,11550,1,,,2,,,40,,,39,,,8,,,6,,,80,,,65,,,11550,,,2950,,\n1970,2,127,,100,4.54,1,0,2,100,2,39,6,65,2950,1,,,1,,,40,,,40,,,8,,,8,,,80,,,80,,,11550,,,11550,,\n1970,2,127,,100,4.54,1,0,3,100,2,17,5,50,250,1,2,1,,,,40,39,40,,,,8,6,8,,,,80,65,80,,,,11550,2950,11550,,,\n1970,2,127,,100,4.54,1,0,4,100,1,14,2,26,350,1,2,1,,,,40,39,40,,,,8,6,8,,,,80,65,80,,,,11550,2950,11550,,,\n1970,2,127,,100,4.54,1,0,5,100,2,8,1,15,9999999,1,2,1,,,,40,39,40,,,,8,6,8,,,,80,65,80,,,,11550,2950,11550,,,\n1970,2,128,,100,4.54,1,0,1,100,1,48,3,30,9050,1,,,2,,,48,,,44,,,3,,,3,,,30,,,30,,,9050,,,0,,\n1970,2,128,,100,4.54,1,0,2,100,2,44,3,30,0,1,,,1,,,48,,,48,,,3,,,3,,,30,,,30,,,9050,,,9050,,\n1970,2,128,,100,4.54,1,0,3,100,1,21,6,60,6850,1,2,1,,,,48,44,48,,,,3,3,3,,,,30,30,30,,,,9050,0,9050,,,\n1970,2,128,,100,4.54,1,0,4,100,2,16,3,30,0,1,2,1,,,,48,44,48,,,,3,3,3,,,,30,30,30,,,,9050,0,9050,,,\n1970,2,128,,100,4.54,1,0,5,100,2,15,3,30,0,1,2,1,,,,48,44,48,,,,3,3,3,,,,30,30,30,,,,9050,0,9050,,,\n1970,2,128,,100,4.54,1,0,6,100,2,11,2,22,9999999,1,2,1,,,,48,44,48,,,,3,3,3,,,,30,30,30,,,,9050,0,9050,,,\n1970,2,130,,100,4.54,1,0,1,100,1,61,2,22,10250,1,,,2,,,61,,,58,,,2,,,2,,,22,,,25,,,10250,,,3050,,\n1970,2,130,,100,4.54,1,0,2,100,2,58,2,25,3050,1,,,1,,,61,,,61,,,2,,,2,,,22,,,22,,,10250,,,10250,,\n1970,2,131,,100,4.54,1,0,1,100,1,59,4,40,6550,1,,,,,,59,,,,,,4,,,,,,40,,,,,,6550,,,,,\n1970,2,132,,100,4.54,1,0,1,100,1,44,2,26,8850,1,,,2,,,44,,,39,,,2,,,1,,,26,,,17,,,8850,,,0,,\n1970,2,132,,100,4.54,1,0,2,100,2,39,1,17,0,1,,,1,,,44,,,44,,,2,,,2,,,26,,,26,,,8850,,,8850,,\n1970,2,132,,100,4.54,1,0,3,100,1,15,4,40,0,1,2,1,,,,44,39,44,,,,2,1,2,,,,26,17,26,,,,8850,0,8850,,,\n1970,2,132,,100,4.54,1,0,4,100,2,10,1,17,9999999,1,2,1,,,,44,39,44,,,,2,1,2,,,,26,17,26,,,,8850,0,8850,,,\n197
0,2,134,,100,4.54,1,0,1,100,1,22,1,16,4850,1,,,,,,22,,,,,,1,,,,,,16,,,,,,4850,,,,,\n1970,2,134,,100,4.54,1,0,2,100,1,20,2,23,4850,1,,,,,,22,,,,,,1,,,,,,16,,,,,,4850,,,,,\n1970,2,135,,100,4.54,1,0,1,100,1,72,2,25,3350,1,,,2,,,72,,,75,,,2,,,2,,,25,,,26,,,3350,,,0,,\n1970,2,135,,100,4.54,1,0,2,100,2,75,2,26,0,1,,,1,,,72,,,72,,,2,,,2,,,25,,,25,,,3350,,,3350,,\n1970,2,136,,100,4.54,3,0,1,100,1,14,2,23,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,\n1970,2,137,,100,4.54,1,0,1,100,1,26,6,60,650,1,,,2,,,26,,,23,,,6,,,6,,,60,,,60,,,650,,,0,,\n1970,2,137,,100,4.54,1,0,2,100,2,23,6,60,0,1,,,1,,,26,,,26,,,6,,,6,,,60,,,60,,,650,,,650,,\n1970,2,137,,100,4.54,1,0,3,100,1,3,0,2,9999999,1,2,1,,,,26,23,26,,,,6,6,6,,,,60,60,60,,,,650,0,650,,,\n1970,2,137,,100,4.54,1,0,4,100,2,2,0,1,9999999,1,2,1,,,,26,23,26,,,,6,6,6,,,,60,60,60,,,,650,0,650,,,\n1970,2,137,,100,4.54,1,0,5,100,1,0,0,1,9999999,1,2,1,,,,26,23,26,,,,6,6,6,,,,60,60,60,,,,650,0,650,,,\n1970,2,138,,100,4.54,1,0,1,100,1,32,6,60,-850,1,,,,,,32,,,,,,6,,,,,,60,,,,,,-850,,,,,\n1970,2,138,,100,4.54,1,0,2,100,2,38,6,60,2650,1,,,,,,32,,,,,,6,,,,,,60,,,,,,-850,,,,,\n1970,2,140,,100,4.54,1,0,1,100,1,57,1,15,3050,1,,,2,,,57,,,52,,,1,,,2,,,15,,,25,,,3050,,,0,,\n1970,2,140,,100,4.54,1,0,2,100,2,52,2,25,0,1,,,1,,,57,,,57,,,1,,,1,,,15,,,15,,,3050,,,3050,,\n1970,2,140,,100,4.54,1,0,3,100,1,30,6,60,4050,1,2,1,,,,57,52,57,,,,1,2,1,,,,15,25,15,,,,3050,0,3050,,,\n1970,2,141,,100,4.54,1,0,1,100,2,75,2,23,5550,2,,,,,,75,,,,,,2,,,,,,23,,,,,,5550,,,,,\n1970,2,142,,100,4.54,1,0,1,100,1,76,2,22,750,1,,,,,,76,,,,,,2,,,,,,22,,,,,,750,,,,,\n1970,2,142,,100,4.54,1,0,2,100,2,72,2,26,250,1,,,,,,76,,,,,,2,,,,,,22,,,,,,750,,,,,\n1970,2,143,,100,4.54,1,0,1,100,1,58,6,60,7050,1,,,2,,,58,,,57,,,6,,,6,,,60,,,60,,,7050,,,6550,,\n1970,2,143,,100,4.54,1,0,2,100,2,57,6,60,6550,1,,,1,,,58,,,58,,,6,,,6,,,60,,,60,,,7050,,,7050,,\n1970,2,144,,100,4.54,1,0,1,100,1,30,11,110,15050,1,,,2,,,30,,,29,,,11,,,9,,,110,,,90,,,15050,,,0,,\n1970,2,144,,100,4.54,1,0,2,100,2,29,9,90,0,1,,,1,,,30,,,3
0,,,11,,,11,,,110,,,110,,,15050,,,15050,,\n1970,2,144,,100,4.54,1,0,3,100,2,3,0,2,9999999,1,2,1,,,,30,29,30,,,,11,9,11,,,,110,90,110,,,,15050,0,15050,,,\n1970,2,145,,100,4.54,1,0,1,100,1,30,2,26,5150,1,,,2,,,30,,,29,,,2,,,4,,,26,,,40,,,5150,,,3850,,\n1970,2,145,,100,4.54,1,0,2,100,2,29,4,40,3850,1,,,1,,,30,,,30,,,2,,,2,,,26,,,26,,,5150,,,5150,,\n1970,2,145,,100,4.54,1,0,3,100,2,12,2,22,9999999,1,2,1,,,,30,29,30,,,,2,4,2,,,,26,40,26,,,,5150,3850,5150,,,\n1970,2,145,,100,4.54,1,0,4,100,1,10,1,17,9999999,1,2,1,,,,30,29,30,,,,2,4,2,,,,26,40,26,,,,5150,3850,5150,,,\n1970,2,145,,100,4.54,1,0,5,100,2,8,1,14,9999999,1,2,1,,,,30,29,30,,,,2,4,2,,,,26,40,26,,,,5150,3850,5150,,,\n1970,2,145,,100,4.54,1,0,6,100,2,3,0,2,9999999,1,2,1,,,,30,29,30,,,,2,4,2,,,,26,40,26,,,,5150,3850,5150,,,\n1970,2,146,,100,4.54,1,0,1,100,2,70,6,60,1950,2,,,,,,70,,,,,,6,,,,,,60,,,,,,1950,,,,,\n1970,2,146,,100,4.54,1,0,2,100,1,37,2,26,7550,2,2,,,,,70,70,,,,,6,6,,,,,60,60,,,,,1950,1950,,,,\n1970,2,147,,100,4.54,1,0,1,100,1,34,9,90,13050,1,,,2,,,34,,,34,,,9,,,11,,,90,,,110,,,13050,,,0,,\n1970,2,147,,100,4.54,1,0,2,100,2,34,11,110,0,1,,,1,,,34,,,34,,,9,,,9,,,90,,,90,,,13050,,,13050,,\n1970,2,147,,100,4.54,1,0,3,100,2,9,1,16,9999999,1,2,1,,,,34,34,34,,,,9,11,9,,,,90,110,90,,,,13050,0,13050,,,\n1970,2,147,,100,4.54,1,0,4,100,1,7,1,14,9999999,1,2,1,,,,34,34,34,,,,9,11,9,,,,90,110,90,,,,13050,0,13050,,,\n1970,2,148,,100,4.54,1,0,1,100,2,79,6,60,1150,2,,,,,,79,,,,,,6,,,,,,60,,,,,,1150,,,,,\n1970,2,149,,100,4.54,1,0,1,100,1,70,2,26,11050,1,,,2,,,70,,,68,,,2,,,2,,,26,,,26,,,11050,,,0,,\n1970,2,149,,100,4.54,1,0,2,100,2,68,2,26,0,1,,,1,,,70,,,70,,,2,,,2,,,26,,,26,,,11050,,,11050,,\n1970,2,150,,100,4.54,1,0,1,100,1,35,6,60,15050,1,,,2,,,35,,,31,,,6,,,6,,,60,,,60,,,15050,,,1550,,\n1970,2,150,,100,4.54,1,0,2,100,2,31,6,60,1550,1,,,1,,,35,,,35,,,6,,,6,,,60,,,60,,,15050,,,15050,,\n1970,2,150,,100,4.54,1,0,3,100,1,8,1,14,9999999,1,2,1,,,,35,31,35,,,,6,6,6,,,,60,60,60,,,,15050,1550,15050,,,\n1970,2,150,,100,4.54,1,0,4,
100,2,5,1,11,9999999,1,2,1,,,,35,31,35,,,,6,6,6,,,,60,60,60,,,,15050,1550,15050,,,\n1970,2,152,,100,4.54,1,0,1,100,1,58,6,60,12650,1,,,2,,,58,,,55,,,6,,,2,,,60,,,26,,,12650,,,0,,\n1970,2,152,,100,4.54,1,0,2,100,2,55,2,26,0,1,,,1,,,58,,,58,,,6,,,6,,,60,,,60,,,12650,,,12650,,\n1970,2,153,,100,4.54,1,0,1,100,2,49,4,40,4550,2,,,,,,49,,,,,,4,,,,,,40,,,,,,4550,,,,,\n1970,2,153,,100,4.54,1,0,2,100,1,39,5,50,6050,2,2,,,,,49,49,,,,,4,4,,,,,40,40,,,,,4550,4550,,,,\n1970,2,154,,100,4.54,3,0,1,100,1,73,2,26,4250,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,\n1970,2,155,,100,4.54,1,0,1,100,1,88,4,40,1550,1,,,2,,,88,,,84,,,4,,,2,,,40,,,26,,,1550,,,850,,\n1970,2,155,,100,4.54,1,0,2,100,2,84,2,26,850,1,,,1,,,88,,,88,,,4,,,4,,,40,,,40,,,1550,,,1550,,\n1970,2,156,,100,4.54,1,0,1,100,1,25,5,50,10050,1,,,2,,,25,,,24,,,5,,,6,,,50,,,60,,,10050,,,750,,\n1970,2,156,,100,4.54,1,0,2,100,2,24,6,60,750,1,,,1,,,25,,,25,,,5,,,5,,,50,,,50,,,10050,,,10050,,\n1970,2,156,,100,4.54,1,0,3,100,2,6,1,12,9999999,1,2,1,,,,25,24,25,,,,5,6,5,,,,50,60,50,,,,10050,750,10050,,,\n1970,2,156,,100,4.54,1,0,4,100,1,3,0,2,9999999,1,2,1,,,,25,24,25,,,,5,6,5,,,,50,60,50,,,,10050,750,10050,,,\n1970,2,157,,100,4.54,1,0,1,100,2,56,0,2,3450,2,,,,,,56,,,,,,0,,,,,,2,,,,,,3450,,,,,\n1970,2,158,,100,4.54,3,0,1,100,2,79,6,60,2250,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,\n1970,2,159,,100,4.54,1,0,1,100,1,25,8,80,1850,1,,,2,,,25,,,36,,,8,,,6,,,80,,,60,,,1850,,,3250,,\n1970,2,159,,100,4.54,1,0,2,100,2,36,6,60,3250,1,,,1,,,25,,,25,,,8,,,8,,,80,,,80,,,1850,,,1850,,\n1970,2,160,,100,4.54,1,0,1,100,2,28,6,60,3550,2,,,,,,28,,,,,,6,,,,,,60,,,,,,3550,,,,,\n1970,2,160,,100,4.54,1,0,2,100,1,7,1,12,9999999,2,2,,,,,28,28,,,,,6,6,,,,,60,60,,,,,3550,3550,,,,\n1970,2,161,,100,4.54,1,0,1,100,2,21,8,80,1350,2,,,,,,21,,,,,,8,,,,,,80,,,,,,1350,,,,,\n1970,2,161,,100,4.54,1,0,2,100,2,23,10,100,2750,2,,,,,,21,,,,,,8,,,,,,80,,,,,,1350,,,,,\n1970,2,162,,100,4.54,1,0,1,100,1,22,10,100,2050,1,,,2,,,22,,,22,,,10,,,10,,,100,,,100,,,2050,,,1250,,\n1970,2,162,,100,4.54,1,0,2,100,2,22
,10,100,1250,1,,,1,,,22,,,22,,,10,,,10,,,100,,,100,,,2050,,,2050,,\n1970,2,163,,100,4.54,1,0,1,100,2,60,6,60,4350,2,,,,,,60,,,,,,6,,,,,,60,,,,,,4350,,,,,\n1970,2,164,,100,4.54,1,0,1,100,1,20,8,80,2250,1,,,,,,20,,,,,,8,,,,,,80,,,,,,2250,,,,,\n1970,2,165,,100,4.54,1,0,1,100,1,78,2,23,3250,1,,,,,,78,,,,,,2,,,,,,23,,,,,,3250,,,,,\n1970,2,166,,100,4.54,1,0,1,100,1,24,10,100,9050,1,,,2,,,24,,,24,,,10,,,8,,,100,,,80,,,9050,,,3250,,\n1970,2,166,,100,4.54,1,0,2,100,2,24,8,80,3250,1,,,1,,,24,,,24,,,10,,,10,,,100,,,100,,,9050,,,9050,,\n1970,2,166,,100,4.54,1,0,3,100,1,0,0,1,9999999,1,2,1,,,,24,24,24,,,,10,8,10,,,,100,80,100,,,,9050,3250,9050,,,\n1970,2,167,,100,4.54,1,0,1,100,1,34,10,100,17050,1,,,2,,,34,,,33,,,10,,,10,,,100,,,100,,,17050,,,0,,\n1970,2,167,,100,4.54,1,0,2,100,2,33,10,100,0,1,,,1,,,34,,,34,,,10,,,10,,,100,,,100,,,17050,,,17050,,\n1970,2,167,,100,4.54,1,0,3,100,1,9,1,16,9999999,1,2,1,,,,34,33,34,,,,10,10,10,,,,100,100,100,,,,17050,0,17050,,,\n1970,2,167,,100,4.54,1,0,4,100,2,7,1,12,9999999,1,2,1,,,,34,33,34,,,,10,10,10,,,,100,100,100,,,,17050,0,17050,,,\n1970,2,168,,100,4.54,1,0,1,100,1,27,6,60,16650,1,,,2,,,27,,,26,,,6,,,6,,,60,,,60,,,16650,,,0,,\n1970,2,168,,100,4.54,1,0,2,100,2,26,6,60,0,1,,,1,,,27,,,27,,,6,,,6,,,60,,,60,,,16650,,,16650,,\n1970,2,168,,100,4.54,1,0,3,100,2,5,1,11,9999999,1,2,1,,,,27,26,27,,,,6,6,6,,,,60,60,60,,,,16650,0,16650,,,\n1970,2,168,,100,4.54,1,0,4,100,2,2,0,1,9999999,1,2,1,,,,27,26,27,,,,6,6,6,,,,60,60,60,,,,16650,0,16650,,,\n1970,2,168,,100,4.54,1,0,5,100,2,1,0,1,9999999,1,2,1,,,,27,26,27,,,,6,6,6,,,,60,60,60,,,,16650,0,16650,,,\n1970,2,169,,100,4.54,1,0,1,100,1,28,6,65,11250,1,,,2,,,28,,,23,,,6,,,7,,,65,,,70,,,11250,,,6550,,\n1970,2,169,,100,4.54,1,0,2,100,2,23,7,70,6550,1,,,1,,,28,,,28,,,6,,,6,,,65,,,65,,,11250,,,11250,,\n1970,2,170,,100,4.54,1,0,1,100,1,45,6,60,12950,1,,,2,,,45,,,46,,,6,,,4,,,60,,,40,,,12950,,,0,,\n1970,2,170,,100,4.54,1,0,2,100,2,46,4,40,0,1,,,1,,,45,,,45,,,6,,,6,,,60,,,60,,,12950,,,12950,,\n1970,2,170,,100,4.54,
1,0,3,100,1,18,6,65,1650,1,2,1,,,,45,46,45,,,,6,4,6,,,,60,40,60,,,,12950,0,12950,,,\n1970,2,170,,100,4.54,1,0,4,100,2,23,6,60,5550,1,2,1,,,,45,46,45,,,,6,4,6,,,,60,40,60,,,,12950,0,12950,,,\n1970,2,170,,100,4.54,1,0,5,100,2,0,0,1,9999999,1,2,,,,,45,23,,,,,6,6,,,,,60,60,,,,,12950,5550,,,,\n1970,2,171,,100,4.54,1,0,1,100,1,26,6,60,11450,1,,,2,,,26,,,27,,,6,,,6,,,60,,,60,,,11450,,,0,,\n1970,2,171,,100,4.54,1,0,2,100,2,27,6,60,0,1,,,1,,,26,,,26,,,6,,,6,,,60,,,60,,,11450,,,11450,,\n1970,2,171,,100,4.54,1,0,3,100,1,4,0,2,9999999,1,2,1,,,,26,27,26,,,,6,6,6,,,,60,60,60,,,,11450,0,11450,,,\n1970,2,171,,100,4.54,1,0,4,100,2,2,0,1,9999999,1,2,1,,,,26,27,26,,,,6,6,6,,,,60,60,60,,,,11450,0,11450,,,\n1970,2,171,,100,4.54,1,0,5,100,2,0,0,1,9999999,1,2,1,,,,26,27,26,,,,6,6,6,,,,60,60,60,,,,11450,0,11450,,,\n1970,2,172,,100,4.54,1,0,1,100,1,33,6,60,1450,1,,,2,,,33,,,37,,,6,,,2,,,60,,,26,,,1450,,,3450,,\n1970,2,172,,100,4.54,1,0,2,100,2,37,2,26,3450,1,,,1,,,33,,,33,,,6,,,6,,,60,,,60,,,1450,,,1450,,\n1970,2,172,,100,4.54,1,0,3,100,2,6,1,12,9999999,1,2,1,,,,33,37,33,,,,6,2,6,,,,60,26,60,,,,1450,3450,1450,,,\n1970,2,172,,100,4.54,1,0,4,100,1,5,1,11,9999999,1,2,1,,,,33,37,33,,,,6,2,6,,,,60,26,60,,,,1450,3450,1450,,,\n1970,2,174,,100,4.54,1,0,1,100,1,60,4,40,13850,1,,,2,,,60,,,48,,,4,,,6,,,40,,,60,,,13850,,,0,,\n1970,2,174,,100,4.54,1,0,2,100,2,48,6,60,0,1,,,1,,,60,,,60,,,4,,,4,,,40,,,40,,,13850,,,13850,,\n1970,2,175,,100,4.54,1,0,1,100,1,37,1,15,3550,1,,,,,,37,,,,,,1,,,,,,15,,,,,,3550,,,,,\n1970,2,175,,100,4.54,1,0,2,100,1,29,1,17,1550,1,,,,,,37,,,,,,1,,,,,,15,,,,,,3550,,,,,\n1970,2,175,,100,4.54,1,0,3,100,1,28,1,17,2050,1,,,,,,37,,,,,,1,,,,,,15,,,,,,3550,,,,,\n1970,2,175,,100,4.54,1,0,4,100,1,29,2,23,1350,1,,,,,,37,,,,,,1,,,,,,15,,,,,,3550,,,,,\n1970,2,176,,100,4.54,4,0,1,100,1,34,11,111,11550,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,\n1970,2,177,,100,4.54,1,0,1,100,2,75,8,80,50000,2,,,,,,75,,,,,,8,,,,,,80,,,,,,50000,,,,,\n1970,2,177,,100,4.54,1,0,2,100,2,44,6,60,0,2,2,,,,,75,75,,,,,8,8,,,,,80,80,,
,,,50000,50000,,,,\n1970,2,178,,100,4.54,1,0,1,100,2,28,8,80,6050,2,,,,,,28,,,,,,8,,,,,,80,,,,,,6050,,,,,\n1970,2,178,,100,4.54,1,0,2,100,1,5,1,11,9999999,2,2,,,,,28,28,,,,,8,8,,,,,80,80,,,,,6050,6050,,,,\n1970,2,179,,100,4.54,1,0,1,100,1,35,11,111,19150,1,,,2,,,35,,,30,,,11,,,11,,,111,,,110,,,19150,,,4050,,\n1970,2,179,,100,4.54,1,0,2,100,2,30,11,110,4050,1,,,1,,,35,,,35,,,11,,,11,,,111,,,111,,,19150,,,19150,,\n1970,2,179,,100,4.54,1,0,3,100,1,4,1,11,9999999,1,2,1,,,,35,30,35,,,,11,11,11,,,,111,110,111,,,,19150,4050,19150,,,\n1970,2,180,,100,4.54,1,0,1,100,1,28,10,100,2350,1,,,2,,,28,,,26,,,10,,,11,,,100,,,110,,,2350,,,7250,,\n1970,2,180,,100,4.54,1,0,2,100,2,26,11,110,7250,1,,,1,,,28,,,28,,,10,,,10,,,100,,,100,,,2350,,,2350,,\n1970,2,182,,100,4.54,1,0,1,100,1,73,2,25,1050,1,,,,,,73,,,,,,2,,,,,,25,,,,,,1050,,,,,\n1970,2,183,,100,4.54,1,0,1,100,1,27,6,60,15050,1,,,2,,,27,,,27,,,6,,,6,,,60,,,60,,,15050,,,0,,\n1970,2,183,,100,4.54,1,0,2,100,2,27,6,60,0,1,,,1,,,27,,,27,,,6,,,6,,,60,,,60,,,15050,,,15050,,\n1970,2,183,,100,4.54,1,0,3,100,2,4,0,2,9999999,1,2,1,,,,27,27,27,,,,6,6,6,,,,60,60,60,,,,15050,0,15050,,,\n1970,2,183,,100,4.54,1,0,4,100,1,0,0,1,9999999,1,2,1,,,,27,27,27,,,,6,6,6,,,,60,60,60,,,,15050,0,15050,,,\n1970,2,184,,100,4.54,1,0,1,100,2,49,2,25,2550,2,,,,,,49,,,,,,2,,,,,,25,,,,,,2550,,,,,\n1970,2,184,,100,4.54,1,0,2,100,2,48,2,25,0,2,,,,,,49,,,,,,2,,,,,,25,,,,,,2550,,,,,\n1970,2,185,,100,4.54,1,0,1,100,1,29,6,60,8250,1,,,2,,,29,,,22,,,6,,,6,,,60,,,60,,,8250,,,5050,,\n1970,2,185,,100,4.54,1,0,2,100,2,22,6,60,5050,1,,,1,,,29,,,29,,,6,,,6,,,60,,,60,,,8250,,,8250,,\n1970,2,187,,100,4.54,1,0,1,100,1,51,8,80,10050,1,,,2,,,51,,,44,,,8,,,6,,,80,,,60,,,10050,,,1450,,\n1970,2,187,,100,4.54,1,0,2,100,2,44,6,60,1450,1,,,1,,,51,,,51,,,8,,,8,,,80,,,80,,,10050,,,10050,,\n1970,2,187,,100,4.54,1,0,3,100,2,16,4,40,0,1,2,1,,,,51,44,51,,,,8,6,8,,,,80,60,80,,,,10050,1450,10050,,,\n1970,2,187,,100,4.54,1,0,4,100,2,10,1,17,9999999,1,2,1,,,,51,44,51,,,,8,6,8,,,,80,60,80,,,,10050,14
50,10050,,,\n1970,2,187,,100,4.54,1,0,5,100,2,9,1,15,9999999,1,2,1,,,,51,44,51,,,,8,6,8,,,,80,60,80,,,,10050,1450,10050,,,\n1970,2,187,,100,4.54,1,0,6,100,2,19,6,60,0,1,2,1,,,,51,44,51,,,,8,6,8,,,,80,60,80,,,,10050,1450,10050,,,\n1970,2,187,,100,4.54,1,0,7,100,2,0,0,1,9999999,1,2,,,,,51,19,,,,,8,6,,,,,80,60,,,,,10050,0,,,,\n1970,2,188,,100,4.54,1,0,1,100,1,55,6,60,11250,1,,,2,,,55,,,50,,,6,,,2,,,60,,,26,,,11250,,,650,,\n1970,2,188,,100,4.54,1,0,2,100,2,50,2,26,650,1,,,1,,,55,,,55,,,6,,,6,,,60,,,60,,,11250,,,11250,,\n1970,2,188,,100,4.54,1,0,3,100,2,10,2,22,9999999,1,2,1,,,,55,50,55,,,,6,2,6,,,,60,26,60,,,,11250,650,11250,,,\n1970,2,189,,100,4.54,1,0,1,100,1,51,6,60,8050,1,,,2,,,51,,,51,,,6,,,4,,,60,,,40,,,8050,,,0,,\n1970,2,189,,100,4.54,1,0,2,100,2,51,4,40,0,1,,,1,,,51,,,51,,,6,,,6,,,60,,,60,,,8050,,,8050,,\n1970,2,189,,100,4.54,1,0,3,100,2,22,8,80,0,1,2,1,,,,51,51,51,,,,6,4,6,,,,60,40,60,,,,8050,0,8050,,,\n1970,2,189,,100,4.54,1,0,4,100,1,17,5,50,1250,1,2,1,,,,51,51,51,,,,6,4,6,,,,60,40,60,,,,8050,0,8050,,,\n1970,2,190,,100,4.54,1,0,1,100,1,43,11,111,21150,1,,,2,,,43,,,39,,,11,,,6,,,111,,,60,,,21150,,,0,,\n1970,2,190,,100,4.54,1,0,2,100,2,39,6,60,0,1,,,1,,,43,,,43,,,11,,,11,,,111,,,111,,,21150,,,21150,,\n1970,2,190,,100,4.54,1,0,3,100,1,15,2,26,0,1,2,1,,,,43,39,43,,,,11,6,11,,,,111,60,111,,,,21150,0,21150,,,\n1970,2,190,,100,4.54,1,0,4,100,1,12,2,22,9999999,1,2,1,,,,43,39,43,,,,11,6,11,,,,111,60,111,,,,21150,0,21150,,,\n1970,2,190,,100,4.54,1,0,5,100,1,9,1,16,9999999,1,2,1,,,,43,39,43,,,,11,6,11,,,,111,60,111,,,,21150,0,21150,,,\n1970,2,191,,100,4.54,1,0,1,100,1,54,3,30,7550,1,,,2,,,54,,,50,,,3,,,2,,,30,,,26,,,7550,,,5050,,\n1970,2,191,,100,4.54,1,0,2,100,2,50,2,26,5050,1,,,1,,,54,,,54,,,3,,,3,,,30,,,30,,,7550,,,7550,,\n1970,2,192,,100,4.54,1,0,1,100,2,62,2,26,3550,2,,,,,,62,,,,,,2,,,,,,26,,,,,,3550,,,,,\n1970,2,193,,100,4.54,1,0,1,100,1,26,4,40,11050,1,,,2,,,26,,,21,,,4,,,6,,,40,,,60,,,11050,,,0,,\n1970,2,193,,100,4.54,1,0,2,100,2,21,6,60,0,1,,,1,,,26,,,26,,,4,,,
4,,,40,,,40,,,11050,,,11050,,\n1970,2,193,,100,4.54,1,0,3,100,1,0,0,1,9999999,1,2,1,,,,26,21,26,,,,4,6,4,,,,40,60,40,,,,11050,0,11050,,,\n1970,2,194,,100,4.54,1,0,1,100,1,28,1,16,2550,1,,,2,,,28,,,23,,,1,,,0,,,16,,,2,,,2550,,,1550,,\n1970,2,194,,100,4.54,1,0,2,100,2,23,0,2,1550,1,,,1,,,28,,,28,,,1,,,1,,,16,,,16,,,2550,,,2550,,\n1970,2,194,,100,4.54,1,0,3,100,1,4,0,2,9999999,1,2,1,,,,28,23,28,,,,1,0,1,,,,16,2,16,,,,2550,1550,2550,,,\n1970,2,194,,100,4.54,1,0,4,100,2,2,0,1,9999999,1,2,1,,,,28,23,28,,,,1,0,1,,,,16,2,16,,,,2550,1550,2550,,,\n1970,2,195,,100,4.54,1,0,1,100,1,48,6,60,9450,1,,,2,,,48,,,45,,,6,,,4,,,60,,,40,,,9450,,,3650,,\n1970,2,195,,100,4.54,1,0,2,100,2,45,4,40,3650,1,,,1,,,48,,,48,,,6,,,6,,,60,,,60,,,9450,,,9450,,\n1970,2,196,,100,4.54,1,0,1,100,1,43,3,30,18050,1,,,2,,,43,,,37,,,3,,,6,,,30,,,60,,,18050,,,2550,,\n1970,2,196,,100,4.54,1,0,2,100,2,37,6,60,2550,1,,,1,,,43,,,43,,,3,,,3,,,30,,,30,,,18050,,,18050,,\n1970,2,196,,100,4.54,1,0,3,100,2,12,2,22,9999999,1,2,1,,,,43,37,43,,,,3,6,3,,,,30,60,30,,,,18050,2550,18050,,,\n1970,2,196,,100,4.54,1,0,4,100,2,8,1,14,9999999,1,2,1,,,,43,37,43,,,,3,6,3,,,,30,60,30,,,,18050,2550,18050,,,\n1970,2,196,,100,4.54,1,0,5,100,2,14,2,26,0,1,2,1,,,,43,37,43,,,,3,6,3,,,,30,60,30,,,,18050,2550,18050,,,\n1970,2,197,,100,4.54,1,0,1,100,1,56,2,23,7650,1,,,,,,56,,,,,,2,,,,,,23,,,,,,7650,,,,,\n1970,2,198,,100,4.54,1,0,1,100,1,62,2,22,12050,1,,,2,,,62,,,59,,,2,,,7,,,22,,,70,,,12050,,,2650,,\n1970,2,198,,100,4.54,1,0,2,100,2,59,7,70,2650,1,,,1,,,62,,,62,,,2,,,2,,,22,,,22,,,12050,,,12050,,\n1970,2,198,,100,4.54,1,0,3,100,2,22,7,70,4350,1,2,1,,,,62,59,62,,,,2,7,2,,,,22,70,22,,,,12050,2650,12050,,,\n1970,2,198,,100,4.54,1,0,4,100,2,20,1,14,1050,1,2,1,,,,62,59,62,,,,2,7,2,,,,22,70,22,,,,12050,2650,12050,,,\n1970,2,199,,100,4.54,1,0,1,100,1,50,2,26,12050,1,,,2,,,50,,,51,,,2,,,2,,,26,,,25,,,12050,,,0,,\n1970,2,199,,100,4.54,1,0,2,100,2,51,2,25,0,1,,,1,,,50,,,50,,,2,,,2,,,26,,,26,,,12050,,,12050,,\n1970,2,199,,100,4.54,1,0,3,100,2,18,5,50
,750,1,2,1,,,,50,51,50,,,,2,2,2,,,,26,25,26,,,,12050,0,12050,,,\n1970,2,200,,100,4.54,1,0,1,100,1,63,4,40,12350,1,,,2,,,63,,,59,,,4,,,6,,,40,,,60,,,12350,,,0,,\n1970,2,200,,100,4.54,1,0,2,100,2,59,6,60,0,1,,,1,,,63,,,63,,,4,,,4,,,40,,,40,,,12350,,,12350,,\n1970,2,201,,100,4.54,1,0,1,100,1,67,4,40,6350,1,,,2,,,67,,,61,,,4,,,2,,,40,,,26,,,6350,,,0,,\n1970,2,201,,100,4.54,1,0,2,100,2,61,2,26,0,1,,,1,,,67,,,67,,,4,,,4,,,40,,,40,,,6350,,,6350,,\n1970,2,202,,100,4.54,1,0,1,100,1,51,6,60,10050,1,,,2,,,51,,,45,,,6,,,6,,,60,,,60,,,10050,,,0,,\n1970,2,202,,100,4.54,1,0,2,100,2,45,6,60,0,1,,,1,,,51,,,51,,,6,,,6,,,60,,,60,,,10050,,,10050,,\n1970,2,202,,100,4.54,1,0,3,100,2,17,4,40,150,1,2,1,,,,51,45,51,,,,6,6,6,,,,60,60,60,,,,10050,0,10050,,,\n1970,2,203,,100,4.54,1,0,1,100,1,42,6,60,13750,1,,,2,,,42,,,43,,,6,,,3,,,60,,,30,,,13750,,,2750,,\n1970,2,203,,100,4.54,1,0,2,100,2,43,3,30,2750,1,,,1,,,42,,,42,,,6,,,6,,,60,,,60,,,13750,,,13750,,\n1970,2,203,,100,4.54,1,0,3,100,1,19,5,50,1550,1,2,1,,,,42,43,42,,,,6,3,6,,,,60,30,60,,,,13750,2750,13750,,,\n1970,2,203,,100,4.54,1,0,4,100,1,15,3,30,50,1,2,1,,,,42,43,42,,,,6,3,6,,,,60,30,60,,,,13750,2750,13750,,,\n1970,2,203,,100,4.54,1,0,5,100,2,11,1,17,9999999,1,2,1,,,,42,43,42,,,,6,3,6,,,,60,30,60,,,,13750,2750,13750,,,\n1970,2,204,,100,4.54,1,0,1,100,1,46,2,23,5250,1,,,2,,,46,,,42,,,2,,,1,,,23,,,17,,,5250,,,0,,\n1970,2,204,,100,4.54,1,0,2,100,2,42,1,17,0,1,,,1,,,46,,,46,,,2,,,2,,,23,,,23,,,5250,,,5250,,\n1970,2,205,,100,4.54,1,0,1,100,2,47,2,26,3550,2,,,,,,47,,,,,,2,,,,,,26,,,,,,3550,,,,,\n1970,2,205,,100,4.54,1,0,2,100,1,8,1,14,9999999,2,2,,,,,47,28,,,,,2,5,,,,,26,50,,,,,3550,6050,,,,\n1970,2,205,,100,4.54,1,0,3,100,2,28,5,50,6050,2,2,,,,,47,47,,,,,2,2,,,,,26,26,,,,,3550,3550,,,,\n1970,2,205,,100,4.54,1,0,4,100,1,1,0,1,9999999,2,2,,,,,47,28,,,,,2,5,,,,,26,50,,,,,3550,6050,,,,\n1970,2,206,,100,4.54,1,0,1,100,1,29,2,23,7250,1,,,2,,,29,,,24,,,2,,,2,,,23,,,26,,,7250,,,0,,\n1970,2,206,,100,4.54,1,0,2,100,2,24,2,26,0,1,,,1,,,29,,,29,,,2,,,2,,,
23,,,23,,,7250,,,7250,,\n1970,2,206,,100,4.54,1,0,3,100,1,2,0,1,9999999,1,2,1,,,,29,24,29,,,,2,2,2,,,,23,26,23,,,,7250,0,7250,,,\n1970,2,206,,100,4.54,1,0,4,100,1,0,0,1,9999999,1,2,1,,,,29,24,29,,,,2,2,2,,,,23,26,23,,,,7250,0,7250,,,\n1970,2,207,,100,4.54,1,0,1,100,2,49,6,60,7450,2,,,,,,49,,,,,,6,,,,,,60,,,,,,7450,,,,,\n1970,2,208,,100,4.54,1,0,1,100,1,35,4,40,7250,1,,,2,,,35,,,29,,,4,,,2,,,40,,,22,,,7250,,,0,,\n1970,2,208,,100,4.54,1,0,2,100,2,29,2,22,0,1,,,1,,,35,,,35,,,4,,,4,,,40,,,40,,,7250,,,7250,,\n1970,2,208,,100,4.54,1,0,3,100,2,7,1,16,9999999,1,2,1,,,,35,29,35,,,,4,2,4,,,,40,22,40,,,,7250,0,7250,,,\n1970,2,208,,100,4.54,1,0,4,100,1,8,1,14,9999999,1,2,1,,,,35,29,35,,,,4,2,4,,,,40,22,40,,,,7250,0,7250,,,\n1970,2,208,,100,4.54,1,0,5,100,1,5,0,2,9999999,1,2,1,,,,35,29,35,,,,4,2,4,,,,40,22,40,,,,7250,0,7250,,,\n1970,2,208,,100,4.54,1,0,6,100,1,3,0,2,9999999,1,2,1,,,,35,29,35,,,,4,2,4,,,,40,22,40,,,,7250,0,7250,,,\n1970,2,208,,100,4.54,1,0,7,100,1,0,0,1,9999999,1,2,1,,,,35,29,35,,,,4,2,4,,,,40,22,40,,,,7250,0,7250,,,\n1970,2,209,,100,4.54,1,0,1,100,1,29,4,40,7050,1,,,2,,,29,,,40,,,4,,,0,,,40,,,2,,,7050,,,0,,\n1970,2,209,,100,4.54,1,0,2,100,2,40,0,2,0,1,,,1,,,29,,,29,,,4,,,4,,,40,,,40,,,7050,,,7050,,\n1970,2,209,,100,4.54,1,0,3,100,1,7,1,14,9999999,1,2,1,,,,29,40,29,,,,4,0,4,,,,40,2,40,,,,7050,0,7050,,,\n1970,2,210,,100,4.54,1,0,1,100,1,77,1,15,1750,1,,,2,,,77,,,64,,,1,,,2,,,15,,,26,,,1750,,,1050,,\n1970,2,210,,100,4.54,1,0,2,100,2,64,2,26,1050,1,,,1,,,77,,,77,,,1,,,1,,,15,,,15,,,1750,,,1750,,\n1970,2,210,,100,4.54,1,0,3,100,1,26,6,65,5350,1,,,,,,77,,,,,,1,,,,,,15,,,,,,1750,,,,,\n1970,2,211,,100,4.54,1,0,1,100,1,92,0,2,2250,1,,,2,,,92,,,76,,,0,,,1,,,2,,,16,,,2250,,,1150,,\n1970,2,211,,100,4.54,1,0,2,100,2,76,1,16,1150,1,,,1,,,92,,,92,,,0,,,0,,,2,,,2,,,2250,,,2250,,\n1970,2,212,,100,4.54,1,0,1,100,1,41,6,60,7350,1,,,2,,,41,,,38,,,6,,,4,,,60,,,40,,,7350,,,0,,\n1970,2,212,,100,4.54,1,0,2,100,2,38,4,40,0,1,,,1,,,41,,,41,,,6,,,6,,,60,,,60,,,7350,,,7350,,\n1970,2,212,,1
00,4.54,1,0,3,100,1,14,2,26,650,1,2,1,,,,41,38,41,,,,6,4,6,,,,60,40,60,,,,7350,0,7350,,,\n1970,2,212,,100,4.54,1,0,4,100,2,18,6,60,0,1,2,1,,,,41,38,41,,,,6,4,6,,,,60,40,60,,,,7350,0,7350,,,\n1970,2,212,,100,4.54,1,0,5,100,1,17,4,40,0,1,2,1,,,,41,38,41,,,,6,4,6,,,,60,40,60,,,,7350,0,7350,,,\n1970,2,212,,100,4.54,1,0,6,100,2,12,2,23,9999999,1,2,1,,,,41,38,41,,,,6,4,6,,,,60,40,60,,,,7350,0,7350,,,\n1970,2,212,,100,4.54,1,0,7,100,1,10,1,16,9999999,1,2,1,,,,41,38,41,,,,6,4,6,,,,60,40,60,,,,7350,0,7350,,,\n1970,2,212,,100,4.54,1,0,8,100,2,15,2,25,2150,1,2,1,,,,41,38,41,,,,6,4,6,,,,60,40,60,,,,7350,0,7350,,,\n1970,2,213,,100,4.54,1,0,1,100,2,55,2,22,1650,2,,,,,,55,,,,,,2,,,,,,22,,,,,,1650,,,,,\n1970,2,213,,100,4.54,1,0,2,100,1,23,6,60,0,2,2,,,,,55,55,,,,,2,2,,,,,22,22,,,,,1650,1650,,,,\n1970,2,213,,100,4.54,1,0,3,100,1,21,6,65,0,2,2,,,,,55,55,,,,,2,2,,,,,22,22,,,,,1650,1650,,,,\n1970,2,213,,100,4.54,1,0,4,100,1,16,4,40,0,2,2,,,,,55,55,,,,,2,2,,,,,22,22,,,,,1650,1650,,,,\n1970,2,213,,100,4.54,1,0,5,100,1,13,2,26,9999999,2,2,,,,,55,55,,,,,2,2,,,,,22,22,,,,,1650,1650,,,,\n1970,2,213,,100,4.54,1,0,6,100,1,12,2,23,9999999,2,2,,,,,55,55,,,,,2,2,,,,,22,22,,,,,1650,1650,,,,\n1970,2,214,,100,4.54,1,0,1,100,1,37,2,25,9550,1,,,2,,,37,,,47,,,2,,,6,,,25,,,60,,,9550,,,10650,,\n1970,2,214,,100,4.54,1,0,2,100,2,47,6,60,10650,1,,,1,,,37,,,37,,,2,,,2,,,25,,,25,,,9550,,,9550,,\n1970,2,214,,100,4.54,1,0,3,100,1,19,6,65,1950,1,2,1,,,,37,47,37,,,,2,6,2,,,,25,60,25,,,,9550,10650,9550,,,\n1970,2,215,,100,4.54,1,0,1,100,1,52,6,60,15350,1,,,2,,,52,,,51,,,6,,,6,,,60,,,60,,,15350,,,0,,\n1970,2,215,,100,4.54,1,0,2,100,2,51,6,60,0,1,,,1,,,52,,,52,,,6,,,6,,,60,,,60,,,15350,,,15350,,\n1970,2,215,,100,4.54,1,0,3,100,1,26,6,60,5750,1,2,1,,,,52,51,52,,,,6,6,6,,,,60,60,60,,,,15350,0,15350,,,\n1970,2,216,,100,4.54,1,0,1,100,1,50,10,100,6450,1,,,2,,,50,,,38,,,10,,,6,,,100,,,60,,,6450,,,2450,,\n1970,2,216,,100,4.54,1,0,2,100,2,38,6,60,2450,1,,,1,,,50,,,50,,,10,,,10,,,100,,,100,,,6450,,,6450,,\n1970,2,216,,100,4
.54,1,0,3,100,1,17,4,40,650,1,2,1,,,,50,38,50,,,,10,6,10,,,,100,60,100,,,,6450,2450,6450,,,\n1970,2,216,,100,4.54,1,0,4,100,1,16,4,40,950,1,2,1,,,,50,38,50,,,,10,6,10,,,,100,60,100,,,,6450,2450,6450,,,\n1970,2,216,,100,4.54,1,0,5,100,1,14,2,26,150,1,2,1,,,,50,38,50,,,,10,6,10,,,,100,60,100,,,,6450,2450,6450,,,\n1970,2,217,,100,4.54,1,0,1,100,1,39,6,60,11050,1,,,2,,,39,,,41,,,6,,,6,,,60,,,60,,,11050,,,1250,,\n1970,2,217,,100,4.54,1,0,2,100,2,41,6,60,1250,1,,,1,,,39,,,39,,,6,,,6,,,60,,,60,,,11050,,,11050,,\n1970,2,217,,100,4.54,1,0,3,100,1,17,4,40,0,1,2,1,,,,39,41,39,,,,6,6,6,,,,60,60,60,,,,11050,1250,11050,,,\n1970,2,218,,100,4.54,1,0,1,100,1,29,6,60,17750,1,,,2,,,29,,,29,,,6,,,6,,,60,,,60,,,17750,,,0,,\n1970,2,218,,100,4.54,1,0,2,100,2,29,6,60,0,1,,,1,,,29,,,29,,,6,,,6,,,60,,,60,,,17750,,,17750,,\n1970,2,218,,100,4.54,1,0,3,100,1,6,1,12,9999999,1,2,1,,,,29,29,29,,,,6,6,6,,,,60,60,60,,,,17750,0,17750,,,\n1970,2,218,,100,4.54,1,0,4,100,2,3,0,2,9999999,1,2,1,,,,29,29,29,,,,6,6,6,,,,60,60,60,,,,17750,0,17750,,,\n1970,2,219,,100,4.54,1,0,1,100,1,30,9,90,10050,1,,,2,,,30,,,22,,,9,,,6,,,90,,,60,,,10050,,,6050,,\n1970,2,219,,100,4.54,1,0,2,100,2,22,6,60,6050,1,,,1,,,30,,,30,,,9,,,9,,,90,,,90,,,10050,,,10050,,\n1970,2,220,,100,4.54,1,0,1,100,1,34,10,100,12050,1,,,2,,,34,,,30,,,10,,,10,,,100,,,100,,,12050,,,0,,\n1970,2,220,,100,4.54,1,0,2,100,2,30,10,100,0,1,,,1,,,34,,,34,,,10,,,10,,,100,,,100,,,12050,,,12050,,\n1970,2,221,,100,4.54,1,0,1,100,1,28,10,100,35050,1,,,,,,28,,,,,,10,,,,,,100,,,,,,35050,,,,,\n1970,2,222,,100,4.54,1,0,1,100,1,31,6,60,9850,1,,,2,,,31,,,26,,,6,,,7,,,60,,,70,,,9850,,,0,,\n1970,2,222,,100,4.54,1,0,2,100,2,26,7,70,0,1,,,1,,,31,,,31,,,6,,,6,,,60,,,60,,,9850,,,9850,,\n1970,2,222,,100,4.54,1,0,3,100,1,2,0,1,9999999,1,2,1,,,,31,26,31,,,,6,7,6,,,,60,70,60,,,,9850,0,9850,,,\n1970,2,222,,100,4.54,1,0,4,100,2,0,0,1,9999999,1,2,1,,,,31,26,31,,,,6,7,6,,,,60,70,60,,,,9850,0,9850,,,\n1970,2,223,,100,4.54,1,0,1,100,1,34,6,60,13250,1,,,2,,,34,,,30,,,6,,,6,,,60,,,60,,,
13250,,,0,,\n1970,2,223,,100,4.54,1,0,2,100,2,30,6,60,0,1,,,1,,,34,,,34,,,6,,,6,,,60,,,60,,,13250,,,13250,,\n1970,2,223,,100,4.54,1,0,3,100,1,7,1,14,9999999,1,2,1,,,,34,30,34,,,,6,6,6,,,,60,60,60,,,,13250,0,13250,,,\n1970,2,223,,100,4.54,1,0,4,100,1,5,0,2,9999999,1,2,1,,,,34,30,34,,,,6,6,6,,,,60,60,60,,,,13250,0,13250,,,\n1970,2,223,,100,4.54,1,0,5,100,1,2,0,1,9999999,1,2,1,,,,34,30,34,,,,6,6,6,,,,60,60,60,,,,13250,0,13250,,,\n1970,2,224,,100,4.54,1,0,1,100,1,52,8,80,15250,1,,,2,,,52,,,56,,,8,,,7,,,80,,,70,,,15250,,,0,,\n1970,2,224,,100,4.54,1,0,2,100,2,56,7,70,0,1,,,1,,,52,,,52,,,8,,,8,,,80,,,80,,,15250,,,15250,,\n1970,2,224,,100,4.54,1,0,3,100,2,19,6,60,3950,1,2,1,,,,52,56,52,,,,8,7,8,,,,80,70,80,,,,15250,0,15250,,,\n1970,2,225,,100,4.54,1,0,1,100,1,34,7,70,14050,1,,,2,,,34,,,32,,,7,,,8,,,70,,,80,,,14050,,,0,,\n1970,2,225,,100,4.54,1,0,2,100,2,32,8,80,0,1,,,1,,,34,,,34,,,7,,,7,,,70,,,70,,,14050,,,14050,,\n1970,2,225,,100,4.54,1,0,3,100,2,8,1,15,9999999,1,2,1,,,,34,32,34,,,,7,8,7,,,,70,80,70,,,,14050,0,14050,,,\n1970,2,225,,100,4.54,1,0,4,100,1,2,0,1,9999999,1,2,1,,,,34,32,34,,,,7,8,7,,,,70,80,70,,,,14050,0,14050,,,\n1970,2,226,,100,4.54,1,0,1,100,1,39,7,70,15650,1,,,2,,,39,,,38,,,7,,,3,,,70,,,30,,,15650,,,0,,\n1970,2,226,,100,4.54,1,0,2,100,2,38,3,30,0,1,,,1,,,39,,,39,,,7,,,7,,,70,,,70,,,15650,,,15650,,\n1970,2,226,,100,4.54,1,0,3,100,2,11,1,17,9999999,1,2,1,,,,39,38,39,,,,7,3,7,,,,70,30,70,,,,15650,0,15650,,,\n1970,2,226,,100,4.54,1,0,4,100,2,9,1,15,9999999,1,2,1,,,,39,38,39,,,,7,3,7,,,,70,30,70,,,,15650,0,15650,,,\n1970,2,226,,100,4.54,1,0,5,100,2,5,0,2,9999999,1,2,1,,,,39,38,39,,,,7,3,7,,,,70,30,70,,,,15650,0,15650,,,\n1970,2,226,,100,4.54,1,0,6,100,1,2,0,1,9999999,1,2,1,,,,39,38,39,,,,7,3,7,,,,70,30,70,,,,15650,0,15650,,,\n1970,2,226,,100,4.54,1,0,7,100,1,1,0,1,9999999,1,2,1,,,,39,38,39,,,,7,3,7,,,,70,30,70,,,,15650,0,15650,,,\n1970,2,227,,100,4.54,1,0,1,100,2,45,6,60,8050,2,,,,,,45,,,,,,6,,,,,,60,,,,,,8050,,,,,\n1970,2,227,,100,4.54,1,0,2,100,2,17,5,50,1350,2
,2,,,,,45,45,,,,,6,6,,,,,60,60,,,,,8050,8050,,,,\n1970,2,227,,100,4.54,1,0,3,100,2,20,8,80,2050,2,2,,,,,45,45,,,,,6,6,,,,,60,60,,,,,8050,8050,,,,\n1970,2,228,,100,4.54,1,0,1,100,1,60,8,80,18150,1,,,2,,,60,,,56,,,8,,,6,,,80,,,60,,,18150,,,250,,\n1970,2,228,,100,4.54,1,0,2,100,2,56,6,60,250,1,,,1,,,60,,,60,,,8,,,8,,,80,,,80,,,18150,,,18150,,\n1970,2,228,,100,4.54,1,0,3,100,2,62,2,26,0,1,,,,,,60,,,,,,8,,,,,,80,,,,,,18150,,,,,\n1970,2,230,,100,4.54,1,0,1,100,2,63,7,70,4250,2,,,,,,63,,,,,,7,,,,,,70,,,,,,4250,,,,,\n1970,2,230,,100,4.54,1,0,2,100,2,57,6,60,4350,2,,,,,,63,,,,,,7,,,,,,70,,,,,,4250,,,,,\n1970,2,231,,100,4.54,1,0,1,100,2,65,5,50,2250,2,,,,,,65,,,,,,5,,,,,,50,,,,,,2250,,,,,\n1970,2,232,,100,4.54,1,0,1,100,1,75,2,26,23850,1,,,2,,,75,,,58,,,2,,,6,,,26,,,60,,,23850,,,1850,,\n1970,2,232,,100,4.54,1,0,2,100,2,58,6,60,1850,1,,,1,,,75,,,75,,,2,,,2,,,26,,,26,,,23850,,,23850,,\n1970,2,233,,100,4.54,4,4,1,100,1,42,2,22,150,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,\n1970,2,234,,100,4.54,1,0,1,100,1,50,1,17,3850,1,,,,,,50,,,,,,1,,,,,,17,,,,,,3850,,,,,\n1970,2,235,,100,4.54,1,0,1,100,2,22,5,50,1050,2,,,,,,22,,,,,,5,,,,,,50,,,,,,1050,,,,,\n1970,2,235,,100,4.54,1,0,2,100,1,3,0,2,9999999,2,2,,,,,22,22,,,,,5,5,,,,,50,50,,,,,1050,1050,,,,\n1970,2,235,,100,4.54,1,0,3,100,2,20,6,60,3450,2,,,,,,22,,,,,,5,,,,,,50,,,,,,1050,,,,,\n1970,2,235,,100,4.54,1,0,4,100,1,1,0,1,9999999,2,2,,,,,22,20,,,,,5,6,,,,,50,60,,,,,1050,3450,,,,\n1970,2,236,,100,4.54,1,0,1,100,2,79,2,26,2150,2,,,,,,79,,,,,,2,,,,,,26,,,,,,2150,,,,,\n1970,2,237,,100,4.54,1,0,1,100,2,45,3,30,2150,2,,,,,,45,,,,,,3,,,,,,30,,,,,,2150,,,,,\n1970,2,237,,100,4.54,1,0,2,100,2,12,2,22,9999999,2,,,,,,45,,,,,,3,,,,,,30,,,,,,2150,,,,,\n1970,2,238,,100,4.54,1,0,1,100,2,40,6,60,2750,2,,,,,,40,,,,,,6,,,,,,60,,,,,,2750,,,,,\n1970,2,238,,100,4.54,1,0,2,100,1,18,6,60,650,2,2,,,,,40,40,,,,,6,6,,,,,60,60,,,,,2750,2750,,,,\n1970,2,238,,100,4.54,1,0,3,100,1,17,3,30,350,2,2,,,,,40,40,,,,,6,6,,,,,60,60,,,,,2750,2750,,,,\n1970,2,238,,100,4.54,1,0,4,100,1,13,2
,25,9999999,2,2,,,,,40,40,,,,,6,6,,,,,60,60,,,,,2750,2750,,,,\n1970,2,238,,100,4.54,1,0,5,100,1,11,1,17,9999999,2,2,,,,,40,40,,,,,6,6,,,,,60,60,,,,,2750,2750,,,,\n1970,2,238,,100,4.54,1,0,6,100,2,10,1,17,9999999,2,2,,,,,40,40,,,,,6,6,,,,,60,60,,,,,2750,2750,,,,\n1970,2,238,,100,4.54,1,0,7,100,2,9,1,15,9999999,2,2,,,,,40,40,,,,,6,6,,,,,60,60,,,,,2750,2750,,,,\n1970,2,240,,100,4.54,1,0,1,100,1,49,4,40,350,1,,,2,,,49,,,54,,,4,,,5,,,40,,,50,,,350,,,750,,\n1970,2,240,,100,4.54,1,0,2,100,2,54,5,50,750,1,,,1,,,49,,,49,,,4,,,4,,,40,,,40,,,350,,,350,,\n1970,2,240,,100,4.54,1,0,3,100,2,2,0,1,9999999,1,2,1,,,,49,54,49,,,,4,5,4,,,,40,50,40,,,,350,750,350,,,\n1970,2,241,,100,4.54,1,0,1,100,1,65,2,26,5550,1,,,2,,,65,,,49,,,2,,,4,,,26,,,40,,,5550,,,5350,,\n1970,2,241,,100,4.54,1,0,2,100,2,49,4,40,5350,1,,,1,,,65,,,65,,,2,,,2,,,26,,,26,,,5550,,,5550,,\n1970,2,241,,100,4.54,1,0,3,100,1,21,6,65,7550,1,2,1,,,,65,49,65,,,,2,4,2,,,,26,40,26,,,,5550,5350,5550,,,\n1970,2,241,,100,4.54,1,0,4,100,2,19,5,50,550,1,2,1,,,,65,49,65,,,,2,4,2,,,,26,40,26,,,,5550,5350,5550,,,\n1970,2,241,,100,4.54,1,0,5,100,2,74,6,60,650,1,,,,,,65,,,,,,2,,,,,,26,,,,,,5550,,,,,\n1970,2,242,,100,4.54,1,0,1,100,1,72,7,70,1250,1,,,2,,,72,,,62,,,7,,,2,,,70,,,23,,,1250,,,350,,\n1970,2,242,,100,4.54,1,0,2,100,2,62,2,23,350,1,,,1,,,72,,,72,,,7,,,7,,,70,,,70,,,1250,,,1250,,\n1970,2,243,,100,4.54,1,0,1,100,1,28,6,60,12050,1,,,2,,,28,,,28,,,6,,,6,,,60,,,60,,,12050,,,1550,,\n1970,2,243,,100,4.54,1,0,2,100,2,28,6,60,1550,1,,,1,,,28,,,28,,,6,,,6,,,60,,,60,,,12050,,,12050,,\n1970,2,243,,100,4.54,1,0,3,100,1,6,1,11,9999999,1,2,1,,,,28,28,28,,,,6,6,6,,,,60,60,60,,,,12050,1550,12050,,,\n1970,2,243,,100,4.54,1,0,4,100,2,4,1,11,9999999,1,2,1,,,,28,28,28,,,,6,6,6,,,,60,60,60,,,,12050,1550,12050,,,\n1970,2,243,,100,4.54,1,0,5,100,2,0,0,1,9999999,1,2,1,,,,28,28,28,,,,6,6,6,,,,60,60,60,,,,12050,1550,12050,,,\n1970,2,244,,100,4.54,1,0,1,100,1,28,6,60,8050,1,,,,,,28,,,,,,6,,,,,,60,,,,,,8050,,,,,\n1970,2,244,,100,4.54,1,0,2,100,2,25,6,60,65
50,1,,,,,,28,,,,,,6,,,,,,60,,,,,,8050,,,,,\n1970,2,245,,100,4.54,1,0,1,100,1,44,4,40,10050,1,,,2,,,44,,,43,,,4,,,4,,,40,,,40,,,10050,,,1450,,\n1970,2,245,,100,4.54,1,0,2,100,2,43,4,40,1450,1,,,1,,,44,,,44,,,4,,,4,,,40,,,40,,,10050,,,10050,,\n1970,2,245,,100,4.54,1,0,3,100,2,18,6,65,2750,1,2,1,,,,44,43,44,,,,4,4,4,,,,40,40,40,,,,10050,1450,10050,,,\n1970,2,245,,100,4.54,1,0,4,100,1,11,1,17,9999999,1,2,1,,,,44,43,44,,,,4,4,4,,,,40,40,40,,,,10050,1450,10050,,,\n1970,2,246,,100,4.54,1,0,1,100,1,27,10,100,5050,1,,,,,,27,,,,,,10,,,,,,100,,,,,,5050,,,,,\n1970,2,246,,100,4.54,1,0,2,100,1,23,9,90,7350,1,,,,,,27,,,,,,10,,,,,,100,,,,,,5050,,,,,\n1970,2,247,,100,4.54,1,0,1,100,1,59,4,40,350,1,,,,,,59,,,,,,4,,,,,,40,,,,,,350,,,,,\n1970,2,247,,100,4.54,1,0,2,100,2,52,4,40,7350,1,,,,,,59,,,,,,4,,,,,,40,,,,,,350,,,,,\n1970,2,248,,100,4.54,1,0,1,100,1,57,4,40,10750,1,,,2,,,57,,,38,,,4,,,6,,,40,,,60,,,10750,,,0,,\n1970,2,248,,100,4.54,1,0,2,100,2,38,6,60,0,1,,,1,,,57,,,57,,,4,,,4,,,40,,,40,,,10750,,,10750,,\n1970,2,248,,100,4.54,1,0,3,100,2,16,2,23,0,1,2,1,,,,57,38,57,,,,4,6,4,,,,40,60,40,,,,10750,0,10750,,,\n1970,2,248,,100,4.54,1,0,4,100,2,4,0,2,9999999,1,2,1,,,,57,38,57,,,,4,6,4,,,,40,60,40,,,,10750,0,10750,,,\n1970,2,248,,100,4.54,1,0,5,100,2,8,1,14,9999999,1,2,1,,,,57,38,57,,,,4,6,4,,,,40,60,40,,,,10750,0,10750,,,\n1970,2,250,,100,4.54,1,0,1,100,1,59,6,60,7050,1,,,,,,59,,,,,,6,,,,,,60,,,,,,7050,,,,,\n1970,2,251,,100,4.54,1,0,1,100,1,41,6,60,10050,1,,,2,,,41,,,33,,,6,,,6,,,60,,,60,,,10050,,,0,,\n1970,2,251,,100,4.54,1,0,2,100,2,33,6,60,0,1,,,1,,,41,,,41,,,6,,,6,,,60,,,60,,,10050,,,10050,,\n1970,2,251,,100,4.54,1,0,3,100,1,10,2,22,9999999,1,2,1,,,,41,33,41,,,,6,6,6,,,,60,60,60,,,,10050,0,10050,,,\n1970,2,251,,100,4.54,1,0,4,100,1,6,1,11,9999999,1,2,1,,,,41,33,41,,,,6,6,6,,,,60,60,60,,,,10050,0,10050,,,\n1970,2,252,,100,4.54,1,0,1,100,1,51,2,26,10050,1,,,2,,,51,,,46,,,2,,,2,,,26,,,26,,,10050,,,0,,\n1970,2,252,,100,4.54,1,0,2,100,2,46,2,26,0,1,,,1,,,51,,,51,,,2,,,2,,,26,,,26,,,10050
,,,10050,,\n1970,2,252,,100,4.54,1,0,3,100,1,15,2,26,0,1,2,1,,,,51,46,51,,,,2,2,2,,,,26,26,26,,,,10050,0,10050,,,\n1970,2,253,,100,4.54,1,0,1,100,2,47,6,60,8050,2,,,,,,47,,,,,,6,,,,,,60,,,,,,8050,,,,,\n1970,2,254,,100,4.54,1,0,1,100,1,33,8,80,6050,1,,,2,,,33,,,41,,,8,,,6,,,80,,,60,,,6050,,,5050,,\n1970,2,254,,100,4.54,1,0,2,100,2,41,6,60,5050,1,,,1,,,33,,,33,,,8,,,8,,,80,,,80,,,6050,,,6050,,\n1970,2,254,,100,4.54,1,0,3,100,1,15,3,30,150,1,2,1,,,,33,41,33,,,,8,6,8,,,,80,60,80,,,,6050,5050,6050,,,\n1970,2,254,,100,4.54,1,0,4,100,1,14,2,26,0,1,2,1,,,,33,41,33,,,,8,6,8,,,,80,60,80,,,,6050,5050,6050,,,\n1970,2,254,,100,4.54,1,0,5,100,2,12,2,23,9999999,1,2,1,,,,33,41,33,,,,8,6,8,,,,80,60,80,,,,6050,5050,6050,,,\n1970,2,254,,100,4.54,1,0,6,100,2,47,8,80,6050,1,,,,,,33,,,,,,8,,,,,,80,,,,,,6050,,,,,\n1970,2,255,,100,4.54,1,0,1,100,1,42,6,60,20050,1,,,2,,,42,,,36,,,6,,,10,,,60,,,100,,,20050,,,13150,,\n1970,2,255,,100,4.54,1,0,2,100,2,36,10,100,13150,1,,,1,,,42,,,42,,,6,,,6,,,60,,,60,,,20050,,,20050,,\n1970,2,256,,100,4.54,1,0,1,100,1,62,11,111,16950,1,,,2,,,62,,,57,,,11,,,10,,,111,,,100,,,16950,,,50,,\n1970,2,256,,100,4.54,1,0,2,100,2,57,10,100,50,1,,,1,,,62,,,62,,,11,,,11,,,111,,,111,,,16950,,,16950,,\n1970,2,256,,100,4.54,1,0,3,100,2,19,7,70,850,1,2,1,,,,62,57,62,,,,11,10,11,,,,111,100,111,,,,16950,50,16950,,,\n1970,2,257,,100,4.54,1,0,1,100,1,38,11,111,23050,1,,,2,,,38,,,37,,,11,,,9,,,111,,,90,,,23050,,,0,,\n1970,2,257,,100,4.54,1,0,2,100,2,37,9,90,0,1,,,1,,,38,,,38,,,11,,,11,,,111,,,111,,,23050,,,23050,,\n1970,2,257,,100,4.54,1,0,3,100,2,14,2,26,0,1,2,1,,,,38,37,38,,,,11,9,11,,,,111,90,111,,,,23050,0,23050,,,\n1970,2,257,,100,4.54,1,0,4,100,2,11,2,22,9999999,1,2,1,,,,38,37,38,,,,11,9,11,,,,111,90,111,,,,23050,0,23050,,,\n1970,2,257,,100,4.54,1,0,5,100,1,9,1,16,9999999,1,2,1,,,,38,37,38,,,,11,9,11,,,,111,90,111,,,,23050,0,23050,,,\n1970,2,258,,100,4.54,1,0,1,100,1,38,6,65,3050,1,,,,,,38,,,,,,6,,,,,,65,,,,,,3050,,,,,\n1970,2,259,,100,4.54,1,0,1,100,2,69,3,30,4150,2,,,,,,69,
,,,,,3,,,,,,30,,,,,,4150,,,,,\n1970,2,259,,100,4.54,1,0,2,100,1,26,2,23,1650,2,,,,,,69,,,,,,3,,,,,,30,,,,,,4150,,,,,\n1970,2,260,,100,4.54,3,0,1,100,2,89,6,60,850,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,\n1970,2,261,,100,4.54,1,0,1,100,1,36,6,60,11450,1,,,2,,,36,,,35,,,6,,,6,,,60,,,60,,,11450,,,250,,\n1970,2,261,,100,4.54,1,0,2,100,2,35,6,60,250,1,,,1,,,36,,,36,,,6,,,6,,,60,,,60,,,11450,,,11450,,\n1970,2,261,,100,4.54,1,0,3,100,2,8,1,14,9999999,1,2,1,,,,36,35,36,,,,6,6,6,,,,60,60,60,,,,11450,250,11450,,,\n1970,2,261,,100,4.54,1,0,4,100,2,5,0,2,9999999,1,2,1,,,,36,35,36,,,,6,6,6,,,,60,60,60,,,,11450,250,11450,,,\n1970,2,262,,100,4.54,1,0,1,100,1,59,6,60,18250,1,,,2,,,59,,,51,,,6,,,6,,,60,,,60,,,18250,,,0,,\n1970,2,262,,100,4.54,1,0,2,100,2,51,6,60,0,1,,,1,,,59,,,59,,,6,,,6,,,60,,,60,,,18250,,,18250,,\n1970,2,263,,100,4.54,1,0,1,100,1,48,6,60,10050,1,,,2,,,48,,,47,,,6,,,3,,,60,,,30,,,10050,,,0,,\n1970,2,263,,100,4.54,1,0,2,100,2,47,3,30,0,1,,,1,,,48,,,48,,,6,,,6,,,60,,,60,,,10050,,,10050,,\n1970,2,263,,100,4.54,1,0,3,100,1,20,6,60,0,1,2,1,,,,48,47,48,,,,6,3,6,,,,60,30,60,,,,10050,0,10050,,,\n1970,2,263,,100,4.54,1,0,4,100,1,14,2,23,0,1,2,1,,,,48,47,48,,,,6,3,6,,,,60,30,60,,,,10050,0,10050,,,\n1970,2,263,,100,4.54,1,0,5,100,2,11,1,16,9999999,1,2,1,,,,48,47,48,,,,6,3,6,,,,60,30,60,,,,10050,0,10050,,,\n1970,2,264,,100,4.54,1,0,1,100,1,34,11,110,10850,1,,,2,,,34,,,34,,,11,,,11,,,110,,,110,,,10850,,,150,,\n1970,2,264,,100,4.54,1,0,2,100,2,34,11,110,150,1,,,1,,,34,,,34,,,11,,,11,,,110,,,110,,,10850,,,10850,,\n1970,2,264,,100,4.54,1,0,3,100,2,5,1,11,9999999,1,2,1,,,,34,34,34,,,,11,11,11,,,,110,110,110,,,,10850,150,10850,,,\n1970,2,264,,100,4.54,1,0,4,100,2,2,0,1,9999999,1,2,1,,,,34,34,34,,,,11,11,11,,,,110,110,110,,,,10850,150,10850,,,\n1970,2,265,,100,4.54,1,0,1,100,1,67,8,80,5450,1,,,2,,,67,,,62,,,8,,,7,,,80,,,70,,,5450,,,11550,,\n1970,2,265,,100,4.54,1,0,2,100,2,62,7,70,11550,1,,,1,,,67,,,67,,,8,,,8,,,80,,,80,,,5450,,,5450,,\n1970,2,265,,100,4.54,1,0,3,100,1,21,8,80,0,1,2,1,,,,67,62
,67,,,,8,7,8,,,,80,70,80,,,,5450,11550,5450,,,\n1970,2,266,,100,4.54,1,0,1,100,1,77,4,40,15750,1,,,2,,,77,,,74,,,4,,,6,,,40,,,60,,,15750,,,2250,,\n1970,2,266,,100,4.54,1,0,2,100,2,74,6,60,2250,1,,,1,,,77,,,77,,,4,,,4,,,40,,,40,,,15750,,,15750,,\n1970,2,267,,100,4.54,1,0,1,100,1,53,6,60,12050,1,,,2,,,53,,,52,,,6,,,6,,,60,,,60,,,12050,,,3550,,\n1970,2,267,,100,4.54,1,0,2,100,2,52,6,60,3550,1,,,1,,,53,,,53,,,6,,,6,,,60,,,60,,,12050,,,12050,,\n1970,2,267,,100,4.54,1,0,3,100,2,20,8,80,2550,1,2,1,,,,53,52,53,,,,6,6,6,,,,60,60,60,,,,12050,3550,12050,,,\n1970,2,268,,100,4.54,1,0,1,100,1,39,11,111,1650,1,,,2,,,39,,,37,,,11,,,10,,,111,,,100,,,1650,,,3350,,\n1970,2,268,,100,4.54,1,0,2,100,2,37,10,100,3350,1,,,1,,,39,,,39,,,11,,,11,,,111,,,111,,,1650,,,1650,,\n1970,2,268,,100,4.54,1,0,3,100,1,6,1,12,9999999,1,2,1,,,,39,37,39,,,,11,10,11,,,,111,100,111,,,,1650,3350,1650,,,\n1970,2,269,,100,4.54,1,0,1,100,2,50,2,26,2850,2,,,,,,50,,,,,,2,,,,,,26,,,,,,2850,,,,,\n1970,2,269,,100,4.54,1,0,2,100,2,17,3,30,2750,2,2,,,,,50,50,,,,,2,2,,,,,26,26,,,,,2850,2850,,,,\n1970,2,269,,100,4.54,1,0,3,100,1,26,2,25,0,2,,,,,,50,,,,,,2,,,,,,26,,,,,,2850,,,,,\n1970,2,269,,100,4.54,1,0,4,100,1,26,1,17,13250,2,,,,,,50,,,,,,2,,,,,,26,,,,,,2850,,,,,\n1970,2,269,,100,4.54,1,0,5,100,1,26,1,15,13250,2,,,,,,50,,,,,,2,,,,,,26,,,,,,2850,,,,,\n1970,2,270,,100,4.54,1,0,1,100,1,44,7,70,7150,1,,,2,,,44,,,48,,,7,,,8,,,70,,,80,,,7150,,,2650,,\n1970,2,270,,100,4.54,1,0,2,100,2,48,8,80,2650,1,,,1,,,44,,,44,,,7,,,7,,,70,,,70,,,7150,,,7150,,\n1970,2,270,,100,4.54,1,0,3,100,2,12,2,22,9999999,1,2,1,,,,44,48,44,,,,7,8,7,,,,70,80,70,,,,7150,2650,7150,,,\n1970,2,270,,100,4.54,1,0,4,100,2,10,1,17,9999999,1,2,1,,,,44,48,44,,,,7,8,7,,,,70,80,70,,,,7150,2650,7150,,,\n1970,2,271,,100,4.54,1,0,1,100,1,34,11,111,5550,1,,,2,,,34,,,29,,,11,,,11,,,111,,,111,,,5550,,,8250,,\n1970,2,271,,100,4.54,1,0,2,100,2,29,11,111,8250,1,,,1,,,34,,,34,,,11,,,11,,,111,,,111,,,5550,,,5550,,\n1970,2,271,,100,4.54,1,0,3,100,2,2,0,1,9999999,1,2,1,,,,34,29,
34,,,,11,11,11,,,,111,111,111,,,,5550,8250,5550,,,\n1970,2,272,,100,4.54,1,0,1,100,1,43,11,110,10850,1,,,2,,,43,,,34,,,11,,,11,,,110,,,111,,,10850,,,11550,,\n1970,2,272,,100,4.54,1,0,2,100,2,34,11,111,11550,1,,,1,,,43,,,43,,,11,,,11,,,110,,,110,,,10850,,,10850,,\n1970,2,272,,100,4.54,1,0,3,100,2,9,1,17,9999999,1,2,1,,,,43,34,43,,,,11,11,11,,,,110,111,110,,,,10850,11550,10850,,,\n1970,2,273,,100,4.54,1,0,1,100,2,28,2,26,4150,2,2,,,,,28,59,,,,,2,2,,,,,26,23,,,,,4150,0,,,,\n1970,2,273,,100,4.54,1,0,2,100,1,7,1,14,9999999,2,2,,,,,28,28,,,,,2,2,,,,,26,26,,,,,4150,4150,,,,\n1970,2,273,,100,4.54,1,0,3,100,2,59,2,23,0,2,,,,,,28,,,,,,2,,,,,,26,,,,,,4150,,,,,\n1970,2,274,,100,4.54,1,0,1,100,1,38,2,26,5050,1,,,2,,,38,,,35,,,2,,,6,,,26,,,60,,,5050,,,4050,,\n1970,2,274,,100,4.54,1,0,2,100,2,35,6,60,4050,1,,,1,,,38,,,38,,,2,,,2,,,26,,,26,,,5050,,,5050,,\n1970,2,274,,100,4.54,1,0,3,100,2,15,3,30,350,1,2,1,,,,38,35,38,,,,2,6,2,,,,26,60,26,,,,5050,4050,5050,,,\n1970,2,274,,100,4.54,1,0,4,100,2,14,2,25,0,1,2,1,,,,38,35,38,,,,2,6,2,,,,26,60,26,,,,5050,4050,5050,,,\n1970,2,274,,100,4.54,1,0,5,100,1,9,1,15,9999999,1,2,1,,,,38,35,38,,,,2,6,2,,,,26,60,26,,,,5050,4050,5050,,,\n1970,2,274,,100,4.54,1,0,6,100,1,8,1,14,9999999,1,2,1,,,,38,35,38,,,,2,6,2,,,,26,60,26,,,,5050,4050,5050,,,\n1970,2,275,,100,4.54,1,0,1,100,1,72,1,14,0,1,,,,,,72,,,,,,1,,,,,,14,,,,,,0,,,,,\n1970,2,276,,100,4.54,1,0,1,100,2,27,3,30,3250,2,,,,,,27,,,,,,3,,,,,,30,,,,,,3250,,,,,\n1970,2,276,,100,4.54,1,0,2,100,2,9,1,16,9999999,2,2,,,,,27,27,,,,,3,3,,,,,30,30,,,,,3250,3250,,,,\n1970,2,276,,100,4.54,1,0,3,100,1,8,1,15,9999999,2,2,,,,,27,27,,,,,3,3,,,,,30,30,,,,,3250,3250,,,,\n1970,2,276,,100,4.54,1,0,4,100,1,7,1,14,9999999,2,2,,,,,27,27,,,,,3,3,,,,,30,30,,,,,3250,3250,,,,\n1970,2,276,,100,4.54,1,0,5,100,1,6,1,11,9999999,2,2,,,,,27,27,,,,,3,3,,,,,30,30,,,,,3250,3250,,,,\n1970,2,276,,100,4.54,1,0,6,100,1,4,0,2,9999999,2,2,,,,,27,27,,,,,3,3,,,,,30,30,,,,,3250,3250,,,,\n1970,2,276,,100,4.54,1,0,7,100,2,3,0,2,9999999,2,2,,,,,27
,27,,,,,3,3,,,,,30,30,,,,,3250,3250,,,,\n1970,2,277,,100,4.54,1,0,1,100,1,74,2,26,4450,1,,,2,,,74,,,58,,,2,,,2,,,26,,,26,,,4450,,,4450,,\n1970,2,277,,100,4.54,1,0,2,100,2,58,2,26,4450,1,,,1,,,74,,,74,,,2,,,2,,,26,,,26,,,4450,,,4450,,\n1970,2,278,,100,4.54,1,0,1,100,1,34,3,30,9350,1,,,2,,,34,,,24,,,3,,,6,,,30,,,60,,,9350,,,0,,\n1970,2,278,,100,4.54,1,0,2,100,2,24,6,60,0,1,,,1,,,34,,,34,,,3,,,3,,,30,,,30,,,9350,,,9350,,\n1970,2,279,,100,4.54,1,0,1,100,2,64,6,60,7550,2,,,,,,64,,,,,,6,,,,,,60,,,,,,7550,,,,,\n1970,2,280,,100,4.54,1,0,1,100,1,48,6,60,7550,1,,,,,,48,,,,,,6,,,,,,60,,,,,,7550,,,,,\n1970,2,281,,100,4.54,1,0,1,100,1,47,11,110,9250,1,,,2,,,47,,,40,,,11,,,6,,,110,,,60,,,9250,,,4450,,\n1970,2,281,,100,4.54,1,0,2,100,2,40,6,60,4450,1,,,1,,,47,,,47,,,11,,,11,,,110,,,110,,,9250,,,9250,,\n1970,2,281,,100,4.54,1,0,3,100,1,16,4,40,0,1,2,1,,,,47,40,47,,,,11,6,11,,,,110,60,110,,,,9250,4450,9250,,,\n1970,2,281,,100,4.54,1,0,4,100,1,12,2,22,9999999,1,2,1,,,,47,40,47,,,,11,6,11,,,,110,60,110,,,,9250,4450,9250,,,\n1970,2,282,,100,4.54,1,0,1,100,1,36,2,26,5250,1,,,,,,36,,,,,,2,,,,,,26,,,,,,5250,,,,,\n1970,2,282,,100,4.54,1,0,2,100,1,39,2,26,5050,1,,,,,,36,,,,,,2,,,,,,26,,,,,,5250,,,,,\n1970,2,282,,100,4.54,1,0,3,100,2,77,2,23,650,1,,,,,,36,,,,,,2,,,,,,26,,,,,,5250,,,,,\n1970,2,282,,100,4.54,1,0,4,100,1,47,4,40,0,1,,,,,,36,,,,,,2,,,,,,26,,,,,,5250,,,,,\n1970,2,282,,100,4.54,1,0,5,100,2,14,2,26,2450,1,,1,,,,36,,47,,,,2,,4,,,,26,,40,,,,5250,,0,,,\n1970,2,282,,100,4.54,1,0,6,100,2,10,1,16,9999999,1,,1,,,,36,,47,,,,2,,4,,,,26,,40,,,,5250,,0,,,\n1970,2,282,,100,4.54,1,0,7,100,1,8,1,14,9999999,1,,1,,,,36,,47,,,,2,,4,,,,26,,40,,,,5250,,0,,,\n1970,2,283,,100,4.54,1,0,1,100,2,23,8,80,7850,2,,,,,,23,,,,,,8,,,,,,80,,,,,,7850,,,,,\n1970,2,283,,100,4.54,1,0,2,100,2,18,4,40,0,2,,,,,,23,,,,,,8,,,,,,80,,,,,,7850,,,,,\n1970,2,284,,100,4.54,1,0,1,100,2,31,1,16,6250,2,,,,,,31,,,,,,1,,,,,,16,,,,,,6250,,,,,\n1970,2,284,,100,4.54,1,0,2,100,1,5,0,2,9999999,2,2,,,,,31,31,,,,,1,1,,,,,16,16,,,,,6250,62
50,,,,\n1970,2,284,,100,4.54,1,0,3,100,2,4,0,2,9999999,2,2,,,,,31,31,,,,,1,1,,,,,16,16,,,,,6250,6250,,,,\n1970,2,285,,100,4.54,1,0,1,100,1,49,2,26,9050,1,,,2,,,49,,,50,,,2,,,2,,,26,,,26,,,9050,,,1550,,\n1970,2,285,,100,4.54,1,0,2,100,2,50,2,26,1550,1,,,1,,,49,,,49,,,2,,,2,,,26,,,26,,,9050,,,9050,,\n1970,2,286,,100,4.54,1,0,1,100,2,61,2,26,8450,2,,,,,,61,,,,,,2,,,,,,26,,,,,,8450,,,,,\n1970,2,287,,100,4.54,1,0,1,100,2,14,2,26,0,2,,,,,,14,,,,,,2,,,,,,26,,,,,,0,,,,,\n1970,2,287,,100,4.54,1,0,2,100,1,10,1,17,9999999,2,2,,,,,14,14,,,,,2,2,,,,,26,26,,,,,0,0,,,,\n1970,2,287,,100,4.54,1,0,3,100,1,12,1,17,9999999,2,,,,,,14,,,,,,2,,,,,,26,,,,,,0,,,,,\n1970,2,287,,100,4.54,1,0,4,100,1,9,1,16,9999999,2,,,,,,14,,,,,,2,,,,,,26,,,,,,0,,,,,\n1970,2,287,,100,4.54,1,0,5,100,1,5,1,12,9999999,2,,,,,,14,,,,,,2,,,,,,26,,,,,,0,,,,,\n1970,2,287,,100,4.54,1,0,6,100,1,48,2,25,6450,2,,,,,,14,,,,,,2,,,,,,26,,,,,,0,,,,,\n1970,2,287,,100,4.54,1,0,7,100,2,2,0,1,9999999,2,,,,,,14,,,,,,2,,,,,,26,,,,,,0,,,,,\n1970,2,288,,100,4.54,1,0,1,100,1,58,4,40,8050,1,,,2,,,58,,,53,,,4,,,6,,,40,,,60,,,8050,,,7050,,\n1970,2,288,,100,4.54,1,0,2,100,2,53,6,60,7050,1,,,1,,,58,,,58,,,4,,,4,,,40,,,40,,,8050,,,8050,,\n1970,2,289,,100,4.54,1,0,1,100,1,67,3,30,14750,1,,,,,,67,,,,,,3,,,,,,30,,,,,,14750,,,,,\n1970,2,290,,100,4.54,1,0,1,100,1,73,2,25,1950,1,,,2,,,73,,,68,,,2,,,2,,,25,,,25,,,1950,,,650,,\n1970,2,290,,100,4.54,1,0,2,100,2,68,2,25,650,1,2,,1,,,73,89,,73,,,2,2,,2,,,25,25,,25,,,1950,850,,1950,,\n1970,2,290,,100,4.54,1,0,3,100,2,89,2,25,850,1,,,,,,73,,,,,,2,,,,,,25,,,,,,1950,,,,,\n1970,2,291,,100,4.54,1,0,1,100,1,68,2,26,3450,1,,,2,,,68,,,65,,,2,,,4,,,26,,,40,,,3450,,,1750,,\n1970,2,291,,100,4.54,1,0,2,100,2,65,4,40,1750,1,,,1,,,68,,,68,,,2,,,2,,,26,,,26,,,3450,,,3450,,\n1970,2,292,,100,4.54,1,0,1,100,1,29,6,60,12050,1,,,2,,,29,,,28,,,6,,,6,,,60,,,60,,,12050,,,4050,,\n1970,2,292,,100,4.54,1,0,2,100,2,28,6,60,4050,1,,,1,,,29,,,29,,,6,,,6,,,60,,,60,,,12050,,,12050,,\n1970,2,292,,100,4.54,1,0,3,100,2,7,1,12,9999999,
1,2,1,,,,29,28,29,,,,6,6,6,,,,60,60,60,,,,12050,4050,12050,,,\n1970,2,292,,100,4.54,1,0,4,100,1,6,1,11,9999999,1,2,1,,,,29,28,29,,,,6,6,6,,,,60,60,60,,,,12050,4050,12050,,,\n1970,2,292,,100,4.54,1,0,5,100,1,3,0,2,9999999,1,2,1,,,,29,28,29,,,,6,6,6,,,,60,60,60,,,,12050,4050,12050,,,\n1970,2,293,,100,4.54,1,0,1,100,2,63,6,60,9950,2,,,,,,63,,,,,,6,,,,,,60,,,,,,9950,,,,,\n1970,2,294,,100,4.54,1,0,1,100,1,42,6,60,30150,1,,,2,,,42,,,39,,,6,,,7,,,60,,,70,,,30150,,,1050,,\n1970,2,294,,100,4.54,1,0,2,100,2,39,7,70,1050,1,,,1,,,42,,,42,,,6,,,6,,,60,,,60,,,30150,,,30150,,\n1970,2,294,,100,4.54,1,0,3,100,2,16,4,40,450,1,2,1,,,,42,39,42,,,,6,7,6,,,,60,70,60,,,,30150,1050,30150,,,\n1970,2,294,,100,4.54,1,0,4,100,1,15,3,30,450,1,2,1,,,,42,39,42,,,,6,7,6,,,,60,70,60,,,,30150,1050,30150,,,\n1970,2,294,,100,4.54,1,0,5,100,1,14,2,26,0,1,2,1,,,,42,39,42,,,,6,7,6,,,,60,70,60,,,,30150,1050,30150,,,\n1970,2,294,,100,4.54,1,0,6,100,1,11,2,22,9999999,1,2,1,,,,42,39,42,,,,6,7,6,,,,60,70,60,,,,30150,1050,30150,,,\n1970,2,294,,100,4.54,1,0,7,100,2,9,1,16,9999999,1,2,1,,,,42,39,42,,,,6,7,6,,,,60,70,60,,,,30150,1050,30150,,,\n1970,2,295,,100,4.54,1,0,1,100,1,70,5,50,450,1,,,2,,,70,,,70,,,5,,,5,,,50,,,50,,,450,,,150,,\n1970,2,295,,100,4.54,1,0,2,100,2,70,5,50,150,1,,,1,,,70,,,70,,,5,,,5,,,50,,,50,,,450,,,450,,\n1970,2,296,,100,4.54,1,0,1,100,1,47,5,50,7050,1,,,2,,,47,,,44,,,5,,,5,,,50,,,50,,,7050,,,4550,,\n1970,2,296,,100,4.54,1,0,2,100,2,44,5,50,4550,1,2,,1,,,47,71,,47,,,5,4,,5,,,50,40,,50,,,7050,650,,7050,,\n1970,2,296,,100,4.54,1,0,3,100,2,71,4,40,650,1,,,,,,47,,,,,,5,,,,,,50,,,,,,7050,,,,,\n1970,2,296,,100,4.54,1,0,4,100,1,17,5,50,550,1,2,1,,,,47,44,47,,,,5,5,5,,,,50,50,50,,,,7050,4550,7050,,,\n1970,2,297,,100,4.54,1,0,1,100,1,33,10,100,17650,1,,,2,,,33,,,33,,,10,,,10,,,100,,,100,,,17650,,,0,,\n1970,2,297,,100,4.54,1,0,2,100,2,33,10,100,0,1,,,1,,,33,,,33,,,10,,,10,,,100,,,100,,,17650,,,17650,,\n1970,2,297,,100,4.54,1,0,3,100,1,8,1,15,9999999,1,2,1,,,,33,33,33,,,,10,10,10,,,,100,100,100,,,,176
50,0,17650,,,\n1970,2,297,,100,4.54,1,0,4,100,2,1,0,1,9999999,1,2,1,,,,33,33,33,,,,10,10,10,,,,100,100,100,,,,17650,0,17650,,,\n1970,2,297,,100,4.54,1,0,5,100,2,4,1,11,9999999,1,2,1,,,,33,33,33,,,,10,10,10,,,,100,100,100,,,,17650,0,17650,,,\n1970,2,298,,100,4.54,1,0,1,100,1,49,2,26,14050,1,,,2,,,49,,,46,,,2,,,6,,,26,,,60,,,14050,,,7450,,\n1970,2,298,,100,4.54,1,0,2,100,2,46,6,60,7450,1,,,1,,,49,,,49,,,2,,,2,,,26,,,26,,,14050,,,14050,,\n1970,2,298,,100,4.54,1,0,3,100,2,13,2,23,9999999,1,2,1,,,,49,46,49,,,,2,6,2,,,,26,60,26,,,,14050,7450,14050,,,\n1970,2,299,,100,4.54,1,0,1,100,1,47,6,60,15750,1,,,2,,,47,,,46,,,6,,,6,,,60,,,60,,,15750,,,4850,,\n1970,2,299,,100,4.54,1,0,2,100,2,46,6,60,4850,1,,,1,,,47,,,47,,,6,,,6,,,60,,,60,,,15750,,,15750,,\n1970,2,299,,100,4.54,1,0,3,100,2,18,6,65,2550,1,2,1,,,,47,46,47,,,,6,6,6,,,,60,60,60,,,,15750,4850,15750,,,\n1970,2,300,,100,4.54,1,0,1,100,1,64,5,50,2450,1,,,2,,,64,,,63,,,5,,,2,,,50,,,22,,,2450,,,0,,\n1970,2,300,,100,4.54,1,0,2,100,2,63,2,22,0,1,,,1,,,64,,,64,,,5,,,5,,,50,,,50,,,2450,,,2450,,\n1970,2,301,,100,4.54,1,0,1,100,1,20,7,70,8050,1,,,2,,,20,,,21,,,7,,,7,,,70,,,70,,,8050,,,3050,,\n1970,2,301,,100,4.54,1,0,2,100,2,21,7,70,3050,1,,,1,,,20,,,20,,,7,,,7,,,70,,,70,,,8050,,,8050,,\n1970,2,301,,100,4.54,1,0,3,100,1,0,0,1,9999999,1,2,1,,,,20,21,20,,,,7,7,7,,,,70,70,70,,,,8050,3050,8050,,,\n1970,2,302,,100,4.54,1,0,1,100,2,23,2,26,4450,2,,,,,,23,,,,,,2,,,,,,26,,,,,,4450,,,,,\n1970,2,303,,100,4.54,1,0,1,100,1,45,6,60,11750,1,,,2,,,45,,,41,,,6,,,6,,,60,,,60,,,11750,,,0,,\n1970,2,303,,100,4.54,1,0,2,100,2,41,6,60,0,1,2,1,1,,,45,67,71,45,,,6,2,2,6,,,60,26,23,60,,,11750,1450,3350,11750,,\n1970,2,303,,100,4.54,1,0,3,100,2,21,9,90,550,1,2,1,,,,45,41,45,,,,6,6,6,,,,60,60,60,,,,11750,0,11750,,,\n1970,2,303,,100,4.54,1,0,4,100,2,17,5,50,550,1,2,1,,,,45,41,45,,,,6,6,6,,,,60,60,60,,,,11750,0,11750,,,\n1970,2,303,,100,4.54,1,0,5,100,1,15,3,30,0,1,2,1,,,,45,41,45,,,,6,6,6,,,,60,60,60,,,,11750,0,11750,,,\n1970,2,303,,100,4.54,1,0,6,100,1,71,2,2
3,3350,1,,,2,,,45,,,67,,,6,,,2,,,60,,,26,,,11750,,,1450,,\n1970,2,303,,100,4.54,1,0,7,100,2,67,2,26,1450,1,,,1,,,45,,,71,,,6,,,2,,,60,,,23,,,11750,,,3350,,\n1970,2,304,,100,4.54,1,0,1,100,1,75,2,26,3650,1,,,2,,,75,,,75,,,2,,,2,,,26,,,26,,,3650,,,850,,\n1970,2,304,,100,4.54,1,0,2,100,2,75,2,26,850,1,,,1,,,75,,,75,,,2,,,2,,,26,,,26,,,3650,,,3650,,\n1970,2,305,,100,4.54,1,0,1,100,1,38,6,60,10050,1,,,2,,,38,,,38,,,6,,,6,,,60,,,60,,,10050,,,0,,\n1970,2,305,,100,4.54,1,0,2,100,2,38,6,60,0,1,,,1,,,38,,,38,,,6,,,6,,,60,,,60,,,10050,,,10050,,\n1970,2,305,,100,4.54,1,0,3,100,1,11,2,22,9999999,1,2,1,,,,38,38,38,,,,6,6,6,,,,60,60,60,,,,10050,0,10050,,,\n1970,2,305,,100,4.54,1,0,4,100,1,9,1,16,9999999,1,2,1,,,,38,38,38,,,,6,6,6,,,,60,60,60,,,,10050,0,10050,,,\n1970,2,306,,100,4.54,1,0,1,100,1,47,3,30,11050,1,,,2,,,47,,,40,,,3,,,6,,,30,,,60,,,11050,,,0,,\n1970,2,306,,100,4.54,1,0,2,100,2,40,6,60,0,1,,,1,,,47,,,47,,,3,,,3,,,30,,,30,,,11050,,,11050,,\n1970,2,306,,100,4.54,1,0,3,100,1,16,3,30,0,1,2,1,,,,47,40,47,,,,3,6,3,,,,30,60,30,,,,11050,0,11050,,,\n1970,2,306,,100,4.54,1,0,4,100,2,9,1,16,9999999,1,2,1,,,,47,40,47,,,,3,6,3,,,,30,60,30,,,,11050,0,11050,,,\n1970,2,306,,100,4.54,1,0,5,100,2,7,1,14,9999999,1,2,1,,,,47,40,47,,,,3,6,3,,,,30,60,30,,,,11050,0,11050,,,\n1970,2,307,,100,4.54,1,0,1,100,1,35,6,60,150,1,2,,,,,35,73,,,,,6,2,,,,,60,26,,,,,150,1250,,,,\n1970,2,307,,100,4.54,1,0,2,100,2,73,2,26,1250,1,,,,,,35,,,,,,6,,,,,,60,,,,,,150,,,,,\n1970,2,308,,100,4.54,1,0,1,100,1,77,2,22,4050,1,,,,,,77,,,,,,2,,,,,,22,,,,,,4050,,,,,\n1970,2,308,,100,4.54,1,0,2,100,1,50,4,40,650,1,,1,,,,77,,77,,,,2,,2,,,,22,,22,,,,4050,,4050,,,\n1970,2,308,,100,4.54,1,0,3,100,1,41,2,26,6550,1,,1,,,,77,,77,,,,2,,2,,,,22,,22,,,,4050,,4050,,,\n1970,2,309,,100,4.54,1,0,1,100,1,61,5,50,3550,1,,,,,,61,,,,,,5,,,,,,50,,,,,,3550,,,,,\n1970,2,309,,100,4.54,1,0,2,100,1,17,5,50,550,1,,1,,,,61,,61,,,,5,,5,,,,50,,50,,,,3550,,3550,,,\n1970,2,310,,100,4.54,1,0,1,100,1,56,6,60,7050,1,,,2,,,56,,,61,,,6,,,6,,,60,,,60,,,7050,
,,0,,\n1970,2,310,,100,4.54,1,0,2,100,2,61,6,60,0,1,,,1,,,56,,,56,,,6,,,6,,,60,,,60,,,7050,,,7050,,\n1970,2,311,,100,4.54,1,0,1,100,1,43,11,110,11850,1,,,2,,,43,,,42,,,11,,,10,,,110,,,100,,,11850,,,250,,\n1970,2,311,,100,4.54,1,0,2,100,2,42,10,100,250,1,,,1,,,43,,,43,,,11,,,11,,,110,,,110,,,11850,,,11850,,\n1970,2,312,,100,4.54,1,0,1,100,2,26,6,60,4050,2,,,,,,26,,,,,,6,,,,,,60,,,,,,4050,,,,,\n1970,2,312,,100,4.54,1,0,2,100,1,9,1,15,9999999,2,2,,,,,26,26,,,,,6,6,,,,,60,60,,,,,4050,4050,,,,\n1970,2,312,,100,4.54,1,0,3,100,1,7,1,12,9999999,2,2,,,,,26,26,,,,,6,6,,,,,60,60,,,,,4050,4050,,,,\n1970,2,313,,100,4.54,1,0,1,100,1,22,10,100,0,1,,,,,,22,,,,,,10,,,,,,100,,,,,,0,,,,,\n1970,2,314,,100,4.54,1,0,1,100,1,27,6,60,7050,1,,,,,,27,,,,,,6,,,,,,60,,,,,,7050,,,,,\n1970,2,315,,100,4.54,3,0,1,100,2,61,8,80,150,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,\n1970,2,316,,100,4.54,1,0,1,100,1,62,4,40,12050,1,,,2,,,62,,,61,,,4,,,4,,,40,,,40,,,12050,,,1350,,\n1970,2,316,,100,4.54,1,0,2,100,2,61,4,40,1350,1,,,1,,,62,,,62,,,4,,,4,,,40,,,40,,,12050,,,12050,,\n1970,2,317,,100,4.54,1,0,1,100,2,45,8,80,5050,2,,,,,,45,,,,,,8,,,,,,80,,,,,,5050,,,,,\n1970,2,318,,100,4.54,1,0,1,100,2,23,8,80,3050,2,,,,,,23,,,,,,8,,,,,,80,,,,,,3050,,,,,\n1970,2,318,,100,4.54,1,0,2,100,1,2,0,1,9999999,2,2,,,,,23,23,,,,,8,8,,,,,80,80,,,,,3050,3050,,,,\n1970,2,319,,100,4.54,1,0,1,100,1,43,10,100,22050,1,,,2,,,43,,,37,,,10,,,10,,,100,,,100,,,22050,,,150,,\n1970,2,319,,100,4.54,1,0,2,100,2,37,10,100,150,1,,,1,,,43,,,43,,,10,,,10,,,100,,,100,,,22050,,,22050,,\n1970,2,319,,100,4.54,1,0,3,100,2,14,2,26,0,1,2,1,,,,43,37,43,,,,10,10,10,,,,100,100,100,,,,22050,150,22050,,,\n1970,2,319,,100,4.54,1,0,4,100,1,11,1,17,9999999,1,2,1,,,,43,37,43,,,,10,10,10,,,,100,100,100,,,,22050,150,22050,,,\n1970,2,320,,100,4.54,1,0,1,100,2,79,2,26,750,2,,,,,,79,,,,,,2,,,,,,26,,,,,,750,,,,,\n1970,2,321,,100,4.54,1,0,1,100,1,24,6,60,5050,1,,,2,,,24,,,21,,,6,,,6,,,60,,,60,,,5050,,,4050,,\n1970,2,321,,100,4.54,1,0,2,100,2,21,6,60,4050,1,,,1,,,24,,,24,,,6,,,6,
,,60,,,60,,,5050,,,5050,,\n1970,2,321,,100,4.54,1,0,3,100,1,0,0,1,9999999,1,2,1,,,,24,21,24,,,,6,6,6,,,,60,60,60,,,,5050,4050,5050,,,\n1970,2,322,,100,4.54,1,0,1,100,2,60,6,60,6550,2,,,,,,60,,,,,,6,,,,,,60,,,,,,6550,,,,,\n1970,2,322,,100,4.54,1,0,2,100,1,24,6,60,1350,2,2,,,,,60,60,,,,,6,6,,,,,60,60,,,,,6550,6550,,,,\n1970,2,323,,100,4.54,1,0,1,100,1,77,7,70,10050,1,,,2,,,77,,,71,,,7,,,3,,,70,,,30,,,10050,,,0,,\n1970,2,323,,100,4.54,1,0,2,100,2,71,3,30,0,1,,,1,,,77,,,77,,,7,,,7,,,70,,,70,,,10050,,,10050,,\n1970,2,324,,100,4.54,1,0,1,100,1,66,3,30,8050,1,,,2,,,66,,,60,,,3,,,5,,,30,,,50,,,8050,,,5050,,\n1970,2,324,,100,4.54,1,0,2,100,2,60,5,50,5050,1,,,1,,,66,,,66,,,3,,,3,,,30,,,30,,,8050,,,8050,,\n1970,2,325,,100,4.54,1,0,1,100,1,40,2,25,7050,1,,,2,,,40,,,34,,,2,,,4,,,25,,,40,,,7050,,,6050,,\n1970,2,325,,100,4.54,1,0,2,100,2,34,4,40,6050,1,,,1,,,40,,,40,,,2,,,2,,,25,,,25,,,7050,,,7050,,\n1970,2,325,,100,4.54,1,0,3,100,2,16,3,30,3850,1,2,1,,,,40,34,40,,,,2,4,2,,,,25,40,25,,,,7050,6050,7050,,,\n1970,2,325,,100,4.54,1,0,4,100,1,15,2,25,0,1,2,1,,,,40,34,40,,,,2,4,2,,,,25,40,25,,,,7050,6050,7050,,,\n1970,2,325,,100,4.54,1,0,5,100,2,13,2,25,9999999,1,2,1,,,,40,34,40,,,,2,4,2,,,,25,40,25,,,,7050,6050,7050,,,\n1970,2,325,,100,4.54,1,0,6,100,2,13,2,22,9999999,1,2,1,,,,40,34,40,,,,2,4,2,,,,25,40,25,,,,7050,6050,7050,,,\n1970,2,325,,100,4.54,1,0,7,100,2,0,0,1,9999999,1,2,1,,,,40,34,40,,,,2,4,2,,,,25,40,25,,,,7050,6050,7050,,,\n1970,2,326,,100,4.54,1,0,1,100,1,37,6,60,4850,1,,,2,,,37,,,40,,,6,,,5,,,60,,,50,,,4850,,,0,,\n1970,2,326,,100,4.54,1,0,2,100,2,40,5,50,0,1,,,1,,,37,,,37,,,6,,,6,,,60,,,60,,,4850,,,4850,,\n1970,2,326,,100,4.54,1,0,3,100,2,19,6,60,2650,1,2,1,,,,37,40,37,,,,6,5,6,,,,60,50,60,,,,4850,0,4850,,,\n1970,2,326,,100,4.54,1,0,4,100,2,17,5,50,0,1,2,1,,,,37,40,37,,,,6,5,6,,,,60,50,60,,,,4850,0,4850,,,\n1970,2,326,,100,4.54,1,0,5,100,2,15,2,26,0,1,2,1,,,,37,40,37,,,,6,5,6,,,,60,50,60,,,,4850,0,4850,,,\n1970,2,326,,100,4.54,1,0,6,100,1,14,2,25,0,1,2,1,,,,37,40,37,,,,6,5,
6,,,,60,50,60,,,,4850,0,4850,,,\n1970,2,326,,100,4.54,1,0,7,100,2,13,2,23,9999999,1,2,1,,,,37,40,37,,,,6,5,6,,,,60,50,60,,,,4850,0,4850,,,\n1970,2,326,,100,4.54,1,0,8,100,2,12,2,22,9999999,1,2,1,,,,37,40,37,,,,6,5,6,,,,60,50,60,,,,4850,0,4850,,,\n1970,2,326,,100,4.54,1,0,9,100,1,1,0,1,9999999,1,2,,,,,37,19,,,,,6,6,,,,,60,60,,,,,4850,2650,,,,\n1970,2,327,,100,4.54,1,0,1,100,2,53,2,23,550,2,,,,,,53,,,,,,2,,,,,,23,,,,,,550,,,,,\n1970,2,329,,100,4.54,1,0,1,100,1,33,1,17,7050,1,,,2,,,33,,,27,,,1,,,2,,,17,,,23,,,7050,,,2050,,\n1970,2,329,,100,4.54,1,0,2,100,2,27,2,23,2050,1,,,1,,,33,,,33,,,1,,,1,,,17,,,17,,,7050,,,7050,,\n1970,2,329,,100,4.54,1,0,3,100,2,0,0,1,9999999,1,2,1,,,,33,27,33,,,,1,2,1,,,,17,23,17,,,,7050,2050,7050,,,\n1970,2,330,,100,4.54,1,0,1,100,1,29,3,30,9550,1,,,2,,,29,,,24,,,3,,,3,,,30,,,30,,,9550,,,0,,\n1970,2,330,,100,4.54,1,0,2,100,2,24,3,30,0,1,,,1,,,29,,,29,,,3,,,3,,,30,,,30,,,9550,,,9550,,\n1970,2,330,,100,4.54,1,0,3,100,2,3,0,2,9999999,1,2,1,,,,29,24,29,,,,3,3,3,,,,30,30,30,,,,9550,0,9550,,,\n1970,2,330,,100,4.54,1,0,4,100,2,2,0,1,9999999,1,2,1,,,,29,24,29,,,,3,3,3,,,,30,30,30,,,,9550,0,9550,,,\n1970,2,330,,100,4.54,1,0,5,100,1,0,0,1,9999999,1,2,1,,,,29,24,29,,,,3,3,3,,,,30,30,30,,,,9550,0,9550,,,\n1970,2,330,,100,4.54,1,0,6,100,1,0,0,1,9999999,1,2,1,,,,29,24,29,,,,3,3,3,,,,30,30,30,,,,9550,0,9550,,,\n1970,2,331,,100,4.54,1,0,1,100,2,45,1,17,0,2,,,,,,45,,,,,,1,,,,,,17,,,,,,0,,,,,\n1970,2,331,,100,4.54,1,0,2,100,1,45,1,16,6150,2,,,,,,45,,,,,,1,,,,,,17,,,,,,0,,,,,\n1970,2,331,,100,4.54,1,0,3,100,2,20,2,22,4150,2,2,,,,,45,45,,,,,1,1,,,,,17,17,,,,,0,0,,,,\n1970,2,332,,100,4.54,1,0,1,100,1,39,6,60,6550,1,,,,,,39,,,,,,6,,,,,,60,,,,,,6550,,,,,\n1970,2,333,,100,4.54,1,0,1,100,1,19,6,65,1750,1,,,,,,19,,,,,,6,,,,,,65,,,,,,1750,,,,,\n1970,2,333,,100,4.54,1,0,2,100,1,18,7,70,2050,1,,,,,,19,,,,,,6,,,,,,65,,,,,,1750,,,,,\n1970,2,333,,100,4.54,1,0,3,100,1,21,9,90,1950,1,,,,,,19,,,,,,6,,,,,,65,,,,,,1750,,,,,\n1970,2,334,,100,4.54,1,0,1,100,1,40,3,30,9450,1,,,2,,,40
,,,42,,,3,,,2,,,30,,,26,,,9450,,,0,,\n1970,2,334,,100,4.54,1,0,2,100,2,42,2,26,0,1,,,1,,,40,,,40,,,3,,,3,,,30,,,30,,,9450,,,9450,,\n1970,2,336,,100,4.54,1,0,1,100,2,29,5,50,3750,2,,,,,,29,,,,,,5,,,,,,50,,,,,,3750,,,,,\n1970,2,336,,100,4.54,1,0,2,100,2,5,0,2,9999999,2,2,,,,,29,29,,,,,5,5,,,,,50,50,,,,,3750,3750,,,,\n1970,2,336,,100,4.54,1,0,3,100,2,4,0,2,9999999,2,2,,,,,29,29,,,,,5,5,,,,,50,50,,,,,3750,3750,,,,\n1970,2,337,,100,4.54,1,0,1,100,1,48,11,110,15050,1,,,2,,,48,,,49,,,11,,,2,,,110,,,23,,,15050,,,0,,\n1970,2,337,,100,4.54,1,0,2,100,2,49,2,23,0,1,,,1,,,48,,,48,,,11,,,11,,,110,,,110,,,15050,,,15050,,\n1970,2,337,,100,4.54,1,0,3,100,1,18,6,60,1750,1,2,1,,,,48,49,48,,,,11,2,11,,,,110,23,110,,,,15050,0,15050,,,\n1970,2,337,,100,4.54,1,0,4,100,1,17,5,50,1050,1,2,1,,,,48,49,48,,,,11,2,11,,,,110,23,110,,,,15050,0,15050,,,\n1970,2,337,,100,4.54,1,0,5,100,1,14,2,25,50,1,2,1,,,,48,49,48,,,,11,2,11,,,,110,23,110,,,,15050,0,15050,,,\n1970,2,337,,100,4.54,1,0,6,100,2,11,2,22,9999999,1,2,1,,,,48,49,48,,,,11,2,11,,,,110,23,110,,,,15050,0,15050,,,\n1970,2,337,,100,4.54,1,0,7,100,1,10,1,17,9999999,1,2,1,,,,48,49,48,,,,11,2,11,,,,110,23,110,,,,15050,0,15050,,,\n1970,2,338,,100,4.54,1,0,1,100,1,57,9,90,10050,1,,,2,,,57,,,49,,,9,,,6,,,90,,,60,,,10050,,,250,,\n1970,2,338,,100,4.54,1,0,2,100,2,49,6,60,250,1,,,1,,,57,,,57,,,9,,,9,,,90,,,90,,,10050,,,10050,,\n1970,2,338,,100,4.54,1,0,3,100,1,16,4,40,1450,1,2,1,,,,57,49,57,,,,9,6,9,,,,90,60,90,,,,10050,250,10050,,,\n1970,2,338,,100,4.54,1,0,4,100,2,14,2,26,0,1,2,1,,,,57,49,57,,,,9,6,9,,,,90,60,90,,,,10050,250,10050,,,\n1970,2,338,,100,4.54,1,0,5,100,1,11,2,22,9999999,1,2,1,,,,57,49,57,,,,9,6,9,,,,90,60,90,,,,10050,250,10050,,,\n1970,2,338,,100,4.54,1,0,6,100,1,6,1,12,9999999,1,2,1,,,,57,49,57,,,,9,6,9,,,,90,60,90,,,,10050,250,10050,,,\n1970,2,339,,100,4.54,1,0,1,100,1,30,6,60,12050,1,,,2,,,30,,,28,,,6,,,6,,,60,,,60,,,12050,,,0,,\n1970,2,339,,100,4.54,1,0,2,100,2,28,6,60,0,1,,,1,,,30,,,30,,,6,,,6,,,60,,,60,,,12050,,,12050,,\n1970,2,33
9,,100,4.54,1,0,3,100,1,7,1,14,9999999,1,2,1,,,,30,28,30,,,,6,6,6,,,,60,60,60,,,,12050,0,12050,,,\n1970,2,339,,100,4.54,1,0,4,100,1,5,1,11,9999999,1,2,1,,,,30,28,30,,,,6,6,6,,,,60,60,60,,,,12050,0,12050,,,\n1970,2,339,,100,4.54,1,0,5,100,2,0,0,1,9999999,1,2,1,,,,30,28,30,,,,6,6,6,,,,60,60,60,,,,12050,0,12050,,,\n1970,2,340,,100,4.54,1,0,1,100,1,57,3,30,5250,1,,,2,,,57,,,62,,,3,,,2,,,30,,,26,,,5250,,,0,,\n1970,2,340,,100,4.54,1,0,2,100,2,62,2,26,0,1,,,1,,,57,,,57,,,3,,,3,,,30,,,30,,,5250,,,5250,,\n1970,2,341,,100,4.54,1,0,1,100,1,73,1,16,1750,1,,,2,,,73,,,67,,,1,,,1,,,16,,,16,,,1750,,,750,,\n1970,2,341,,100,4.54,1,0,2,100,2,67,1,16,750,1,,,1,,,73,,,73,,,1,,,1,,,16,,,16,,,1750,,,1750,,\n1970,2,342,,100,4.54,1,0,1,100,1,29,6,60,7050,1,,,2,,,29,,,21,,,6,,,5,,,60,,,50,,,7050,,,3350,,\n1970,2,342,,100,4.54,1,0,2,100,2,21,5,50,3350,1,,,1,,,29,,,29,,,6,,,6,,,60,,,60,,,7050,,,7050,,\n1970,2,342,,100,4.54,1,0,3,100,1,0,0,1,9999999,1,2,1,,,,29,21,29,,,,6,5,6,,,,60,50,60,,,,7050,3350,7050,,,\n1970,2,343,,100,4.54,1,0,1,100,1,36,11,111,24250,1,,,2,,,36,,,30,,,11,,,10,,,111,,,100,,,24250,,,7050,,\n1970,2,343,,100,4.54,1,0,2,100,2,30,10,100,7050,1,,,1,,,36,,,36,,,11,,,11,,,111,,,111,,,24250,,,24250,,\n1970,2,343,,100,4.54,1,0,3,100,2,7,1,14,9999999,1,2,1,,,,36,30,36,,,,11,10,11,,,,111,100,111,,,,24250,7050,24250,,,\n1970,2,343,,100,4.54,1,0,4,100,2,3,0,2,9999999,1,2,1,,,,36,30,36,,,,11,10,11,,,,111,100,111,,,,24250,7050,24250,,,\n1970,2,344,,100,4.54,1,0,1,100,1,24,11,111,250,1,,,,,,24,,,,,,11,,,,,,111,,,,,,250,,,,,\n1970,2,344,,100,4.54,1,0,2,100,1,6,1,12,9999999,1,,1,,,,24,,24,,,,11,,11,,,,111,,111,,,,250,,250,,,\n1970,2,344,,100,4.54,1,0,3,100,1,8,1,15,9999999,1,,1,,,,24,,24,,,,11,,11,,,,111,,111,,,,250,,250,,,\n1970,2,345,,100,4.54,1,0,1,100,1,58,8,80,11850,1,,,2,,,58,,,57,,,8,,,6,,,80,,,60,,,11850,,,6450,,\n1970,2,345,,100,4.54,1,0,2,100,2,57,6,60,6450,1,,,1,,,58,,,58,,,8,,,8,,,80,,,80,,,11850,,,11850,,\n1970,2,345,,100,4.54,1,0,3,100,2,28,7,70,6150,1,2,1,,,,58,57,58,,,,8,6,8
,,,,80,60,80,,,,11850,6450,11850,,,\n1970,2,346,,100,4.54,1,0,1,100,1,39,6,60,12050,1,,,2,,,39,,,33,,,6,,,6,,,60,,,60,,,12050,,,3050,,\n1970,2,346,,100,4.54,1,0,2,100,2,33,6,60,3050,1,,,1,,,39,,,39,,,6,,,6,,,60,,,60,,,12050,,,12050,,\n1970,2,346,,100,4.54,1,0,3,100,1,16,3,30,0,1,2,1,,,,39,33,39,,,,6,6,6,,,,60,60,60,,,,12050,3050,12050,,,\n1970,2,346,,100,4.54,1,0,4,100,1,14,2,25,0,1,2,1,,,,39,33,39,,,,6,6,6,,,,60,60,60,,,,12050,3050,12050,,,\n1970,2,346,,100,4.54,1,0,5,100,1,4,1,11,9999999,1,2,1,,,,39,33,39,,,,6,6,6,,,,60,60,60,,,,12050,3050,12050,,,\n1970,2,347,,100,4.54,1,0,1,100,1,38,11,111,46550,1,,,2,,,38,,,33,,,11,,,6,,,111,,,60,,,46550,,,0,,\n1970,2,347,,100,4.54,1,0,2,100,2,33,6,60,0,1,,,1,,,38,,,38,,,11,,,11,,,111,,,111,,,46550,,,46550,,\n1970,2,347,,100,4.54,1,0,3,100,1,10,1,16,9999999,1,2,1,,,,38,33,38,,,,11,6,11,,,,111,60,111,,,,46550,0,46550,,,\n1970,2,347,,100,4.54,1,0,4,100,1,9,1,15,9999999,1,2,1,,,,38,33,38,,,,11,6,11,,,,111,60,111,,,,46550,0,46550,,,\n1970,2,347,,100,4.54,1,0,5,100,2,7,1,14,9999999,1,2,1,,,,38,33,38,,,,11,6,11,,,,111,60,111,,,,46550,0,46550,,,\n1970,2,347,,100,4.54,1,0,6,100,1,5,1,11,9999999,1,2,1,,,,38,33,38,,,,11,6,11,,,,111,60,111,,,,46550,0,46550,,,\n1970,2,347,,100,4.54,1,0,7,100,2,4,0,2,9999999,1,2,1,,,,38,33,38,,,,11,6,11,,,,111,60,111,,,,46550,0,46550,,,\n1970,2,347,,100,4.54,1,0,8,100,1,1,0,1,9999999,1,2,1,,,,38,33,38,,,,11,6,11,,,,111,60,111,,,,46550,0,46550,,,\n1970,2,348,,100,4.54,1,0,1,100,2,57,8,80,9050,2,,,,,,57,,,,,,8,,,,,,80,,,,,,9050,,,,,\n"
  },
  {
    "path": "examples/data/nyc-taxi_1k.csv",
    "content": "1460000001,2,2017-12-15 00:00:28,2017-12-15 00:15:43,N,1,,,,,2,1.50,11,0.5,0.5,1.25,0,,0.3,13.55,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000002,2,2017-12-15 00:33:12,2017-12-15 00:51:04,N,1,,,,,3,2.53,13,0.5,0.5,2.86,0,,0.3,17.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000003,2,2017-12-15 00:56:59,2017-12-15 00:59:51,N,1,,,,,3,0.06,3.5,0.5,0.5,0,0,,0.3,4.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000004,2,2017-12-15 00:09:19,2017-12-15 00:18:54,N,1,,,,,5,1.47,8.5,0.5,0.5,1.96,0,,0.3,11.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000005,2,2017-12-15 00:21:02,2017-12-15 00:25:30,N,1,,,,,5,0.23,4.5,0.5,0.5,1.16,0,,0.3,6.96,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000006,2,2017-12-15 00:35:38,2017-12-15 01:26:19,N,1,,,,,5,18.48,58,0.5,0.5,9.76,5.76,,0.3,76.77,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000007,1,2017-12-15 00:48:29,2017-12-15 01:01:42,N,1,,,,,1,2.60,11.5,0.5,0.5,2.55,0,,0.3,15.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000008,2,2017-12-15 00:09:01,2017-12-15 00:11:57,N,1,,,,,1,0.85,4.5,0.5,0.5,0,0,,0.3,5.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000009,2,2017-12-15 00:32:00,2017-12-15 00:35:42,N,1,,,,,1,0.48,4.5,0.5,0.5,0,0,,0.3,5.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000010,2,2017-12-15 00:42:13,2017-12-15 00:54:48,N,1,,,,,1,2.09,10.5,0.5,0.5,2.36,0,,0.3,14.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000011,2,2017-12-15 00:05:46,2017-12-15 00:09:49,N,1,,,,,3,0.60,4.5,0.5,0.5,0.87,0,,0.3,6.67,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000012,2,2017-12-15 00:10:48,2017-12-15 00:20:24,N,1,,,,,3,0.97,7.5,0.5,0.5,0,0,,0.3,8.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000013,2,2017-12-15 00:26:39,2017-12-15 
00:39:18,N,1,,,,,3,1.82,9.5,0.5,0.5,3,0,,0.3,13.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000014,2,2017-12-15 00:40:49,2017-12-15 01:16:48,N,1,,,,,3,8.99,31,0.5,0.5,6.46,0,,0.3,38.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000015,2,2017-12-15 00:24:04,2017-12-15 00:34:56,N,1,,,,,1,5.04,16,0.5,0.5,5.19,0,,0.3,22.49,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000016,1,2017-12-15 00:06:12,2017-12-15 00:35:33,N,1,,,,,1,5.30,21.5,0.5,0.5,1.5,0,,0.3,24.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000017,1,2017-12-15 00:38:38,2017-12-15 00:54:18,N,1,,,,,1,0.90,10.5,0.5,0.5,1,0,,0.3,12.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000018,1,2017-12-15 00:33:57,2017-12-15 00:47:58,N,1,,,,,1,2.10,11.5,0.5,0.5,2.55,0,,0.3,15.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000019,1,2017-12-15 00:00:23,2017-12-15 00:13:38,N,1,,,,,1,1.40,9.5,0.5,0.5,1.5,0,,0.3,12.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000020,1,2017-12-15 00:21:30,2017-12-15 00:31:23,N,1,,,,,1,1.50,8,0.5,0.5,2.3,0,,0.3,11.6,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000021,1,2017-12-15 00:47:17,2017-12-15 01:02:26,N,1,,,,,1,2.80,12,0.5,0.5,1,0,,0.3,14.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000022,1,2017-12-15 00:22:56,2017-12-15 00:29:31,N,1,,,,,1,1.50,7,0.5,0.5,3.5,0,,0.3,11.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000023,1,2017-12-15 00:44:16,2017-12-15 00:50:13,N,1,,,,,1,0.90,6,0.5,0.5,1.45,0,,0.3,8.75,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000024,2,2017-12-15 00:21:58,2017-12-15 00:44:00,N,1,,,,,1,13.41,37,0.5,0.5,0,0,,0.3,38.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000025,2,2017-12-15 00:18:22,2017-12-15 00:33:48,N,1,,,,,1,1.71,11,0.5,0.5,0,0,,0.3,12.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000026,2,2017-12-15 00:39:06,2017-12-15 
00:52:25,N,1,,,,,1,1.01,9,0.5,0.5,1.54,0,,0.3,11.84,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000027,2,2017-12-15 01:00:32,2017-12-15 01:20:29,N,1,,,,,1,3.69,16,0.5,0.5,2.6,0,,0.3,19.9,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000028,1,2017-12-15 00:09:13,2017-12-15 00:25:51,N,1,,,,,1,1.90,12,0.5,0.5,1,0,,0.3,14.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000029,1,2017-12-15 00:27:17,2017-12-15 00:35:39,N,1,,,,,1,1.00,7,0.5,0.5,1.65,0,,0.3,9.95,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000030,1,2017-12-15 00:37:01,2017-12-15 00:41:47,N,1,,,,,1,0.60,5,0.5,0.5,1.25,0,,0.3,7.55,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000031,1,2017-12-15 00:44:51,2017-12-15 00:52:31,N,1,,,,,1,1.80,8,0.5,0.5,1,0,,0.3,10.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000032,1,2017-12-15 00:58:04,2017-12-15 01:42:27,N,1,,,,,1,12.10,41,0.5,0.5,2,0,,0.3,44.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000033,1,2017-12-15 00:04:01,2017-12-15 00:20:26,N,1,,,,,1,1.70,12,0.5,0.5,0,0,,0.3,13.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000034,1,2017-12-15 00:21:36,2017-12-15 00:23:09,N,1,,,,,1,0.20,3,0.5,0.5,0,0,,0.3,4.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000035,1,2017-12-15 00:24:05,2017-12-15 00:38:33,N,1,,,,,1,3.10,12.5,0.5,0.5,0,0,,0.3,13.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000036,1,2017-12-15 00:49:14,2017-12-15 01:05:23,N,1,,,,,1,3.90,15,0.5,0.5,3.25,0,,0.3,19.55,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000037,1,2017-12-15 00:19:03,2017-12-15 00:44:02,N,1,,,,,1,3.50,17,0.5,0.5,2.75,0,,0.3,21.05,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000038,1,2017-12-15 00:47:45,2017-12-15 01:01:16,N,1,,,,,1,4.90,16.5,0.5,0.5,0,0,,0.3,17.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000039,2,2017-12-15 00:23:06,2017-12-15 
00:24:06,N,1,,,,,1,0.36,3.5,0.5,0.5,0,0,,0.3,4.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000040,2,2017-12-15 00:27:28,2017-12-15 00:30:57,N,1,,,,,1,0.99,5,0.5,0.5,1.26,0,,0.3,7.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000041,2,2017-12-15 00:18:20,2017-12-15 00:41:29,N,1,,,,,1,3.74,18,0.5,0.5,3.86,0,,0.3,23.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000042,1,2017-12-15 00:08:31,2017-12-15 00:11:07,N,1,,,,,1,0.50,4,0.5,0.5,1.05,0,,0.3,6.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000043,1,2017-12-15 00:13:03,2017-12-15 00:33:47,N,1,,,,,1,7.20,22.5,0.5,0.5,0,5.76,,0.3,29.56,3,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000044,2,2017-12-15 00:30:10,2017-12-15 00:45:02,N,1,,,,,1,3.02,12.5,0.5,0.5,0,0,,0.3,13.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000045,2,2017-12-15 00:49:42,2017-12-15 01:13:51,N,1,,,,,1,5.27,20,0.5,0.5,2,0,,0.3,23.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000046,2,2017-12-15 00:05:58,2017-12-15 00:25:43,N,1,,,,,1,4.15,17,0.5,0.5,4.58,0,,0.3,22.88,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000047,2,2017-12-15 00:37:28,2017-12-15 00:42:26,N,1,,,,,1,1.00,5.5,0.5,0.5,1.36,0,,0.3,8.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000048,1,2017-12-15 00:15:17,2017-12-15 00:37:01,N,1,,,,,1,4.00,16.5,0.5,0.5,3.55,0,,0.3,21.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000049,1,2017-12-15 00:42:53,2017-12-15 00:59:39,N,1,,,,,1,3.30,14.5,0.5,0.5,3.15,0,,0.3,18.95,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000050,2,2017-12-15 00:33:52,2017-12-15 00:40:49,N,1,,,,,1,0.40,6,0.5,0.5,0,0,,0.3,7.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000051,2,2017-12-15 00:46:35,2017-12-15 00:56:49,N,1,,,,,1,1.26,8.5,0.5,0.5,2,0,,0.3,11.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000052,1,2017-12-15 00:06:04,2017-12-15 
00:31:04,N,1,,,,,1,4.50,20,0.5,0.5,4.26,0,,0.3,25.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000053,2,2017-12-15 00:30:11,2017-12-15 00:34:09,N,1,,,,,3,1.14,5.5,0.5,0.5,2.04,0,,0.3,8.84,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000054,2,2017-12-15 00:45:10,2017-12-15 01:11:33,N,1,,,,,5,10.50,31.5,0.5,0.5,0,5.76,,0.3,38.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000055,2,2017-12-15 00:31:27,2017-12-15 00:53:23,N,1,,,,,2,4.40,18,0.5,0.5,4.82,0,,0.3,24.12,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000056,2,2017-12-15 00:23:15,2017-12-15 00:33:59,N,1,,,,,1,1.26,8.5,0.5,0.5,1.96,0,,0.3,11.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000057,1,2017-12-15 00:18:49,2017-12-15 00:25:53,N,1,,,,,2,1.10,6.5,0.5,0.5,1.55,0,,0.3,9.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000058,1,2017-12-15 00:28:53,2017-12-15 00:35:26,N,1,,,,,1,0.80,6,0.5,0.5,1.45,0,,0.3,8.75,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000059,1,2017-12-15 00:36:04,2017-12-15 00:58:54,N,1,,,,,1,4.00,17,0.5,0.5,3.65,0,,0.3,21.95,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000060,1,2017-12-15 00:52:09,2017-12-15 00:59:47,N,1,,,,,1,1.00,7,0.5,0.5,0,0,,0.3,8.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000061,2,2017-12-15 00:07:57,2017-12-15 00:25:09,N,1,,,,,1,2.79,13,0.5,0.5,0,0,,0.3,14.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000062,2,2017-12-15 00:26:09,2017-12-15 00:29:52,N,1,,,,,1,0.68,4.5,0.5,0.5,0,0,,0.3,5.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000063,2,2017-12-15 00:31:59,2017-12-15 00:40:18,N,1,,,,,1,1.31,7.5,0.5,0.5,1.25,0,,0.3,10.05,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000064,2,2017-12-15 00:49:06,2017-12-15 01:07:13,N,1,,,,,1,4.21,15.5,0.5,0.5,3.36,0,,0.3,20.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000065,2,2017-12-15 00:56:03,2017-12-15 
01:37:49,N,1,,,,,1,22.40,62.5,0.5,0.5,17.39,5.76,,0.3,86.95,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000066,1,2017-12-15 00:01:19,2017-12-15 00:12:58,N,1,,,,,1,1.80,10,0.5,0.5,1,0,,0.3,12.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000067,1,2017-12-15 00:18:24,2017-12-15 01:04:16,N,5,,,,,1,5.00,0,0,0,0,10.5,,0.3,10.8,3,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000068,2,2017-12-15 00:08:27,2017-12-15 00:38:37,N,1,,,,,1,4.88,21.5,0.5,0.5,2.28,0,,0.3,25.08,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000069,2,2017-12-15 00:42:27,2017-12-15 00:53:50,N,1,,,,,1,5.28,17,0.5,0.5,4.81,5.76,,0.3,28.87,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000070,2,2017-12-15 00:05:56,2017-12-15 00:22:42,N,1,,,,,2,2.29,11.5,0.5,0.5,2.56,0,,0.3,15.36,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000071,2,2017-12-15 00:27:09,2017-12-15 00:31:47,N,1,,,,,2,0.77,5,0.5,0.5,0,0,,0.3,6.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000072,2,2017-12-15 00:42:03,2017-12-15 01:00:47,N,1,,,,,2,3.28,14.5,0.5,0.5,3.95,0,,0.3,19.75,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000073,1,2017-12-15 00:45:21,2017-12-15 00:50:56,N,1,,,,,1,1.50,7,0.5,0.5,1.65,0,,0.3,9.95,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000074,2,2017-12-15 00:08:09,2017-12-15 00:21:11,N,1,,,,,5,1.97,10.5,0.5,0.5,2.36,0,,0.3,14.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000075,2,2017-12-15 00:21:48,2017-12-15 00:57:13,N,1,,,,,5,5.40,24,0.5,0.5,0,0,,0.3,25.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000076,1,2017-12-15 00:09:11,2017-12-15 00:35:18,N,1,,,,,4,5.20,21.5,0.5,0.5,0,0,,0.3,22.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000077,1,2017-12-15 00:42:08,2017-12-15 00:42:09,N,1,,,,,1,5.30,2.5,0.5,0.5,0,0,,0.3,3.8,3,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000078,1,2017-12-15 00:45:28,2017-12-15 
01:05:27,N,1,,,,,1,6.90,22.5,0.5,0.5,3,5.76,,0.3,32.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000079,2,2017-12-15 00:03:48,2017-12-15 00:38:38,N,1,,,,,6,7.86,29,0.5,0.5,9.09,0,,0.3,39.39,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000080,1,2017-12-15 00:25:04,2017-12-15 00:28:18,N,1,,,,,2,0.60,4.5,0.5,0.5,0,0,,0.3,5.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000081,1,2017-12-15 00:34:23,2017-12-15 00:59:40,N,1,,,,,1,3.60,17.5,0.5,0.5,3,0,,0.3,21.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000082,2,2017-12-15 00:53:37,2017-12-15 01:11:06,N,1,,,,,1,2.77,13,0.5,0.5,2.86,0,,0.3,17.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000083,2,2017-12-15 00:09:00,2017-12-15 00:42:14,N,1,,,,,1,4.94,24.5,0.5,0.5,5.16,0,,0.3,30.96,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000084,1,2017-12-15 00:00:49,2017-12-15 00:07:51,N,1,,,,,1,0.50,6,0.5,0.5,1,0,,0.3,8.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000085,1,2017-12-15 00:22:47,2017-12-15 00:30:21,N,1,,,,,4,0.60,6.5,0.5,0.5,0,0,,0.3,7.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000086,1,2017-12-15 00:32:04,2017-12-15 00:39:38,N,1,,,,,1,0.90,6.5,0.5,0.5,1.55,0,,0.3,9.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000087,1,2017-12-15 00:50:23,2017-12-15 01:05:29,N,1,,,,,1,3.00,13,0.5,0.5,2.85,0,,0.3,17.15,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000088,2,2017-12-15 00:08:01,2017-12-15 00:25:35,N,1,,,,,1,2.80,14,0.5,0.5,0,0,,0.3,15.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000089,2,2017-12-15 00:27:27,2017-12-15 00:55:33,N,1,,,,,1,4.05,19.5,0.5,0.5,5.2,0,,0.3,26,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000090,2,2017-12-15 00:11:22,2017-12-15 00:37:10,N,1,,,,,2,5.04,19.5,0.5,0.5,0,0,,0.3,20.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000091,2,2017-12-15 01:00:50,2017-12-15 
01:29:18,N,1,,,,,2,7.11,24.5,0.5,0.5,0,0,,0.3,25.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000092,1,2017-12-15 00:26:01,2017-12-15 00:48:49,N,1,,,,,1,14.60,40,0.5,0.5,8.25,0,,0.3,49.55,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000093,1,2017-12-15 00:10:53,2017-12-15 00:48:57,N,1,,,,,1,14.30,43.5,0.5,0.5,8.95,0,,0.3,53.75,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000094,2,2017-12-15 00:31:37,2017-12-15 00:37:18,N,1,,,,,1,0.48,5,0.5,0.5,1.26,0,,0.3,7.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000095,2,2017-12-15 00:41:42,2017-12-15 00:51:54,N,1,,,,,1,1.66,9,0.5,0.5,1,0,,0.3,11.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000096,2,2017-12-15 00:53:10,2017-12-15 01:12:40,N,1,,,,,1,3.08,15.5,0.5,0.5,1,0,,0.3,17.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000097,2,2017-12-15 00:17:32,2017-12-15 00:30:33,N,1,,,,,5,1.14,9,0.5,0.5,1,0,,0.3,11.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000098,2,2017-12-15 00:57:51,2017-12-15 01:11:23,N,1,,,,,5,2.01,10,0.5,0.5,2.26,0,,0.3,13.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000099,1,2017-12-15 00:43:52,2017-12-15 00:52:43,N,1,,,,,4,1.60,8,0.5,0.5,0,0,,0.3,9.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000100,1,2017-12-15 00:02:26,2017-12-15 00:13:59,N,1,,,,,1,1.10,8.5,0.5,0.5,0,0,,0.3,9.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000101,1,2017-12-15 00:14:19,2017-12-15 00:56:54,N,5,,,,,1,18.50,100,0,0,15,0,,0.3,115.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000102,1,2017-12-15 00:40:45,2017-12-15 00:54:30,N,1,,,,,1,2.10,12,0.5,0.5,2.2,0,,0.3,15.5,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000103,2,2017-12-15 00:37:36,2017-12-15 00:53:05,N,1,,,,,1,3.60,14,0.5,0.5,2,0,,0.3,17.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000104,2,2017-12-15 00:53:59,2017-12-15 
00:55:30,N,1,,,,,1,0.22,3,0.5,0.5,0,0,,0.3,4.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000105,1,2017-12-15 00:36:23,2017-12-15 00:51:06,N,1,,,,,1,2.30,11.5,0.5,0.5,2.55,0,,0.3,15.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000106,2,2017-12-15 00:19:27,2017-12-15 00:31:53,N,1,,,,,6,1.49,9.5,0.5,0.5,2.16,0,,0.3,12.96,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000107,2,2017-12-15 00:39:17,2017-12-15 00:44:01,N,1,,,,,6,0.79,5.5,0.5,0.5,0,0,,0.3,6.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000108,1,2017-12-15 00:09:38,2017-12-15 00:47:23,N,1,,,,,2,17.90,51,0.5,0.5,1,17.28,,0.3,70.58,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000109,1,2017-12-15 00:49:21,2017-12-15 01:11:02,N,1,,,,,1,14.00,38,0.5,0.5,1,0,,0.3,40.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000110,2,2017-12-15 00:32:24,2017-12-15 00:38:56,N,1,,,,,1,0.80,6,0.5,0.5,0,0,,0.3,7.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000111,2,2017-12-15 00:40:45,2017-12-15 00:55:44,N,1,,,,,1,1.54,10.5,0.5,0.5,2.36,0,,0.3,14.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000112,1,2017-12-15 00:34:39,2017-12-15 00:41:03,N,1,,,,,1,1.00,6.5,0.5,0.5,1.95,0,,0.3,9.75,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000113,1,2017-12-15 00:43:04,2017-12-15 00:58:19,N,1,,,,,1,2.80,12.5,0.5,0.5,2.75,0,,0.3,16.55,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000114,2,2017-12-15 00:32:50,2017-12-15 00:45:30,N,1,,,,,1,1.56,9.5,0.5,0.5,2.16,0,,0.3,12.96,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000115,2,2017-12-15 00:50:04,2017-12-15 00:59:07,N,1,,,,,1,1.70,8.5,0.5,0.5,2.45,0,,0.3,12.25,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000116,1,2017-12-15 00:31:07,2017-12-15 00:35:35,N,1,,,,,1,0.60,5,0.5,0.5,1,0,,0.3,7.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000117,1,2017-12-15 00:44:08,2017-12-15 
01:12:08,N,1,,,,,1,7.30,26.5,0.5,0.5,0,0,,0.3,27.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000118,2,2017-12-15 00:40:24,2017-12-15 00:45:14,N,1,,,,,1,1.47,6.5,0.5,0.5,0,0,,0.3,7.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000119,2,2017-12-15 00:46:19,2017-12-15 01:02:19,N,1,,,,,2,1.87,11,0.5,0.5,0,0,,0.3,12.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000120,2,2017-12-15 00:02:07,2017-12-15 00:04:51,N,1,,,,,1,0.36,4,0.5,0.5,0,0,,0.3,5.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000121,2,2017-12-15 00:22:58,2017-12-15 00:34:06,N,1,,,,,2,0.73,8,0.5,0.5,1.86,0,,0.3,11.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000122,2,2017-12-15 00:41:31,2017-12-15 00:48:10,N,1,,,,,1,0.82,6,0.5,0.5,1.46,0,,0.3,8.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000123,2,2017-12-15 00:52:19,2017-12-15 01:01:01,N,1,,,,,1,1.54,7.5,0.5,0.5,0,0,,0.3,8.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000124,1,2017-12-15 00:31:59,2017-12-15 00:44:42,N,1,,,,,0,2.20,10.5,0.5,0.5,2,0,,0.3,13.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000125,2,2017-12-15 00:38:43,2017-12-15 00:44:54,N,1,,,,,1,1.44,7,0.5,0.5,1.66,0,,0.3,9.96,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000126,2,2017-12-15 00:51:10,2017-12-15 01:05:47,N,1,,,,,5,2.75,12,0.5,0.5,0,0,,0.3,13.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000127,2,2017-12-15 00:09:57,2017-12-15 00:25:53,N,1,,,,,1,2.61,12.5,0.5,0.5,2.76,0,,0.3,16.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000128,2,2017-12-15 00:27:34,2017-12-15 00:44:22,N,1,,,,,1,7.11,22.5,0.5,0.5,3.8,0,,0.3,27.6,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000129,2,2017-12-15 00:29:13,2017-12-15 00:53:56,N,1,,,,,2,9.73,30,0.5,0.5,0,0,,0.3,31.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000130,2,2017-12-15 00:28:32,2017-12-15 
00:33:57,N,1,,,,,6,0.59,5.5,0.5,0.5,1.36,0,,0.3,8.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000131,2,2017-12-15 00:37:51,2017-12-15 01:55:38,N,1,,,,,6,15.34,58,0.5,0.5,0,0,,0.3,59.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000132,2,2017-12-15 00:04:24,2017-12-15 00:13:56,N,1,,,,,3,1.46,8,0.5,0.5,2,0,,0.3,11.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000133,2,2017-12-15 00:38:48,2017-12-15 01:01:48,N,1,,,,,2,2.72,15.5,0.5,0.5,4.2,0,,0.3,21,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000134,1,2017-12-15 00:33:15,2017-12-15 00:47:55,N,1,,,,,1,9.80,27,0.5,0.5,8,0,,0.3,36.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000135,2,2017-12-15 00:41:39,2017-12-15 01:14:47,N,1,,,,,1,8.90,30.5,0.5,0.5,0,0,,0.3,31.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000136,1,2017-12-15 00:50:41,2017-12-15 00:59:29,N,1,,,,,1,1.30,8,0.5,0.5,1.5,0,,0.3,10.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000137,2,2017-12-15 00:49:15,2017-12-15 00:58:07,N,1,,,,,4,1.87,9,0.5,0.5,2.06,0,,0.3,12.36,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000138,2,2017-12-14 23:51:20,2017-12-15 00:50:09,N,1,,,,,1,10.08,43.5,0.5,0.5,8.96,0,,0.3,53.76,1,,,,yellow,0.09,1,1.2,32,22,5.82,,,,,,,,,,,,,,,,,,,,\n1460000139,2,2017-12-15 00:05:31,2017-12-15 00:19:55,N,1,,,,,5,2.97,12,0.5,0.5,2.66,0,,0.3,15.96,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000140,2,2017-12-15 00:29:06,2017-12-15 01:09:26,N,1,,,,,3,13.26,41.5,0.5,0.5,0,0,,0.3,42.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000141,2,2017-12-15 00:26:34,2017-12-15 00:51:30,N,1,,,,,1,5.36,21,0.5,0.5,2,0,,0.3,24.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000142,2,2017-12-15 00:04:47,2017-12-15 00:23:09,N,1,,,,,1,2.68,13.5,0.5,0.5,4.44,0,,0.3,19.24,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000143,2,2017-12-15 00:31:16,2017-12-15 
00:44:45,N,1,,,,,1,2.40,11,0.5,0.5,1,0,,0.3,13.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000144,2,2017-12-15 00:49:37,2017-12-15 01:21:04,N,1,,,,,1,6.83,25.5,0.5,0.5,5.36,0,,0.3,32.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000145,2,2017-12-15 00:10:01,2017-12-15 00:16:49,N,1,,,,,1,0.91,6.5,0.5,0.5,0,0,,0.3,7.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000146,2,2017-12-15 00:19:03,2017-12-15 00:30:11,N,1,,,,,1,1.81,9.5,0.5,0.5,2,0,,0.3,12.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000147,2,2017-12-15 00:31:07,2017-12-15 00:34:53,N,1,,,,,1,0.92,5,0.5,0.5,0,0,,0.3,6.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000148,2,2017-12-15 00:43:48,2017-12-15 01:04:53,N,1,,,,,1,4.63,18,0.5,0.5,3.86,0,,0.3,23.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000149,1,2017-12-15 00:22:11,2017-12-15 00:32:11,N,1,,,,,1,2.20,10,0.5,0.5,2.25,0,,0.3,13.55,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000150,1,2017-12-15 00:48:02,2017-12-15 01:01:45,N,1,,,,,1,1.30,10.5,0.5,0.5,2.35,0,,0.3,14.15,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000151,2,2017-12-15 00:05:43,2017-12-15 00:12:57,N,1,,,,,1,2.05,9,0.5,0.5,2.06,0,,0.3,14.31,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000152,2,2017-12-15 00:32:48,2017-12-15 00:55:01,N,1,,,,,1,9.23,28,0.5,0.5,6,5.76,,0.3,41.06,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000153,2,2017-12-15 01:00:10,2017-12-15 01:16:04,N,1,,,,,1,4.37,15.5,0.5,0.5,3.36,0,,0.3,20.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000154,2,2017-12-15 00:08:13,2017-12-15 00:28:37,N,1,,,,,1,3.22,15,0.5,0.5,0,0,,0.3,16.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000155,2,2017-12-15 00:31:00,2017-12-15 00:41:23,N,1,,,,,1,1.60,8.5,0.5,0.5,0,0,,0.3,9.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000156,2,2017-12-15 00:42:50,2017-12-15 
00:53:34,N,1,,,,,1,1.37,8.5,0.5,0.5,0,0,,0.3,9.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000157,2,2017-12-15 00:55:45,2017-12-15 00:59:31,N,1,,,,,1,1.08,5.5,0.5,0.5,1.7,0,,0.3,8.5,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000158,2,2017-12-15 00:09:20,2017-12-15 00:23:24,N,1,,,,,1,2.62,11.5,0.5,0.5,3.2,0,,0.3,16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000159,2,2017-12-15 00:24:46,2017-12-15 00:36:33,N,1,,,,,1,1.35,9.5,0.5,0.5,0,0,,0.3,10.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000160,2,2017-12-15 00:40:16,2017-12-15 00:57:48,N,1,,,,,1,3.03,13.5,0.5,0.5,1,0,,0.3,15.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000161,1,2017-12-15 00:00:35,2017-12-15 00:12:21,N,1,,,,,1,2.00,9.5,0.5,0.5,3.2,0,,0.3,14,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000162,1,2017-12-15 00:13:48,2017-12-15 00:20:03,N,1,,,,,1,1.00,6,0.5,0.5,0,0,,0.3,7.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000163,1,2017-12-15 00:20:47,2017-12-15 00:31:56,N,1,,,,,1,1.70,9,0.5,0.5,2.05,0,,0.3,12.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000164,1,2017-12-15 00:33:29,2017-12-15 01:02:24,N,1,,,,,1,9.00,29,0.5,0.5,7.55,0,,0.3,37.85,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000165,1,2017-12-15 00:09:42,2017-12-15 00:18:12,N,1,,,,,2,2.40,9,0.5,0.5,0,0,,0.3,10.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000166,1,2017-12-15 00:57:43,2017-12-15 01:16:05,N,1,,,,,1,3.30,13.5,0.5,0.5,2.95,0,,0.3,17.75,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000167,2,2017-12-15 00:09:55,2017-12-15 00:19:30,N,1,,,,,1,1.45,8,0.5,0.5,0,0,,0.3,9.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000168,2,2017-12-15 00:20:11,2017-12-15 00:33:01,N,1,,,,,1,2.18,11,0.5,0.5,2.46,0,,0.3,14.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000169,2,2017-12-15 00:35:09,2017-12-15 
01:09:59,N,1,,,,,1,17.48,51,0.5,0.5,0,5.76,,0.3,58.06,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000170,2,2017-12-15 00:26:34,2017-12-15 00:41:45,N,1,,,,,1,1.81,11,0.5,0.5,3.69,0,,0.3,15.99,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000171,2,2017-12-15 00:46:38,2017-12-15 01:08:50,N,1,,,,,1,3.31,16,0.5,0.5,3.46,0,,0.3,20.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000172,2,2017-12-15 00:19:37,2017-12-15 00:35:33,N,1,,,,,2,1.61,10.5,0.5,0.5,5,0,,0.3,16.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000173,2,2017-12-15 00:38:17,2017-12-15 00:45:48,N,1,,,,,2,1.01,6.5,0.5,0.5,1.56,0,,0.3,9.36,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000174,2,2017-12-15 00:58:49,2017-12-15 01:35:08,N,1,,,,,2,5.57,26.5,0.5,0.5,5.56,0,,0.3,33.36,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000175,1,2017-12-15 00:17:10,2017-12-15 00:52:01,N,1,,,,,1,4.00,23,0.5,0.5,6,5.76,,0.3,36.06,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000176,2,2017-12-15 00:51:45,2017-12-15 01:16:36,N,2,,,,,2,16.55,52,0,0.5,10.56,0,,0.3,63.36,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000177,2,2017-12-15 00:53:53,2017-12-15 00:59:08,N,1,,,,,3,1.09,5.5,0.5,0.5,1.36,0,,0.3,8.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000178,2,2017-12-15 00:17:56,2017-12-15 00:41:33,N,1,,,,,1,5.12,19.5,0.5,0.5,4.16,0,,0.3,24.96,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000179,2,2017-12-15 00:45:41,2017-12-15 00:58:18,N,1,,,,,1,2.83,11.5,0.5,0.5,3.2,0,,0.3,16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000180,2,2017-12-15 00:01:12,2017-12-15 00:11:38,N,1,,,,,1,1.65,8.5,0.5,0.5,2.45,0,,0.3,12.25,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000181,2,2017-12-15 00:12:58,2017-12-15 00:19:54,N,1,,,,,1,0.89,6.5,0.5,0.5,1.95,0,,0.3,9.75,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000182,2,2017-12-15 00:24:50,2017-12-15 
00:26:01,N,5,,,,,1,0.00,100,0,0.5,8.2,0,,0.3,109,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000183,1,2017-12-15 00:32:04,2017-12-15 01:01:36,N,2,,,,,3,20.80,52,0,0.5,0,5.76,,0.3,58.56,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000184,2,2017-12-15 00:18:06,2017-12-15 01:07:36,N,1,,,,,1,25.24,71,0.5,0.5,15.61,5.76,,0.3,93.67,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000185,2,2017-12-15 00:10:06,2017-12-15 00:20:55,N,1,,,,,1,1.84,9.5,0.5,0.5,2.5,0,,0.3,13.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000186,2,2017-12-15 00:31:27,2017-12-15 00:45:44,N,1,,,,,1,1.66,10.5,0.5,0.5,1.2,0,,0.3,13,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000187,2,2017-12-15 00:53:13,2017-12-15 01:13:27,N,1,,,,,1,2.40,14,0.5,0.5,0,0,,0.3,15.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000188,2,2017-12-15 00:03:43,2017-12-15 00:11:39,N,1,,,,,2,1.58,7.5,0.5,0.5,0,0,,0.3,8.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000189,2,2017-12-15 00:18:02,2017-12-15 00:43:48,N,1,,,,,2,4.61,19,0.5,0.5,0,0,,0.3,20.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000190,2,2017-12-15 00:48:20,2017-12-15 01:09:19,N,1,,,,,2,2.67,15,0.5,0.5,1,0,,0.3,17.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000191,1,2017-12-15 00:05:45,2017-12-15 00:26:31,N,1,,,,,2,3.50,17.5,0.5,0.5,2,0,,0.3,20.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000192,1,2017-12-15 00:35:30,2017-12-15 00:47:19,N,1,,,,,1,2.50,10.5,0.5,0.5,2,0,,0.3,13.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000193,1,2017-12-15 00:55:01,2017-12-15 01:06:13,N,1,,,,,1,0.90,8.5,0.5,0.5,2.45,0,,0.3,12.25,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000194,2,2017-12-15 00:23:31,2017-12-15 00:30:12,N,1,,,,,1,0.98,6.5,0.5,0.5,1,0,,0.3,8.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000195,2,2017-12-15 00:33:17,2017-12-15 
00:55:09,N,1,,,,,1,3.20,15.5,0.5,0.5,3.36,0,,0.3,20.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000196,2,2017-12-15 00:56:39,2017-12-15 01:14:39,N,1,,,,,1,4.65,18,0.5,0.5,3.86,0,,0.3,23.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000197,2,2017-12-15 00:12:57,2017-12-15 00:18:57,N,1,,,,,1,1.03,6,0.5,0.5,0,0,,0.3,7.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000198,2,2017-12-15 00:20:41,2017-12-15 00:46:44,N,1,,,,,1,5.33,21,0.5,0.5,1,0,,0.3,23.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000199,2,2017-12-14 23:56:15,2017-12-15 00:20:06,N,1,,,,,1,10.45,30.5,0.5,0.5,7.51,5.76,,0.3,45.07,1,,,,yellow,0.09,1,1.2,32,22,5.82,,,,,,,,,,,,,,,,,,,,\n1460000200,2,2017-12-15 00:30:30,2017-12-15 01:22:30,N,1,,,,,1,13.51,48,0.5,0.5,0,0,,0.3,49.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000201,2,2017-12-15 00:03:15,2017-12-15 00:21:04,N,1,,,,,1,2.38,13,0.5,0.5,2.86,0,,0.3,17.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000202,2,2017-12-15 00:23:15,2017-12-15 00:27:40,N,1,,,,,1,0.47,5,0.5,0.5,1.26,0,,0.3,7.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000203,2,2017-12-15 00:31:32,2017-12-15 00:49:33,N,1,,,,,1,2.91,14,0.5,0.5,1,0,,0.3,16.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000204,2,2017-12-15 00:08:42,2017-12-15 00:12:21,N,1,,,,,1,0.39,4.5,0.5,0.5,0,0,,0.3,5.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000205,2,2017-12-15 00:13:31,2017-12-15 00:24:20,N,1,,,,,1,2.49,10.5,0.5,0.5,1.2,0,,0.3,13,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000206,2,2017-12-15 00:29:19,2017-12-15 00:58:28,N,1,,,,,1,3.72,20,0.5,0.5,4.26,0,,0.3,25.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000207,2,2017-12-15 00:28:14,2017-12-15 00:34:25,N,1,,,,,1,1.55,7,0.5,0.5,1,0,,0.3,9.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000208,2,2017-12-15 00:36:22,2017-12-15 
00:37:55,N,1,,,,,1,0.45,3.5,0.5,0.5,0.96,0,,0.3,5.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000209,2,2017-12-15 00:39:37,2017-12-15 01:35:03,N,1,,,,,1,13.96,47,0.5,0.5,5.08,5.76,,0.3,59.14,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000210,2,2017-12-15 00:25:04,2017-12-15 00:33:25,N,1,,,,,1,1.64,8,0.5,0.5,1.86,0,,0.3,11.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000211,2,2017-12-15 00:34:48,2017-12-15 00:43:08,N,1,,,,,1,1.60,8,0.5,0.5,1.86,0,,0.3,11.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000212,2,2017-12-15 01:01:52,2017-12-15 01:36:51,N,1,,,,,1,9.76,35,0.5,0.5,0,0,,0.3,36.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000213,1,2017-12-15 00:46:44,2017-12-15 00:49:28,N,1,,,,,1,0.50,4,0.5,0.5,0,0,,0.3,5.3,3,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000214,2,2017-12-15 00:06:12,2017-12-15 00:31:47,N,1,,,,,1,4.02,18,0.5,0.5,1.5,0,,0.3,20.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000215,2,2017-12-15 00:44:58,2017-12-15 01:05:54,N,1,,,,,1,4.67,18,0.5,0.5,0,0,,0.3,19.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000216,2,2017-12-15 00:17:30,2017-12-15 00:24:38,N,1,,,,,1,1.71,8,0.5,0.5,0,0,,0.3,9.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000217,2,2017-12-15 00:34:51,2017-12-15 00:47:29,N,1,,,,,1,1.75,10,0.5,0.5,2.26,0,,0.3,13.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000218,2,2017-12-15 00:50:18,2017-12-15 01:10:23,N,1,,,,,1,3.51,16,0.5,0.5,2.5,0,,0.3,19.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000219,1,2017-12-15 00:21:43,2017-12-15 00:30:20,N,1,,,,,1,1.20,7.5,0.5,0.5,1.75,0,,0.3,10.55,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000220,1,2017-12-15 00:45:42,2017-12-15 01:20:41,N,1,,,,,1,6.70,27,0.5,0.5,6.8,5.76,,0.3,40.86,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000221,2,2017-12-15 00:16:52,2017-12-15 
00:27:51,N,1,,,,,2,1.33,8.5,0.5,0.5,1,0,,0.3,10.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000222,2,2017-12-15 00:31:34,2017-12-15 00:45:50,N,1,,,,,2,6.73,20.5,0.5,0.5,4.36,0,,0.3,26.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000223,2,2017-12-15 00:02:41,2017-12-15 00:30:26,N,1,,,,,1,6.01,23,0.5,0.5,0,0,,0.3,24.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000224,2,2017-12-15 00:47:01,2017-12-15 00:55:49,N,1,,,,,1,1.98,8.5,0.5,0.5,2,0,,0.3,11.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000225,2,2017-12-15 00:58:15,2017-12-15 01:14:41,N,1,,,,,1,3.40,13.5,0.5,0.5,4.44,0,,0.3,19.24,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000226,2,2017-12-15 00:13:35,2017-12-15 00:27:29,N,1,,,,,1,2.59,11.5,0.5,0.5,3.84,0,,0.3,16.64,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000227,2,2017-12-15 00:38:30,2017-12-15 00:43:51,N,1,,,,,1,1.15,6,0.5,0.5,0,0,,0.3,7.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000228,2,2017-12-15 00:45:24,2017-12-15 00:51:18,N,1,,,,,1,1.00,6,0.5,0.5,0,0,,0.3,7.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000229,2,2017-12-15 00:55:04,2017-12-15 01:08:57,N,1,,,,,1,3.47,13,0.5,0.5,0,0,,0.3,14.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000230,2,2017-12-15 00:04:17,2017-12-15 00:10:33,N,1,,,,,1,1.23,6.5,0.5,0.5,1.95,0,,0.3,9.75,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000231,2,2017-12-15 00:13:03,2017-12-15 00:26:55,N,1,,,,,1,2.78,12,0.5,0.5,3.32,0,,0.3,16.62,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000232,2,2017-12-15 00:30:01,2017-12-15 00:37:15,N,1,,,,,1,0.89,6.5,0.5,0.5,0,0,,0.3,7.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000233,2,2017-12-15 00:38:43,2017-12-15 00:47:26,N,1,,,,,1,1.40,8,0.5,0.5,1.86,0,,0.3,11.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000234,2,2017-12-15 00:49:45,2017-12-15 
00:53:07,N,1,,,,,1,0.59,4.5,0.5,0.5,1.16,0,,0.3,6.96,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000235,2,2017-12-15 00:56:01,2017-12-15 01:07:10,N,1,,,,,1,2.46,10.5,0.5,0.5,0,0,,0.3,11.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000236,2,2017-12-15 00:33:40,2017-12-15 00:39:42,N,1,,,,,2,1.31,6.5,0.5,0.5,1,0,,0.3,8.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000237,2,2017-12-15 00:55:08,2017-12-15 01:29:46,N,1,,,,,2,11.64,37,0.5,0.5,8.81,5.76,,0.3,52.87,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000238,2,2017-12-15 00:38:07,2017-12-15 00:42:29,N,1,,,,,6,0.95,5.5,0.5,0.5,1.7,0,,0.3,8.5,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000239,1,2017-12-15 00:02:32,2017-12-15 00:14:54,N,1,,,,,2,2.40,11,0.5,0.5,0,0,,0.3,12.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000240,1,2017-12-15 00:47:13,2017-12-15 01:23:55,N,1,,,,,2,9.40,32.5,0.5,0.5,0,0,,0.3,33.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000241,2,2017-12-15 00:07:00,2017-12-15 00:22:39,N,1,,,,,2,2.66,12,0.5,0.5,3.99,0,,0.3,17.29,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000242,2,2017-12-15 00:24:20,2017-12-15 00:39:27,N,1,,,,,2,3.08,12.5,0.5,0.5,1,0,,0.3,14.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000243,2,2017-12-15 00:42:39,2017-12-15 00:57:57,N,1,,,,,2,2.33,11.5,0.5,0.5,2.56,0,,0.3,15.36,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000244,1,2017-12-15 00:00:39,2017-12-15 00:03:05,N,1,,,,,1,0.40,4,0.5,0.5,0,0,,0.3,5.3,3,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000245,2,2017-12-15 00:10:32,2017-12-15 00:43:16,N,1,,,,,5,8.04,28,0.5,0.5,3.7,0,,0.3,33,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000246,2,2017-12-15 00:35:42,2017-12-15 00:51:25,N,1,,,,,1,8.76,25,0.5,0.5,5,5.76,,0.3,37.06,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000247,2,2017-12-15 00:52:46,2017-12-15 
01:04:05,N,1,,,,,3,1.78,9.5,0.5,0.5,1,0,,0.3,11.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000248,2,2017-12-15 00:05:05,2017-12-15 00:11:34,N,1,,,,,1,1.08,6.5,0.5,0.5,0,0,,0.3,7.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000249,2,2017-12-15 00:12:28,2017-12-15 00:37:12,N,1,,,,,1,3.21,16.5,0.5,0.5,3.56,0,,0.3,21.36,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000250,2,2017-12-15 00:44:06,2017-12-15 01:04:05,N,1,,,,,1,5.06,17,0.5,0.5,0,0,,0.3,18.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000251,1,2017-12-15 00:05:32,2017-12-15 00:09:10,N,1,,,,,1,0.60,4.5,0.5,0.5,1.15,0,,0.3,6.95,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000252,1,2017-12-15 00:27:29,2017-12-15 00:39:05,N,1,,,,,1,2.20,10,0.5,0.5,2.25,0,,0.3,13.55,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000253,1,2017-12-15 00:07:54,2017-12-15 00:10:50,N,1,,,,,2,0.60,4,0.5,0.5,0,0,,0.3,5.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000254,1,2017-12-15 00:12:02,2017-12-15 00:17:01,N,1,,,,,2,1.00,6,0.5,0.5,0,0,,0.3,7.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000255,1,2017-12-15 00:24:58,2017-12-15 00:37:38,N,1,,,,,1,1.60,9.5,0.5,0.5,1.7,0,,0.3,12.5,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000256,1,2017-12-15 00:41:49,2017-12-15 00:53:48,N,1,,,,,1,3.90,13,0.5,0.5,0,0,,0.3,14.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000257,2,2017-12-14 23:58:09,2017-12-15 00:02:07,N,1,,,,,1,0.49,4.5,0.5,0.5,1.16,0,,0.3,6.96,1,,,,yellow,0.09,1,1.2,32,22,5.82,,,,,,,,,,,,,,,,,,,,\n1460000258,2,2017-12-15 00:05:04,2017-12-15 00:18:50,N,1,,,,,1,2.02,11,0.5,0.5,0,0,,0.3,12.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000259,2,2017-12-15 00:21:45,2017-12-15 00:55:32,N,1,,,,,1,5.18,23.5,0.5,0.5,4.96,0,,0.3,29.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000260,2,2017-12-15 00:04:25,2017-12-15 
00:11:39,N,1,,,,,1,1.13,7,0.5,0.5,2.08,0,,0.3,10.38,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000261,2,2017-12-15 00:20:57,2017-12-15 00:31:42,N,1,,,,,1,1.08,8,0.5,0.5,1,0,,0.3,10.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000262,2,2017-12-15 00:39:26,2017-12-15 00:50:31,N,1,,,,,1,2.09,9.5,0.5,0.5,0,0,,0.3,10.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000263,2,2017-12-15 00:53:53,2017-12-15 01:04:58,N,1,,,,,1,1.76,9.5,0.5,0.5,1,0,,0.3,11.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000264,2,2017-12-15 00:08:30,2017-12-15 00:20:52,N,1,,,,,1,1.75,9.5,0.5,0.5,0,0,,0.3,10.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000265,2,2017-12-15 00:23:40,2017-12-15 00:41:49,N,1,,,,,1,2.18,12.5,0.5,0.5,2.76,0,,0.3,16.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000266,2,2017-12-15 00:44:35,2017-12-15 00:49:32,N,1,,,,,1,0.79,5.5,0.5,0.5,2.04,0,,0.3,8.84,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000267,2,2017-12-15 00:52:36,2017-12-15 01:02:00,N,1,,,,,1,0.73,7.5,0.5,0.5,1.76,0,,0.3,10.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000268,2,2017-12-15 00:31:52,2017-12-15 00:47:40,N,1,,,,,1,1.80,11.5,0.5,0.5,1.08,0,,0.3,13.88,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000269,2,2017-12-15 00:53:43,2017-12-15 01:05:05,N,1,,,,,1,2.54,10.5,0.5,0.5,0,0,,0.3,11.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000270,2,2017-12-15 00:06:34,2017-12-15 00:13:18,N,1,,,,,2,1.21,7,0.5,0.5,1.66,0,,0.3,9.96,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000271,2,2017-12-15 00:18:09,2017-12-15 00:25:11,N,1,,,,,2,1.29,7,0.5,0.5,2.08,0,,0.3,10.38,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000272,2,2017-12-15 00:30:15,2017-12-15 00:38:42,N,1,,,,,2,1.38,7.5,0.5,0.5,0,0,,0.3,8.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000273,2,2017-12-15 00:40:42,2017-12-15 
00:44:57,N,1,,,,,2,1.32,6,0.5,0.5,0,0,,0.3,7.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000274,1,2017-12-15 00:47:31,2017-12-15 00:49:29,N,1,,,,,3,0.20,3.5,0.5,0.5,0,0,,0.3,4.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000275,1,2017-12-15 00:52:16,2017-12-15 01:28:36,N,1,,,,,1,7.30,29.5,0.5,0.5,0,0,,0.3,30.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000276,1,2017-12-15 00:01:57,2017-12-15 00:18:51,N,1,,,,,1,11.40,31.5,0.5,0.5,8.2,0,,0.3,41,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000277,1,2017-12-15 00:39:16,2017-12-15 01:04:21,N,1,,,,,1,10.10,30.5,0.5,0.5,7.5,5.76,,0.3,45.06,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000278,1,2017-12-15 00:18:06,2017-12-15 00:26:33,N,1,,,,,1,1.70,8,0.5,0.5,1.85,0,,0.3,11.15,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000279,1,2017-12-15 00:28:42,2017-12-15 01:10:48,N,1,,,,,1,8.70,34.5,0.5,0.5,7.15,0,,0.3,42.95,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000280,2,2017-12-15 00:20:44,2017-12-15 00:47:16,N,1,,,,,1,3.10,17.5,0.5,0.5,3.76,0,,0.3,22.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000281,2,2017-12-15 00:48:38,2017-12-15 00:58:46,N,1,,,,,1,1.87,9,0.5,0.5,2.06,0,,0.3,12.36,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000282,2,2017-12-15 00:04:31,2017-12-15 00:25:09,N,1,,,,,1,2.28,14.5,0.5,0.5,3.16,0,,0.3,18.96,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000283,2,2017-12-15 00:26:46,2017-12-15 00:33:08,N,1,,,,,1,0.56,5.5,0.5,0.5,0,0,,0.3,6.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000284,2,2017-12-15 00:37:12,2017-12-15 01:04:43,N,1,,,,,1,6.54,24,0.5,0.5,0,0,,0.3,25.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000285,2,2017-12-15 00:32:16,2017-12-15 01:27:04,N,1,,,,,1,14.64,50,0.5,0.5,10.26,0,,0.3,61.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000286,2,2017-12-15 00:59:27,2017-12-15 
01:20:51,N,1,,,,,1,5.95,20,0.5,0.5,0,0,,0.3,21.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000287,2,2017-12-15 00:48:18,2017-12-15 01:14:34,N,1,,,,,1,10.73,32.5,0.5,0.5,0,5.76,,0.3,39.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000288,1,2017-12-15 00:07:31,2017-12-15 00:19:09,N,1,,,,,2,2.20,10,0.5,0.5,0,0,,0.3,11.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000289,1,2017-12-15 00:24:03,2017-12-15 00:42:25,N,1,,,,,1,2.30,13,0.5,0.5,3.55,0,,0.3,17.85,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000290,1,2017-12-15 00:44:12,2017-12-15 00:55:18,N,1,,,,,1,1.80,9.5,0.5,0.5,2.15,0,,0.3,12.95,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000291,1,2017-12-15 00:56:48,2017-12-15 01:13:09,N,1,,,,,2,2.60,13,0.5,0.5,0,0,,0.3,14.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000292,1,2017-12-15 00:19:32,2017-12-15 00:25:08,N,1,,,,,1,1.20,6,0.5,0.5,1,0,,0.3,8.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000293,1,2017-12-15 00:27:00,2017-12-15 00:51:04,N,1,,,,,1,4.60,19,0.5,0.5,4.05,0,,0.3,24.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000294,1,2017-12-15 00:20:38,2017-12-15 00:25:15,N,1,,,,,1,1.20,6,0.5,0.5,1.45,0,,0.3,8.75,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000295,1,2017-12-15 00:41:06,2017-12-15 00:52:18,N,1,,,,,1,1.80,9.5,0.5,0.5,2.7,0,,0.3,13.5,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000296,1,2017-12-15 00:55:55,2017-12-15 01:12:31,N,1,,,,,1,3.30,14,0.5,0.5,1,0,,0.3,16.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000297,2,2017-12-15 00:05:55,2017-12-15 00:14:30,N,1,,,,,1,1.63,8.5,0.5,0.5,1.96,0,,0.3,11.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000298,2,2017-12-15 00:27:26,2017-12-15 01:03:16,N,1,,,,,1,6.38,26,0.5,0.5,5.46,0,,0.3,32.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000299,1,2017-12-15 00:08:21,2017-12-15 
00:28:24,N,1,,,,,1,3.50,15,0.5,0.5,0,0,,0.3,16.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000300,1,2017-12-15 00:32:08,2017-12-15 00:43:23,N,1,,,,,2,2.80,11,0.5,0.5,0,0,,0.3,12.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000301,1,2017-12-15 00:49:47,2017-12-15 00:53:20,N,1,,,,,2,0.60,4.5,0.5,0.5,0,0,,0.3,5.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000302,1,2017-12-15 00:30:10,2017-12-15 01:07:08,N,1,,,,,1,9.40,33.5,0.5,0.5,5,5.76,,0.3,45.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000303,1,2017-12-15 00:04:37,2017-12-15 00:11:07,N,1,,,,,1,1.00,6.5,0.5,0.5,0,0,,0.3,7.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000304,1,2017-12-15 00:11:47,2017-12-15 00:44:18,N,1,,,,,1,5.70,24.5,0.5,0.5,0,0,,0.3,25.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000305,1,2017-12-15 00:58:49,2017-12-15 01:17:57,N,1,,,,,1,5.20,18.5,0.5,0.5,3.95,0,,0.3,23.75,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000306,1,2017-12-15 00:02:04,2017-12-15 00:06:33,N,1,,,,,1,0.80,5.5,0.5,0.5,0,0,,0.3,6.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000307,1,2017-12-15 00:19:59,2017-12-15 00:38:04,Y,1,,,,,1,2.80,13,0.5,0.5,3,0,,0.3,17.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000308,2,2017-12-15 00:04:13,2017-12-15 00:44:18,N,1,,,,,6,7.07,30,0.5,0.5,6.26,0,,0.3,37.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000309,2,2017-12-15 00:54:44,2017-12-15 01:03:50,N,1,,,,,5,1.55,8,0.5,0.5,2.32,0,,0.3,11.62,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000310,1,2017-12-15 00:43:05,2017-12-15 00:52:01,N,1,,,,,1,2.10,9,0.5,0.5,2.05,0,,0.3,12.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000311,1,2017-12-15 00:05:03,2017-12-15 00:18:37,N,1,,,,,1,2.60,11,0.5,0.5,2.45,0,,0.3,14.75,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000312,1,2017-12-15 00:20:35,2017-12-15 
00:38:49,N,1,,,,,1,2.50,13.5,0.5,0.5,4.4,0,,0.3,19.2,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000313,1,2017-12-15 00:39:52,2017-12-15 00:44:38,N,1,,,,,1,1.10,5.5,0.5,0.5,1.35,0,,0.3,8.15,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000314,1,2017-12-15 00:49:40,2017-12-15 01:00:41,N,1,,,,,1,1.70,9,0.5,0.5,2.05,0,,0.3,12.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000315,1,2017-12-15 00:47:28,2017-12-15 00:52:07,N,1,,,,,1,1.20,6.5,0.5,0.5,1.55,0,,0.3,9.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000316,2,2017-12-15 00:06:56,2017-12-15 00:44:48,N,1,,,,,5,3.86,23.5,0.5,0.5,4.96,0,,0.3,29.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000317,2,2017-12-15 00:55:16,2017-12-15 01:06:36,N,1,,,,,5,1.94,10,0.5,0.5,0,0,,0.3,11.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000318,1,2017-12-15 00:30:37,2017-12-15 00:30:47,N,2,,,,,1,0.00,52,0,0.5,8,5.76,,0.3,66.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000319,1,2017-12-15 00:34:47,2017-12-15 01:55:20,N,1,,,,,1,20.40,75.5,0.5,0.5,5,5.76,,0.3,87.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000320,2,2017-12-14 23:51:49,2017-12-15 00:09:43,N,1,,,,,1,1.19,11.5,0.5,0.5,0,0,,0.3,12.8,2,,,,yellow,0.09,1,1.2,32,22,5.82,,,,,,,,,,,,,,,,,,,,\n1460000321,2,2017-12-15 00:19:42,2017-12-15 00:27:12,N,1,,,,,1,0.78,6.5,0.5,0.5,0,0,,0.3,7.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000322,2,2017-12-15 00:28:34,2017-12-15 00:40:23,N,1,,,,,1,2.75,11.5,0.5,0.5,1.5,0,,0.3,14.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000323,2,2017-12-15 00:35:15,2017-12-15 00:55:59,N,1,,,,,1,7.24,23.5,0.5,0.5,0,5.76,,0.3,30.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000324,2,2017-12-15 00:26:02,2017-12-15 00:29:23,N,1,,,,,1,0.35,4,0.5,0.5,0,0,,0.3,5.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000325,2,2017-12-15 00:31:42,2017-12-15 
00:36:19,N,1,,,,,1,1.07,5.5,0.5,0.5,1.36,0,,0.3,8.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000326,2,2017-12-15 00:36:55,2017-12-15 00:43:24,N,1,,,,,1,1.38,6.5,0.5,0.5,0,0,,0.3,7.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000327,2,2017-12-15 00:45:40,2017-12-15 01:24:42,N,1,,,,,1,6.01,27,0.5,0.5,4.24,0,,0.3,32.54,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000328,2,2017-12-15 00:11:05,2017-12-15 00:19:49,N,1,,,,,2,1.16,7.5,0.5,0.5,1,0,,0.3,9.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000329,2,2017-12-15 00:20:45,2017-12-15 00:54:08,N,1,,,,,2,5.22,23.5,0.5,0.5,2.4,0,,0.3,27.2,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000330,2,2017-12-14 23:59:30,2017-12-15 00:27:27,N,1,,,,,6,4.86,21,0.5,0.5,5.58,0,,0.3,27.88,1,,,,yellow,0.09,1,1.2,32,22,5.82,,,,,,,,,,,,,,,,,,,,\n1460000331,2,2017-12-15 00:32:34,2017-12-15 00:43:36,N,1,,,,,6,2.73,10.5,0.5,0.5,0.59,0,,0.3,12.39,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000332,2,2017-12-15 00:55:30,2017-12-15 01:14:02,N,1,,,,,6,3.22,14.5,0.5,0.5,1.5,0,,0.3,17.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000333,2,2017-12-15 00:48:56,2017-12-15 00:57:15,N,1,,,,,6,1.23,7.5,0.5,0.5,1.76,0,,0.3,10.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000334,1,2017-12-15 00:17:24,2017-12-15 00:28:44,N,1,,,,,1,2.10,10,0.5,0.5,0,0,,0.3,11.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000335,1,2017-12-15 00:29:20,2017-12-15 00:40:38,N,1,,,,,1,1.70,9,0.5,0.5,2.5,0,,0.3,12.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000336,1,2017-12-15 00:43:13,2017-12-15 01:04:32,N,1,,,,,1,1.90,14.5,0.5,0.5,3.15,0,,0.3,18.95,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000337,2,2017-12-15 00:18:55,2017-12-15 00:35:52,N,1,,,,,5,2.25,12.5,0.5,0.5,2,0,,0.3,15.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000338,2,2017-12-15 00:37:39,2017-12-15 
00:52:21,N,1,,,,,5,1.76,11,0.5,0.5,2.46,0,,0.3,14.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000339,2,2017-12-15 00:02:22,2017-12-15 00:18:02,N,1,,,,,2,1.79,11.5,0.5,0.5,1.5,0,,0.3,14.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000340,2,2017-12-15 00:19:03,2017-12-15 00:55:49,N,1,,,,,2,8.96,33.5,0.5,0.5,6.96,0,,0.3,41.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000341,2,2017-12-15 00:38:47,2017-12-15 01:02:25,N,1,,,,,1,5.67,19.5,0.5,0.5,4.16,0,,0.3,24.96,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000342,1,2017-12-15 00:54:07,2017-12-15 00:56:25,N,1,,,,,1,0.30,4,0.5,0.5,0,0,,0.3,5.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000343,1,2017-12-15 00:59:33,2017-12-15 01:15:36,N,1,,,,,1,4.10,16,0.5,0.5,3.45,0,,0.3,20.75,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000344,1,2017-12-15 00:16:35,2017-12-15 00:38:22,N,1,,,,,1,3.70,16.5,0.5,0.5,3.55,0,,0.3,21.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000345,1,2017-12-15 00:41:35,2017-12-15 00:46:07,N,1,,,,,1,1.10,5.5,0.5,0.5,0,0,,0.3,6.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000346,1,2017-12-15 00:28:16,2017-12-15 00:49:43,N,1,,,,,1,2.10,14.5,0.5,0.5,1,0,,0.3,16.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000347,1,2017-12-15 00:50:32,2017-12-15 01:02:25,N,1,,,,,1,2.00,9.5,0.5,0.5,2.15,0,,0.3,12.95,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000348,2,2017-12-15 00:06:30,2017-12-15 00:32:46,N,2,,,,,2,18.00,52,0,0.5,8,0,,0.3,60.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000349,2,2017-12-15 00:35:57,2017-12-15 00:49:57,N,1,,,,,2,3.00,13,0.5,0.5,2.86,0,,0.3,17.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000350,2,2017-12-15 00:51:07,2017-12-15 01:03:37,N,1,,,,,2,1.78,10.5,0.5,0.5,0,0,,0.3,11.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000351,2,2017-12-14 23:58:18,2017-12-15 
00:17:19,N,1,,,,,1,2.89,13.5,0.5,0.5,0,0,,0.3,14.8,1,,,,yellow,0.09,1,1.2,32,22,5.82,,,,,,,,,,,,,,,,,,,,\n1460000352,2,2017-12-15 00:25:33,2017-12-15 00:37:17,N,1,,,,,1,1.84,9.5,0.5,0.5,2.7,0,,0.3,13.5,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000353,2,2017-12-15 00:38:50,2017-12-15 01:12:55,N,5,,,,,1,20.52,95,0,0.5,21.26,10.5,,0.3,127.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000354,1,2017-12-15 00:10:39,2017-12-15 00:23:43,N,1,,,,,0,1.80,10.5,0.5,0.5,2.35,0,,0.3,14.15,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000355,1,2017-12-15 00:26:21,2017-12-15 00:46:41,N,1,,,,,0,3.90,15.5,0.5,0.5,0,0,,0.3,16.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000356,1,2017-12-15 00:51:23,2017-12-15 01:23:51,N,1,,,,,0,4.60,21,0.5,0.5,3.2,0,,0.3,25.5,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000357,1,2017-12-15 00:01:01,2017-12-15 00:09:09,N,1,,,,,1,1.50,7.5,0.5,0.5,2.2,0,,0.3,11,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000358,1,2017-12-15 00:11:16,2017-12-15 00:28:12,N,1,,,,,1,4.20,16.5,0.5,0.5,3.55,0,,0.3,21.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000359,1,2017-12-15 00:41:38,2017-12-15 00:49:41,N,1,,,,,1,1.00,7,0.5,0.5,1,0,,0.3,9.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000360,1,2017-12-15 00:53:12,2017-12-15 01:03:55,N,1,,,,,1,2.30,10,0.5,0.5,0,0,,0.3,11.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000361,2,2017-12-15 00:17:03,2017-12-15 00:23:50,N,1,,,,,6,0.78,6.5,0.5,0.5,1.56,0,,0.3,9.36,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000362,2,2017-12-15 00:26:35,2017-12-15 00:30:26,N,1,,,,,6,0.70,4.5,0.5,0.5,0,0,,0.3,5.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000363,2,2017-12-15 00:33:08,2017-12-15 00:49:10,N,1,,,,,6,3.51,14.5,0.5,0.5,0,0,,0.3,15.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000364,2,2017-12-15 00:54:31,2017-12-15 
01:18:59,N,1,,,,,6,6.01,21.5,0.5,0.5,0,0,,0.3,22.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000365,1,2017-12-15 00:06:33,2017-12-15 00:44:01,N,1,,,,,1,9.40,34,0.5,0.5,7.05,0,,0.3,42.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000366,1,2017-12-15 00:24:39,2017-12-15 00:49:07,N,1,,,,,1,2.60,16.5,0.5,0.5,0,0,,0.3,17.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000367,1,2017-12-15 00:53:08,2017-12-15 01:07:56,N,1,,,,,1,3.30,13,0.5,0.5,3.55,0,,0.3,17.85,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000368,1,2017-12-15 00:02:28,2017-12-15 00:13:55,N,1,,,,,1,1.80,9.5,0.5,0.5,2,0,,0.3,12.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000369,1,2017-12-15 00:33:04,2017-12-15 00:40:02,N,1,,,,,1,2.10,8,0.5,0.5,1.85,0,,0.3,11.15,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000370,1,2017-12-15 00:03:29,2017-12-15 00:08:56,N,1,,,,,1,1.00,6,0.5,0.5,1.45,0,,0.3,8.75,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000371,1,2017-12-15 00:10:46,2017-12-15 00:20:15,N,1,,,,,1,1.20,8,0.5,0.5,1.85,0,,0.3,11.15,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000372,1,2017-12-15 00:22:56,2017-12-15 00:37:19,N,1,,,,,1,4.00,15,0.5,0.5,0,0,,0.3,16.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000373,1,2017-12-15 00:42:52,2017-12-15 00:51:19,N,1,,,,,1,5.00,15.5,0.5,0.5,3.35,0,,0.3,20.15,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000374,1,2017-12-15 00:53:47,2017-12-15 01:23:10,N,1,,,,,1,4.70,21,0.5,0.5,0,0,,0.3,22.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000375,2,2017-12-15 00:36:27,2017-12-15 01:12:34,N,1,,,,,1,10.62,35.5,0.5,0.5,4.2,0,,0.3,41,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000376,1,2017-12-15 00:28:33,2017-12-15 00:45:42,N,1,,,,,1,3.10,14,0.5,0.5,3.8,0,,0.3,19.1,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000377,2,2017-12-15 00:15:28,2017-12-15 
00:50:41,N,1,,,,,2,6.56,28,0.5,0.5,0,0,,0.3,29.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000378,2,2017-12-15 00:56:38,2017-12-15 01:09:17,N,1,,,,,2,6.86,20.5,0.5,0.5,1.1,0,,0.3,22.9,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000379,1,2017-12-15 00:13:36,2017-12-15 01:14:17,N,1,,,,,1,13.00,47.5,0.5,0.5,9.75,0,,0.3,58.55,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000380,1,2017-12-15 00:03:32,2017-12-15 00:28:38,N,1,,,,,1,4.60,19.5,0.5,0.5,1,0,,0.3,21.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000381,1,2017-12-15 00:46:55,2017-12-15 01:10:49,N,1,,,,,1,5.00,20.5,0.5,0.5,0,5.76,,0.3,27.56,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000382,1,2017-12-15 00:09:42,2017-12-15 00:20:45,N,1,,,,,1,2.50,10.5,0.5,0.5,0,0,,0.3,11.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000383,1,2017-12-15 00:23:14,2017-12-15 00:27:02,N,1,,,,,1,0.60,5,0.5,0.5,0,0,,0.3,6.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000384,1,2017-12-15 00:31:40,2017-12-15 00:36:49,N,1,,,,,1,1.00,6,0.5,0.5,2,0,,0.3,9.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000385,1,2017-12-15 00:52:15,2017-12-15 01:13:40,N,1,,,,,1,4.60,17.5,0.5,0.5,3.75,0,,0.3,22.55,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000386,2,2017-12-15 00:13:40,2017-12-15 00:33:20,N,1,,,,,1,1.93,13.5,0.5,0.5,1.48,0,,0.3,16.28,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000387,2,2017-12-15 00:52:47,2017-12-15 01:19:23,N,1,,,,,1,5.91,23.5,0.5,0.5,7.44,0,,0.3,32.24,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000388,1,2017-12-15 00:38:36,2017-12-15 00:56:37,N,1,,,,,1,5.90,20,0.5,0.5,4.25,0,,0.3,25.55,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000389,2,2017-12-15 00:15:53,2017-12-15 00:48:54,N,1,,,,,1,3.61,22,0.5,0.5,1,0,,0.3,24.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000390,2,2017-12-15 00:20:20,2017-12-15 
00:40:11,N,1,,,,,1,2.02,13.5,0.5,0.5,2.96,0,,0.3,17.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000391,2,2017-12-15 00:41:22,2017-12-15 00:57:21,N,1,,,,,1,3.29,14,0.5,0.5,3.06,0,,0.3,18.36,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000392,2,2017-12-15 00:59:16,2017-12-15 01:09:53,N,1,,,,,1,1.08,8,0.5,0.5,1.86,0,,0.3,11.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000393,1,2017-12-15 00:44:17,2017-12-15 01:13:28,N,2,,,,,1,17.20,52,0,0.5,10.56,0,,0.3,63.36,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000394,2,2017-12-15 00:06:55,2017-12-15 00:17:28,N,1,,,,,2,1.58,9,0.5,0.5,2.06,0,,0.3,12.36,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000395,2,2017-12-15 00:22:35,2017-12-15 00:34:52,N,1,,,,,2,1.84,10,0.5,0.5,2.26,0,,0.3,13.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000396,2,2017-12-15 00:44:25,2017-12-15 00:54:41,N,1,,,,,2,0.92,8,0.5,0.5,0,0,,0.3,9.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000397,2,2017-12-15 00:10:17,2017-12-15 00:44:44,N,1,,,,,2,4.78,23,0.5,0.5,4.86,0,,0.3,29.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000398,1,2017-12-15 00:07:26,2017-12-15 00:11:49,N,1,,,,,1,0.80,5,0.5,0.5,0,0,,0.3,6.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000399,1,2017-12-15 00:16:45,2017-12-15 00:46:00,N,1,,,,,1,5.10,21.5,0.5,0.5,4.55,0,,0.3,27.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000400,1,2017-12-15 00:56:23,2017-12-15 00:59:44,N,1,,,,,1,0.80,4.5,0.5,0.5,1.45,0,,0.3,7.25,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000401,1,2017-12-15 00:02:45,2017-12-15 00:17:46,N,1,,,,,1,2.60,13,0.5,0.5,2.85,0,,0.3,17.15,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000402,1,2017-12-15 00:33:58,2017-12-15 01:10:08,N,1,,,,,1,13.80,42.5,0.5,0.5,12,0,,0.3,55.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000403,1,2017-12-15 00:08:20,2017-12-15 
00:19:54,N,1,,,,,1,2.20,10.5,0.5,0.5,2.35,0,,0.3,14.15,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000404,1,2017-12-15 00:22:02,2017-12-15 00:24:32,N,1,,,,,1,0.40,4,0.5,0.5,0,0,,0.3,5.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000405,1,2017-12-15 00:49:45,2017-12-15 01:07:55,N,1,,,,,1,4.30,16.5,0.5,0.5,3.55,0,,0.3,21.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000406,1,2017-12-15 00:19:42,2017-12-15 00:35:10,N,1,,,,,1,7.20,21.5,0.5,0.5,4,0,,0.3,26.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000407,1,2017-12-15 00:36:17,2017-12-15 00:39:28,N,1,,,,,1,0.60,4.5,0.5,0.5,1.2,0,,0.3,7,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000408,1,2017-12-15 00:44:47,2017-12-15 00:47:53,N,1,,,,,1,0.50,4,0.5,0.5,1.05,0,,0.3,6.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000409,1,2017-12-15 00:06:26,2017-12-15 01:03:38,N,1,,,,,1,12.30,48,0.5,0.5,13.75,5.76,,0.3,68.81,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000410,1,2017-12-15 00:03:14,2017-12-15 00:10:39,N,1,,,,,1,1.30,7,0.5,0.5,1.5,0,,0.3,9.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000411,1,2017-12-15 00:12:39,2017-12-15 00:37:19,N,1,,,,,1,4.20,18,0.5,0.5,2.5,0,,0.3,21.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000412,1,2017-12-15 00:42:49,2017-12-15 00:46:16,N,1,,,,,1,0.80,5,0.5,0.5,1.55,0,,0.3,7.85,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000413,2,2017-12-14 19:10:23,2017-12-14 19:18:28,N,1,,,,,1,1.02,7,0.5,0.5,0,0,,0.3,8.3,2,,,,yellow,0.09,1,1.2,32,22,5.82,,,,,,,,,,,,,,,,,,,,\n1460000414,2,2017-12-14 19:23:38,2017-12-15 19:06:32,N,5,,,,,3,4.24,60,0,0.5,0,10.5,,0.3,71.3,1,,,,yellow,0.09,1,1.2,32,22,5.82,,,,,,,,,,,,,,,,,,,,\n1460000415,2,2017-12-15 00:55:49,2017-12-15 01:11:02,N,1,,,,,2,7.83,23.5,0.5,0.5,0,0,,0.3,24.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000416,2,2017-12-15 00:05:09,2017-12-15 
00:20:48,N,1,,,,,2,1.36,10.5,0.5,0.5,2.36,0,,0.3,14.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000417,2,2017-12-15 00:29:27,2017-12-15 00:52:13,N,1,,,,,2,3.98,17.5,0.5,0.5,3.76,0,,0.3,22.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000418,2,2017-12-15 00:02:35,2017-12-15 00:21:33,N,1,,,,,1,2.34,13.5,0.5,0.5,2.96,0,,0.3,17.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000419,2,2017-12-15 00:26:40,2017-12-15 01:10:02,N,1,,,,,1,9.00,34.5,0.5,0.5,2.2,0,,0.3,38,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000420,2,2017-12-15 00:41:13,2017-12-15 00:47:16,N,1,,,,,5,0.95,6,0.5,0.5,1.46,0,,0.3,8.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000421,2,2017-12-15 00:48:07,2017-12-15 00:51:55,N,1,,,,,5,0.82,5,0.5,0.5,0,0,,0.3,6.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000422,2,2017-12-15 00:54:41,2017-12-15 01:36:33,N,1,,,,,5,8.13,31.5,0.5,0.5,6.56,0,,0.3,39.36,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000423,1,2017-12-15 00:10:16,2017-12-15 00:30:12,N,1,,,,,3,4.20,17,0.5,0.5,0,0,,0.3,18.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000424,1,2017-12-15 00:09:09,2017-12-15 00:36:56,N,1,,,,,1,4.20,20,0.5,0.5,0.01,0,,0.3,21.31,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000425,1,2017-12-15 00:54:57,2017-12-15 01:17:59,N,1,,,,,1,5.40,19.5,0.5,0.5,4.15,0,,0.3,24.95,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000426,2,2017-12-15 00:13:43,2017-12-15 00:42:45,N,1,,,,,1,6.63,25.5,0.5,0.5,9.77,5.76,,0.3,42.33,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000427,2,2017-12-15 00:09:34,2017-12-15 00:21:47,N,1,,,,,1,1.91,10,0.5,0.5,0,0,,0.3,11.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000428,2,2017-12-15 00:26:37,2017-12-15 00:54:33,N,1,,,,,1,6.54,25,0.5,0.5,0,5.76,,0.3,32.06,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000429,1,2017-12-15 00:09:48,2017-12-15 
00:19:19,N,1,,,,,1,2.60,10.5,0.5,0.5,2.35,0,,0.3,14.15,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000430,1,2017-12-15 00:32:26,2017-12-15 00:36:48,N,1,,,,,1,0.50,4.5,0.5,0.5,0,0,,0.3,5.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000431,1,2017-12-15 00:42:32,2017-12-15 01:03:48,N,1,,,,,1,3.90,16.5,0.5,0.5,0,0,,0.3,17.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000432,2,2017-12-14 23:59:37,2017-12-15 00:04:06,N,1,,,,,1,0.61,5,0.5,0.5,1,0,,0.3,7.3,1,,,,yellow,0.09,1,1.2,32,22,5.82,,,,,,,,,,,,,,,,,,,,\n1460000433,2,2017-12-15 00:06:30,2017-12-15 00:32:11,N,1,,,,,1,4.48,18.5,0.5,0.5,2,0,,0.3,21.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000434,2,2017-12-15 00:40:44,2017-12-15 01:00:28,N,1,,,,,1,4.72,16.5,0.5,0.5,4.71,5.76,,0.3,28.27,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000435,2,2017-12-15 00:10:53,2017-12-15 00:17:52,N,1,,,,,1,1.12,6.5,0.5,0.5,0,0,,0.3,7.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000436,2,2017-12-15 00:35:36,2017-12-15 00:59:12,N,1,,,,,1,10.99,32.5,0.5,0.5,6.76,0,,0.3,40.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000437,2,2017-12-15 00:43:04,2017-12-15 01:16:19,N,2,,,,,1,17.90,52,0,0.5,11.71,5.76,,0.3,70.27,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000438,1,2017-12-15 00:02:42,2017-12-15 00:10:12,N,1,,,,,1,1.30,7,0.5,0.5,1.65,0,,0.3,9.95,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000439,1,2017-12-15 00:11:22,2017-12-15 00:40:14,N,1,,,,,1,7.30,25.5,0.5,0.5,6.7,0,,0.3,33.5,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000440,1,2017-12-15 00:20:08,2017-12-15 00:40:28,N,1,,,,,1,2.40,14,0.5,0.5,3.05,0,,0.3,18.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000441,1,2017-12-15 00:43:20,2017-12-15 01:08:32,N,1,,,,,2,1.30,15.5,0.5,0.5,5,0,,0.3,21.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000442,1,2017-12-15 00:11:32,2017-12-15 
00:47:05,N,1,,,,,1,5.10,26.5,0.5,0.5,8.3,0,,0.3,36.1,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000443,2,2017-12-15 00:09:29,2017-12-15 00:29:03,N,1,,,,,1,5.70,20.5,0.5,0.5,4.36,0,,0.3,26.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000444,2,2017-12-15 00:30:13,2017-12-15 00:37:34,N,1,,,,,1,1.67,8,0.5,0.5,0,0,,0.3,9.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000445,2,2017-12-14 23:58:56,2017-12-15 00:30:50,N,2,,,,,1,20.04,52,0,0.5,11.71,5.76,,0.3,70.27,1,,,,yellow,0.09,1,1.2,32,22,5.82,,,,,,,,,,,,,,,,,,,,\n1460000446,2,2017-12-15 00:36:57,2017-12-15 00:45:41,N,1,,,,,1,1.46,8,0.5,0.5,0,0,,0.3,9.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000447,1,2017-12-15 00:04:54,2017-12-15 00:37:27,N,1,,,,,1,4.60,23,0.5,0.5,0,0,,0.3,24.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000448,2,2017-12-15 00:48:43,2017-12-15 00:53:39,N,1,,,,,1,1.30,6,0.5,0.5,1.46,0,,0.3,8.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000449,2,2017-12-15 00:57:01,2017-12-15 01:01:12,N,1,,,,,1,0.68,5,0.5,0.5,0.63,0,,0.3,6.93,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000450,1,2017-12-15 00:05:18,2017-12-15 00:49:34,N,1,,,,,1,15.40,51,0.5,0.5,5,0,,0.3,57.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000451,2,2017-12-15 00:45:45,2017-12-15 01:07:45,N,1,,,,,1,9.52,28,0.5,0.5,0,5.76,,0.3,35.06,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000452,2,2017-12-15 00:08:35,2017-12-15 00:56:01,N,1,,,,,5,7.01,31,0.5,0.5,9.69,0,,0.3,41.99,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000453,1,2017-12-15 00:10:24,2017-12-15 00:31:26,N,1,,,,,1,3.20,16,0.5,0.5,0,0,,0.3,17.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000454,2,2017-12-15 00:04:37,2017-12-15 00:53:48,N,1,,,,,1,11.07,40.5,0.5,0.5,5,5.76,,0.3,52.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000455,1,2017-12-15 00:05:10,2017-12-15 
00:20:23,N,1,,,,,1,3.10,13,0.5,0.5,1.43,0,,0.3,15.73,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000456,1,2017-12-15 00:21:02,2017-12-15 00:36:30,N,1,,,,,2,2.30,11.5,0.5,0.5,1.92,0,,0.3,14.72,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000457,1,2017-12-15 00:38:38,2017-12-15 01:10:20,N,1,,,,,1,4.90,22.5,0.5,0.5,5.95,0,,0.3,29.75,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000458,1,2017-12-15 00:03:57,2017-12-15 00:07:19,N,1,,,,,2,1.10,5.5,0.5,0.5,1,0,,0.3,7.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000459,1,2017-12-15 00:26:00,2017-12-15 00:34:10,N,1,,,,,1,1.30,7.5,0.5,0.5,0,0,,0.3,8.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000460,1,2017-12-15 00:36:16,2017-12-15 00:49:08,N,1,,,,,1,2.60,11,0.5,0.5,2.45,0,,0.3,14.75,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000461,1,2017-12-15 00:53:25,2017-12-15 01:00:12,N,1,,,,,1,1.90,8,0.5,0.5,2.3,0,,0.3,11.6,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000462,2,2017-12-15 00:15:12,2017-12-15 00:26:54,N,1,,,,,1,1.17,8.5,0.5,0.5,2.94,0,,0.3,12.74,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000463,2,2017-12-15 00:47:47,2017-12-15 00:59:33,N,1,,,,,1,1.40,9,0.5,0.5,2.06,0,,0.3,12.36,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000464,2,2017-12-15 00:08:58,2017-12-15 00:43:22,N,1,,,,,5,6.10,25.5,0.5,0.5,8.04,0,,0.3,34.84,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000465,2,2017-12-15 00:48:26,2017-12-15 01:01:21,N,1,,,,,5,2.41,11.5,0.5,0.5,0,0,,0.3,12.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000466,1,2017-12-15 00:42:54,2017-12-15 00:46:52,N,1,,,,,1,0.70,4.5,0.5,0.5,1.15,0,,0.3,6.95,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000467,1,2017-12-15 00:01:07,2017-12-15 00:05:50,N,1,,,,,1,0.90,5,0.5,0.5,0,0,,0.3,6.3,3,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000468,1,2017-12-15 00:09:05,2017-12-15 
00:26:40,N,1,,,,,1,3.60,16,0.5,0.5,1,0,,0.3,18.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000469,1,2017-12-15 00:59:32,2017-12-15 01:16:09,N,1,,,,,1,7.70,23,0.5,0.5,7.5,5.76,,0.3,37.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000470,1,2017-12-15 00:15:12,2017-12-15 00:36:25,N,1,,,,,1,1.40,13.5,0.5,0.5,2.95,0,,0.3,17.75,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000471,1,2017-12-15 00:39:37,2017-12-15 01:19:53,N,1,,,,,1,8.00,31,0.5,0.5,8.05,0,,0.3,40.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000472,1,2017-12-15 00:04:23,2017-12-15 00:37:18,N,1,,,,,1,6.20,25,0.5,0.5,5.25,0,,0.3,31.55,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000473,1,2017-12-15 00:53:13,2017-12-15 00:59:08,N,1,,,,,1,1.20,6.5,0.5,0.5,1.55,0,,0.3,9.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000474,1,2017-12-15 00:14:58,2017-12-15 00:43:09,N,1,,,,,1,4.20,19.5,0.5,0.5,4.16,0,,0.3,24.96,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000475,1,2017-12-15 00:54:26,2017-12-15 01:01:22,N,1,,,,,1,1.10,6.5,0.5,0.5,1,0,,0.3,8.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000476,2,2017-12-15 00:03:41,2017-12-15 00:33:29,N,1,,,,,1,6.35,25,0.5,0.5,5.26,0,,0.3,31.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000477,2,2017-12-15 00:55:24,2017-12-15 01:29:13,N,1,,,,,1,8.48,30,0.5,0.5,5,0,,0.3,36.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000478,2,2017-12-15 00:13:25,2017-12-15 00:43:14,N,2,,,,,2,17.12,52,0,0.5,11.71,5.76,,0.3,70.27,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000479,1,2017-12-15 00:10:35,2017-12-15 00:21:11,N,1,,,,,2,1.10,8.5,0.5,0.5,1.95,0,,0.3,11.75,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000480,1,2017-12-15 00:22:05,2017-12-15 00:25:54,N,1,,,,,1,0.60,4.5,0.5,0.5,1.15,0,,0.3,6.95,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000481,1,2017-12-15 00:27:36,2017-12-15 
00:40:36,N,1,,,,,1,1.00,9.5,0.5,0.5,2.15,0,,0.3,12.95,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000482,1,2017-12-15 00:41:44,2017-12-15 00:47:23,N,1,,,,,1,1.20,6.5,0.5,0.5,0,0,,0.3,7.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000483,1,2017-12-15 00:49:04,2017-12-15 01:03:47,N,1,,,,,1,2.10,11.5,0.5,0.5,2.55,0,,0.3,15.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000484,1,2017-12-15 00:05:27,2017-12-15 00:29:12,N,1,,,,,0,10.50,31.5,0.5,0.5,9,5.76,,0.3,47.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000485,1,2017-12-15 00:35:54,2017-12-15 00:40:31,N,1,,,,,2,0.70,5.5,0.5,0.5,1.35,0,,0.3,8.15,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000486,1,2017-12-15 00:43:11,2017-12-15 00:53:22,N,1,,,,,2,1.00,8,0.5,0.5,1.85,0,,0.3,11.15,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000487,2,2017-12-15 00:31:19,2017-12-15 00:49:31,N,1,,,,,1,6.31,20.5,0.5,0.5,4.36,0,,0.3,26.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000488,2,2017-12-15 00:19:14,2017-12-15 00:29:43,N,1,,,,,1,1.05,8,0.5,0.5,1.5,0,,0.3,10.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000489,2,2017-12-15 00:32:35,2017-12-15 00:44:57,N,1,,,,,1,2.42,10.5,0.5,0.5,1,0,,0.3,12.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000490,2,2017-12-15 00:48:40,2017-12-15 00:57:57,N,1,,,,,1,1.22,8,0.5,0.5,2.32,0,,0.3,11.62,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000491,2,2017-12-15 00:38:41,2017-12-15 01:21:39,N,1,,,,,2,17.74,52,0.5,0.5,11.81,5.76,,0.3,70.87,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000492,2,2017-12-15 00:08:26,2017-12-15 00:19:44,N,1,,,,,1,1.66,8.5,0.5,0.5,2.5,0,,0.3,12.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000493,2,2017-12-15 00:22:53,2017-12-15 00:36:57,N,1,,,,,1,1.74,10,0.5,0.5,1,0,,0.3,12.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000494,2,2017-12-15 01:00:01,2017-12-15 
01:05:22,N,1,,,,,1,0.96,5.5,0.5,0.5,1.7,0,,0.3,8.5,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000495,2,2017-12-15 00:02:53,2017-12-15 00:09:38,N,1,,,,,1,0.65,6,0.5,0.5,0,0,,0.3,7.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000496,2,2017-12-15 00:11:31,2017-12-15 00:12:25,N,5,,,,,1,0.00,15,0,0.5,40,0,,0.3,55.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000497,2,2017-12-15 00:43:55,2017-12-15 00:56:49,N,1,,,,,1,1.94,10.5,0.5,0.5,2.36,0,,0.3,14.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000498,1,2017-12-15 00:25:49,2017-12-15 00:35:05,N,1,,,,,1,1.10,7.5,0.5,0.5,1.75,0,,0.3,10.55,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000499,1,2017-12-15 00:52:06,2017-12-15 00:57:28,N,1,,,,,2,0.60,5.5,0.5,0.5,0,0,,0.3,6.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000500,1,2017-12-15 00:34:18,2017-12-15 01:21:09,N,1,,,,,1,6.60,30,0.5,0.5,9.35,0,,0.3,40.65,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000501,2,2017-12-15 00:07:37,2017-12-15 00:59:00,N,1,,,,,1,10.17,42,0.5,0.5,0,0,,0.3,43.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000502,1,2017-12-15 00:07:42,2017-12-15 00:30:07,N,1,,,,,2,2.90,16,0.5,0.5,0,0,,0.3,17.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000503,1,2017-12-15 00:30:45,2017-12-15 00:38:40,N,1,,,,,1,1.30,7.5,0.5,0.5,0,0,,0.3,8.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000504,1,2017-12-15 00:42:23,2017-12-15 00:55:29,N,1,,,,,2,1.80,10.5,0.5,0.5,2.35,0,,0.3,14.15,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000505,1,2017-12-15 00:57:04,2017-12-15 00:57:04,N,1,,,,,1,0.00,5,0.5,0.5,0,0,,0.3,6.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000506,1,2017-12-15 00:59:48,2017-12-15 01:21:26,N,1,,,,,1,4.50,17.5,0.5,0.5,3.75,0,,0.3,22.55,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000507,2,2017-12-15 00:20:44,2017-12-15 
00:30:11,N,1,,,,,2,0.78,7.5,0.5,0.5,0,0,,0.3,8.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000508,2,2017-12-15 00:37:52,2017-12-15 00:52:16,N,1,,,,,2,1.16,10.5,0.5,0.5,1.5,0,,0.3,13.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000509,2,2017-12-15 00:58:26,2017-12-15 01:04:02,N,1,,,,,2,1.00,5.5,0.5,0.5,1.36,0,,0.3,8.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000510,1,2017-12-15 00:05:09,2017-12-15 00:25:20,N,1,,,,,1,2.60,14,0.5,0.5,0,0,,0.3,15.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000511,1,2017-12-15 00:29:09,2017-12-15 00:36:48,N,1,,,,,1,1.40,7.5,0.5,0.5,2.2,0,,0.3,11,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000512,1,2017-12-15 00:38:25,2017-12-15 00:51:42,N,1,,,,,1,1.50,10,0.5,0.5,3.35,0,,0.3,14.65,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000513,1,2017-12-15 00:23:44,2017-12-15 00:49:22,N,1,,,,,1,4.40,20.5,0.5,0.5,4.35,0,,0.3,26.15,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000514,1,2017-12-15 00:09:29,2017-12-15 00:14:30,N,1,,,,,1,0.80,5.5,0.5,0.5,1.7,0,,0.3,8.5,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000515,1,2017-12-15 00:16:07,2017-12-15 00:35:34,N,1,,,,,1,4.60,16.5,0.5,0.5,0,0,,0.3,17.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000516,1,2017-12-15 00:58:07,2017-12-15 01:05:15,N,1,,,,,2,1.40,7,0.5,0.5,1.65,0,,0.3,9.95,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000517,2,2017-12-15 00:25:15,2017-12-15 00:34:38,N,1,,,,,1,1.21,8,0.5,0.5,2.79,0,,0.3,12.09,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000518,2,2017-12-15 00:40:54,2017-12-15 00:52:56,N,1,,,,,1,0.86,8.5,0.5,0.5,1.96,0,,0.3,11.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000519,2,2017-12-15 00:54:22,2017-12-15 01:00:26,N,1,,,,,1,0.89,5.5,0.5,0.5,1,0,,0.3,7.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000520,1,2017-12-15 00:07:03,2017-12-15 
00:17:51,N,1,,,,,1,1.00,8,0.5,0.5,1,0,,0.3,10.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000521,1,2017-12-15 00:27:43,2017-12-15 01:02:24,N,1,,,,,1,6.80,27,0.5,0.5,0,0,,0.3,28.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000522,2,2017-12-15 00:14:12,2017-12-15 00:57:30,N,4,,,,,1,33.89,158,0.5,0.5,10,0,,0.3,169.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000523,2,2017-12-15 00:01:06,2017-12-15 00:16:46,N,1,,,,,1,3.00,13,0.5,0.5,0,0,,0.3,14.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000524,2,2017-12-15 00:30:25,2017-12-15 00:41:04,N,1,,,,,1,3.33,11,0.5,0.5,2.46,0,,0.3,14.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000525,2,2017-12-15 00:42:14,2017-12-15 00:52:16,N,1,,,,,1,4.32,14,0.5,0.5,3.06,0,,0.3,18.36,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000526,1,2017-12-15 00:44:17,2017-12-15 00:53:15,N,1,,,,,1,2.10,9,0.5,0.5,2.05,0,,0.3,12.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000527,1,2017-12-15 00:58:36,2017-12-15 01:17:56,N,1,,,,,2,5.00,17.5,0.5,0.5,3.75,0,,0.3,22.55,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000528,2,2017-12-15 00:14:35,2017-12-15 00:18:59,N,1,,,,,1,0.79,5,0.5,0.5,1.26,0,,0.3,7.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000529,2,2017-12-15 00:26:09,2017-12-15 01:04:29,N,1,,,,,1,11.54,37.5,0.5,0.5,7.76,0,,0.3,48.51,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000530,2,2017-12-15 00:14:54,2017-12-15 00:31:45,N,1,,,,,1,2.37,12.5,0.5,0.5,2.07,0,,0.3,15.87,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000531,2,2017-12-15 00:33:52,2017-12-15 00:40:05,N,1,,,,,2,1.43,7,0.5,0.5,0,0,,0.3,8.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000532,2,2017-12-15 00:49:02,2017-12-15 00:52:27,N,1,,,,,2,1.01,5,0.5,0.5,0,0,,0.3,6.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000533,2,2017-12-15 00:21:05,2017-12-15 
00:32:58,N,1,,,,,2,1.94,10,0.5,0.5,2.82,0,,0.3,14.12,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000534,2,2017-12-15 00:36:08,2017-12-15 00:44:27,N,1,,,,,2,1.33,7.5,0.5,0.5,1.2,0,,0.3,10,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000535,2,2017-12-15 00:49:05,2017-12-15 01:22:21,N,1,,,,,2,6.61,25.5,0.5,0.5,5.36,0,,0.3,32.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000536,2,2017-12-15 00:43:23,2017-12-15 01:16:38,N,1,,,,,1,7.28,27,0.5,0.5,5,0,,0.3,33.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000537,1,2017-12-15 00:08:26,2017-12-15 00:11:22,N,1,,,,,2,0.40,4,0.5,0.5,1.05,0,,0.3,6.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000538,1,2017-12-15 00:13:29,2017-12-15 00:46:41,N,1,,,,,1,6.30,25,0.5,0.5,5.25,0,,0.3,31.55,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000539,2,2017-12-15 00:11:39,2017-12-15 00:11:42,N,5,,,,,1,0.00,125,0,0,40,0,,0.3,165.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000540,2,2017-12-15 00:56:03,2017-12-15 01:00:44,N,1,,,,,1,0.97,5.5,0.5,0.5,0,0,,0.3,6.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000541,1,2017-12-15 00:42:52,2017-12-15 01:02:34,N,1,,,,,1,13.00,36,0.5,0.5,6,0,,0.3,43.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000542,1,2017-12-15 00:03:04,2017-12-15 00:52:32,N,1,,,,,1,6.60,32,0.5,0.5,6.65,0,,0.3,39.95,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000543,1,2017-12-15 00:13:41,2017-12-15 00:28:38,N,1,,,,,1,4.20,14.5,0.5,0.5,3.5,0,,0.3,19.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000544,1,2017-12-15 00:33:27,2017-12-15 00:40:04,N,1,,,,,1,1.70,7.5,0.5,0.5,0,0,,0.3,8.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000545,1,2017-12-15 00:47:29,2017-12-15 00:58:09,N,1,,,,,1,2.20,10,0.5,0.5,2.8,0,,0.3,14.1,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000546,1,2017-12-15 00:05:19,2017-12-15 
00:38:17,N,1,,,,,1,8.20,30,0.5,0.5,6.25,0,,0.3,37.55,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000547,2,2017-12-15 00:53:06,2017-12-15 01:14:50,N,1,,,,,4,12.29,35,0.5,0.5,0,0,,0.3,36.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000548,1,2017-12-15 00:02:50,2017-12-15 00:07:18,N,1,,,,,1,0.60,5,0.5,0.5,0,0,,0.3,6.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000549,1,2017-12-15 00:12:02,2017-12-15 00:14:27,N,1,,,,,1,0.40,4,0.5,0.5,1.05,0,,0.3,6.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000550,1,2017-12-15 00:15:16,2017-12-15 00:21:59,N,1,,,,,1,0.90,6,0.5,0.5,1.45,0,,0.3,8.75,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000551,1,2017-12-15 00:24:38,2017-12-15 00:46:00,N,1,,,,,1,4.00,17,0.5,0.5,1,0,,0.3,19.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000552,1,2017-12-15 00:53:46,2017-12-15 00:59:26,N,1,,,,,1,1.20,6.5,0.5,0.5,2,0,,0.3,9.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000553,1,2017-12-15 00:00:57,2017-12-15 00:15:13,N,1,,,,,2,2.60,11.5,0.5,0.5,0,0,,0.3,12.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000554,1,2017-12-15 00:19:11,2017-12-15 01:02:12,N,1,,,,,2,6.70,30,0.5,0.5,7.8,0,,0.3,39.1,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000555,2,2017-12-15 00:35:27,2017-12-15 00:54:46,N,1,,,,,1,2.59,13.5,0.5,0.5,2.96,0,,0.3,17.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000556,2,2017-12-15 00:06:54,2017-12-15 00:37:31,N,1,,,,,1,9.21,31,0.5,0.5,8.08,0,,0.3,40.38,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000557,2,2017-12-15 00:38:32,2017-12-15 00:53:06,N,1,,,,,1,4.34,14.5,0.5,0.5,3.95,0,,0.3,19.75,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000558,2,2017-12-15 00:56:49,2017-12-15 01:03:38,N,1,,,,,1,1.81,7.5,0.5,0.5,0,0,,0.3,8.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000559,2,2017-12-15 00:03:50,2017-12-15 
00:13:29,N,1,,,,,1,2.34,9.5,0.5,0.5,2.16,0,,0.3,12.96,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000560,2,2017-12-15 00:28:29,2017-12-15 00:35:00,N,1,,,,,1,1.32,6.5,0.5,0.5,1.56,0,,0.3,9.36,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000561,2,2017-12-15 00:50:11,2017-12-15 01:05:29,N,1,,,,,1,3.22,13.5,0.5,0.5,4.44,0,,0.3,19.24,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000562,1,2017-12-15 00:02:47,2017-12-15 00:41:54,N,1,,,,,1,7.80,31.5,0.5,0.5,6.55,0,,0.3,39.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000563,2,2017-12-15 00:05:55,2017-12-15 00:55:30,N,1,,,,,1,14.85,47,0.5,0.5,0,0,,0.3,48.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000564,2,2017-12-15 00:57:55,2017-12-15 00:58:01,N,5,,,,,1,0.00,10,0,0.5,0,0,,0.3,10.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000565,2,2017-12-15 00:13:16,2017-12-15 00:22:13,N,1,,,,,2,1.57,8,0.5,0.5,1.86,0,,0.3,11.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000566,2,2017-12-15 00:22:47,2017-12-15 00:24:31,N,1,,,,,1,0.43,3.5,0.5,0.5,0.72,0,,0.3,5.52,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000567,2,2017-12-15 00:32:29,2017-12-15 00:35:46,N,1,,,,,2,0.72,4.5,0.5,0.5,1.45,0,,0.3,7.25,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000568,2,2017-12-15 00:36:36,2017-12-15 00:52:02,N,1,,,,,2,2.10,11.5,0.5,0.5,2.56,0,,0.3,15.36,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000569,2,2017-12-15 00:54:25,2017-12-15 00:58:58,N,1,,,,,2,0.00,4.5,0.5,0.5,0,0,,0.3,5.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000570,1,2017-12-15 00:10:19,2017-12-15 00:42:43,N,1,,,,,1,5.40,22.5,0.5,0.5,0,0,,0.3,23.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000571,2,2017-12-15 00:22:17,2017-12-15 00:54:37,N,1,,,,,1,5.00,24,0.5,0.5,4,0,,0.3,29.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000572,2,2017-12-14 23:58:52,2017-12-15 
00:49:35,N,1,,,,,1,11.20,41,0.5,0.5,7.21,5.76,,0.3,55.27,1,,,,yellow,0.09,1,1.2,32,22,5.82,,,,,,,,,,,,,,,,,,,,\n1460000573,1,2017-12-15 00:36:07,2017-12-15 01:03:18,Y,1,,,,,1,5.00,20,0.5,0.5,4.25,0,,0.3,25.55,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000574,1,2017-12-15 00:15:45,2017-12-15 00:50:45,N,1,,,,,1,9.20,32,0.5,0.5,0,5.76,,0.3,39.06,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000575,1,2017-12-15 00:22:12,2017-12-15 00:38:30,N,1,,,,,1,2.20,12.5,0.5,0.5,0,0,,0.3,13.8,3,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000576,1,2017-12-15 00:41:46,2017-12-15 01:01:28,Y,1,,,,,1,3.70,15.5,0.5,0.5,3.35,0,,0.3,20.15,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000577,1,2017-12-15 00:25:24,2017-12-15 01:02:48,N,1,,,,,1,6.00,27.5,0.5,0.5,5.75,0,,0.3,34.55,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000578,2,2017-12-15 00:07:55,2017-12-15 00:12:34,N,1,,,,,1,0.96,5.5,0.5,0.5,0,0,,0.3,6.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000579,2,2017-12-15 00:13:36,2017-12-15 00:45:06,N,1,,,,,1,4.72,22,0.5,0.5,0,0,,0.3,23.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000580,2,2017-12-15 00:56:28,2017-12-15 01:00:43,N,1,,,,,1,0.79,5,0.5,0.5,1.26,0,,0.3,7.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000581,2,2017-12-15 00:08:58,2017-12-15 00:16:04,N,1,,,,,1,0.73,6,0.5,0.5,1.46,0,,0.3,8.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000582,2,2017-12-15 00:17:18,2017-12-15 00:50:54,N,1,,,,,1,3.66,22.5,0.5,0.5,4.76,0,,0.3,28.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000583,2,2017-12-15 00:49:08,2017-12-15 01:09:20,N,1,,,,,1,4.16,17,0.5,0.5,3.66,0,,0.3,21.96,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000584,2,2017-12-15 00:50:52,2017-12-15 01:03:54,N,1,,,,,1,2.09,10,0.5,0.5,2.82,0,,0.3,14.12,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000585,2,2017-12-15 00:31:15,2017-12-15 
00:35:58,N,1,,,,,1,1.11,6,0.5,0.5,1.46,0,,0.3,8.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000586,2,2017-12-15 00:51:58,2017-12-15 01:15:57,N,1,,,,,1,2.79,16,0.5,0.5,0,5.76,,0.3,23.06,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000587,2,2017-12-15 00:03:58,2017-12-15 00:14:13,N,1,,,,,1,1.68,8.5,0.5,0.5,0,0,,0.3,9.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000588,2,2017-12-15 00:16:35,2017-12-15 01:25:44,N,1,,,,,1,15.30,59,0.5,0.5,12.06,0,,0.3,72.36,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000589,2,2017-12-15 00:06:16,2017-12-15 00:20:06,N,1,,,,,1,2.16,11,0.5,0.5,0,0,,0.3,12.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000590,2,2017-12-15 00:23:57,2017-12-15 00:34:07,N,1,,,,,1,1.73,8.5,0.5,0.5,2.45,0,,0.3,12.25,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000591,2,2017-12-15 00:35:00,2017-12-15 00:41:32,N,1,,,,,1,1.44,7,0.5,0.5,1.66,0,,0.3,9.96,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000592,2,2017-12-15 00:42:57,2017-12-15 01:17:17,N,1,,,,,1,8.22,28.5,0.5,0.5,7.45,0,,0.3,37.25,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000593,2,2017-12-15 00:11:18,2017-12-15 00:41:56,N,1,,,,,1,4.03,20.5,0.5,0.5,3.5,0,,0.3,25.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000594,2,2017-12-15 00:50:15,2017-12-15 01:05:16,N,1,,,,,1,2.87,12.5,0.5,0.5,0,0,,0.3,13.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000595,2,2017-12-15 00:44:35,2017-12-15 01:36:39,N,1,,,,,1,13.58,47,0.5,0.5,9,0,,0.3,57.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000596,2,2017-12-15 00:10:26,2017-12-15 01:14:11,N,2,,,,,5,22.01,52,0,0.5,11.71,5.76,,0.3,70.27,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000597,1,2017-12-15 00:20:22,2017-12-15 00:59:40,N,2,,,,,2,18.00,52,0,0.5,0,0,,0.3,52.8,3,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000598,1,2017-12-15 00:11:10,2017-12-15 
00:22:38,N,1,,,,,2,2.00,10,0.5,0.5,0,0,,0.3,11.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000599,1,2017-12-15 00:25:37,2017-12-15 00:41:15,N,1,,,,,3,2.60,12.5,0.5,0.5,0,0,,0.3,13.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000600,2,2017-12-15 00:09:00,2017-12-15 00:36:33,N,1,,,,,2,2.91,18.5,0.5,0.5,0,0,,0.3,19.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000601,2,2017-12-15 00:53:19,2017-12-15 01:19:39,N,1,,,,,2,7.12,24,0.5,0.5,6.32,0,,0.3,31.62,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000602,2,2017-12-15 00:29:14,2017-12-15 00:38:05,N,1,,,,,1,1.50,8,0.5,0.5,2.32,0,,0.3,11.62,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000603,2,2017-12-15 00:39:24,2017-12-15 00:57:43,N,1,,,,,1,3.18,14,0.5,0.5,1,0,,0.3,16.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000604,2,2017-12-15 00:22:31,2017-12-15 00:29:31,N,1,,,,,3,1.44,7,0.5,0.5,0,0,,0.3,8.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000605,2,2017-12-15 00:48:10,2017-12-15 01:20:50,N,1,,,,,3,6.85,25.5,0.5,0.5,0,0,,0.3,26.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000606,1,2017-12-15 00:17:11,2017-12-15 00:24:41,N,1,,,,,1,1.30,7,0.5,0.5,0,0,,0.3,8.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000607,1,2017-12-15 00:26:31,2017-12-15 00:36:40,N,1,,,,,1,1.40,8.5,0.5,0.5,0,0,,0.3,9.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000608,1,2017-12-15 00:43:07,2017-12-15 01:24:10,N,1,,,,,1,10.60,37.5,0.5,0.5,2,0,,0.3,40.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000609,2,2017-12-15 00:22:39,2017-12-15 00:43:55,N,2,,,,,1,8.03,52,0,0.5,2,5.76,,0.3,60.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000610,1,2017-12-15 00:02:34,2017-12-15 00:11:48,N,1,,,,,1,4.90,15.5,0.5,0.5,3,0,,0.3,19.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000611,1,2017-12-15 00:29:52,2017-12-15 
00:32:26,N,1,,,,,1,0.30,3.5,0.5,0.5,1.4,0,,0.3,6.2,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000612,1,2017-12-15 00:37:00,2017-12-15 00:40:47,N,1,,,,,1,0.70,5,0.5,0.5,1,0,,0.3,7.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000613,1,2017-12-15 00:23:17,2017-12-15 00:56:40,N,1,,,,,1,3.90,21.5,0.5,0.5,4.55,0,,0.3,27.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000614,2,2017-12-15 00:45:58,2017-12-15 01:16:25,N,1,,,,,1,15.59,44.5,0.5,0.5,0,5.76,,0.3,51.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000615,2,2017-12-15 00:10:34,2017-12-15 00:11:56,N,1,,,,,1,0.08,3,0.5,0.5,0,0,,0.3,4.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000616,2,2017-12-15 00:17:20,2017-12-15 00:36:29,N,1,,,,,1,2.99,14,0.5,0.5,0,0,,0.3,15.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000617,2,2017-12-15 00:10:12,2017-12-15 00:30:28,N,1,,,,,1,2.17,14,0.5,0.5,3.06,0,,0.3,18.36,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000618,2,2017-12-15 00:35:21,2017-12-15 00:55:02,N,1,,,,,1,6.77,22.5,0.5,0.5,4.76,0,,0.3,28.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000619,1,2017-12-15 00:46:25,2017-12-15 01:09:39,N,1,,,,,1,5.70,20.5,0.5,0.5,0,0,,0.3,21.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000620,1,2017-12-15 00:04:11,2017-12-15 00:18:22,N,1,,,,,1,3.10,12,0.5,0.5,2,0,,0.3,15.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000621,1,2017-12-15 00:31:24,2017-12-15 00:42:28,N,1,,,,,1,0.60,8,0.5,0.5,1.85,0,,0.3,11.15,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000622,1,2017-12-15 00:44:08,2017-12-15 00:54:43,N,1,,,,,1,1.60,9,0.5,0.5,3.05,0,,0.3,13.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000623,2,2017-12-15 00:19:42,2017-12-15 00:47:45,N,1,,,,,1,13.76,39,0.5,0.5,0,5.76,,0.3,46.06,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000624,1,2017-12-15 00:52:27,2017-12-15 
01:08:45,N,1,,,,,1,7.70,23,0.5,0.5,0,0,,0.3,24.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000625,1,2017-12-15 00:19:22,2017-12-15 00:34:50,N,1,,,,,1,3.10,14,0.5,0.5,2,0,,0.3,17.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000626,1,2017-12-15 00:44:21,2017-12-15 01:23:25,N,1,,,,,1,10.70,35,0.5,0.5,7,0,,0.3,43.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000627,1,2017-12-15 00:50:13,2017-12-15 01:15:53,N,1,,,,,1,16.10,44,0.5,0.5,11.3,0,,0.3,56.6,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000628,2,2017-12-14 23:58:56,2017-12-15 00:03:51,N,1,,,,,3,1.18,6,0.5,0.5,1.02,0,,0.3,8.32,1,,,,yellow,0.09,1,1.2,32,22,5.82,,,,,,,,,,,,,,,,,,,,\n1460000629,2,2017-12-15 00:05:10,2017-12-15 00:41:52,N,1,,,,,4,9.78,33.5,0.5,0.5,2,5.76,,0.3,42.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000630,1,2017-12-15 00:02:37,2017-12-15 00:09:45,N,1,,,,,1,1.30,7,0.5,0.5,1.65,0,,0.3,9.95,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000631,1,2017-12-15 00:16:03,2017-12-15 00:22:28,N,1,,,,,1,0.50,5.5,0.5,0.5,0,0,,0.3,6.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000632,1,2017-12-15 00:24:13,2017-12-15 00:37:26,N,1,,,,,2,1.90,10.5,0.5,0.5,0,0,,0.3,11.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000633,1,2017-12-15 00:48:50,2017-12-15 00:52:32,N,1,,,,,1,0.50,4.5,0.5,0.5,0,0,,0.3,5.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000634,2,2017-12-15 00:10:46,2017-12-15 00:39:54,N,1,,,,,2,4.28,21,0.5,0.5,4.46,0,,0.3,26.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000635,1,2017-12-15 00:03:20,2017-12-15 00:09:54,N,1,,,,,5,1.00,6.5,0.5,0.5,1.55,0,,0.3,9.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000636,1,2017-12-15 00:11:44,2017-12-15 00:13:57,N,1,,,,,2,0.50,4,0.5,0.5,1.05,0,,0.3,6.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000637,1,2017-12-15 00:14:52,2017-12-15 
00:31:37,N,1,,,,,1,3.60,14.5,0.5,0.5,3.15,0,,0.3,18.95,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000638,1,2017-12-15 00:28:49,2017-12-15 00:35:36,N,1,,,,,1,1.30,7,0.5,0.5,0,0,,0.3,8.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000639,1,2017-12-15 00:46:13,2017-12-15 01:11:12,N,1,,,,,1,4.90,19,0.5,0.5,0,0,,0.3,20.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000640,1,2017-12-15 00:52:19,2017-12-15 01:28:24,N,1,,,,,1,6.60,29,0.5,0.5,6.05,0,,0.3,36.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000641,1,2017-12-15 00:09:02,2017-12-15 00:15:32,N,1,,,,,2,1.30,7,0.5,0.5,0,0,,0.3,8.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000642,1,2017-12-15 00:32:12,2017-12-15 00:38:02,N,1,,,,,1,1.30,6.5,0.5,0.5,1.55,0,,0.3,9.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000643,1,2017-12-15 00:57:40,2017-12-15 01:05:08,N,1,,,,,1,1.40,7.5,0.5,0.5,0,0,,0.3,8.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000644,2,2017-12-15 00:07:59,2017-12-15 00:37:06,N,1,,,,,1,3.73,19,0.5,0.5,1,0,,0.3,21.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000645,2,2017-12-15 00:49:50,2017-12-15 00:56:39,N,1,,,,,1,1.95,8.5,0.5,0.5,0,0,,0.3,9.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000646,2,2017-12-15 00:01:20,2017-12-15 00:25:50,N,1,,,,,5,5.33,20.5,0.5,0.5,4.36,0,,0.3,26.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000647,2,2017-12-15 00:28:14,2017-12-15 00:29:15,N,1,,,,,5,0.03,3,0.5,0.5,0,0,,0.3,4.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000648,2,2017-12-15 00:39:06,2017-12-15 00:53:21,N,1,,,,,5,1.21,10.5,0.5,0.5,2.36,0,,0.3,14.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000649,2,2017-12-15 00:54:22,2017-12-15 01:20:16,N,1,,,,,5,4.22,18.5,0.5,0.5,4.95,0,,0.3,24.75,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000650,2,2017-12-15 00:21:40,2017-12-15 
00:42:49,N,1,,,,,2,3.51,15.5,0.5,0.5,0,0,,0.3,16.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000651,2,2017-12-15 01:00:21,2017-12-15 01:20:15,N,1,,,,,2,8.83,26.5,0.5,0.5,0,5.76,,0.3,33.56,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000652,1,2017-12-15 00:51:00,2017-12-15 01:03:07,N,1,,,,,1,2.10,10,0.5,0.5,2.3,0,,0.3,13.6,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000653,1,2017-12-15 00:00:05,2017-12-15 00:14:23,N,1,,,,,1,2.60,12,0.5,0.5,3.95,0,,0.3,17.25,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000654,1,2017-12-15 00:26:10,2017-12-15 00:30:41,N,1,,,,,1,0.70,5,0.5,0.5,0.95,0,,0.3,7.25,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000655,1,2017-12-15 00:32:06,2017-12-15 00:42:31,N,1,,,,,1,1.70,9,0.5,0.5,0,0,,0.3,10.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000656,1,2017-12-15 00:43:47,2017-12-15 01:04:09,N,1,,,,,1,3.90,15.5,0.5,0.5,0,0,,0.3,16.8,3,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000657,2,2017-12-15 00:00:11,2017-12-15 00:29:57,N,1,,,,,3,5.36,24,0.5,0.5,7.59,0,,0.3,32.89,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000658,2,2017-12-15 00:55:15,2017-12-15 01:12:55,N,1,,,,,4,2.32,13,0.5,0.5,2.86,0,,0.3,17.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000659,2,2017-12-15 00:05:39,2017-12-15 00:32:38,N,1,,,,,1,3.69,19,0.5,0.5,6.09,0,,0.3,26.39,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000660,2,2017-12-15 00:36:55,2017-12-15 01:15:17,N,1,,,,,1,10.12,36,0.5,0.5,7.46,0,,0.3,44.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000661,2,2017-12-15 00:11:13,2017-12-15 00:26:46,N,1,,,,,3,4.16,15.5,0.5,0.5,3.36,0,,0.3,20.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000662,2,2017-12-15 00:29:43,2017-12-15 01:09:14,N,1,,,,,3,4.33,25.5,0.5,0.5,3,0,,0.3,29.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000663,2,2017-12-15 00:04:03,2017-12-15 
00:18:22,N,1,,,,,2,1.95,11,0.5,0.5,0,0,,0.3,12.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000664,2,2017-12-15 00:23:19,2017-12-15 00:25:03,N,1,,,,,2,0.55,3.5,0.5,0.5,1.2,0,,0.3,6,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000665,2,2017-12-15 00:28:30,2017-12-15 00:48:26,N,1,,,,,2,3.86,16.5,0.5,0.5,2,0,,0.3,19.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000666,2,2017-12-15 00:54:59,2017-12-16 00:48:26,N,1,,,,,2,6.51,27,0.5,0.5,0,0,,0.3,28.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000667,1,2017-12-15 00:00:19,2017-12-15 00:07:29,N,1,,,,,1,1.30,6.5,0.5,0.5,0.5,0,,0.3,8.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000668,1,2017-12-15 00:14:16,2017-12-15 00:24:38,N,1,,,,,1,1.70,8.5,0.5,0.5,1.95,0,,0.3,11.75,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000669,1,2017-12-15 00:28:03,2017-12-15 00:48:26,N,1,,,,,1,3.70,15.5,0.5,0.5,1.5,0,,0.3,18.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000670,2,2017-12-15 00:04:25,2017-12-15 00:10:06,N,1,,,,,1,0.92,6,0.5,0.5,0,0,,0.3,7.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000671,2,2017-12-15 00:11:22,2017-12-15 00:36:07,N,1,,,,,1,4.65,20,0.5,0.5,5.32,0,,0.3,26.62,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000672,2,2017-12-15 00:49:56,2017-12-15 00:56:19,N,1,,,,,1,1.85,7.5,0.5,0.5,1.76,0,,0.3,10.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000673,1,2017-12-15 00:54:32,2017-12-15 01:06:37,N,1,,,,,2,4.40,15,0.5,0.5,0,0,,0.3,16.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000674,2,2017-12-15 00:10:56,2017-12-15 00:49:42,N,2,,,,,1,19.22,52,0,0.5,0,5.76,,0.3,58.56,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000675,2,2017-12-15 00:53:14,2017-12-15 01:10:10,N,1,,,,,1,2.44,13,0.5,0.5,2.86,0,,0.3,17.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000676,1,2017-12-15 00:24:30,2017-12-15 
00:27:49,N,1,,,,,1,0.80,5,0.5,0.5,1,0,,0.3,7.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000677,1,2017-12-15 00:35:28,2017-12-15 00:43:21,N,1,,,,,1,1.20,7,0.5,0.5,0,0,,0.3,8.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000678,1,2017-12-15 00:45:35,2017-12-15 00:55:48,N,1,,,,,1,1.60,8.5,0.5,0.5,2.9,0,,0.3,12.7,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000679,2,2017-12-15 00:07:29,2017-12-15 00:38:34,N,1,,,,,1,13.41,39.5,0.5,0.5,9.31,5.76,,0.3,55.87,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000680,2,2017-12-15 00:48:16,2017-12-15 01:03:17,N,1,,,,,1,3.08,13.5,0.5,0.5,0,0,,0.3,14.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000681,1,2017-12-15 00:22:27,2017-12-15 00:38:33,N,1,,,,,1,1.20,11,0.5,0.5,0,0,,0.3,12.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000682,1,2017-12-15 00:41:29,2017-12-15 00:46:11,N,1,,,,,1,0.80,5.5,0.5,0.5,12.3,0,,0.3,19.1,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000683,1,2017-12-15 00:57:43,2017-12-15 01:22:11,N,1,,,,,1,6.00,21.5,0.5,0.5,0,0,,0.3,22.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000684,2,2017-12-15 00:36:07,2017-12-15 01:08:34,N,1,,,,,1,5.41,22.5,0.5,0.5,0,0,,0.3,23.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000685,2,2017-12-15 00:32:43,2017-12-15 00:38:26,N,1,,,,,1,1.00,6.5,0.5,0.5,1,0,,0.3,8.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000686,2,2017-12-15 00:57:05,2017-12-15 01:05:25,N,1,,,,,1,1.67,8,0.5,0.5,1.86,0,,0.3,11.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000687,2,2017-12-15 00:19:35,2017-12-15 00:31:13,N,1,,,,,1,2.63,11,0.5,0.5,0,0,,0.3,12.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000688,1,2017-12-15 00:04:18,2017-12-15 00:12:26,N,1,,,,,1,0.90,7,0.5,0.5,1.65,0,,0.3,9.95,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000689,1,2017-12-15 00:14:22,2017-12-15 
00:59:18,N,1,,,,,1,6.60,29.5,0.5,0.5,6.15,0,,0.3,36.95,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000690,1,2017-12-15 00:00:52,2017-12-15 00:49:27,N,1,,,,,1,8.70,40,0.5,0.5,0,5.76,,0.3,47.06,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000691,1,2017-12-15 00:17:13,2017-12-15 00:37:07,N,1,,,,,1,3.10,14.5,0.5,0.5,3.15,0,,0.3,18.95,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000692,1,2017-12-15 00:38:56,2017-12-15 00:49:41,N,1,,,,,1,1.10,8,0.5,0.5,1.85,0,,0.3,11.15,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000693,1,2017-12-15 00:52:01,2017-12-15 01:17:31,N,1,,,,,1,4.70,19.5,0.5,0.5,4,0,,0.3,24.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000694,2,2017-12-15 00:03:29,2017-12-15 00:03:31,N,5,,,,,1,0.06,75,0,0,15.06,0,,0.3,90.36,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000873,1,2017-12-15 00:48:42,2017-12-15 00:51:50,Y,1,,,,,1,0.40,4,0.5,0.5,0.79,0,,0.3,6.09,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000695,2,2017-12-15 00:22:23,2017-12-15 00:29:43,N,1,,,,,2,1.23,6.5,0.5,0.5,2.34,0,,0.3,10.14,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000696,2,2017-12-15 00:44:53,2017-12-15 00:48:01,N,1,,,,,2,0.38,4,0.5,0.5,1,0,,0.3,6.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000697,1,2017-12-15 00:19:26,2017-12-15 00:28:01,N,1,,,,,1,1.40,8,0.5,0.5,1.85,0,,0.3,11.15,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000698,1,2017-12-15 00:39:46,2017-12-15 01:14:14,N,1,,,,,1,7.80,28,0.5,0.5,3,0,,0.3,32.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000699,1,2017-12-15 00:16:44,2017-12-15 00:45:52,N,2,,,,,3,17.30,52,0,0.5,0,5.76,,0.3,58.56,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000700,2,2017-12-15 00:02:11,2017-12-15 00:29:09,N,1,,,,,1,6.58,24,0.5,0.5,5.06,0,,0.3,30.36,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000701,2,2017-12-15 00:30:14,2017-12-15 
00:46:04,N,1,,,,,1,2.80,12.5,0.5,0.5,0,0,,0.3,13.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000702,1,2017-12-15 00:10:46,2017-12-15 00:40:01,N,1,,,,,1,9.90,31,0.5,0.5,7.6,5.76,,0.3,45.66,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000703,1,2017-12-15 00:48:41,2017-12-15 01:06:29,N,1,,,,,1,3.50,15,0.5,0.5,4.05,0,,0.3,20.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000704,2,2017-12-15 00:22:48,2017-12-15 00:42:03,N,1,,,,,1,3.05,15,0.5,0.5,2.44,0,,0.3,18.74,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000705,2,2017-12-15 00:46:30,2017-12-15 01:13:28,N,1,,,,,1,3.78,19,0.5,0.5,5.08,0,,0.3,25.38,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000706,2,2017-12-15 00:01:57,2017-12-15 00:25:18,N,1,,,,,1,4.73,19.5,0.5,0.5,6.24,0,,0.3,27.04,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000707,2,2017-12-15 00:01:06,2017-12-15 00:05:42,N,1,,,,,2,1.25,6,0.5,0.5,0,0,,0.3,7.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000708,2,2017-12-15 00:28:01,2017-12-15 00:35:18,N,1,,,,,2,1.25,7,0.5,0.5,1.66,0,,0.3,9.96,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000709,2,2017-12-15 00:36:39,2017-12-15 00:50:41,N,1,,,,,2,3.16,13,0.5,0.5,0,0,,0.3,14.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000710,2,2017-12-15 00:02:19,2017-12-15 00:22:36,N,1,,,,,1,3.68,15.5,0.5,0.5,3.36,0,,0.3,22.11,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000711,2,2017-12-15 00:23:44,2017-12-15 00:33:57,N,1,,,,,1,1.77,9,0.5,0.5,2.58,0,,0.3,12.88,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000712,2,2017-12-15 00:37:09,2017-12-15 00:56:53,N,1,,,,,1,2.92,15,0.5,0.5,2,0,,0.3,18.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000713,2,2017-12-15 00:12:13,2017-12-15 00:29:34,N,1,,,,,1,12.02,33,0.5,0.5,8.58,0,,0.3,42.88,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000714,2,2017-12-15 00:11:36,2017-12-15 
00:40:50,N,1,,,,,3,5.63,23,0.5,0.5,4.86,0,,0.3,29.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000715,2,2017-12-15 00:43:02,2017-12-15 00:49:08,N,1,,,,,3,1.68,7,0.5,0.5,0,0,,0.3,8.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000716,1,2017-12-15 00:05:41,2017-12-15 00:32:29,N,1,,,,,2,4.20,20,0.5,0.5,5.3,0,,0.3,26.6,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000717,1,2017-12-15 00:44:27,2017-12-15 01:04:20,N,1,,,,,1,4.90,17,0.5,0.5,5.45,0,,0.3,23.75,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000718,1,2017-12-15 00:30:44,2017-12-15 00:40:04,N,1,,,,,1,1.50,8.5,0.5,0.5,1.5,0,,0.3,11.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000719,1,2017-12-15 00:41:48,2017-12-15 00:50:54,N,1,,,,,1,1.50,8,0.5,0.5,1.85,0,,0.3,11.15,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000720,1,2017-12-15 00:59:42,2017-12-15 01:12:12,N,1,,,,,1,4.20,13.5,0.5,0.5,2,0,,0.3,16.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000721,2,2017-12-15 00:08:17,2017-12-15 00:23:49,N,1,,,,,2,6.81,21.5,0.5,0.5,3.5,0,,0.3,26.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000722,2,2017-12-15 00:54:07,2017-12-15 01:12:23,N,1,,,,,2,8.71,25,0.5,0.5,0,5.76,,0.3,32.06,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000723,1,2017-12-15 00:10:46,2017-12-15 00:23:46,N,1,,,,,1,1.80,10.5,0.5,0.5,1,0,,0.3,12.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000724,1,2017-12-15 00:25:04,2017-12-15 00:40:45,N,1,,,,,1,3.10,13.5,0.5,0.5,3.7,0,,0.3,18.5,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000725,1,2017-12-15 00:46:29,2017-12-15 01:22:33,N,1,,,,,1,5.80,26.5,0.5,0.5,5.55,0,,0.3,33.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000726,2,2017-12-15 00:07:51,2017-12-15 00:14:32,N,1,,,,,1,0.81,6,0.5,0.5,1.46,0,,0.3,8.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000727,2,2017-12-15 00:17:00,2017-12-15 
00:37:28,N,1,,,,,1,1.28,13,0.5,0.5,2.86,0,,0.3,17.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000728,2,2017-12-15 00:19:24,2017-12-15 00:34:41,N,1,,,,,1,4.52,15.5,0.5,0.5,0,0,,0.3,16.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000729,2,2017-12-15 00:01:16,2017-12-15 00:17:43,N,1,,,,,1,1.05,11,0.5,0.5,2.46,0,,0.3,14.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000730,2,2017-12-15 00:18:40,2017-12-15 00:31:35,N,1,,,,,1,1.22,9.5,0.5,0.5,0,0,,0.3,10.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000731,1,2017-12-15 00:31:52,2017-12-15 00:38:45,N,1,,,,,1,1.30,6.5,0.5,0.5,1.55,0,,0.3,9.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000732,1,2017-12-15 00:18:16,2017-12-15 00:43:11,N,1,,,,,1,7.00,25,0.5,0.5,7,0,,0.3,33.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000733,2,2017-12-15 00:05:53,2017-12-15 00:35:02,N,1,,,,,1,6.85,26,0.5,0.5,0,0,,0.3,27.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000734,2,2017-12-15 00:53:47,2017-12-15 01:24:10,N,1,,,,,1,8.55,29.5,0.5,0.5,3,5.76,,0.3,39.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000735,1,2017-12-15 00:18:57,2017-12-15 00:36:41,N,1,,,,,1,2.40,13,0.5,0.5,2.85,0,,0.3,17.15,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000736,1,2017-12-15 00:39:32,2017-12-15 00:51:27,N,1,,,,,1,2.20,10,0.5,0.5,0,0,,0.3,11.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000737,1,2017-12-15 00:09:35,2017-12-15 00:10:30,N,4,,,,,0,0.00,3,0.5,0.5,0,0,,0.3,4.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000738,1,2017-12-15 00:10:38,2017-12-15 00:50:50,N,2,,,,,2,18.30,52,0,0.5,10,0,,0.3,62.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000739,1,2017-12-15 00:59:40,2017-12-15 01:12:42,N,1,,,,,1,1.90,10,0.5,0.5,2.25,0,,0.3,13.55,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000740,2,2017-12-15 00:10:27,2017-12-15 
00:32:38,N,1,,,,,1,15.36,41.5,0.5,0.5,8.56,0,,0.3,51.36,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000741,2,2017-12-15 00:43:57,2017-12-15 01:14:33,N,1,,,,,1,7.44,28,0.5,0.5,8.76,5.76,,0.3,43.82,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000742,1,2017-12-15 00:23:14,2017-12-15 00:40:55,N,1,,,,,1,3.10,14,0.5,0.5,1.5,0,,0.3,16.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000743,1,2017-12-15 00:45:14,2017-12-15 01:06:11,N,1,,,,,1,4.60,19.5,0.5,0.5,4.15,0,,0.3,24.95,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000744,1,2017-12-15 00:05:13,2017-12-15 00:31:01,N,1,,,,,1,4.90,20.5,0.5,0.5,4.35,0,,0.3,26.15,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000745,1,2017-12-15 00:55:03,2017-12-15 01:00:44,N,1,,,,,1,0.90,5.5,0.5,0.5,0,0,,0.3,6.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000746,1,2017-12-15 00:08:52,2017-12-15 00:44:05,N,1,,,,,1,3.80,23,0.5,0.5,6.05,0,,0.3,30.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000747,2,2017-12-15 00:33:05,2017-12-15 00:48:27,N,1,,,,,1,2.99,13.5,0.5,0.5,2.96,0,,0.3,17.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000748,2,2017-12-15 00:15:29,2017-12-15 00:54:51,N,1,,,,,1,4.36,26,0.5,0.5,5.46,0,,0.3,32.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000749,1,2017-12-15 00:01:36,2017-12-15 00:04:58,N,1,,,,,1,0.80,4.5,0.5,0.5,0,0,,0.3,5.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000750,1,2017-12-15 00:15:24,2017-12-15 00:21:09,N,1,,,,,1,1.80,7.5,0.5,0.5,0,0,,0.3,8.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000751,1,2017-12-15 00:40:23,2017-12-15 00:46:29,N,1,,,,,1,1.00,6,0.5,0.5,0,0,,0.3,7.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000752,1,2017-12-15 00:49:17,2017-12-15 01:00:42,N,1,,,,,1,1.30,8.5,0.5,0.5,0,0,,0.3,9.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000753,2,2017-12-15 00:11:14,2017-12-15 
00:23:43,N,1,,,,,3,2.00,10,0.5,0.5,0,0,,0.3,11.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000754,2,2017-12-15 00:31:31,2017-12-15 00:45:43,N,1,,,,,2,2.13,11.5,0.5,0.5,1,0,,0.3,13.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000755,2,2017-12-15 00:40:55,2017-12-15 01:00:47,N,1,,,,,1,2.88,14,0.5,0.5,0,0,,0.3,15.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000756,2,2017-12-15 00:08:22,2017-12-15 00:10:21,N,1,,,,,1,0.68,4,0.5,0.5,0,0,,0.3,5.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000757,2,2017-12-15 00:11:45,2017-12-15 00:32:57,N,1,,,,,1,4.32,18,0.5,0.5,3.86,0,,0.3,23.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000758,2,2017-12-15 00:45:14,2017-12-15 01:04:07,N,1,,,,,1,4.99,18,0.5,0.5,5.79,0,,0.3,25.09,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000759,1,2017-12-15 00:31:29,2017-12-15 00:31:58,N,5,,,,,1,0.00,18,0,0,3.65,0,,0.3,21.95,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000760,1,2017-12-15 00:34:33,2017-12-15 00:39:54,N,1,,,,,1,1.60,7,0.5,0.5,2.05,0,,0.3,10.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000761,1,2017-12-15 00:42:43,2017-12-15 00:51:05,N,1,,,,,1,1.50,8.5,0.5,0.5,0,0,,0.3,9.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000762,1,2017-12-15 00:17:16,2017-12-15 00:26:19,N,1,,,,,1,2.30,9,0.5,0.5,2.05,0,,0.3,12.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000763,1,2017-12-15 00:35:40,2017-12-15 00:49:25,N,1,,,,,1,3.20,13,0.5,0.5,2.85,0,,0.3,17.15,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000764,2,2017-12-15 00:23:04,2017-12-15 00:42:31,N,1,,,,,1,8.98,26.5,0.5,0.5,2,0,,0.3,29.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000765,1,2017-12-15 00:46:24,2017-12-15 01:01:21,N,1,,,,,1,2.80,12,0.5,0.5,0,0,,0.3,13.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000766,2,2017-12-14 23:57:15,2017-12-15 
00:15:42,N,1,,,,,1,5.55,20,0.5,0.5,4.26,0,,0.3,25.56,1,,,,yellow,0.09,1,1.2,32,22,5.82,,,,,,,,,,,,,,,,,,,,\n1460000767,2,2017-12-15 00:23:19,2017-12-15 00:34:54,N,1,,,,,1,1.26,8.5,0.5,0.5,1.96,0,,0.3,11.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000768,2,2017-12-15 00:41:52,2017-12-15 00:48:13,N,1,,,,,1,0.82,6,0.5,0.5,1.82,0,,0.3,9.12,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000769,2,2017-12-15 00:49:53,2017-12-15 01:02:43,N,1,,,,,1,2.31,11,0.5,0.5,3.69,0,,0.3,15.99,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000770,2,2017-12-15 00:29:46,2017-12-15 01:15:02,N,1,,,,,5,9.27,37.5,0.5,0.5,8.91,5.76,,0.3,53.47,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000771,2,2017-12-15 00:03:21,2017-12-15 00:16:01,N,1,,,,,5,2.26,10.5,0.5,0.5,2.36,0,,0.3,14.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000772,2,2017-12-15 00:16:49,2017-12-15 00:24:40,N,1,,,,,5,1.39,7.5,0.5,0.5,1,0,,0.3,9.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000773,2,2017-12-15 00:29:56,2017-12-15 00:42:19,N,1,,,,,5,1.61,9.5,0.5,0.5,2.16,0,,0.3,12.96,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000774,2,2017-12-15 00:43:51,2017-12-15 00:54:08,N,1,,,,,5,1.04,7,0.5,0.5,0.7,0,,0.3,9,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000775,2,2017-12-15 00:55:15,2017-12-15 00:59:45,N,1,,,,,5,0.38,4.5,0.5,0.5,0,0,,0.3,5.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000776,2,2017-12-15 00:15:43,2017-12-15 00:43:10,N,1,,,,,3,3.34,19,0.5,0.5,4.06,0,,0.3,24.36,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000777,2,2017-12-15 00:54:12,2017-12-15 01:05:13,N,1,,,,,3,2.16,10,0.5,0.5,0,0,,0.3,11.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000778,2,2017-12-15 00:22:21,2017-12-15 00:48:52,N,1,,,,,1,9.82,30.5,0.5,0.5,0,0,,0.3,31.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000779,2,2017-12-15 00:53:35,2017-12-15 
00:55:32,N,1,,,,,1,0.36,3.5,0.5,0.5,0,0,,0.3,4.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000780,2,2017-12-15 00:33:11,2017-12-15 00:39:57,N,1,,,,,1,1.35,6.5,0.5,0.5,1.56,0,,0.3,9.36,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000781,2,2017-12-15 00:40:32,2017-12-15 00:59:34,N,1,,,,,1,4.29,16,0.5,0.5,0,0,,0.3,17.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000782,2,2017-12-15 00:21:31,2017-12-15 00:25:39,N,1,,,,,1,0.27,4.5,0.5,0.5,1.16,0,,0.3,6.96,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000783,2,2017-12-15 00:22:28,2017-12-15 00:57:26,N,1,,,,,1,5.58,26,0.5,0.5,1,0,,0.3,28.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000784,1,2017-12-15 00:44:31,2017-12-15 00:48:39,N,1,,,,,3,0.60,4.5,0.5,0.5,1.7,0,,0.3,7.5,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000785,1,2017-12-15 00:54:03,2017-12-15 01:08:58,N,1,,,,,1,2.60,12,0.5,0.5,2.65,0,,0.3,15.95,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000786,1,2017-12-15 00:17:08,2017-12-15 00:38:33,N,1,,,,,1,9.00,27.5,0.5,0.5,6,5.76,,0.3,40.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000787,1,2017-12-15 00:40:42,2017-12-15 00:58:49,N,1,,,,,1,3.70,15,0.5,0.5,0.02,0,,0.3,16.32,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000788,1,2017-12-15 00:07:49,2017-12-15 00:32:05,N,1,,,,,1,3.90,17.5,0.5,0.5,1.88,0,,0.3,20.68,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000789,1,2017-12-15 00:36:09,2017-12-15 00:37:14,N,1,,,,,1,0.30,3,0.5,0.5,0.85,0,,0.3,5.15,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000790,1,2017-12-15 00:41:57,2017-12-15 01:13:05,N,1,,,,,1,8.30,29.5,0.5,0.5,7.7,0,,0.3,38.5,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000791,2,2017-12-15 00:33:18,2017-12-15 00:39:28,N,1,,,,,2,1.87,8,0.5,0.5,1,0,,0.3,10.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000792,2,2017-12-15 00:41:31,2017-12-15 
01:14:47,N,1,,,,,2,9.43,32.5,0.5,0.5,6.76,0,,0.3,40.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000793,2,2017-12-15 00:51:28,2017-12-15 01:18:40,N,1,,,,,3,15.96,45,0.5,0.5,0,0,,0.3,46.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000794,2,2017-12-15 00:24:56,2017-12-15 00:57:40,N,1,,,,,1,5.40,23.5,0.5,0.5,0,5.76,,0.3,30.56,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000795,2,2017-12-15 00:50:36,2017-12-15 01:11:29,N,1,,,,,1,14.03,38,0.5,0.5,7.86,0,,0.3,47.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000796,2,2017-12-15 00:16:48,2017-12-15 00:25:42,N,1,,,,,1,3.31,11,0.5,0.5,2.46,0,,0.3,14.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000797,2,2017-12-15 00:28:58,2017-12-15 00:45:19,N,1,,,,,1,2.56,13,0.5,0.5,0,0,,0.3,14.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000798,2,2017-12-15 00:47:35,2017-12-15 01:25:59,N,1,,,,,1,5.91,26,0.5,0.5,2,5.76,,0.3,35.06,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000799,1,2017-12-15 00:04:08,2017-12-15 00:15:31,N,1,,,,,1,1.90,9,0.5,0.5,2.55,0,,0.3,12.85,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000800,1,2017-12-15 00:17:18,2017-12-15 00:31:04,N,1,,,,,1,2.50,11.5,0.5,0.5,1.5,0,,0.3,14.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000801,2,2017-12-15 00:17:03,2017-12-15 00:54:29,N,1,,,,,2,6.26,25.5,0.5,0.5,0,0,,0.3,26.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000802,1,2017-12-15 00:18:30,2017-12-15 00:32:04,N,1,,,,,2,1.40,9.5,0.5,0.5,1,0,,0.3,11.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000803,1,2017-12-15 00:40:55,2017-12-15 00:52:32,N,1,,,,,2,1.70,9.5,0.5,0.5,2.15,0,,0.3,12.95,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000804,1,2017-12-15 00:58:53,2017-12-15 01:14:54,N,1,,,,,1,3.00,13,0.5,0.5,3.55,0,,0.3,17.85,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000805,1,2017-12-15 00:21:00,2017-12-15 
00:39:48,N,1,,,,,1,4.40,16.5,0.5,0.5,0,0,,0.3,17.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000806,1,2017-12-15 00:41:50,2017-12-15 00:54:39,N,1,,,,,1,5.00,16.5,0.5,0.5,0,0,,0.3,17.8,4,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000807,1,2017-12-15 00:16:11,2017-12-15 00:23:24,N,1,,,,,1,1.10,6.5,0.5,0.5,1.4,0,,0.3,9.2,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000808,1,2017-12-15 00:36:58,2017-12-15 01:23:38,N,1,,,,,3,4.50,29.5,0.5,0.5,6.15,0,,0.3,36.95,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000809,1,2017-12-15 00:02:18,2017-12-15 00:27:13,N,1,,,,,1,5.20,20,0.5,0.5,4.25,0,,0.3,25.55,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000810,1,2017-12-15 00:45:21,2017-12-15 00:53:02,N,1,,,,,1,1.40,7,0.5,0.5,1,0,,0.3,9.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000811,1,2017-12-15 00:54:15,2017-12-15 01:08:34,N,1,,,,,1,2.90,12,0.5,0.5,0,0,,0.3,13.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000812,1,2017-12-15 00:05:06,2017-12-15 00:10:53,N,1,,,,,1,1.60,7,0.5,0.5,3,0,,0.3,11.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000813,1,2017-12-15 00:13:19,2017-12-15 00:26:11,N,1,,,,,1,2.40,11,0.5,0.5,2.45,0,,0.3,14.75,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000814,1,2017-12-15 00:56:51,2017-12-15 01:03:21,N,1,,,,,1,0.50,6,0.5,0.5,0,0,,0.3,7.3,3,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000815,1,2017-12-15 00:05:46,2017-12-15 00:43:24,N,1,,,,,1,3.20,23,0.5,0.5,0,0,,0.3,24.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000816,1,2017-12-15 00:49:45,2017-12-15 01:01:15,N,1,,,,,1,1.80,9.5,0.5,0.5,2.15,0,,0.3,12.95,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000817,1,2017-12-15 00:06:47,2017-12-15 00:19:20,N,1,,,,,1,2.60,11,0.5,0.5,2.45,0,,0.3,14.75,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000818,1,2017-12-15 00:21:08,2017-12-15 
00:39:13,N,1,,,,,1,4.20,16,0.5,0.5,2,0,,0.3,19.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000819,1,2017-12-15 00:41:09,2017-12-15 00:46:56,N,1,,,,,1,0.70,5.5,0.5,0.5,1.35,0,,0.3,8.15,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000820,1,2017-12-15 00:49:48,2017-12-15 00:56:43,N,1,,,,,4,0.80,6,0.5,0.5,1.8,0,,0.3,9.1,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000821,1,2017-12-15 00:57:42,2017-12-15 01:16:36,N,1,,,,,1,3.00,14,0.5,0.5,2.5,0,,0.3,17.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000822,1,2017-12-15 00:04:26,2017-12-15 00:19:40,N,1,,,,,2,1.40,10,0.5,0.5,0,0,,0.3,11.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000823,1,2017-12-15 00:26:21,2017-12-15 00:34:11,N,1,,,,,1,1.10,7,0.5,0.5,2,0,,0.3,10.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000824,1,2017-12-15 00:39:20,2017-12-15 00:56:50,N,1,,,,,1,5.00,17,0.5,0.5,3.65,0,,0.3,21.95,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000825,2,2017-12-15 00:12:02,2017-12-15 00:26:03,N,1,,,,,1,2.27,11,0.5,0.5,2.46,0,,0.3,14.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000826,2,2017-12-15 00:32:43,2017-12-15 00:43:57,N,1,,,,,1,3.13,11.5,0.5,0.5,0,0,,0.3,12.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000827,2,2017-12-15 00:45:55,2017-12-15 00:54:31,N,1,,,,,1,1.89,8.5,0.5,0.5,0,0,,0.3,9.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000828,2,2017-12-15 00:04:28,2017-12-15 00:48:05,N,1,,,,,1,7.08,31,0.5,0.5,6.46,0,,0.3,38.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000829,1,2017-12-15 00:01:24,2017-12-15 00:38:04,N,1,,,,,2,6.60,27.5,0.5,0.5,5.75,0,,0.3,34.55,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000830,1,2017-12-15 00:17:14,2017-12-15 00:26:11,N,1,,,,,1,2.00,8.5,0.5,0.5,0,0,,0.3,9.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000831,1,2017-12-15 00:30:59,2017-12-15 
00:47:45,N,1,,,,,1,3.00,13,0.5,0.5,2.85,0,,0.3,17.15,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000832,1,2017-12-15 00:51:48,2017-12-15 01:05:41,N,1,,,,,1,6.70,20.5,0.5,0.5,4.35,0,,0.3,26.15,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000833,2,2017-12-15 00:04:55,2017-12-15 00:17:37,N,1,,,,,1,2.77,12,0.5,0.5,0,0,,0.3,13.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000834,2,2017-12-15 00:27:10,2017-12-15 00:40:45,N,1,,,,,1,4.32,15,0.5,0.5,3.26,0,,0.3,19.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000835,2,2017-12-15 00:47:01,2017-12-15 01:00:18,N,1,,,,,1,1.12,10,0.5,0.5,2.26,0,,0.3,13.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000836,2,2017-12-15 00:34:54,2017-12-15 00:51:54,N,1,,,,,1,1.76,11.5,0.5,0.5,2.56,0,,0.3,15.36,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000837,1,2017-12-15 00:41:18,2017-12-15 00:54:16,N,1,,,,,1,2.70,11.5,0.5,0.5,2.55,0,,0.3,15.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000838,1,2017-12-15 00:16:37,2017-12-15 00:25:14,N,1,,,,,1,1.00,7.5,0.5,0.5,2.2,0,,0.3,11,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000839,1,2017-12-15 00:29:29,2017-12-15 00:35:48,N,1,,,,,1,0.80,6,0.5,0.5,1.45,0,,0.3,8.75,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000840,1,2017-12-15 00:44:11,2017-12-15 00:54:00,N,1,,,,,1,1.30,8,0.5,0.5,1.85,0,,0.3,11.15,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000841,2,2017-12-15 00:03:15,2017-12-15 00:12:39,N,1,,,,,1,1.44,8,0.5,0.5,1.86,0,,0.3,11.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000842,2,2017-12-15 00:13:34,2017-12-15 00:25:05,N,1,,,,,1,0.70,8,0.5,0.5,0,0,,0.3,9.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000843,2,2017-12-15 00:29:16,2017-12-15 00:32:52,N,1,,,,,1,0.63,4.5,0.5,0.5,1.74,0,,0.3,7.54,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000844,2,2017-12-15 00:44:44,2017-12-15 
00:55:18,N,1,,,,,1,2.22,10,0.5,0.5,2.26,0,,0.3,13.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000845,2,2017-12-15 00:56:56,2017-12-15 01:08:12,N,1,,,,,1,2.25,10.5,0.5,0.5,0,0,,0.3,11.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000846,1,2017-12-15 00:05:27,2017-12-15 00:40:39,N,1,,,,,1,4.10,23,0.5,0.5,3,0,,0.3,27.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000847,1,2017-12-15 00:50:51,2017-12-15 00:55:54,N,1,,,,,1,1.00,5.5,0.5,0.5,1.35,0,,0.3,8.15,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000848,1,2017-12-15 00:59:52,2017-12-15 01:11:28,N,1,,,,,2,1.30,9,0.5,0.5,2.05,0,,0.3,12.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000849,2,2017-12-15 00:43:01,2017-12-15 01:04:17,N,1,,,,,1,4.68,18,0.5,0.5,2.5,0,,0.3,21.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000850,1,2017-12-15 00:52:23,2017-12-15 01:16:27,N,1,,,,,1,4.00,17.5,0.5,0.5,1.2,0,,0.3,20,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000851,1,2017-12-15 00:50:17,2017-12-15 01:18:00,N,1,,,,,1,7.70,26,0.5,0.5,5.45,0,,0.3,32.75,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000852,2,2017-12-15 00:29:48,2017-12-15 00:47:32,N,1,,,,,5,4.13,15.5,0.5,0.5,4.2,0,,0.3,21,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000853,1,2017-12-15 00:20:05,2017-12-15 00:28:06,N,1,,,,,1,0.60,6.5,0.5,0.5,2,0,,0.3,9.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000854,1,2017-12-15 00:30:22,2017-12-15 01:10:08,N,1,,,,,1,5.80,27,0.5,0.5,7.05,0,,0.3,35.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000855,2,2017-12-15 00:30:08,2017-12-15 00:36:48,N,1,,,,,1,0.72,6,0.5,0.5,1.46,0,,0.3,8.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000856,2,2017-12-15 00:38:28,2017-12-15 00:57:44,N,1,,,,,1,1.65,13,0.5,0.5,0,0,,0.3,14.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000857,1,2017-12-15 00:11:26,2017-12-15 
00:33:14,N,1,,,,,1,4.30,18.5,0.5,0.5,2,0,,0.3,21.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000858,1,2017-12-15 00:53:08,2017-12-15 01:24:53,N,1,,,,,2,8.90,31,0.5,0.5,5,5.76,,0.3,43.06,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000859,1,2017-12-15 00:53:46,2017-12-15 01:07:14,N,1,,,,,2,1.90,10.5,0.5,0.5,1.2,0,,0.3,13,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000860,2,2017-12-15 00:31:50,2017-12-15 00:37:17,N,1,,,,,1,1.77,7,0.5,0.5,1,0,,0.3,9.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000861,1,2017-12-15 00:08:40,2017-12-15 00:58:36,N,1,,,,,1,19.60,59.5,0.5,0.5,13,0,,0.3,73.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000862,2,2017-12-15 00:02:27,2017-12-15 00:06:04,N,1,,,,,5,0.60,4.5,0.5,0.5,1.16,0,,0.3,6.96,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000863,2,2017-12-15 00:17:42,2017-12-15 01:05:29,N,1,,,,,5,6.86,30,0.5,0.5,0,0,,0.3,31.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000864,2,2017-12-15 00:22:02,2017-12-15 00:40:51,N,1,,,,,1,3.48,15.5,0.5,0.5,0,0,,0.3,16.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000865,2,2017-12-15 00:42:18,2017-12-15 00:49:35,N,1,,,,,1,1.42,7,0.5,0.5,1.66,0,,0.3,9.96,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000866,2,2017-12-15 00:02:19,2017-12-15 00:24:09,N,1,,,,,1,10.38,31,0.5,0.5,6.46,0,,0.3,38.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000867,2,2017-12-15 00:21:42,2017-12-15 01:25:06,N,1,,,,,1,11.80,45.5,0.5,0.5,0,0,,0.3,46.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000868,2,2017-12-15 00:03:13,2017-12-15 00:11:15,N,1,,,,,1,1.05,7,0.5,0.5,1.66,0,,0.3,9.96,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000869,2,2017-12-15 00:13:04,2017-12-15 00:43:07,N,1,,,,,1,5.54,22,0.5,0.5,0,0,,0.3,23.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000870,2,2017-12-15 00:03:01,2017-12-15 
00:17:43,N,1,,,,,1,1.14,10,0.5,0.5,0,0,,0.3,11.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000871,2,2017-12-15 00:19:26,2017-12-15 00:36:55,N,1,,,,,1,3.51,15,0.5,0.5,20,0,,0.3,36.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000872,1,2017-12-15 00:42:05,2017-12-15 00:46:23,Y,1,,,,,2,0.70,5,0.5,0.5,0,0,,0.3,6.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000874,1,2017-12-15 00:32:08,2017-12-15 00:38:53,N,1,,,,,1,0.60,6,0.5,0.5,0,0,,0.3,7.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000875,1,2017-12-15 00:52:04,2017-12-15 01:04:53,N,1,,,,,1,2.00,10.5,0.5,0.5,2.35,0,,0.3,14.15,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000876,2,2017-12-15 00:39:40,2017-12-15 01:22:10,N,2,,,,,6,21.28,52,0,0.5,6,5.76,,0.3,64.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000877,1,2017-12-15 00:04:14,2017-12-15 00:13:52,N,1,,,,,2,2.10,9.5,0.5,0.5,1.2,0,,0.3,12,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000878,1,2017-12-15 00:44:57,2017-12-15 00:57:59,N,1,,,,,2,1.40,9.5,0.5,0.5,2,0,,0.3,12.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000879,2,2017-12-15 00:12:01,2017-12-15 00:18:24,N,1,,,,,1,1.32,7,0.5,0.5,0.1,0,,0.3,8.4,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000880,2,2017-12-15 00:22:58,2017-12-15 00:40:06,N,1,,,,,1,4.69,16.5,0.5,0.5,3.56,0,,0.3,21.36,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000881,2,2017-12-15 00:47:43,2017-12-15 00:54:11,N,1,,,,,1,1.30,6.5,0.5,0.5,1.56,0,,0.3,9.36,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000882,2,2017-12-15 00:16:46,2017-12-15 00:24:04,N,1,,,,,1,1.86,8,0.5,0.5,1.86,0,,0.3,11.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000883,2,2017-12-15 00:29:13,2017-12-15 00:43:55,N,1,,,,,1,2.95,12.5,0.5,0.5,2.2,0,,0.3,16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000884,2,2017-12-15 00:48:04,2017-12-15 
00:49:49,N,1,,,,,1,0.34,3.5,0.5,0.5,0,0,,0.3,4.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000885,2,2017-12-15 00:51:53,2017-12-15 00:55:13,N,1,,,,,1,0.86,5,0.5,0.5,1.89,0,,0.3,8.19,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000886,2,2017-12-15 00:21:27,2017-12-15 01:14:34,N,1,,,,,1,10.62,42.5,0.5,0.5,10.95,0,,0.3,54.75,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000887,1,2017-12-15 00:04:08,2017-12-15 00:36:58,N,1,,,,,2,5.10,23,0.5,0.5,0,0,,0.3,24.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000888,1,2017-12-15 00:40:51,2017-12-15 00:57:22,N,1,,,,,2,3.10,14,0.5,0.5,3.8,0,,0.3,19.1,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000889,1,2017-12-15 00:08:02,2017-12-15 00:29:01,N,1,,,,,1,2.50,14.5,0.5,0.5,1,0,,0.3,16.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000890,1,2017-12-15 00:48:54,2017-12-15 01:33:00,N,1,,,,,1,15.60,48.5,0.5,0.5,5.2,0,,0.3,55,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000891,2,2017-12-14 23:59:19,2017-12-15 00:23:03,N,1,,,,,1,3.84,17.5,0.5,0.5,1,0,,0.3,19.8,1,,,,yellow,0.09,1,1.2,32,22,5.82,,,,,,,,,,,,,,,,,,,,\n1460000892,2,2017-12-15 00:42:48,2017-12-15 01:03:15,N,1,,,,,1,2.45,14.5,0.5,0.5,1,0,,0.3,16.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000893,2,2017-12-15 00:16:33,2017-12-15 00:26:08,N,1,,,,,1,1.67,8.5,0.5,0.5,1.96,0,,0.3,11.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000894,2,2017-12-15 00:38:41,2017-12-15 00:56:22,N,1,,,,,1,2.95,13.5,0.5,0.5,0,0,,0.3,14.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000895,2,2017-12-15 00:57:11,2017-12-15 01:14:48,N,1,,,,,1,0.82,11.5,0.5,0.5,0,0,,0.3,12.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000896,2,2017-12-15 00:02:45,2017-12-15 00:36:35,N,2,,,,,1,18.85,52,0,0.5,11.71,5.76,,0.3,70.27,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000897,2,2017-12-15 00:43:23,2017-12-15 
01:00:33,N,1,,,,,1,2.82,13.5,0.5,0.5,2.96,0,,0.3,17.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000898,2,2017-12-15 00:08:07,2017-12-15 00:41:12,N,1,,,,,1,5.36,24,0.5,0.5,6.32,0,,0.3,31.62,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000899,2,2017-12-15 00:48:01,2017-12-15 00:54:37,N,1,,,,,1,0.87,6.5,0.5,0.5,1.56,0,,0.3,9.36,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000900,2,2017-12-15 01:00:05,2017-12-15 01:04:23,N,1,,,,,1,1.04,5.5,0.5,0.5,2.04,0,,0.3,8.84,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000901,1,2017-12-15 00:43:08,2017-12-15 00:50:53,N,1,,,,,1,2.10,8.5,0.5,0.5,1.95,0,,0.3,11.75,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000902,1,2017-12-15 00:51:34,2017-12-15 00:54:56,N,1,,,,,1,1.10,5.5,0.5,0.5,0,0,,0.3,6.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000903,2,2017-12-15 00:53:48,2017-12-15 01:04:17,N,1,,,,,1,1.66,9,0.5,0.5,0,0,,0.3,10.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000904,1,2017-12-15 00:22:51,2017-12-15 00:50:12,N,2,,,,,1,18.40,52,0,0.5,10.55,0,,0.3,63.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000905,1,2017-12-15 00:02:29,2017-12-15 00:27:44,N,1,,,,,1,4.20,18,0.5,0.5,0,0,,0.3,19.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000906,1,2017-12-15 00:31:33,2017-12-15 00:50:28,N,1,,,,,1,3.40,15,0.5,0.5,4.05,0,,0.3,20.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000907,1,2017-12-15 00:53:36,2017-12-15 00:57:11,N,1,,,,,1,0.90,5,0.5,0.5,1.25,0,,0.3,7.55,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000908,2,2017-12-15 00:01:47,2017-12-15 00:35:22,N,1,,,,,1,6.78,26.5,0.5,0.5,2,0,,0.3,29.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000909,2,2017-12-15 00:55:50,2017-12-15 01:21:26,N,1,,,,,1,4.70,19,0.5,0.5,0,0,,0.3,20.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000910,1,2017-12-15 00:15:27,2017-12-15 
00:20:38,N,1,,,,,1,1.40,6.5,0.5,0.5,0,0,,0.3,7.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000911,1,2017-12-15 00:34:52,2017-12-15 01:01:21,N,1,,,,,1,5.10,21,0.5,0.5,3.35,0,,0.3,25.65,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000912,1,2017-12-15 00:15:57,2017-12-15 00:22:46,N,1,,,,,1,1.80,7.5,0.5,0.5,1.75,0,,0.3,10.55,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000913,1,2017-12-15 00:31:27,2017-12-15 01:11:20,N,1,,,,,1,5.30,26.5,0.5,0.5,1,0,,0.3,28.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000914,1,2017-12-15 00:14:15,2017-12-15 00:29:52,N,1,,,,,1,3.00,13,0.5,0.5,1,0,,0.3,15.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000915,1,2017-12-15 00:48:36,2017-12-15 01:24:10,N,1,,,,,1,4.90,26,0.5,0.5,8.15,0,,0.3,35.45,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000916,1,2017-12-15 00:22:04,2017-12-15 00:32:41,Y,1,,,,,2,1.90,9,0.5,0.5,2.05,0,,0.3,12.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000917,1,2017-12-15 00:33:58,2017-12-15 00:47:41,N,1,,,,,1,3.30,12.5,0.5,0.5,0,0,,0.3,13.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000918,1,2017-12-15 00:54:47,2017-12-15 00:55:21,N,5,,,,,1,0.00,14,0,0,2.85,0,,0.3,17.15,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000919,1,2017-12-15 00:35:43,2017-12-15 01:08:42,N,1,,,,,1,11.70,37.5,0.5,0.5,9.7,0,,0.3,48.5,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000920,2,2017-12-15 00:13:56,2017-12-15 00:41:08,N,1,,,,,1,3.73,19,0.5,0.5,3,0,,0.3,23.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000921,2,2017-12-15 00:50:32,2017-12-15 01:06:42,N,1,,,,,1,2.81,12,0.5,0.5,2.66,0,,0.3,15.96,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000922,1,2017-12-15 00:39:58,2017-12-15 01:01:50,N,1,,,,,1,4.50,18,0.5,0.5,3.85,0,,0.3,23.15,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000923,1,2017-12-15 00:08:23,2017-12-15 
01:07:29,N,1,,,,,1,7.70,38,0.5,0.5,0,0,,0.3,39.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000924,1,2017-12-15 00:00:04,2017-12-15 00:14:22,N,1,,,,,2,2.30,11.5,0.5,0.5,2.55,0,,0.3,15.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000925,1,2017-12-15 00:15:32,2017-12-15 00:39:24,N,1,,,,,2,3.10,16.5,0.5,0.5,2,0,,0.3,19.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000926,1,2017-12-15 00:58:47,2017-12-15 01:01:23,N,1,,,,,1,0.60,4,0.5,0.5,1.05,0,,0.3,6.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000927,2,2017-12-15 00:19:03,2017-12-15 00:26:33,N,1,,,,,1,1.07,7,0.5,0.5,1,0,,0.3,9.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000928,2,2017-12-15 00:27:47,2017-12-15 01:05:07,N,1,,,,,1,5.63,26,0.5,0.5,5.46,0,,0.3,32.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000929,1,2017-12-15 00:02:07,2017-12-15 00:28:16,N,1,,,,,0,6.70,23.5,0.5,0.5,0,0,,0.3,24.8,3,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000930,1,2017-12-15 00:33:18,2017-12-15 01:03:18,N,1,,,,,1,4.80,21.5,0.5,0.5,5.7,5.76,,0.3,34.26,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000931,1,2017-12-15 00:38:52,2017-12-15 00:38:57,N,2,,,,,1,4.60,52,0,0.5,0,0,,0.3,52.8,3,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000932,1,2017-12-15 00:43:53,2017-12-15 01:18:44,N,1,,,,,1,6.40,26.5,0,0.5,0,0,,0.3,27.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000933,1,2017-12-15 00:04:13,2017-12-15 00:51:06,N,1,,,,,2,9.20,36,0.5,0.5,7.45,0,,0.3,44.75,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000934,2,2017-12-15 00:09:49,2017-12-15 00:23:32,N,1,,,,,1,2.51,11.5,0.5,0.5,0,0,,0.3,12.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000935,2,2017-12-15 00:28:33,2017-12-15 01:04:59,N,1,,,,,1,6.32,25.5,0.5,0.5,5.36,0,,0.3,32.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000936,2,2017-12-15 00:06:03,2017-12-15 
00:22:31,N,1,,,,,5,3.26,14,0.5,0.5,3.06,0,,0.3,18.36,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000937,2,2017-12-15 00:37:08,2017-12-15 01:03:40,N,1,,,,,5,2.83,17.5,0.5,0.5,0,0,,0.3,18.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000938,2,2017-12-15 00:57:16,2017-12-15 01:16:42,N,1,,,,,1,2.93,14,0.5,0.5,1,0,,0.3,16.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000939,2,2017-12-15 00:10:53,2017-12-15 00:22:45,N,1,,,,,2,1.44,9,0.5,0.5,2.5,0,,0.3,12.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000940,2,2017-12-15 00:50:00,2017-12-15 01:26:28,N,1,,,,,2,10.03,35.5,0.5,0.5,0,0,,0.3,36.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000941,2,2017-12-15 00:07:21,2017-12-15 00:20:07,N,1,,,,,1,1.85,10.5,0.5,0.5,2.36,0,,0.3,14.16,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000942,2,2017-12-15 00:29:10,2017-12-15 00:36:06,N,1,,,,,1,1.68,7.5,0.5,0.5,0,0,,0.3,8.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000943,2,2017-12-15 00:47:04,2017-12-15 01:20:23,N,1,,,,,1,15.51,45,0.5,0.5,9.26,0,,0.3,55.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000944,1,2017-12-15 00:19:02,2017-12-15 00:29:05,N,1,,,,,1,1.90,9.5,0.5,0.5,1,0,,0.3,11.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000945,1,2017-12-15 00:31:52,2017-12-15 00:44:24,N,1,,,,,1,2.20,10.5,0.5,0.5,0,0,,0.3,11.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000946,1,2017-12-15 00:46:42,2017-12-15 01:05:12,N,1,,,,,1,6.20,21,0.5,0.5,3.35,0,,0.3,25.65,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000947,2,2017-12-15 00:11:42,2017-12-15 00:43:20,N,1,,,,,1,6.04,25.5,0.5,0.5,5.36,0,,0.3,34.11,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000948,1,2017-12-15 00:21:34,2017-12-15 00:41:16,N,1,,,,,1,2.30,14,0.5,0.5,0,0,,0.3,15.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000949,1,2017-12-15 00:42:11,2017-12-15 
00:55:13,N,1,,,,,2,3.50,13,0.5,0.5,2.85,0,,0.3,17.15,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000950,2,2017-12-15 00:00:16,2017-12-15 00:17:07,N,1,,,,,2,4.05,15.5,0.5,0.5,0,0,,0.3,16.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000951,2,2017-12-15 00:47:25,2017-12-15 01:08:08,N,1,,,,,2,4.73,18,0.5,0.5,0,0,,0.3,19.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000952,2,2017-12-14 23:58:52,2017-12-15 23:48:39,N,1,,,,,2,0.27,3.5,0.5,0.5,0,0,,0.3,4.8,1,,,,yellow,0.09,1,1.2,32,22,5.82,,,,,,,,,,,,,,,,,,,,\n1460000953,2,2017-12-15 00:02:43,2017-12-15 00:31:33,N,1,,,,,1,3.81,19.5,0.5,0.5,3,0,,0.3,23.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000954,2,2017-12-15 00:08:07,2017-12-15 00:22:27,N,1,,,,,1,1.58,10.5,0.5,0.5,1.5,0,,0.3,13.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000955,2,2017-12-15 00:23:59,2017-12-15 00:43:51,N,1,,,,,1,3.09,14.5,0.5,0.5,4.74,0,,0.3,20.54,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000956,2,2017-12-15 00:53:01,2017-12-15 00:53:05,N,5,,,,,1,0.00,5,0,0.5,0,0,,0.3,5.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000957,2,2017-12-15 00:55:09,2017-12-15 01:07:32,N,1,,,,,1,2.52,11.5,0.5,0.5,0,0,,0.3,12.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000958,2,2017-12-15 00:22:57,2017-12-15 00:36:58,N,1,,,,,1,2.75,12,0.5,0.5,0.66,0,,0.3,13.96,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000959,2,2017-12-15 00:03:25,2017-12-15 00:13:02,N,1,,,,,1,0.99,7.5,0.5,0.5,0,0,,0.3,8.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000960,2,2017-12-15 00:17:06,2017-12-15 00:31:31,N,1,,,,,1,1.79,10.5,0.5,0.5,0,0,,0.3,11.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000961,2,2017-12-15 00:37:42,2017-12-15 01:37:47,N,5,,,,,1,16.84,80,0,0.5,0,0,,0.3,80.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000962,1,2017-12-15 00:24:46,2017-12-15 
00:34:31,N,1,,,,,1,1.60,8.5,0.5,0.5,1.95,0,,0.3,11.75,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000963,1,2017-12-15 00:47:28,2017-12-15 01:07:04,N,1,,,,,1,4.20,16.5,0.5,0.5,5,0,,0.3,22.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000964,2,2017-12-15 00:01:11,2017-12-15 00:09:14,N,1,,,,,1,1.57,8,0.5,0.5,0,0,,0.3,9.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000965,2,2017-12-15 00:30:12,2017-12-15 01:01:05,N,1,,,,,1,4.68,22.5,0.5,0.5,4.76,0,,0.3,28.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000966,1,2017-12-15 00:24:00,2017-12-15 00:38:13,N,1,,,,,1,3.20,12.5,0.5,0.5,2.75,0,,0.3,16.55,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000967,2,2017-12-15 00:49:53,2017-12-15 01:17:25,N,1,,,,,3,6.37,22.5,0.5,0.5,4.76,0,,0.3,28.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000968,2,2017-12-15 00:05:30,2017-12-15 00:37:30,N,2,,,,,4,18.09,52,0,0.5,11.71,5.76,,0.3,70.27,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000969,2,2017-12-15 00:47:11,2017-12-15 01:02:37,N,1,,,,,4,3.35,13,0.5,0.5,0,0,,0.3,14.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000970,2,2017-12-15 00:32:31,2017-12-15 01:15:13,N,1,,,,,6,10.56,38.5,0.5,0.5,4,5.76,,0.3,49.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000971,1,2017-12-15 00:55:25,2017-12-15 01:00:03,N,1,,,,,1,0.50,5,0.5,0.5,0,0,,0.3,6.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000972,1,2017-12-15 00:00:54,2017-12-15 00:04:07,N,1,,,,,1,0.90,5,0.5,0.5,1.55,0,,0.3,7.85,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000973,1,2017-12-15 00:11:51,2017-12-15 00:24:10,N,1,,,,,1,1.70,10,0.5,0.5,2,0,,0.3,13.3,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000974,1,2017-12-15 00:33:26,2017-12-15 01:19:20,N,1,,,,,1,8.50,34.5,0.5,0.5,7.15,0,,0.3,42.95,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000975,1,2017-12-15 00:11:27,2017-12-15 
00:54:09,N,1,,,,,2,7.90,31.5,0.5,0.5,6.55,0,,0.3,39.35,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000976,2,2017-12-15 00:10:38,2017-12-15 00:50:52,N,1,,,,,1,6.88,29.5,0.5,0.5,7.7,0,,0.3,38.5,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000977,2,2017-12-15 00:04:00,2017-12-15 00:21:02,N,1,,,,,6,1.51,12,0.5,0.5,3.99,0,,0.3,17.29,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000978,2,2017-12-15 00:21:43,2017-12-15 00:33:56,N,1,,,,,6,0.71,8,0.5,0.5,0,0,,0.3,9.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000979,2,2017-12-15 00:36:45,2017-12-15 00:48:13,N,1,,,,,6,1.00,8.5,0.5,0.5,2.94,0,,0.3,12.74,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000980,2,2017-12-15 00:50:35,2017-12-15 01:14:51,N,1,,,,,6,4.34,18,0.5,0.5,4.82,0,,0.3,24.12,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000981,2,2017-12-15 00:35:10,2017-12-15 00:40:56,N,1,,,,,1,1.21,6,0.5,0.5,1.46,0,,0.3,8.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000982,2,2017-12-15 00:53:17,2017-12-15 01:09:14,N,1,,,,,1,3.38,13.5,0.5,0.5,3.7,0,,0.3,18.5,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000983,2,2017-12-15 00:48:43,2017-12-15 00:58:44,N,1,,,,,1,1.34,8.5,0.5,0.5,1,0,,0.3,10.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000984,2,2017-12-15 00:37:51,2017-12-15 00:38:05,N,5,,,,,1,0.00,60,0,0,0,0,,0.3,60.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000985,2,2017-12-15 00:45:54,2017-12-15 00:52:31,N,1,,,,,1,1.92,7.5,0.5,0.5,2,0,,0.3,10.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000986,2,2017-12-15 00:07:43,2017-12-15 00:28:15,N,1,,,,,2,10.02,29.5,0.5,0.5,0,0,,0.3,30.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000987,2,2017-12-15 00:32:40,2017-12-15 00:41:01,N,1,,,,,2,1.29,7.5,0.5,0.5,1,0,,0.3,9.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000988,2,2017-12-15 00:41:52,2017-12-15 
00:48:28,N,1,,,,,2,1.00,6.5,0.5,0.5,0,0,,0.3,7.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000989,2,2017-12-15 00:52:43,2017-12-15 01:37:15,N,5,,,,,2,4.32,70,0,0.5,0,0,,0.3,70.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000990,1,2017-12-15 00:03:36,2017-12-15 00:25:14,N,1,,,,,2,3.90,17.5,0.5,0.5,3.75,0,,0.3,22.55,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000991,1,2017-12-15 00:39:01,2017-12-15 01:05:04,N,1,,,,,2,4.70,19.5,0.5,0.5,3,0,,0.3,23.8,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000992,2,2017-12-15 00:04:21,2017-12-15 00:09:29,N,1,,,,,1,0.69,5,0.5,0.5,1.89,0,,0.3,8.19,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000993,2,2017-12-15 00:19:51,2017-12-15 00:24:23,N,1,,,,,1,0.79,5,0.5,0.5,1.26,0,,0.3,7.56,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000994,2,2017-12-15 00:25:57,2017-12-15 00:55:44,N,1,,,,,1,3.62,19.5,0.5,0.5,4.16,0,,0.3,24.96,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000995,1,2017-12-15 00:00:00,2017-12-15 00:15:00,N,1,,,,,1,2.20,12,0.5,0.5,2.65,0,,0.3,15.95,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000996,1,2017-12-15 00:16:51,2017-12-15 00:49:40,N,1,,,,,2,8.50,29,0.5,0.5,7.2,5.76,,0.3,43.26,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000997,2,2017-12-15 00:42:37,2017-12-15 01:24:17,N,1,,,,,1,8.46,31.5,0.5,0.5,8.2,0,,0.3,41,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000998,2,2017-12-15 00:06:02,2017-12-15 00:18:45,N,1,,,,,1,2.28,11,0.5,0.5,2.46,0,,0.3,14.76,1,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460000999,2,2017-12-15 00:21:04,2017-12-15 00:31:14,N,1,,,,,1,1.01,8,0.5,0.5,0,0,,0.3,9.3,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n1460001000,2,2017-12-15 00:33:21,2017-12-15 00:42:19,N,1,,,,,1,1.87,8.5,0.5,0.5,0,0,,0.3,9.8,2,,,,yellow,0.11,1,1.2,28,20,3.80,,,,,,,,,,,,,,,,,,,,\n"
  },
  {
    "path": "examples/data/plasticc_test_set_1k.csv",
    "content": "object_id,mjd,passband,flux,flux_err,detected\n13,59798.3205,2,-1.299735,1.357315,0\n13,59798.3281,1,-2.095392,1.148654,0\n13,59798.3357,3,-0.923794,1.763655,0\n13,59798.3466,4,-4.009815,2.602911,0\n13,59798.3576,5,-3.403503,5.367328,0\n13,59801.3553,2,-1.778855,2.448943,0\n13,59801.3629,1,2.491993,3.540421,0\n13,59801.3705,3,1.644129,2.284999,0\n13,59801.3815,4,-0.158192,2.515900,0\n13,59801.3924,5,-6.457387,5.381231,0\n13,59818.2740,0,1.962846,1.795587,0\n13,59819.2541,0,-1.697929,2.433431,0\n13,59820.2522,0,-1.698675,1.898612,0\n13,59821.2478,0,-0.776626,2.435191,0\n13,59822.2433,0,-3.826187,2.853957,0\n13,59823.2659,0,-0.837001,2.690573,0\n13,59826.3105,2,0.529480,0.920972,0\n13,59826.3181,1,-0.702092,0.923219,0\n13,59826.3258,3,-0.797231,1.508073,0\n13,59826.3367,4,-3.898993,2.156529,0\n13,59826.3477,5,-2.883462,5.535779,0\n13,59842.2456,2,-0.838558,0.849664,0\n13,59842.2532,1,-0.038403,0.813772,0\n13,59842.2608,3,2.863780,1.322607,0\n13,59842.2718,4,0.088068,2.166394,0\n13,59842.2827,5,-1.672899,6.068114,0\n13,59851.1792,0,-0.737309,2.574759,0\n13,59854.1485,2,0.040905,1.224270,0\n13,59854.1563,1,-0.265884,1.336087,0\n13,59854.1640,3,-0.369843,1.901700,0\n13,59854.1750,4,2.415236,2.771065,0\n13,59854.1860,5,-4.745292,5.788470,0\n13,59857.1408,2,4.814427,2.132096,0\n13,59857.1485,1,-2.618018,2.915296,0\n13,59857.1563,3,-2.852325,2.092946,0\n13,59857.1673,4,-1.584260,2.676820,0\n13,59857.1782,5,-12.249563,6.628258,0\n13,59867.1112,2,-1.164210,1.083173,0\n13,59867.1189,1,-1.176300,0.974351,0\n13,59867.1267,3,0.451480,1.538879,0\n13,59867.1377,4,0.359918,2.357359,0\n13,59867.1487,5,-2.699421,5.830664,0\n13,59870.1049,2,1.381781,0.971909,0\n13,59870.1126,1,0.387448,0.822428,0\n13,59870.1204,3,0.817313,1.373219,0\n13,59870.1314,4,0.151035,2.283455,0\n13,59870.1424,5,2.986818,5.280643,0\n13,59873.0971,2,-1.240265,1.040900,0\n13,59873.1049,1,0.931574,1.042814,0\n13,59873.1126,3,2.134935,1.442823,0\n13,59873.1236,4,-0.530458,2.068875,0\n13,59873.1346,5,
-0.314489,4.587787,0\n13,59874.1461,0,-2.490195,1.611522,0\n13,59875.0995,0,-1.276708,2.019888,0\n13,59876.0980,0,3.425512,2.443817,0\n13,59877.0976,0,2.518677,1.776579,0\n13,59878.0964,0,1.872855,2.020787,0\n13,59879.0895,0,0.522355,1.864210,0\n13,59880.1017,0,-0.761489,1.371363,0\n13,59884.1760,2,0.408623,0.768234,0\n13,59884.1836,1,0.610256,0.887502,0\n13,59884.1913,3,-0.134252,1.138571,0\n13,59884.2022,4,-0.009705,1.609182,0\n13,59884.2132,5,6.633543,4.214399,0\n13,59887.2856,2,1.784338,1.826830,0\n13,59887.2933,1,1.591402,2.206727,0\n13,59887.3009,3,0.109085,2.350564,0\n13,59887.3118,4,-2.874674,3.145338,0\n13,59887.3228,5,-2.586321,7.947824,0\n13,59896.1307,2,1.084506,0.822793,0\n13,59896.1384,1,-0.561922,0.703150,0\n13,59896.1460,3,0.284255,1.393400,0\n13,59896.1569,4,0.466466,2.192684,0\n13,59896.1679,5,0.433727,5.948182,0\n13,59899.1519,2,0.102152,1.365073,0\n13,59899.1595,1,1.624047,1.388812,0\n13,59899.1672,3,1.820581,2.378815,0\n13,59899.1781,4,-3.816161,3.709328,0\n13,59899.1891,5,-8.805643,7.905914,0\n13,59902.1384,2,0.196845,1.186146,0\n13,59902.1460,1,0.993784,1.079431,0\n13,59902.1537,3,1.891725,1.943785,0\n13,59902.1646,4,-1.455820,2.762865,0\n13,59902.1755,5,-0.622898,7.024046,0\n13,59904.1053,0,-4.067552,2.281505,0\n13,59905.0555,0,0.633017,2.504281,0\n13,59906.0562,0,1.127558,2.030226,0\n13,59907.0567,0,0.456890,1.749502,0\n13,59908.0681,0,-1.925056,2.683702,0\n13,59909.0582,0,-0.678187,1.834134,0\n13,59910.0503,0,1.194135,1.869489,0\n13,59914.0526,2,-1.644144,1.438881,0\n13,59914.0602,1,-1.866351,1.956770,0\n13,59914.0678,3,-0.710913,1.686324,0\n13,59914.0788,4,5.825469,2.269697,0\n13,59914.0897,5,3.513655,5.356808,0\n13,59924.1060,2,-0.028742,1.131780,0\n13,59924.1136,1,-1.108478,1.574127,0\n13,59924.1212,3,-1.323133,1.728779,0\n13,59924.1322,4,-5.513097,2.527147,0\n13,59924.1431,5,4.266651,5.633382,0\n13,59927.1074,2,-0.400232,1.190879,0\n13,59927.1151,1,1.286833,1.135341,0\n13,59927.1227,3,2.155155,1.593354,0\n13,59927.1336,4,2.415218,2.2997
77,0\n13,59927.1446,5,3.778454,5.891465,0\n13,59930.1236,2,1.373614,0.942084,0\n13,59930.1312,1,-1.140287,0.828822,0\n13,59930.1388,3,0.832613,1.283629,0\n13,59930.1498,4,2.345280,2.033006,0\n13,59930.1607,5,-4.211092,5.330056,0\n13,59933.1249,2,0.395000,0.691634,0\n13,59933.1325,1,-0.423847,0.693980,0\n13,59933.1401,3,-1.108444,1.268825,0\n13,59933.1511,4,-0.328867,1.913908,0\n13,59933.1620,5,7.551563,5.429599,0\n13,59934.0638,0,-1.113758,1.141001,0\n13,59935.0646,0,-0.818122,1.122888,0\n13,59936.0642,0,2.141036,1.531045,0\n13,59937.0650,0,-1.277371,1.639949,0\n13,59938.0647,0,4.045178,2.015162,0\n13,59939.0650,0,-0.050647,2.194245,0\n13,60165.3032,2,-0.244917,1.195023,0\n13,60165.3109,1,-0.051452,1.347307,0\n13,60165.3186,3,-1.446614,1.518360,0\n13,60165.3295,4,-1.239533,2.022870,0\n13,60165.3405,5,-2.233357,4.782032,0\n13,60168.2892,2,-0.272922,0.834000,0\n13,60168.2970,1,-0.551885,0.787909,0\n13,60168.3047,3,0.215221,1.300493,0\n13,60168.3157,4,-0.354714,2.065893,0\n13,60168.3267,5,9.800998,4.821743,0\n13,60176.2820,0,-2.711282,2.363240,0\n13,60177.2726,0,2.907485,2.777568,0\n13,60181.4088,2,-0.778592,0.924176,0\n13,60181.4164,1,0.776635,1.262891,0\n13,60181.4232,3,5.557160,3.239328,0\n13,60183.2660,2,0.008992,1.915637,0\n13,60183.2736,1,3.495092,2.936934,0\n13,60183.2812,3,-4.313775,2.548015,0\n13,60183.2922,4,4.681122,2.996080,0\n13,60183.3031,5,-0.644280,6.178194,0\n13,60195.2812,2,-0.992008,0.933265,0\n13,60195.2888,1,-0.199896,1.020470,0\n13,60195.2964,3,1.175473,1.626211,0\n13,60195.3073,4,-0.928975,2.339218,0\n13,60195.3183,5,3.537798,6.276374,0\n13,60198.2690,0,0.879443,2.172960,0\n13,60199.2186,0,1.393119,2.161035,0\n13,60200.2139,0,0.226138,2.589170,0\n13,60201.2072,0,0.024500,2.490959,0\n13,60202.2089,0,-1.152337,2.483400,0\n13,60209.1811,2,-0.534034,1.565458,0\n13,60209.1888,1,0.822128,1.410793,0\n13,60209.1965,3,-0.684217,1.831758,0\n13,60209.2075,4,-3.066642,2.909104,0\n13,60209.2184,5,0.602669,6.696602,0\n13,60212.1675,2,1.117115,1.936030,0\n13,60
212.1753,1,1.782217,2.752773,0\n13,60212.1830,3,0.321750,2.295181,0\n13,60212.1941,4,-1.936185,2.590936,0\n13,60212.2050,5,11.339810,5.666715,0\n13,60223.2416,2,0.123572,1.182341,0\n13,60223.2493,1,0.568967,1.241492,0\n13,60223.2569,3,0.040453,1.648758,0\n13,60223.2678,4,0.459117,2.472937,0\n13,60223.2788,5,-7.159738,5.794432,0\n13,60226.3337,2,0.034159,0.944593,0\n13,60226.3413,1,1.465278,0.946326,0\n13,60226.3489,3,0.247305,1.793391,0\n13,60226.3599,4,4.326116,3.071592,0\n13,60226.3708,5,1.558216,8.109918,0\n13,60238.3197,2,-0.738155,1.053325,0\n13,60238.3273,1,-0.990783,1.152318,0\n13,60238.3349,3,1.786266,2.171606,0\n13,60238.3459,4,3.042687,3.829919,0\n13,60238.3568,5,8.879921,11.257108,0\n13,60241.0870,2,-0.147198,1.999162,0\n13,60241.0948,1,-1.383542,3.066069,0\n13,60241.1025,3,-0.347256,2.444988,0\n13,60241.1136,4,2.242954,2.917921,0\n13,60241.1245,5,4.122897,5.941785,0\n13,60250.1708,2,2.348668,2.103043,0\n13,60250.1957,1,1.762817,2.811031,0\n13,60250.2034,3,-4.162498,2.927869,0\n13,60250.2143,4,3.563345,3.983845,0\n13,60250.2253,5,5.870506,7.937796,0\n13,60261.1296,0,2.993462,2.083503,0\n13,60262.0550,0,1.952810,2.311998,0\n13,60263.0556,0,1.620923,1.977922,0\n13,60264.0559,0,3.218953,2.187246,0\n13,60265.0780,0,-4.046216,2.407065,0\n13,60268.0449,2,1.923153,1.404630,0\n13,60268.0525,1,-0.380431,1.849085,0\n13,60268.0601,3,1.653331,2.130050,0\n13,60268.0711,4,-2.881836,2.734059,0\n13,60268.0820,5,7.530805,6.747447,0\n13,60278.0993,2,-1.574151,1.643888,0\n13,60278.1069,1,-4.148108,2.273913,0\n13,60278.1145,3,-1.136458,2.169661,0\n13,60278.1255,4,-4.712543,3.059077,0\n13,60278.1364,5,-4.843711,7.638536,0\n13,60281.1023,2,-0.579702,0.798074,0\n13,60281.1099,1,0.559643,0.795803,0\n13,60281.1175,3,0.985490,1.369862,0\n13,60281.1285,4,-0.125175,1.990803,0\n13,60281.1394,5,-6.211294,5.176151,0\n13,60284.1027,2,0.920098,0.837512,0\n13,60284.1104,1,-0.531105,0.761165,0\n13,60284.1180,3,-1.038009,1.160178,0\n13,60284.1289,4,-0.543047,1.702779,0\n13,60284.1399,5,-5.9
46274,4.435276,0\n13,60287.1047,2,-0.388991,1.250426,0\n13,60287.1123,1,0.622866,1.083712,0\n13,60287.1200,3,-1.365885,1.796073,0\n13,60287.1309,4,-1.566869,2.963949,0\n13,60287.1418,5,-12.680235,7.013420,0\n13,60290.0761,0,-0.996401,1.912932,0\n13,60291.0689,0,-1.639395,1.661116,0\n13,60292.0699,0,-0.087568,1.855572,0\n13,60293.0699,0,3.384405,1.578842,0\n13,60294.0708,0,0.056788,1.397227,0\n13,60532.3019,2,24.529644,1.046373,1\n13,60532.3097,1,23.404964,0.992504,1\n13,60532.3173,3,36.069386,1.568371,1\n13,60532.3282,4,42.765503,2.305008,1\n13,60532.3392,5,36.567162,5.439748,1\n13,60535.2802,2,23.662449,1.422315,1\n13,60535.2879,1,20.202259,1.361849,1\n13,60535.2957,3,39.966290,2.106815,1\n13,60535.3068,4,39.323189,3.034410,1\n13,60535.3177,5,25.412567,7.170496,1\n13,60538.2826,2,19.899044,1.856537,1\n13,60538.2903,1,14.983138,2.683123,1\n13,60538.2980,3,32.445835,1.936947,1\n13,60538.3089,4,37.566696,2.327296,1\n13,60538.3199,5,36.802090,5.025538,1\n13,60554.2651,0,2.890699,2.258298,0\n13,60555.2411,0,1.421973,2.038275,0\n13,60556.2370,0,5.816270,2.553418,0\n13,60557.2322,0,2.976124,1.849058,0\n13,60558.2332,0,3.465247,2.503530,0\n13,60559.2274,0,2.022449,2.213130,0\n13,60560.2268,0,3.503388,3.087183,0\n13,60567.3291,2,14.211590,0.945795,1\n13,60567.3368,1,8.278180,1.052801,1\n13,60567.3444,3,24.237070,1.263879,1\n13,60567.3553,4,31.986134,1.675546,1\n13,60567.3663,5,26.146296,4.267163,1\n13,60580.1736,2,10.443220,1.455966,1\n13,60580.1813,1,4.004329,1.395011,0\n13,60580.1889,3,21.601105,2.202044,1\n13,60580.1999,4,33.927616,3.210988,1\n13,60580.2108,5,31.062853,7.647927,0\n13,60582.1681,0,1.094513,2.405975,0\n13,60583.1640,0,-1.259893,2.979695,0\n13,60584.1591,0,1.617939,2.344479,0\n13,60585.1601,0,1.637716,1.874226,0\n13,60586.1564,0,0.117864,1.815223,0\n13,60587.1540,0,0.302183,2.121386,0\n13,60588.1461,0,3.094076,1.792624,0\n13,60593.1209,2,10.800429,1.153115,1\n13,60593.1287,1,4.406773,1.308495,0\n13,60593.1365,3,16.092447,1.545535,1\n13,60593.1476,4,24.34334
2,2.181189,1\n13,60593.1585,5,19.054943,5.111032,1\n13,60596.1351,2,12.383826,2.473228,0\n13,60596.1427,1,0.372005,3.515397,0\n13,60596.1504,3,16.204580,2.651616,1\n13,60596.1613,4,26.244993,3.235833,1\n13,60596.1723,5,20.389833,7.186456,0\n13,60605.0908,2,7.542615,0.977457,1\n13,60605.0986,1,3.585346,0.917699,0\n13,60605.1063,3,15.296724,1.463083,1\n13,60605.1174,4,21.654158,2.154012,1\n13,60605.1283,5,12.261124,5.138537,1\n13,60608.0836,2,7.212741,0.886919,1\n13,60608.0913,1,3.103603,0.823795,0\n13,60608.0991,3,15.016788,1.333418,1\n13,60608.1101,4,22.194675,1.964862,1\n13,60608.1211,5,16.844475,4.664538,0\n13,60611.0756,2,5.219585,0.772730,1\n13,60611.0833,1,2.315308,0.703447,0\n13,60611.0911,3,13.987851,1.177967,1\n13,60611.1021,4,23.238731,1.752712,1\n13,60611.1130,5,13.764248,4.170574,0\n13,60612.0813,0,-0.500034,1.440229,0\n13,60613.0818,0,1.002482,1.524789,0\n13,60614.0803,0,-1.957333,1.736466,0\n13,60615.0761,0,-1.864921,1.981273,0\n13,60616.0769,0,-0.823935,2.007182,0\n13,60617.0737,0,4.029193,1.584846,0\n13,60620.1444,0,1.135807,1.713108,0\n13,60621.2673,2,5.227184,0.942700,1\n13,60621.2749,1,0.979372,0.945305,0\n13,60621.2825,3,10.753699,1.650229,1\n13,60621.2934,4,15.025331,2.743479,1\n13,60621.3044,5,20.601212,7.516092,0\n13,60624.1290,2,4.608896,1.191301,0\n13,60624.1366,1,3.302126,1.565634,0\n13,60624.1442,3,13.692765,1.502649,1\n13,60624.1551,4,22.563152,2.025444,1\n13,60624.1661,5,17.466129,4.765408,0\n13,60633.0541,2,5.740388,1.090800,1\n13,60633.0617,1,-0.248447,1.055601,0\n13,60633.0693,3,11.721886,1.678442,1\n13,60633.0803,4,21.894281,2.489034,1\n13,60633.0912,5,8.198497,5.988672,0\n13,60636.0482,2,4.445689,1.029320,0\n13,60636.0558,1,0.510712,0.992636,0\n13,60636.0635,3,8.838101,1.589973,0\n13,60636.0744,4,18.146425,2.370814,1\n13,60636.0854,5,20.537880,5.701680,0\n13,60640.0972,2,2.708286,0.871447,0\n13,60640.1049,1,0.387127,0.847005,0\n13,60640.1125,3,11.844381,1.395034,1\n13,60640.1234,4,16.336960,2.129130,1\n13,60640.1344,5,22.784824,5.287
734,0\n13,60642.0643,0,-0.045857,2.075920,0\n13,60643.0521,0,-0.339986,1.668015,0\n13,60644.0621,0,-0.061904,1.665856,0\n13,60645.0625,0,-1.683454,1.768579,0\n13,60646.0636,0,-1.090810,1.782456,0\n13,60647.0635,0,-0.917836,1.968023,0\n13,60648.0642,0,-0.471162,1.443392,0\n13,60652.1289,2,2.063019,0.939241,0\n13,60652.1365,1,0.914091,1.117558,0\n13,60652.1441,3,8.505517,1.381162,1\n13,60652.1550,4,20.247869,2.050198,1\n13,60652.1660,5,4.584575,5.200393,0\n14,59798.3205,2,14.465278,1.364599,1\n14,59798.3281,1,13.748290,1.165200,1\n14,59798.3357,3,8.555202,1.766522,1\n14,59798.3466,4,7.253281,2.603756,0\n14,59798.3576,5,1.134257,5.365180,0\n14,59801.3553,2,9.855556,2.449258,0\n14,59801.3629,1,10.256726,3.538008,0\n14,59801.3705,3,10.469539,2.286622,0\n14,59801.3815,4,4.839510,2.516017,0\n14,59801.3924,5,6.016302,5.381228,0\n14,59818.2740,0,-1.523922,1.781903,0\n14,59819.2541,0,6.043163,2.455132,0\n14,59820.2522,0,-3.440181,1.895149,0\n14,59821.2478,0,-1.079028,2.430641,0\n14,59822.2433,0,-0.827454,2.848530,0\n14,59823.2659,0,5.701578,2.706588,0\n14,59826.3105,2,2.735408,0.921596,0\n14,59826.3181,1,2.361817,0.925133,0\n14,59826.3258,3,4.882393,1.509568,1\n14,59826.3367,4,5.898125,2.157517,0\n14,59826.3477,5,7.172944,5.536250,0\n14,59842.2456,2,1.210632,0.849698,0\n14,59842.2532,1,0.480856,0.813325,0\n14,59842.2608,3,3.207553,1.321817,0\n14,59842.2718,4,4.451898,2.166812,0\n14,59842.2827,5,4.518466,6.066945,0\n14,59851.1792,0,4.186630,2.586060,0\n14,59854.1485,2,0.507158,1.223279,0\n14,59854.1563,1,1.509088,1.335596,0\n14,59854.1640,3,3.079645,1.901563,0\n14,59854.1750,4,2.476080,2.769468,0\n14,59854.1860,5,5.571058,5.787987,0\n14,59857.1408,2,2.902585,2.129320,0\n14,59857.1485,1,0.076045,2.911045,0\n14,59857.1563,3,5.179635,2.093322,0\n14,59857.1673,4,4.421298,2.676636,0\n14,59857.1782,5,4.276851,6.626348,0\n14,59867.1112,2,-0.034593,1.082066,0\n14,59867.1189,1,-1.146521,0.972931,0\n14,59867.1267,3,4.685015,1.540005,0\n14,59867.1377,4,3.080778,2.356951,0\n14,59867.1487,
5,-3.318216,5.827800,0\n14,59870.1049,2,0.554243,0.970361,0\n14,59870.1126,1,0.375991,0.821212,0\n14,59870.1204,3,4.517774,1.374431,0\n14,59870.1314,4,1.906460,2.282754,0\n14,59870.1424,5,-0.145795,5.276641,0\n14,59873.0971,2,2.108685,1.041195,0\n14,59873.1049,1,0.095400,1.040191,0\n14,59873.1126,3,3.909108,1.442765,0\n14,59873.1236,4,9.230726,2.071694,0\n14,59873.1346,5,5.649127,4.588614,0\n14,59874.1461,0,-3.214277,1.608558,0\n14,59875.0995,0,2.425352,2.028404,0\n14,59876.0980,0,3.646141,2.440229,0\n14,59877.0976,0,-1.473760,1.759196,0\n14,59878.0964,0,0.576515,2.010597,0\n14,59879.0895,0,0.572945,1.861002,0\n14,59880.1017,0,-0.224757,1.368790,0\n14,59884.1760,2,-0.132164,0.767148,0\n14,59884.1836,1,-0.991678,0.885456,0\n14,59884.1913,3,2.358108,1.139322,0\n14,59884.2022,4,3.557226,1.610064,0\n14,59884.2132,5,1.323540,4.209188,0\n14,59887.2856,2,-2.655194,1.824434,0\n14,59887.2933,1,-3.118680,2.202677,0\n14,59887.3009,3,4.665102,2.350384,0\n14,59887.3118,4,1.135563,3.143775,0\n14,59887.3228,5,4.491710,7.945457,0\n14,59896.1307,2,-0.963295,0.821164,0\n14,59896.1384,1,-0.071138,0.702119,0\n14,59896.1460,3,2.277665,1.393450,0\n14,59896.1569,4,7.248254,2.194077,0\n14,59896.1679,5,4.658720,5.946997,0\n14,59899.1519,2,-1.774250,1.363683,0\n14,59899.1595,1,-1.174699,1.385324,0\n14,59899.1672,3,-0.953666,2.376407,0\n14,59899.1781,4,8.315199,3.709089,0\n14,59899.1891,5,-5.444651,7.901953,0\n14,59902.1384,2,0.053353,1.184878,0\n14,59902.1460,1,-0.247240,1.076702,0\n14,59902.1537,3,3.547660,1.943007,0\n14,59902.1646,4,3.456085,2.762274,0\n14,59902.1755,5,-2.950504,7.020600,0\n14,59904.1053,0,4.177117,2.292928,0\n14,59905.0555,0,-1.216755,2.497270,0\n14,59906.0562,0,2.410480,2.032158,0\n14,59907.0567,0,-1.391141,1.743943,0\n14,59908.0681,0,3.657772,2.691214,0\n14,59909.0582,0,0.521167,1.833233,0\n14,59910.0503,0,-0.100852,1.860389,0\n14,59914.0526,2,1.198847,1.437903,0\n14,59914.0602,1,-4.574095,1.954059,0\n14,59914.0678,3,0.367522,1.685196,0\n14,59914.0788,4,2.276051,2.26701
6,0\n14,59914.0897,5,10.662569,5.357585,0\n14,59924.1060,2,0.898902,1.131097,0\n14,59924.1136,1,-0.533751,1.571826,0\n14,59924.1212,3,0.148105,1.727512,0\n14,59924.1322,4,4.691455,2.527090,0\n14,59924.1431,5,-2.120271,5.628747,0\n14,59927.1074,2,-0.466719,1.189681,0\n14,59927.1151,1,-0.370304,1.132207,0\n14,59927.1227,3,2.069784,1.592095,0\n14,59927.1336,4,2.988137,2.298649,0\n14,59927.1446,5,2.182713,5.887877,0\n14,59930.1236,2,0.240297,0.940370,0\n14,59930.1312,1,-0.920183,0.827606,0\n14,59930.1388,3,1.243185,1.282906,0\n14,59930.1498,4,3.796952,2.032441,0\n14,59930.1607,5,4.695318,5.329674,0\n14,59933.1249,2,0.006509,0.690589,0\n14,59933.1325,1,1.157228,0.695202,0\n14,59933.1401,3,2.223172,1.269250,0\n14,59933.1511,4,2.050447,1.913679,0\n14,59933.1620,5,6.551545,5.426427,0\n14,59934.0638,0,-1.038826,1.138865,0\n14,59935.0646,0,-0.855022,1.120789,0\n14,59936.0642,0,4.664287,1.541755,0\n14,59937.0650,0,0.068496,1.637206,0\n14,59938.0647,0,-0.037584,1.994424,0\n14,59939.0650,0,0.482500,2.191997,0\n14,60165.3032,2,-0.497132,1.193823,0\n14,60165.3109,1,2.598282,1.347751,0\n14,60165.3186,3,-2.635130,1.517240,0\n14,60165.3295,4,5.500406,2.024001,0\n14,60165.3405,5,-1.459476,4.779670,0\n14,60168.2892,2,-0.132310,0.833159,0\n14,60168.2970,1,1.305878,0.789044,0\n14,60168.3047,3,1.487449,1.300279,0\n14,60168.3157,4,2.190609,2.065560,0\n14,60168.3267,5,-2.313916,4.814189,0\n14,60176.2820,0,-2.258721,2.358804,0\n14,60177.2726,0,-0.240173,2.761198,0\n14,60181.4088,2,-0.118050,0.923237,0\n14,60181.4164,1,-1.757872,1.260326,0\n14,60181.4232,3,-0.566288,3.235162,0\n14,60183.2660,2,-0.732747,1.913709,0\n14,60183.2736,1,0.012732,2.931251,0\n14,60183.2812,3,1.402672,2.546405,0\n14,60183.2922,4,-1.725346,2.993038,0\n14,60183.3031,5,0.494266,6.175346,0\n14,60195.2812,2,3.029382,0.934420,0\n14,60195.2888,1,-0.670324,1.018990,0\n14,60195.2964,3,-0.662677,1.624419,0\n14,60195.3073,4,-3.872562,2.337922,0\n14,60195.3183,5,2.712977,6.272959,0\n14,60198.2690,0,2.956521,2.177715,0\n14,60199.2
186,0,1.331341,2.156719,0\n14,60200.2139,0,1.351745,2.588871,0\n14,60201.2072,0,2.853797,2.498285,0\n14,60202.2089,0,1.906982,2.486693,0\n14,60209.1811,2,-1.602512,1.563901,0\n14,60209.1888,1,-0.671917,1.407978,0\n14,60209.1965,3,0.374204,1.830516,0\n14,60209.2075,4,2.568392,2.908100,0\n14,60209.2184,5,6.678216,6.695596,0\n14,60212.1675,2,3.776216,1.935000,0\n14,60212.1753,1,0.021062,2.747988,0\n14,60212.1830,3,-1.807372,2.293340,0\n14,60212.1941,4,2.568171,2.590221,0\n14,60212.2050,5,10.840719,5.663691,0\n14,60223.2416,2,-0.194212,1.181089,0\n14,60223.2493,1,-0.036469,1.239146,0\n14,60223.2569,3,1.530641,1.648199,0\n14,60223.2678,4,1.801575,2.471934,0\n14,60223.2788,5,4.744702,5.793488,0\n14,60226.3337,2,-0.744695,0.943627,0\n14,60226.3413,1,-0.106145,0.942899,0\n14,60226.3489,3,2.442077,1.793019,0\n14,60226.3599,4,1.509347,3.068994,0\n14,60226.3708,5,-6.627614,8.105484,0\n14,60238.3197,2,-0.863180,1.052267,0\n14,60238.3273,1,1.557186,1.152551,0\n14,60238.3349,3,-4.336288,2.169387,0\n14,60238.3459,4,4.612921,3.828061,0\n14,60238.3568,5,-2.021824,11.249375,0\n14,60241.0870,2,0.738195,1.997376,0\n14,60241.0948,1,-1.971425,3.061613,0\n14,60241.1025,3,-2.025038,2.443142,0\n14,60241.1136,4,-0.312927,2.915593,0\n14,60241.1245,5,3.056638,5.938406,0\n14,60250.1708,2,-2.482518,2.100286,0\n14,60250.1957,1,-0.029834,2.806254,0\n14,60250.2034,3,4.678575,2.926827,0\n14,60250.2143,4,-3.449543,3.980825,0\n14,60250.2253,5,-3.656875,7.932066,0\n14,60261.1296,0,-3.352771,2.067494,0\n14,60262.0550,0,-3.894481,2.299585,0\n14,60263.0556,0,0.061202,1.966700,0\n14,60264.0559,0,-0.441779,2.168793,0\n14,60265.0780,0,-0.070926,2.402439,0\n14,60268.0449,2,-2.115148,1.402396,0\n14,60268.0525,1,-1.156284,1.846402,0\n14,60268.0601,3,-0.248761,2.127820,0\n14,60268.0711,4,1.514031,2.732877,0\n14,60268.0820,5,-11.142164,6.741534,0\n14,60278.0993,2,0.058940,1.642234,0\n14,60278.1069,1,1.004120,2.270966,0\n14,60278.1145,3,-0.427660,2.168000,0\n14,60278.1255,4,0.148053,3.057248,0\n14,60278.1364,5,14.
839427,7.639682,0\n14,60281.1023,2,0.190632,0.797410,0\n14,60281.1099,1,-0.334674,0.793767,0\n14,60281.1175,3,-1.839893,1.368291,0\n14,60281.1285,4,-1.943368,1.989664,0\n14,60281.1394,5,3.423336,5.175146,0\n14,60284.1027,2,0.880585,0.836640,0\n14,60284.1104,1,-1.071148,0.760074,0\n14,60284.1180,3,-0.493422,1.159284,0\n14,60284.1289,4,-1.769790,1.701805,0\n14,60284.1399,5,10.491048,4.439059,0\n14,60287.1047,2,-2.440768,1.249228,0\n14,60287.1123,1,-0.587990,1.081420,0\n14,60287.1200,3,-0.658860,1.794693,0\n14,60287.1309,4,6.071679,2.963972,0\n14,60287.1418,5,-0.912052,7.009711,0\n14,60290.0761,0,0.791311,1.912837,0\n14,60291.0689,0,-0.916465,1.657989,0\n14,60292.0699,0,-0.940145,1.852110,0\n14,60293.0699,0,-0.065888,1.558097,0\n14,60294.0708,0,-0.362213,1.394291,0\n14,60532.3019,2,-0.128229,1.022716,0\n14,60532.3097,1,-0.763027,0.950918,0\n14,60532.3173,3,-0.986068,1.539248,0\n14,60532.3282,4,-0.845234,2.279716,0\n14,60532.3392,5,8.809470,5.421995,0\n14,60535.2802,2,-0.482045,1.404915,0\n14,60535.2879,1,1.358840,1.336113,0\n14,60535.2957,3,-0.531306,2.081220,0\n14,60535.3068,4,-3.251427,3.016502,0\n14,60535.3177,5,-1.253254,7.157213,0\n14,60538.2826,2,1.802223,1.846149,0\n14,60538.2903,1,-0.026881,2.671491,0\n14,60538.2980,3,1.533429,1.916902,0\n14,60538.3089,4,-0.856327,2.306201,0\n14,60538.3199,5,-1.663799,5.002286,0\n14,60554.2651,0,-1.160009,2.241721,0\n14,60555.2411,0,-2.432893,2.027424,0\n14,60556.2370,0,2.398724,2.534231,0\n14,60557.2322,0,-1.568709,1.829169,0\n14,60558.2332,0,-4.116055,2.484439,0\n14,60559.2274,0,0.190265,2.200325,0\n14,60560.2268,0,1.347325,3.073875,0\n14,60567.3291,2,0.910389,0.934006,0\n14,60567.3368,1,-1.444133,1.041686,0\n14,60567.3444,3,-1.500671,1.243064,0\n14,60567.3553,4,2.684830,1.653721,0\n14,60567.3663,5,0.345307,4.248466,0\n14,60580.1736,2,0.807077,1.449346,0\n14,60580.1813,1,2.870951,1.391824,0\n14,60580.1889,3,-0.262873,2.190222,0\n14,60580.1999,4,-6.388680,3.196808,0\n14,60580.2108,5,9.181289,7.635880,0\n14,60582.1681,0,3.60423
4,2.412230,0\n14,60583.1640,0,-2.491946,2.974151,0\n14,60584.1591,0,0.669646,2.335869,0\n14,60585.1601,0,0.574023,1.865054,0\n14,60586.1564,0,-0.297618,1.811183,0\n14,60587.1540,0,-1.218754,2.115987,0\n14,60588.1461,0,-1.992268,1.771815,0\n14,60593.1209,2,-0.511945,1.144706,0\n14,60593.1287,1,-0.914986,1.302187,0\n14,60593.1365,3,-1.629705,1.534286,0\n14,60593.1476,4,-3.500163,2.167956,0\n14,60593.1585,5,-7.225812,5.098632,0\n14,60596.1351,2,1.942903,2.467501,0\n14,60596.1427,1,-3.828583,3.510221,0\n14,60596.1504,3,-0.379595,2.643848,0\n14,60596.1613,4,-4.746652,3.225241,0\n14,60596.1723,5,-0.740908,7.175334,0\n14,60605.0908,2,-0.240784,0.970734,0\n14,60605.0986,1,1.228715,0.912676,0\n14,60605.1063,3,-0.173487,1.451932,0\n14,60605.1174,4,-3.157050,2.142289,0\n14,60605.1283,5,-4.353776,5.129833,0\n14,60608.0836,2,-1.290518,0.880033,0\n14,60608.0913,1,0.897031,0.818780,0\n14,60608.0991,3,-1.789510,1.321652,0\n14,60608.1101,4,-0.283683,1.951819,0\n14,60608.1211,5,-1.071998,4.652730,0\n14,60611.0756,2,0.428420,0.767492,0\n14,60611.0833,1,-0.175538,0.697855,0\n14,60611.0911,3,-1.351069,1.165860,0\n14,60611.1021,4,3.187355,1.739082,0\n14,60611.1130,5,-2.026892,4.159950,0\n14,60612.0813,0,1.714202,1.449054,0\n14,60613.0818,0,-1.208979,1.515573,0\n14,60614.0803,0,2.366692,1.746634,0\n14,60615.0761,0,0.562962,1.980418,0\n14,60616.0769,0,-0.108613,2.003422,0\n14,60617.0737,0,1.100774,1.564082,0\n14,60620.1444,0,-3.990349,1.704574,0\n14,60621.2673,2,-1.006580,0.937842,0\n14,60621.2749,1,-0.619488,0.942508,0\n14,60621.2825,3,1.860522,1.643903,0\n14,60621.2934,4,1.823893,2.737007,0\n14,60621.3044,5,-2.549210,7.504488,0\n14,60624.1290,2,-1.234065,1.187694,0\n14,60624.1366,1,0.769079,1.561521,0\n14,60624.1442,3,-1.651142,1.493277,0\n14,60624.1551,4,-1.015730,2.012733,0\n14,60624.1661,5,-1.555369,4.753550,0\n14,60633.0541,2,0.136783,1.086145,0\n14,60633.0617,1,-0.152341,1.054063,0\n14,60633.0693,3,-0.510173,1.670818,0\n14,60633.0803,4,0.315384,2.478524,0\n14,60633.0912,5,-6.652580,
5.982395,0\n14,60636.0482,2,-0.451914,1.025376,0\n14,60636.0558,1,0.286247,0.990897,0\n14,60636.0635,3,0.255769,1.584027,0\n14,60636.0744,4,-0.340420,2.361790,0\n14,60636.0854,5,9.500138,5.693579,0\n14,60640.0972,2,0.332550,0.868813,0\n14,60640.1049,1,-0.473409,0.845206,0\n14,60640.1125,3,0.729877,1.386701,0\n14,60640.1234,4,-1.934210,2.120461,0\n14,60640.1344,5,-4.023204,5.273717,0\n14,60642.0643,0,6.371965,2.098372,0\n14,60643.0521,0,0.069668,1.665262,0\n14,60644.0621,0,-0.031334,1.662740,0\n14,60645.0625,0,0.388892,1.767081,0\n14,60646.0636,0,-0.740859,1.779115,0\n14,60647.0635,0,3.793745,1.980666,0\n14,60648.0642,0,-2.506170,1.440771,0\n14,60652.1289,2,2.536718,0.938627,0\n14,60652.1365,1,-0.345612,1.114977,0\n14,60652.1441,3,-0.218325,1.374793,0\n14,60652.1550,4,3.384219,2.040273,0\n14,60652.1660,5,5.932649,5.198509,0\n17,59750.4229,2,0.384775,1.502702,0\n17,59750.4306,1,2.970657,2.100801,0\n17,59750.4383,3,-3.890317,2.298941,0\n17,59750.4450,4,-7.424517,10.310197,0\n17,59752.4070,2,1.180267,1.063318,0\n17,59752.4147,1,-0.427451,1.103617,0\n17,59752.4224,3,0.804619,1.459435,0\n17,59752.4334,4,0.082026,2.345975,0\n17,59752.4435,5,-12.127212,9.469489,0\n17,59767.2968,2,0.300545,0.879810,0\n17,59767.3045,1,-0.789039,0.757829,0\n17,59767.3122,3,-2.516992,1.335309,0\n17,59767.3233,4,-0.305087,2.137903,0\n17,59767.3343,5,-2.361575,4.981940,0\n17,59770.2179,2,2.652607,2.031536,0\n17,59770.2256,1,0.104747,2.751110,0\n17,59770.2334,3,-1.251170,2.138434,0\n17,59770.2445,4,1.627103,2.665143,0\n17,59770.2557,5,3.234555,6.275570,0\n17,59779.3188,2,-1.560282,2.314641,0\n17,59779.3265,1,1.138921,2.800010,0\n17,59779.3342,3,1.399184,2.246128,0\n17,59779.3452,4,-4.086601,2.966733,0\n17,59779.3562,5,3.600588,7.075419,0\n17,59782.1897,2,-0.205182,1.441666,0\n17,59782.1974,1,-0.008989,1.378359,0\n17,59782.2051,3,-3.370768,2.336834,0\n17,59782.2162,4,-1.506593,3.089562,0\n17,59782.2274,5,-1.340868,7.552328,0\n17,59797.2861,2,0.574704,1.059202,0\n17,59797.2938,1,-1.925344,1.095736,0
\n17,59797.3015,3,-1.303792,1.916743,0\n17,59797.3126,4,-0.918545,2.786953,0\n17,59797.3237,5,-7.784748,6.963860,0\n17,59800.3168,2,0.594011,2.277387,0\n17,59800.3244,1,6.002035,3.310603,0\n17,59800.3320,3,0.828272,2.636445,0\n17,59800.3429,4,-2.143990,3.045394,0\n17,59800.3539,5,2.182031,6.526192,0\n17,59807.1738,2,0.345367,1.600768,0\n17,59807.1815,1,1.497795,2.061084,0\n17,59807.1892,3,-1.738689,1.721755,0\n17,59807.2003,4,-2.008608,2.187723,0\n17,59807.2114,5,-4.591490,5.073585,0\n17,59810.1045,2,-0.190427,1.024519,0\n17,59810.1122,1,1.409750,0.987204,0\n17,59810.1200,3,2.097727,1.766024,0\n17,59810.1311,4,4.661858,2.723413,0\n17,59810.1422,5,1.852001,6.541757,0\n17,59813.1044,2,-0.609008,0.979608,0\n17,59813.1122,1,-1.283370,0.947661,0\n17,59813.1199,3,0.443231,1.563090,0\n17,59813.1310,4,0.463331,2.578759,0\n17,59813.1422,5,-4.206830,5.891186,0\n17,59819.1532,0,-0.111748,1.900487,0\n17,59820.1047,0,-1.127300,1.845100,0\n17,59821.1026,0,-2.983045,2.653126,0\n17,59822.1105,0,-3.786463,3.135153,0\n17,59823.1505,0,-6.251946,2.924953,0\n17,59835.0600,2,-0.105554,1.651957,0\n17,59835.0678,1,-3.813735,2.461054,0\n17,59835.0755,3,-5.183056,2.248735,0\n17,59835.0866,4,-3.032220,2.768835,0\n17,59835.0978,5,-1.849971,5.832443,0\n17,59839.0306,2,1.615393,1.178528,0\n17,59839.0384,1,-0.088986,1.136387,0\n17,59839.0461,3,-3.522223,1.587615,0\n17,59839.0573,4,-2.323542,2.134739,0\n17,59839.0684,5,2.161493,5.221899,0\n17,59842.0207,2,1.318505,1.033401,0\n17,59842.0285,1,0.922532,1.039005,0\n17,59842.0362,3,1.569138,1.599077,0\n17,59842.0473,4,3.807324,2.315629,0\n17,59842.0585,5,-1.946610,5.253097,0\n17,59851.1114,0,0.466809,1.833261,0\n17,59854.0796,2,-0.432977,1.106505,0\n17,59854.0873,1,-0.349620,1.186553,0\n17,59854.0950,3,-1.028248,1.475277,0\n17,59854.1061,4,0.624902,2.189718,0\n17,59854.1172,5,-6.577788,5.223208,0\n17,59857.0453,2,1.048279,1.770880,0\n17,59857.0531,1,4.293002,2.442868,0\n17,59857.0608,3,0.670081,1.885174,0\n17,59857.0719,4,-1.393756,2.439591,0\n17,5985
7.0830,5,7.867962,5.441516,0\n17,59864.0162,2,1.446581,0.850881,0\n17,59864.0239,1,1.881013,0.881634,0\n17,59864.0316,3,1.176724,1.403741,0\n17,59864.0428,4,1.061918,2.333516,0\n17,59864.0539,5,7.476207,5.917978,0\n17,59867.0178,2,0.380713,1.309081,0\n17,59867.0255,1,-3.510653,1.302897,0\n17,59867.0332,3,-2.578208,2.183999,0\n17,59867.0443,4,1.046922,3.227464,0\n17,59867.0554,5,-14.202744,9.457583,0\n17,59870.0194,2,-1.554325,0.927003,0\n17,59870.0272,1,-0.831158,0.920545,0\n17,59870.0349,3,-1.187623,1.500118,0\n17,59870.0459,4,2.125045,2.528746,0\n17,59870.0571,5,2.077787,5.911781,0\n17,59873.0212,2,-0.457801,0.738598,0\n17,59873.0289,1,2.796051,0.760300,0\n17,59873.0366,3,0.138800,1.232345,0\n17,59873.0477,4,0.273784,1.886916,0\n17,59873.0588,5,-5.356641,4.801971,0\n17,59874.0599,0,-2.398834,1.584590,0\n17,59875.0311,0,-0.540180,1.315395,0\n17,59876.0231,0,0.411913,2.120848,0\n17,59877.0238,0,-0.891288,1.592397,0\n17,59878.0246,0,0.252658,1.625712,0\n17,59879.0248,0,0.494818,1.668736,0\n17,59880.0258,0,1.609941,1.357399,0\n17,59884.0823,2,1.431002,1.091537,0\n17,59884.0900,1,-0.669697,1.493729,0\n17,59884.0976,3,0.632236,1.522321,0\n17,59884.1085,4,2.388964,2.008811,0\n17,59884.1195,5,-3.279281,4.883777,0\n17,59887.0298,2,-1.951323,1.872626,0\n17,59887.0375,1,-0.140049,2.947258,0\n17,59887.0451,3,-0.987463,2.277290,0\n17,59887.0562,4,-1.669648,2.256846,0\n17,59887.0673,5,4.776433,5.354571,0\n17,60118.4163,0,1.237189,1.560750,0\n17,60124.2541,2,-0.027002,2.279922,0\n17,60124.2618,1,-1.582832,2.932222,0\n17,60124.2695,3,-2.243939,2.501337,0\n17,60124.2807,4,-2.010008,2.976800,0\n17,60124.2918,5,-3.812106,6.686803,0\n17,60140.2290,0,2.889142,1.885088,0\n17,60141.2225,0,-2.675623,3.532677,0\n17,60142.2202,0,3.701951,3.092139,0\n17,60143.2212,0,0.107055,2.421978,0\n17,60144.2186,0,2.121859,1.934957,0\n17,60145.2123,0,3.519005,2.431906,0\n17,60153.2274,2,1.331808,1.381732,0\n17,60153.2351,1,1.678943,1.884794,0\n17,60153.2428,3,-0.669065,1.646313,0\n17,60153.2539,4,-2.33
6847,2.069594,0\n17,60153.2650,5,-3.040281,5.269622,0\n17,60162.1477,2,0.274822,1.883733,0\n17,60162.1554,1,-1.090832,2.591505,0\n17,60162.1631,3,-4.857473,2.765956,0\n17,60162.1742,4,-10.275881,3.141578,0\n17,60162.1853,5,-9.252888,7.983027,0\n17,60165.1369,2,-0.468279,0.846974,0\n17,60165.1446,1,-0.430437,0.708165,0\n17,60165.1524,3,-0.581242,1.364931,0\n17,60165.1635,4,3.187243,2.182051,0\n17,60165.1746,5,6.814413,5.636336,0\n17,60168.1260,2,1.795461,0.881347,0\n17,60168.1337,1,-0.350579,0.854263,0\n17,60168.1414,3,-2.100255,1.659393,0\n17,60168.1525,4,-1.526452,2.377539,0\n17,60168.1637,5,2.514493,5.409767,0\n17,60176.1332,0,-1.022222,2.365174,0\n17,60177.1370,0,-3.289955,2.790762,0\n17,60181.3147,2,-1.503265,1.258997,0\n17,60181.3223,1,1.620242,1.237350,0\n17,60181.3299,3,2.305993,2.212692,0\n17,60181.3409,4,-0.697285,4.016526,0\n17,60181.3518,5,3.620666,8.753998,0\n17,60184.3625,2,-0.132555,2.135256,0\n17,60184.3701,1,-0.614091,2.560603,0\n17,60184.3777,3,0.653611,2.486900,0\n17,60184.3887,4,1.677764,2.891815,0\n17,60184.3996,5,7.855544,6.898280,0\n17,60194.1575,2,1.309505,1.067185,0\n17,60194.1652,1,-0.309205,1.053532,0\n17,60194.1729,3,-0.334617,1.786434,0\n17,60194.1839,4,0.655740,3.139240,0\n17,60194.1926,5,5.044827,11.278636,0\n17,60197.1181,2,0.552177,1.097758,0\n17,60197.1258,1,0.541816,0.909236,0\n17,60197.1335,3,0.639729,1.336890,0\n17,60197.1446,4,-0.978055,2.037800,0\n17,60197.1557,5,4.619338,4.987423,0\n17,60198.1077,0,2.149141,2.452106,0\n17,60199.0914,0,5.486719,3.143868,0\n17,60200.0650,0,1.228997,2.524841,0\n17,60201.0680,0,6.056491,2.897650,0\n17,60202.0552,0,3.771713,2.094126,0\n17,60206.1107,0,1.964452,1.559410,0\n17,60207.1469,0,1.315419,2.819893,0\n17,60208.0229,2,-0.600505,1.423023,0\n17,60208.0307,1,0.017527,1.343573,0\n17,60208.0384,3,-1.550505,1.831250,0\n17,60208.0495,4,3.882280,2.726225,0\n17,60208.0606,5,9.559813,7.414577,0\n17,60211.0124,2,1.860704,2.250480,0\n17,60211.0202,1,-2.090354,3.034887,0\n17,60211.0279,3,0.410192,2.599820,
0\n17,60211.0390,4,-1.062310,3.468099,0\n17,60211.0502,5,8.321412,9.169344,0\n17,60221.0153,2,0.448097,0.997295,0\n17,60221.0230,1,0.082163,0.883182,0\n17,60221.0308,3,1.515744,1.365631,0\n17,60221.0419,4,-0.955922,2.171834,0\n17,60221.0530,5,6.189006,5.659595,0\n17,60224.0140,2,-0.977404,1.146213,0\n17,60224.0217,1,-1.527085,1.044619,0\n17,60224.0294,3,-2.026714,1.896492,0\n17,60224.0405,4,-2.270058,2.563563,0\n17,60224.0516,5,1.532559,5.857242,0\n17,60227.0151,2,0.269555,1.054280,0\n17,60227.0228,1,1.115057,0.999069,0\n17,60227.0305,3,1.552577,1.598891,0\n17,60227.0416,4,0.899977,2.226238,0\n17,60227.0527,5,-13.314797,5.586868,0\n17,60228.0187,0,1.561344,2.716171,0\n17,60229.0162,0,2.115411,2.732387,0\n17,60234.0265,0,-0.632177,2.254489,0\n17,60237.2206,2,0.991648,0.921157,0\n17,60237.2283,1,1.011999,0.870146,0\n17,60237.2359,3,1.932343,1.516595,0\n17,60237.2468,4,0.683186,2.506894,0\n17,60237.2578,5,-2.942844,7.042232,0\n17,60240.0223,2,0.455927,1.997859,0\n17,60240.0300,1,2.247968,2.647892,0\n17,60240.0377,3,6.028008,2.647428,0\n17,60240.0488,4,2.535219,3.012484,0\n17,60240.0598,5,8.448805,5.811503,0\n17,60249.0338,2,0.460854,0.866548,0\n17,60249.0415,1,0.452554,0.772836,0\n17,60249.0492,3,-1.191506,1.286252,0\n17,60249.0602,4,2.830095,2.116775,0\n17,60249.0712,5,-4.378712,5.177276,0\n17,60260.0423,0,-7.464523,2.644202,0\n17,60261.0361,0,0.930443,1.914282,0\n17,60262.0367,0,4.964463,1.854471,0\n17,60263.0373,0,0.783741,2.196650,0\n17,60264.0465,0,-3.080768,1.916776,0\n17,60490.2647,2,-0.977129,1.482041,0\n17,60490.2725,1,-2.611819,1.794772,0\n17,60490.2802,3,1.078630,2.071858,0\n17,60490.2913,4,-0.492853,2.955702,0\n17,60490.3024,5,3.792550,7.104066,0\n17,60493.2372,2,0.021979,1.054925,0\n17,60493.2450,1,-1.376170,0.980580,0\n17,60493.2527,3,1.015628,1.668951,0\n17,60493.2639,4,-0.002133,2.546342,0\n17,60493.2750,5,-11.551120,6.173368,0\n17,60499.2467,0,0.438197,2.600874,0\n17,60500.2437,0,-1.686091,2.074696,0\n17,60501.2385,0,0.020948,2.299457,0\n17,60502.2355,
0,1.068030,2.629417,0\n17,60508.2638,2,-2.790141,2.436066,0\n17,60508.2715,1,5.700086,3.490426,0\n17,60508.2792,3,0.113332,2.677060,0\n17,60508.2903,4,-1.245703,3.280095,0\n17,60508.3014,5,16.609587,7.427779,0\n17,60524.2390,0,-0.490797,2.080026,0\n17,60525.1736,0,2.666769,2.941305,0\n17,60532.3489,2,-0.207433,0.864004,0\n17,60532.3565,1,-0.134437,0.845488,0\n17,60532.3641,3,-0.515472,1.462743,0\n17,60532.3751,4,0.897895,2.323136,0\n17,60532.3860,5,1.356515,5.973135,0\n17,60535.1253,2,3.357187,1.895133,0\n17,60535.1330,1,1.788090,2.447693,0\n17,60535.1408,3,-0.746670,2.393641,0\n17,60535.1519,4,1.761451,3.298109,0\n17,60535.1630,5,-3.134825,7.769337,0\n17,60538.2351,2,7.960939,1.803240,0\n17,60538.2428,1,6.926830,2.602727,0\n17,60538.2505,3,3.353295,1.972350,0\n17,60538.2615,4,5.710919,2.318584,0\n17,60538.2725,5,0.716567,5.113697,0\n17,60546.3406,2,12.973317,1.903937,1\n17,60546.3482,1,11.756248,2.837219,1\n17,60546.3558,3,10.891992,2.308349,0\n17,60546.3668,4,10.706098,3.126808,0\n17,60546.3777,5,1.998101,7.646282,0\n17,60549.0879,2,8.457420,1.114208,1\n17,60549.0956,1,10.614448,1.048073,1\n17,60549.1034,3,9.131392,1.747332,1\n17,60549.1145,4,12.109291,2.651627,0\n17,60549.1256,5,-0.454903,6.443316,0\n17,60554.0964,0,3.385612,2.492159,0\n17,60555.0951,0,-0.653454,2.008215,0\n17,60556.0879,0,4.359657,2.500103,0\n17,60557.0831,0,0.208935,1.804377,0\n17,60558.1093,0,2.002906,2.298758,0\n17,60559.1097,0,-1.713623,1.974990,0\n17,60560.1065,0,-4.030066,2.814673,0\n17,60567.2821,2,5.086202,1.698282,0\n17,60567.2897,1,4.862064,2.316888,0\n17,60567.2973,3,2.821583,1.933260,0\n17,60567.3083,4,8.017381,2.382588,0\n17,60567.3192,5,5.673804,5.440264,0\n17,60574.1118,2,4.409955,1.550580,0\n17,60574.1195,1,2.795052,2.196265,0\n17,60574.1272,3,4.197219,1.987329,0\n17,60574.1383,4,10.014592,2.528254,0\n17,60574.1493,5,12.793771,5.745250,0\n17,60577.0186,2,4.357023,1.470498,0\n17,60577.0263,1,1.377891,1.400257,0\n17,60577.0340,3,3.097299,2.332945,0\n17,60577.0451,4,5.029422,3.52021
0,0\n17,60577.0563,5,9.703295,8.530192,0\n17,60580.0095,2,0.080061,1.449560,0\n17,60580.0173,1,0.636153,1.387539,0\n17,60580.0250,3,1.814262,2.309473,0\n17,60580.0361,4,2.268791,3.480996,0\n17,60580.0472,5,3.766253,8.433430,0\n17,60582.0840,0,2.160855,2.057876,0\n17,60583.0169,0,-2.377121,2.935992,0\n17,60584.0117,0,-7.030778,2.272231,0\n17,60585.0117,0,-2.432227,1.843772,0\n17,60586.0123,0,0.942960,1.782520,0\n17,60587.0127,0,-0.601708,2.076788,0\n17,60588.0131,0,-2.069814,1.647215,0\n17,60593.0636,2,0.346279,0.950896,0\n17,60593.0713,1,1.045972,1.126506,0\n17,60593.0790,3,1.954885,1.349871,0\n17,60593.0901,4,4.170722,1.979371,0\n17,60593.1012,5,5.737381,4.821476,0\n17,60596.0304,2,1.884329,2.578470,0\n17,60596.0381,1,-2.693286,3.627853,0\n17,60596.0458,3,0.705422,2.812787,0\n17,60596.0569,4,1.202975,3.396812,0\n17,60596.0680,5,16.761280,7.451248,0\n17,60603.0208,2,0.386061,0.711484,0\n17,60603.0286,1,-0.007518,0.663680,0\n17,60603.0363,3,0.688574,1.169908,0\n17,60603.0473,4,4.215033,1.826377,0\n17,60603.0584,5,4.616831,4.548802,0\n17,60606.0225,2,-0.884437,1.024675,0\n17,60606.0303,1,-0.808401,0.984848,0\n17,60606.0379,3,1.421415,1.656912,0\n17,60606.0490,4,-0.165550,2.551519,0\n17,60606.0601,5,-0.249700,6.335219,0\n17,60609.0247,2,0.342698,0.744517,0\n17,60609.0323,1,0.365452,0.700571,0\n17,60609.0400,3,3.474475,1.220512,0\n17,60609.0510,4,5.966081,1.906627,0\n17,60609.0621,5,-4.503197,4.756433,0\n17,60612.0266,0,-1.850411,1.158724,0\n17,60613.0269,0,-1.095945,1.252873,0\n17,60614.0276,0,1.658578,1.438634,0\n17,60615.0375,0,-1.868202,1.603315,0\n"
  },
  {
    "path": "examples/data/plasticc_test_set_metadata_1k.csv",
    "content": "object_id,ra,decl,gal_l,gal_b,ddf,hostgal_specz,hostgal_photoz,hostgal_photoz_err,distmod,mwebv\n13,34.453125,-5.229529,169.987075,-59.956185,1,0.3048,0.3193,0.0542,41.1123,0.019\n14,33.398438,-4.331149,167.226341,-59.936551,1,nan,0.6323,0.0179,42.8774,0.018\n17,348.529419,-61.755440,321.293980,-51.763351,1,nan,0.8297,0.0605,43.6000,0.016\n23,34.804688,-5.829153,171.307861,-60.174401,1,nan,0.6533,0.1479,42.9640,0.023\n34,351.321442,-64.198746,317.458993,-50.429931,1,0.4557,0.4617,0.0122,42.0540,0.023\n35,35.332031,-5.979157,172.286722,-59.931743,1,nan,0.8388,0.0375,43.6290,0.022\n43,0.574468,-45.981140,327.041068,-68.778764,1,nan,0.6669,0.0546,43.0186,0.006\n50,0.574468,-45.981140,327.041068,-68.778764,1,nan,1.4663,0.0529,45.1281,0.006\n60,346.562500,-63.448284,320.824720,-49.866957,1,nan,0.9462,0.0116,43.9519,0.021\n69,349.160583,-64.760857,318.219706,-49.458924,1,nan,1.0432,0.1092,44.2138,0.020\n88,349.160583,-64.760857,318.219706,-49.458924,1,0.1608,0.1650,0.0053,39.4929,0.020\n96,151.171875,2.537361,237.288526,43.169764,1,0.3277,0.3680,0.0340,41.4711,0.024\n106,1.666667,-44.399834,327.519190,-70.529554,1,nan,0.8532,0.0602,43.6747,0.009\n114,351.259003,-64.386185,317.344860,-50.255113,1,nan,0.7996,0.2747,43.5011,0.020\n115,151.347656,4.181528,235.568369,44.259942,1,nan,0.8979,0.0515,43.8114,0.016\n116,150.468750,1.641510,237.714575,42.075234,1,nan,1.1244,0.0363,44.4151,0.017\n130,34.277344,-5.679190,170.314930,-60.410322,1,0.3395,0.3368,0.0728,41.2464,0.020\n142,1.694561,-45.191612,326.278557,-69.858253,1,nan,1.2710,0.0796,44.7444,0.011\n147,150.820312,1.641510,237.994507,42.358984,1,nan,0.2904,0.1155,40.8738,0.020\n151,151.171875,1.342993,238.602520,42.464379,1,nan,0.5090,0.0122,42.3075,0.026\n168,349.429535,-62.508568,320.039643,-51.393745,1,nan,0.0000,0.0000,nan,0.020\n171,52.910156,-27.953188,223.774083,-54.639214,1,nan,0.8623,0.0583,43.7031,0.007\n173,150.996094,4.181528,235.291975,43.970869,1,nan,0.4490,0.0219,41.9820,0.015\n176,52.910156,-2
7.953188,223.774083,-54.639214,1,0.3775,0.3642,0.0064,41.4450,0.007\n184,352.711273,-63.823658,316.922299,-51.059403,1,nan,0.9112,0.0513,43.8508,0.024\n186,0.929752,-44.597992,328.531426,-70.083244,1,nan,0.9302,0.0118,43.9062,0.011\n195,152.050781,2.985506,237.495952,44.143927,1,nan,0.4658,0.0250,42.0768,0.019\n198,1.694561,-45.191612,326.278557,-69.858253,1,0.4060,0.3959,0.0146,41.6579,0.011\n204,349.046051,-61.943836,320.796530,-51.753706,1,0.5584,0.4997,0.0312,42.2594,0.017\n211,53.613281,-27.953188,223.929533,-54.024772,1,0.5469,0.5644,0.0113,42.5781,0.007\n216,150.820312,1.641510,237.994507,42.358984,1,nan,0.4056,0.0489,41.7202,0.020\n236,2.457983,-45.389202,324.632685,-69.945696,1,0.3436,0.2885,0.0162,40.8574,0.011\n240,151.171875,2.537361,237.288526,43.169764,1,nan,1.0936,0.0318,44.3405,0.024\n260,150.820312,3.732834,235.666318,43.572109,1,nan,0.7554,0.0425,43.3496,0.016\n268,149.589844,3.583322,234.885369,42.474696,1,nan,0.6234,0.0184,42.8401,0.024\n272,149.414062,3.433834,234.919132,42.245550,1,nan,0.7059,0.0220,43.1693,0.027\n277,348.595886,-63.072620,320.023289,-50.713060,1,nan,0.8751,0.0187,43.7426,0.021\n289,53.613281,-28.630989,225.073365,-54.119461,1,nan,0.7442,0.0682,43.3099,0.006\n306,148.886719,2.686724,235.347248,41.389003,1,0.7180,0.7265,0.0182,43.2458,0.028\n316,32.871094,-4.780192,166.959493,-60.615132,1,nan,0.5615,0.1166,42.5647,0.017\n337,150.117188,2.836105,236.124718,42.483719,1,nan,1.4098,0.0499,45.0228,0.016\n349,34.453125,-5.229529,169.987075,-59.956185,1,nan,0.7679,0.0318,43.3934,0.019\n357,349.966217,-62.696659,319.542989,-51.376556,1,nan,0.8937,0.0213,43.7988,0.021\n366,53.613281,-26.944359,222.237403,-53.863858,1,nan,1.3577,0.2274,44.9217,0.009\n384,359.816315,-44.003082,331.451340,-70.123054,1,nan,0.8134,0.0374,43.5469,0.013\n402,349.891296,-64.573555,317.972107,-49.786192,1,nan,0.5684,0.0804,42.5965,0.023\n406,32.695312,-4.929937,166.868469,-60.841230,1,nan,0.8989,0.0967,43.8145,0.018\n409,347.861847,-61.943836,321.519104,-51.42404
8,1,nan,0.9110,0.0551,43.8503,0.017\n413,349.429535,-62.508568,320.039643,-51.393745,1,0.6430,0.6411,0.0083,42.9139,0.020\n443,150.996094,2.388015,237.313912,42.939977,1,0.3682,0.3649,0.0113,41.4497,0.021\n451,349.615387,-63.636005,318.927246,-50.506542,1,nan,0.8853,0.0298,43.7734,0.018\n455,0.929752,-44.597992,328.531426,-70.083244,1,nan,0.4671,0.0117,42.0842,0.011\n466,34.277344,-5.079716,169.526841,-59.956640,1,0.4986,0.5527,0.0171,42.5229,0.019\n467,358.665253,-45.783966,330.353593,-68.203652,1,nan,0.6573,0.0463,42.9800,0.009\n478,358.636353,-46.768478,328.890146,-67.388837,1,nan,0.7283,0.0517,43.2524,0.008\n483,348.529419,-61.755440,321.293980,-51.763351,1,nan,0.7835,0.0575,43.4469,0.016\n489,0.574468,-45.981140,327.041068,-68.778764,1,nan,0.5798,0.0174,42.6490,0.006\n524,34.277344,-5.079716,169.526841,-59.956640,1,0.4067,0.3530,0.0854,41.3660,0.019\n561,349.891296,-64.573555,317.972107,-49.786192,1,nan,0.9267,0.0135,43.8959,0.023\n565,152.050781,3.284369,237.157374,44.318466,1,nan,0.8253,0.0201,43.5857,0.019\n568,53.789062,-27.784405,223.685697,-53.845803,1,nan,1.0649,0.2186,44.2692,0.009\n583,149.414062,2.238686,236.239766,41.565558,1,0.7070,0.7129,0.0137,43.1952,0.017\n607,349.160583,-64.760857,318.219706,-49.458924,1,0.2424,0.4948,0.3294,42.2339,0.020\n611,149.589844,3.583322,234.885369,42.474696,1,0.5068,0.5222,0.0459,42.3744,0.024\n613,150.820312,1.641510,237.994507,42.358984,1,0.3014,0.6138,0.3121,42.7991,0.020\n622,358.648071,-46.375080,329.462659,-67.716008,1,nan,0.9127,0.3417,43.8552,0.009\n639,52.207031,-28.630989,224.800211,-55.343637,1,nan,0.6900,0.0255,43.1086,0.009\n662,33.750000,-4.630479,168.146242,-59.949072,1,0.4181,0.4889,0.0244,42.2029,0.019\n670,51.855469,-28.630989,224.733260,-55.649872,1,nan,1.3168,0.1915,44.8395,0.009\n672,350.230255,-61.943836,320.053946,-52.070537,1,nan,0.9496,0.0789,43.9616,0.017\n674,32.695312,-4.929937,166.868469,-60.841230,1,nan,0.6032,0.0466,42.7531,0.018\n680,32.871094,-4.780192,166.959493,-60.615132,1,nan,1.488
3,0.2383,45.1681,0.017\n683,1.708861,-45.586655,325.688716,-69.520253,1,nan,0.8370,0.0432,43.6235,0.011\n686,358.665253,-45.783966,330.353593,-68.203652,1,nan,1.3769,0.2502,44.9594,0.009\n687,150.468750,3.732834,235.392208,43.283244,1,nan,0.3617,0.2373,41.4273,0.020\n694,34.453125,-5.229529,169.987075,-59.956185,1,0.4544,0.4391,0.0361,41.9241,0.019\n699,0.589520,-47.161343,325.385896,-67.769893,1,0.5659,0.5592,0.0064,42.5537,0.009\n721,358.665253,-45.783966,330.353593,-68.203652,1,0.4664,0.4355,0.3217,41.9033,0.009\n725,359.446716,-44.201530,331.730015,-69.805709,1,nan,0.7186,0.0156,43.2165,0.010\n729,52.207031,-26.610098,221.298836,-55.042928,1,nan,0.7994,0.0124,43.5005,0.014\n731,346.276581,-64.011238,320.448031,-49.344136,1,0.5315,0.5418,0.0087,42.4710,0.019\n734,34.804688,-5.829153,171.307861,-60.174401,1,nan,0.9281,0.0119,43.9001,0.023\n747,54.667969,-27.615883,223.610785,-53.050840,1,nan,1.3670,0.0656,44.9399,0.009\n759,151.347656,3.583322,236.252362,43.918627,1,nan,1.5633,0.2334,45.2997,0.015\n779,347.861847,-61.943836,321.519104,-51.424048,1,0.7469,0.7381,0.0118,43.2877,0.017\n793,359.058563,-45.191612,330.695783,-68.844915,1,nan,0.6826,0.0160,43.0802,0.011\n810,35.332031,-5.979157,172.286722,-59.931743,1,nan,0.9306,0.0189,43.9073,0.022\n830,1.694561,-45.191612,326.278557,-69.858253,1,nan,0.6893,0.0081,43.1060,0.011\n833,150.292969,2.686724,236.427488,42.541447,1,nan,0.4998,0.0202,42.2604,0.016\n834,359.058563,-45.191612,330.695783,-68.844915,1,0.5767,0.5866,0.0128,42.6794,0.011\n843,53.789062,-27.784405,223.685697,-53.845803,1,nan,0.8523,0.0367,43.6717,0.009\n868,35.332031,-5.979157,172.286722,-59.931743,1,0.3881,0.3855,1.3203,41.5900,0.022\n883,53.261719,-27.615883,223.280041,-54.281374,1,nan,0.8622,0.0603,43.7027,0.006\n886,33.574219,-4.780192,168.064587,-60.175886,1,0.4615,0.4476,0.7778,41.9738,0.019\n887,358.648071,-46.375080,329.462659,-67.716008,1,nan,1.2642,0.2378,44.7300,0.009\n888,359.814819,-44.399834,330.775011,-69.801007,1,nan,1.2159,0.1488,44.6
254,0.009\n905,0.189873,-45.586655,328.254458,-68.969298,1,nan,0.6630,0.0275,43.0028,0.007\n916,150.292969,2.686724,236.427488,42.541447,1,0.4052,0.4393,0.0217,41.9253,0.016\n917,1.666667,-44.399834,327.519190,-70.529554,1,nan,1.3559,0.2676,44.9180,0.009\n943,2.457983,-45.389202,324.632685,-69.945696,1,0.8582,0.8663,0.0291,43.7153,0.011\n946,359.446716,-44.201530,331.730015,-69.805709,1,nan,1.5205,0.0959,45.2254,0.010\n960,351.382965,-64.011238,317.574052,-50.604657,1,nan,0.6480,0.0129,42.9424,0.023\n962,150.820312,1.641510,237.994507,42.358984,1,nan,0.6779,0.0117,43.0616,0.020\n965,2.457983,-45.389202,324.632685,-69.945696,1,nan,0.6636,0.0081,43.0054,0.011\n968,52.910156,-27.953188,223.774083,-54.639214,1,nan,0.2472,0.0116,40.4743,0.007\n978,34.101562,-5.829153,170.247753,-60.638325,1,0.4938,0.4954,0.0349,42.2373,0.019\n979,148.710938,2.836105,235.050801,41.328739,1,nan,1.1407,0.1072,44.4539,0.031\n983,1.753247,-46.768478,324.030235,-68.498041,1,nan,0.3106,0.0065,41.0429,0.014\n1017,351.299988,-62.320400,319.038597,-52.026867,1,nan,1.4323,0.0481,45.0652,0.018\n1018,349.285706,-62.884678,319.786163,-51.046461,1,nan,0.9262,0.0111,43.8945,0.018\n1020,149.238281,3.882372,234.283829,42.351155,1,nan,1.0264,0.2278,44.1701,0.033\n1030,352.711273,-63.823658,316.922299,-51.059403,1,nan,1.2671,0.0235,44.7362,0.024\n1039,150.644531,3.583322,235.698235,43.342784,1,nan,0.7124,0.0358,43.1936,0.018\n1049,1.723404,-45.981140,325.117958,-69.180825,1,nan,1.2628,0.1822,44.7271,0.010\n1059,53.085938,-27.784405,223.525509,-54.460748,1,nan,1.0223,0.0247,44.1595,0.007\n1063,53.789062,-27.784405,223.685697,-53.845803,1,nan,0.0000,0.0000,nan,0.009\n1065,53.613281,-26.944359,222.237403,-53.863858,1,0.4795,0.4443,0.0390,41.9546,0.009\n1067,2.071130,-45.191612,325.606223,-69.989264,1,0.7417,0.8350,0.0544,43.6171,0.011\n1084,0.965665,-46.375080,325.845907,-68.579427,1,0.2126,0.5647,0.4351,42.5797,0.007\n1087,352.132874,-63.636005,317.424173,-51.095855,1,nan,1.1039,0.1661,44.3657,0.021\n1088,34.
277344,-5.679190,170.314930,-60.410322,1,0.7550,0.7142,0.0477,43.2001,0.020\n1100,34.101562,-5.829153,170.247753,-60.638325,1,0.9332,0.8409,0.0631,43.6357,0.019\n1106,33.925781,-5.979157,170.179895,-60.866303,1,nan,0.9258,0.0219,43.8935,0.022\n1108,51.855469,-26.276812,220.627031,-55.293792,1,0.6459,0.7378,0.0318,43.2867,0.014\n1111,53.085938,-27.111860,222.384291,-54.355086,1,nan,0.3981,0.7822,41.6724,0.007\n1114,33.398438,-4.331149,167.226341,-59.936551,1,0.5401,0.5514,0.0091,42.5169,0.018\n1115,150.820312,3.732834,235.666318,43.572109,1,nan,1.1354,0.0545,44.4412,0.016\n1123,151.171875,2.238686,237.619933,42.994783,1,0.5819,0.5516,0.3972,42.5177,0.024\n1127,0.965665,-46.375080,325.845907,-68.579427,1,nan,1.0761,0.1787,44.2973,0.007\n1128,149.414062,1.940072,236.565366,41.393323,1,nan,0.3618,0.0387,41.4282,0.018\n1138,359.816315,-44.003082,331.451340,-70.123054,1,nan,1.4331,0.2745,45.0668,0.013\n1151,33.574219,-4.780192,168.064587,-60.175886,1,nan,0.9335,0.2471,43.9155,0.019\n1168,347.861847,-61.943836,321.519104,-51.424048,1,nan,1.5435,0.1436,45.2656,0.017\n1174,0.949367,-45.586655,326.991548,-69.251686,1,nan,1.0215,0.1042,44.1572,0.013\n1193,347.861847,-61.943836,321.519104,-51.424048,1,nan,0.7156,0.0493,43.2055,0.017\n1216,53.964844,-28.630989,225.142950,-53.813613,1,nan,0.9028,0.0148,43.8260,0.009\n1245,53.085938,-28.122234,224.100909,-54.509752,1,0.4937,0.5333,0.0166,42.4294,0.007\n1254,34.980469,-6.279288,172.180075,-60.389399,1,nan,1.5079,0.3209,45.2031,0.023\n1265,54.667969,-27.615883,223.610785,-53.050840,1,0.5310,0.6159,0.0352,42.8082,0.009\n1266,149.414062,1.940072,236.565366,41.393323,1,nan,0.6497,0.0183,42.9492,0.018\n1271,33.750000,-4.630479,168.146242,-59.949072,1,nan,1.0088,0.0104,44.1239,0.019\n1274,0.189873,-45.586655,328.254458,-68.969298,1,nan,1.4221,0.2779,45.0460,0.007\n1288,33.222656,-4.780192,167.515653,-60.396584,1,nan,0.9169,0.1056,43.8676,0.018\n1289,346.276581,-64.011238,320.448031,-49.344136,1,nan,1.1128,0.0552,44.3872,0.019\n1304,347.8
46710,-64.760857,318.929827,-49.143596,1,0.3102,0.3081,0.0119,41.0226,0.019\n1321,151.523438,3.134927,236.900695,43.803170,1,nan,1.2033,0.1759,44.5975,0.019\n1347,349.429535,-62.508568,320.039643,-51.393745,1,nan,0.7305,0.0458,43.2603,0.020\n1354,53.085938,-28.122234,224.100909,-54.509752,1,nan,0.8265,0.0527,43.5896,0.007\n1365,1.708861,-45.586655,325.688716,-69.520253,1,0.6978,0.7233,0.0170,43.2338,0.011\n1380,351.259003,-64.386185,317.344860,-50.255113,1,nan,0.9291,0.0148,43.9029,0.020\n1388,52.910156,-26.276812,220.926149,-54.363918,1,nan,0.8363,0.0411,43.6210,0.008\n1393,152.050781,2.985506,237.495952,44.143927,1,0.4202,2.8977,0.8121,46.9354,0.019\n1415,51.328125,-27.784405,223.130589,-55.999499,1,0.7371,0.7551,0.0216,43.3483,0.013\n1421,358.648071,-46.375080,329.462659,-67.716008,1,nan,0.6717,0.0415,43.0374,0.009\n1439,34.101562,-5.829153,170.247753,-60.638325,1,nan,1.0442,0.1545,44.2165,0.019\n1450,53.964844,-28.630989,225.142950,-53.813613,1,nan,0.8016,0.0528,43.5077,0.009\n1463,1.363636,-46.768478,324.669342,-68.371416,1,0.2333,0.5070,0.1130,42.2975,0.008\n1467,151.523438,3.134927,236.900695,43.803170,1,nan,0.8282,0.1183,43.5952,0.019\n1468,346.562500,-63.448284,320.824720,-49.866957,1,nan,0.8677,0.0290,43.7199,0.021\n1469,149.414062,2.238686,236.239766,41.565558,1,nan,1.0850,0.0150,44.3193,0.017\n1487,150.468750,3.732834,235.392208,43.283244,1,0.6295,0.5829,0.0311,42.6627,0.020\n1492,347.812500,-63.448284,320.128971,-50.202348,1,nan,0.7396,0.0157,43.2934,0.021\n1498,152.050781,3.284369,237.157374,44.318466,1,nan,0.8187,0.0238,43.5644,0.019\n1500,149.414062,2.238686,236.239766,41.565558,1,nan,1.0392,0.0139,44.2035,0.017\n1522,352.132874,-63.636005,317.424173,-51.095855,1,nan,1.2524,0.0277,44.7049,0.021\n1523,150.468750,1.641510,237.714575,42.075234,1,0.2669,0.2407,0.0254,40.4086,0.017\n1536,33.574219,-5.079716,168.448505,-60.407218,1,nan,0.5789,0.1445,42.6449,0.016\n1545,346.562500,-63.448284,320.824720,-49.866957,1,0.6731,0.7236,0.0459,43.2351,0.021\n1567,1
52.050781,2.985506,237.495952,44.143927,1,0.5581,0.5870,0.0381,42.6815,0.019\n1570,347.861847,-61.943836,321.519104,-51.424048,1,nan,0.7967,0.0516,43.4914,0.017\n1578,51.855469,-28.630989,224.733260,-55.649872,1,nan,0.5831,0.1709,42.6637,0.009\n1589,51.679688,-27.447618,222.618229,-55.642263,1,0.3366,0.3400,0.0077,41.2708,0.010\n1593,150.996094,4.181528,235.291975,43.970869,1,nan,1.2366,0.1482,44.6708,0.015\n1597,351.299988,-62.320400,319.038597,-52.026867,1,0.3161,0.3182,0.0159,41.1033,0.018\n1599,52.207031,-26.610098,221.298836,-55.042928,1,nan,1.0088,0.2519,44.1237,0.014\n1600,34.101562,-5.829153,170.247753,-60.638325,1,0.5596,0.5490,0.0474,42.5054,0.019\n1601,53.085938,-28.122234,224.100909,-54.509752,1,0.7155,0.7376,0.0244,43.2861,0.007\n1619,2.097458,-45.783966,324.737840,-69.478613,1,nan,1.1271,0.0568,44.4216,0.011\n1630,1.753247,-46.768478,324.030235,-68.498041,1,nan,0.7373,0.0283,43.2851,0.014\n1636,0.965665,-46.375080,325.845907,-68.579427,1,nan,0.9029,0.0862,43.8264,0.007\n1660,359.446716,-44.201530,331.730015,-69.805709,1,nan,0.8022,0.0194,43.5100,0.010\n1667,33.574219,-5.079716,168.448505,-60.407218,1,nan,1.0792,0.0660,44.3050,0.016\n1676,0.965665,-46.375080,325.845907,-68.579427,1,nan,0.9666,0.2691,44.0090,0.007\n1678,0.929752,-44.597992,328.531426,-70.083244,1,nan,0.7378,0.0146,43.2866,0.011\n1687,53.613281,-28.630989,225.073365,-54.119461,1,nan,0.9107,0.0140,43.8494,0.006\n1691,152.050781,2.985506,237.495952,44.143927,1,0.7346,0.7037,0.0529,43.1609,0.019\n1695,351.259003,-64.386185,317.344860,-50.255113,1,nan,0.9741,0.2405,44.0299,0.020\n1702,150.996094,2.388015,237.313912,42.939977,1,nan,0.7806,0.0916,43.4371,0.021\n1720,1.694561,-45.191612,326.278557,-69.858253,1,nan,0.6746,0.0276,43.0488,0.011\n1729,51.328125,-27.447618,222.535046,-55.950727,1,0.6110,0.5932,0.0122,42.7092,0.013\n1730,150.117188,3.732834,235.120533,42.993809,1,nan,0.6999,0.0471,43.1464,0.020\n1754,148.710938,2.836105,235.050801,41.328739,1,nan,0.9109,0.0431,43.8500,0.031\n1763,347.
861847,-61.943836,321.519104,-51.424048,1,nan,0.8860,0.0855,43.7757,0.017\n1770,0.190678,-45.783966,327.956322,-68.803772,1,0.2562,0.2577,0.0118,40.5774,0.005\n1780,0.929752,-44.597992,328.531426,-70.083244,1,nan,0.3954,0.0285,41.6551,0.011\n1781,351.382965,-64.011238,317.574052,-50.604657,1,0.4976,0.4790,0.0223,42.1493,0.023\n1830,51.328125,-27.784405,223.130589,-55.999499,1,nan,0.7349,0.0181,43.2763,0.013\n1833,32.871094,-4.780192,166.959493,-60.615132,1,0.3202,0.3262,0.0567,41.1659,0.017\n1841,0.589520,-47.161343,325.385896,-67.769893,1,nan,0.9916,0.0165,44.0776,0.009\n1871,33.574219,-4.780192,168.064587,-60.175886,1,nan,0.5952,0.0260,42.7180,0.019\n1894,150.468750,1.641510,237.714575,42.075234,1,nan,1.2779,0.0607,44.7590,0.017\n1914,359.816315,-44.003082,331.451340,-70.123054,1,0.6648,0.6345,0.0370,42.8867,0.013\n1933,150.117188,2.836105,236.124718,42.483719,1,nan,0.4788,0.2350,42.1486,0.016\n1934,351.734680,-62.884678,318.284128,-51.651217,1,0.6909,0.7567,0.0533,43.3540,0.019\n1941,52.207031,-26.610098,221.298836,-55.042928,1,nan,0.9255,0.0135,43.8926,0.014\n1948,359.814819,-44.399834,330.775011,-69.801007,1,nan,1.2610,0.0630,44.7233,0.009\n1949,33.398438,-3.732834,166.492280,-59.466614,1,0.5070,0.5068,0.0084,42.2963,0.022\n1971,149.414062,1.940072,236.565366,41.393323,1,nan,1.1763,0.1964,44.5365,0.018\n1978,54.667969,-27.615883,223.610785,-53.050840,1,nan,0.8494,0.0510,43.6626,0.009\n1999,349.160583,-64.760857,318.219706,-49.458924,1,0.6580,0.5147,0.5022,42.3368,0.020\n2000,349.046051,-61.943836,320.796530,-51.753706,1,nan,0.6559,0.0240,42.9743,0.017\n2017,51.679688,-27.447618,222.618229,-55.642263,1,nan,0.3325,0.1725,41.2145,0.010\n2021,1.753247,-46.768478,324.030235,-68.498041,1,nan,0.6515,0.0269,42.9566,0.014\n2046,150.644531,3.583322,235.698235,43.342784,1,0.7425,0.7898,0.0207,43.4684,0.018\n2054,152.050781,2.985506,237.495952,44.143927,1,nan,0.7912,0.0208,43.4730,0.019\n2074,359.446716,-44.201530,331.730015,-69.805709,1,0.5175,0.4921,0.0570,42.2198,0.010\
n2093,150.996094,4.181528,235.291975,43.970869,1,nan,0.3252,0.0452,41.1578,0.015\n2097,151.347656,4.181528,235.568369,44.259942,1,nan,0.6327,0.0348,42.8789,0.016\n2106,1.694561,-45.191612,326.278557,-69.858253,1,nan,1.1768,0.0524,44.5376,0.011\n2109,358.665253,-45.783966,330.353593,-68.203652,1,0.4692,0.4797,0.0133,42.1532,0.009\n2133,349.891296,-64.573555,317.972107,-49.786192,1,nan,0.6612,0.0157,42.9957,0.023\n2145,51.328125,-27.447618,222.535046,-55.950727,1,nan,1.2076,0.0605,44.6070,0.013\n2147,0.589520,-47.161343,325.385896,-67.769893,1,nan,1.5213,0.2618,45.2269,0.009\n2160,34.277344,-5.079716,169.526841,-59.956640,1,nan,0.5716,0.0242,42.6114,0.019\n2168,359.446716,-44.201530,331.730015,-69.805709,1,0.2214,0.2308,0.2438,40.3055,0.010\n2172,0.189873,-45.586655,328.254458,-68.969298,1,0.6266,0.6681,0.0264,43.0232,0.007\n2182,151.523438,3.134927,236.900695,43.803170,1,nan,0.8239,0.0320,43.5812,0.019\n2183,148.710938,2.836105,235.050801,41.328739,1,0.2383,0.2618,0.0166,40.6165,0.031\n2187,34.101562,-5.829153,170.247753,-60.638325,1,0.3599,0.3656,0.5424,41.4546,0.019\n2193,347.846710,-64.760857,318.929827,-49.143596,1,nan,1.4268,0.0800,45.0550,0.019\n2195,150.468750,3.732834,235.392208,43.283244,1,nan,0.4598,0.0125,42.0435,0.020\n2198,351.382965,-64.011238,317.574052,-50.604657,1,nan,0.9277,0.2414,43.8988,0.023\n2206,52.910156,-25.944481,220.366350,-54.301439,1,nan,1.5014,0.0616,45.1915,0.010\n2208,352.398651,-62.696659,318.017427,-51.967966,1,0.7188,0.7126,0.0121,43.1942,0.020\n2223,358.648071,-46.375080,329.462659,-67.716008,1,nan,1.1672,0.0321,44.5157,0.009\n2228,1.753247,-46.768478,324.030235,-68.498041,1,nan,0.6837,0.0267,43.0844,0.014\n2243,150.820312,3.134927,236.341348,43.230123,1,nan,1.3813,0.1932,44.9680,0.016\n2246,348.529419,-61.755440,321.293980,-51.763351,1,0.6467,0.7143,0.0413,43.2008,0.016\n2252,53.437500,-29.142223,225.908120,-54.336118,1,nan,0.5851,0.0120,42.6728,0.008\n2265,359.058563,-45.191612,330.695783,-68.844915,1,0.2786,0.2719,0.0249,40.7105
,0.011\n2270,151.171875,2.238686,237.619933,42.994783,1,nan,0.0000,0.0000,nan,0.024\n2276,346.655182,-63.260487,320.952196,-50.040935,1,nan,0.6827,0.0726,43.0806,0.019\n2281,346.655182,-63.260487,320.952196,-50.040935,1,0.3442,0.5476,0.1871,42.4986,0.019\n2287,351.953644,-62.132156,318.777388,-52.347124,1,nan,0.5387,0.0152,42.4558,0.019\n2292,1.753247,-46.768478,324.030235,-68.498041,1,nan,0.9430,0.0086,43.9427,0.014\n2293,51.679688,-27.447618,222.618229,-55.642263,1,nan,0.9148,0.0376,43.8614,0.010\n2296,1.694561,-45.191612,326.278557,-69.858253,1,nan,0.8033,0.0310,43.5136,0.011\n2299,352.132874,-63.636005,317.424173,-51.095855,1,nan,0.8109,0.0190,43.5386,0.021\n2304,1.708861,-45.586655,325.688716,-69.520253,1,nan,1.3102,0.0895,44.8261,0.011\n2333,349.966217,-62.696659,319.542989,-51.376556,1,0.4284,0.4611,0.0224,42.0509,0.021\n2348,33.398438,-4.331149,167.226341,-59.936551,1,nan,0.5904,0.0092,42.6966,0.018\n2351,32.695312,-4.929937,166.868469,-60.841230,1,nan,0.7537,0.1688,43.3435,0.018\n2354,51.679688,-27.447618,222.618229,-55.642263,1,0.3829,0.3935,0.0180,41.6425,0.010\n2370,35.683594,-5.379379,171.992947,-59.253501,1,nan,0.6099,0.0096,42.7820,0.020\n2372,34.453125,-5.229529,169.987075,-59.956185,1,0.3353,0.4757,0.6757,42.1315,0.019\n2388,54.667969,-27.615883,223.610785,-53.050840,1,nan,1.0298,0.0768,44.1791,0.009\n2395,359.814819,-44.399834,330.775011,-69.801007,1,0.6315,0.6359,0.0276,42.8924,0.009\n2407,0.190678,-45.783966,327.956322,-68.803772,1,0.5859,0.5249,0.0829,42.3879,0.005\n2410,52.207031,-26.610098,221.298836,-55.042928,1,0.3848,0.4258,0.0241,41.8449,0.014\n2451,359.814819,-44.399834,330.775011,-69.801007,1,nan,0.7416,0.0343,43.3003,0.009\n2476,149.414062,1.940072,236.565366,41.393323,1,nan,0.4730,0.1009,42.1166,0.018\n2538,346.130127,-63.072620,321.423103,-50.042305,1,0.6846,0.6643,0.0319,43.0081,0.020\n2550,351.259003,-64.386185,317.344860,-50.255113,1,nan,0.3640,0.1891,41.4439,0.020\n2603,0.965665,-46.375080,325.845907,-68.579427,1,nan,1.2175,0.4670
,44.6290,0.007\n2615,52.910156,-26.276812,220.926149,-54.363918,1,nan,1.2323,0.0534,44.6613,0.008\n2660,151.523438,3.134927,236.900695,43.803170,1,1.0412,1.3254,0.1765,44.8570,0.019\n2661,346.130127,-63.072620,321.423103,-50.042305,1,0.5160,0.5185,0.0319,42.3560,0.020\n2683,0.965665,-46.375080,325.845907,-68.579427,1,0.6609,0.6757,0.0091,43.0530,0.007\n2687,348.529419,-61.755440,321.293980,-51.763351,1,0.6669,0.6531,0.0406,42.9628,0.016\n2693,358.665253,-45.783966,330.353593,-68.203652,1,nan,1.5860,0.2426,45.3384,0.009\n2701,349.891296,-64.573555,317.972107,-49.786192,1,0.4057,0.3811,0.0322,41.5609,0.023\n2702,35.683594,-5.379379,171.992947,-59.253501,1,0.5081,0.5489,0.0165,42.5049,0.020\n2707,33.398438,-4.331149,167.226341,-59.936551,1,0.1061,0.1359,0.0193,39.0322,0.018\n2744,348.595886,-63.072620,320.023289,-50.713060,1,nan,0.5735,0.0245,42.6203,0.021\n2753,51.328125,-27.784405,223.130589,-55.999499,1,0.4302,0.3267,0.0389,41.1697,0.013\n2759,148.886719,2.686724,235.347248,41.389003,1,nan,0.5974,0.0069,42.7274,0.028\n2760,149.414062,3.433834,234.919132,42.245550,1,nan,0.3415,0.2635,41.2821,0.027\n2766,150.468750,3.732834,235.392208,43.283244,1,nan,1.0362,0.0118,44.1956,0.020\n2786,349.615387,-63.636005,318.927246,-50.506542,1,nan,0.3010,0.0504,40.9634,0.018\n2790,349.285706,-62.884678,319.786163,-51.046461,1,nan,0.0000,0.0000,nan,0.018\n2794,359.415588,-46.768478,327.729895,-67.686097,1,0.6983,0.7077,0.0266,43.1760,0.009\n2814,150.117188,2.836105,236.124718,42.483719,1,nan,0.7215,0.0782,43.2272,0.016\n2818,52.207031,-28.291550,224.208534,-55.300157,1,nan,0.8251,0.0216,43.5851,0.007\n2856,53.964844,-28.630989,225.142950,-53.813613,1,nan,0.6348,0.0515,42.8877,0.009\n2858,34.980469,-6.279288,172.180075,-60.389399,1,nan,1.1145,0.0322,44.3914,0.023\n2860,348.529419,-61.755440,321.293980,-51.763351,1,nan,0.4398,0.2606,41.9285,0.016\n2871,346.655182,-63.260487,320.952196,-50.040935,1,nan,0.4647,0.0229,42.0709,0.019\n2882,151.171875,1.342993,238.602520,42.464379,1,nan,2.78
24,0.2944,46.8290,0.026\n2886,148.710938,2.836105,235.050801,41.328739,1,nan,0.3475,0.0073,41.3259,0.031\n2909,359.446716,-44.201530,331.730015,-69.805709,1,nan,0.3229,0.3149,41.1406,0.010\n2932,352.711273,-63.823658,316.922299,-51.059403,1,nan,0.1911,0.0104,39.8466,0.024\n2933,348.586945,-64.573555,318.693903,-49.477869,1,0.6336,0.6157,0.0140,42.8070,0.018\n2943,0.589520,-47.161343,325.385896,-67.769893,1,0.7816,0.7511,0.0242,43.3344,0.009\n2945,52.910156,-25.944481,220.366350,-54.301439,1,nan,1.1553,0.0720,44.4880,0.010\n2949,52.910156,-25.944481,220.366350,-54.301439,1,nan,1.2125,0.0257,44.6178,0.010\n2958,358.648071,-46.375080,329.462659,-67.716008,1,nan,0.5145,0.0383,42.3356,0.009\n2961,351.734680,-62.884678,318.284128,-51.651217,1,nan,0.3771,0.0426,41.5336,0.019\n2962,149.414062,3.433834,234.919132,42.245550,1,nan,0.7453,0.0637,43.3138,0.027\n2970,53.085938,-27.111860,222.384291,-54.355086,1,0.4506,0.5123,0.0647,42.3247,0.007\n2975,53.085938,-28.122234,224.100909,-54.509752,1,nan,1.4981,0.2615,45.1857,0.007\n3004,349.966217,-62.696659,319.542989,-51.376556,1,0.4053,0.4978,0.0481,42.2498,0.021\n3008,358.312500,-44.993881,332.185785,-68.685906,1,nan,0.1344,0.7139,39.0065,0.009\n3012,150.117188,2.238686,236.784618,42.139082,1,nan,1.4853,0.0818,45.1627,0.016\n3022,52.207031,-28.630989,224.800211,-55.343637,1,0.4266,0.3630,0.0443,41.4365,0.009\n3025,359.415588,-46.768478,327.729895,-67.686097,1,0.6636,0.6827,0.0129,43.0806,0.009\n3028,32.871094,-4.780192,166.959493,-60.615132,1,nan,0.6075,0.2450,42.7719,0.017\n3029,347.617462,-62.508568,321.121462,-50.904708,1,nan,0.7168,0.0344,43.2100,0.019\n3037,150.820312,3.732834,235.666318,43.572109,1,0.3807,0.3856,0.0770,41.5907,0.016\n3052,351.734680,-62.884678,318.284128,-51.651217,1,nan,0.9395,0.0475,43.9327,0.019\n3073,1.753247,-46.768478,324.030235,-68.498041,1,nan,0.5457,0.7707,42.4896,0.014\n3077,0.189873,-45.586655,328.254458,-68.969298,1,0.2853,0.3011,0.0497,40.9649,0.007\n3133,0.574468,-45.981140,327.041068,-68.7787
64,1,nan,1.6167,0.0739,45.3897,0.006\n3163,34.277344,-5.079716,169.526841,-59.956640,1,0.6388,0.6322,0.0085,42.8770,0.019\n3170,33.398438,-4.331149,167.226341,-59.936551,1,nan,0.4754,0.0150,42.1298,0.018\n3171,33.925781,-5.979157,170.179895,-60.866303,1,0.5550,0.5525,0.0096,42.5220,0.022\n3175,347.617462,-62.508568,321.121462,-50.904708,1,nan,1.0217,0.0243,44.1579,0.019\n3176,150.820312,3.134927,236.341348,43.230123,1,nan,0.7794,0.0377,43.4329,0.016\n3190,33.925781,-5.979157,170.179895,-60.866303,1,nan,0.7705,0.0220,43.4023,0.022\n3193,347.812500,-63.448284,320.128971,-50.202348,1,nan,0.8700,0.0495,43.7269,0.021\n3196,358.648071,-46.375080,329.462659,-67.716008,1,0.3112,0.2901,0.0115,40.8716,0.009\n3206,53.085938,-27.111860,222.384291,-54.355086,1,0.6472,0.6860,0.0225,43.0931,0.007\n3209,1.363636,-46.768478,324.669342,-68.371416,1,0.4369,0.4994,1.2314,42.2578,0.008\n3210,53.437500,-29.142223,225.908120,-54.336118,1,nan,0.7133,0.0739,43.1968,0.008\n3214,348.908447,-63.823658,319.169886,-50.176186,1,0.2600,0.2876,0.3195,40.8500,0.018\n3217,151.347656,4.181528,235.568369,44.259942,1,0.1826,0.4846,0.3727,42.1795,0.016\n3247,1.708861,-45.586655,325.688716,-69.520253,1,nan,1.3152,0.1148,44.8363,0.011\n3252,349.615387,-63.636005,318.927246,-50.506542,1,nan,0.8013,0.0111,43.5070,0.018\n3278,53.613281,-27.953188,223.929533,-54.024772,1,nan,1.5343,0.1417,45.2497,0.007\n3284,0.929752,-44.597992,328.531426,-70.083244,1,nan,0.4121,1.2419,41.7612,0.011\n3286,33.574219,-4.780192,168.064587,-60.175886,1,nan,0.7985,0.0224,43.4974,0.019\n3288,51.855469,-26.276812,220.627031,-55.293792,1,nan,1.2928,0.0725,44.7902,0.014\n3325,53.613281,-28.630989,225.073365,-54.119461,1,0.4825,0.5158,0.0204,42.3423,0.006\n3336,34.453125,-5.229529,169.987075,-59.956185,1,nan,0.7542,0.0326,43.3453,0.019\n3347,52.910156,-27.953188,223.774083,-54.639214,1,nan,0.5940,0.0536,42.7127,0.007\n3351,359.811707,-45.191612,329.485675,-69.150905,1,0.5478,0.5828,0.0191,42.6626,0.010\n3362,347.617462,-62.508568,321.12
1462,-50.904708,1,nan,0.6700,0.1306,43.0306,0.019\n3373,352.398651,-62.696659,318.017427,-51.967966,1,nan,0.4474,0.2471,41.9728,0.020\n3395,53.613281,-27.953188,223.929533,-54.024772,1,0.3963,0.3657,0.8633,41.4551,0.007\n3398,34.804688,-5.829153,171.307861,-60.174401,1,nan,1.0070,0.0407,44.1188,0.023\n3410,349.615387,-63.636005,318.927246,-50.506542,1,nan,1.1667,0.0446,44.5144,0.018\n3418,0.190678,-45.783966,327.956322,-68.803772,1,nan,1.0109,0.0153,44.1294,0.005\n3441,0.189873,-45.586655,328.254458,-68.969298,1,nan,0.4974,0.0111,42.2477,0.007\n3457,151.347656,4.181528,235.568369,44.259942,1,nan,0.7204,0.0318,43.2232,0.016\n3475,149.414062,3.433834,234.919132,42.245550,1,nan,0.7214,0.0251,43.2270,0.027\n3498,351.734680,-62.884678,318.284128,-51.651217,1,nan,0.9131,0.0795,43.8565,0.019\n3499,150.996094,2.985506,236.647967,43.287350,1,0.9971,1.0190,0.0171,44.1508,0.020\n3500,52.207031,-28.630989,224.800211,-55.343637,1,0.2306,0.4849,0.6744,42.1816,0.009\n3504,351.734680,-62.884678,318.284128,-51.651217,1,nan,0.5829,0.0306,42.6630,0.019\n3509,53.261719,-27.615883,223.280041,-54.281374,1,0.5035,0.5169,0.2812,42.3478,0.006\n3522,52.910156,-27.279613,222.625192,-54.536648,1,0.8636,0.9140,0.0250,43.8590,0.007\n3529,348.586945,-64.573555,318.693903,-49.477869,1,nan,0.9039,0.0247,43.8293,0.018\n3535,359.814819,-44.399834,330.775011,-69.801007,1,nan,1.4756,0.1618,45.1450,0.009\n3538,150.996094,2.985506,236.647967,43.287350,1,0.4615,0.4664,0.0103,42.0806,0.020\n3547,2.097458,-45.783966,324.737840,-69.478613,1,nan,0.9467,0.0338,43.9533,0.011\n3551,358.636353,-46.768478,328.890146,-67.388837,1,0.2573,0.2393,0.0145,40.3948,0.008\n3566,151.171875,2.238686,237.619933,42.994783,1,0.6331,0.5448,0.6502,42.4852,0.024\n3581,51.855469,-26.276812,220.627031,-55.293792,1,0.4844,0.4938,0.0183,42.2287,0.014\n3585,150.820312,3.732834,235.666318,43.572109,1,nan,0.5231,0.1034,42.3791,0.016\n3597,2.457983,-45.389202,324.632685,-69.945696,1,0.8409,0.8742,0.0306,43.7396,0.011\n3620,35.859375,-4.63
0479,171.270769,-58.580806,1,nan,0.7190,0.0186,43.2181,0.022\n3629,51.855469,-28.630989,224.733260,-55.649872,1,nan,1.3951,0.1453,44.9947,0.009\n3641,149.414062,3.433834,234.919132,42.245550,1,nan,0.5965,0.0131,42.7237,0.027\n3645,346.562500,-63.448284,320.824720,-49.866957,1,0.5351,0.5274,0.0205,42.4003,0.021\n3652,149.238281,3.882372,234.283829,42.351155,1,nan,0.8499,0.0477,43.6644,0.033\n3657,34.277344,-5.679190,170.314930,-60.410322,1,0.3050,2.7750,0.9745,46.8221,0.020\n3661,347.812500,-63.448284,320.128971,-50.202348,1,nan,1.3297,0.0487,44.8658,0.021\n3666,150.117188,2.836105,236.124718,42.483719,1,nan,1.1820,0.1059,44.5495,0.016\n3672,348.529419,-61.755440,321.293980,-51.763351,1,0.6020,0.5785,0.0108,42.6431,0.016\n3681,150.996094,2.985506,236.647967,43.287350,1,0.6459,0.6429,0.0113,42.9215,0.020\n3702,151.171875,2.238686,237.619933,42.994783,1,nan,0.8082,0.1270,43.5299,0.024\n3706,51.328125,-27.784405,223.130589,-55.999499,1,0.3284,0.3150,0.1745,41.0780,0.013\n3709,151.171875,2.238686,237.619933,42.994783,1,nan,0.8753,0.0139,43.7432,0.024\n3710,1.666667,-44.399834,327.519190,-70.529554,1,nan,0.8356,0.0335,43.6190,0.009\n3735,0.574468,-45.981140,327.041068,-68.778764,1,nan,0.7982,0.1565,43.4967,0.006\n3737,150.292969,2.686724,236.427488,42.541447,1,nan,0.7567,0.0404,43.3543,0.016\n3748,52.910156,-25.944481,220.366350,-54.301439,1,nan,1.0812,0.0621,44.3099,0.010\n3749,359.415588,-46.768478,327.729895,-67.686097,1,0.7033,0.7840,0.0371,43.4487,0.009\n3751,352.132874,-63.636005,317.424173,-51.095855,1,0.6527,0.6351,0.0190,42.8890,0.021\n3753,346.562500,-63.448284,320.824720,-49.866957,1,0.1191,0.0761,0.0195,37.6870,0.021\n3763,347.861847,-61.943836,321.519104,-51.424048,1,nan,0.3349,0.0091,41.2320,0.017\n3764,347.812500,-63.448284,320.128971,-50.202348,1,nan,0.8530,0.0218,43.6740,0.021\n3777,33.574219,-5.079716,168.448505,-60.407218,1,nan,0.5510,0.0652,42.5152,0.016\n3779,348.595886,-63.072620,320.023289,-50.713060,1,nan,0.9939,0.2200,44.0838,0.021\n3785,359.41558
8,-46.768478,327.729895,-67.686097,1,0.5052,0.5168,0.0229,42.3475,0.009\n3794,52.910156,-26.276812,220.926149,-54.363918,1,0.6236,0.6316,0.0331,42.8743,0.008\n3803,151.171875,1.342993,238.602520,42.464379,1,nan,0.7038,0.0688,43.1613,0.026\n3821,52.910156,-27.953188,223.774083,-54.639214,1,nan,1.4631,0.2823,45.1222,0.007\n3822,358.648071,-46.375080,329.462659,-67.716008,1,0.3455,2.9896,1.2696,47.0170,0.009\n3824,1.753247,-46.768478,324.030235,-68.498041,1,0.8063,0.8352,0.0339,43.6178,0.014\n3841,2.071130,-45.191612,325.606223,-69.989264,1,nan,0.5971,0.0990,42.7263,0.011\n3844,2.457983,-45.389202,324.632685,-69.945696,1,0.4789,0.3748,0.4652,41.5181,0.011\n3855,349.891296,-64.573555,317.972107,-49.786192,1,0.4507,0.3704,0.5053,41.4880,0.023\n3857,2.457983,-45.389202,324.632685,-69.945696,1,nan,0.7254,0.0296,43.2418,0.011\n3875,52.207031,-28.630989,224.800211,-55.343637,1,0.6825,0.6468,0.0279,42.9374,0.009\n3878,358.636353,-46.768478,328.890146,-67.388837,1,nan,0.9110,0.2538,43.8503,0.008\n3880,351.382965,-64.011238,317.574052,-50.604657,1,nan,1.5002,0.2529,45.1894,0.023\n3889,53.964844,-28.630989,225.142950,-53.813613,1,0.1582,0.2015,1.1876,39.9743,0.009\n3925,150.996094,4.181528,235.291975,43.970869,1,nan,0.2421,1.1561,40.4235,0.015\n3928,150.996094,2.985506,236.647967,43.287350,1,nan,0.7806,0.0538,43.4371,0.020\n3929,149.589844,3.583322,234.885369,42.474696,1,nan,1.8897,0.1427,45.8062,0.024\n3936,34.980469,-6.279288,172.180075,-60.389399,1,nan,1.0853,0.0192,44.3200,0.023\n3950,352.132874,-63.636005,317.424173,-51.095855,1,0.7004,0.6924,0.0235,43.1180,0.021\n3970,1.753247,-46.768478,324.030235,-68.498041,1,nan,1.4637,0.1090,45.1234,0.014\n3972,33.574219,-5.079716,168.448505,-60.407218,1,nan,0.7985,0.0333,43.4976,0.016\n4001,150.996094,4.181528,235.291975,43.970869,1,nan,1.5533,0.1631,45.2826,0.015\n4007,150.820312,3.732834,235.666318,43.572109,1,nan,0.7855,0.0241,43.4536,0.016\n4016,346.130127,-63.072620,321.423103,-50.042305,1,nan,1.2676,0.0738,44.7372,0.020\n4023,0.
589520,-47.161343,325.385896,-67.769893,1,nan,0.9574,0.1398,43.9835,0.009\n4025,33.222656,-4.780192,167.515653,-60.396584,1,nan,0.0000,0.0000,nan,0.018\n4038,35.859375,-4.630479,171.270769,-58.580806,1,nan,1.1349,0.0866,44.4401,0.022\n4044,151.171875,2.238686,237.619933,42.994783,1,nan,0.6693,0.0222,43.0278,0.024\n4054,34.453125,-5.229529,169.987075,-59.956185,1,0.5653,0.5478,0.0237,42.4998,0.019\n4062,358.312500,-44.993881,332.185785,-68.685906,1,nan,1.1896,0.0988,44.5666,0.009\n4063,348.586945,-64.573555,318.693903,-49.477869,1,nan,0.6406,0.1744,42.9119,0.018\n4065,35.332031,-5.979157,172.286722,-59.931743,1,nan,1.3023,0.0984,44.8098,0.022\n4077,150.468750,1.641510,237.714575,42.075234,1,nan,1.1662,0.1634,44.5133,0.017\n4103,349.160583,-64.760857,318.219706,-49.458924,1,0.3113,0.3331,0.0201,41.2185,0.020\n4109,52.910156,-26.276812,220.926149,-54.363918,1,nan,0.0000,0.0000,nan,0.008\n4183,51.855469,-28.630989,224.733260,-55.649872,1,nan,0.8906,0.0209,43.7897,0.009\n4197,54.667969,-27.615883,223.610785,-53.050840,1,nan,0.7358,0.0471,43.2794,0.009\n4201,34.453125,-5.229529,169.987075,-59.956185,1,0.2193,0.2338,0.1899,40.3374,0.019\n4202,33.398438,-3.732834,166.492280,-59.466614,1,0.6916,0.7529,0.0348,43.3408,0.022\n4216,53.964844,-28.630989,225.142950,-53.813613,1,0.4807,0.4515,0.0246,41.9964,0.009\n4224,33.925781,-5.979157,170.179895,-60.866303,1,nan,0.6240,0.0370,42.8426,0.022\n4225,346.562500,-63.448284,320.824720,-49.866957,1,0.2704,0.3546,0.2924,41.3770,0.021\n4249,51.679688,-27.447618,222.618229,-55.642263,1,nan,1.3003,0.1720,44.8058,0.010\n4262,346.130127,-63.072620,321.423103,-50.042305,1,nan,0.7108,0.0343,43.1877,0.020\n4267,33.222656,-4.780192,167.515653,-60.396584,1,0.4186,0.3797,0.0298,41.5512,0.018\n4272,53.613281,-26.944359,222.237403,-53.863858,1,0.2892,0.2718,0.0268,40.7089,0.009\n4274,351.321442,-64.198746,317.458993,-50.429931,1,0.6479,0.6553,0.0091,42.9721,0.023\n4278,149.414062,2.238686,236.239766,41.565558,1,nan,0.8546,0.0815,43.6790,0.017\n4282,
53.085938,-27.784405,223.525509,-54.460748,1,nan,0.4929,0.0100,42.2237,0.007\n4283,52.910156,-27.279613,222.625192,-54.536648,1,0.3708,0.3947,0.0245,41.6505,0.007\n4293,1.753247,-46.768478,324.030235,-68.498041,1,nan,1.6305,0.1495,45.4125,0.014\n4294,150.117188,2.836105,236.124718,42.483719,1,nan,0.8510,0.1009,43.6678,0.016\n4300,51.855469,-26.276812,220.627031,-55.293792,1,0.6127,0.6028,0.0088,42.7511,0.014\n4303,32.695312,-4.929937,166.868469,-60.841230,1,nan,0.9797,0.0098,44.0451,0.018\n4304,34.277344,-5.679190,170.314930,-60.410322,1,nan,0.2127,0.0101,40.1063,0.020\n4306,151.171875,2.537361,237.288526,43.169764,1,nan,1.4930,0.1628,45.1765,0.024\n4313,149.414062,3.433834,234.919132,42.245550,1,nan,1.1395,0.1626,44.4510,0.027\n4322,151.347656,4.181528,235.568369,44.259942,1,0.7816,0.7648,0.0184,43.3824,0.016\n4325,34.980469,-6.279288,172.180075,-60.389399,1,nan,0.6802,0.0118,43.0708,0.023\n4330,53.613281,-28.630989,225.073365,-54.119461,1,nan,1.1409,0.1366,44.4543,0.006\n4337,34.804688,-5.829153,171.307861,-60.174401,1,nan,0.6082,0.0100,42.7747,0.023\n4347,35.683594,-5.379379,171.992947,-59.253501,1,nan,1.5051,0.0376,45.1982,0.020\n4369,53.964844,-28.630989,225.142950,-53.813613,1,nan,1.1974,0.0630,44.5843,0.009\n4372,351.259003,-64.386185,317.344860,-50.255113,1,0.8588,0.8990,0.0160,43.8146,0.020\n4376,33.222656,-4.780192,167.515653,-60.396584,1,nan,0.9810,0.0139,44.0488,0.018\n4390,53.437500,-29.142223,225.908120,-54.336118,1,nan,1.0152,0.3139,44.1407,0.008\n4424,151.699219,3.583322,236.533224,44.205648,1,nan,1.0317,0.0256,44.1840,0.016\n4467,53.261719,-27.615883,223.280041,-54.281374,1,0.3489,0.3023,0.1233,40.9750,0.006\n4469,150.820312,1.641510,237.994507,42.358984,1,nan,1.0352,0.2064,44.1930,0.020\n4475,1.753247,-46.768478,324.030235,-68.498041,1,nan,0.5952,0.0666,42.7177,0.014\n4489,35.332031,-5.979157,172.286722,-59.931743,1,nan,0.3478,0.0152,41.3280,0.022\n4490,1.723404,-45.981140,325.117958,-69.180825,1,nan,1.1304,0.0573,44.4295,0.010\n4492,358.636353,-46
.768478,328.890146,-67.388837,1,0.2686,0.6256,0.1162,42.8492,0.008\n4494,2.097458,-45.783966,324.737840,-69.478613,1,0.4329,0.4492,0.0147,41.9831,0.011\n4507,35.683594,-5.379379,171.992947,-59.253501,1,nan,1.1883,0.0370,44.5636,0.020\n4508,347.812500,-63.448284,320.128971,-50.202348,1,nan,0.7777,0.0513,43.4272,0.021\n4525,149.589844,3.583322,234.885369,42.474696,1,nan,0.7945,0.0729,43.4840,0.024\n4528,359.811707,-45.191612,329.485675,-69.150905,1,0.6181,0.5632,0.0527,42.5723,0.010\n4533,150.996094,2.985506,236.647967,43.287350,1,nan,0.9019,0.2483,43.8233,0.020\n4551,358.312500,-44.993881,332.185785,-68.685906,1,0.2666,0.6085,0.0540,42.7761,0.009\n4559,0.929752,-44.597992,328.531426,-70.083244,1,0.3229,0.3027,0.6578,40.9778,0.011\n4561,351.953644,-62.132156,318.777388,-52.347124,1,nan,0.7697,0.0339,43.3995,0.019\n4576,150.292969,2.686724,236.427488,42.541447,1,0.5613,0.5636,0.0277,42.5742,0.016\n4586,2.071130,-45.191612,325.606223,-69.989264,1,0.3810,0.3822,0.0308,41.5678,0.011\n4589,150.820312,1.641510,237.994507,42.358984,1,nan,1.2748,0.1135,44.7525,0.020\n4592,52.207031,-26.610098,221.298836,-55.042928,1,0.7073,0.7753,0.0429,43.4190,0.014\n4597,349.285706,-62.884678,319.786163,-51.046461,1,nan,0.6120,0.0070,42.7913,0.018\n4611,53.085938,-27.111860,222.384291,-54.355086,1,nan,0.8291,0.0669,43.5981,0.007\n4625,347.812500,-63.448284,320.128971,-50.202348,1,nan,0.7723,0.0357,43.4086,0.021\n4644,35.859375,-4.630479,171.270769,-58.580806,1,0.4138,0.4027,0.0331,41.7017,0.022\n4645,358.665253,-45.783966,330.353593,-68.203652,1,nan,0.6546,0.1405,42.9690,0.009\n4653,150.292969,2.686724,236.427488,42.541447,1,nan,0.9818,0.0169,44.0509,0.016\n4673,352.711273,-63.823658,316.922299,-51.059403,1,nan,0.5303,0.0066,42.4145,0.024\n4677,53.085938,-27.784405,223.525509,-54.460748,1,0.3093,0.1395,0.2071,39.0948,0.007\n4695,349.966217,-62.696659,319.542989,-51.376556,1,0.6195,0.6168,0.0156,42.8120,0.021\n4707,359.814819,-44.399834,330.775011,-69.801007,1,nan,0.8457,0.0478,43.6511,0.009
\n4712,0.190678,-45.783966,327.956322,-68.803772,1,0.1206,0.5167,0.0880,42.3468,0.005\n4713,358.665253,-45.783966,330.353593,-68.203652,1,0.8042,0.7677,0.0483,43.3928,0.009\n4719,0.965665,-46.375080,325.845907,-68.579427,1,0.3619,0.3727,0.0303,41.5035,0.007\n4720,359.805206,-46.768478,327.135979,-67.829903,1,0.5530,0.5247,0.1218,42.3871,0.011\n4724,352.398651,-62.696659,318.017427,-51.967966,1,0.5390,0.4826,0.1275,42.1692,0.020\n4738,35.332031,-5.979157,172.286722,-59.931743,1,nan,1.0768,0.0572,44.2988,0.022\n4739,51.855469,-26.276812,220.627031,-55.293792,1,nan,0.7661,0.0148,43.3871,0.014\n4750,0.965665,-46.375080,325.845907,-68.579427,1,0.4822,0.2376,0.3459,40.3769,0.007\n4759,359.811707,-45.191612,329.485675,-69.150905,1,nan,0.7922,0.2472,43.4764,0.010\n4760,350.230255,-61.943836,320.053946,-52.070537,1,nan,1.1776,0.0799,44.5394,0.017\n4762,51.679688,-27.447618,222.618229,-55.642263,1,nan,1.0371,0.2322,44.1980,0.010\n4768,349.285706,-62.884678,319.786163,-51.046461,1,nan,1.0012,0.0457,44.1033,0.018\n4776,150.117188,2.238686,236.784618,42.139082,1,0.4602,0.4465,0.0429,41.9677,0.016\n4812,151.171875,1.342993,238.602520,42.464379,1,nan,0.8278,0.0161,43.5940,0.026\n4822,149.589844,3.583322,234.885369,42.474696,1,nan,0.8752,0.0381,43.7427,0.024\n4824,346.655182,-63.260487,320.952196,-50.040935,1,0.4116,0.3961,1.0457,41.6592,0.019\n4825,150.468750,3.732834,235.392208,43.283244,1,nan,0.8466,0.0165,43.6540,0.020\n4830,1.666667,-44.399834,327.519190,-70.529554,1,nan,0.8267,0.0561,43.5904,0.009\n4833,2.457983,-45.389202,324.632685,-69.945696,1,nan,0.6975,0.0469,43.1373,0.011\n4834,358.636353,-46.768478,328.890146,-67.388837,1,nan,0.6570,0.0487,42.9789,0.008\n4840,151.171875,2.537361,237.288526,43.169764,1,nan,0.9905,0.0337,44.0747,0.024\n4844,34.277344,-5.679190,170.314930,-60.410322,1,nan,1.0144,0.1816,44.1385,0.020\n4853,347.861847,-61.943836,321.519104,-51.424048,1,nan,0.8895,0.1178,43.7862,0.017\n4854,0.949367,-45.586655,326.991548,-69.251686,1,nan,0.7034,0.0178,43.159
7,0.013\n4864,151.171875,1.342993,238.602520,42.464379,1,0.1623,0.1825,0.0221,39.7354,0.026\n4866,351.953644,-62.132156,318.777388,-52.347124,1,nan,0.6113,0.0972,42.7883,0.019\n4903,151.171875,2.238686,237.619933,42.994783,1,nan,0.4125,0.0223,41.7635,0.024\n4910,1.723404,-45.981140,325.117958,-69.180825,1,nan,0.6558,0.0453,42.9739,0.010\n4934,52.031250,-26.443335,220.963669,-55.168557,1,nan,1.2051,0.0888,44.6015,0.014\n4937,359.811707,-45.191612,329.485675,-69.150905,1,nan,0.5072,0.0257,42.2985,0.010\n4948,151.171875,2.537361,237.288526,43.169764,1,nan,1.0614,0.0814,44.2604,0.024\n4986,32.871094,-4.780192,166.959493,-60.615132,1,0.5554,0.4904,0.4394,42.2108,0.017\n4990,149.414062,1.940072,236.565366,41.393323,1,nan,0.6203,0.0205,42.8268,0.018\n4994,150.468750,1.641510,237.714575,42.075234,1,nan,0.8252,0.0451,43.5856,0.017\n5024,352.711273,-63.823658,316.922299,-51.059403,1,0.3343,0.5197,0.1503,42.3618,0.024\n5034,346.276581,-64.011238,320.448031,-49.344136,1,nan,0.2533,0.8554,40.5348,0.019\n5057,150.468750,3.732834,235.392208,43.283244,1,0.8187,0.7582,0.0593,43.3595,0.020\n5061,150.292969,2.686724,236.427488,42.541447,1,0.4812,0.4838,0.0111,42.1754,0.016\n5084,52.207031,-26.610098,221.298836,-55.042928,1,nan,1.0849,0.0363,44.3190,0.014\n5088,349.160583,-64.760857,318.219706,-49.458924,1,0.7677,0.7828,0.0261,43.4447,0.020\n5094,2.071130,-45.191612,325.606223,-69.989264,1,nan,0.3655,0.0077,41.4542,0.011\n5099,1.753247,-46.768478,324.030235,-68.498041,1,nan,0.3400,0.2409,41.2708,0.014\n5135,33.925781,-5.979157,170.179895,-60.866303,1,0.7377,0.7425,0.0053,43.3035,0.022\n5145,150.468750,1.641510,237.714575,42.075234,1,nan,1.1364,0.0385,44.4437,0.017\n5153,149.414062,2.238686,236.239766,41.565558,1,0.7765,0.7695,0.0155,43.3989,0.017\n5158,359.415588,-46.768478,327.729895,-67.686097,1,0.4004,0.4266,0.0302,41.8498,0.009\n5162,52.558594,-27.279613,222.538937,-54.845107,1,nan,0.5872,0.0094,42.6823,0.008\n5166,358.636353,-46.768478,328.890146,-67.388837,1,nan,0.7222,0.0331,43.
2299,0.008\n5176,359.814819,-44.399834,330.775011,-69.801007,1,nan,0.8147,0.0386,43.5513,0.009\n5204,33.574219,-4.780192,168.064587,-60.175886,1,0.4879,0.5068,0.0211,42.2965,0.019\n5210,33.574219,-4.780192,168.064587,-60.175886,1,0.4971,0.4954,0.0328,42.2371,0.019\n5217,53.261719,-27.615883,223.280041,-54.281374,1,nan,1.6018,0.0726,45.3649,0.006\n5234,51.328125,-27.784405,223.130589,-55.999499,1,0.7616,0.8177,0.0589,43.5609,0.013\n5236,35.332031,-5.979157,172.286722,-59.931743,1,0.3651,0.4588,0.1365,42.0379,0.022\n5249,348.586945,-64.573555,318.693903,-49.477869,1,nan,0.9021,0.0339,43.8239,0.018\n5264,347.617462,-62.508568,321.121462,-50.904708,1,nan,0.6893,0.0103,43.1059,0.019\n5278,53.789062,-27.784405,223.685697,-53.845803,1,0.4394,0.3893,0.1926,41.6151,0.009\n5280,351.299988,-62.320400,319.038597,-52.026867,1,nan,1.2309,0.2137,44.6583,0.018\n5283,347.013428,-62.508568,321.472056,-50.735330,1,nan,0.6296,0.1513,42.8661,0.018\n5286,347.846710,-64.760857,318.929827,-49.143596,1,0.5384,0.4991,0.1472,42.2564,0.019\n5307,2.457983,-45.389202,324.632685,-69.945696,1,0.4932,0.4967,0.0234,42.2437,0.011\n5313,349.891296,-64.573555,317.972107,-49.786192,1,0.3924,0.4004,0.3110,41.6868,0.023\n5315,359.805206,-46.768478,327.135979,-67.829903,1,nan,0.7107,0.0354,43.1872,0.011\n5317,33.574219,-5.379379,168.838090,-60.637536,1,0.2383,0.2830,0.1436,40.8096,0.017\n5319,148.710938,2.836105,235.050801,41.328739,1,nan,0.5440,0.0154,42.4816,0.031\n5322,0.929752,-44.597992,328.531426,-70.083244,1,nan,1.0052,0.0257,44.1143,0.011\n5335,348.908447,-63.823658,319.169886,-50.176186,1,0.2108,0.2283,0.1361,40.2787,0.018\n5338,152.050781,3.284369,237.157374,44.318466,1,nan,0.8439,0.0429,43.6452,0.019\n5354,34.980469,-6.279288,172.180075,-60.389399,1,0.2692,0.3663,0.1994,41.4598,0.023\n5365,349.285706,-62.884678,319.786163,-51.046461,1,nan,0.9173,0.0221,43.8686,0.018\n5370,358.636353,-46.768478,328.890146,-67.388837,1,0.7437,0.6297,0.0526,42.8666,0.008\n5372,348.908447,-63.823658,319.169886,-50.1
76186,1,0.5219,0.5300,0.0576,42.4134,0.018\n5386,34.453125,-5.229529,169.987075,-59.956185,1,nan,0.6316,0.0102,42.8746,0.019\n5403,51.855469,-28.630989,224.733260,-55.649872,1,nan,0.9827,0.0233,44.0533,0.009\n5412,151.347656,4.181528,235.568369,44.259942,1,0.3435,0.3761,0.2378,41.5268,0.016\n5417,32.871094,-4.780192,166.959493,-60.615132,1,nan,0.7730,0.0296,43.4109,0.017\n5419,358.636353,-46.768478,328.890146,-67.388837,1,nan,0.5571,0.0159,42.5440,0.008\n5422,0.190678,-45.783966,327.956322,-68.803772,1,nan,1.1195,0.1674,44.4033,0.005\n5433,53.085938,-28.122234,224.100909,-54.509752,1,nan,1.1700,0.1749,44.5221,0.007\n5453,150.117188,3.732834,235.120533,42.993809,1,nan,0.5035,0.3843,42.2794,0.020\n5473,53.437500,-29.142223,225.908120,-54.336118,1,nan,0.5390,0.0204,42.4575,0.008\n5478,33.398438,-4.331149,167.226341,-59.936551,1,nan,0.9349,0.0204,43.9195,0.018\n5520,152.050781,2.985506,237.495952,44.143927,1,0.2992,0.2421,0.0411,40.4233,0.019\n5521,51.328125,-27.784405,223.130589,-55.999499,1,nan,0.9909,0.0426,44.0758,0.013\n5535,53.261719,-27.615883,223.280041,-54.281374,1,nan,0.9054,0.0151,43.8337,0.006\n5543,33.398438,-3.732834,166.492280,-59.466614,1,nan,0.8296,0.1240,43.5996,0.022\n5550,151.171875,1.342993,238.602520,42.464379,1,nan,0.5898,0.0156,42.6937,0.026\n5562,351.734680,-62.884678,318.284128,-51.651217,1,0.3430,0.4407,0.1538,41.9338,0.019\n5564,351.321442,-64.198746,317.458993,-50.429931,1,nan,0.6240,0.0197,42.8426,0.023\n5565,1.708861,-45.586655,325.688716,-69.520253,1,nan,0.7586,0.0376,43.3607,0.011\n5566,347.013428,-62.508568,321.472056,-50.735330,1,nan,0.9684,0.0328,44.0142,0.018\n5569,33.574219,-5.079716,168.448505,-60.407218,1,0.2696,0.2658,0.6279,40.6536,0.016\n5570,347.861847,-61.943836,321.519104,-51.424048,1,nan,0.6613,0.0352,42.9962,0.017\n5576,0.589520,-47.161343,325.385896,-67.769893,1,nan,1.4255,0.1860,45.0525,0.009\n5591,350.230255,-61.943836,320.053946,-52.070537,1,nan,0.7811,0.0143,43.4387,0.017\n5623,1.753247,-46.768478,324.030235,-68.49804
1,1,0.3915,0.4107,1.0343,41.7522,0.014\n5635,359.415588,-46.768478,327.729895,-67.686097,1,nan,1.0468,0.1246,44.2231,0.009\n5655,1.753247,-46.768478,324.030235,-68.498041,1,nan,1.4730,0.2564,45.1403,0.014\n5656,2.457983,-45.389202,324.632685,-69.945696,1,nan,0.9835,0.0168,44.0555,0.011\n5661,35.683594,-5.379379,171.992947,-59.253501,1,nan,0.2668,0.2983,40.6628,0.020\n5696,349.285706,-62.884678,319.786163,-51.046461,1,nan,0.8087,0.0795,43.5316,0.018\n5704,350.230255,-61.943836,320.053946,-52.070537,1,nan,0.5054,0.0636,42.2892,0.017\n5723,351.321442,-64.198746,317.458993,-50.429931,1,nan,0.8582,0.1911,43.6902,0.023\n5729,53.085938,-27.784405,223.525509,-54.460748,1,nan,1.3930,0.0637,44.9906,0.007\n5758,52.910156,-25.944481,220.366350,-54.301439,1,nan,0.9711,0.0522,44.0216,0.010\n5761,150.468750,1.641510,237.714575,42.075234,1,nan,0.9866,0.2871,44.0641,0.017\n5763,349.891296,-64.573555,317.972107,-49.786192,1,nan,1.0138,0.2387,44.1370,0.023\n5794,351.299988,-62.320400,319.038597,-52.026867,1,nan,1.3563,0.0554,44.9189,0.018\n5798,51.328125,-27.784405,223.130589,-55.999499,1,nan,0.8854,0.0513,43.7737,0.013\n5801,53.613281,-28.630989,225.073365,-54.119461,1,nan,0.8968,0.0208,43.8080,0.006\n5817,34.277344,-5.079716,169.526841,-59.956640,1,0.5298,0.5217,0.0200,42.3721,0.019\n5834,352.398651,-62.696659,318.017427,-51.967966,1,nan,0.8078,0.0389,43.5284,0.020\n5844,2.097458,-45.783966,324.737840,-69.478613,1,nan,1.1469,0.0339,44.4683,0.011\n5852,0.190678,-45.783966,327.956322,-68.803772,1,nan,0.8192,0.0304,43.5659,0.005\n5864,51.855469,-28.630989,224.733260,-55.649872,1,nan,1.3871,0.1040,44.9791,0.009\n5877,151.347656,4.181528,235.568369,44.259942,1,0.3096,0.3166,0.7338,41.0910,0.016\n5881,33.398438,-4.331149,167.226341,-59.936551,1,nan,0.5213,0.0134,42.3702,0.018\n5895,348.908447,-63.823658,319.169886,-50.176186,1,nan,0.8744,0.0299,43.7403,0.018\n5911,33.750000,-4.630479,168.146242,-59.949072,1,0.2117,0.5363,0.4038,42.4441,0.019\n5922,1.694561,-45.191612,326.278557,-69.858253
,1,0.1845,0.2240,0.3763,40.2324,0.011\n5924,51.855469,-28.630989,224.733260,-55.649872,1,0.7987,0.8068,0.0181,43.5253,0.009\n5930,149.589844,3.583322,234.885369,42.474696,1,nan,1.3364,0.0830,44.8793,0.024\n5937,2.457983,-45.389202,324.632685,-69.945696,1,nan,1.2012,0.0366,44.5926,0.011\n5954,347.617462,-62.508568,321.121462,-50.904708,1,nan,1.1290,0.0618,44.4261,0.019\n5956,51.855469,-28.630989,224.733260,-55.649872,1,nan,0.8904,0.0380,43.7889,0.009\n5957,150.468750,3.732834,235.392208,43.283244,1,nan,0.7308,0.0756,43.2613,0.020\n5978,346.655182,-63.260487,320.952196,-50.040935,1,nan,0.5152,0.0154,42.3393,0.019\n5982,33.925781,-5.979157,170.179895,-60.866303,1,nan,0.4710,0.0617,42.1060,0.022\n5995,359.058563,-45.191612,330.695783,-68.844915,1,nan,1.5541,0.1950,45.2840,0.011\n6000,0.574468,-45.981140,327.041068,-68.778764,1,nan,1.2101,0.0361,44.6126,0.006\n6001,53.437500,-29.142223,225.908120,-54.336118,1,nan,0.8961,0.0542,43.8060,0.008\n6003,349.891296,-64.573555,317.972107,-49.786192,1,0.6069,0.5395,0.0894,42.4600,0.023\n6017,151.347656,4.181528,235.568369,44.259942,1,nan,0.5050,0.5080,42.2869,0.016\n6021,358.636353,-46.768478,328.890146,-67.388837,1,nan,0.9940,0.0435,44.0841,0.008\n6022,352.711273,-63.823658,316.922299,-51.059403,1,nan,0.2460,0.2146,40.4626,0.024\n6023,348.595886,-63.072620,320.023289,-50.713060,1,nan,0.4699,0.0341,42.0998,0.021\n6028,358.648071,-46.375080,329.462659,-67.716008,1,nan,0.7000,0.0060,43.1467,0.009\n6030,358.636353,-46.768478,328.890146,-67.388837,1,nan,0.7964,0.0420,43.4906,0.008\n6076,0.190678,-45.783966,327.956322,-68.803772,1,nan,1.2728,0.0758,44.7483,0.005\n6087,51.679688,-27.447618,222.618229,-55.642263,1,nan,0.8998,0.0507,43.8170,0.010\n6091,359.058563,-45.191612,330.695783,-68.844915,1,0.1812,0.1971,0.0181,39.9205,0.011\n6092,348.595886,-63.072620,320.023289,-50.713060,1,nan,1.2574,0.2139,44.7155,0.021\n6096,351.953644,-62.132156,318.777388,-52.347124,1,nan,1.1012,0.0271,44.3592,0.019\n6106,150.117188,2.836105,236.124718,42.48
3719,1,nan,0.5103,0.0193,42.3145,0.016\n6119,1.708861,-45.586655,325.688716,-69.520253,1,0.3975,0.4588,0.5979,42.0377,0.011\n6120,346.655182,-63.260487,320.952196,-50.040935,1,nan,1.1674,0.4165,44.5161,0.019\n6126,53.085938,-27.111860,222.384291,-54.355086,1,nan,0.7902,0.0393,43.4696,0.007\n6135,348.586945,-64.573555,318.693903,-49.477869,1,0.8477,0.8451,0.1229,43.6492,0.018\n6151,150.292969,2.686724,236.427488,42.541447,1,nan,0.8598,0.0520,43.6952,0.016\n6162,2.071130,-45.191612,325.606223,-69.989264,1,nan,1.3977,0.0957,44.9997,0.011\n6164,34.980469,-6.279288,172.180075,-60.389399,1,nan,1.5785,0.3260,45.3258,0.023\n6173,150.117188,2.836105,236.124718,42.483719,1,0.3881,0.3821,0.0142,41.5673,0.016\n6179,348.586945,-64.573555,318.693903,-49.477869,1,0.5991,0.5030,0.0291,42.2767,0.018\n6187,349.891296,-64.573555,317.972107,-49.786192,1,nan,1.1064,0.0831,44.3719,0.023\n6191,53.613281,-28.630989,225.073365,-54.119461,1,nan,0.2328,0.0166,40.3267,0.006\n6197,151.523438,3.134927,236.900695,43.803170,1,nan,1.0306,0.0174,44.1812,0.019\n6205,33.574219,-6.579593,170.455585,-61.548219,1,0.5161,0.5873,0.0270,42.6825,0.021\n6211,0.949367,-45.586655,326.991548,-69.251686,1,0.5940,0.5967,0.0095,42.7247,0.013\n6217,35.683594,-5.379379,171.992947,-59.253501,1,nan,0.6309,0.0313,42.8717,0.020\n6223,349.046051,-61.943836,320.796530,-51.753706,1,nan,0.2937,0.0805,40.9020,0.017\n6287,358.648071,-46.375080,329.462659,-67.716008,1,nan,0.6517,0.0263,42.9572,0.009\n6289,52.207031,-26.610098,221.298836,-55.042928,1,0.2186,0.3975,0.2967,41.6682,0.014\n6293,33.925781,-5.979157,170.179895,-60.866303,1,nan,0.8837,0.0179,43.7686,0.022\n6297,53.964844,-28.630989,225.142950,-53.813613,1,0.5674,0.6449,0.0377,42.9295,0.009\n6308,347.812500,-63.448284,320.128971,-50.202348,1,nan,0.9701,0.0203,44.0187,0.021\n6313,349.966217,-62.696659,319.542989,-51.376556,1,nan,0.3825,0.0215,41.5700,0.021\n6343,33.398438,-4.331149,167.226341,-59.936551,1,nan,1.0126,0.0334,44.1339,0.018\n6345,0.190678,-45.783966,327.9563
22,-68.803772,1,nan,1.0255,0.0178,44.1678,0.005\n6347,149.414062,3.433834,234.919132,42.245550,1,nan,0.2243,0.0261,40.2360,0.027\n6350,33.574219,-5.379379,168.838090,-60.637536,1,nan,1.2142,0.1253,44.6216,0.017\n6352,150.996094,4.181528,235.291975,43.970869,1,nan,1.4079,0.0591,45.0191,0.015\n6354,34.804688,-5.829153,171.307861,-60.174401,1,0.3701,0.4966,0.3999,42.2433,0.023\n6368,351.299988,-62.320400,319.038597,-52.026867,1,nan,0.9252,0.0390,43.8918,0.018\n6369,32.871094,-4.780192,166.959493,-60.615132,1,nan,0.5996,0.0136,42.7374,0.017\n6372,359.805206,-46.768478,327.135979,-67.829903,1,0.7964,0.7850,0.0422,43.4520,0.011\n6376,32.695312,-4.929937,166.868469,-60.841230,1,nan,0.7511,0.0307,43.3345,0.018\n6378,348.529419,-61.755440,321.293980,-51.763351,1,nan,0.6002,0.0209,42.7397,0.016\n6390,0.589520,-47.161343,325.385896,-67.769893,1,0.5421,0.4998,0.2593,42.2602,0.009\n6391,34.101562,-5.829153,170.247753,-60.638325,1,nan,1.1253,0.1662,44.4173,0.019\n6402,51.679688,-27.447618,222.618229,-55.642263,1,0.4580,0.4038,0.0453,41.7087,0.010\n6405,34.804688,-5.829153,171.307861,-60.174401,1,nan,1.3943,0.2658,44.9931,0.023\n6436,53.613281,-28.630989,225.073365,-54.119461,1,nan,0.4164,0.3756,41.7878,0.006\n6437,33.925781,-5.979157,170.179895,-60.866303,1,0.4279,0.4344,0.0322,41.8968,0.022\n6450,0.190678,-45.783966,327.956322,-68.803772,1,nan,0.7339,0.0231,43.2726,0.005\n6458,352.711273,-63.823658,316.922299,-51.059403,1,1.1032,1.0971,0.1861,44.3491,0.024\n6460,51.679688,-27.447618,222.618229,-55.642263,1,nan,0.3954,0.0255,41.6549,0.010\n6470,358.636353,-46.768478,328.890146,-67.388837,1,nan,1.0362,0.0170,44.1958,0.008\n6474,51.855469,-27.953188,223.543603,-55.561470,1,nan,0.7091,0.1441,43.1812,0.008\n6498,348.586945,-64.573555,318.693903,-49.477869,1,nan,1.1153,0.1670,44.3933,0.018\n6514,2.071130,-45.191612,325.606223,-69.989264,1,0.3119,0.3565,0.4554,41.3906,0.011\n6515,34.804688,-5.829153,171.307861,-60.174401,1,nan,1.1225,0.0927,44.4106,0.023\n6537,152.050781,2.985506,237.4
95952,44.143927,1,nan,1.2730,0.1522,44.7487,0.019\n6546,53.085938,-28.122234,224.100909,-54.509752,1,0.2509,0.3826,0.2720,41.5706,0.007\n6548,148.710938,2.836105,235.050801,41.328739,1,nan,1.1286,0.1579,44.4252,0.031\n6555,151.699219,3.583322,236.533224,44.205648,1,0.2053,0.2236,0.0177,40.2274,0.016\n6560,53.613281,-27.953188,223.929533,-54.024772,1,nan,0.7824,0.0662,43.4432,0.007\n6567,32.871094,-4.780192,166.959493,-60.615132,1,nan,0.7820,0.0137,43.4419,0.017\n6568,51.679688,-27.447618,222.618229,-55.642263,1,nan,1.0385,0.0242,44.2017,0.010\n6575,33.398438,-3.732834,166.492280,-59.466614,1,0.5197,0.5580,0.0155,42.5483,0.022\n6598,0.589520,-47.161343,325.385896,-67.769893,1,nan,0.7918,0.0238,43.4750,0.009\n6606,151.523438,3.134927,236.900695,43.803170,1,nan,0.5715,0.0474,42.6108,0.019\n6620,351.259003,-64.386185,317.344860,-50.255113,1,nan,0.9861,0.0153,44.0628,0.020\n6623,33.750000,-4.630479,168.146242,-59.949072,1,nan,0.8838,0.0439,43.7691,0.019\n6633,349.891296,-64.573555,317.972107,-49.786192,1,0.5312,0.5073,0.1220,42.2989,0.023\n6654,150.468750,3.732834,235.392208,43.283244,1,0.8058,0.8088,0.0111,43.5318,0.020\n6655,0.929752,-44.597992,328.531426,-70.083244,1,0.6631,0.6768,0.0337,43.0574,0.011\n6663,52.910156,-26.276812,220.926149,-54.363918,1,nan,0.4449,0.3362,41.9584,0.008\n6673,150.820312,3.732834,235.666318,43.572109,1,0.5028,0.5238,0.0312,42.3823,0.016\n6682,352.711273,-63.823658,316.922299,-51.059403,1,0.2960,0.5328,0.7393,42.4271,0.024\n6692,1.694561,-45.191612,326.278557,-69.858253,1,nan,0.4504,0.0611,41.9897,0.011\n6704,348.586945,-64.573555,318.693903,-49.477869,1,0.3131,0.2939,0.0390,40.9044,0.018\n6723,349.160583,-64.760857,318.219706,-49.458924,1,0.2401,0.2742,0.1134,40.7312,0.020\n6729,2.457983,-45.389202,324.632685,-69.945696,1,nan,0.9192,0.0320,43.8743,0.011\n6742,349.615387,-63.636005,318.927246,-50.506542,1,nan,1.7240,0.2208,45.5614,0.018\n6756,359.816315,-44.003082,331.451340,-70.123054,1,nan,1.0236,0.0293,44.1629,0.013\n6814,152.050781,3.28
4369,237.157374,44.318466,1,0.6160,0.6226,0.6151,42.8365,0.019\n6821,1.708861,-45.586655,325.688716,-69.520253,1,nan,0.3834,0.0765,41.5757,0.011\n6827,358.636353,-46.768478,328.890146,-67.388837,1,nan,1.1052,0.1737,44.3688,0.008\n6833,150.996094,2.388015,237.313912,42.939977,1,nan,1.4382,0.0724,45.0762,0.021\n6837,32.871094,-4.780192,166.959493,-60.615132,1,nan,0.8994,0.0420,43.8158,0.017\n6847,33.574219,-5.079716,168.448505,-60.407218,1,nan,0.6653,0.0303,43.0120,0.016\n6855,346.500000,-62.320400,321.951129,-50.736054,1,nan,0.9313,0.0387,43.9092,0.020\n6856,1.363636,-46.768478,324.669342,-68.371416,1,0.3068,0.2935,0.0090,40.9003,0.008\n6876,150.644531,3.583322,235.698235,43.342784,1,0.3246,0.3216,0.4661,41.1300,0.018\n6878,32.871094,-4.780192,166.959493,-60.615132,1,0.5919,0.5848,0.1811,42.6713,0.017\n6884,52.910156,-27.279613,222.625192,-54.536648,1,0.2612,0.2825,1.1744,40.8053,0.007\n6897,359.814819,-44.399834,330.775011,-69.801007,1,nan,0.6130,0.0239,42.7954,0.009\n6907,151.171875,1.342993,238.602520,42.464379,1,0.4791,0.5900,0.3591,42.6946,0.026\n6911,0.929752,-44.597992,328.531426,-70.083244,1,0.3125,0.3006,0.0066,40.9604,0.011\n6919,346.655182,-63.260487,320.952196,-50.040935,1,nan,1.6301,0.2474,45.4117,0.019\n6927,151.171875,2.537361,237.288526,43.169764,1,nan,0.5389,0.2528,42.4568,0.024\n6930,52.207031,-26.610098,221.298836,-55.042928,1,0.6478,0.5687,0.2240,42.5980,0.014\n6932,52.910156,-27.953188,223.774083,-54.639214,1,nan,1.3042,0.0324,44.8137,0.007\n6937,0.965665,-46.375080,325.845907,-68.579427,1,nan,0.8820,0.0370,43.7636,0.007\n6949,346.130127,-63.072620,321.423103,-50.042305,1,nan,0.8422,0.0195,43.6399,0.020\n6951,0.589520,-47.161343,325.385896,-67.769893,1,0.5372,0.5224,0.0077,42.3758,0.009\n6952,150.117188,2.238686,236.784618,42.139082,1,nan,1.7307,0.2954,45.5719,0.016\n6964,359.816315,-44.003082,331.451340,-70.123054,1,nan,1.1443,0.2764,44.4622,0.013\n6982,358.312500,-44.993881,332.185785,-68.685906,1,0.3227,0.3189,0.0263,41.1089,0.009\n6988,150.99
6094,2.985506,236.647967,43.287350,1,nan,0.8042,0.0261,43.5165,0.020\n7013,34.277344,-5.679190,170.314930,-60.410322,1,nan,0.5236,0.0416,42.3817,0.020\n7017,349.429535,-62.508568,320.039643,-51.393745,1,nan,0.3210,0.0095,41.1251,0.020\n7021,358.665253,-45.783966,330.353593,-68.203652,1,nan,0.9959,0.0260,44.0892,0.009\n7032,51.328125,-27.784405,223.130589,-55.999499,1,nan,0.8584,0.0214,43.6910,0.013\n7055,34.277344,-5.679190,170.314930,-60.410322,1,nan,0.9347,0.0439,43.9190,0.020\n7060,52.207031,-28.291550,224.208534,-55.300157,1,nan,0.6635,0.0179,43.0048,0.007\n7065,52.910156,-26.276812,220.926149,-54.363918,1,nan,1.5901,0.1813,45.3453,0.008\n7129,51.328125,-27.447618,222.535046,-55.950727,1,nan,1.1651,0.0362,44.5107,0.013\n7140,150.820312,3.134927,236.341348,43.230123,1,nan,0.9363,0.0122,43.9237,0.016\n7161,1.363636,-46.768478,324.669342,-68.371416,1,0.3784,0.3433,0.1675,41.2947,0.008\n7162,52.910156,-27.953188,223.774083,-54.639214,1,0.3593,0.4691,0.9910,42.0956,0.007\n7163,53.085938,-27.784405,223.525509,-54.460748,1,nan,1.1234,0.1962,44.4129,0.007\n7172,151.171875,1.342993,238.602520,42.464379,1,nan,0.6339,0.0129,42.8842,0.026\n7175,347.617462,-62.508568,321.121462,-50.904708,1,nan,1.0926,0.1310,44.3382,0.019\n7224,53.964844,-28.630989,225.142950,-53.813613,1,nan,1.2376,0.0743,44.6730,0.009\n7226,52.207031,-26.610098,221.298836,-55.042928,1,0.3847,0.3273,0.8615,41.1744,0.014\n7233,35.683594,-5.379379,171.992947,-59.253501,1,nan,1.2494,0.2608,44.6985,0.020\n7241,150.820312,3.732834,235.666318,43.572109,1,nan,0.7200,0.0438,43.2218,0.016\n7246,151.347656,4.181528,235.568369,44.259942,1,nan,0.6039,0.0147,42.7560,0.016\n7265,151.171875,2.238686,237.619933,42.994783,1,nan,0.7073,0.0647,43.1743,0.024\n7275,1.708861,-45.586655,325.688716,-69.520253,1,0.4268,0.4094,0.0089,41.7441,0.011\n7282,51.855469,-28.630989,224.733260,-55.649872,1,nan,1.4386,0.0998,45.0769,0.009\n7292,33.574219,-5.379379,168.838090,-60.637536,1,nan,0.7780,0.0880,43.4282,0.017\n7297,0.574468,-45.9811
40,327.041068,-68.778764,1,0.5290,0.5535,0.4055,42.5268,0.006\n7326,351.299988,-62.320400,319.038597,-52.026867,1,0.2394,0.2364,0.0161,40.3644,0.018\n7344,33.398438,-4.331149,167.226341,-59.936551,1,0.5794,0.5951,0.0111,42.7176,0.018\n7378,347.846710,-64.760857,318.929827,-49.143596,1,nan,1.0318,0.1438,44.1843,0.019\n7381,52.031250,-26.443335,220.963669,-55.168557,1,0.3373,0.5151,0.2782,42.3389,0.014\n7385,52.910156,-26.276812,220.926149,-54.363918,1,0.7536,0.7669,0.0410,43.3898,0.008\n7389,359.446716,-44.201530,331.730015,-69.805709,1,0.6180,0.6621,0.1010,42.9994,0.010\n7447,0.190678,-45.783966,327.956322,-68.803772,1,nan,0.6276,0.0506,42.8577,0.005\n7451,351.734680,-62.884678,318.284128,-51.651217,1,nan,0.7431,0.0243,43.3058,0.019\n7464,346.500000,-62.320400,321.951129,-50.736054,1,0.5846,0.6343,0.0145,42.8857,0.020\n7479,359.058563,-45.191612,330.695783,-68.844915,1,0.3899,0.3252,0.7307,41.1585,0.011\n7496,0.190678,-45.783966,327.956322,-68.803772,1,nan,0.6583,0.0121,42.9840,0.005\n7508,34.980469,-6.279288,172.180075,-60.389399,1,nan,1.1198,0.0531,44.4042,0.023\n7515,150.820312,3.134927,236.341348,43.230123,1,nan,1.0715,0.0116,44.2857,0.016\n7535,33.222656,-4.780192,167.515653,-60.396584,1,nan,1.4866,0.1394,45.1649,0.018\n7556,53.085938,-27.784405,223.525509,-54.460748,1,0.4003,0.4870,0.4078,42.1925,0.007\n7590,359.811707,-45.191612,329.485675,-69.150905,1,nan,0.7702,0.0277,43.4013,0.010\n7593,53.085938,-27.784405,223.525509,-54.460748,1,nan,1.1395,0.0880,44.4511,0.007\n7596,358.312500,-44.993881,332.185785,-68.685906,1,nan,1.0214,0.2333,44.1572,0.009\n7597,152.050781,2.985506,237.495952,44.143927,1,0.7873,0.8649,0.0311,43.7112,0.019\n7599,346.130127,-63.072620,321.423103,-50.042305,1,nan,0.6460,0.0576,42.9341,0.020\n7616,151.347656,3.583322,236.252362,43.918627,1,nan,0.6020,0.0567,42.7478,0.015\n7651,2.097458,-45.783966,324.737840,-69.478613,1,0.4739,0.4618,0.0214,42.0550,0.011\n7655,150.117188,2.836105,236.124718,42.483719,1,nan,0.7487,0.0442,43.3259,0.016\n765
7,352.398651,-62.696659,318.017427,-51.967966,1,nan,1.1347,0.0316,44.4397,0.020\n7667,53.613281,-27.953188,223.929533,-54.024772,1,nan,0.6882,0.0180,43.1017,0.007\n7693,349.615387,-63.636005,318.927246,-50.506542,1,0.5467,0.5108,0.1636,42.3168,0.018\n7705,347.013428,-62.508568,321.472056,-50.735330,1,0.6672,0.6780,0.0682,43.0621,0.018\n7723,33.398438,-4.331149,167.226341,-59.936551,1,nan,0.7023,0.0297,43.1555,0.018\n7764,349.429535,-62.508568,320.039643,-51.393745,1,nan,0.5974,0.0081,42.7277,0.020\n7771,349.046051,-61.943836,320.796530,-51.753706,1,nan,0.5341,0.0966,42.4333,0.017\n7775,349.615387,-63.636005,318.927246,-50.506542,1,0.3416,0.3512,0.0280,41.3526,0.018\n7788,346.130127,-63.072620,321.423103,-50.042305,1,nan,1.3320,0.0596,44.8704,0.020\n7793,33.925781,-5.979157,170.179895,-60.866303,1,nan,1.1112,0.0841,44.3835,0.022\n7806,152.050781,2.985506,237.495952,44.143927,1,0.1438,1.9587,0.6082,45.9018,0.019\n7809,358.648071,-46.375080,329.462659,-67.716008,1,nan,0.6914,0.0120,43.1142,0.009\n7820,148.710938,2.836105,235.050801,41.328739,1,0.3550,0.2846,0.0753,40.8234,0.031\n7830,1.708861,-45.586655,325.688716,-69.520253,1,nan,0.6696,0.0570,43.0291,0.011\n7858,1.363636,-46.768478,324.669342,-68.371416,1,0.4673,0.4927,0.0100,42.2231,0.008\n7867,359.811707,-45.191612,329.485675,-69.150905,1,nan,0.6117,0.0088,42.7899,0.010\n7873,150.468750,1.641510,237.714575,42.075234,1,0.5708,0.6019,0.0233,42.7474,0.017\n7920,358.665253,-45.783966,330.353593,-68.203652,1,nan,0.8342,0.0228,43.6144,0.009\n7928,151.171875,1.342993,238.602520,42.464379,1,nan,0.8590,0.0235,43.6927,0.026\n7931,33.574219,-6.579593,170.455585,-61.548219,1,nan,0.5225,0.4053,42.3762,0.021\n7940,52.910156,-25.944481,220.366350,-54.301439,1,0.6969,0.7104,0.0284,43.1859,0.010\n7942,0.190678,-45.783966,327.956322,-68.803772,1,0.2814,0.3115,0.5654,41.0497,0.005\n7957,1.708861,-45.586655,325.688716,-69.520253,1,nan,0.9375,0.0504,43.9271,0.011\n7977,33.398438,-3.732834,166.492280,-59.466614,1,nan,1.0780,0.0578,44.30
20,0.022\n7980,358.312500,-44.993881,332.185785,-68.685906,1,nan,1.2456,0.1373,44.6902,0.009\n7982,346.276581,-64.011238,320.448031,-49.344136,1,0.4520,0.4716,0.0369,42.1091,0.019\n7983,0.949367,-45.586655,326.991548,-69.251686,1,nan,1.0778,0.0319,44.3015,0.013\n7984,34.453125,-5.229529,169.987075,-59.956185,1,nan,1.4448,0.2401,45.0886,0.019\n7990,1.723404,-45.981140,325.117958,-69.180825,1,nan,0.8379,0.0368,43.6263,0.010\n8019,33.574219,-6.579593,170.455585,-61.548219,1,0.2036,0.2032,0.0104,39.9944,0.021\n8034,52.207031,-26.610098,221.298836,-55.042928,1,nan,1.5348,0.1021,45.2506,0.014\n8036,51.328125,-27.447618,222.535046,-55.950727,1,0.3381,0.5336,0.3006,42.4311,0.013\n8037,0.189873,-45.586655,328.254458,-68.969298,1,nan,0.8038,0.0994,43.5152,0.007\n8054,52.910156,-25.944481,220.366350,-54.301439,1,nan,0.8669,0.0250,43.7173,0.010\n8057,359.058563,-45.191612,330.695783,-68.844915,1,nan,0.7677,0.0225,43.3927,0.011\n8058,52.031250,-26.443335,220.963669,-55.168557,1,nan,1.0909,0.0215,44.3339,0.014\n8063,150.468750,3.732834,235.392208,43.283244,1,nan,1.0023,0.2042,44.1065,0.020\n8094,149.414062,2.238686,236.239766,41.565558,1,nan,0.5647,0.0343,42.5793,0.017\n8102,2.071130,-45.191612,325.606223,-69.989264,1,nan,1.1741,0.0373,44.5314,0.011\n8129,359.814819,-44.399834,330.775011,-69.801007,1,0.4569,0.4721,0.2974,42.1119,0.009\n8135,0.190678,-45.783966,327.956322,-68.803772,1,0.2615,0.2935,0.0129,40.9008,0.005\n8153,350.230255,-61.943836,320.053946,-52.070537,1,0.2924,0.3076,0.0101,41.0184,0.017\n8165,33.750000,-4.630479,168.146242,-59.949072,1,nan,1.0355,0.1353,44.1940,0.019\n8166,150.292969,2.686724,236.427488,42.541447,1,nan,1.2423,0.1331,44.6832,0.016\n8179,52.207031,-28.630989,224.800211,-55.343637,1,nan,0.6201,0.0376,42.8258,0.009\n8181,0.965665,-46.375080,325.845907,-68.579427,1,0.6159,0.6147,0.0327,42.8026,0.007\n8182,348.586945,-64.573555,318.693903,-49.477869,1,nan,1.0126,0.0872,44.1337,0.018\n8196,53.085938,-27.111860,222.384291,-54.355086,1,nan,0.6223,0.0130,4
2.8354,0.007\n8197,33.750000,-4.630479,168.146242,-59.949072,1,nan,0.8678,0.0526,43.7202,0.019\n8201,53.261719,-27.615883,223.280041,-54.281374,1,nan,1.0299,0.0450,44.1792,0.006\n8242,150.468750,1.641510,237.714575,42.075234,1,nan,0.9643,0.0352,44.0026,0.017\n8250,51.855469,-27.953188,223.543603,-55.561470,1,nan,1.3103,0.1902,44.8263,0.008\n8252,359.446716,-44.201530,331.730015,-69.805709,1,nan,0.8891,0.1482,43.7851,0.010\n8263,32.695312,-4.929937,166.868469,-60.841230,1,0.7023,0.7653,0.0398,43.3844,0.018\n8271,53.964844,-28.630989,225.142950,-53.813613,1,nan,1.2974,0.0994,44.7998,0.009\n8283,349.160583,-64.760857,318.219706,-49.458924,1,nan,1.1567,0.0332,44.4912,0.020\n8284,35.859375,-4.630479,171.270769,-58.580806,1,0.4045,0.4502,0.0352,41.9889,0.022\n8315,152.050781,3.284369,237.157374,44.318466,1,0.7654,0.7712,0.0372,43.4049,0.019\n8322,150.117188,2.836105,236.124718,42.483719,1,0.0718,0.0803,0.1025,37.8083,0.016\n8329,52.031250,-26.443335,220.963669,-55.168557,1,nan,1.5343,0.1326,45.2496,0.014\n8331,33.574219,-5.079716,168.448505,-60.407218,1,0.6347,0.6283,0.0176,42.8605,0.016\n8332,35.859375,-4.630479,171.270769,-58.580806,1,0.5166,0.5551,0.0652,42.5347,0.022\n8345,359.058563,-45.191612,330.695783,-68.844915,1,0.5651,0.5479,0.0133,42.5004,0.011\n8361,51.855469,-28.630989,224.733260,-55.649872,1,nan,0.9078,0.0244,43.8409,0.009\n8386,359.811707,-45.191612,329.485675,-69.150905,1,nan,0.9630,0.0958,43.9990,0.010\n8388,33.750000,-4.630479,168.146242,-59.949072,1,nan,1.0142,0.0893,44.1381,0.019\n8399,1.363636,-46.768478,324.669342,-68.371416,1,0.6489,0.6460,0.0193,42.9339,0.008\n8401,152.050781,2.985506,237.495952,44.143927,1,0.5695,0.5583,0.0070,42.5495,0.019\n8404,52.910156,-25.944481,220.366350,-54.301439,1,nan,0.9683,0.0275,44.0136,0.010\n8413,349.966217,-62.696659,319.542989,-51.376556,1,nan,0.9296,0.2134,43.9042,0.021\n8420,359.811707,-45.191612,329.485675,-69.150905,1,nan,1.2904,0.0367,44.7852,0.010\n8430,51.855469,-26.276812,220.627031,-55.293792,1,nan,1.305
7,0.0606,44.8168,0.014\n8441,32.695312,-4.929937,166.868469,-60.841230,1,nan,0.6746,0.0423,43.0487,0.018\n8442,351.953644,-62.132156,318.777388,-52.347124,1,nan,0.0000,0.0000,nan,0.019\n8455,151.347656,3.583322,236.252362,43.918627,1,nan,1.0052,0.0667,44.1141,0.015\n8463,150.644531,3.583322,235.698235,43.342784,1,nan,0.9599,0.0304,43.9903,0.018\n8469,348.586945,-64.573555,318.693903,-49.477869,1,nan,0.7521,0.0183,43.3379,0.018\n8487,53.613281,-26.944359,222.237403,-53.863858,1,nan,0.6150,0.0095,42.8042,0.009\n8492,53.613281,-28.630989,225.073365,-54.119461,1,nan,0.9894,0.1246,44.0715,0.006\n8512,52.558594,-27.279613,222.538937,-54.845107,1,0.8384,0.8562,0.0216,43.6841,0.008\n8513,33.574219,-5.079716,168.448505,-60.407218,1,0.3943,0.4243,0.0320,41.8362,0.016\n8521,33.750000,-4.630479,168.146242,-59.949072,1,0.4536,0.4521,0.0212,41.9996,0.019\n8534,349.285706,-62.884678,319.786163,-51.046461,1,nan,0.8932,0.0144,43.7973,0.018\n8548,151.347656,3.583322,236.252362,43.918627,1,0.4318,0.5090,0.0311,42.3075,0.015\n8549,358.636353,-46.768478,328.890146,-67.388837,1,nan,0.7646,0.0436,43.3818,0.008\n8553,347.812500,-63.448284,320.128971,-50.202348,1,nan,0.4066,1.0187,41.7265,0.021\n8565,359.058563,-45.191612,330.695783,-68.844915,1,0.6011,0.5779,0.1187,42.6400,0.011\n8592,53.613281,-27.953188,223.929533,-54.024772,1,nan,0.8871,0.0176,43.7788,0.007\n8619,53.789062,-27.784405,223.685697,-53.845803,1,nan,1.4192,0.1271,45.0406,0.009\n8625,351.734680,-62.884678,318.284128,-51.651217,1,0.3067,0.3063,0.0081,41.0077,0.019\n8644,52.207031,-28.630989,224.800211,-55.343637,1,0.6866,0.7308,0.0554,43.2613,0.009\n8646,1.694561,-45.191612,326.278557,-69.858253,1,nan,1.3300,0.1125,44.8664,0.011\n8661,349.046051,-61.943836,320.796530,-51.753706,1,nan,0.9744,0.0308,44.0307,0.017\n8664,52.558594,-27.279613,222.538937,-54.845107,1,nan,0.5444,0.0180,42.4836,0.008\n8665,151.171875,2.537361,237.288526,43.169764,1,nan,1.2262,0.0861,44.6480,0.024\n8672,35.683594,-5.379379,171.992947,-59.253501,1,nan,1
.1123,0.0767,44.3861,0.020\n8674,53.789062,-27.784405,223.685697,-53.845803,1,0.9353,0.7665,0.0891,43.3883,0.009\n8682,349.046051,-61.943836,320.796530,-51.753706,1,nan,1.0543,0.0550,44.2423,0.017\n8683,52.910156,-27.953188,223.774083,-54.639214,1,0.6644,0.6624,0.0227,43.0003,0.007\n8684,51.855469,-27.953188,223.543603,-55.561470,1,nan,0.8518,0.0200,43.6702,0.008\n8691,351.734680,-62.884678,318.284128,-51.651217,1,nan,0.6565,0.0183,42.9769,0.019\n8697,351.734680,-62.884678,318.284128,-51.651217,1,nan,1.4889,0.0787,45.1691,0.019\n8701,347.812500,-63.448284,320.128971,-50.202348,1,0.2506,0.2395,0.0127,40.3969,0.021\n8702,1.753247,-46.768478,324.030235,-68.498041,1,nan,1.2922,0.0134,44.7890,0.014\n8705,150.820312,3.732834,235.666318,43.572109,1,nan,0.9669,0.0557,44.0099,0.016\n8717,52.207031,-28.291550,224.208534,-55.300157,1,nan,1.4656,0.3245,45.1269,0.007\n8724,2.071130,-45.191612,325.606223,-69.989264,1,0.2852,0.2994,0.0072,40.9502,0.011\n8730,348.586945,-64.573555,318.693903,-49.477869,1,0.4718,2.9626,1.3143,46.9933,0.018\n8734,348.586945,-64.573555,318.693903,-49.477869,1,0.4341,0.4524,0.0446,42.0014,0.018\n8738,52.910156,-26.276812,220.926149,-54.363918,1,nan,1.0394,0.1965,44.2041,0.008\n8739,53.789062,-27.784405,223.685697,-53.845803,1,nan,1.1723,0.2076,44.5273,0.009\n8767,150.820312,3.134927,236.341348,43.230123,1,nan,0.6681,0.0088,43.0230,0.016\n8803,2.097458,-45.783966,324.737840,-69.478613,1,nan,1.0540,0.2342,44.2414,0.011\n8805,0.965665,-46.375080,325.845907,-68.579427,1,nan,1.1091,0.1745,44.3785,0.007\n8806,352.398651,-62.696659,318.017427,-51.967966,1,nan,1.3680,0.2494,44.9420,0.020\n8812,151.347656,4.181528,235.568369,44.259942,1,nan,1.2164,0.1126,44.6264,0.016\n8816,149.414062,3.433834,234.919132,42.245550,1,nan,0.8846,0.0380,43.7713,0.027\n8821,35.859375,-4.630479,171.270769,-58.580806,1,nan,0.7988,0.0369,43.4987,0.022\n8837,351.299988,-62.320400,319.038597,-52.026867,1,nan,0.5320,0.0097,42.4234,0.018\n8850,346.562500,-63.448284,320.824720,-49.866957,1
,nan,1.0271,0.0826,44.1720,0.021\n8857,150.117188,2.238686,236.784618,42.139082,1,nan,1.0907,0.1270,44.3334,0.016\n8861,351.321442,-64.198746,317.458993,-50.429931,1,nan,0.3476,1.1210,41.3264,0.023\n8871,32.695312,-4.929937,166.868469,-60.841230,1,nan,1.8695,0.2279,45.7776,0.018\n8883,359.446716,-44.201530,331.730015,-69.805709,1,nan,1.3015,0.2137,44.8082,0.010\n8902,151.171875,2.238686,237.619933,42.994783,1,nan,0.4347,0.0093,41.8984,0.024\n8904,347.812500,-63.448284,320.128971,-50.202348,1,nan,1.1116,0.0337,44.3843,0.021\n8917,34.101562,-5.829153,170.247753,-60.638325,1,0.5976,0.5969,0.0088,42.7253,0.019\n8919,51.679688,-27.447618,222.618229,-55.642263,1,nan,0.9152,0.0404,43.8626,0.010\n8921,150.644531,3.583322,235.698235,43.342784,1,0.5296,0.5248,0.0115,42.3874,0.018\n8933,349.046051,-61.943836,320.796530,-51.753706,1,nan,0.0000,0.0000,nan,0.017\n8947,33.574219,-5.379379,168.838090,-60.637536,1,0.3757,2.9135,0.9540,46.9496,0.017\n8962,2.071130,-45.191612,325.606223,-69.989264,1,nan,1.2366,0.1848,44.6708,0.011\n8978,348.529419,-61.755440,321.293980,-51.763351,1,nan,0.5523,0.1275,42.5212,0.016\n8983,53.085938,-28.122234,224.100909,-54.509752,1,0.4014,0.3910,0.0331,41.6261,0.007\n8987,0.574468,-45.981140,327.041068,-68.778764,1,nan,1.0629,0.0133,44.2641,0.006\n9033,0.190678,-45.783966,327.956322,-68.803772,1,0.3132,0.2593,0.0293,40.5923,0.005\n9041,347.617462,-62.508568,321.121462,-50.904708,1,nan,1.0424,0.0183,44.2118,0.019\n9051,358.665253,-45.783966,330.353593,-68.203652,1,nan,0.9828,0.0093,44.0536,0.009\n9053,1.708861,-45.586655,325.688716,-69.520253,1,nan,0.5661,0.0926,42.5860,0.011\n9080,33.925781,-5.979157,170.179895,-60.866303,1,nan,0.8653,0.0487,43.7124,0.022\n9084,52.910156,-26.276812,220.926149,-54.363918,1,nan,0.8458,0.0301,43.6512,0.008\n9099,352.398651,-62.696659,318.017427,-51.967966,1,nan,0.5841,0.0141,42.6684,0.020\n9107,150.468750,1.641510,237.714575,42.075234,1,0.4687,0.5719,0.2556,42.6127,0.017\n9110,0.929752,-44.597992,328.531426,-70.083244,1,0.
2531,0.2978,0.0321,40.9371,0.011\n9115,53.261719,-27.615883,223.280041,-54.281374,1,nan,0.5434,0.6581,42.4785,0.006\n9124,34.277344,-5.679190,170.314930,-60.410322,1,nan,1.5382,0.2744,45.2564,0.020\n9138,352.132874,-63.636005,317.424173,-51.095855,1,0.2703,0.2829,0.0150,40.8088,0.021\n9165,349.429535,-62.508568,320.039643,-51.393745,1,nan,1.1058,0.0670,44.3703,0.020\n9167,346.130127,-63.072620,321.423103,-50.042305,1,nan,1.1829,0.0376,44.5516,0.020\n9170,346.500000,-62.320400,321.951129,-50.736054,1,nan,0.5880,0.0130,42.6856,0.020\n9187,35.859375,-4.630479,171.270769,-58.580806,1,nan,0.3500,0.0275,41.3440,0.022\n9197,2.071130,-45.191612,325.606223,-69.989264,1,nan,1.3572,0.2236,44.9207,0.011\n9209,150.644531,3.583322,235.698235,43.342784,1,nan,0.5872,0.0397,42.6820,0.018\n9210,359.415588,-46.768478,327.729895,-67.686097,1,nan,1.2507,0.0994,44.7013,0.009\n9216,359.811707,-45.191612,329.485675,-69.150905,1,0.9399,1.5923,0.1936,45.3491,0.010\n9220,52.207031,-28.630989,224.800211,-55.343637,1,nan,0.6692,0.0244,43.0276,0.009\n9229,52.910156,-27.953188,223.774083,-54.639214,1,0.3075,0.3138,0.0076,41.0683,0.007\n9237,0.929752,-44.597992,328.531426,-70.083244,1,nan,0.5799,0.2003,42.6494,0.011\n9277,149.414062,1.940072,236.565366,41.393323,1,nan,0.7257,0.0452,43.2426,0.018\n9293,151.171875,2.238686,237.619933,42.994783,1,0.5639,0.5209,0.5082,42.3682,0.024\n9302,351.321442,-64.198746,317.458993,-50.429931,1,nan,1.3310,0.0582,44.8683,0.023\n9303,34.453125,-5.229529,169.987075,-59.956185,1,nan,1.0260,0.0163,44.1691,0.019\n9316,33.574219,-5.379379,168.838090,-60.637536,1,nan,0.6163,0.0159,42.8095,0.017\n9322,33.574219,-5.079716,168.448505,-60.407218,1,nan,1.3730,0.1469,44.9518,0.016\n9346,351.321442,-64.198746,317.458993,-50.429931,1,nan,1.3099,0.0995,44.8254,0.023\n9362,53.437500,-29.142223,225.908120,-54.336118,1,nan,0.2692,0.0170,40.6858,0.008\n9365,34.277344,-5.679190,170.314930,-60.410322,1,0.2935,0.2933,0.0217,40.8992,0.020\n9374,53.085938,-27.111860,222.384291,-54.355086,
1,nan,0.7189,0.0162,43.2179,0.007\n9380,346.655182,-63.260487,320.952196,-50.040935,1,nan,0.7849,0.0428,43.4518,0.019\n9408,346.500000,-62.320400,321.951129,-50.736054,1,nan,0.9664,0.0147,44.0085,0.020\n9423,351.382965,-64.011238,317.574052,-50.604657,1,nan,0.5558,0.0329,42.5379,0.023\n9444,349.046051,-61.943836,320.796530,-51.753706,1,nan,1.1578,0.0579,44.4939,0.017\n9451,351.382965,-64.011238,317.574052,-50.604657,1,nan,0.9855,0.0096,44.0612,0.023\n9461,358.665253,-45.783966,330.353593,-68.203652,1,nan,1.1498,0.0929,44.4752,0.009\n9468,51.855469,-26.276812,220.627031,-55.293792,1,nan,0.7969,0.0460,43.4923,0.014\n9469,32.871094,-4.780192,166.959493,-60.615132,1,0.8180,0.7700,0.0220,43.4006,0.017\n9482,53.085938,-28.122234,224.100909,-54.509752,1,nan,0.7314,0.0368,43.2634,0.007\n9497,51.328125,-27.784405,223.130589,-55.999499,1,0.4664,0.4234,0.0109,41.8306,0.013\n9507,359.811707,-45.191612,329.485675,-69.150905,1,nan,0.7375,0.0178,43.2857,0.010\n9550,346.130127,-63.072620,321.423103,-50.042305,1,0.6995,0.7070,0.0097,43.1732,0.020\n9554,51.679688,-27.447618,222.618229,-55.642263,1,0.6646,0.6800,0.0238,43.0701,0.010\n9562,349.160583,-64.760857,318.219706,-49.458924,1,nan,0.3891,0.0546,41.6136,0.020\n9566,52.910156,-25.944481,220.366350,-54.301439,1,nan,0.8552,0.0311,43.6808,0.010\n9588,51.855469,-28.630989,224.733260,-55.649872,1,nan,0.8782,0.0321,43.7520,0.009\n9597,53.964844,-28.630989,225.142950,-53.813613,1,nan,0.8452,0.2344,43.6493,0.009\n9603,349.046051,-61.943836,320.796530,-51.753706,1,nan,1.6743,0.3261,45.4832,0.017\n9626,150.644531,3.583322,235.698235,43.342784,1,nan,1.0665,0.0241,44.2730,0.018\n9652,53.613281,-28.630989,225.073365,-54.119461,1,nan,0.6624,0.0296,43.0006,0.006\n9662,53.613281,-26.944359,222.237403,-53.863858,1,nan,1.4334,0.0635,45.0673,0.009\n9676,1.753247,-46.768478,324.030235,-68.498041,1,0.6764,0.7221,0.0336,43.2295,0.014\n9678,151.523438,3.134927,236.900695,43.803170,1,nan,1.1508,0.7055,44.4777,0.019\n9699,2.097458,-45.783966,324.737840,-
69.478613,1,nan,0.6400,0.1023,42.9095,0.011\n9705,352.711273,-63.823658,316.922299,-51.059403,1,0.5977,0.5685,0.0384,42.5972,0.024\n9720,351.734680,-62.884678,318.284128,-51.651217,1,0.6071,0.5353,0.0256,42.4395,0.019\n9725,350.230255,-61.943836,320.053946,-52.070537,1,nan,1.2775,0.0723,44.7581,0.017\n9726,358.312500,-44.993881,332.185785,-68.685906,1,nan,0.7580,0.0243,43.3588,0.009\n9761,52.207031,-28.291550,224.208534,-55.300157,1,nan,1.4229,0.1718,45.0476,0.007\n9772,53.613281,-26.944359,222.237403,-53.863858,1,nan,0.5526,0.0238,42.5224,0.009\n9776,351.953644,-62.132156,318.777388,-52.347124,1,0.4229,0.4114,0.0259,41.7568,0.019\n9782,149.589844,3.583322,234.885369,42.474696,1,nan,1.1776,0.1586,44.5394,0.024\n9799,346.276581,-64.011238,320.448031,-49.344136,1,nan,0.9878,0.3118,44.0673,0.019\n9806,0.965665,-46.375080,325.845907,-68.579427,1,0.4619,0.5359,0.0762,42.4424,0.007\n9809,347.812500,-63.448284,320.128971,-50.202348,1,nan,0.6301,0.0342,42.8682,0.021\n9839,53.613281,-27.953188,223.929533,-54.024772,1,nan,1.5628,0.1334,45.2989,0.007\n9864,0.929752,-44.597992,328.531426,-70.083244,1,nan,0.7718,0.0734,43.4067,0.011\n9865,150.117188,3.732834,235.120533,42.993809,1,0.4651,2.3854,0.8502,46.4240,0.020\n9868,54.667969,-27.615883,223.610785,-53.050840,1,nan,0.7611,0.0247,43.3696,0.009\n9884,51.328125,-27.447618,222.535046,-55.950727,1,0.6206,0.6001,0.0161,42.7395,0.013\n9893,33.750000,-4.630479,168.146242,-59.949072,1,0.4883,0.5053,0.0956,42.2888,0.019\n9916,349.891296,-64.573555,317.972107,-49.786192,1,nan,0.6122,0.0358,42.7920,0.023\n9924,349.285706,-62.884678,319.786163,-51.046461,1,0.7986,0.8711,0.0680,43.7303,0.018\n9950,51.328125,-27.447618,222.535046,-55.950727,1,nan,0.5462,0.0962,42.4921,0.013\n9951,346.276581,-64.011238,320.448031,-49.344136,1,0.8417,0.7427,0.0547,43.3044,0.019\n9961,53.789062,-27.784405,223.685697,-53.845803,1,nan,1.0492,0.2680,44.2292,0.009\n9967,150.292969,2.686724,236.427488,42.541447,1,nan,1.4395,0.2623,45.0786,0.016\n9989,346.130127,-6
3.072620,321.423103,-50.042305,1,nan,0.9517,0.2190,43.9673,0.020\n9998,2.457983,-45.389202,324.632685,-69.945696,1,nan,0.5127,0.0376,42.3265,0.011\n10021,52.207031,-28.630989,224.800211,-55.343637,1,nan,1.4337,0.0978,45.0679,0.009\n10056,52.558594,-27.279613,222.538937,-54.845107,1,0.4511,0.3564,0.0517,41.3900,0.008\n10071,0.949367,-45.586655,326.991548,-69.251686,1,0.3219,0.4590,0.0597,42.0387,0.013\n10102,359.058563,-45.191612,330.695783,-68.844915,1,nan,0.7450,0.0248,43.3128,0.011\n10103,359.805206,-46.768478,327.135979,-67.829903,1,nan,0.8130,0.0375,43.5455,0.011\n10110,54.667969,-27.615883,223.610785,-53.050840,1,0.7638,0.8007,0.0367,43.5050,0.009\n10119,351.953644,-62.132156,318.777388,-52.347124,1,0.6209,0.6434,0.0311,42.9234,0.019\n10133,352.398651,-62.696659,318.017427,-51.967966,1,nan,0.9030,0.0216,43.8266,0.020\n10139,350.230255,-61.943836,320.053946,-52.070537,1,nan,1.3210,0.0562,44.8481,0.017\n10146,346.500000,-62.320400,321.951129,-50.736054,1,nan,0.5522,0.0066,42.5205,0.020\n10150,359.814819,-44.399834,330.775011,-69.801007,1,0.4395,2.7379,0.4044,46.7868,0.009\n10155,0.589520,-47.161343,325.385896,-67.769893,1,nan,1.0100,0.2301,44.1270,0.009\n10180,359.811707,-45.191612,329.485675,-69.150905,1,nan,0.5829,0.0112,42.6631,0.010\n10213,359.058563,-45.191612,330.695783,-68.844915,1,nan,0.5634,0.0192,42.5732,0.011\n10218,52.558594,-27.279613,222.538937,-54.845107,1,0.3131,0.3414,0.0364,41.2809,0.008\n10228,151.171875,1.342993,238.602520,42.464379,1,nan,0.7524,0.0449,43.3390,0.026\n10244,53.789062,-27.784405,223.685697,-53.845803,1,nan,0.7687,0.0207,43.3962,0.009\n"
  },
  {
    "path": "examples/data/plasticc_training_set_1k.csv",
    "content": "object_id,mjd,passband,flux,flux_err,detected\n615,59750.4229,2,-544.810303,3.622952,1\n615,59750.4306,1,-816.434326,5.553370,1\n615,59750.4383,3,-471.385529,3.801213,1\n615,59750.4450,4,-388.984985,11.395031,1\n615,59752.4070,2,-681.858887,4.041204,1\n615,59752.4147,1,-1061.457031,6.472994,1\n615,59752.4224,3,-524.954590,3.552751,1\n615,59752.4334,4,-393.480225,3.599346,1\n615,59752.4435,5,-355.886780,10.421921,1\n615,59767.2968,2,-548.013550,3.462291,1\n615,59767.3045,1,-815.188599,5.293019,1\n615,59767.3122,3,-475.516052,3.340643,1\n615,59767.3233,4,-405.663818,3.496113,1\n615,59767.3343,5,-421.199066,6.377517,1\n615,59770.2179,2,-554.903198,3.927843,1\n615,59770.2256,1,-820.042786,5.875329,1\n615,59770.2334,3,-477.004730,3.736262,1\n615,59770.2445,4,-400.270386,3.834955,1\n615,59770.2557,5,-415.286896,7.435979,1\n615,59779.3188,2,-630.523682,4.333287,1\n615,59779.3265,1,-921.002502,6.306800,1\n615,59779.3342,3,-518.533997,3.915225,1\n615,59779.3452,4,-422.184509,4.089213,1\n615,59779.3562,5,-422.815094,8.124096,1\n615,59782.1897,2,-280.039520,2.819228,1\n615,59782.1974,1,-449.095612,4.028310,1\n615,59782.2051,3,-316.704865,3.491153,1\n615,59782.2162,4,-332.885437,4.021619,1\n615,59782.2274,5,-365.075775,8.514805,1\n615,59797.2861,2,391.399231,3.098059,1\n615,59797.2938,1,35.511822,3.163646,1\n615,59797.3015,3,330.623901,3.388776,1\n615,59797.3126,4,360.397858,3.980607,1\n615,59797.3237,5,369.439667,8.207490,1\n615,59800.3168,2,168.739899,3.128495,1\n615,59800.3244,1,129.541901,4.358776,1\n615,59800.3320,3,30.120724,3.396606,1\n615,59800.3429,4,-60.942333,3.704243,1\n615,59800.3539,5,-128.920334,7.495701,1\n615,59807.1738,2,-256.660980,2.781354,1\n615,59807.1815,1,-420.796417,4.037735,1\n615,59807.1892,3,-298.936859,3.041390,1\n615,59807.2003,4,-311.977783,3.318007,1\n615,59807.2114,5,-344.536072,6.367201,1\n615,59810.1045,2,-342.819763,2.808321,1\n615,59810.1122,1,-527.020325,4.204173,1\n615,59810.1200,3,-363.282532,3.237536,1\n615,59810.1311,4,-
348.628662,3.774855,1\n615,59810.1422,5,-391.271271,7.657067,1\n615,59813.1044,2,-678.045715,4.032819,1\n615,59813.1122,1,-1100.440063,6.709106,1\n615,59813.1199,3,-506.687408,3.547398,1\n615,59813.1310,4,-304.049713,3.590496,1\n615,59813.1422,5,-187.285919,6.984019,1\n615,59819.1532,0,6.878784,3.633152,0\n615,59820.1047,0,39.364853,3.775619,1\n615,59821.1026,0,-10.422381,4.172683,0\n615,59822.1105,0,-65.485130,4.362876,1\n615,59823.1505,0,-113.349159,4.069051,1\n615,59835.0600,2,-54.949490,2.575779,1\n615,59835.0678,1,-178.149399,3.809858,1\n615,59835.0755,3,-140.818436,3.149077,1\n615,59835.0866,4,-200.294128,3.578005,1\n615,59835.0978,5,-263.578430,6.954262,1\n615,59839.0306,2,-639.035950,3.928531,1\n615,59839.0384,1,-953.883728,6.035410,1\n615,59839.0461,3,-518.293274,3.595869,1\n615,59839.0573,4,-418.723907,3.536483,1\n615,59839.0684,5,-418.799927,6.580595,1\n615,59842.0207,2,-502.215332,3.348443,1\n615,59842.0285,1,-1003.971497,6.256784,1\n615,59842.0362,3,-233.167755,2.872840,1\n615,59842.0473,4,111.507675,3.229112,1\n615,59842.0585,5,206.425323,6.615822,1\n615,59851.1114,0,-68.502457,3.338555,1\n615,59854.0796,2,459.452667,3.336711,1\n615,59854.0873,1,217.894211,3.332742,1\n615,59854.0950,3,361.023438,3.237847,1\n615,59854.1061,4,374.446442,3.622074,1\n615,59854.1172,5,370.346283,6.789766,1\n615,59857.0453,2,599.812195,4.121032,1\n615,59857.0531,1,646.523193,5.291624,1\n615,59857.0608,3,354.961365,3.407785,1\n615,59857.0719,4,293.879608,3.581862,1\n615,59857.0830,5,232.535995,6.761845,1\n615,59864.0162,2,-637.105347,3.818432,1\n615,59864.0239,1,-942.167908,5.916004,1\n615,59864.0316,3,-524.586548,3.538931,1\n615,59864.0428,4,-414.447723,3.635253,1\n615,59864.0539,5,-408.089233,7.119730,1\n615,59867.0178,2,-332.763123,2.872951,1\n615,59867.0255,1,-910.677734,5.852032,1\n615,59867.0332,3,-62.065010,3.073413,1\n615,59867.0443,4,202.288223,4.026751,1\n615,59867.0554,5,270.584869,10.321785,1\n615,59870.0194,2,604.344543,3.901750,1\n615,59870.0272,1,659.486694,4.9
91051,1\n615,59870.0349,3,373.986511,3.294667,1\n615,59870.0459,4,322.604034,3.716555,1\n615,59870.0571,5,263.481476,7.193131,1\n615,59873.0212,2,4.656033,2.130510,0\n615,59873.0289,1,-98.796974,3.034533,1\n615,59873.0366,3,-93.732880,2.499724,1\n615,59873.0477,4,-165.793457,2.906058,1\n615,59873.0588,5,-233.501724,6.087882,1\n615,59874.0599,0,-97.353195,3.133990,1\n615,59875.0311,0,-97.523880,2.963075,1\n615,59876.0231,0,-108.672577,3.449714,1\n615,59877.0238,0,-116.913223,3.097836,1\n615,59878.0246,0,-102.768921,3.135772,1\n615,59879.0248,0,-52.407089,3.261559,1\n615,59880.0258,0,55.567715,3.355268,1\n615,59884.0823,2,-274.711029,2.572093,1\n615,59884.0900,1,-437.425110,3.831595,1\n615,59884.0976,3,-310.010925,2.957125,1\n615,59884.1085,4,-317.630920,3.213168,1\n615,59884.1195,5,-351.278198,6.231324,1\n615,59887.0298,2,-491.146423,3.592675,1\n615,59887.0375,1,-743.267212,5.624708,1\n615,59887.0451,3,-449.714752,3.728483,1\n615,59887.0562,4,-393.971649,3.532816,1\n615,59887.0673,5,-406.549103,6.648589,1\n615,60118.4163,0,-107.080536,3.102513,1\n615,60124.2541,2,-588.397949,4.176047,1\n615,60124.2618,1,-878.043396,6.210247,1\n615,60124.2695,3,-495.472015,4.011444,1\n615,60124.2807,4,-417.145325,4.094360,1\n615,60124.2918,5,-413.673431,7.793959,1\n615,60140.2290,0,-88.981155,3.468430,1\n615,60141.2225,0,-50.179337,4.734193,1\n615,60142.2202,0,50.008640,4.636651,1\n615,60143.2212,0,110.753555,4.380840,1\n615,60144.2186,0,120.867218,4.103332,1\n615,60145.2123,0,111.464226,4.367030,1\n615,60153.2274,2,-322.420471,2.833071,1\n615,60153.2351,1,-917.875488,5.951387,1\n615,60153.2428,3,-52.056461,2.698249,1\n615,60153.2539,4,205.180893,3.169676,1\n615,60153.2650,5,269.709167,6.682271,1\n615,60162.1477,2,31.499735,2.753767,1\n615,60162.1554,1,-62.120552,3.818642,1\n615,60162.1631,3,-72.958771,3.515574,1\n615,60162.1742,4,-151.126511,3.838288,1\n615,60162.1853,5,-216.914032,8.832489,1\n615,60165.1369,2,-568.408875,3.553168,1\n615,60165.1446,1,-836.233154,5.405759,1\n615,60165
.1524,3,-483.071381,3.387615,1\n615,60165.1635,4,-409.470642,3.541994,1\n615,60165.1746,5,-412.820221,6.907444,1\n615,60168.1260,2,-628.321350,3.804775,1\n615,60168.1337,1,-1077.347900,6.591075,1\n615,60168.1414,3,-421.859406,3.336656,1\n615,60168.1525,4,-93.729095,3.211201,1\n615,60168.1637,5,31.207939,6.615005,0\n615,60176.1332,0,-49.905262,3.874426,1\n615,60177.1370,0,-87.160583,4.078375,1\n615,60181.3147,2,-180.729568,2.498579,1\n615,60181.3223,1,-339.875153,3.626661,1\n615,60181.3299,3,-249.205673,3.280824,1\n615,60181.3409,4,-275.762329,4.694962,1\n615,60181.3518,5,-330.891327,9.580047,1\n615,60184.3625,2,-555.853943,3.979171,1\n615,60184.3701,1,-1028.441528,6.719577,1\n615,60184.3777,3,-306.200500,3.557627,1\n615,60184.3887,4,49.555847,3.627351,1\n615,60184.3996,5,154.876785,7.988054,1\n615,60194.1575,2,469.654999,3.386857,1\n615,60194.1652,1,276.757751,3.594162,1\n615,60194.1729,3,374.669556,3.437137,1\n615,60194.1839,4,374.948822,4.267094,1\n615,60194.1926,5,363.130493,12.845472,1\n615,60197.1181,2,607.786804,3.960346,1\n615,60197.1258,1,650.984314,4.970811,1\n615,60197.1335,3,365.408752,3.197298,1\n615,60197.1446,4,305.330750,3.360043,1\n615,60197.1557,5,256.966217,6.443069,1\n615,60198.1077,0,100.129280,4.266314,1\n615,60199.0914,0,86.776741,4.679742,1\n615,60200.0650,0,82.078186,4.342434,1\n615,60201.0680,0,41.947815,4.467065,1\n615,60202.0552,0,9.061676,3.831397,0\n615,60206.1107,0,-83.072884,3.130236,1\n615,60207.1469,0,108.483109,4.458607,1\n615,60208.0229,2,-672.681335,4.138056,1\n615,60208.0307,1,-1094.027588,6.717340,1\n615,60208.0384,3,-503.870422,3.665424,1\n615,60208.0495,4,-284.747498,3.666287,1\n615,60208.0606,5,-176.409851,8.308295,1\n615,60211.0124,2,208.281052,3.229784,1\n615,60211.0202,1,-370.189575,4.525907,1\n615,60211.0279,3,269.200806,3.688238,1\n615,60211.0390,4,326.272308,4.424663,1\n615,60211.0502,5,358.320099,10.152412,1\n615,60221.0153,2,-648.682312,3.906177,1\n615,60221.0230,1,-1086.777710,6.620100,1\n615,60221.0308,3,-455.588196
,3.295532,1\n615,60221.0419,4,-145.305023,3.087424,1\n615,60221.0530,5,2.939076,6.798505,0\n615,60224.0140,2,491.748383,3.509416,1\n615,60224.0217,1,346.335083,3.835582,1\n615,60224.0294,3,384.185303,3.529593,1\n615,60224.0405,4,381.953735,3.885009,1\n615,60224.0516,5,378.118225,7.311360,1\n615,60227.0151,2,341.057709,2.940147,1\n615,60227.0228,1,356.632690,3.856145,1\n615,60227.0305,3,153.004929,2.847803,1\n615,60227.0416,4,52.912033,3.109148,1\n615,60227.0527,5,-19.384567,6.713308,0\n615,60228.0187,0,6.768485,4.174600,0\n615,60229.0162,0,-35.149330,4.086384,1\n615,60234.0265,0,-52.922794,3.681808,1\n615,60237.2206,2,-676.669189,4.009161,1\n615,60237.2283,1,-1098.651489,6.689435,1\n615,60237.2359,3,-511.148254,3.546333,1\n615,60237.2468,4,-347.090027,3.624199,1\n615,60237.2578,5,-240.316895,8.035271,1\n615,60240.0223,2,85.162651,2.829378,1\n615,60240.0300,1,14.526012,3.783879,0\n615,60240.0377,3,-24.350578,3.397041,1\n615,60240.0488,4,-111.062698,3.698180,1\n615,60240.0598,5,-180.234787,6.894514,1\n615,60249.0338,2,611.984558,3.908728,1\n615,60249.0415,1,660.626343,4.961018,1\n615,60249.0492,3,386.311920,3.240422,1\n615,60249.0602,4,325.401184,3.454910,1\n615,60249.0712,5,280.721069,6.623785,1\n615,60260.0423,0,108.020546,4.337497,1\n615,60261.0361,0,125.182808,3.909554,1\n615,60262.0367,0,107.649780,3.796611,1\n615,60263.0373,0,61.068066,3.877589,1\n615,60264.0465,0,-9.100937,3.518127,0\n615,60490.2647,2,-408.570984,3.169784,1\n615,60490.2725,1,-624.518799,4.704853,1\n615,60490.2802,3,-405.614258,3.513195,1\n615,60490.2913,4,-371.286377,3.985296,1\n615,60490.3024,5,-395.406128,8.139952,1\n615,60493.2372,2,-680.489441,4.065931,1\n615,60493.2450,1,-1031.102905,6.378702,1\n615,60493.2527,3,-530.644592,3.672556,1\n615,60493.2639,4,-406.733521,3.772714,1\n615,60493.2750,5,-358.876160,7.310321,1\n615,60499.2467,0,106.447296,4.481476,1\n615,60500.2437,0,67.234062,4.020935,1\n615,60501.2385,0,24.868933,4.027500,1\n615,60502.2355,0,-15.392517,4.142292,0\n615,60508.2638,2,3
65.607056,3.696270,1\n615,60508.2715,1,-32.986282,4.440859,0\n615,60508.2792,3,319.249847,3.828632,1\n615,60508.2903,4,360.507599,4.336362,1\n615,60508.3014,5,370.305267,8.601955,1\n615,60524.2390,0,89.070496,3.901179,1\n615,60525.1736,0,118.935989,4.737393,1\n615,60532.3489,2,510.690094,3.489832,1\n615,60532.3565,1,566.281433,4.607503,1\n615,60532.3641,3,271.663910,3.007311,1\n615,60532.3751,4,204.409866,3.353202,1\n615,60532.3860,5,128.521851,7.134325,1\n615,60535.1253,2,-664.729675,4.282414,1\n615,60535.1330,1,-1084.891113,6.952323,1\n615,60535.1408,3,-488.010925,3.928481,1\n615,60535.1519,4,-222.254257,4.034600,1\n615,60535.1630,5,-85.524307,8.625449,1\n615,60538.2351,2,113.021248,2.712380,1\n615,60538.2428,1,51.060081,3.762334,1\n615,60538.2505,3,-4.268328,2.895656,0\n615,60538.2615,4,-96.020035,3.141703,1\n615,60538.2725,5,-175.912643,6.308159,1\n615,60546.3406,2,178.624359,2.905459,1\n615,60546.3482,1,142.089966,4.065646,1\n615,60546.3558,3,41.418739,3.163731,1\n615,60546.3668,4,-52.460590,3.784039,1\n615,60546.3777,5,-112.286079,8.527776,1\n615,60549.0879,2,-629.010254,3.867215,1\n615,60549.0956,1,-1076.652100,6.604701,1\n615,60549.1034,3,-435.558533,3.417534,1\n615,60549.1145,4,-111.499573,3.424588,1\n615,60549.1256,5,30.267401,7.478198,0\n615,60554.0964,0,82.168922,4.318140,1\n615,60555.0951,0,49.886921,3.917516,1\n615,60556.0879,0,9.075453,4.103900,0\n615,60557.0831,0,-30.764908,3.555157,1\n615,60558.1093,0,-101.419899,3.653430,1\n615,60559.1097,0,-110.688477,3.426444,1\n615,60560.1065,0,-114.774445,4.013463,1\n615,60567.2821,2,-447.681580,3.368270,1\n615,60567.2897,1,-972.201111,6.347886,1\n615,60567.2973,3,-176.163651,2.960412,1\n615,60567.3083,4,140.860611,3.302721,1\n615,60567.3192,5,228.033112,6.797573,1\n615,60574.1118,2,-143.843872,2.547544,1\n615,60574.1195,1,-812.792908,5.570116,1\n615,60574.1272,3,86.606873,2.966459,1\n615,60574.1383,4,257.570221,3.575394,1\n615,60574.1493,5,302.167328,7.100554,1\n615,60577.0186,2,-425.988464,3.239578,1\n615,605
77.0263,1,-963.216980,6.134610,1\n615,60577.0340,3,-148.178238,3.247297,1\n615,60577.0451,4,161.872543,4.228243,1\n615,60577.0563,5,238.576889,9.461221,1\n615,60580.0095,2,586.178345,3.996895,1\n615,60580.0173,1,655.284058,5.148244,1\n615,60580.0250,3,445.737061,3.952905,1\n615,60580.0361,4,361.595764,4.508256,1\n615,60580.0472,5,328.836731,9.460338,1\n615,60582.0840,0,-51.614189,3.517908,1\n615,60583.0169,0,20.364273,4.460314,0\n615,60584.0117,0,-24.682575,3.866380,1\n615,60585.0117,0,-63.546600,3.497667,1\n615,60586.0123,0,-101.819290,3.383004,1\n615,60587.0127,0,-110.978699,3.555624,1\n615,60588.0131,0,-113.588432,3.241369,1\n615,60593.0636,2,226.696259,2.514855,1\n615,60593.0713,1,205.029755,3.258004,1\n615,60593.0790,3,73.384720,2.584785,1\n615,60593.0901,4,-19.212976,2.899512,0\n615,60593.1012,5,-83.394951,6.073453,1\n615,60596.0304,2,-224.917938,3.388916,1\n615,60596.0381,1,-388.231476,4.931039,1\n615,60596.0458,3,-274.108429,3.720238,1\n615,60596.0569,4,-292.558990,4.188871,1\n615,60596.0680,5,-354.074280,8.392479,1\n615,60603.0208,2,404.391388,3.043772,1\n615,60603.0286,1,70.494507,3.060846,1\n615,60603.0363,3,338.994537,3.051842,1\n615,60603.0473,4,362.888550,3.381572,1\n615,60603.0584,5,378.188141,6.295821,1\n615,60606.0225,2,422.610779,3.198191,1\n615,60606.0303,1,457.502197,4.173640,1\n615,60606.0379,3,205.937546,2.957614,1\n615,60606.0490,4,123.048210,3.402847,1\n615,60606.0601,5,33.726837,7.368811,0\n615,60609.0247,2,-355.611389,2.720825,1\n615,60609.0323,1,-537.169312,4.090708,1\n615,60609.0400,3,-372.485565,2.985755,1\n615,60609.0510,4,-350.518677,3.225662,1\n615,60609.0621,5,-371.873230,6.150734,1\n615,60612.0266,0,-110.649872,2.844200,1\n615,60613.0269,0,-89.973892,2.937887,1\n615,60614.0276,0,-10.015225,3.212408,0\n615,60615.0375,0,99.438087,3.662484,1\n615,60616.0290,0,120.849113,3.776495,1\n615,60617.0295,0,121.411896,3.569777,1\n615,60621.1734,2,56.559818,2.259825,1\n615,60621.1810,1,-607.040771,4.452463,1\n615,60621.1886,3,208.770279,2.959783
,1\n615,60621.1996,4,297.624725,3.718585,1\n615,60621.2105,5,332.919006,8.157172,1\n615,60624.1760,2,552.150269,3.917989,1\n615,60624.1836,1,607.047668,5.140991,1\n615,60624.1913,3,296.946533,3.475000,1\n615,60624.2022,4,235.489929,3.926538,1\n615,60624.2132,5,157.080200,8.453112,1\n713,59825.2600,2,9.110147,1.013889,0\n713,59825.2676,1,7.615042,1.160329,1\n713,59825.2752,3,6.673631,1.932316,1\n713,59825.2862,4,5.214194,3.018003,0\n713,59825.2971,5,12.060948,7.163382,0\n713,59839.2161,2,4.953065,1.196956,0\n713,59839.2236,1,3.131028,1.351706,0\n713,59839.2313,3,6.108739,1.789895,0\n713,59839.2422,4,8.283792,2.527953,0\n713,59839.2532,5,9.686500,5.882469,0\n713,59842.1987,2,6.472355,1.110572,0\n713,59842.2064,1,5.914848,1.134476,1\n713,59842.2140,3,5.311658,1.683777,0\n713,59842.2250,4,4.680908,2.349234,0\n713,59842.2359,5,6.921503,5.573885,0\n713,59851.2006,0,7.267655,2.866838,0\n713,59854.2089,2,3.945918,0.922779,0\n713,59854.2165,1,2.956484,0.953529,0\n713,59854.2242,3,4.768611,1.422675,0\n713,59854.2351,4,4.065430,1.943197,0\n713,59854.2461,5,1.290383,3.980583,0\n713,59857.1879,2,3.937931,2.088610,0\n713,59857.1956,1,-2.223347,2.861396,0\n713,59857.2032,3,-0.476698,2.280299,0\n713,59857.2141,4,8.054095,2.712542,0\n713,59857.2251,5,-9.332252,5.174713,0\n713,59867.1600,2,1.380378,1.105223,0\n713,59867.1676,1,1.569406,1.369900,0\n713,59867.1753,3,1.433712,1.730335,0\n713,59867.1862,4,5.299760,2.530279,0\n713,59867.1971,5,2.355590,5.574841,0\n713,59870.1521,2,3.404463,0.889940,0\n713,59870.1597,1,2.961649,0.946383,0\n713,59870.1673,3,2.594970,1.322884,0\n713,59870.1782,4,2.805032,1.753258,0\n713,59870.1892,5,3.101222,4.041600,0\n713,59873.1442,2,3.869869,0.763644,0\n713,59873.1519,1,4.354049,0.790372,1\n713,59873.1595,3,2.949366,1.101570,0\n713,59873.1704,4,3.117238,1.623630,0\n713,59873.1814,5,4.010789,3.832515,0\n713,59874.1612,0,4.171277,2.180456,0\n713,59875.1175,0,2.671449,2.536783,0\n713,59876.1160,0,-0.354117,2.566333,0\n713,59877.1178,0,2.332870,2.217193,0\n7
13,59878.1127,0,3.797837,2.270967,0\n713,59879.1104,0,4.900619,2.339577,0\n713,59880.1181,0,1.331082,1.865762,0\n713,59884.1292,2,3.091794,0.995209,0\n713,59884.1368,1,1.712878,1.129837,0\n713,59884.1444,3,2.552051,1.229161,0\n713,59884.1554,4,2.099711,1.737169,0\n713,59884.1663,5,5.788035,3.869600,0\n713,59887.0951,2,5.011691,1.668699,0\n713,59887.1027,1,3.719429,2.314904,0\n713,59887.1103,3,10.036420,1.852814,0\n713,59887.1213,4,5.964674,2.277389,0\n713,59887.1322,5,8.822542,4.767565,0\n713,59896.0839,2,5.064992,0.999215,0\n713,59896.0915,1,5.780192,1.083338,0\n713,59896.0992,3,3.587355,1.642954,1\n713,59896.1101,4,6.185760,2.277758,0\n713,59896.1211,5,-4.762829,5.042903,0\n713,59899.0854,2,4.823127,1.244829,0\n713,59899.0930,1,6.899071,1.246326,0\n713,59899.1007,3,3.249064,2.133093,0\n713,59899.1116,4,7.382133,3.388385,0\n713,59899.1226,5,-4.677240,9.115748,0\n713,59902.0445,2,9.166100,1.426165,0\n713,59902.0522,1,8.076466,1.395627,0\n713,59902.0598,3,11.330316,2.051576,1\n713,59902.0707,4,9.245844,2.876306,0\n713,59902.0817,5,0.942024,7.248375,0\n713,59904.0584,0,3.223553,2.679078,0\n713,59905.0468,0,14.509829,3.098125,0\n713,59906.0474,0,5.995616,2.589032,0\n713,59907.0480,0,5.440472,2.469325,0\n713,59908.0487,0,5.961231,3.348282,0\n713,59909.0494,0,10.137896,2.151001,0\n713,59910.0590,0,8.248549,2.160179,0\n713,59913.2446,2,5.475236,0.822163,1\n713,59913.2522,1,6.833441,0.969664,0\n713,59913.2599,3,6.275328,1.430679,0\n713,59913.2708,4,4.298039,2.311868,0\n713,59913.2818,5,3.143612,5.875287,0\n713,59916.0544,2,10.529041,1.787002,0\n713,59916.0621,1,9.129021,2.415574,0\n713,59916.0697,3,5.509865,2.141148,0\n713,59916.0806,4,9.827934,2.274502,0\n713,59916.0915,5,2.627945,4.551546,0\n713,59924.0589,2,5.190053,0.786980,1\n713,59924.0665,1,6.531730,0.851491,1\n713,59924.0742,3,9.141804,1.210878,1\n713,59924.0851,4,9.810373,1.739901,0\n713,59924.0961,5,2.349317,4.040898,0\n713,59927.0604,2,5.366942,0.863455,0\n713,59927.0680,1,4.619713,0.947374,0\n713,59927.0756,3,6
.296741,1.472587,0\n713,59927.0866,4,2.465199,2.073566,0\n713,59927.0975,5,-0.702472,6.396966,0\n713,59930.0619,2,2.780317,0.759708,1\n713,59930.0695,1,4.959312,0.809846,0\n713,59930.0771,3,4.033259,1.196190,0\n713,59930.0881,4,4.485665,1.901773,0\n713,59930.1063,5,6.218721,4.791905,0\n713,59933.0632,2,1.830853,0.639458,0\n713,59933.0709,1,1.716145,0.707228,0\n713,59933.0785,3,4.893567,0.968482,0\n713,59933.0944,4,3.197614,1.429430,0\n713,59933.1150,5,3.335699,3.523145,0\n713,59935.0739,0,0.554208,1.573855,0\n713,59936.0735,0,2.584441,1.804314,0\n713,59937.0743,0,6.470248,1.848658,0\n713,59938.0754,0,0.724684,2.076312,0\n713,59939.0808,0,2.375108,2.243821,0\n713,59942.0746,2,-0.148046,0.800387,0\n713,59942.0889,1,0.648101,0.878962,0\n713,59942.0965,3,0.328905,1.080046,0\n713,59942.1074,4,-4.550706,1.669870,0\n713,59942.1184,5,2.364145,4.108390,0\n713,59945.0770,2,-3.002108,1.474453,0\n713,59945.0846,1,-1.725136,2.059556,0\n713,59945.0922,3,-1.422123,1.846779,0\n713,59945.1032,4,-6.208874,2.036851,0\n713,59945.1141,5,-2.945050,4.744831,0\n713,60192.2930,2,7.250862,1.972519,0\n713,60192.3006,1,7.834616,2.653803,0\n713,60192.3082,3,6.543319,2.216304,0\n713,60192.3192,4,7.960829,2.695718,0\n713,60192.3301,5,3.404367,5.894906,0\n713,60195.2343,2,4.326025,1.101823,0\n713,60195.2419,1,3.302556,1.100982,1\n713,60195.2496,3,5.533146,1.655807,0\n713,60195.2605,4,5.854890,2.367182,0\n713,60195.2715,5,3.825871,5.774144,0\n713,60198.2332,0,2.208139,3.192551,0\n713,60199.2358,0,8.620851,2.547614,0\n713,60200.2314,0,3.770694,2.643626,0\n713,60201.2281,0,6.634655,2.975509,0\n713,60202.2255,0,9.813441,2.750465,0\n713,60209.2281,2,3.984369,1.161990,0\n713,60209.2357,1,1.381281,1.139709,0\n713,60209.2433,3,1.715379,1.724909,0\n713,60209.2543,4,0.545876,2.622813,0\n713,60209.2652,5,6.504875,6.293293,0\n713,60212.2147,2,2.873843,1.790648,0\n713,60212.2223,1,1.546698,2.668681,0\n713,60212.2300,3,5.084908,2.077699,0\n713,60212.2409,4,3.087726,2.654123,0\n713,60212.2519,5,-2.272981,5.84429
8,0\n713,60223.1948,2,0.405613,1.246678,0\n713,60223.2024,1,1.120193,1.277229,0\n713,60223.2100,3,1.011539,1.974625,0\n713,60223.2210,4,3.507817,2.882992,0\n713,60223.2319,5,14.770886,6.656366,0\n713,60226.2721,2,1.071414,0.746168,0\n713,60226.2797,1,1.648819,0.776689,0\n713,60226.2931,3,1.727918,1.133994,0\n713,60226.3129,4,-0.916487,1.736045,0\n713,60226.3238,5,3.996732,4.304620,0\n713,60236.1862,0,4.067199,2.328237,0\n713,60238.2696,2,-0.087907,0.758784,0\n713,60238.2803,1,-0.829578,0.881391,0\n713,60238.2879,3,-0.576265,1.265385,0\n713,60238.2988,4,0.329135,1.983817,0\n713,60238.3098,5,-4.923808,4.832184,0\n713,60241.1342,2,2.333379,1.636026,0\n713,60241.1418,1,-0.293893,2.097461,0\n713,60241.1495,3,3.571144,1.816384,0\n713,60241.1604,4,1.146531,2.471305,0\n713,60241.1713,5,-7.436915,5.122927,0\n713,60260.0773,0,1.232121,3.040076,0\n713,60261.0632,0,2.412768,2.796987,0\n713,60262.0637,0,-1.678317,2.631186,0\n713,60263.0643,0,-0.390618,2.617705,0\n713,60264.0716,0,1.131548,2.522264,0\n713,60265.0867,0,-2.189290,2.832789,0\n713,60267.0443,2,-2.785007,1.335972,0\n713,60267.0519,1,-1.797494,1.524965,0\n713,60267.0595,3,-4.881196,1.920069,0\n713,60267.0705,4,3.231234,2.734247,0\n713,60267.0814,5,-12.699218,5.767424,0\n713,60270.1082,2,0.393975,2.344530,0\n713,60270.1158,1,2.370688,3.545214,0\n713,60270.1234,3,2.489378,2.970400,0\n713,60270.1344,4,-7.822262,3.554679,0\n713,60270.1453,5,-10.877887,6.829591,0\n713,60278.0525,2,-0.607012,0.969379,0\n713,60278.0601,1,-0.027766,1.022582,0\n713,60278.0677,3,2.024312,1.532588,0\n713,60278.0786,4,-2.256550,2.154194,0\n713,60278.0896,5,-1.112494,5.843420,0\n713,60281.0552,2,-1.695972,1.034572,0\n713,60281.0629,1,-1.833499,1.044974,0\n713,60281.0705,3,-0.919016,1.470630,0\n713,60281.0814,4,-1.391540,2.003621,0\n713,60281.0924,5,-3.945375,4.607381,0\n713,60284.0557,2,-2.572076,0.783297,0\n713,60284.0633,1,-2.387862,0.801296,0\n713,60284.0709,3,-5.832908,1.240340,0\n713,60284.0819,4,-1.649157,1.857165,0\n713,60284.0928,5,8.627832
,4.482957,0\n713,60287.0577,2,-0.764727,0.906658,0\n713,60287.0653,1,-1.477176,0.966182,0\n713,60287.0729,3,-0.536819,1.463827,0\n713,60287.0839,4,-0.667864,2.361719,0\n713,60287.0948,5,-5.286497,6.810267,0\n713,60290.0641,0,-1.816348,2.038470,0\n713,60291.0599,0,-5.132619,1.989085,0\n713,60292.0607,0,-5.080487,1.908693,0\n713,60293.0607,0,-5.075594,1.939040,0\n713,60294.0616,0,-0.566193,1.833754,0\n713,60295.0621,0,-3.857503,2.114682,0\n713,60297.1169,2,-4.830737,0.921245,0\n713,60297.1245,1,-4.334117,1.068175,0\n713,60297.1321,3,-4.767125,1.234546,0\n713,60297.1431,4,-4.473659,1.723665,0\n713,60297.1540,5,-10.414721,4.258311,0\n713,60300.0641,2,-5.492156,1.607434,0\n713,60300.0717,1,-3.599649,2.103016,0\n713,60300.0793,3,-3.476922,1.725975,0\n713,60300.1031,4,-2.745461,2.269754,0\n713,60300.1160,5,7.738044,4.946638,0\n713,60554.2916,0,-9.100129,2.393532,0\n713,60555.2620,0,-8.218450,2.308315,1\n713,60556.2548,0,-5.576579,2.770439,0\n713,60557.2501,0,-9.173389,2.218352,0\n713,60558.2534,0,-13.083604,2.663738,0\n713,60559.2490,0,-9.237353,2.428750,0\n713,60560.2424,0,-10.050170,3.275514,0\n713,60567.2231,2,-8.265152,1.515329,0\n713,60567.2308,1,-8.954789,2.105672,1\n713,60567.2384,3,-8.418892,1.630414,0\n713,60567.2493,4,-12.286801,1.978125,1\n713,60567.2603,5,-11.054881,4.445991,0\n713,60578.2746,2,-4.951467,1.229683,0\n713,60578.2822,1,-7.403615,1.305094,0\n713,60578.2898,3,-5.050255,1.839125,1\n713,60578.3008,4,-7.385537,2.667687,0\n713,60578.3117,5,-6.356452,6.387929,0\n713,60581.1779,2,-5.760825,1.288651,1\n713,60581.1855,1,-7.428378,1.275975,0\n713,60581.1931,3,-6.902376,1.927237,0\n713,60581.2041,4,-9.594004,2.818656,0\n713,60581.2150,5,-14.211164,6.624023,0\n713,60582.2087,0,-11.829331,2.358846,0\n713,60583.1842,0,-9.363182,3.042286,0\n713,60584.1807,0,-9.220502,2.544668,0\n713,60585.1757,0,-3.587870,2.280919,0\n713,60586.1765,0,-9.129416,2.146863,0\n713,60587.1702,0,-5.876253,2.481174,0\n713,60588.1666,0,-9.116284,2.157747,0\n713,60593.1682,2,-9.569608,0.98
5850,1\n713,60593.1758,1,-8.809836,1.078624,1\n713,60593.1834,3,-9.553467,1.347112,1\n713,60593.1944,4,-9.193518,1.914358,1\n713,60593.2053,5,-8.280509,4.493694,0\n713,60596.1820,2,-8.760753,2.037911,0\n713,60596.1896,1,-4.396494,2.863201,0\n713,60596.1972,3,-11.907238,2.252078,1\n713,60596.2081,4,-4.786119,2.784098,0\n713,60596.2191,5,-5.489277,6.255779,0\n713,60605.1380,2,-9.696579,0.873996,1\n713,60605.1456,1,-11.159884,0.883977,1\n713,60605.1532,3,-12.394593,1.305202,1\n713,60605.1642,4,-9.511388,1.906236,0\n713,60605.1751,5,-6.906372,4.587698,0\n713,60608.1308,2,-9.163915,0.800012,1\n713,60608.1384,1,-11.715749,0.823976,1\n713,60608.1460,3,-11.449253,1.202452,0\n713,60608.1569,4,-12.221146,1.749559,1\n713,60608.1679,5,-2.633516,4.132709,0\n713,60611.1227,2,-10.067919,0.717739,1\n713,60611.1303,1,-9.289042,0.761477,1\n713,60611.1380,3,-10.801243,1.080986,1\n713,60611.1489,4,-11.623042,1.560488,1\n713,60611.1599,5,-7.861447,3.710802,0\n713,60612.1183,0,-11.605895,1.778605,1\n713,60613.1019,0,-11.340659,1.930082,1\n713,60614.0960,0,-10.934606,2.143276,1\n713,60615.0917,0,-14.735178,2.326417,0\n713,60616.0927,0,-12.353376,2.357691,1\n713,60617.0896,0,-6.599936,2.023456,0\n713,60620.1350,0,-6.110061,2.056073,0\n713,60621.1263,2,-5.537477,0.829998,1\n713,60621.1339,1,-7.972793,0.877838,1\n713,60621.1416,3,-7.565215,1.233034,1\n713,60621.1525,4,-6.638791,1.780862,0\n713,60621.1635,5,-9.333499,4.242186,0\n713,60624.0821,2,-4.490414,1.314625,0\n713,60624.0897,1,-5.545699,1.695655,0\n713,60624.0974,3,-7.286825,1.600662,0\n713,60624.1083,4,-6.478677,2.086655,0\n713,60624.1193,5,-7.099849,4.810002,0\n713,60627.2801,2,-6.219934,2.530638,0\n713,60627.2877,1,-5.039655,3.228468,0\n713,60627.2954,3,-0.950650,2.782719,0\n713,60627.3063,4,-4.143107,3.379841,0\n713,60627.3173,5,0.243241,7.639313,0\n713,60632.0400,2,-5.855491,1.492071,0\n713,60632.0476,1,-6.847743,1.453141,0\n713,60632.0552,3,-9.374930,2.217679,0\n713,60632.0662,4,-10.557325,3.237312,0\n713,60632.0771,5,-2.921649,7
.857955,0\n713,60635.0469,2,-6.480945,1.091159,1\n713,60635.0545,1,-6.966879,1.087843,1\n713,60635.0621,3,-6.185159,1.629475,1\n713,60635.0731,4,-5.490345,2.377301,0\n713,60635.0840,5,-2.292507,5.646507,0\n713,60640.0504,2,-7.312206,0.974580,1\n713,60640.0580,1,-7.250492,0.991461,0\n713,60640.0656,3,-10.161006,1.479101,1\n713,60640.0766,4,-6.631466,2.145102,0\n713,60640.0875,5,-10.591419,5.138685,0\n713,60643.0609,0,-9.289350,1.992813,1\n713,60644.0533,0,-8.482151,2.118450,0\n713,60645.0537,0,-2.605739,2.197297,1\n713,60646.0548,0,-8.104684,2.135281,0\n713,60647.0546,0,-7.506279,2.275638,0\n713,60648.0553,0,-10.602926,1.838902,1\n713,60649.0561,0,-12.232555,1.708795,0\n713,60651.1265,2,-9.331477,0.865811,1\n713,60651.1451,1,-10.061421,0.932510,1\n713,60651.1527,3,-9.335849,1.315029,1\n713,60651.1637,4,-6.167844,1.952829,0\n713,60651.1746,5,-10.171921,4.815349,0\n713,60654.0597,2,-9.607999,1.647062,1\n713,60654.0673,1,-6.258916,2.222855,0\n713,60654.0749,3,-9.524345,1.901351,0\n713,60654.0859,4,-9.513783,2.389906,0\n713,60654.0968,5,-9.744430,5.404162,0\n713,60662.1451,2,-5.698765,1.334831,0\n713,60662.1527,1,-4.317381,2.017339,0\n713,60662.1603,3,-6.093997,1.675434,0\n713,60662.1713,4,-5.760686,2.093239,0\n713,60662.1822,5,-5.400730,4.721159,0\n713,60665.0637,2,-6.826318,0.676851,1\n713,60665.0713,1,-6.498077,0.740772,1\n713,60665.0789,3,-5.938825,1.048616,1\n713,60665.1017,4,-6.440791,1.543503,1\n713,60665.1156,5,-13.727009,3.770338,0\n713,60668.0647,2,-6.938087,0.920544,1\n713,60668.0723,1,-8.995543,0.954973,1\n713,60668.0893,3,-10.263328,1.437371,1\n713,60668.1055,4,-5.455149,2.164149,0\n713,60668.1165,5,-9.138229,5.354884,0\n713,60671.0655,0,-10.165054,1.726118,1\n713,60672.0693,0,-10.828177,1.470152,1\n713,60673.0745,0,-12.148479,2.243120,0\n713,60674.0798,0,-8.669188,2.216094,0\n730,59798.3205,2,1.177371,1.364300,0\n730,59798.3281,1,2.320849,1.159247,0\n730,59798.3357,3,2.939447,1.771328,0\n730,59798.3466,4,2.128097,2.610659,0\n730,59798.3576,5,-12.809639,5.38
0097,0\n730,59801.3553,2,0.111235,2.460576,0\n730,59801.3629,1,-3.393080,3.564052,0\n730,59801.3705,3,-1.899219,2.292693,0\n730,59801.3815,4,2.284906,2.523534,0\n730,59801.3924,5,5.203419,5.395980,0\n730,59818.2740,0,-2.342200,1.801066,0\n730,59819.2541,0,3.380978,2.469600,0\n730,59820.2522,0,-2.230815,1.915426,0\n730,59821.2478,0,1.159034,2.461736,0\n730,59822.2433,0,5.942166,2.901580,0\n730,59823.2659,0,-0.180970,2.714361,0\n730,59826.3105,2,0.521923,0.925337,0\n730,59826.3181,1,-1.421768,0.929596,0\n730,59826.3258,3,0.972355,1.513987,0\n730,59826.3367,4,-0.570261,2.162375,0\n730,59826.3477,5,-2.301237,5.548611,0\n730,59842.2456,2,0.156290,0.853800,0\n730,59842.2532,1,-0.567360,0.819375,0\n730,59842.2608,3,-0.251899,1.325633,0\n730,59842.2718,4,2.019500,2.173066,0\n730,59842.2827,5,10.142254,6.086383,0\n730,59851.1792,0,-1.472170,2.597541,0\n730,59854.1485,2,0.368931,1.230250,0\n730,59854.1563,1,0.664051,1.345911,0\n730,59854.1640,3,3.201455,1.909905,0\n730,59854.1750,4,3.012713,2.778862,0\n730,59854.1860,5,3.750187,5.803461,0\n730,59857.1408,2,1.076537,2.141015,0\n730,59857.1485,1,5.693109,2.937809,0\n730,59857.1563,3,-2.640246,2.100464,0\n730,59857.1673,4,0.402461,2.684283,0\n730,59857.1782,5,-4.509360,6.643411,0\n730,59867.1112,2,-0.449365,1.088300,0\n730,59867.1189,1,0.282022,0.981426,0\n730,59867.1267,3,0.026595,1.544194,0\n730,59867.1377,4,0.956947,2.364042,0\n730,59867.1487,5,2.724518,5.845339,0\n730,59870.1049,2,1.070328,0.976301,0\n730,59870.1126,1,0.511964,0.828288,0\n730,59870.1204,3,-0.505236,1.377689,0\n730,59870.1314,4,0.251195,2.289763,0\n730,59870.1424,5,4.119082,5.293428,0\n730,59873.0971,2,-0.211154,1.045822,0\n730,59873.1049,1,-1.287062,1.048773,0\n730,59873.1126,3,-1.557674,1.446841,0\n730,59873.1236,4,-0.739414,2.074561,0\n730,59873.1346,5,5.151175,4.601235,0\n730,59874.1461,0,0.412505,1.627923,0\n730,59875.0995,0,-2.200486,2.037783,0\n730,59876.0980,0,-2.931559,2.450620,0\n730,59877.0976,0,2.024089,1.789397,0\n730,59878.0964,0,-1.250103,2.029
308,0\n730,59879.0895,0,-0.671039,1.877854,0\n730,59880.1017,0,0.189355,1.384724,0\n730,59884.1760,2,-0.108323,0.771566,0\n730,59884.1836,1,-1.113737,0.892852,0\n730,59884.1913,3,-0.427802,1.142666,0\n730,59884.2022,4,1.402694,1.614300,0\n730,59884.2132,5,-1.060647,4.220271,0\n730,59887.2856,2,1.474370,1.835391,0\n730,59887.2933,1,-3.203188,2.221069,0\n730,59887.3009,3,-5.435799,2.359130,0\n730,59887.3118,4,0.192088,3.154000,0\n730,59887.3228,5,-1.082339,7.966248,0\n730,59896.1307,2,1.145174,0.826742,0\n730,59896.1384,1,-0.032153,0.707979,0\n730,59896.1460,3,-0.357363,1.398256,0\n730,59896.1569,4,-2.788487,2.198583,0\n730,59896.1679,5,2.883538,5.962979,0\n730,59899.1519,2,-1.456884,1.371527,0\n730,59899.1595,1,-0.707794,1.396877,0\n730,59899.1672,3,5.298447,2.388603,0\n730,59899.1781,4,6.412822,3.720956,0\n730,59899.1891,5,1.091714,7.924479,0\n730,59902.1384,2,-0.887660,1.191683,0\n730,59902.1460,1,0.168580,1.085883,0\n730,59902.1537,3,-1.369444,1.950019,0\n730,59902.1646,4,0.303218,2.770533,0\n730,59902.1755,5,11.777126,7.044582,0\n730,59904.1053,0,-0.620050,2.301550,0\n730,59905.0555,0,0.922903,2.527480,0\n730,59906.0562,0,-1.153271,2.043133,0\n730,59907.0567,0,0.449173,1.764913,0\n730,59908.0681,0,0.837362,2.710272,0\n730,59909.0582,0,-0.985495,1.850359,0\n730,59910.0503,0,-0.355463,1.880359,0\n730,59914.0526,2,-0.833646,1.445693,0\n730,59914.0602,1,0.370377,1.970406,0\n730,59914.0678,3,-1.719942,1.692403,0\n730,59914.0788,4,1.004354,2.274112,0\n730,59914.0897,5,0.138586,5.367689,0\n730,59924.1060,2,-0.787230,1.137160,0\n730,59924.1136,1,-1.572903,1.584968,0\n730,59924.1212,3,0.555294,1.735223,0\n730,59924.1322,4,-2.475216,2.533980,0\n730,59924.1431,5,-0.816748,5.644574,0\n730,59927.1074,2,-0.474538,1.196533,0\n730,59927.1151,1,0.973025,1.142775,0\n730,59927.1227,3,-2.039601,1.598035,0\n730,59927.1336,4,-1.036243,2.305239,0\n730,59927.1446,5,-3.438392,5.903537,0\n730,59930.1236,2,-0.114812,0.945627,0\n730,59930.1312,1,0.475511,0.835235,0\n730,59930.1388,3,0.22662
1,1.287869,0\n730,59930.1498,4,-3.755495,2.037717,0\n730,59930.1607,5,2.542647,5.343603,0\n730,59933.1249,2,0.603719,0.695106,0\n730,59933.1325,1,-0.226574,0.698751,0\n730,59933.1401,3,0.106692,1.273440,0\n730,59933.1511,4,0.993756,1.919590,0\n730,59933.1620,5,5.318815,5.441072,0\n730,59934.0638,0,1.190260,1.159169,0\n730,59935.0646,0,-0.320948,1.132809,0\n730,59936.0642,0,-1.230814,1.533033,0\n730,59937.0650,0,-0.751357,1.654440,0\n730,59938.0647,0,-3.109122,2.015928,0\n730,59939.0650,0,1.571790,2.219707,0\n730,60165.3032,2,-0.502432,1.200698,0\n730,60165.3109,1,-2.832010,1.356671,0\n730,60165.3186,3,1.843434,1.524752,0\n730,60165.3295,4,3.196369,2.029726,0\n730,60165.3405,5,-8.889149,4.793297,0\n730,60168.2892,2,-0.176546,0.837958,0\n730,60168.2970,1,0.201754,0.793672,0\n730,60168.3047,3,0.369397,1.305260,0\n730,60168.3157,4,-2.235131,2.071596,0\n730,60168.3267,5,-5.047883,4.827778,0\n730,60176.2820,0,1.753881,2.391554,0\n730,60177.2726,0,-1.260694,2.790846,0\n730,60181.4088,2,1.103341,0.929294,0\n730,60181.4164,1,1.391831,1.272189,0\n730,60181.4232,3,-4.766650,3.249354,0\n730,60183.2660,2,-2.848838,1.924783,0\n730,60183.2736,1,-3.133007,2.955767,0\n730,60183.2812,3,2.159384,2.557742,0\n730,60183.2922,4,-5.836310,3.003132,0\n730,60183.3031,5,-8.716421,6.192660,0\n730,60195.2812,2,-0.162802,0.937681,0\n730,60195.2888,1,0.864197,1.028533,0\n730,60195.2964,3,-2.294667,1.631539,0\n730,60195.3073,4,-2.693345,2.345676,0\n730,60195.3183,5,-0.570636,6.289552,0\n730,60198.2690,0,1.026459,2.192766,0\n730,60199.2186,0,0.527036,2.176080,0\n730,60200.2139,0,-1.516695,2.611164,0\n730,60201.2072,0,0.150572,2.513518,0\n730,60202.2089,0,-1.982165,2.505382,0\n730,60209.1811,2,-0.462435,1.572888,0\n730,60209.1888,1,2.153122,1.421764,0\n730,60209.1965,3,0.765511,1.838664,0\n730,60209.2075,4,-1.591831,2.917069,0\n730,60209.2184,5,0.939232,6.712256,0\n730,60212.1675,2,-2.669531,1.944909,0\n730,60212.1753,1,3.706729,2.772595,0\n730,60212.1830,3,-1.340184,2.303333,0\n730,60212.1941,4,-3.
525083,2.598093,0\n730,60212.2050,5,7.831807,5.678200,0\n730,60223.2416,2,1.328195,1.188578,0\n730,60223.2493,1,-0.298775,1.249490,0\n730,60223.2569,3,2.987647,1.656098,0\n730,60223.2678,4,4.241424,2.481011,0\n730,60223.2788,5,9.333996,5.811805,0\n730,60226.3337,2,-0.679090,0.949060,0\n730,60226.3413,1,1.282680,0.952563,0\n730,60226.3489,3,0.970081,1.800158,0\n730,60226.3599,4,1.366870,3.079182,0\n730,60226.3708,5,-4.552550,8.128254,0\n730,60238.3197,2,-0.599053,1.058323,0\n730,60238.3273,1,0.734427,1.161131,0\n730,60238.3349,3,2.553997,2.179708,0\n730,60238.3459,4,1.377842,3.840058,0\n730,60238.3568,5,-19.159811,11.281384,0\n730,60241.0870,2,0.823192,2.008905,0\n730,60241.0948,1,3.386674,3.088520,0\n730,60241.1025,3,0.043122,2.453789,0\n730,60241.1136,4,-1.052531,2.925313,0\n730,60241.1245,5,-8.036972,5.953956,0\n730,60250.1708,2,-0.962673,2.112349,0\n730,60250.1957,1,0.580816,2.829899,0\n730,60250.2034,3,6.845217,2.940232,0\n730,60250.2143,4,0.204509,3.994097,0\n730,60250.2253,5,1.290714,7.954757,0\n730,60261.1296,0,1.237353,2.094631,0\n730,60262.0550,0,3.469973,2.338792,0\n730,60263.0556,0,2.352035,1.998888,0\n730,60264.0559,0,-2.396658,2.192123,0\n730,60265.0780,0,3.070599,2.439756,0\n730,60268.0449,2,-0.465229,1.410433,0\n730,60268.0525,1,0.174091,1.861911,0\n730,60268.0601,3,0.538344,2.137292,0\n730,60268.0711,4,-3.556071,2.741589,0\n730,60268.0820,5,-3.639747,6.760314,0\n730,60278.0993,2,2.521567,1.652593,0\n730,60278.1069,1,-2.468382,2.289480,0\n730,60278.1145,3,-1.407348,2.177464,0\n730,60278.1255,4,3.475310,3.068326,0\n730,60278.1364,5,8.474236,7.658961,0\n730,60281.1023,2,-1.139811,0.801878,0\n730,60281.1099,1,-1.247972,0.800422,0\n730,60281.1175,3,-1.347594,1.374244,0\n730,60281.1285,4,-0.890039,1.996277,0\n730,60281.1394,5,2.285095,5.189152,0\n730,60284.1027,2,-0.679968,0.840813,0\n730,60284.1104,1,-0.530991,0.766401,0\n730,60284.1180,3,-1.148911,1.164351,0\n730,60284.1289,4,0.493227,1.707686,0\n730,60284.1399,5,-4.683412,4.445528,0\n730,60287.1047,2,0.
288175,1.256500,0\n730,60287.1123,1,-2.067724,1.090506,0\n730,60287.1200,3,-0.846692,1.802521,0\n730,60287.1309,4,-5.051833,2.972183,0\n730,60287.1418,5,-13.252449,7.029711,0\n730,60290.0761,0,-3.000368,1.929932,0\n730,60291.0689,0,1.081815,1.681175,0\n730,60292.0699,0,2.211185,1.882060,0\n730,60293.0699,0,0.182480,1.575780,0\n730,60294.0708,0,1.605139,1.418435,0\n730,60532.3019,2,20.994711,1.047298,1\n730,60532.3097,1,1.504146,0.960956,0\n730,60532.3173,3,31.523088,1.569497,1\n730,60532.3282,4,41.159981,2.310168,1\n730,60532.3392,5,46.795868,5.458707,1\n730,60535.2802,2,20.880348,1.426747,0\n730,60535.2879,1,2.271271,1.348233,0\n730,60535.2957,3,30.361010,2.107024,1\n730,60535.3068,4,40.715591,3.043571,1\n730,60535.3177,5,47.310059,7.197146,1\n730,60538.2826,2,19.450977,1.865142,1\n730,60538.2903,1,3.462672,2.695356,0\n730,60538.2980,3,33.572102,1.944897,1\n730,60538.3089,4,38.518837,2.334413,1\n730,60538.3199,5,40.146099,5.039364,1\n730,60554.2651,0,0.190944,2.266587,0\n730,60555.2411,0,0.098122,2.049620,0\n730,60556.2370,0,-0.253067,2.551228,0\n730,60557.2322,0,-2.200897,1.848830,0\n730,60558.2332,0,-3.459960,2.511074,0\n730,60559.2274,0,0.328893,2.224590,0\n730,60560.2268,0,2.453341,3.110694,0\n730,60567.3291,2,15.044784,0.951184,1\n730,60567.3368,1,-0.142653,1.050350,0\n730,60567.3444,3,18.416132,1.262663,1\n730,60567.3553,4,28.234451,1.676854,1\n730,60567.3663,5,31.623583,4.281011,1\n730,60580.1736,2,12.164557,1.463993,1\n730,60580.1813,1,2.065962,1.402610,0\n730,60580.1889,3,10.053763,2.203885,1\n730,60580.1999,4,19.975168,3.213686,1\n730,60580.2108,5,24.093925,7.662856,0\n730,60582.1681,0,-0.473370,2.422541,0\n730,60583.1640,0,-3.070249,3.006098,0\n730,60584.1591,0,0.970706,2.362254,0\n730,60585.1601,0,-0.533032,1.881978,0\n730,60586.1564,0,-0.049936,1.830623,0\n730,60587.1540,0,-2.202578,2.138732,0\n730,60588.1461,0,1.361049,1.798501,0\n730,60593.1209,2,6.307311,1.155241,1\n730,60593.1287,1,0.462838,1.313489,0\n730,60593.1365,3,8.789671,1.545997,1\n730,6059
3.1476,4,10.031554,2.179338,0\n730,60593.1585,5,10.850924,5.118365,0\n730,60596.1351,2,8.231540,2.483539,0\n730,60596.1427,1,1.623348,3.539990,0\n730,60596.1504,3,9.673650,2.658536,0\n730,60596.1613,4,8.778720,3.238315,0\n730,60596.1723,5,10.870938,7.199404,0\n730,60605.0908,2,5.607800,0.980471,1\n730,60605.0986,1,-0.587054,0.918425,0\n730,60605.1063,3,6.155015,1.461859,1\n730,60605.1174,4,7.274523,2.152366,0\n730,60605.1283,5,2.950838,5.145659,0\n730,60608.0836,2,5.938226,0.889949,0\n730,60608.0913,1,0.898013,0.825604,0\n730,60608.0991,3,5.509429,1.330889,0\n730,60608.1101,4,9.166319,1.962560,0\n730,60608.1211,5,3.346682,4.667600,0\n730,60611.0756,2,2.112415,0.773398,1\n730,60611.0833,1,0.247475,0.704158,0\n730,60611.0911,3,1.898379,1.172223,0\n730,60611.1021,4,4.244992,1.745410,0\n730,60611.1130,5,6.172510,4.175368,0\n730,60612.0813,0,1.228119,1.461220,0\n730,60613.0818,0,1.540095,1.541647,0\n730,60614.0803,0,1.231758,1.758784,0\n730,60615.0761,0,-0.502854,1.998764,0\n730,60616.0769,0,-2.247711,2.024976,0\n730,60617.0737,0,-1.035569,1.574103,0\n730,60620.1444,0,-1.018565,1.722706,0\n730,60621.2673,2,2.330264,0.944892,0\n730,60621.2749,1,-0.153496,0.950369,0\n730,60621.2825,3,3.589653,1.651967,0\n730,60621.2934,4,1.950011,2.746167,0\n730,60621.3044,5,6.258384,7.527862,0\n"
  },
  {
    "path": "examples/data/plasticc_training_set_metadata_1k.csv",
    "content": "object_id,ra,decl,gal_l,gal_b,ddf,hostgal_specz,hostgal_photoz,hostgal_photoz_err,distmod,mwebv,target\n615,349.046051,-61.943836,320.796530,-51.753706,1,0.0000,0.0000,0.0000,nan,0.017,92\n713,53.085938,-27.784405,223.525509,-54.460748,1,1.8181,1.6267,0.2552,45.4063,0.007,88\n730,33.574219,-6.579593,170.455585,-61.548219,1,0.2320,0.2262,0.0157,40.2561,0.021,42\n745,0.189873,-45.586655,328.254458,-68.969298,1,0.3037,0.2813,1.1523,40.7951,0.007,90\n1124,352.711273,-63.823658,316.922299,-51.059403,1,0.1934,0.2415,0.0176,40.4166,0.024,90\n1227,35.683594,-5.379379,171.992947,-59.253501,1,0.0000,0.0000,0.0000,nan,0.020,65\n1598,347.846710,-64.760857,318.929827,-49.143596,1,0.1352,0.1820,0.0304,39.7279,0.019,90\n1632,348.595886,-63.072620,320.023289,-50.713060,1,0.6857,0.7014,0.0100,43.1524,0.021,42\n1920,149.414062,3.433834,234.919132,42.245550,1,0.3088,0.3229,0.3360,41.1401,0.027,90\n1926,149.414062,1.940072,236.565366,41.393323,1,0.0000,0.0000,0.0000,nan,0.018,65\n2072,0.965665,-46.375080,325.845907,-68.579427,1,0.1516,0.1900,0.0104,39.8317,0.007,90\n2103,346.500000,-62.320400,321.951129,-50.736054,1,0.1695,0.5409,0.2283,42.4667,0.020,42\n2300,359.446716,-44.201530,331.730015,-69.805709,1,0.2360,2.7474,0.5335,46.7959,0.010,42\n2330,359.805206,-46.768478,327.135979,-67.829903,1,0.4541,0.5736,0.2827,42.6207,0.011,90\n2624,346.655182,-63.260487,320.952196,-50.040935,1,0.0000,0.0000,0.0000,nan,0.019,65\n2677,53.964844,-28.630989,225.142950,-53.813613,1,0.0000,0.0000,0.0000,nan,0.009,16\n2922,352.398651,-62.696659,318.017427,-51.967966,1,0.1539,0.1469,0.0094,39.2171,0.020,67\n3041,346.130127,-63.072620,321.423103,-50.042305,1,0.1069,0.1274,0.0198,38.8800,0.020,67\n3285,150.820312,1.641510,237.994507,42.358984,1,0.1610,0.1818,0.0079,39.7258,0.020,42\n3423,349.615387,-63.636005,318.927246,-50.506542,1,1.9876,1.1213,0.1591,44.4078,0.018,95\n3489,150.117188,2.836105,236.124718,42.483719,1,1.1330,1.4377,0.2168,45.0753,0.016,88\n3910,0.589520,-47.161343,325.385896,
-67.769893,1,0.1969,2.6766,0.5926,46.7274,0.009,62\n4088,0.965665,-46.375080,325.845907,-68.579427,1,0.4833,0.4644,0.0321,42.0691,0.007,88\n4132,359.811707,-45.191612,329.485675,-69.150905,1,0.0561,0.0556,0.0301,36.9750,0.010,42\n4171,2.097458,-45.783966,324.737840,-69.478613,1,0.0000,0.0000,0.0000,nan,0.011,16\n4173,152.050781,3.284369,237.157374,44.318466,1,0.5149,0.5512,0.0221,42.5158,0.019,15\n4220,358.648071,-46.375080,329.462659,-67.716008,1,0.1197,0.1322,0.3351,38.9679,0.009,42\n4389,151.699219,3.583322,236.533224,44.205648,1,0.2333,0.2205,0.9667,40.1939,0.016,90\n4595,349.615387,-63.636005,318.927246,-50.506542,1,0.5919,0.5995,0.0127,42.7370,0.018,90\n4819,35.332031,-5.979157,172.286722,-59.931743,1,0.3053,0.2870,0.0076,40.8445,0.022,90\n5527,347.861847,-61.943836,321.519104,-51.424048,1,0.1315,0.2487,0.8604,40.4896,0.017,42\n6180,33.222656,-4.780192,167.515653,-60.396584,1,0.3201,0.2685,0.5211,40.6793,0.018,90\n6266,0.929752,-44.597992,328.531426,-70.083244,1,0.0000,0.0000,0.0000,nan,0.011,65\n6762,348.595886,-63.072620,320.023289,-50.713060,1,0.3863,0.3983,0.0132,41.6735,0.021,90\n6947,34.277344,-5.679190,170.314930,-60.410322,1,0.5680,0.5667,0.0181,42.5888,0.020,90\n7033,52.207031,-28.291550,224.208534,-55.300157,1,0.0826,0.0850,0.0073,37.9414,0.007,42\n7164,347.861847,-61.943836,321.519104,-51.424048,1,0.4299,0.4245,0.0288,41.8371,0.017,90\n7315,2.071130,-45.191612,325.606223,-69.989264,1,0.1330,0.1337,0.0171,38.9942,0.011,88\n7409,352.398651,-62.696659,318.017427,-51.967966,1,3.4451,0.5176,1.2609,42.3516,0.020,88\n7566,359.446716,-44.201530,331.730015,-69.805709,1,0.0000,0.0000,0.0000,nan,0.010,16\n7698,347.013428,-62.508568,321.472056,-50.735330,1,0.2628,0.1876,0.0216,39.8011,0.018,90\n7703,53.085938,-28.122234,224.100909,-54.509752,1,0.0830,0.0820,0.2257,37.8568,0.007,62\n7756,149.414062,2.238686,236.239766,41.565558,1,0.0000,0.0000,0.0000,nan,0.017,16\n8328,1.694561,-45.191612,326.278557,-69.858253,1,0.3779,0.4808,0.2970,42.1592,0.011,90\n8688,32.695
312,-4.929937,166.868469,-60.841230,1,0.0000,0.0000,0.0000,nan,0.018,65\n8745,349.966217,-62.696659,319.542989,-51.376556,1,0.6276,0.6136,0.0129,42.7983,0.021,90\n8784,34.101562,-5.829153,170.247753,-60.638325,1,0.0000,0.0000,0.0000,nan,0.019,16\n9006,34.277344,-5.079716,169.526841,-59.956640,1,0.0000,0.0000,0.0000,nan,0.019,65\n9172,346.655182,-63.260487,320.952196,-50.040935,1,0.0000,0.0000,0.0000,nan,0.019,65\n9184,0.949367,-45.586655,326.991548,-69.251686,1,1.4031,1.2719,0.4971,44.7463,0.013,88\n9203,51.855469,-27.953188,223.543603,-55.561470,1,0.2138,0.1111,0.0626,38.5591,0.008,90\n9543,352.132874,-63.636005,317.424173,-51.095855,1,0.0000,0.0000,0.0000,nan,0.021,65\n9936,32.871094,-4.780192,166.959493,-60.615132,1,0.1633,0.0719,0.0389,37.5580,0.017,42\n9985,150.820312,3.732834,235.666318,43.572109,1,0.0000,0.0000,0.0000,nan,0.016,65\n10321,358.312500,-44.993881,332.185785,-68.685906,1,1.0833,1.1162,0.1020,44.3954,0.009,95\n10337,54.667969,-27.615883,223.610785,-53.050840,1,0.6830,0.6725,0.0089,43.0404,0.009,90\n10349,34.980469,-6.279288,172.180075,-60.389399,1,0.0000,0.0000,0.0000,nan,0.023,65\n10478,52.910156,-27.953188,223.774083,-54.639214,1,0.5552,0.2233,0.2002,40.2248,0.007,90\n10586,358.636353,-46.768478,328.890146,-67.388837,1,0.6052,0.6017,0.0153,42.7467,0.008,88\n10757,52.910156,-26.276812,220.926149,-54.363918,1,0.1699,0.1711,0.0185,39.5801,0.008,52\n10796,52.910156,-25.944481,220.366350,-54.301439,1,0.0000,0.0000,0.0000,nan,0.010,65\n10798,351.299988,-62.320400,319.038597,-52.026867,1,0.1778,0.1872,0.0121,39.7959,0.018,42\n11165,150.996094,2.985506,236.647967,43.287350,1,0.0000,0.0000,0.0000,nan,0.020,16\n11359,349.966217,-62.696659,319.542989,-51.376556,1,0.1529,0.1415,0.0072,39.1281,0.021,42\n11507,53.085938,-28.122234,224.100909,-54.509752,1,0.3312,0.5095,0.0718,42.3102,0.007,90\n11770,346.130127,-63.072620,321.423103,-50.042305,1,0.1415,0.2171,0.4350,40.1560,0.020,62\n11773,150.644531,3.583322,235.698235,43.342784,1,0.2207,0.5279,0.1679,42.4027,0
.018,52\n11931,149.589844,3.583322,234.885369,42.474696,1,0.0000,0.0000,0.0000,nan,0.024,65\n11978,358.648071,-46.375080,329.462659,-67.716008,1,0.4920,0.4605,0.0179,42.0472,0.009,90\n12695,51.855469,-28.630989,224.733260,-55.649872,1,0.0000,0.0000,0.0000,nan,0.009,92\n12872,347.861847,-61.943836,321.519104,-51.424048,1,0.0000,0.0000,0.0000,nan,0.017,65\n13079,151.699219,3.583322,236.533224,44.205648,1,0.2019,2.4470,1.0434,46.4913,0.016,90\n13138,346.655182,-63.260487,320.952196,-50.040935,1,0.0756,0.5192,0.2158,42.3596,0.019,52\n13194,53.789062,-27.784405,223.685697,-53.845803,1,0.5195,0.5624,0.2843,42.5685,0.009,90\n13459,150.117188,2.836105,236.124718,42.483719,1,0.3495,0.3449,0.6556,41.3068,0.016,90\n13482,33.750000,-4.630479,168.146242,-59.949072,1,0.2929,0.3115,0.0205,41.0501,0.019,90\n13504,1.363636,-46.768478,324.669342,-68.371416,1,0.4469,0.3816,0.0766,41.5643,0.008,90\n14080,150.996094,4.181528,235.291975,43.970869,1,0.0000,0.0000,0.0000,nan,0.015,65\n14156,53.085938,-27.111860,222.384291,-54.355086,1,0.0000,0.0000,0.0000,nan,0.007,65\n14279,54.667969,-27.615883,223.610785,-53.050840,1,0.3434,0.5728,0.4518,42.6167,0.009,52\n14398,2.071130,-45.191612,325.606223,-69.989264,1,0.2812,0.2634,1.0581,40.6310,0.011,90\n14539,150.644531,3.583322,235.698235,43.342784,1,0.2882,0.2359,0.0434,40.3590,0.018,88\n14553,359.805206,-46.768478,327.135979,-67.829903,1,1.1897,1.1667,0.1717,44.5143,0.011,95\n14601,32.695312,-4.929937,166.868469,-60.841230,1,0.3837,0.3653,0.2005,41.4527,0.018,90\n14674,33.750000,-4.630479,168.146242,-59.949072,1,0.2012,0.0567,0.4176,37.0171,0.019,90\n14983,349.615387,-63.636005,318.927246,-50.506542,1,0.3391,0.3238,0.0255,41.1476,0.018,90\n15002,349.046051,-61.943836,320.796530,-51.753706,1,0.3409,0.3512,0.0531,41.3530,0.017,90\n15251,32.871094,-4.780192,166.959493,-60.615132,1,0.4653,2.3270,0.6097,46.3585,0.017,90\n15475,351.382965,-64.011238,317.574052,-50.604657,1,0.0000,0.0000,0.0000,nan,0.023,65\n15626,346.130127,-63.072620,321.423103,-50.0
42305,1,0.0000,0.0000,0.0000,nan,0.020,16\n15674,0.965665,-46.375080,325.845907,-68.579427,1,0.2927,0.2727,0.3286,40.7172,0.007,90\n15700,359.415588,-46.768478,327.729895,-67.686097,1,0.0000,0.0000,0.0000,nan,0.009,16\n15718,51.855469,-27.953188,223.543603,-55.561470,1,0.1193,2.3179,0.7672,46.3482,0.008,52\n15845,53.789062,-27.784405,223.685697,-53.845803,1,0.3174,0.3471,0.8216,41.3232,0.009,90\n15968,149.414062,2.238686,236.239766,41.565558,1,0.3509,0.4729,0.4544,42.1164,0.017,90\n16339,51.328125,-27.447618,222.535046,-55.950727,1,0.0000,0.0000,0.0000,nan,0.013,16\n16349,150.820312,3.134927,236.341348,43.230123,1,0.0000,0.0000,0.0000,nan,0.016,16\n16463,151.699219,3.583322,236.533224,44.205648,1,0.2023,0.1805,0.0254,39.7082,0.016,90\n16496,359.415588,-46.768478,327.729895,-67.686097,1,0.3391,0.3895,0.2635,41.6162,0.009,52\n16802,53.437500,-29.142223,225.908120,-54.336118,1,0.3145,0.3319,0.0234,41.2094,0.008,90\n16983,150.117188,3.732834,235.120533,42.993809,1,0.2899,0.2762,0.1879,40.7495,0.020,90\n17094,52.207031,-28.291550,224.208534,-55.300157,1,0.0000,0.0000,0.0000,nan,0.007,16\n17172,53.437500,-29.142223,225.908120,-54.336118,1,0.0000,0.0000,0.0000,nan,0.008,16\n17285,148.710938,2.836105,235.050801,41.328739,1,0.3073,0.3057,0.0484,41.0025,0.031,90\n17366,349.285706,-62.884678,319.786163,-51.046461,1,0.2387,0.2024,0.0247,39.9853,0.018,90\n17370,0.949367,-45.586655,326.991548,-69.251686,1,0.3138,0.3391,0.4176,41.2636,0.013,62\n17515,52.207031,-28.630989,224.800211,-55.343637,1,0.3577,0.3487,0.0073,41.3345,0.009,90\n18029,359.415588,-46.768478,327.729895,-67.686097,1,0.3525,0.3609,0.0112,41.4219,0.009,90\n18507,352.711273,-63.823658,316.922299,-51.059403,1,0.3755,0.3457,0.0230,41.3125,0.024,88\n18556,51.855469,-26.276812,220.627031,-55.293792,1,0.0000,0.0000,0.0000,nan,0.014,6\n18645,358.636353,-46.768478,328.890146,-67.388837,1,0.1640,2.3025,1.1022,46.3306,0.008,62\n18706,34.277344,-5.679190,170.314930,-60.410322,1,0.1706,0.1766,0.0158,39.6556,0.020,62\n18937,348
.595886,-63.072620,320.023289,-50.713060,1,0.2142,0.2222,0.0102,40.2123,0.021,90\n18952,151.699219,3.583322,236.533224,44.205648,1,0.2800,0.2658,1.1944,40.6541,0.016,90\n19154,351.382965,-64.011238,317.574052,-50.604657,1,0.2354,2.4138,0.5022,46.4553,0.023,67\n19213,1.753247,-46.768478,324.030235,-68.498041,1,0.1254,0.1484,0.0086,39.2403,0.014,62\n19866,359.814819,-44.399834,330.775011,-69.801007,1,0.2608,0.2877,0.0235,40.8505,0.009,90\n20567,351.259003,-64.386185,317.344860,-50.255113,1,0.1549,0.1481,0.2206,39.2350,0.020,62\n20934,348.908447,-63.823658,319.169886,-50.176186,1,0.0999,2.5704,1.2137,46.6209,0.018,42\n21335,33.574219,-5.379379,168.838090,-60.637536,1,0.1542,0.2082,0.4220,40.0542,0.017,90\n22184,358.312500,-44.993881,332.185785,-68.685906,1,0.3508,0.3850,0.6064,41.5869,0.009,90\n22574,150.996094,2.985506,236.647967,43.287350,1,0.0000,0.0000,0.0000,nan,0.020,16\n22901,151.171875,1.342993,238.602520,42.464379,1,0.2581,0.2502,0.0061,40.5039,0.026,90\n23116,53.261719,-27.615883,223.280041,-54.281374,1,0.8237,0.7520,0.0300,43.3376,0.006,15\n23127,149.414062,3.433834,234.919132,42.245550,1,0.3221,0.4025,0.7933,41.7004,0.027,52\n23299,33.222656,-4.780192,167.515653,-60.396584,1,0.5869,0.5400,0.0151,42.4624,0.018,88\n23373,150.117188,3.732834,235.120533,42.993809,1,0.5442,0.5636,0.2043,42.5744,0.020,88\n23396,359.811707,-45.191612,329.485675,-69.150905,1,0.5667,0.6192,0.1193,42.8220,0.010,90\n23409,348.595886,-63.072620,320.023289,-50.713060,1,0.1407,0.1392,0.0136,39.0882,0.021,52\n23539,34.277344,-5.079716,169.526841,-59.956640,1,0.4550,0.2524,0.3112,40.5254,0.019,95\n23795,51.855469,-26.276812,220.627031,-55.293792,1,0.0000,0.0000,0.0000,nan,0.014,65\n23822,2.457983,-45.389202,324.632685,-69.945696,1,0.2411,0.2420,0.9270,40.4218,0.011,52\n23848,33.925781,-5.979157,170.179895,-60.866303,1,0.3316,0.3185,1.0181,41.1057,0.022,90\n23857,151.699219,3.583322,236.533224,44.205648,1,0.2988,0.4769,0.0894,42.1379,0.016,90\n23931,32.695312,-4.929937,166.868469,-60.841230
,1,0.6282,0.6337,0.0073,42.8832,0.018,88\n24193,152.050781,2.985506,237.495952,44.143927,1,2.0958,1.3937,0.2518,44.9919,0.019,88\n24236,346.655182,-63.260487,320.952196,-50.040935,1,0.0000,0.0000,0.0000,nan,0.019,65\n24592,349.966217,-62.696659,319.542989,-51.376556,1,0.2901,0.2846,0.0249,40.8234,0.021,90\n24849,53.085938,-27.111860,222.384291,-54.355086,1,0.0000,0.0000,0.0000,nan,0.007,16\n24903,52.031250,-26.443335,220.963669,-55.168557,1,0.0000,0.0000,0.0000,nan,0.014,65\n24947,150.117188,2.238686,236.784618,42.139082,1,0.4723,0.4521,0.0193,41.9998,0.016,90\n24989,34.804688,-5.829153,171.307861,-60.174401,1,0.4468,0.4763,0.0117,42.1349,0.023,90\n25003,359.814819,-44.399834,330.775011,-69.801007,1,0.3137,0.2996,0.0218,40.9523,0.009,90\n25039,346.562500,-63.448284,320.824720,-49.866957,1,0.3161,0.2675,1.1577,40.6696,0.021,90\n25474,151.523438,3.134927,236.900695,43.803170,1,0.5236,0.5626,0.0155,42.5697,0.019,90\n25529,358.312500,-44.993881,332.185785,-68.685906,1,0.2835,0.5789,0.2180,42.6448,0.009,90\n25577,348.529419,-61.755440,321.293980,-51.763351,1,0.4028,0.3918,0.0170,41.6314,0.016,90\n25783,150.820312,3.134927,236.341348,43.230123,1,0.1040,0.1439,0.0116,39.1669,0.016,42\n25920,150.644531,3.583322,235.698235,43.342784,1,0.0000,0.0000,0.0000,nan,0.018,16\n25925,35.332031,-5.979157,172.286722,-59.931743,1,1.7327,1.7075,0.1320,45.5358,0.022,88\n26161,359.415588,-46.768478,327.729895,-67.686097,1,0.0000,0.0000,0.0000,nan,0.009,92\n26338,151.171875,2.537361,237.288526,43.169764,1,0.1892,0.2250,0.0141,40.2436,0.024,62\n26352,1.708861,-45.586655,325.688716,-69.520253,1,0.0000,0.0000,0.0000,nan,0.011,65\n26401,151.699219,3.583322,236.533224,44.205648,1,0.0000,0.0000,0.0000,nan,0.016,16\n26531,351.259003,-64.386185,317.344860,-50.255113,1,2.5314,2.4324,0.2792,46.4755,0.020,88\n26660,347.846710,-64.760857,318.929827,-49.143596,1,0.0000,0.0000,0.0000,nan,0.019,65\n26783,150.820312,1.641510,237.994507,42.358984,1,0.0000,0.0000,0.0000,nan,0.020,92\n27124,351.299988,-62.320
400,319.038597,-52.026867,1,0.0000,0.0000,0.0000,nan,0.018,16\n27339,51.855469,-26.276812,220.627031,-55.293792,1,0.1432,0.1625,0.0226,39.4561,0.014,90\n27941,149.414062,1.940072,236.565366,41.393323,1,0.3632,0.3746,0.0319,41.5166,0.018,90\n28220,1.694561,-45.191612,326.278557,-69.858253,1,0.2985,0.3605,0.3149,41.4193,0.011,90\n28301,0.189873,-45.586655,328.254458,-68.969298,1,0.3606,0.2852,1.3620,40.8288,0.007,90\n28391,351.953644,-62.132156,318.777388,-52.347124,1,0.0000,0.0000,0.0000,nan,0.019,92\n28636,51.855469,-28.630989,224.733260,-55.649872,1,0.1743,0.4412,0.3366,41.9364,0.009,67\n28843,151.171875,2.537361,237.288526,43.169764,1,0.3664,0.3611,0.0225,41.4234,0.024,90\n28915,53.789062,-27.784405,223.685697,-53.845803,1,0.0000,0.0000,0.0000,nan,0.009,16\n29088,52.558594,-27.279613,222.538937,-54.845107,1,0.3037,0.3244,0.0203,41.1521,0.008,90\n29252,51.855469,-28.630989,224.733260,-55.649872,1,0.1439,0.1421,0.0233,39.1376,0.009,42\n29416,1.694561,-45.191612,326.278557,-69.858253,1,0.2168,0.1921,0.0349,39.8588,0.011,90\n29420,2.097458,-45.783966,324.737840,-69.478613,1,0.5849,0.5559,0.0102,42.5385,0.011,90\n29576,346.655182,-63.260487,320.952196,-50.040935,1,0.2362,2.5224,1.0484,46.5713,0.019,90\n29668,151.699219,3.583322,236.533224,44.205648,1,0.1461,0.1584,0.0175,39.3960,0.016,42\n29670,1.694561,-45.191612,326.278557,-69.858253,1,0.1135,0.1208,0.0198,38.7544,0.011,62\n30066,351.259003,-64.386185,317.344860,-50.255113,1,0.0000,0.0000,0.0000,nan,0.020,65\n30172,33.574219,-5.379379,168.838090,-60.637536,1,0.5444,0.5455,0.0094,42.4889,0.017,90\n30191,150.117188,2.238686,236.784618,42.139082,1,1.5405,1.3073,0.1521,44.8201,0.016,88\n30505,151.171875,2.238686,237.619933,42.994783,1,0.0000,0.0000,0.0000,nan,0.024,16\n30545,2.071130,-45.191612,325.606223,-69.989264,1,0.2160,0.2221,0.0338,40.2113,0.011,90\n30576,51.855469,-27.953188,223.543603,-55.561470,1,0.0000,0.0000,0.0000,nan,0.008,65\n30673,349.966217,-62.696659,319.542989,-51.376556,1,0.0000,0.0000,0.0000,nan,0.02
1,16\n30895,349.429535,-62.508568,320.039643,-51.393745,1,0.0000,0.0000,0.0000,nan,0.020,16\n31033,349.891296,-64.573555,317.972107,-49.786192,1,0.0000,0.0000,0.0000,nan,0.023,65\n31100,1.694561,-45.191612,326.278557,-69.858253,1,0.3614,0.3471,1.2589,41.3231,0.011,90\n31310,149.238281,3.882372,234.283829,42.351155,1,0.0000,0.0000,0.0000,nan,0.033,92\n31569,346.655182,-63.260487,320.952196,-50.040935,1,1.0133,1.0031,0.0118,44.1084,0.019,95\n31605,150.292969,2.686724,236.427488,42.541447,1,0.0000,0.0000,0.0000,nan,0.016,65\n31824,352.398651,-62.696659,318.017427,-51.967966,1,0.1838,0.0844,0.3311,37.9246,0.020,42\n32238,34.101562,-5.829153,170.247753,-60.638325,1,0.0000,0.0000,0.0000,nan,0.019,16\n32309,34.804688,-5.829153,171.307861,-60.174401,1,0.2251,0.2258,0.9011,40.2521,0.023,42\n32375,53.964844,-28.630989,225.142950,-53.813613,1,0.0000,0.0000,0.0000,nan,0.009,65\n32695,358.636353,-46.768478,328.890146,-67.388837,1,0.7689,0.7806,0.0113,43.4371,0.008,90\n33088,351.259003,-64.386185,317.344860,-50.255113,1,0.3437,0.3495,0.0200,41.3400,0.020,90\n33179,51.855469,-27.953188,223.543603,-55.561470,1,0.4407,0.4765,0.4079,42.1357,0.008,90\n33191,151.171875,2.238686,237.619933,42.994783,1,0.4030,0.4039,0.0174,41.7094,0.024,42\n33409,33.222656,-4.780192,167.515653,-60.396584,1,0.0000,0.0000,0.0000,nan,0.018,65\n33419,150.820312,3.732834,235.666318,43.572109,1,0.7462,0.7461,0.0356,43.3165,0.016,90\n33422,33.574219,-6.579593,170.455585,-61.548219,1,1.1111,1.1054,0.0101,44.3693,0.021,88\n34012,35.683594,-5.379379,171.992947,-59.253501,1,0.0853,0.0793,0.0210,37.7805,0.020,52\n34166,0.189873,-45.586655,328.254458,-68.969298,1,0.0873,0.0909,0.0246,38.0959,0.007,42\n34243,34.101562,-5.829153,170.247753,-60.638325,1,0.1416,0.5642,0.3631,42.5771,0.019,88\n34299,346.276581,-64.011238,320.448031,-49.344136,1,0.1901,0.2016,0.0086,39.9759,0.019,62\n34437,152.050781,2.985506,237.495952,44.143927,1,0.2657,0.2629,1.0604,40.6263,0.019,67\n35197,51.679688,-27.447618,222.618229,-55.642263,1,0.
2509,0.2366,0.1269,40.3670,0.010,42\n35315,150.468750,3.732834,235.392208,43.283244,1,1.8476,1.5239,0.2256,45.2314,0.020,95\n35555,359.805206,-46.768478,327.135979,-67.829903,1,0.0000,0.0000,0.0000,nan,0.011,65\n35743,34.277344,-5.679190,170.314930,-60.410322,1,0.0781,0.0752,0.0197,37.6598,0.020,42\n35772,150.117188,2.836105,236.124718,42.483719,1,0.2385,0.2588,0.0217,40.5879,0.016,90\n35855,0.929752,-44.597992,328.531426,-70.083244,1,0.3815,0.4401,0.5094,41.9300,0.011,90\n36085,352.398651,-62.696659,318.017427,-51.967966,1,0.1689,0.1759,0.5357,39.6465,0.020,42\n36153,150.468750,1.641510,237.714575,42.075234,1,0.2547,0.2589,0.0256,40.5887,0.017,52\n36337,52.558594,-27.279613,222.538937,-54.845107,1,0.0000,0.0000,0.0000,nan,0.008,65\n36362,53.085938,-27.784405,223.525509,-54.460748,1,0.0000,0.0000,0.0000,nan,0.007,65\n36671,149.589844,3.583322,234.885369,42.474696,1,0.0000,0.0000,0.0000,nan,0.024,65\n36783,349.966217,-62.696659,319.542989,-51.376556,1,0.1287,0.1431,0.0129,39.1539,0.021,90\n37149,359.816315,-44.003082,331.451340,-70.123054,1,0.9435,0.9017,0.0524,43.8228,0.013,90\n37168,53.613281,-27.953188,223.929533,-54.024772,1,0.0000,0.0000,0.0000,nan,0.007,65\n37661,32.871094,-4.780192,166.959493,-60.615132,1,0.1226,0.0973,0.0168,38.2528,0.017,52\n37776,346.655182,-63.260487,320.952196,-50.040935,1,0.0000,0.0000,0.0000,nan,0.019,16\n37865,151.171875,2.238686,237.619933,42.994783,1,0.2263,0.2221,0.0172,40.2112,0.024,90\n37872,150.820312,3.134927,236.341348,43.230123,1,0.2517,0.2448,0.0217,40.4506,0.016,67\n38174,1.694561,-45.191612,326.278557,-69.858253,1,1.6152,1.7388,0.1564,45.5843,0.011,88\n38205,33.750000,-4.630479,168.146242,-59.949072,1,0.2945,0.2311,1.2272,40.3089,0.019,42\n38244,346.655182,-63.260487,320.952196,-50.040935,1,0.0000,0.0000,0.0000,nan,0.019,65\n38690,33.222656,-4.780192,167.515653,-60.396584,1,0.1801,0.1274,0.0307,38.8795,0.018,90\n38730,53.261719,-27.615883,223.280041,-54.281374,1,0.0000,0.0000,0.0000,nan,0.006,16\n38754,33.574219,-6.579593,1
70.455585,-61.548219,1,0.2646,0.2656,0.0093,40.6515,0.021,90\n38899,1.666667,-44.399834,327.519190,-70.529554,1,0.4828,0.4754,0.0332,42.1297,0.009,90\n39223,150.996094,2.388015,237.313912,42.939977,1,0.0000,0.0000,0.0000,nan,0.021,65\n39305,346.562500,-63.448284,320.824720,-49.866957,1,0.4045,0.2986,0.1602,40.9435,0.021,90\n39398,51.679688,-27.447618,222.618229,-55.642263,1,0.3347,0.5555,0.5802,42.5365,0.010,90\n39597,53.085938,-28.122234,224.100909,-54.509752,1,0.1280,0.1327,0.0064,38.9763,0.007,62\n39626,149.414062,2.238686,236.239766,41.565558,1,0.5197,0.4293,0.7344,41.8662,0.017,90\n39846,351.382965,-64.011238,317.574052,-50.604657,1,0.1886,0.2780,0.6915,40.7654,0.023,62\n40290,35.859375,-4.630479,171.270769,-58.580806,1,0.3153,0.5118,0.8469,42.3221,0.022,42\n41515,358.648071,-46.375080,329.462659,-67.716008,1,0.5720,0.5797,0.0188,42.6484,0.009,90\n41738,150.117188,3.732834,235.120533,42.993809,1,0.1206,0.1277,0.0222,38.8865,0.020,42\n42118,0.574468,-45.981140,327.041068,-68.778764,1,0.1801,0.1977,0.0131,39.9287,0.006,62\n42224,51.328125,-27.784405,223.130589,-55.999499,1,0.1119,0.0888,0.1482,38.0396,0.013,42\n42288,359.415588,-46.768478,327.729895,-67.686097,1,0.3487,0.3864,0.0241,41.5962,0.009,90\n42333,346.562500,-63.448284,320.824720,-49.866957,1,0.1921,0.2046,0.0101,40.0116,0.021,67\n42469,2.071130,-45.191612,325.606223,-69.989264,1,1.5989,1.4913,0.1216,45.1735,0.011,95\n42689,346.562500,-63.448284,320.824720,-49.866957,1,0.0000,0.0000,0.0000,nan,0.021,65\n42776,152.050781,3.284369,237.157374,44.318466,1,0.0000,0.0000,0.0000,nan,0.019,16\n42852,351.321442,-64.198746,317.458993,-50.429931,1,0.6771,0.6680,0.0223,43.0226,0.023,88\n43028,51.679688,-27.447618,222.618229,-55.642263,1,0.1366,0.1364,0.0092,39.0408,0.010,42\n43151,34.980469,-6.279288,172.180075,-60.389399,1,0.1096,0.1352,0.0222,39.0199,0.023,52\n43211,34.980469,-6.279288,172.180075,-60.389399,1,0.3321,0.3275,0.0164,41.1762,0.023,90\n43337,51.328125,-27.447618,222.535046,-55.950727,1,0.1775,0.2488,0.
0180,40.4902,0.013,90\n43413,348.595886,-63.072620,320.023289,-50.713060,1,0.0000,0.0000,0.0000,nan,0.021,16\n43509,51.855469,-27.953188,223.543603,-55.561470,1,0.0000,0.0000,0.0000,nan,0.008,92\n43812,150.820312,3.134927,236.341348,43.230123,1,0.2783,0.2821,0.0219,40.8022,0.016,90\n43962,347.861847,-61.943836,321.519104,-51.424048,1,0.6595,0.6813,0.0340,43.0750,0.017,90\n44102,152.050781,3.284369,237.157374,44.318466,1,0.2450,0.2640,0.2125,40.6366,0.019,42\n44217,51.855469,-27.953188,223.543603,-55.561470,1,0.4288,0.1772,0.6584,39.6645,0.008,90\n44309,34.980469,-6.279288,172.180075,-60.389399,1,0.3131,0.3059,1.2366,41.0045,0.023,90\n44480,53.964844,-28.630989,225.142950,-53.813613,1,0.0000,0.0000,0.0000,nan,0.009,16\n44836,151.171875,1.342993,238.602520,42.464379,1,0.0000,0.0000,0.0000,nan,0.026,65\n45060,346.276581,-64.011238,320.448031,-49.344136,1,0.3613,0.3300,0.1387,41.1948,0.019,62\n45115,2.097458,-45.783966,324.737840,-69.478613,1,0.0000,0.0000,0.0000,nan,0.011,65\n45127,35.859375,-4.630479,171.270769,-58.580806,1,0.1378,0.1359,0.0125,39.0331,0.022,90\n45203,150.820312,3.134927,236.341348,43.230123,1,0.0000,0.0000,0.0000,nan,0.016,16\n45319,348.595886,-63.072620,320.023289,-50.713060,1,0.1270,0.0737,0.0272,37.6138,0.021,42\n45349,32.695312,-4.929937,166.868469,-60.841230,1,0.2821,2.6404,0.9837,46.6916,0.018,67\n45549,52.207031,-28.291550,224.208534,-55.300157,1,0.6733,0.7639,0.0513,43.3795,0.007,42\n46210,51.855469,-27.953188,223.543603,-55.561470,1,0.0000,0.0000,0.0000,nan,0.008,65\n46567,149.414062,3.433834,234.919132,42.245550,1,0.0000,0.0000,0.0000,nan,0.027,16\n46804,352.398651,-62.696659,318.017427,-51.967966,1,0.2933,2.3248,0.6338,46.3561,0.020,62\n46958,150.820312,3.732834,235.666318,43.572109,1,0.0000,0.0000,0.0000,nan,0.016,65\n47148,53.085938,-27.784405,223.525509,-54.460748,1,0.3153,0.3234,0.0167,41.1439,0.007,90\n47725,351.259003,-64.386185,317.344860,-50.255113,1,0.3615,0.2950,0.7526,40.9133,0.020,90\n48187,52.207031,-26.610098,221.298836,-55.0
42928,1,0.5466,0.5379,0.1063,42.4521,0.014,90\n48260,52.207031,-28.291550,224.208534,-55.300157,1,0.1677,0.1746,0.4651,39.6283,0.007,42\n48426,351.734680,-62.884678,318.284128,-51.651217,1,0.0000,0.0000,0.0000,nan,0.019,16\n48473,51.855469,-27.953188,223.543603,-55.561470,1,0.0809,0.0825,0.0165,37.8722,0.008,42\n48575,32.695312,-4.929937,166.868469,-60.841230,1,0.7796,0.7698,0.0437,43.4000,0.018,90\n48687,347.812500,-63.448284,320.128971,-50.202348,1,0.7916,0.8371,0.0551,43.6237,0.021,88\n48725,53.613281,-28.630989,225.073365,-54.119461,1,0.3658,0.3599,0.1804,41.4145,0.006,67\n48749,348.529419,-61.755440,321.293980,-51.763351,1,1.6645,1.5782,0.0657,45.3252,0.016,88\n48817,348.586945,-64.573555,318.693903,-49.477869,1,0.0962,0.1446,0.6309,39.1788,0.018,90\n48981,358.648071,-46.375080,329.462659,-67.716008,1,0.0000,0.0000,0.0000,nan,0.009,16\n49219,33.398438,-3.732834,166.492280,-59.466614,1,0.0000,0.0000,0.0000,nan,0.022,65\n49389,349.285706,-62.884678,319.786163,-51.046461,1,0.0000,0.0000,0.0000,nan,0.018,92\n49529,53.613281,-27.953188,223.929533,-54.024772,1,0.1974,0.2117,0.0123,40.0947,0.007,62\n49783,51.328125,-27.784405,223.130589,-55.999499,1,0.0000,0.0000,0.0000,nan,0.013,65\n49937,151.347656,3.583322,236.252362,43.918627,1,0.0947,0.1091,0.0218,38.5175,0.015,52\n50277,349.046051,-61.943836,320.796530,-51.753706,1,0.3009,0.2478,1.3214,40.4807,0.017,67\n50395,349.891296,-64.573555,317.972107,-49.786192,1,0.1552,0.1574,0.3022,39.3801,0.023,90\n51178,52.558594,-27.279613,222.538937,-54.845107,1,0.9668,0.9534,0.0188,43.9721,0.008,88\n51279,1.708861,-45.586655,325.688716,-69.520253,1,0.1760,0.1824,0.3979,39.7343,0.011,90\n51318,34.277344,-5.679190,170.314930,-60.410322,1,0.2987,0.3038,0.0273,40.9868,0.020,62\n51490,0.574468,-45.981140,327.041068,-68.778764,1,0.3707,0.4717,0.3810,42.1099,0.006,90\n51987,352.711273,-63.823658,316.922299,-51.059403,1,0.0000,0.0000,0.0000,nan,0.024,92\n52150,52.207031,-26.610098,221.298836,-55.042928,1,0.2746,0.3069,0.0656,41.0122,0.014
,90\n52320,52.910156,-25.944481,220.366350,-54.301439,1,0.1656,2.0324,0.6458,46.0000,0.010,88\n52370,352.711273,-63.823658,316.922299,-51.059403,1,0.0000,0.0000,0.0000,nan,0.024,16\n52425,52.910156,-26.276812,220.926149,-54.363918,1,0.1587,0.1182,0.6266,38.7042,0.008,42\n52740,53.613281,-27.953188,223.929533,-54.024772,1,0.3653,0.4721,0.8955,42.1118,0.007,90\n52854,149.414062,1.940072,236.565366,41.393323,1,0.4383,0.4343,0.0335,41.8957,0.018,90\n53025,358.636353,-46.768478,328.890146,-67.388837,1,0.0000,0.0000,0.0000,nan,0.008,65\n53249,349.285706,-62.884678,319.786163,-51.046461,1,0.3941,0.4211,0.4203,41.8166,0.018,90\n53354,34.980469,-6.279288,172.180075,-60.389399,1,0.2231,0.2134,0.0125,40.1138,0.023,62\n53525,349.615387,-63.636005,318.927246,-50.506542,1,0.0000,0.0000,0.0000,nan,0.018,16\n53574,0.574468,-45.981140,327.041068,-68.778764,1,0.4176,0.4427,0.0420,41.9454,0.006,90\n53782,53.261719,-27.615883,223.280041,-54.281374,1,0.3798,0.3714,0.0189,41.4948,0.006,90\n53938,53.437500,-29.142223,225.908120,-54.336118,1,0.0000,0.0000,0.0000,nan,0.008,16\n54416,347.812500,-63.448284,320.128971,-50.202348,1,0.3708,0.3734,0.0162,41.5087,0.021,42\n54883,347.617462,-62.508568,321.121462,-50.904708,1,0.7844,0.8578,0.0392,43.6891,0.019,88\n54915,148.886719,2.686724,235.347248,41.389003,1,0.0000,0.0000,0.0000,nan,0.028,65\n55002,52.207031,-28.291550,224.208534,-55.300157,1,0.4345,0.4175,0.0286,41.7946,0.007,90\n55018,350.230255,-61.943836,320.053946,-52.070537,1,0.0000,0.0000,0.0000,nan,0.017,65\n55033,151.171875,1.342993,238.602520,42.464379,1,0.4920,0.5345,0.0204,42.4355,0.026,42\n55060,51.855469,-28.630989,224.733260,-55.649872,1,0.0234,0.0824,0.0202,37.8680,0.009,42\n55141,2.097458,-45.783966,324.737840,-69.478613,1,0.0000,0.0000,0.0000,nan,0.011,65\n55155,34.101562,-5.829153,170.247753,-60.638325,1,0.0000,0.0000,0.0000,nan,0.019,65\n55354,350.230255,-61.943836,320.053946,-52.070537,1,0.1958,2.3870,0.2904,46.4258,0.017,90\n55419,150.820312,3.732834,235.666318,43.572109,1,
0.0967,0.1092,0.0137,38.5190,0.016,62\n55946,347.617462,-62.508568,321.121462,-50.904708,1,0.0000,0.0000,0.0000,nan,0.019,65\n56053,150.292969,2.686724,236.427488,42.541447,1,0.8507,0.8283,0.0406,43.5954,0.016,90\n56245,1.708861,-45.586655,325.688716,-69.520253,1,0.0000,0.0000,0.0000,nan,0.011,16\n56334,358.312500,-44.993881,332.185785,-68.685906,1,0.1955,0.1948,0.0100,39.8925,0.009,62\n56349,1.694561,-45.191612,326.278557,-69.858253,1,0.1646,0.2000,0.1961,39.9565,0.011,52\n56461,347.812500,-63.448284,320.128971,-50.202348,1,0.1760,2.4036,1.0097,46.4440,0.021,90\n56769,33.574219,-5.079716,168.448505,-60.407218,1,0.0000,0.0000,0.0000,nan,0.016,65\n56821,52.207031,-26.610098,221.298836,-55.042928,1,0.0775,0.0884,0.0181,38.0309,0.014,62\n56893,150.117188,2.836105,236.124718,42.483719,1,0.0000,0.0000,0.0000,nan,0.016,65\n56987,33.574219,-6.579593,170.455585,-61.548219,1,0.2910,0.3152,0.0132,41.0794,0.021,62\n57205,359.816315,-44.003082,331.451340,-70.123054,1,0.5891,0.6057,0.0306,42.7639,0.013,90\n57237,52.207031,-26.610098,221.298836,-55.042928,1,0.2544,0.3807,0.6581,41.5580,0.014,42\n57263,351.299988,-62.320400,319.038597,-52.026867,1,0.0000,0.0000,0.0000,nan,0.018,65\n57561,152.050781,3.284369,237.157374,44.318466,1,0.0000,0.0000,0.0000,nan,0.019,65\n57666,1.666667,-44.399834,327.519190,-70.529554,1,0.4399,0.4648,0.0183,42.0716,0.009,90\n57784,348.529419,-61.755440,321.293980,-51.763351,1,0.0000,0.0000,0.0000,nan,0.016,65\n58174,348.529419,-61.755440,321.293980,-51.763351,1,1.1032,1.1018,0.1226,44.3607,0.016,42\n58265,0.190678,-45.783966,327.956322,-68.803772,1,0.0650,0.1027,1.1126,38.3778,0.005,67\n58323,349.615387,-63.636005,318.927246,-50.506542,1,1.0155,1.0187,0.0471,44.1500,0.018,88\n59068,34.277344,-5.079716,169.526841,-59.956640,1,0.5716,0.5429,0.1404,42.4762,0.019,42\n59128,348.529419,-61.755440,321.293980,-51.763351,1,0.6794,0.4288,0.2152,41.8629,0.016,90\n59163,150.644531,3.583322,235.698235,43.342784,1,0.0000,0.0000,0.0000,nan,0.018,65\n59427,151.171875,1.
342993,238.602520,42.464379,1,0.0000,0.0000,0.0000,nan,0.026,65\n59463,52.031250,-26.443335,220.963669,-55.168557,1,0.1439,0.1438,0.0169,39.1654,0.014,42\n59580,1.753247,-46.768478,324.030235,-68.498041,1,0.4178,0.5020,0.7485,42.2716,0.014,42\n59644,34.980469,-6.279288,172.180075,-60.389399,1,0.4238,0.3347,0.1863,41.2308,0.023,90\n59732,347.812500,-63.448284,320.128971,-50.202348,1,0.2489,2.7125,0.9243,46.7623,0.021,90\n60023,359.814819,-44.399834,330.775011,-69.801007,1,0.6857,0.6858,0.0077,43.0925,0.009,90\n60098,53.261719,-27.615883,223.280041,-54.281374,1,0.2529,0.2478,0.0697,40.4804,0.006,90\n60340,148.710938,2.836105,235.050801,41.328739,1,0.3521,0.3472,0.0096,41.3237,0.031,67\n60350,52.558594,-27.279613,222.538937,-54.845107,1,0.5295,0.5982,0.1284,42.7310,0.008,62\n60376,51.855469,-27.953188,223.543603,-55.561470,1,0.0000,0.0000,0.0000,nan,0.008,16\n60407,346.130127,-63.072620,321.423103,-50.042305,1,0.0000,0.0000,0.0000,nan,0.020,65\n60554,347.013428,-62.508568,321.472056,-50.735330,1,0.2356,0.2869,0.0805,40.8438,0.018,67\n60742,346.562500,-63.448284,320.824720,-49.866957,1,0.0000,0.0000,0.0000,nan,0.021,65\n60976,358.648071,-46.375080,329.462659,-67.716008,1,0.0615,0.0821,0.0078,37.8606,0.009,52\n61101,151.171875,2.537361,237.288526,43.169764,1,0.0000,0.0000,0.0000,nan,0.024,16\n61165,53.789062,-27.784405,223.685697,-53.845803,1,0.1089,0.1122,0.0140,38.5832,0.009,62\n61407,53.613281,-27.953188,223.929533,-54.024772,1,0.0000,0.0000,0.0000,nan,0.007,92\n61763,52.207031,-28.291550,224.208534,-55.300157,1,0.5749,0.5237,0.0263,42.3819,0.007,90\n62078,0.965665,-46.375080,325.845907,-68.579427,1,0.0000,0.0000,0.0000,nan,0.007,65\n62187,33.398438,-3.732834,166.492280,-59.466614,1,0.1391,0.1301,1.1896,38.9300,0.022,64\n62230,53.085938,-27.784405,223.525509,-54.460748,1,0.0000,0.0000,0.0000,nan,0.007,16\n62253,51.328125,-27.447618,222.535046,-55.950727,1,0.7671,0.7610,0.0367,43.3693,0.013,90\n62254,53.085938,-27.111860,222.384291,-54.355086,1,0.2738,0.2895,0.0180,40.
8666,0.007,90\n62384,351.321442,-64.198746,317.458993,-50.429931,1,0.0000,0.0000,0.0000,nan,0.023,65\n62541,347.013428,-62.508568,321.472056,-50.735330,1,0.5490,0.3280,0.8012,41.1797,0.018,90\n62908,150.820312,3.134927,236.341348,43.230123,1,0.2372,0.2111,0.0189,40.0878,0.016,62\n63561,359.814819,-44.399834,330.775011,-69.801007,1,0.7386,0.7247,0.0129,43.2390,0.009,90\n63718,0.965665,-46.375080,325.845907,-68.579427,1,0.2891,0.4200,0.0613,41.8100,0.007,90\n63860,351.382965,-64.011238,317.574052,-50.604657,1,0.1617,0.1988,0.0128,39.9412,0.023,42\n64248,2.097458,-45.783966,324.737840,-69.478613,1,0.1653,0.1710,0.0196,39.5787,0.011,52\n64485,348.586945,-64.573555,318.693903,-49.477869,1,0.0000,0.0000,0.0000,nan,0.018,16\n64854,1.723404,-45.981140,325.117958,-69.180825,1,0.1163,0.0705,0.0098,37.5117,0.010,62\n64888,52.031250,-26.443335,220.963669,-55.168557,1,0.3802,0.4086,0.7097,41.7390,0.014,90\n64896,347.846710,-64.760857,318.929827,-49.143596,1,0.0991,0.1224,1.1874,38.7854,0.019,42\n64911,150.820312,1.641510,237.994507,42.358984,1,0.2375,2.2621,1.0009,46.2837,0.020,90\n65745,53.085938,-28.122234,224.100909,-54.509752,1,0.0680,0.0552,1.1581,36.9555,0.007,90\n65749,33.398438,-4.331149,167.226341,-59.936551,1,0.0000,0.0000,0.0000,nan,0.018,65\n65877,349.891296,-64.573555,317.972107,-49.786192,1,0.0000,0.0000,0.0000,nan,0.023,65\n66126,348.908447,-63.823658,319.169886,-50.176186,1,0.0601,0.0718,0.0163,37.5531,0.018,42\n66325,53.261719,-27.615883,223.280041,-54.281374,1,0.0000,0.0000,0.0000,nan,0.006,65\n66536,150.996094,2.388015,237.313912,42.939977,1,0.3144,0.3150,0.2975,41.0779,0.021,90\n66548,347.617462,-62.508568,321.121462,-50.904708,1,0.3713,0.4282,1.3857,41.8598,0.019,90\n66852,2.071130,-45.191612,325.606223,-69.989264,1,0.5802,0.5679,0.0103,42.5945,0.011,42\n66904,349.285706,-62.884678,319.786163,-51.046461,1,0.0000,0.0000,0.0000,nan,0.018,16\n66967,150.996094,2.388015,237.313912,42.939977,1,0.4580,0.4551,0.0163,42.0168,0.021,88\n66999,33.222656,-4.780192,167.51
5653,-60.396584,1,0.3635,0.4147,0.0332,41.7770,0.018,90\n67245,150.292969,2.686724,236.427488,42.541447,1,0.0482,0.0598,0.0147,37.1379,0.016,42\n67480,1.753247,-46.768478,324.030235,-68.498041,1,0.0000,0.0000,0.0000,nan,0.014,65\n67514,34.101562,-5.829153,170.247753,-60.638325,1,0.2756,0.5256,0.1081,42.3913,0.019,90\n67686,359.415588,-46.768478,327.729895,-67.686097,1,0.1785,0.2040,0.0109,40.0042,0.009,90\n67730,52.910156,-25.944481,220.366350,-54.301439,1,0.3927,0.3926,0.0321,41.6368,0.010,90\n67898,347.812500,-63.448284,320.128971,-50.202348,1,0.3824,0.3802,0.0081,41.5546,0.021,42\n67981,151.523438,3.134927,236.900695,43.803170,1,0.0000,0.0000,0.0000,nan,0.019,65\n68003,150.820312,3.732834,235.666318,43.572109,1,0.4201,0.4099,0.0162,41.7474,0.016,90\n68276,33.398438,-3.732834,166.492280,-59.466614,1,0.0251,0.0342,0.0167,35.8818,0.022,42\n68298,349.429535,-62.508568,320.039643,-51.393745,1,0.6264,0.6544,0.1479,42.9683,0.020,90\n68667,348.595886,-63.072620,320.023289,-50.713060,1,0.0000,0.0000,0.0000,nan,0.021,16\n68835,348.908447,-63.823658,319.169886,-50.176186,1,0.0177,0.0504,0.0318,36.7530,0.018,42\n68886,2.457983,-45.389202,324.632685,-69.945696,1,0.3991,0.4463,0.3629,41.9666,0.011,52\n69271,148.710938,2.836105,235.050801,41.328739,1,0.4269,0.4491,0.8217,41.9826,0.031,90\n69490,51.855469,-26.276812,220.627031,-55.293792,1,0.2919,0.4825,0.1693,42.1687,0.014,42\n69767,1.694561,-45.191612,326.278557,-69.858253,1,0.1603,2.4521,1.2066,46.4968,0.011,62\n70046,351.321442,-64.198746,317.458993,-50.429931,1,0.3441,0.3129,0.0280,41.0610,0.023,90\n70135,0.949367,-45.586655,326.991548,-69.251686,1,0.4500,0.4728,0.0167,42.1160,0.013,42\n70171,53.789062,-27.784405,223.685697,-53.845803,1,0.4708,0.5059,0.0358,42.2916,0.009,90\n70272,34.277344,-5.079716,169.526841,-59.956640,1,0.3088,0.3314,0.0357,41.2058,0.019,88\n70276,151.171875,1.342993,238.602520,42.464379,1,0.8102,0.7948,0.0774,43.4853,0.026,88\n70430,150.820312,1.641510,237.994507,42.358984,1,0.0451,0.0858,0.0255,37.962
4,0.020,42\n70571,149.414062,1.940072,236.565366,41.393323,1,0.3902,0.4137,1.1078,41.7706,0.018,90\n70816,349.429535,-62.508568,320.039643,-51.393745,1,0.0000,0.0000,0.0000,nan,0.020,65\n70898,151.171875,2.238686,237.619933,42.994783,1,0.5346,0.5478,0.0157,42.4996,0.024,90\n70977,51.328125,-27.784405,223.130589,-55.999499,1,0.3428,0.3404,0.0099,41.2737,0.013,90\n71068,34.453125,-5.229529,169.987075,-59.956185,1,0.2726,0.2679,1.0038,40.6730,0.019,88\n71080,53.964844,-28.630989,225.142950,-53.813613,1,0.0000,0.0000,0.0000,nan,0.009,92\n71084,52.558594,-27.279613,222.538937,-54.845107,1,0.1522,0.1472,0.0208,39.2216,0.008,42\n71126,53.964844,-28.630989,225.142950,-53.813613,1,0.3021,0.5146,0.5741,42.3363,0.009,62\n71438,52.558594,-27.279613,222.538937,-54.845107,1,0.0000,0.0000,0.0000,nan,0.008,65\n71676,53.437500,-29.142223,225.908120,-54.336118,1,0.4396,0.4103,0.0209,41.7496,0.008,88\n71890,348.595886,-63.072620,320.023289,-50.713060,1,0.7036,0.7024,0.0078,43.1558,0.021,88\n71954,1.666667,-44.399834,327.519190,-70.529554,1,0.2483,0.2571,0.6448,40.5711,0.009,90\n72053,53.613281,-27.953188,223.929533,-54.024772,1,0.2832,0.2616,0.0190,40.6141,0.007,88\n72256,358.636353,-46.768478,328.890146,-67.388837,1,0.0000,0.0000,0.0000,nan,0.008,65\n72337,34.277344,-5.079716,169.526841,-59.956640,1,0.2449,0.2107,0.1165,40.0824,0.019,90\n72385,150.117188,2.836105,236.124718,42.483719,1,0.3029,0.2983,0.3068,40.9409,0.016,67\n72426,51.679688,-27.447618,222.618229,-55.642263,1,0.5166,0.3846,0.3149,41.5839,0.010,90\n72428,33.574219,-5.379379,168.838090,-60.637536,1,0.2214,2.4663,0.4396,46.5120,0.017,90\n72489,53.613281,-26.944359,222.237403,-53.863858,1,0.8358,0.8312,0.0197,43.6047,0.009,90\n72525,34.101562,-5.829153,170.247753,-60.638325,1,0.2407,0.2580,0.0145,40.5802,0.019,90\n72735,151.699219,3.583322,236.533224,44.205648,1,0.2648,0.2313,0.0265,40.3104,0.016,67\n73031,34.277344,-5.079716,169.526841,-59.956640,1,0.3151,0.5833,0.2442,42.6648,0.019,52\n73236,33.398438,-3.732834,166.49228
0,-59.466614,1,0.1398,0.5280,0.4528,42.4036,0.022,90\n73339,351.299988,-62.320400,319.038597,-52.026867,1,0.5183,0.2010,0.3771,39.9686,0.018,90\n73433,349.966217,-62.696659,319.542989,-51.376556,1,0.0000,0.0000,0.0000,nan,0.021,65\n73509,34.453125,-5.229529,169.987075,-59.956185,1,0.2953,0.2879,0.0259,40.8522,0.019,42\n73610,34.277344,-5.079716,169.526841,-59.956640,1,0.8134,1.5079,0.3508,45.2031,0.019,95\n74093,351.259003,-64.386185,317.344860,-50.255113,1,0.3940,0.4643,1.0625,42.0686,0.020,90\n75116,349.891296,-64.573555,317.972107,-49.786192,1,0.0000,0.0000,0.0000,nan,0.023,16\n75223,351.382965,-64.011238,317.574052,-50.604657,1,0.5652,0.5648,0.0157,42.5800,0.023,90\n75562,346.562500,-63.448284,320.824720,-49.866957,1,0.0000,0.0000,0.0000,nan,0.021,65\n75598,1.723404,-45.981140,325.117958,-69.180825,1,0.1652,0.1477,0.0151,39.2287,0.010,42\n75646,348.529419,-61.755440,321.293980,-51.763351,1,0.2103,0.2089,0.0083,40.0620,0.016,67\n75754,151.347656,3.583322,236.252362,43.918627,1,0.0000,0.0000,0.0000,nan,0.015,16\n75792,148.886719,2.686724,235.347248,41.389003,1,0.3699,0.3602,0.0451,41.4171,0.028,90\n75886,358.636353,-46.768478,328.890146,-67.388837,1,0.3985,0.3735,0.0375,41.5091,0.008,90\n75987,54.667969,-27.615883,223.610785,-53.050840,1,0.9014,0.8506,0.0414,43.6664,0.009,88\n76242,152.050781,2.985506,237.495952,44.143927,1,0.4916,0.5572,0.3767,42.5443,0.019,90\n76304,348.595886,-63.072620,320.023289,-50.713060,1,2.4303,2.6811,1.0262,46.7317,0.021,95\n76305,349.891296,-64.573555,317.972107,-49.786192,1,0.0000,0.0000,0.0000,nan,0.023,16\n76639,346.500000,-62.320400,321.951129,-50.736054,1,0.5322,0.5077,0.0148,42.3012,0.020,90\n77010,34.453125,-5.229529,169.987075,-59.956185,1,0.0000,0.0000,0.0000,nan,0.019,65\n77041,346.276581,-64.011238,320.448031,-49.344136,1,0.3793,0.4070,0.6742,41.7290,0.019,90\n77157,51.328125,-27.784405,223.130589,-55.999499,1,0.0000,0.0000,0.0000,nan,0.013,6\n77192,151.347656,4.181528,235.568369,44.259942,1,0.3227,0.2764,0.0329,40.7513,0.016
,62\n77222,151.171875,1.342993,238.602520,42.464379,1,0.4342,0.3228,0.3114,41.1398,0.026,90\n77292,34.277344,-5.079716,169.526841,-59.956640,1,0.1250,0.1890,0.2309,39.8194,0.019,62\n77306,148.710938,2.836105,235.050801,41.328739,1,0.7167,0.6876,0.0165,43.0996,0.031,90\n77340,346.500000,-62.320400,321.951129,-50.736054,1,0.8207,0.8217,0.0401,43.5741,0.020,88\n77391,346.130127,-63.072620,321.423103,-50.042305,1,1.3214,1.4667,0.1459,45.1288,0.020,95\n77518,53.437500,-29.142223,225.908120,-54.336118,1,0.3531,0.3522,0.0125,41.3602,0.008,67\n77623,0.190678,-45.783966,327.956322,-68.803772,1,0.2765,0.2994,0.0135,40.9507,0.005,42\n77825,349.046051,-61.943836,320.796530,-51.753706,1,0.1071,0.1065,0.0160,38.4603,0.017,42\n77906,359.811707,-45.191612,329.485675,-69.150905,1,0.1265,0.0738,0.0297,37.6154,0.010,90\n77952,358.665253,-45.783966,330.353593,-68.203652,1,0.1258,0.0858,0.0192,37.9614,0.009,90\n78095,151.699219,3.583322,236.533224,44.205648,1,0.0000,0.0000,0.0000,nan,0.016,65\n78233,148.710938,2.836105,235.050801,41.328739,1,0.2391,0.2060,0.0706,40.0283,0.031,90\n78677,53.437500,-29.142223,225.908120,-54.336118,1,0.0000,0.0000,0.0000,nan,0.008,65\n78702,349.891296,-64.573555,317.972107,-49.786192,1,0.0000,0.0000,0.0000,nan,0.023,92\n78705,350.230255,-61.943836,320.053946,-52.070537,1,0.0000,0.0000,0.0000,nan,0.017,92\n78727,51.679688,-27.447618,222.618229,-55.642263,1,0.1592,0.1479,0.0203,39.2318,0.010,90\n78974,152.050781,3.284369,237.157374,44.318466,1,0.6592,0.6572,0.0085,42.9797,0.019,90\n79002,2.097458,-45.783966,324.737840,-69.478613,1,0.4446,0.2938,0.8441,40.9035,0.011,90\n79155,1.666667,-44.399834,327.519190,-70.529554,1,0.4097,0.4154,0.0160,41.7815,0.009,42\n79235,52.031250,-26.443335,220.963669,-55.168557,1,0.1639,0.1669,0.0564,39.5206,0.014,42\n79428,33.398438,-3.732834,166.492280,-59.466614,1,0.0000,0.0000,0.0000,nan,0.022,65\n79515,53.964844,-28.630989,225.142950,-53.813613,1,0.2207,0.2188,1.1526,40.1744,0.009,90\n79743,32.695312,-4.929937,166.868469,-60.84
1230,1,0.6266,0.6062,0.0094,42.7661,0.018,90\n79819,150.820312,1.641510,237.994507,42.358984,1,0.1742,0.1725,0.0134,39.5994,0.020,42\n79921,352.132874,-63.636005,317.424173,-51.095855,1,0.1638,0.1709,0.0219,39.5766,0.021,42\n80155,53.085938,-28.122234,224.100909,-54.509752,1,0.3628,0.3633,0.0418,41.4386,0.007,90\n80205,33.925781,-5.979157,170.179895,-60.866303,1,0.8134,0.7508,0.1176,43.3333,0.022,95\n80780,152.050781,3.284369,237.157374,44.318466,1,0.1684,0.1902,0.0581,39.8341,0.019,42\n80832,150.117188,2.238686,236.784618,42.139082,1,0.3950,0.3665,0.0146,41.4614,0.016,90\n80852,151.347656,3.583322,236.252362,43.918627,1,0.2569,0.2659,0.0082,40.6545,0.015,62\n80903,52.910156,-26.276812,220.926149,-54.363918,1,0.4140,0.4279,0.0171,41.8576,0.008,90\n81000,150.996094,2.985506,236.647967,43.287350,1,0.2989,0.3106,0.0100,41.0422,0.020,42\n81252,33.574219,-4.780192,168.064587,-60.175886,1,0.1494,0.1737,0.6985,39.6157,0.019,62\n81464,149.414062,3.433834,234.919132,42.245550,1,0.2854,0.2818,0.3207,40.7987,0.027,42\n81665,53.789062,-27.784405,223.685697,-53.845803,1,0.0000,0.0000,0.0000,nan,0.009,65\n82302,346.276581,-64.011238,320.448031,-49.344136,1,0.0000,0.0000,0.0000,nan,0.019,65\n82401,51.855469,-28.630989,224.733260,-55.649872,1,0.0000,0.0000,0.0000,nan,0.009,16\n82409,348.595886,-63.072620,320.023289,-50.713060,1,0.1147,2.6274,1.1973,46.6786,0.021,42\n82702,35.683594,-5.379379,171.992947,-59.253501,1,0.3194,0.2986,0.0204,40.9437,0.020,90\n82740,349.615387,-63.636005,318.927246,-50.506542,1,1.0263,1.0228,0.2611,44.1609,0.018,88\n83348,349.429535,-62.508568,320.039643,-51.393745,1,0.0000,0.0000,0.0000,nan,0.020,6\n83410,51.855469,-26.276812,220.627031,-55.293792,1,0.5138,0.5071,0.0093,42.2980,0.014,90\n83462,150.820312,3.134927,236.341348,43.230123,1,0.3424,0.3239,0.0179,41.1483,0.016,90\n83634,349.285706,-62.884678,319.786163,-51.046461,1,2.1107,1.3813,0.4158,44.9680,0.018,95\n83821,359.415588,-46.768478,327.729895,-67.686097,1,0.0000,0.0000,0.0000,nan,0.009,65\n83872
,149.589844,3.583322,234.885369,42.474696,1,0.2160,0.2054,0.2819,40.0211,0.024,90\n83954,346.276581,-64.011238,320.448031,-49.344136,1,0.4390,0.4535,0.0428,42.0079,0.019,90\n83961,1.753247,-46.768478,324.030235,-68.498041,1,0.2177,0.2211,0.0151,40.2004,0.014,90\n84306,151.171875,2.537361,237.288526,43.169764,1,0.0000,0.0000,0.0000,nan,0.024,16\n84716,151.523438,3.134927,236.900695,43.803170,1,0.4303,0.2925,1.0152,40.8923,0.019,90\n84758,349.615387,-63.636005,318.927246,-50.506542,1,0.0000,0.0000,0.0000,nan,0.018,65\n85125,53.085938,-28.122234,224.100909,-54.509752,1,0.0000,0.0000,0.0000,nan,0.007,65\n85470,33.398438,-4.331149,167.226341,-59.936551,1,0.0000,0.0000,0.0000,nan,0.018,65\n85490,348.908447,-63.823658,319.169886,-50.176186,1,0.2537,0.5506,0.2880,42.5130,0.018,90\n85789,53.437500,-29.142223,225.908120,-54.336118,1,0.2990,0.2893,0.0367,40.8644,0.008,62\n86456,33.574219,-5.379379,168.838090,-60.637536,1,0.2127,0.2191,0.0141,40.1785,0.017,42\n86487,0.574468,-45.981140,327.041068,-68.778764,1,0.3850,0.3847,0.0333,41.5849,0.006,90\n86759,348.529419,-61.755440,321.293980,-51.763351,1,0.0000,0.0000,0.0000,nan,0.016,65\n86834,149.589844,3.583322,234.885369,42.474696,1,0.3160,0.4622,0.4742,42.0568,0.024,90\n87180,150.820312,1.641510,237.994507,42.358984,1,0.1782,0.1820,0.0103,39.7288,0.020,62\n87467,150.820312,3.134927,236.341348,43.230123,1,0.0000,0.0000,0.0000,nan,0.016,65\n87498,152.050781,2.985506,237.495952,44.143927,1,0.0000,0.0000,0.0000,nan,0.019,16\n87608,358.665253,-45.783966,330.353593,-68.203652,1,0.1894,0.2101,0.0179,40.0763,0.009,90\n87685,347.861847,-61.943836,321.519104,-51.424048,1,0.3743,0.3612,0.0219,41.4237,0.017,90\n87703,51.855469,-26.276812,220.627031,-55.293792,1,0.3246,0.5019,0.3014,42.2711,0.014,90\n88073,347.013428,-62.508568,321.472056,-50.735330,1,0.0000,0.0000,0.0000,nan,0.018,92\n88180,149.414062,1.940072,236.565366,41.393323,1,0.2996,2.6936,1.3051,46.7439,0.018,67\n88195,53.964844,-28.630989,225.142950,-53.813613,1,0.3403,0.3448,0.030
4,41.3063,0.009,90\n88511,53.613281,-28.630989,225.073365,-54.119461,1,0.0000,0.0000,0.0000,nan,0.006,65\n88587,352.711273,-63.823658,316.922299,-51.059403,1,0.7839,0.7789,0.0120,43.4314,0.024,90\n88600,351.321442,-64.198746,317.458993,-50.429931,1,0.3303,0.3150,0.3295,41.0778,0.023,90\n88627,32.871094,-4.780192,166.959493,-60.615132,1,0.3934,0.4406,0.0433,41.9329,0.017,67\n88980,351.382965,-64.011238,317.574052,-50.604657,1,0.0000,0.0000,0.0000,nan,0.023,16\n89157,348.908447,-63.823658,319.169886,-50.176186,1,0.0000,0.0000,0.0000,nan,0.018,92\n89298,359.811707,-45.191612,329.485675,-69.150905,1,0.0000,0.0000,0.0000,nan,0.010,16\n89387,346.130127,-63.072620,321.423103,-50.042305,1,0.6210,0.4739,0.0648,42.1217,0.020,90\n89455,0.189873,-45.586655,328.254458,-68.969298,1,0.2603,0.2485,0.0142,40.4873,0.007,90\n89709,52.910156,-25.944481,220.366350,-54.301439,1,0.0000,0.0000,0.0000,nan,0.010,92\n89999,149.238281,3.882372,234.283829,42.351155,1,0.2640,0.3194,0.0576,41.1125,0.033,90\n90399,51.328125,-27.784405,223.130589,-55.999499,1,0.0000,0.0000,0.0000,nan,0.013,65\n90534,152.050781,3.284369,237.157374,44.318466,1,0.1759,0.1913,0.0131,39.8481,0.019,62\n90645,51.855469,-28.630989,224.733260,-55.649872,1,0.4496,0.4486,0.0173,41.9797,0.009,90\n90814,348.595886,-63.072620,320.023289,-50.713060,1,0.2252,0.2208,0.0172,40.1971,0.021,62\n90892,152.050781,3.284369,237.157374,44.318466,1,0.0322,0.0365,0.0161,36.0320,0.019,52\n91219,150.820312,3.134927,236.341348,43.230123,1,0.2921,0.5086,0.1231,42.3058,0.016,90\n91291,352.711273,-63.823658,316.922299,-51.059403,1,0.1826,0.1746,0.0132,39.6288,0.024,90\n91335,151.699219,3.583322,236.533224,44.205648,1,1.0655,1.4889,0.2658,45.1692,0.016,88\n91337,53.613281,-27.953188,223.929533,-54.024772,1,0.0000,0.0000,0.0000,nan,0.007,65\n91460,53.613281,-28.630989,225.073365,-54.119461,1,0.2782,0.5294,0.2771,42.4103,0.006,90\n91610,346.130127,-63.072620,321.423103,-50.042305,1,0.2326,0.2302,0.0070,40.2992,0.020,42\n91644,349.891296,-64.573555,317
.972107,-49.786192,1,0.1893,0.1839,0.1322,39.7535,0.023,90\n91917,152.050781,2.985506,237.495952,44.143927,1,0.2448,0.2740,0.0117,40.7289,0.019,90\n91988,150.468750,1.641510,237.714575,42.075234,1,0.0000,0.0000,0.0000,nan,0.017,65\n92334,350.230255,-61.943836,320.053946,-52.070537,1,0.0000,0.0000,0.0000,nan,0.017,65\n92354,51.328125,-27.784405,223.130589,-55.999499,1,0.5449,0.5807,0.0374,42.6530,0.013,88\n92566,1.753247,-46.768478,324.030235,-68.498041,1,0.0000,0.0000,0.0000,nan,0.014,16\n92577,351.734680,-62.884678,318.284128,-51.651217,1,0.4426,0.3854,0.6831,41.5893,0.019,90\n92904,347.812500,-63.448284,320.128971,-50.202348,1,0.2536,0.2814,0.0325,40.7958,0.021,62\n92929,348.908447,-63.823658,319.169886,-50.176186,1,0.0000,0.0000,0.0000,nan,0.018,65\n93333,151.171875,1.342993,238.602520,42.464379,1,0.4029,0.3217,0.4659,41.1310,0.026,90\n93362,51.855469,-28.630989,224.733260,-55.649872,1,0.0000,0.0000,0.0000,nan,0.009,92\n93509,51.855469,-26.276812,220.627031,-55.293792,1,0.0587,0.0644,0.0151,37.3066,0.014,42\n93663,53.964844,-28.630989,225.142950,-53.813613,1,0.4196,0.4099,0.0367,41.7471,0.009,90\n94004,52.910156,-27.953188,223.774083,-54.639214,1,0.2354,0.2257,0.0162,40.2510,0.007,62\n94107,34.980469,-6.279288,172.180075,-60.389399,1,0.0000,0.0000,0.0000,nan,0.023,16\n94229,52.207031,-28.291550,224.208534,-55.300157,1,0.3853,0.4477,0.0268,41.9745,0.007,90\n94613,2.457983,-45.389202,324.632685,-69.945696,1,0.0739,0.0616,0.0188,37.2044,0.011,62\n94704,349.966217,-62.696659,319.542989,-51.376556,1,0.2722,0.2658,0.0080,40.6537,0.021,90\n95127,351.299988,-62.320400,319.038597,-52.026867,1,1.8136,1.6691,0.2276,45.4751,0.018,95\n95147,149.238281,3.882372,234.283829,42.351155,1,0.0000,0.0000,0.0000,nan,0.033,65\n95369,53.613281,-28.630989,225.073365,-54.119461,1,0.1313,0.5834,0.7684,42.6652,0.006,62\n95455,351.259003,-64.386185,317.344860,-50.255113,1,0.4942,0.5590,0.1899,42.5528,0.020,90\n95483,150.820312,3.732834,235.666318,43.572109,1,0.8512,0.8164,0.0436,43.5568,0.01
6,88\n95508,149.414062,2.238686,236.239766,41.565558,1,0.6107,0.5480,0.0306,42.5009,0.017,88\n95566,33.574219,-5.079716,168.448505,-60.407218,1,0.3880,0.4580,0.0992,42.0331,0.016,90\n95580,34.101562,-5.829153,170.247753,-60.638325,1,0.4200,0.4311,0.0132,41.8770,0.019,90\n95690,351.734680,-62.884678,318.284128,-51.651217,1,0.3442,0.3507,0.0052,41.3494,0.019,42\n95741,35.332031,-5.979157,172.286722,-59.931743,1,0.5064,0.5368,0.0209,42.4469,0.022,52\n95864,53.085938,-27.111860,222.384291,-54.355086,1,0.0000,0.0000,0.0000,nan,0.007,92\n96284,152.050781,3.284369,237.157374,44.318466,1,0.1593,2.4014,0.4125,46.4417,0.019,42\n97053,150.117188,3.732834,235.120533,42.993809,1,0.0000,0.0000,0.0000,nan,0.020,65\n97406,347.846710,-64.760857,318.929827,-49.143596,1,0.1163,0.0897,0.8592,38.0649,0.019,15\n97687,346.655182,-63.260487,320.952196,-50.040935,1,0.4603,0.4626,0.0271,42.0594,0.019,90\n97850,351.259003,-64.386185,317.344860,-50.255113,1,0.3090,0.3005,0.0119,40.9599,0.020,90\n97920,150.820312,3.134927,236.341348,43.230123,1,0.3588,0.3524,0.0238,41.3611,0.016,67\n97957,1.723404,-45.981140,325.117958,-69.180825,1,0.2947,0.2906,0.0093,40.8754,0.010,90\n98533,349.615387,-63.636005,318.927246,-50.506542,1,0.0000,0.0000,0.0000,nan,0.018,65\n98570,1.708861,-45.586655,325.688716,-69.520253,1,0.6197,0.5296,0.0328,42.4113,0.011,42\n98749,33.750000,-4.630479,168.146242,-59.949072,1,0.1473,0.0973,0.6573,38.2516,0.019,67\n99013,350.230255,-61.943836,320.053946,-52.070537,1,0.5255,0.5303,0.0220,42.4150,0.017,90\n99050,52.207031,-26.610098,221.298836,-55.042928,1,0.2972,0.3036,0.1492,40.9851,0.014,42\n99261,53.613281,-27.953188,223.929533,-54.024772,1,0.1411,0.0857,0.0282,37.9579,0.007,90\n99280,359.811707,-45.191612,329.485675,-69.150905,1,0.2037,0.2150,0.0079,40.1324,0.010,62\n99293,347.846710,-64.760857,318.929827,-49.143596,1,0.3106,0.3644,0.0251,41.4467,0.019,90\n99294,348.529419,-61.755440,321.293980,-51.763351,1,0.5552,0.5204,0.0185,42.3656,0.016,90\n99452,352.711273,-63.823658,316
.922299,-51.059403,1,0.8420,0.8479,0.0465,43.6581,0.024,88\n99642,347.846710,-64.760857,318.929827,-49.143596,1,0.2320,0.2369,1.1397,40.3699,0.019,90\n99862,52.207031,-28.291550,224.208534,-55.300157,1,0.5810,0.5733,0.0141,42.6194,0.007,42\n99932,51.855469,-28.630989,224.733260,-55.649872,1,0.0000,0.0000,0.0000,nan,0.009,65\n100057,346.130127,-63.072620,321.423103,-50.042305,1,0.8320,0.8006,0.0197,43.5045,0.020,90\n100097,348.529419,-61.755440,321.293980,-51.763351,1,0.1566,0.1745,0.0186,39.6271,0.016,42\n100133,346.655182,-63.260487,320.952196,-50.040935,1,0.2309,0.2215,0.0190,40.2046,0.019,42\n100331,52.207031,-26.610098,221.298836,-55.042928,1,0.1684,0.1999,0.0173,39.9556,0.014,42\n101050,32.695312,-4.929937,166.868469,-60.841230,1,0.2222,0.2529,0.0906,40.5303,0.018,90\n101298,34.277344,-5.079716,169.526841,-59.956640,1,0.1848,0.1983,0.0069,39.9355,0.019,90\n101374,51.855469,-26.276812,220.627031,-55.293792,1,0.0000,0.0000,0.0000,nan,0.014,16\n101489,359.816315,-44.003082,331.451340,-70.123054,1,0.0721,0.0695,0.0025,37.4803,0.013,42\n101508,358.648071,-46.375080,329.462659,-67.716008,1,0.0656,0.0249,0.0130,35.1822,0.009,90\n101890,346.276581,-64.011238,320.448031,-49.344136,1,0.0000,0.0000,0.0000,nan,0.019,65\n102036,53.789062,-27.784405,223.685697,-53.845803,1,0.2331,0.2269,0.0197,40.2635,0.009,42\n102330,148.886719,2.686724,235.347248,41.389003,1,0.0000,0.0000,0.0000,nan,0.028,16\n102343,51.328125,-27.447618,222.535046,-55.950727,1,0.1081,0.1422,0.0164,39.1394,0.013,67\n102363,349.160583,-64.760857,318.219706,-49.458924,1,0.8046,0.7835,0.0167,43.4470,0.020,90\n102745,349.615387,-63.636005,318.927246,-50.506542,1,0.2460,0.2333,1.0359,40.3315,0.018,90\n102823,347.861847,-61.943836,321.519104,-51.424048,1,0.0000,0.0000,0.0000,nan,0.017,65\n102864,0.574468,-45.981140,327.041068,-68.778764,1,0.2123,0.2254,0.0097,40.2473,0.006,42\n103023,359.811707,-45.191612,329.485675,-69.150905,1,0.3221,0.2994,0.0125,40.9500,0.010,90\n103026,348.529419,-61.755440,321.293980,-51.76
3351,1,0.1949,0.2006,0.0174,39.9637,0.016,42\n103100,34.101562,-5.829153,170.247753,-60.638325,1,0.5057,0.4350,0.0240,41.9001,0.019,90\n103145,349.160583,-64.760857,318.219706,-49.458924,1,0.6603,0.6380,0.0433,42.9010,0.020,90\n103162,350.230255,-61.943836,320.053946,-52.070537,1,0.1101,0.1207,0.0096,38.7539,0.017,90\n103171,1.753247,-46.768478,324.030235,-68.498041,1,0.0000,0.0000,0.0000,nan,0.014,92\n103350,359.446716,-44.201530,331.730015,-69.805709,1,0.6777,0.6935,0.0507,43.1220,0.010,90\n103354,150.117188,2.836105,236.124718,42.483719,1,2.7124,2.7655,0.0801,46.8131,0.016,88\n103572,52.207031,-28.291550,224.208534,-55.300157,1,0.0000,0.0000,0.0000,nan,0.007,65\n103927,150.820312,1.641510,237.994507,42.358984,1,0.2130,0.2282,0.0218,40.2779,0.020,90\n103948,33.574219,-4.780192,168.064587,-60.175886,1,0.2257,0.2319,0.1182,40.3169,0.019,52\n103967,0.190678,-45.783966,327.956322,-68.803772,1,0.5094,0.4789,0.0608,42.1493,0.005,42\n104212,53.789062,-27.784405,223.685697,-53.845803,1,0.3895,0.3850,1.0136,41.5869,0.009,90\n104397,349.966217,-62.696659,319.542989,-51.376556,1,0.3220,0.3038,0.0187,40.9870,0.021,90\n104476,51.855469,-28.630989,224.733260,-55.649872,1,1.4820,1.4819,0.1602,45.1565,0.009,95\n104498,149.238281,3.882372,234.283829,42.351155,1,0.1312,0.1100,0.0094,38.5371,0.033,67\n104523,152.050781,3.284369,237.157374,44.318466,1,0.0000,0.0000,0.0000,nan,0.019,16\n104526,349.615387,-63.636005,318.927246,-50.506542,1,0.2353,0.2389,0.0148,40.3902,0.018,42\n104701,352.132874,-63.636005,317.424173,-51.095855,1,0.0000,0.0000,0.0000,nan,0.021,65\n105744,348.908447,-63.823658,319.169886,-50.176186,1,0.3087,0.3318,1.0382,41.2090,0.018,90\n106177,358.312500,-44.993881,332.185785,-68.685906,1,0.6593,0.5956,0.1033,42.7194,0.009,90\n106429,152.050781,2.985506,237.495952,44.143927,1,0.0000,0.0000,0.0000,nan,0.019,16\n106434,33.574219,-4.780192,168.064587,-60.175886,1,0.2861,0.2795,0.0098,40.7789,0.019,62\n106594,0.589520,-47.161343,325.385896,-67.769893,1,0.2403,0.2370,1.059
1,40.3710,0.009,88\n106730,347.013428,-62.508568,321.472056,-50.735330,1,0.1576,2.2437,0.3333,46.2621,0.018,52\n106743,0.574468,-45.981140,327.041068,-68.778764,1,0.2154,0.2539,0.0150,40.5405,0.006,90\n106818,348.595886,-63.072620,320.023289,-50.713060,1,0.0000,0.0000,0.0000,nan,0.021,92\n106937,53.085938,-28.122234,224.100909,-54.509752,1,0.3656,0.3882,0.8841,41.6079,0.007,52\n107193,359.058563,-45.191612,330.695783,-68.844915,1,0.2109,0.1761,0.0082,39.6492,0.011,90\n107439,150.468750,3.732834,235.392208,43.283244,1,0.9187,1.5247,0.3061,45.2328,0.020,88\n107451,347.861847,-61.943836,321.519104,-51.424048,1,0.0000,0.0000,0.0000,nan,0.017,65\n107568,33.750000,-4.630479,168.146242,-59.949072,1,0.1042,0.0802,0.0143,37.8074,0.019,67\n107615,150.117188,2.836105,236.124718,42.483719,1,0.1323,2.3872,0.9055,46.4260,0.016,42\n107712,53.261719,-27.615883,223.280041,-54.281374,1,0.1282,0.1450,0.0286,39.1863,0.006,90\n107901,359.058563,-45.191612,330.695783,-68.844915,1,0.0000,0.0000,0.0000,nan,0.011,65\n108021,150.996094,4.181528,235.291975,43.970869,1,0.2815,0.3756,0.8142,41.5232,0.015,62\n108141,53.789062,-27.784405,223.685697,-53.845803,1,0.2179,0.2239,0.0151,40.2317,0.009,90\n108229,351.321442,-64.198746,317.458993,-50.429931,1,0.1858,0.2022,1.1539,39.9826,0.023,90\n108358,349.891296,-64.573555,317.972107,-49.786192,1,0.0769,0.2433,1.0869,40.4350,0.023,52\n108487,359.816315,-44.003082,331.451340,-70.123054,1,0.1787,0.2401,0.6645,40.4026,0.013,42\n108554,33.222656,-4.780192,167.515653,-60.396584,1,0.4478,0.4542,0.0258,42.0120,0.018,90\n108693,0.574468,-45.981140,327.041068,-68.778764,1,0.4868,0.4183,0.0364,41.7991,0.006,90\n108739,53.085938,-28.122234,224.100909,-54.509752,1,0.4453,0.4388,0.0272,41.9226,0.007,90\n108888,358.648071,-46.375080,329.462659,-67.716008,1,0.6247,0.5951,0.0599,42.7172,0.009,90\n109036,1.753247,-46.768478,324.030235,-68.498041,1,0.1924,0.1862,0.2684,39.7838,0.014,90\n109057,348.595886,-63.072620,320.023289,-50.713060,1,0.4499,0.3307,1.0499,41.2005,0
.021,90\n109294,359.814819,-44.399834,330.775011,-69.801007,1,0.3014,0.3643,0.8070,41.4461,0.009,90\n109516,1.753247,-46.768478,324.030235,-68.498041,1,0.3828,0.3941,0.0095,41.6466,0.014,95\n109654,347.013428,-62.508568,321.472056,-50.735330,1,0.1395,0.1231,0.0131,38.8003,0.018,90\n109860,0.929752,-44.597992,328.531426,-70.083244,1,0.2708,0.2639,0.0184,40.6357,0.011,42\n109903,150.996094,2.388015,237.313912,42.939977,1,0.3892,0.3579,0.0137,41.4006,0.021,62\n109937,149.414062,3.433834,234.919132,42.245550,1,0.6620,0.6423,0.0188,42.9190,0.027,88\n110241,34.453125,-5.229529,169.987075,-59.956185,1,0.1072,0.0924,0.4007,38.1327,0.019,42\n110257,148.886719,2.686724,235.347248,41.389003,1,0.9318,0.8495,0.0379,43.6631,0.028,88\n110270,54.667969,-27.615883,223.610785,-53.050840,1,0.8259,0.8541,0.0446,43.6776,0.009,90\n110304,53.261719,-27.615883,223.280041,-54.281374,1,0.2475,0.5457,0.2676,42.4899,0.006,62\n110387,151.347656,4.181528,235.568369,44.259942,1,0.4318,0.4218,0.6713,41.8205,0.016,90\n110551,51.328125,-27.447618,222.535046,-55.950727,1,0.4861,0.3027,0.3931,40.9783,0.013,88\n110768,351.734680,-62.884678,318.284128,-51.651217,1,2.9378,2.8626,1.1139,46.9035,0.019,88\n110958,349.615387,-63.636005,318.927246,-50.506542,1,0.4630,0.4742,0.5031,42.1236,0.018,90\n111281,0.589520,-47.161343,325.385896,-67.769893,1,1.4967,1.6014,0.0895,45.3643,0.009,42\n111283,150.468750,1.641510,237.714575,42.075234,1,0.6565,0.6363,0.0252,42.8941,0.017,90\n111448,51.855469,-27.953188,223.543603,-55.561470,1,0.0000,0.0000,0.0000,nan,0.008,65\n111650,351.734680,-62.884678,318.284128,-51.651217,1,0.0000,0.0000,0.0000,nan,0.019,65\n111795,34.453125,-5.229529,169.987075,-59.956185,1,0.4070,0.3077,1.0584,41.0187,0.019,90\n111799,52.558594,-27.279613,222.538937,-54.845107,1,0.2813,0.5741,0.4749,42.6227,0.008,42\n112151,349.966217,-62.696659,319.542989,-51.376556,1,0.5341,0.5419,0.0204,42.4716,0.021,90\n112462,33.574219,-4.780192,168.064587,-60.175886,1,0.4045,0.4356,0.0299,41.9040,0.019,90\n112629,
346.276581,-64.011238,320.448031,-49.344136,1,0.0000,0.0000,0.0000,nan,0.019,16\n112717,35.683594,-5.379379,171.992947,-59.253501,1,0.2867,0.3138,0.8216,41.0680,0.020,67\n112764,347.812500,-63.448284,320.128971,-50.202348,1,0.3208,0.2995,0.0255,40.9517,0.021,90\n112782,32.871094,-4.780192,166.959493,-60.615132,1,0.4211,0.4038,0.0224,41.7088,0.017,90\n112886,351.953644,-62.132156,318.777388,-52.347124,1,0.0000,0.0000,0.0000,nan,0.019,65\n113028,151.699219,3.583322,236.533224,44.205648,1,0.0000,0.0000,0.0000,nan,0.016,65\n113206,52.207031,-28.291550,224.208534,-55.300157,1,0.2446,0.2208,0.0078,40.1969,0.007,42\n113335,358.636353,-46.768478,328.890146,-67.388837,1,0.0000,0.0000,0.0000,nan,0.008,65\n113625,149.589844,3.583322,234.885369,42.474696,1,0.2615,0.2381,0.0160,40.3824,0.024,90\n113669,351.299988,-62.320400,319.038597,-52.026867,1,0.2914,0.2905,0.0061,40.8748,0.018,15\n113982,359.805206,-46.768478,327.135979,-67.829903,1,0.7438,0.9216,0.1482,43.8813,0.011,90\n114191,33.574219,-6.579593,170.455585,-61.548219,1,0.0000,0.0000,0.0000,nan,0.021,65\n114341,151.699219,3.583322,236.533224,44.205648,1,0.1709,2.3232,0.5051,46.3542,0.016,42\n114626,2.071130,-45.191612,325.606223,-69.989264,1,0.5702,0.5408,0.0178,42.4659,0.011,90\n114670,150.996094,2.388015,237.313912,42.939977,1,0.2930,0.2607,0.0670,40.6057,0.021,90\n114715,0.965665,-46.375080,325.845907,-68.579427,1,0.2141,0.2020,0.0362,39.9807,0.007,67\n114808,52.207031,-26.610098,221.298836,-55.042928,1,0.3037,0.2944,0.0104,40.9082,0.014,90\n115053,33.925781,-5.979157,170.179895,-60.866303,1,0.1649,0.1424,0.2329,39.1427,0.022,42\n115079,1.694561,-45.191612,326.278557,-69.858253,1,0.2207,0.2325,0.0072,40.3232,0.011,90\n115157,2.457983,-45.389202,324.632685,-69.945696,1,0.6382,0.5628,0.0442,42.5706,0.011,90\n115336,351.734680,-62.884678,318.284128,-51.651217,1,1.7312,1.7123,0.0766,45.5432,0.019,95\n115638,358.665253,-45.783966,330.353593,-68.203652,1,0.6365,0.6239,0.0079,42.8420,0.009,90\n115670,151.347656,3.583322,236.25
2362,43.918627,1,0.0000,0.0000,0.0000,nan,0.015,16\n115792,352.711273,-63.823658,316.922299,-51.059403,1,0.2389,0.2859,0.0505,40.8350,0.024,90\n115859,148.886719,2.686724,235.347248,41.389003,1,1.4248,1.2871,0.1757,44.7782,0.028,88\n115937,51.679688,-27.447618,222.618229,-55.642263,1,0.0000,0.0000,0.0000,nan,0.010,65\n116132,150.292969,2.686724,236.427488,42.541447,1,0.0878,0.0965,0.0207,38.2336,0.016,62\n116212,150.644531,3.583322,235.698235,43.342784,1,0.2344,0.2757,1.1756,40.7443,0.018,90\n116570,51.855469,-27.953188,223.543603,-55.561470,1,0.0000,0.0000,0.0000,nan,0.008,16\n116720,34.980469,-6.279288,172.180075,-60.389399,1,0.4081,0.3212,0.0367,41.1269,0.023,90\n116818,359.805206,-46.768478,327.135979,-67.829903,1,0.6528,0.7091,0.0211,43.1811,0.011,88\n117016,349.966217,-62.696659,319.542989,-51.376556,1,0.0000,0.0000,0.0000,nan,0.021,92\n117104,346.130127,-63.072620,321.423103,-50.042305,1,0.0000,0.0000,0.0000,nan,0.020,92\n117184,352.132874,-63.636005,317.424173,-51.095855,1,0.0000,0.0000,0.0000,nan,0.021,16\n117393,52.910156,-25.944481,220.366350,-54.301439,1,0.8259,0.8295,0.0204,43.5993,0.010,90\n117461,53.261719,-27.615883,223.280041,-54.281374,1,0.0000,0.0000,0.0000,nan,0.006,92\n117513,151.347656,3.583322,236.252362,43.918627,1,0.8515,0.8845,0.0148,43.7712,0.015,90\n117774,346.276581,-64.011238,320.448031,-49.344136,1,0.5589,0.5608,0.1532,42.5611,0.019,90\n118211,151.699219,3.583322,236.533224,44.205648,1,0.2420,0.2139,1.1778,40.1199,0.016,90\n118422,149.589844,3.583322,234.885369,42.474696,1,0.0000,0.0000,0.0000,nan,0.024,65\n118455,347.861847,-61.943836,321.519104,-51.424048,1,0.1346,0.1396,0.3139,39.0951,0.017,90\n118770,53.085938,-28.122234,224.100909,-54.509752,1,0.2642,0.6375,0.0267,42.8991,0.007,90\n118868,350.230255,-61.943836,320.053946,-52.070537,1,0.0000,0.0000,0.0000,nan,0.017,65\n118979,2.457983,-45.389202,324.632685,-69.945696,1,0.2374,0.5280,0.1420,42.4036,0.011,67\n119215,52.031250,-26.443335,220.963669,-55.168557,1,0.2490,0.2486,0.0069,40
.4887,0.014,90\n119383,0.949367,-45.586655,326.991548,-69.251686,1,0.3501,0.2602,0.6723,40.6012,0.013,42\n119494,150.117188,2.836105,236.124718,42.483719,1,0.2113,0.2256,0.0219,40.2502,0.016,90\n119647,151.523438,3.134927,236.900695,43.803170,1,0.1844,0.1571,0.0210,39.3762,0.019,62\n119811,348.586945,-64.573555,318.693903,-49.477869,1,0.7287,0.7966,0.0308,43.4912,0.018,90\n119882,358.312500,-44.993881,332.185785,-68.685906,1,0.0000,0.0000,0.0000,nan,0.009,65\n120356,32.695312,-4.929937,166.868469,-60.841230,1,0.0582,0.0543,0.0185,36.9203,0.018,42\n120927,152.050781,3.284369,237.157374,44.318466,1,0.4807,0.4897,0.0334,42.2071,0.019,90\n121107,35.332031,-5.979157,172.286722,-59.931743,1,0.5440,0.2209,0.2405,40.1982,0.022,90\n121182,53.085938,-28.122234,224.100909,-54.509752,1,0.0000,0.0000,0.0000,nan,0.007,65\n121224,359.811707,-45.191612,329.485675,-69.150905,1,0.1244,2.3005,0.4304,46.3283,0.010,42\n121266,151.347656,3.583322,236.252362,43.918627,1,0.4062,0.5002,0.0481,42.2623,0.015,42\n121301,52.207031,-28.630989,224.800211,-55.343637,1,0.1459,0.1449,1.1250,39.1845,0.009,62\n121440,53.613281,-28.630989,225.073365,-54.119461,1,0.2230,0.2219,0.1530,40.2089,0.006,42\n121447,0.189873,-45.586655,328.254458,-68.969298,1,0.2571,0.2465,0.0106,40.4674,0.007,90\n121704,359.805206,-46.768478,327.135979,-67.829903,1,1.5357,1.5245,0.1053,45.2324,0.011,88\n121705,352.132874,-63.636005,317.424173,-51.095855,1,0.1125,0.1219,0.0187,38.7767,0.021,42\n121783,35.332031,-5.979157,172.286722,-59.931743,1,0.1324,0.0956,0.0132,38.2109,0.022,90\n121803,348.586945,-64.573555,318.693903,-49.477869,1,0.2563,0.3703,1.1769,41.4870,0.018,90\n121883,150.117188,2.836105,236.124718,42.483719,1,0.0000,0.0000,0.0000,nan,0.016,65\n122235,53.085938,-28.122234,224.100909,-54.509752,1,0.0000,0.0000,0.0000,nan,0.007,65\n122275,54.667969,-27.615883,223.610785,-53.050840,1,0.3172,0.5059,0.1684,42.2916,0.009,90\n122716,33.398438,-3.732834,166.492280,-59.466614,1,0.0000,0.0000,0.0000,nan,0.022,65\n122965,53.08
5938,-28.122234,224.100909,-54.509752,1,0.7135,0.7416,0.0359,43.3006,0.007,90\n123035,150.468750,3.732834,235.392208,43.283244,1,0.2740,0.3135,0.5471,41.0660,0.020,52\n123151,52.207031,-28.291550,224.208534,-55.300157,1,0.2548,0.2657,0.0184,40.6528,0.007,62\n123211,150.996094,2.985506,236.647967,43.287350,1,1.6177,1.6654,0.0205,45.4691,0.020,88\n123244,1.363636,-46.768478,324.669342,-68.371416,1,0.4582,0.3682,0.3963,41.4728,0.008,42\n123437,348.595886,-63.072620,320.023289,-50.713060,1,0.1987,0.2137,0.0164,40.1178,0.021,52\n123493,34.804688,-5.829153,171.307861,-60.174401,1,1.0395,0.9736,0.2348,44.0284,0.023,95\n123743,152.050781,2.985506,237.495952,44.143927,1,0.2016,0.2129,0.0188,40.1084,0.019,90\n123926,151.171875,1.342993,238.602520,42.464379,1,0.0000,0.0000,0.0000,nan,0.026,16\n123927,150.468750,3.732834,235.392208,43.283244,1,0.0000,0.0000,0.0000,nan,0.020,16\n124006,2.457983,-45.389202,324.632685,-69.945696,1,0.3574,0.3496,0.7517,41.3410,0.011,90\n124183,33.574219,-6.579593,170.455585,-61.548219,1,0.0000,0.0000,0.0000,nan,0.021,65\n124188,348.908447,-63.823658,319.169886,-50.176186,1,0.0000,0.0000,0.0000,nan,0.018,65\n124361,51.855469,-27.953188,223.543603,-55.561470,1,0.3256,0.3420,0.8779,41.2856,0.008,88\n124394,52.207031,-26.610098,221.298836,-55.042928,1,0.9198,1.4761,0.2825,45.1459,0.014,90\n124679,346.276581,-64.011238,320.448031,-49.344136,1,0.3602,0.3409,0.0345,41.2771,0.019,52\n124762,347.861847,-61.943836,321.519104,-51.424048,1,0.1442,2.8043,0.6276,46.8496,0.017,42\n125095,352.711273,-63.823658,316.922299,-51.059403,1,0.0000,0.0000,0.0000,nan,0.024,65\n125242,349.891296,-64.573555,317.972107,-49.786192,1,0.2721,0.5662,0.2621,42.5866,0.023,90\n125258,349.615387,-63.636005,318.927246,-50.506542,1,0.0000,0.0000,0.0000,nan,0.018,65\n125426,349.429535,-62.508568,320.039643,-51.393745,1,1.1327,1.1093,0.0315,44.3789,0.020,88\n125470,34.277344,-5.079716,169.526841,-59.956640,1,0.5731,0.5848,0.0092,42.6713,0.019,90\n125518,359.805206,-46.768478,327.135979,-
67.829903,1,0.0000,0.0000,0.0000,nan,0.011,65\n125743,347.013428,-62.508568,321.472056,-50.735330,1,0.9950,1.0046,0.0133,44.1126,0.018,42\n125762,1.753247,-46.768478,324.030235,-68.498041,1,0.0000,0.0000,0.0000,nan,0.014,65\n126061,349.966217,-62.696659,319.542989,-51.376556,1,0.0000,0.0000,0.0000,nan,0.021,65\n126084,149.238281,3.882372,234.283829,42.351155,1,0.1250,0.1223,0.4578,38.7846,0.033,90\n126970,150.996094,2.985506,236.647967,43.287350,1,0.0000,0.0000,0.0000,nan,0.020,16\n127056,34.453125,-5.229529,169.987075,-59.956185,1,0.1629,0.1385,0.0373,39.0772,0.019,42\n127488,348.908447,-63.823658,319.169886,-50.176186,1,0.0000,0.0000,0.0000,nan,0.018,65\n127773,53.085938,-27.111860,222.384291,-54.355086,1,0.0000,0.0000,0.0000,nan,0.007,65\n127942,348.595886,-63.072620,320.023289,-50.713060,1,0.3899,0.5026,0.2876,42.2748,0.021,90\n127996,346.562500,-63.448284,320.824720,-49.866957,1,0.6993,0.6759,0.0121,43.0539,0.021,95\n128339,0.965665,-46.375080,325.845907,-68.579427,1,0.2192,2.1719,1.1420,46.1760,0.007,67\n128405,352.398651,-62.696659,318.017427,-51.967966,1,0.0000,0.0000,0.0000,nan,0.020,65\n128488,150.468750,1.641510,237.714575,42.075234,1,0.1333,0.1194,0.0257,38.7277,0.017,42\n128518,34.101562,-5.829153,170.247753,-60.638325,1,0.5644,0.5449,0.0420,42.4859,0.019,90\n128564,150.117188,2.238686,236.784618,42.139082,1,0.4541,2.8846,1.1987,46.9235,0.016,90\n128737,52.558594,-27.279613,222.538937,-54.845107,1,0.4204,0.4270,0.0598,41.8523,0.008,90\n128746,152.050781,2.985506,237.495952,44.143927,1,0.3408,0.3671,0.5585,41.4654,0.019,52\n128967,150.644531,3.583322,235.698235,43.342784,1,0.3145,0.3149,0.0153,41.0770,0.018,90\n129179,346.130127,-63.072620,321.423103,-50.042305,1,0.1422,0.1600,0.0245,39.4198,0.020,52\n129490,34.453125,-5.229529,169.987075,-59.956185,1,0.3803,0.4821,0.2384,42.1664,0.019,42\n129503,150.292969,2.686724,236.427488,42.541447,1,0.1939,0.1882,0.0140,39.8092,0.016,52\n129637,33.925781,-5.979157,170.179895,-60.866303,1,0.0000,0.0000,0.0000,nan,0.
022,92\n129648,348.529419,-61.755440,321.293980,-51.763351,1,0.3613,2.9518,0.4926,46.9838,0.016,62\n129861,351.321442,-64.198746,317.458993,-50.429931,1,0.5180,0.5215,0.0294,42.3710,0.023,90\n130220,346.276581,-64.011238,320.448031,-49.344136,1,0.5002,0.4635,0.0365,42.0643,0.019,42\n130404,150.820312,3.732834,235.666318,43.572109,1,0.2531,0.2629,0.0072,40.6262,0.016,42\n130502,35.683594,-5.379379,171.992947,-59.253501,1,0.2141,0.1909,0.0935,39.8439,0.020,90\n130625,1.708861,-45.586655,325.688716,-69.520253,1,0.0000,0.0000,0.0000,nan,0.011,16\n130750,359.816315,-44.003082,331.451340,-70.123054,1,0.2536,0.2917,0.7396,40.8850,0.013,90\n131075,52.910156,-25.944481,220.366350,-54.301439,1,0.1709,0.1776,0.0174,39.6689,0.010,42\n131181,34.101562,-5.829153,170.247753,-60.638325,1,0.0000,0.0000,0.0000,nan,0.019,16\n131305,351.382965,-64.011238,317.574052,-50.604657,1,0.2846,0.3113,0.0407,41.0484,0.023,90\n131368,150.996094,2.388015,237.313912,42.939977,1,0.0000,0.0000,0.0000,nan,0.021,65\n131488,0.965665,-46.375080,325.845907,-68.579427,1,0.2691,0.2621,0.0183,40.6187,0.007,90\n131492,150.820312,3.732834,235.666318,43.572109,1,0.0000,0.0000,0.0000,nan,0.016,16\n131629,33.574219,-5.079716,168.448505,-60.407218,1,1.1119,1.0817,0.0329,44.3113,0.016,95\n131814,151.699219,3.583322,236.533224,44.205648,1,0.2823,0.5387,0.2038,42.4557,0.016,90\n131815,32.695312,-4.929937,166.868469,-60.841230,1,0.3521,0.3520,0.0254,41.3584,0.018,90\n132021,359.058563,-45.191612,330.695783,-68.844915,1,0.2525,0.6394,0.1855,42.9069,0.011,90\n132278,359.415588,-46.768478,327.729895,-67.686097,1,0.0000,0.0000,0.0000,nan,0.009,92\n133074,347.617462,-62.508568,321.121462,-50.904708,1,0.2610,0.2672,0.8507,40.6666,0.019,90\n133191,346.276581,-64.011238,320.448031,-49.344136,1,0.4648,0.4423,0.0247,41.9430,0.019,90\n133234,151.523438,3.134927,236.900695,43.803170,1,0.2288,0.2866,0.0891,40.8416,0.019,15\n133354,358.665253,-45.783966,330.353593,-68.203652,1,0.4990,0.5076,0.0086,42.3007,0.009,90\n133513,34.980469
,-6.279288,172.180075,-60.389399,1,0.4337,0.4221,0.0715,41.8224,0.023,90\n133773,149.414062,3.433834,234.919132,42.245550,1,0.0000,0.0000,0.0000,nan,0.027,53\n134380,150.996094,2.985506,236.647967,43.287350,1,0.3943,0.3670,0.0425,41.4645,0.020,90\n134824,351.734680,-62.884678,318.284128,-51.651217,1,0.1196,0.1143,0.0103,38.6270,0.019,42\n135054,350.230255,-61.943836,320.053946,-52.070537,1,0.3893,0.4001,0.3373,41.6849,0.017,90\n135067,148.886719,2.686724,235.347248,41.389003,1,0.5285,0.5059,0.6484,42.2917,0.028,90\n135097,151.171875,1.342993,238.602520,42.464379,1,0.2066,0.3032,0.0238,40.9819,0.026,42\n135357,51.855469,-28.630989,224.733260,-55.649872,1,0.0000,0.0000,0.0000,nan,0.009,65\n135588,151.523438,3.134927,236.900695,43.803170,1,0.0000,0.0000,0.0000,nan,0.019,16\n135790,150.117188,2.238686,236.784618,42.139082,1,0.4136,0.3284,0.7703,41.1829,0.016,90\n135813,0.589520,-47.161343,325.385896,-67.769893,1,0.2577,0.2445,0.0139,40.4473,0.009,52\n136110,53.261719,-27.615883,223.280041,-54.281374,1,0.3785,0.4183,0.7565,41.7995,0.006,90\n136352,51.328125,-27.784405,223.130589,-55.999499,1,0.0000,0.0000,0.0000,nan,0.013,65\n136407,33.574219,-4.780192,168.064587,-60.175886,1,0.1110,0.0954,0.0100,38.2070,0.019,42\n136704,150.820312,3.732834,235.666318,43.572109,1,0.2725,1.2302,1.1813,44.6568,0.016,62\n136931,52.558594,-27.279613,222.538937,-54.845107,1,0.3304,0.3783,0.1057,41.5421,0.008,52\n136949,351.321442,-64.198746,317.458993,-50.429931,1,0.0000,0.0000,0.0000,nan,0.023,65\n137510,346.562500,-63.448284,320.824720,-49.866957,1,0.0000,0.0000,0.0000,nan,0.021,65\n137645,51.855469,-26.276812,220.627031,-55.293792,1,0.1590,0.1754,0.0130,39.6390,0.014,42\n138010,52.207031,-28.630989,224.800211,-55.343637,1,0.2411,0.2709,0.0169,40.7009,0.009,42\n138068,150.292969,2.686724,236.427488,42.541447,1,0.2270,2.7496,0.8355,46.7979,0.016,42\n138263,51.328125,-27.784405,223.130589,-55.999499,1,0.4085,0.4242,0.0398,41.8353,0.013,90\n138415,349.429535,-62.508568,320.039643,-51.393745,1,
0.2761,0.2500,0.0203,40.5026,0.020,42\n138553,347.812500,-63.448284,320.128971,-50.202348,1,0.3010,0.2608,0.2763,40.6065,0.021,90\n138947,349.160583,-64.760857,318.219706,-49.458924,1,0.5965,0.5575,0.2970,42.5456,0.020,42\n139016,151.523438,3.134927,236.900695,43.803170,1,0.0000,0.0000,0.0000,nan,0.019,16\n139329,151.699219,3.583322,236.533224,44.205648,1,0.5029,0.5454,0.2843,42.4885,0.016,90\n139362,149.238281,3.882372,234.283829,42.351155,1,0.0809,0.0779,0.0097,37.7408,0.033,64\n139405,1.666667,-44.399834,327.519190,-70.529554,1,0.3701,0.4458,0.8403,41.9636,0.009,90\n139637,359.816315,-44.003082,331.451340,-70.123054,1,0.4142,0.4028,0.3993,41.7023,0.013,90\n140096,2.457983,-45.389202,324.632685,-69.945696,1,0.2211,0.2342,0.0540,40.3413,0.011,90\n140472,52.558594,-27.279613,222.538937,-54.845107,1,0.0000,0.0000,0.0000,nan,0.008,65\n140948,1.666667,-44.399834,327.519190,-70.529554,1,0.2048,0.1926,0.0117,39.8645,0.009,62\n141212,53.613281,-28.630989,225.073365,-54.119461,1,0.1256,2.3091,0.5229,46.3382,0.006,90\n141302,150.292969,2.686724,236.427488,42.541447,1,0.4316,0.4620,0.3119,42.0560,0.016,42\n141334,149.238281,3.882372,234.283829,42.351155,1,0.0000,0.0000,0.0000,nan,0.033,65\n141686,53.085938,-28.122234,224.100909,-54.509752,1,0.3431,0.3179,0.2942,41.1008,0.007,67\n141937,151.523438,3.134927,236.900695,43.803170,1,0.4487,0.4658,0.0138,42.0770,0.019,90\n142099,52.207031,-26.610098,221.298836,-55.042928,1,0.0000,0.0000,0.0000,nan,0.014,65\n142254,52.910156,-25.944481,220.366350,-54.301439,1,0.4858,0.5210,0.3055,42.3685,0.010,90\n142368,347.013428,-62.508568,321.472056,-50.735330,1,0.2586,0.0878,0.7050,38.0148,0.018,90\n142866,150.996094,2.388015,237.313912,42.939977,1,0.6327,0.6476,0.0215,42.9408,0.021,42\n142867,349.966217,-62.696659,319.542989,-51.376556,1,0.1548,0.1591,0.0118,39.4056,0.021,42\n142885,52.207031,-28.291550,224.208534,-55.300157,1,0.4057,0.3852,0.0218,41.5882,0.007,90\n143066,0.189873,-45.586655,328.254458,-68.969298,1,0.1937,0.5270,0.1240,42.398
3,0.007,42\n143275,2.071130,-45.191612,325.606223,-69.989264,1,0.1114,0.1030,0.0117,38.3830,0.011,42\n143651,34.101562,-5.829153,170.247753,-60.638325,1,0.3590,0.3440,0.0396,41.3003,0.019,90\n143865,33.925781,-5.979157,170.179895,-60.866303,1,0.0000,0.0000,0.0000,nan,0.022,16\n144204,53.085938,-27.111860,222.384291,-54.355086,1,0.4825,0.5378,0.3853,42.4513,0.007,90\n144244,33.222656,-4.780192,167.515653,-60.396584,1,0.2120,0.6014,0.1268,42.7453,0.018,90\n145107,34.980469,-6.279288,172.180075,-60.389399,1,0.0000,0.0000,0.0000,nan,0.023,16\n145160,0.189873,-45.586655,328.254458,-68.969298,1,0.0000,0.0000,0.0000,nan,0.007,65\n145257,52.207031,-28.291550,224.208534,-55.300157,1,0.2570,0.2708,0.2312,40.7001,0.007,90\n145675,51.855469,-27.953188,223.543603,-55.561470,1,0.8655,0.8841,0.0293,43.7698,0.008,90\n145859,34.980469,-6.279288,172.180075,-60.389399,1,0.2213,0.2411,0.0230,40.4126,0.023,90\n145926,152.050781,3.284369,237.157374,44.318466,1,0.2061,0.2269,0.0092,40.2640,0.019,42\n145990,33.222656,-4.780192,167.515653,-60.396584,1,0.4648,0.4366,0.0319,41.9096,0.018,90\n146187,33.398438,-3.732834,166.492280,-59.466614,1,0.0000,0.0000,0.0000,nan,0.022,92\n146410,359.805206,-46.768478,327.135979,-67.829903,1,0.2555,0.4043,0.6020,41.7117,0.011,90\n146429,150.117188,2.238686,236.784618,42.139082,1,0.1260,0.1220,0.0168,38.7782,0.016,62\n147214,51.855469,-27.953188,223.543603,-55.561470,1,0.1677,0.1843,0.0127,39.7584,0.008,90\n147571,0.190678,-45.783966,327.956322,-68.803772,1,0.1885,0.2575,0.1606,40.5748,0.005,90\n147642,52.910156,-26.276812,220.926149,-54.363918,1,0.2453,0.2263,0.0090,40.2576,0.008,62\n147752,151.523438,3.134927,236.900695,43.803170,1,0.2042,0.1823,0.0158,39.7324,0.019,90\n147816,34.277344,-5.079716,169.526841,-59.956640,1,0.0000,0.0000,0.0000,nan,0.019,92\n148204,151.171875,2.238686,237.619933,42.994783,1,0.0000,0.0000,0.0000,nan,0.024,65\n148466,151.171875,2.238686,237.619933,42.994783,1,0.5915,0.5684,0.0143,42.5965,0.024,90\n148535,34.277344,-5.679190,170
.314930,-60.410322,1,0.1966,0.1835,0.0107,39.7482,0.020,42\n148543,349.046051,-61.943836,320.796530,-51.753706,1,0.4717,0.5098,0.0652,42.3120,0.017,90\n148976,53.964844,-28.630989,225.142950,-53.813613,1,0.6993,0.6715,0.0353,43.0365,0.009,90\n148996,51.855469,-26.276812,220.627031,-55.293792,1,0.0258,0.0954,0.0390,38.2066,0.014,15\n149129,0.589520,-47.161343,325.385896,-67.769893,1,0.1440,2.5349,0.7043,46.5843,0.009,90\n149130,151.171875,2.537361,237.288526,43.169764,1,0.1925,0.1847,0.0130,39.7633,0.024,90\n149478,2.071130,-45.191612,325.606223,-69.989264,1,0.1744,0.5308,0.1019,42.4174,0.011,42\n149673,33.398438,-4.331149,167.226341,-59.936551,1,0.3411,0.3499,0.0077,41.3436,0.018,90\n150266,349.615387,-63.636005,318.927246,-50.506542,1,0.0000,0.0000,0.0000,nan,0.018,16\n150344,349.966217,-62.696659,319.542989,-51.376556,1,0.0000,0.0000,0.0000,nan,0.021,65\n150561,53.261719,-27.615883,223.280041,-54.281374,1,0.2265,0.2307,0.0085,40.3046,0.006,42\n150765,51.855469,-27.953188,223.543603,-55.561470,1,0.0000,0.0000,0.0000,nan,0.008,65\n150818,53.613281,-26.944359,222.237403,-53.863858,1,0.1401,0.1231,0.0150,38.8002,0.009,90\n150880,351.382965,-64.011238,317.574052,-50.604657,1,0.1168,0.0932,0.0179,38.1533,0.023,42\n151356,51.679688,-27.447618,222.618229,-55.642263,1,0.0000,0.0000,0.0000,nan,0.010,16\n151427,0.574468,-45.981140,327.041068,-68.778764,1,0.0370,0.0313,0.0117,35.6883,0.006,42\n151458,53.261719,-27.615883,223.280041,-54.281374,1,0.0000,0.0000,0.0000,nan,0.006,65\n151462,33.574219,-4.780192,168.064587,-60.175886,1,0.2901,0.3886,0.4161,41.6102,0.019,42\n151498,359.446716,-44.201530,331.730015,-69.805709,1,0.4319,0.4487,0.5711,41.9804,0.010,90\n151694,54.667969,-27.615883,223.610785,-53.050840,1,0.0000,0.0000,0.0000,nan,0.009,92\n151704,150.292969,2.686724,236.427488,42.541447,1,0.1558,0.1295,0.6463,38.9183,0.016,52\n151973,150.996094,4.181528,235.291975,43.970869,1,0.0000,0.0000,0.0000,nan,0.015,92\n152079,33.574219,-5.379379,168.838090,-60.637536,1,0.3503,0.342
9,0.0254,41.2924,0.017,90\n152083,150.468750,3.732834,235.392208,43.283244,1,0.5001,0.5041,0.0165,42.2827,0.020,90\n152300,33.574219,-6.579593,170.455585,-61.548219,1,0.7306,0.6981,0.0380,43.1398,0.021,42\n152425,150.820312,3.732834,235.666318,43.572109,1,0.0000,0.0000,0.0000,nan,0.016,65\n152453,52.910156,-27.953188,223.774083,-54.639214,1,0.2741,0.5064,0.2047,42.2944,0.007,90\n152567,150.996094,4.181528,235.291975,43.970869,1,0.0000,0.0000,0.0000,nan,0.015,16\n152618,33.574219,-5.079716,168.448505,-60.407218,1,0.0000,0.0000,0.0000,nan,0.016,16\n152640,33.574219,-4.780192,168.064587,-60.175886,1,0.0000,0.0000,0.0000,nan,0.019,65\n152682,51.679688,-27.447618,222.618229,-55.642263,1,0.0000,0.0000,0.0000,nan,0.010,16\n152756,150.996094,4.181528,235.291975,43.970869,1,0.1326,0.1837,0.4894,39.7513,0.015,62\n152787,351.299988,-62.320400,319.038597,-52.026867,1,0.0000,0.0000,0.0000,nan,0.018,65\n152812,32.695312,-4.929937,166.868469,-60.841230,1,2.2378,2.2813,0.0673,46.3061,0.018,88\n153089,35.332031,-5.979157,172.286722,-59.931743,1,0.0000,0.0000,0.0000,nan,0.022,16\n153539,54.667969,-27.615883,223.610785,-53.050840,1,0.3371,0.3157,0.0155,41.0832,0.009,88\n153880,51.679688,-27.447618,222.618229,-55.642263,1,0.3501,0.3152,0.0731,41.0793,0.010,90\n154053,34.101562,-5.829153,170.247753,-60.638325,1,0.1526,0.1479,0.0167,39.2327,0.019,42\n154402,151.171875,2.238686,237.619933,42.994783,1,0.0294,0.0619,0.0172,37.2157,0.024,90\n154631,358.648071,-46.375080,329.462659,-67.716008,1,0.9576,0.8148,0.0688,43.5515,0.009,90\n154648,359.811707,-45.191612,329.485675,-69.150905,1,1.4174,1.4408,0.0548,45.0811,0.010,88\n154762,35.859375,-4.630479,171.270769,-58.580806,1,0.3187,0.2788,1.0862,40.7724,0.022,90\n154986,150.996094,2.985506,236.647967,43.287350,1,0.0000,0.0000,0.0000,nan,0.020,16\n155110,148.886719,2.686724,235.347248,41.389003,1,0.0000,0.0000,0.0000,nan,0.028,65\n155380,150.996094,2.388015,237.313912,42.939977,1,0.2147,0.2288,0.0127,40.2844,0.021,88\n155468,150.996094,4.181528,
235.291975,43.970869,1,0.0000,0.0000,0.0000,nan,0.015,65\n155541,151.347656,4.181528,235.568369,44.259942,1,0.5115,0.4788,0.0312,42.1482,0.016,90\n155613,148.710938,2.836105,235.050801,41.328739,1,0.3585,0.3694,0.0090,41.4811,0.031,90\n155778,53.085938,-27.111860,222.384291,-54.355086,1,0.2234,0.5912,0.2372,42.6999,0.007,42\n156386,151.523438,3.134927,236.900695,43.803170,1,1.4308,1.2425,0.1528,44.6834,0.019,88\n156537,352.132874,-63.636005,317.424173,-51.095855,1,2.0260,2.3090,0.0769,46.3380,0.021,88\n156739,351.321442,-64.198746,317.458993,-50.429931,1,0.3405,0.4842,0.8467,42.1775,0.023,90\n157120,51.328125,-27.447618,222.535046,-55.950727,1,0.2126,0.2539,0.5364,40.5402,0.013,42\n157299,34.453125,-5.229529,169.987075,-59.956185,1,0.5504,0.6351,0.0335,42.8890,0.019,42\n157477,359.805206,-46.768478,327.135979,-67.829903,1,0.9586,0.7920,0.0736,43.4756,0.011,95\n157746,149.414062,1.940072,236.565366,41.393323,1,0.2167,0.5603,0.2926,42.5588,0.018,52\n158042,1.666667,-44.399834,327.519190,-70.529554,1,0.1965,0.2093,0.0089,40.0667,0.009,90\n158241,347.013428,-62.508568,321.472056,-50.735330,1,0.0424,0.0344,0.0149,35.8982,0.018,62\n158507,351.299988,-62.320400,319.038597,-52.026867,1,0.6109,0.7017,0.0936,43.1535,0.018,90\n158515,52.910156,-26.276812,220.926149,-54.363918,1,0.3265,0.3196,0.0097,41.1146,0.008,42\n158573,33.574219,-4.780192,168.064587,-60.175886,1,0.2736,0.5337,0.1790,42.4315,0.019,90\n158697,35.332031,-5.979157,172.286722,-59.931743,1,0.3830,0.4049,0.0394,41.7159,0.022,90\n158731,347.846710,-64.760857,318.929827,-49.143596,1,0.0000,0.0000,0.0000,nan,0.019,65\n158813,150.820312,3.134927,236.341348,43.230123,1,0.1966,0.1962,0.0134,39.9093,0.016,62\n158904,2.097458,-45.783966,324.737840,-69.478613,1,1.8591,1.6874,0.1207,45.5041,0.011,88\n159277,359.805206,-46.768478,327.135979,-67.829903,1,0.6452,0.5890,0.0356,42.6903,0.011,90\n159316,151.699219,3.583322,236.533224,44.205648,1,0.0000,0.0000,0.0000,nan,0.016,65\n159491,151.171875,1.342993,238.602520,42.464379,1
,0.0000,0.0000,0.0000,nan,0.026,16\n159665,33.574219,-4.780192,168.064587,-60.175886,1,0.3253,0.3336,0.0122,41.2221,0.019,90\n159925,150.820312,3.134927,236.341348,43.230123,1,1.6095,1.6763,0.0691,45.4866,0.016,88\n160048,51.855469,-28.630989,224.733260,-55.649872,1,0.3638,0.4409,0.3427,41.9350,0.009,90\n160426,351.321442,-64.198746,317.458993,-50.429931,1,0.3511,0.3353,0.0637,41.2356,0.023,90\n160527,2.097458,-45.783966,324.737840,-69.478613,1,0.3162,0.2266,0.6723,40.2609,0.011,90\n160737,52.207031,-28.291550,224.208534,-55.300157,1,0.3593,0.5374,0.2042,42.4496,0.007,88\n160921,349.160583,-64.760857,318.219706,-49.458924,1,0.0000,0.0000,0.0000,nan,0.020,65\n161135,52.207031,-28.291550,224.208534,-55.300157,1,0.4293,0.4326,0.0209,41.8860,0.007,90\n161411,150.468750,3.732834,235.392208,43.283244,1,0.0000,0.0000,0.0000,nan,0.020,65\n161432,149.414062,2.238686,236.239766,41.565558,1,0.0000,0.0000,0.0000,nan,0.017,16\n161521,346.130127,-63.072620,321.423103,-50.042305,1,0.1147,0.1640,0.9394,39.4785,0.020,90\n161591,150.996094,4.181528,235.291975,43.970869,1,0.0761,0.1416,0.0294,39.1295,0.015,62\n161877,150.468750,1.641510,237.714575,42.075234,1,0.0000,0.0000,0.0000,nan,0.017,65\n161988,51.679688,-27.447618,222.618229,-55.642263,1,0.5641,0.5875,0.0180,42.6834,0.010,90\n162093,51.855469,-28.630989,224.733260,-55.649872,1,0.1580,0.1822,0.0192,39.7311,0.009,62\n162139,150.820312,3.134927,236.341348,43.230123,1,0.1036,0.0926,0.0114,38.1367,0.016,42\n162152,348.529419,-61.755440,321.293980,-51.763351,1,0.0000,0.0000,0.0000,nan,0.016,16\n162531,347.861847,-61.943836,321.519104,-51.424048,1,0.5290,0.4535,0.0438,42.0076,0.017,90\n162538,150.996094,4.181528,235.291975,43.970869,1,0.2795,0.5154,0.2561,42.3402,0.015,90\n162994,53.085938,-28.122234,224.100909,-54.509752,1,0.5827,0.5520,0.0825,42.5199,0.007,90\n163208,33.398438,-4.331149,167.226341,-59.936551,1,0.1546,0.1227,0.0022,38.7928,0.018,42\n163680,0.574468,-45.981140,327.041068,-68.778764,1,0.3407,0.3516,0.0201,41.3557,0.006
,90\n163894,52.207031,-28.630989,224.800211,-55.343637,1,0.3027,0.3162,0.0110,41.0874,0.009,88\n164582,32.871094,-4.780192,166.959493,-60.615132,1,0.6341,0.6230,0.0125,42.8383,0.017,90\n164805,51.679688,-27.447618,222.618229,-55.642263,1,0.0000,0.0000,0.0000,nan,0.010,92\n165406,351.321442,-64.198746,317.458993,-50.429931,1,0.0000,0.0000,0.0000,nan,0.023,65\n165494,349.615387,-63.636005,318.927246,-50.506542,1,0.2616,0.2665,0.0118,40.6604,0.018,90\n165507,151.171875,1.342993,238.602520,42.464379,1,0.2323,0.2251,0.0187,40.2445,0.026,67\n165821,149.414062,2.238686,236.239766,41.565558,1,0.2458,1.0072,0.4491,44.1195,0.017,42\n165985,51.679688,-27.447618,222.618229,-55.642263,1,0.4551,0.4933,0.2305,42.2259,0.010,88\n166103,0.589520,-47.161343,325.385896,-67.769893,1,0.2871,0.2079,0.4627,40.0507,0.009,90\n166165,150.117188,2.836105,236.124718,42.483719,1,0.0000,0.0000,0.0000,nan,0.016,16\n166186,51.679688,-27.447618,222.618229,-55.642263,1,0.5156,0.5329,0.0107,42.4278,0.010,42\n166195,149.238281,3.882372,234.283829,42.351155,1,0.0000,0.0000,0.0000,nan,0.033,16\n166330,149.414062,1.940072,236.565366,41.393323,1,0.0000,0.0000,0.0000,nan,0.018,65\n166697,53.613281,-28.630989,225.073365,-54.119461,1,1.1664,1.1160,0.0368,44.3951,0.006,95\n166727,34.453125,-5.229529,169.987075,-59.956185,1,0.2659,0.2946,0.0220,40.9098,0.019,62\n166956,359.811707,-45.191612,329.485675,-69.150905,1,0.0000,0.0000,0.0000,nan,0.010,65\n167123,149.414062,2.238686,236.239766,41.565558,1,0.0000,0.0000,0.0000,nan,0.017,65\n167220,359.446716,-44.201530,331.730015,-69.805709,1,0.7136,0.7235,0.0416,43.2348,0.010,90\n167260,2.071130,-45.191612,325.606223,-69.989264,1,0.3449,0.3554,0.0193,41.3826,0.011,62\n167310,349.615387,-63.636005,318.927246,-50.506542,1,0.3079,0.3246,0.0181,41.1538,0.018,42\n167417,349.046051,-61.943836,320.796530,-51.753706,1,0.5774,0.5558,0.0410,42.5377,0.017,90\n167436,350.230255,-61.943836,320.053946,-52.070537,1,0.1918,0.2263,0.0138,40.2576,0.017,90\n167488,348.586945,-64.573555,3
18.693903,-49.477869,1,0.3928,0.3939,0.0171,41.6449,0.018,90\n167910,348.908447,-63.823658,319.169886,-50.176186,1,0.4761,0.4646,0.0434,42.0701,0.018,90\n168146,51.855469,-26.276812,220.627031,-55.293792,1,0.0000,0.0000,0.0000,nan,0.014,65\n168465,149.414062,1.940072,236.565366,41.393323,1,2.1492,2.4337,0.1988,46.4769,0.018,95\n168659,53.613281,-27.953188,223.929533,-54.024772,1,0.2663,0.3135,0.2502,41.0656,0.007,67\n168952,358.312500,-44.993881,332.185785,-68.685906,1,0.0000,0.0000,0.0000,nan,0.009,65\n168957,53.085938,-27.784405,223.525509,-54.460748,1,0.0000,0.0000,0.0000,nan,0.007,65\n168967,347.812500,-63.448284,320.128971,-50.202348,1,0.1845,0.2142,0.1608,40.1234,0.021,90\n168989,151.171875,1.342993,238.602520,42.464379,1,0.0000,0.0000,0.0000,nan,0.026,16\n169133,347.013428,-62.508568,321.472056,-50.735330,1,0.4656,0.4799,0.2303,42.1546,0.018,90\n169203,347.861847,-61.943836,321.519104,-51.424048,1,0.1833,0.1791,0.1849,39.6900,0.017,90\n169282,149.414062,3.433834,234.919132,42.245550,1,0.3181,0.3458,0.3165,41.3133,0.027,90\n"
  },
  {
    "path": "examples/docker/modin-ray/Dockerfile",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n# Build image from this dockerfile like this:\n# docker build -t modin-ray:latest .\n\nFROM ubuntu:20.04\n\n# Proxy settings\nENV http_proxy=${http_proxy}\nENV https_proxy=${https_proxy}\nENV no_proxy=${no_proxy}\n\nRUN apt-get update --yes \\\n    && apt-get install wget --yes \\\n    && rm -rf /var/lib/apt/lists/*\n\nENV USER modin\nENV UID 1000\nENV HOME /home/$USER\n\nRUN adduser --disabled-password \\\n    --gecos \"Non-root user\" \\\n    --uid $UID \\\n    --home $HOME \\\n    $USER\n\n# Conda settings\nENV CONDA_DIR=${HOME}/miniconda\nENV CONDA_ENV_NAME=modin-ray\nENV PATH=\"${CONDA_DIR}/bin:${PATH}\"\n\nRUN wget -nv https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O /tmp/miniconda3.sh \\\n    && bash /tmp/miniconda3.sh -b -p \"${CONDA_DIR}\" -f -u \\\n    && \"${CONDA_DIR}/bin/conda\" init bash \\\n    && rm -f /tmp/miniconda3.sh\n\nRUN conda update -n base -c defaults conda -y \\\n    && conda create -n ${CONDA_ENV_NAME} --yes -c conda-forge --strict-channel-priority \\\n        modin-ray \\\n        ray-dashboard \\\n        scikit-learn \\\n        scikit-learn-intelex \\\n        xgboost \\\n    && conda clean --all --yes\n\n# 
Activate ${CONDA_ENV_NAME} for interactive shells\nRUN echo \"source ${CONDA_DIR}/bin/activate ${CONDA_ENV_NAME}\" >> \"${HOME}/.bashrc\"\n# Activate ${CONDA_ENV_NAME} for non-interactive shells\n# The following line comments out the line that prevents ~/.bashrc execution in\n# non-interactive mode.\nRUN sed -e 's,\\(^[[:space:]]\\+[*]) return;;$\\),# \\1,' -i \"${HOME}/.bashrc\"\nENV BASH_ENV=\"${HOME}/.bashrc\"\n\n# Set up benchmark scripts\nCOPY nyc-taxi.py \"${HOME}\"\nCOPY census.py \"${HOME}\"\nCOPY plasticc.py \"${HOME}\"\nRUN mkdir /dataset\nWORKDIR ${HOME}\n\n# Clean up proxy settings to publish on Docker Hub\nENV http_proxy=\nENV https_proxy=\nENV no_proxy=\n\n# Set entrypoint with arguments expansion\nENTRYPOINT [\"/bin/bash\", \"-c\", \"exec $0 $*\"]\n"
  },
  {
    "path": "examples/docker/modin-ray/build-docker-image.sh",
    "content": "#!/bin/bash -e\n\n# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\ncd \"`dirname \\\"$0\\\"`\"\n\ndocker build -t modin-ray .\n\necho -e '\\nNYC TAXI BENCHMARK\nUser is responsible for preparing the dataset.\nIt can be generated by following the instructions on the link:\nhttps://github.com/toddwschneider/nyc-taxi-data#instructions\nTo run the benchmark execute:\n\\tdocker run --rm -v /path/to/dataset:/dataset modin-ray python nyc-taxi.py <name of file starting with /dataset>\n\nCENSUS BENCHMARK\nUser is responsible for preparing the dataset.\nIt can be downloaded from the following link:\nhttps://rapidsai-data.s3.us-east-2.amazonaws.com/datasets/ipums_education2income_1970-2010.csv.gz\nTo run the benchmark execute:\n\\tdocker run --rm -v /path/to/dataset:/dataset modin-ray python census.py <name of file starting with /dataset>\n\nPLASTICC BENCHMARK\nUser is responsible for preparing the datasets.\nThe datasets must include four files: training set, test set,\ntraining set metadata and test set metadata.\nTo run the benchmark execute:\n\\tdocker run --rm -v /path/to/dataset:/dataset modin-ray python plasticc.py <training set file name starting with /dataset> <test set file name starting with /dataset> <training 
set metadata file name starting with /dataset> <test set metadata file name starting with /dataset>\\n'\n"
  },
  {
    "path": "examples/docker/modin-ray/census.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport sys\nimport time\n\nimport sklearnex\nfrom sklearn import config_context\n\nimport modin.pandas as pd\n\nsklearnex.patch_sklearn()\nimport numpy as np\nimport sklearn.linear_model as lm\nfrom sklearn.model_selection import train_test_split\n\n\ndef read(filename):\n    columns_names = [\n        \"YEAR0\",\n        \"DATANUM\",\n        \"SERIAL\",\n        \"CBSERIAL\",\n        \"HHWT\",\n        \"CPI99\",\n        \"GQ\",\n        \"QGQ\",\n        \"PERNUM\",\n        \"PERWT\",\n        \"SEX\",\n        \"AGE\",\n        \"EDUC\",\n        \"EDUCD\",\n        \"INCTOT\",\n        \"SEX_HEAD\",\n        \"SEX_MOM\",\n        \"SEX_POP\",\n        \"SEX_SP\",\n        \"SEX_MOM2\",\n        \"SEX_POP2\",\n        \"AGE_HEAD\",\n        \"AGE_MOM\",\n        \"AGE_POP\",\n        \"AGE_SP\",\n        \"AGE_MOM2\",\n        \"AGE_POP2\",\n        \"EDUC_HEAD\",\n        \"EDUC_MOM\",\n        \"EDUC_POP\",\n        \"EDUC_SP\",\n        \"EDUC_MOM2\",\n        \"EDUC_POP2\",\n        \"EDUCD_HEAD\",\n        \"EDUCD_MOM\",\n        \"EDUCD_POP\",\n        \"EDUCD_SP\",\n        \"EDUCD_MOM2\",\n        \"EDUCD_POP2\",\n        \"INCTOT_HEAD\",\n        
\"INCTOT_MOM\",\n        \"INCTOT_POP\",\n        \"INCTOT_SP\",\n        \"INCTOT_MOM2\",\n        \"INCTOT_POP2\",\n    ]\n    columns_types = [\n        \"int64\",\n        \"int64\",\n        \"int64\",\n        \"float64\",\n        \"int64\",\n        \"float64\",\n        \"int64\",\n        \"float64\",\n        \"int64\",\n        \"int64\",\n        \"int64\",\n        \"int64\",\n        \"int64\",\n        \"int64\",\n        \"int64\",\n        \"float64\",\n        \"float64\",\n        \"float64\",\n        \"float64\",\n        \"float64\",\n        \"float64\",\n        \"float64\",\n        \"float64\",\n        \"float64\",\n        \"float64\",\n        \"float64\",\n        \"float64\",\n        \"float64\",\n        \"float64\",\n        \"float64\",\n        \"float64\",\n        \"float64\",\n        \"float64\",\n        \"float64\",\n        \"float64\",\n        \"float64\",\n        \"float64\",\n        \"float64\",\n        \"float64\",\n        \"float64\",\n        \"float64\",\n        \"float64\",\n        \"float64\",\n        \"float64\",\n        \"float64\",\n    ]\n    dtypes = {columns_names[i]: columns_types[i] for i in range(len(columns_names))}\n\n    df = pd.read_csv(\n        filename,\n        names=columns_names,\n        dtype=dtypes,\n        skiprows=1,\n    )\n\n    return df\n\n\ndef etl(df):\n    keep_cols = [\n        \"YEAR0\",\n        \"DATANUM\",\n        \"SERIAL\",\n        \"CBSERIAL\",\n        \"HHWT\",\n        \"CPI99\",\n        \"GQ\",\n        \"PERNUM\",\n        \"SEX\",\n        \"AGE\",\n        \"INCTOT\",\n        \"EDUC\",\n        \"EDUCD\",\n        \"EDUC_HEAD\",\n        \"EDUC_POP\",\n        \"EDUC_MOM\",\n        \"EDUCD_MOM2\",\n        \"EDUCD_POP2\",\n        \"INCTOT_MOM\",\n        \"INCTOT_POP\",\n        \"INCTOT_MOM2\",\n        \"INCTOT_POP2\",\n        \"INCTOT_HEAD\",\n        \"SEX_HEAD\",\n    ]\n    df = df[keep_cols]\n\n    df = df[df[\"INCTOT\"] != 9999999]\n    df = 
df[df[\"EDUC\"] != -1]\n    df = df[df[\"EDUCD\"] != -1]\n\n    df[\"INCTOT\"] = df[\"INCTOT\"] * df[\"CPI99\"]\n\n    for column in keep_cols:\n        df[column] = df[column].fillna(-1)\n\n        df[column] = df[column].astype(\"float64\")\n\n    y = df[\"EDUC\"]\n    X = df.drop(columns=[\"EDUC\", \"CPI99\"])\n\n    return (df, X, y)\n\n\ndef mse(y_test, y_pred):\n    return ((y_test - y_pred) ** 2).mean()\n\n\ndef cod(y_test, y_pred):\n    y_bar = y_test.mean()\n    total = ((y_test - y_bar) ** 2).sum()\n    residuals = ((y_test - y_pred) ** 2).sum()\n    return 1 - (residuals / total)\n\n\ndef ml(X, y, random_state, n_runs, test_size):\n    clf = lm.Ridge()\n\n    X = np.ascontiguousarray(X, dtype=np.float64)\n    y = np.ascontiguousarray(y, dtype=np.float64)\n\n    mse_values, cod_values = [], []\n    ml_scores = {}\n\n    print(\"ML runs: \", n_runs)\n    for i in range(n_runs):\n        (X_train, X_test, y_train, y_test) = train_test_split(\n            X, y, test_size=test_size, random_state=random_state\n        )\n        random_state += 777\n\n        with config_context(assume_finite=True):\n            model = clf.fit(X_train, y_train)\n\n        y_pred = model.predict(X_test)\n\n        mse_values.append(mse(y_test, y_pred))\n        cod_values.append(cod(y_test, y_pred))\n\n    ml_scores[\"mse_mean\"] = sum(mse_values) / len(mse_values)\n    ml_scores[\"cod_mean\"] = sum(cod_values) / len(cod_values)\n    ml_scores[\"mse_dev\"] = pow(\n        sum([(mse_value - ml_scores[\"mse_mean\"]) ** 2 for mse_value in mse_values])\n        / (len(mse_values) - 1),\n        0.5,\n    )\n    ml_scores[\"cod_dev\"] = pow(\n        sum([(cod_value - ml_scores[\"cod_mean\"]) ** 2 for cod_value in cod_values])\n        / (len(cod_values) - 1),\n        0.5,\n    )\n\n    return ml_scores\n\n\ndef measure(name, func, *args, **kw):\n    t0 = time.time()\n    res = func(*args, **kw)\n    t1 = time.time()\n    print(f\"{name}: {t1 - t0} sec\")\n    return res\n\n\ndef 
main():\n    if len(sys.argv) != 2:\n        print(\n            f\"USAGE: docker run --rm -v /path/to/dataset:/dataset python census.py <data file name starting with /dataset>\"\n        )\n        return\n    # ML specific\n    N_RUNS = 50\n    TEST_SIZE = 0.1\n    RANDOM_STATE = 777\n\n    df = measure(\"Reading\", read, sys.argv[1])\n    _, X, y = measure(\"ETL\", etl, df)\n    measure(\n        \"ML\", ml, X, y, random_state=RANDOM_STATE, n_runs=N_RUNS, test_size=TEST_SIZE\n    )\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "examples/docker/modin-ray/nyc-taxi.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport sys\nimport time\n\nimport modin.pandas as pd\n\n\ndef read(filename):\n    columns_names = [\n        \"trip_id\",\n        \"vendor_id\",\n        \"pickup_datetime\",\n        \"dropoff_datetime\",\n        \"store_and_fwd_flag\",\n        \"rate_code_id\",\n        \"pickup_longitude\",\n        \"pickup_latitude\",\n        \"dropoff_longitude\",\n        \"dropoff_latitude\",\n        \"passenger_count\",\n        \"trip_distance\",\n        \"fare_amount\",\n        \"extra\",\n        \"mta_tax\",\n        \"tip_amount\",\n        \"tolls_amount\",\n        \"ehail_fee\",\n        \"improvement_surcharge\",\n        \"total_amount\",\n        \"payment_type\",\n        \"trip_type\",\n        \"pickup\",\n        \"dropoff\",\n        \"cab_type\",\n        \"precipitation\",\n        \"snow_depth\",\n        \"snowfall\",\n        \"max_temperature\",\n        \"min_temperature\",\n        \"average_wind_speed\",\n        \"pickup_nyct2010_gid\",\n        \"pickup_ctlabel\",\n        \"pickup_borocode\",\n        \"pickup_boroname\",\n        \"pickup_ct2010\",\n        \"pickup_boroct2010\",\n        \"pickup_cdeligibil\",\n        
\"pickup_ntacode\",\n        \"pickup_ntaname\",\n        \"pickup_puma\",\n        \"dropoff_nyct2010_gid\",\n        \"dropoff_ctlabel\",\n        \"dropoff_borocode\",\n        \"dropoff_boroname\",\n        \"dropoff_ct2010\",\n        \"dropoff_boroct2010\",\n        \"dropoff_cdeligibil\",\n        \"dropoff_ntacode\",\n        \"dropoff_ntaname\",\n        \"dropoff_puma\",\n    ]\n    parse_dates = [\"pickup_datetime\", \"dropoff_datetime\"]\n    return pd.read_csv(\n        filename, names=columns_names, header=None, parse_dates=parse_dates\n    )\n\n\ndef q1(df):\n    return df.groupby(\"cab_type\")[\"cab_type\"].count()\n\n\ndef q2(df):\n    return df.groupby(\"passenger_count\", as_index=False).mean()[\n        [\"passenger_count\", \"total_amount\"]\n    ]\n\n\ndef q3(df):\n    transformed = pd.DataFrame(\n        {\n            \"pickup_datetime\": df[\"pickup_datetime\"].dt.year,\n            \"passenger_count\": df[\"passenger_count\"],\n        }\n    )\n    return transformed.groupby(\n        [\"pickup_datetime\", \"passenger_count\"], as_index=False\n    ).size()\n\n\ndef q4(df):\n    transformed = pd.DataFrame(\n        {\n            \"passenger_count\": df[\"passenger_count\"],\n            \"pickup_datetime\": df[\"pickup_datetime\"].dt.year,\n            \"trip_distance\": df[\"trip_distance\"].astype(\"int64\"),\n        }\n    )\n    return (\n        transformed.groupby(\n            [\"passenger_count\", \"pickup_datetime\", \"trip_distance\"], as_index=False\n        )\n        .size()\n        .sort_values(by=[\"pickup_datetime\", \"size\"], ascending=[True, False])\n    )\n\n\ndef measure(name, func, *args, **kw):\n    t0 = time.time()\n    res = func(*args, **kw)\n    t1 = time.time()\n    print(f\"{name}: {t1 - t0} sec\")\n    return res\n\n\ndef main():\n    if len(sys.argv) != 2:\n        print(\n            f\"USAGE: docker run --rm -v /path/to/dataset:/dataset python nyc-taxi.py <data file name starting with /dataset>\"\n       
 )\n        return\n    df = measure(\"Reading\", read, sys.argv[1])\n    measure(\"Q1\", q1, df)\n    measure(\"Q2\", q2, df)\n    measure(\"Q3\", q3, df)\n    measure(\"Q4\", q4, df)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "examples/docker/modin-ray/plasticc.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport sys\nimport time\nfrom functools import partial\n\nimport numpy as np\nimport sklearnex\nimport xgboost as xgb\n\nimport modin.pandas as pd\n\nsklearnex.patch_sklearn()\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.preprocessing import LabelEncoder\n\n\n################ helper functions ###############################\ndef create_dtypes():\n    dtypes = dict(\n        [\n            (\"object_id\", \"int32\"),\n            (\"mjd\", \"float32\"),\n            (\"passband\", \"int32\"),\n            (\"flux\", \"float32\"),\n            (\"flux_err\", \"float32\"),\n            (\"detected\", \"int32\"),\n        ]\n    )\n\n    # load metadata\n    columns_names = [\n        \"object_id\",\n        \"ra\",\n        \"decl\",\n        \"gal_l\",\n        \"gal_b\",\n        \"ddf\",\n        \"hostgal_specz\",\n        \"hostgal_photoz\",\n        \"hostgal_photoz_err\",\n        \"distmod\",\n        \"mwebv\",\n        \"target\",\n    ]\n    meta_dtypes = [\"int32\"] + [\"float32\"] * 4 + [\"int32\"] + [\"float32\"] * 5 + [\"int32\"]\n    meta_dtypes = dict(\n        [(columns_names[i], meta_dtypes[i]) for i in 
range(len(meta_dtypes))]\n    )\n    return dtypes, meta_dtypes\n\n\ndef ravel_column_names(cols):\n    d0 = cols.get_level_values(0)\n    d1 = cols.get_level_values(1)\n    return [\"%s_%s\" % (i, j) for i, j in zip(d0, d1)]\n\n\ndef measure(name, func, *args, **kw):\n    t0 = time.time()\n    res = func(*args, **kw)\n    t1 = time.time()\n    print(f\"{name}: {t1 - t0} sec\")\n    return res\n\n\ndef all_etl(train, train_meta, test, test_meta):\n    train_final = etl(train, train_meta)\n    test_final = etl(test, test_meta)\n    return (train_final, test_final)\n\n\ndef split_step(train_final, test_final):\n    X = train_final.drop([\"object_id\", \"target\"], axis=1).values\n    Xt = test_final.drop([\"object_id\"], axis=1).values\n\n    y = train_final[\"target\"]\n    assert X.shape[1] == Xt.shape[1]\n    classes = sorted(y.unique())\n\n    class_weights = {c: 1 for c in classes}\n    class_weights.update({c: 2 for c in [64, 15]})\n\n    lbl = LabelEncoder()\n    y = lbl.fit_transform(y)\n\n    X_train, X_test, y_train, y_test = train_test_split(\n        X, y, test_size=0.1, stratify=y, random_state=126\n    )\n\n    return X_train, y_train, X_test, y_test, Xt, classes, class_weights\n\n\ndef multi_weighted_logloss(y_true, y_preds, classes, class_weights):\n    \"\"\"\n    refactor from\n    @author olivier https://www.kaggle.com/ogrellier\n    multi logloss for PLAsTiCC challenge\n    \"\"\"\n    y_p = y_preds.reshape(y_true.shape[0], len(classes), order=\"F\")\n    y_ohe = pd.get_dummies(y_true)\n    y_p = np.clip(a=y_p, a_min=1e-15, a_max=1 - 1e-15)\n    y_p_log = np.log(y_p)\n    y_log_ones = np.sum(y_ohe.values * y_p_log, axis=0)\n    nb_pos = y_ohe.sum(axis=0).values.astype(float)\n    class_arr = np.array([class_weights[k] for k in sorted(class_weights.keys())])\n    y_w = y_log_ones * class_arr / nb_pos\n\n    loss = -np.sum(y_w) / np.sum(class_arr)\n    return loss\n\n\ndef xgb_multi_weighted_logloss(y_predicted, y_true, classes, class_weights):\n    
loss = multi_weighted_logloss(\n        y_true.get_label(), y_predicted, classes, class_weights\n    )\n    return \"wloss\", loss\n\n\n################ helper functions ###############################\n\n\ndef read(\n    training_set_filename,\n    test_set_filename,\n    training_set_metadata_filename,\n    test_set_metadata_filename,\n    dtypes,\n    meta_dtypes,\n):\n    train = pd.read_csv(training_set_filename, dtype=dtypes)\n    test = pd.read_csv(\n        test_set_filename,\n        names=list(dtypes.keys()),\n        dtype=dtypes,\n        header=0,\n    )\n\n    train_meta = pd.read_csv(training_set_metadata_filename, dtype=meta_dtypes)\n    target = meta_dtypes.pop(\"target\")\n    test_meta = pd.read_csv(test_set_metadata_filename, dtype=meta_dtypes)\n    meta_dtypes[\"target\"] = target\n\n    dfs = (train, train_meta, test, test_meta)\n    return dfs\n\n\ndef etl(df, df_meta):\n    # workaround for Modin_on_ray. Eventually this should be fixed\n    df[\"flux_ratio_sq\"] = (df[\"flux\"] / df[\"flux_err\"]) * (\n        df[\"flux\"] / df[\"flux_err\"]\n    )  # np.power(df[\"flux\"] / df[\"flux_err\"], 2.0)\n    df[\"flux_by_flux_ratio_sq\"] = df[\"flux\"] * df[\"flux_ratio_sq\"]\n\n    aggs = {\n        \"passband\": [\"mean\"],\n        \"flux\": [\"min\", \"max\", \"mean\", \"skew\"],\n        \"flux_err\": [\"min\", \"max\", \"mean\"],\n        \"detected\": [\"mean\"],\n        \"mjd\": [\"max\", \"min\"],\n        \"flux_ratio_sq\": [\"sum\"],\n        \"flux_by_flux_ratio_sq\": [\"sum\"],\n    }\n    agg_df = df.groupby(\"object_id\", sort=False).agg(aggs)\n\n    agg_df.columns = ravel_column_names(agg_df.columns)\n\n    agg_df[\"flux_diff\"] = agg_df[\"flux_max\"] - agg_df[\"flux_min\"]\n    agg_df[\"flux_dif2\"] = agg_df[\"flux_diff\"] / agg_df[\"flux_mean\"]\n    agg_df[\"flux_w_mean\"] = (\n        agg_df[\"flux_by_flux_ratio_sq_sum\"] / agg_df[\"flux_ratio_sq_sum\"]\n    )\n    agg_df[\"flux_dif3\"] = agg_df[\"flux_diff\"] / 
agg_df[\"flux_w_mean\"]\n    agg_df[\"mjd_diff\"] = agg_df[\"mjd_max\"] - agg_df[\"mjd_min\"]\n\n    agg_df = agg_df.drop([\"mjd_max\", \"mjd_min\"], axis=1)\n\n    agg_df = agg_df.reset_index()\n\n    df_meta = df_meta.drop([\"ra\", \"decl\", \"gal_l\", \"gal_b\"], axis=1)\n\n    df_meta = df_meta.merge(agg_df, on=\"object_id\", how=\"left\")\n\n    return df_meta\n\n\ndef ml(train_final, test_final):\n    X_train, y_train, X_test, y_test, Xt, classes, class_weights = split_step(\n        train_final, test_final\n    )\n\n    cpu_params = {\n        \"objective\": \"multi:softprob\",\n        \"eval_metric\": \"merror\",\n        \"tree_method\": \"hist\",\n        \"nthread\": 16,\n        \"num_class\": 14,\n        \"max_depth\": 7,\n        \"verbosity\": 1,\n        \"subsample\": 0.7,\n        \"colsample_bytree\": 0.7,\n    }\n\n    func_loss = partial(\n        xgb_multi_weighted_logloss, classes=classes, class_weights=class_weights\n    )\n\n    dtrain = xgb.DMatrix(data=X_train, label=y_train)\n    dvalid = xgb.DMatrix(data=X_test, label=y_test)\n    dtest = xgb.DMatrix(data=Xt)\n\n    watchlist = [(dvalid, \"eval\"), (dtrain, \"train\")]\n\n    clf = xgb.train(\n        cpu_params,\n        dtrain=dtrain,\n        num_boost_round=60,\n        evals=watchlist,\n        feval=func_loss,\n        early_stopping_rounds=10,\n        verbose_eval=None,\n    )\n\n    yp = clf.predict(dvalid)\n    cpu_loss = multi_weighted_logloss(y_test, yp, classes, class_weights)\n    ysub = clf.predict(dtest)  # noqa: F841 (unused variable)\n\n    return cpu_loss\n\n\ndef main():\n    if len(sys.argv) != 5:\n        print(\n            f\"USAGE: docker run --rm -v /path/to/dataset:/dataset python plasticc.py <training set file name startin with /dataset> <test set file name starting with /dataset> <training set metadata file name starting with /dataset> <test set metadata file name starting with /dataset>\"\n        )\n        return\n\n    dtypes, meta_dtypes = 
create_dtypes()\n\n    train, train_meta, test, test_meta = measure(\n        \"Reading\",\n        read,\n        sys.argv[1],\n        sys.argv[2],\n        sys.argv[3],\n        sys.argv[4],\n        dtypes,\n        meta_dtypes,\n    )\n    train_final, test_final = measure(\n        \"ETL\", all_etl, train, train_meta, test, test_meta\n    )\n    cpu_loss = measure(\"ML\", ml, train_final, test_final)\n\n    print(\"validation cpu_loss:\", cpu_loss)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "examples/jupyter/Modin_Taxi.ipynb",
    "content": "{\n \"cells\": [\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 1,\n   \"id\": \"cc4bd9e9\",\n   \"metadata\": {\n    \"slideshow\": {\n     \"slide_type\": \"skip\"\n    }\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"# To run this notebook as done in the README GIFs, you must first locally download the 2015 NYC Taxi Trip Data.\\n\",\n    \"import urllib.request\\n\",\n    \"url_path = \\\"https://modin-datasets.intel.com/green-taxi/green_tripdata_2015-01.csv\\\"\\n\",\n    \"urllib.request.urlretrieve(url_path, \\\"taxi.csv\\\")\\n\",\n    \"\\n\",\n    \"from modin.config import Engine\\n\",\n    \"Engine.put(\\\"dask\\\")\\n\",\n    \"from dask.distributed import Client\\n\",\n    \"client = Client(n_workers=12)\\n\",\n    \"\\n\",\n    \"from modin.config import BenchmarkMode\\n\",\n    \"BenchmarkMode.put(True)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 2,\n   \"id\": \"97b245e5\",\n   \"metadata\": {\n    \"slideshow\": {\n     \"slide_type\": \"slide\"\n    }\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"import modin.pandas as pd\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 3,\n   \"id\": \"b65b121c\",\n   \"metadata\": {\n    \"slideshow\": {\n     \"slide_type\": \"slide\"\n    }\n   },\n   \"outputs\": [\n    {\n     \"name\": \"stdout\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"CPU times: user 1.57 s, sys: 683 ms, total: 2.26 s\\n\",\n      \"Wall time: 14.2 s\\n\"\n     ]\n    }\n   ],\n   \"source\": [\n    \"%time df = pd.read_csv(\\\"taxi.csv\\\", parse_dates=[\\\"tpep_pickup_datetime\\\", \\\"tpep_dropoff_datetime\\\"], quoting=3)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 4,\n   \"id\": \"c48193b2\",\n   \"metadata\": {\n    \"slideshow\": {\n     \"slide_type\": \"slide\"\n    }\n   },\n   \"outputs\": [\n    {\n     \"name\": \"stdout\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      
\"CPU times: user 138 ms, sys: 27.3 ms, total: 166 ms\\n\",\n      \"Wall time: 404 ms\\n\"\n     ]\n    }\n   ],\n   \"source\": [\n    \"%time isnull = df.isnull()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 5,\n   \"id\": \"1d32ed7c\",\n   \"metadata\": {\n    \"slideshow\": {\n     \"slide_type\": \"slide\"\n    }\n   },\n   \"outputs\": [\n    {\n     \"name\": \"stdout\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"CPU times: user 175 ms, sys: 28.4 ms, total: 203 ms\\n\",\n      \"Wall time: 663 ms\\n\"\n     ]\n    }\n   ],\n   \"source\": [\n    \"%time rounded_trip_distance = df[[\\\"pickup_longitude\\\"]].applymap(round)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"id\": \"3ef271dc\",\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": []\n  }\n ],\n \"metadata\": {\n  \"kernelspec\": {\n   \"display_name\": \"Python 3 (ipykernel)\",\n   \"language\": \"python\",\n   \"name\": \"python3\"\n  },\n  \"language_info\": {\n   \"codemirror_mode\": {\n    \"name\": \"ipython\",\n    \"version\": 3\n   },\n   \"file_extension\": \".py\",\n   \"mimetype\": \"text/x-python\",\n   \"name\": \"python\",\n   \"nbconvert_exporter\": \"python\",\n   \"pygments_lexer\": \"ipython3\",\n   \"version\": \"3.8.11\"\n  }\n },\n \"nbformat\": 4,\n \"nbformat_minor\": 5\n}\n"
  },
  {
    "path": "examples/jupyter/Pandas_Taxi.ipynb",
    "content": "{\n \"cells\": [\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 1,\n   \"id\": \"5d674ce8\",\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# To run this notebook as done in the README GIFs, you must first locally download the 2015 NYC Taxi Trip Data.\\n\",\n    \"import urllib.request\\n\",\n    \"url_path = \\\"https://modin-datasets.intel.com/green-taxi/green_tripdata_2015-01.csv\\\"\\n\",\n    \"urllib.request.urlretrieve(url_path, \\\"taxi.csv\\\")\\n\",\n    \"\\n\",\n    \"import warnings\\n\",\n    \"warnings.filterwarnings(\\\"ignore\\\")\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 2,\n   \"id\": \"27f7321c\",\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"import pandas as pd\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 3,\n   \"id\": \"8de98215\",\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"name\": \"stdout\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"CPU times: user 30.7 s, sys: 4.25 s, total: 35 s\\n\",\n      \"Wall time: 35.3 s\\n\"\n     ]\n    }\n   ],\n   \"source\": [\n    \"%time df = pd.read_csv(\\\"taxi.csv\\\", parse_dates=[\\\"tpep_pickup_datetime\\\", \\\"tpep_dropoff_datetime\\\"], quoting=3)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 4,\n   \"id\": \"14422c3f\",\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"name\": \"stdout\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"CPU times: user 1.37 s, sys: 300 ms, total: 1.67 s\\n\",\n      \"Wall time: 1.67 s\\n\"\n     ]\n    }\n   ],\n   \"source\": [\n    \"%time isnull = df.isnull()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 5,\n   \"id\": \"f8f87974\",\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"name\": \"stdout\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"CPU times: user 3.07 s, sys: 305 ms, total: 3.37 s\\n\",\n     
 \"Wall time: 3.37 s\\n\"\n     ]\n    }\n   ],\n   \"source\": [\n    \"%time rounded_trip_distance = df[[\\\"pickup_longitude\\\"]].applymap(round)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"id\": \"2c7d62bf\",\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": []\n  }\n ],\n \"metadata\": {\n  \"kernelspec\": {\n   \"display_name\": \"Python 3 (ipykernel)\",\n   \"language\": \"python\",\n   \"name\": \"python3\"\n  },\n  \"language_info\": {\n   \"codemirror_mode\": {\n    \"name\": \"ipython\",\n    \"version\": 3\n   },\n   \"file_extension\": \".py\",\n   \"mimetype\": \"text/x-python\",\n   \"name\": \"python\",\n   \"nbconvert_exporter\": \"python\",\n   \"pygments_lexer\": \"ipython3\",\n   \"version\": \"3.8.11\"\n  }\n },\n \"nbformat\": 4,\n \"nbformat_minor\": 5\n}\n"
  },
  {
    "path": "examples/jupyter/integrations/NLTK.ipynb",
    "content": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Demonstrating NLTK Modin Interoperability\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## All the examples in this section are taken / adapted from https://www.kirenz.com/post/2021-12-11-text-mining-and-sentiment-analysis-with-nltk-and-pandas-in-python/text-mining-and-sentiment-analysis-with-nltk-and-pandas-in-python/\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"import modin.pandas as pd\\n\",\n    \"import pandas\\n\",\n    \"import nltk\\n\",\n    \"from nltk.tokenize import RegexpTokenizer\\n\",\n    \"from nltk.corpus import stopwords\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Import some Tweets from Barack Obama \\n\",\n    \"modin_df = pd.read_csv(\\\"https://raw.githubusercontent.com/kirenz/twitter-tweepy/main/tweets-obama.csv\\\")\\n\",\n    \"modin_df.head(3)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"modin_df['text'] = modin_df['text'].astype(str).str.lower()\\n\",\n    \"modin_df.head(3)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"regexp = RegexpTokenizer('\\\\w+')\\n\",\n    \"\\n\",\n    \"modin_df['text_token']=modin_df['text'].apply(regexp.tokenize)\\n\",\n    \"modin_df.head(3)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"nltk.download('stopwords')\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   
\"source\": [\n    \"# Make a list of english stopwords\\n\",\n    \"stopwords = nltk.corpus.stopwords.words(\\\"english\\\")\\n\",\n    \"\\n\",\n    \"# Extend the list with your own custom stopwords\\n\",\n    \"my_stopwords = ['https']\\n\",\n    \"stopwords.extend(my_stopwords)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Remove stopwords\\n\",\n    \"modin_df['text_token'] = modin_df['text_token'].apply(lambda x: [item for item in x if item not in stopwords])\\n\",\n    \"modin_df.head(3)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"modin_df['text_string'] = modin_df['text_token'].apply(lambda x: ' '.join([item for item in x if len(item)>2]))\\n\",\n    \"modin_df[['text', 'text_token', 'text_string']].head()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"nltk.download('punkt')\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"all_words = ' '.join([word for word in modin_df['text_string']])\\n\",\n    \"tokenized_words = nltk.tokenize.word_tokenize(all_words)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"from nltk.probability import FreqDist\\n\",\n    \"\\n\",\n    \"fdist = FreqDist(tokenized_words)\\n\",\n    \"fdist\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"modin_df['text_string_fdist'] = modin_df['text_token'].apply(lambda x: ' '.join([item for item in x if fdist[item] >= 1 ]))\\n\",\n    \"modin_df[['text', 'text_token', 'text_string', 
'text_string_fdist']].head()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"#lemmatization\\n\",\n    \"nltk.download('wordnet')\\n\",\n    \"nltk.download('omw-1.4')\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"from nltk.stem import WordNetLemmatizer\\n\",\n    \"\\n\",\n    \"wordnet_lem = WordNetLemmatizer()\\n\",\n    \"\\n\",\n    \"modin_df['text_string_lem'] = modin_df['text_string_fdist'].apply(wordnet_lem.lemmatize)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# check if the columns are equal\\n\",\n    \"modin_df['is_equal']= (modin_df['text_string_fdist']==modin_df['text_string_lem'])\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# show level count\\n\",\n    \"modin_df.is_equal.value_counts()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"all_words_lem = ' '.join([word for word in modin_df['text_string_lem']])\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"%matplotlib inline\\n\",\n    \"import matplotlib.pyplot as plt\\n\",\n    \"from wordcloud import WordCloud\\n\",\n    \"\\n\",\n    \"wordcloud = WordCloud(width=600, \\n\",\n    \"                     height=400, \\n\",\n    \"                     random_state=2, \\n\",\n    \"                     max_font_size=100).generate(all_words_lem)\\n\",\n    \"\\n\",\n    \"plt.figure(figsize=(10, 7))\\n\",\n    \"plt.imshow(wordcloud, interpolation='bilinear')\\n\",\n    \"plt.axis('off');\"\n   ]\n  },\n  {\n   
\"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Replicating NLTK workflow with pandas\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Import some Tweets from Barack Obama as pandas df\\n\",\n    \"pandas_df = pandas.read_csv(\\\"https://raw.githubusercontent.com/kirenz/twitter-tweepy/main/tweets-obama.csv\\\")\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"pandas_df['text'] = pandas_df['text'].astype(str).str.lower()\\n\",\n    \"pandas_df.head(3)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"regexp = RegexpTokenizer('\\\\w+')\\n\",\n    \"\\n\",\n    \"pandas_df['text_token']=pandas_df['text'].apply(regexp.tokenize)\\n\",\n    \"pandas_df.head(3)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Remove stopwords\\n\",\n    \"pandas_df['text_token'] = pandas_df['text_token'].apply(lambda x: [item for item in x if item not in stopwords])\\n\",\n    \"pandas_df.head(3)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"pandas_df['text_string'] = pandas_df['text_token'].apply(lambda x: ' '.join([item for item in x if len(item)>2]))\\n\",\n    \"pandas_df[['text', 'text_token', 'text_string']].head()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"all_words = ' '.join([word for word in pandas_df['text_string']])\\n\",\n    \"tokenized_words = nltk.tokenize.word_tokenize(all_words)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 
null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"from nltk.probability import FreqDist\\n\",\n    \"\\n\",\n    \"fdist = FreqDist(tokenized_words)\\n\",\n    \"fdist\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"pandas_df['text_string_fdist'] = pandas_df['text_token'].apply(lambda x: ' '.join([item for item in x if fdist[item] >= 1 ]))\\n\",\n    \"pandas_df[['text', 'text_token', 'text_string', 'text_string_fdist']].head()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"from nltk.stem import WordNetLemmatizer\\n\",\n    \"\\n\",\n    \"wordnet_lem = WordNetLemmatizer()\\n\",\n    \"\\n\",\n    \"pandas_df['text_string_lem'] = pandas_df['text_string_fdist'].apply(wordnet_lem.lemmatize)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# check if the columns are equal\\n\",\n    \"pandas_df['is_equal']= (pandas_df['text_string_fdist']==pandas_df['text_string_lem'])\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# show level count\\n\",\n    \"pandas_df.is_equal.value_counts()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"all_words_lem = ' '.join([word for word in pandas_df['text_string_lem']])\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"%matplotlib inline\\n\",\n    \"import matplotlib.pyplot as plt\\n\",\n    \"from wordcloud import WordCloud\\n\",\n    \"\\n\",\n    \"wordcloud = WordCloud(width=600, \\n\",\n    \"                     height=400, 
\\n\",\n    \"                     random_state=2, \\n\",\n    \"                     max_font_size=100).generate(all_words_lem)\\n\",\n    \"\\n\",\n    \"plt.figure(figsize=(10, 7))\\n\",\n    \"plt.imshow(wordcloud, interpolation='bilinear')\\n\",\n    \"plt.axis('off');\"\n   ]\n  }\n ],\n \"metadata\": {\n  \"interpreter\": {\n   \"hash\": \"9752fa87da8bf164654ccc33a595e9110c8fc9bb15d763374a7037fd32519b1f\"\n  },\n  \"kernelspec\": {\n   \"display_name\": \"Python 3.9.7 ('base')\",\n   \"language\": \"python\",\n   \"name\": \"python3\"\n  },\n  \"language_info\": {\n   \"codemirror_mode\": {\n    \"name\": \"ipython\",\n    \"version\": 3\n   },\n   \"file_extension\": \".py\",\n   \"mimetype\": \"text/x-python\",\n   \"name\": \"python\",\n   \"nbconvert_exporter\": \"python\",\n   \"pygments_lexer\": \"ipython3\",\n   \"version\": \"3.9.7\"\n  },\n  \"orig_nbformat\": 4\n },\n \"nbformat\": 4,\n \"nbformat_minor\": 2\n}\n"
  },
  {
    "path": "examples/jupyter/integrations/altair.ipynb",
    "content": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Demonstrating Altair Modin Interoperability\\n\",\n    \"### Currently Altair is not interoperable with Modin. Each visualization is created with a Modin and then pandas dataframe for comparison.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"import modin.pandas as pd\\n\",\n    \"import altair as alt\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"from vega_datasets import data\\n\",\n    \"pandas_cars = data.cars()\\n\",\n    \"modin_cars = pd.DataFrame(data.cars())\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Create a visualization with Modin df\\n\",\n    \"alt.Chart(modin_cars).mark_point().encode(\\n\",\n    \"    x='Horsepower',\\n\",\n    \"    y='Miles_per_Gallon',\\n\",\n    \"    color='Origin',\\n\",\n    \").interactive()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Create a visualization with pandas df\\n\",\n    \"alt.Chart(pandas_cars).mark_point().encode(\\n\",\n    \"    x='Horsepower',\\n\",\n    \"    y='Miles_per_Gallon',\\n\",\n    \"    color='Origin',\\n\",\n    \").interactive()\"\n   ]\n  }\n ],\n \"metadata\": {\n  \"interpreter\": {\n   \"hash\": \"9752fa87da8bf164654ccc33a595e9110c8fc9bb15d763374a7037fd32519b1f\"\n  },\n  \"kernelspec\": {\n   \"display_name\": \"Python 3.9.7 ('base')\",\n   \"language\": \"python\",\n   \"name\": \"python3\"\n  },\n  \"language_info\": {\n   \"codemirror_mode\": {\n    \"name\": \"ipython\",\n    \"version\": 3\n   },\n   \"file_extension\": \".py\",\n   \"mimetype\": 
\"text/x-python\",\n   \"name\": \"python\",\n   \"nbconvert_exporter\": \"python\",\n   \"pygments_lexer\": \"ipython3\",\n   \"version\": \"3.9.18\"\n  },\n  \"orig_nbformat\": 4\n },\n \"nbformat\": 4,\n \"nbformat_minor\": 2\n}\n"
  },
  {
    "path": "examples/jupyter/integrations/bokeh.ipynb",
    "content": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Demonstrating Bokeh Modin Interoperability\\n\",\n    \"### Currently Boken is not interoperable with Modin. Each visualization is created with a Modin and then pandas dataframe for comparison.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 1,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"import modin.pandas as pd\\n\",\n    \"import pandas\\n\",\n    \"from bokeh.plotting import figure\\n\",\n    \"from bokeh.models import ColumnDataSource\\n\",\n    \"from bokeh.io import show\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 2,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"name\": \"stderr\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"UserWarning: `from_dict` is not currently supported by PandasOnRay, defaulting to pandas implementation.\\n\",\n      \"Please refer to https://modin.readthedocs.io/en/stable/supported_apis/defaulting_to_pandas.html for explanation.\\n\",\n      \"2023-04-06 12:14:58,510\\tINFO worker.py:1544 -- Started a local Ray instance. 
View the dashboard at \\u001b[1m\\u001b[32m127.0.0.1:8265 \\u001b[39m\\u001b[22m\\n\",\n      \"UserWarning: When using a pre-initialized Ray cluster, please ensure that the runtime env sets environment variable __MODIN_AUTOIMPORT_PANDAS__ to 1\\n\"\n     ]\n    },\n    {\n     \"ename\": \"ValueError\",\n     \"evalue\": \"expected a dict or pandas.DataFrame, got    x_values  y_values\\n0         1         6\\n1         2         7\\n2         3         2\\n3         4         3\\n4         5         6\",\n     \"output_type\": \"error\",\n     \"traceback\": [\n      \"\\u001b[0;31m---------------------------------------------------------------------------\\u001b[0m\",\n      \"\\u001b[0;31mValueError\\u001b[0m                                Traceback (most recent call last)\",\n      \"\\u001b[0;32m/var/folders/qj/jybppsbd2jl75s8y2q8s2xx80000gn/T/ipykernel_5953/1336630338.py\\u001b[0m in \\u001b[0;36m<module>\\u001b[0;34m\\u001b[0m\\n\\u001b[1;32m      3\\u001b[0m \\u001b[0;34m\\u001b[0m\\u001b[0m\\n\\u001b[1;32m      4\\u001b[0m \\u001b[0;31m# create a ColumnDataSource by passing the dict\\u001b[0m\\u001b[0;34m\\u001b[0m\\u001b[0;34m\\u001b[0m\\u001b[0m\\n\\u001b[0;32m----> 5\\u001b[0;31m \\u001b[0msource\\u001b[0m \\u001b[0;34m=\\u001b[0m \\u001b[0mColumnDataSource\\u001b[0m\\u001b[0;34m(\\u001b[0m\\u001b[0mmodin_data\\u001b[0m\\u001b[0;34m)\\u001b[0m\\u001b[0;34m\\u001b[0m\\u001b[0;34m\\u001b[0m\\u001b[0m\\n\\u001b[0m\\u001b[1;32m      6\\u001b[0m \\u001b[0;34m\\u001b[0m\\u001b[0m\\n\\u001b[1;32m      7\\u001b[0m \\u001b[0mp\\u001b[0m \\u001b[0;34m=\\u001b[0m \\u001b[0mfigure\\u001b[0m\\u001b[0;34m(\\u001b[0m\\u001b[0;34m)\\u001b[0m\\u001b[0;34m\\u001b[0m\\u001b[0;34m\\u001b[0m\\u001b[0m\\n\",\n      \"\\u001b[0;32m~/opt/anaconda3/lib/python3.9/site-packages/bokeh/models/sources.py\\u001b[0m in \\u001b[0;36m__init__\\u001b[0;34m(self, *args, **kwargs)\\u001b[0m\\n\\u001b[1;32m    229\\u001b[0m                 \\u001b[0mraw_data\\u001b[0m 
\\u001b[0;34m=\\u001b[0m \\u001b[0mself\\u001b[0m\\u001b[0;34m.\\u001b[0m\\u001b[0m_data_from_groupby\\u001b[0m\\u001b[0;34m(\\u001b[0m\\u001b[0mraw_data\\u001b[0m\\u001b[0;34m)\\u001b[0m\\u001b[0;34m\\u001b[0m\\u001b[0;34m\\u001b[0m\\u001b[0m\\n\\u001b[1;32m    230\\u001b[0m             \\u001b[0;32melse\\u001b[0m\\u001b[0;34m:\\u001b[0m\\u001b[0;34m\\u001b[0m\\u001b[0;34m\\u001b[0m\\u001b[0m\\n\\u001b[0;32m--> 231\\u001b[0;31m                 \\u001b[0;32mraise\\u001b[0m \\u001b[0mValueError\\u001b[0m\\u001b[0;34m(\\u001b[0m\\u001b[0;34mf\\\"expected a dict or pandas.DataFrame, got {raw_data}\\\"\\u001b[0m\\u001b[0;34m)\\u001b[0m\\u001b[0;34m\\u001b[0m\\u001b[0;34m\\u001b[0m\\u001b[0m\\n\\u001b[0m\\u001b[1;32m    232\\u001b[0m         \\u001b[0msuper\\u001b[0m\\u001b[0;34m(\\u001b[0m\\u001b[0;34m)\\u001b[0m\\u001b[0;34m.\\u001b[0m\\u001b[0m__init__\\u001b[0m\\u001b[0;34m(\\u001b[0m\\u001b[0;34m**\\u001b[0m\\u001b[0mkwargs\\u001b[0m\\u001b[0;34m)\\u001b[0m\\u001b[0;34m\\u001b[0m\\u001b[0;34m\\u001b[0m\\u001b[0m\\n\\u001b[1;32m    233\\u001b[0m         \\u001b[0mself\\u001b[0m\\u001b[0;34m.\\u001b[0m\\u001b[0mdata\\u001b[0m\\u001b[0;34m.\\u001b[0m\\u001b[0mupdate\\u001b[0m\\u001b[0;34m(\\u001b[0m\\u001b[0mraw_data\\u001b[0m\\u001b[0;34m)\\u001b[0m\\u001b[0;34m\\u001b[0m\\u001b[0;34m\\u001b[0m\\u001b[0m\\n\",\n      \"\\u001b[0;31mValueError\\u001b[0m: expected a dict or pandas.DataFrame, got    x_values  y_values\\n0         1         6\\n1         2         7\\n2         3         2\\n3         4         3\\n4         5         6\"\n     ]\n    }\n   ],\n   \"source\": [\n    \"# Create a visualization with Modin df \\n\",\n    \"modin_data = pd.DataFrame.from_dict({'x_values': [1, 2, 3, 4, 5], 'y_values': [6, 7, 2, 3, 6]})\\n\",\n    \"\\n\",\n    \"# create a ColumnDataSource by passing the dict\\n\",\n    \"source = ColumnDataSource(modin_data)\\n\",\n    \"\\n\",\n    \"p = figure()\\n\",\n    \"p.circle(x='x_values', y='y_values', source=source)\\n\",\n    
\"show(p)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 3,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Create a visualization with pandas df \\n\",\n    \"pandas_data = pandas.DataFrame.from_dict({'x_values': [1, 2, 3, 4, 5], 'y_values': [6, 7, 2, 3, 6]})\\n\",\n    \"\\n\",\n    \"# create a ColumnDataSource by passing the dict\\n\",\n    \"source = ColumnDataSource(pandas_data)\\n\",\n    \"\\n\",\n    \"p = figure()\\n\",\n    \"p.circle(x='x_values', y='y_values', source=source)\\n\",\n    \"show(p)\"\n   ]\n  }\n ],\n \"metadata\": {\n  \"interpreter\": {\n   \"hash\": \"9752fa87da8bf164654ccc33a595e9110c8fc9bb15d763374a7037fd32519b1f\"\n  },\n  \"kernelspec\": {\n   \"display_name\": \"Python 3.9.7 ('base')\",\n   \"language\": \"python\",\n   \"name\": \"python3\"\n  },\n  \"language_info\": {\n   \"codemirror_mode\": {\n    \"name\": \"ipython\",\n    \"version\": 3\n   },\n   \"file_extension\": \".py\",\n   \"mimetype\": \"text/x-python\",\n   \"name\": \"python\",\n   \"nbconvert_exporter\": \"python\",\n   \"pygments_lexer\": \"ipython3\",\n   \"version\": \"3.9.7\"\n  },\n  \"orig_nbformat\": 4\n },\n \"nbformat\": 4,\n \"nbformat_minor\": 2\n}\n"
  },
  {
    "path": "examples/jupyter/integrations/huggingface.ipynb",
    "content": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Demonstrating Hugging Face Modin Interoperability\\n\",\n    \"## All the examples in this section are taken/ adapted from https://www.kaggle.com/code/satyampd/imdb-sentiment-analysis-using-bert-w-huggingface/notebook\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"import modin.pandas as pd\\n\",\n    \"import numpy as np # linear algebra\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"import tensorflow as tf\\n\",\n    \"import sklearn\\n\",\n    \"from tqdm import tqdm\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"import urllib.request\\n\",\n    \"url_path = \\\"https://modin-datasets.intel.com/testing/IMDB_Dataset.csv\\\"\\n\",\n    \"urllib.request.urlretrieve(url_path, \\\"imdb.csv\\\")\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"%%time\\n\",\n    \"modin_df = pd.read_csv(\\\"imdb.csv\\\")\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"modin_df.head()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"type(modin_df)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"modin_df.sample()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"from transformers import 
BertTokenizer, TFBertForSequenceClassification\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Loading the BERT Classifier and Tokenizer along with Input module\\n\",\n    \"from transformers import InputExample, InputFeatures\\n\",\n    \"\\n\",\n    \"model = TFBertForSequenceClassification.from_pretrained(\\\"bert-base-uncased\\\")\\n\",\n    \"tokenizer = BertTokenizer.from_pretrained(\\\"bert-base-uncased\\\")\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"model.summary()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# changing positive and negative into numeric values\\n\",\n    \"\\n\",\n    \"def cat2num(value):\\n\",\n    \"    if value=='positive': \\n\",\n    \"        return 1\\n\",\n    \"    else: \\n\",\n    \"        return 0\\n\",\n    \"    \\n\",\n    \"modin_df['sentiment']  =  modin_df['sentiment'].apply(cat2num)\\n\",\n    \"train = modin_df[:45000]\\n\",\n    \"test = modin_df[45000:]\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# But first see BERT tokenizer examples and other required stuff!\\n\",\n    \"\\n\",\n    \"example='In this Kaggle notebook, I will do sentiment analysis using BERT with Huggingface'\\n\",\n    \"tokens=tokenizer.tokenize(example)\\n\",\n    \"token_ids = tokenizer.convert_tokens_to_ids(tokens)\\n\",\n    \"print(tokens)\\n\",\n    \"print(token_ids)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"type(train)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   
\"outputs\": [],\n   \"source\": [\n    \"def convert_data_to_examples(train, test, review, sentiment): \\n\",\n    \"    train_InputExamples = train.apply(lambda x: InputExample(guid=None, # Globally unique ID for bookkeeping, unused in this case\\n\",\n    \"                                                          text_a = x[review], \\n\",\n    \"                                                          label = x[sentiment]), axis = 1)\\n\",\n    \"\\n\",\n    \"    validation_InputExamples = test.apply(lambda x: InputExample(guid=None, # Globally unique ID for bookkeeping, unused in this case\\n\",\n    \"                                                          text_a = x[review], \\n\",\n    \"                                                          label = x[sentiment]), axis = 1,)\\n\",\n    \"  \\n\",\n    \"    return train_InputExamples, validation_InputExamples\\n\",\n    \"\\n\",\n    \"train_InputExamples, validation_InputExamples = convert_data_to_examples(train,  test, 'review',  'sentiment')\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"def convert_examples_to_tf_dataset(examples, tokenizer, max_length=128):\\n\",\n    \"    features = [] # -> will hold InputFeatures to be converted later\\n\",\n    \"\\n\",\n    \"    for e in tqdm(examples):\\n\",\n    \"        input_dict = tokenizer.encode_plus(\\n\",\n    \"            e.text_a,\\n\",\n    \"            add_special_tokens=True,    # Add 'CLS' and 'SEP'\\n\",\n    \"            max_length=max_length,    # truncates if len(s) > max_length\\n\",\n    \"            return_token_type_ids=True,\\n\",\n    \"            return_attention_mask=True,\\n\",\n    \"            pad_to_max_length=True, # pads to the right by default # CHECK THIS for pad_to_max_length\\n\",\n    \"            truncation=True\\n\",\n    \"        )\\n\",\n    \"\\n\",\n    \"        input_ids, token_type_ids, attention_mask 
= (input_dict[\\\"input_ids\\\"],input_dict[\\\"token_type_ids\\\"], input_dict['attention_mask'])\\n\",\n    \"        features.append(InputFeatures( input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids, label=e.label) )\\n\",\n    \"\\n\",\n    \"    def gen():\\n\",\n    \"        for f in features:\\n\",\n    \"            yield (\\n\",\n    \"                {\\n\",\n    \"                    \\\"input_ids\\\": f.input_ids,\\n\",\n    \"                    \\\"attention_mask\\\": f.attention_mask,\\n\",\n    \"                    \\\"token_type_ids\\\": f.token_type_ids,\\n\",\n    \"                },\\n\",\n    \"                f.label,\\n\",\n    \"            )\\n\",\n    \"\\n\",\n    \"    return tf.data.Dataset.from_generator(\\n\",\n    \"        gen,\\n\",\n    \"        ({\\\"input_ids\\\": tf.int32, \\\"attention_mask\\\": tf.int32, \\\"token_type_ids\\\": tf.int32}, tf.int64),\\n\",\n    \"        (\\n\",\n    \"            {\\n\",\n    \"                \\\"input_ids\\\": tf.TensorShape([None]),\\n\",\n    \"                \\\"attention_mask\\\": tf.TensorShape([None]),\\n\",\n    \"                \\\"token_type_ids\\\": tf.TensorShape([None]),\\n\",\n    \"            },\\n\",\n    \"            tf.TensorShape([]),\\n\",\n    \"        ),\\n\",\n    \"    )\\n\",\n    \"\\n\",\n    \"\\n\",\n    \"DATA_COLUMN = 'review'\\n\",\n    \"LABEL_COLUMN = 'sentiment'\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"train_InputExamples\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"train_data = convert_examples_to_tf_dataset(list(train_InputExamples), tokenizer)\\n\",\n    \"train_data = train_data.shuffle(100).batch(32).repeat(2)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": 
{},\n   \"outputs\": [],\n   \"source\": [\n    \"validation_data = convert_examples_to_tf_dataset(list(validation_InputExamples), tokenizer)\\n\",\n    \"validation_data = validation_data.batch(32)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=3e-5, epsilon=1e-08, clipnorm=1.0), \\n\",\n    \"              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), \\n\",\n    \"              metrics=[tf.keras.metrics.SparseCategoricalAccuracy('accuracy')])\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"model.fit(train_data, epochs=2, validation_data=validation_data)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"pred_sentences = ['worst movie of my life, will never watch movies from this series', \\n\",\n    \"                  'Wow, blew my mind, what a movie by Marvel, animation and story is amazing']\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"tf_batch = tokenizer(pred_sentences, max_length=128, padding=True, truncation=True, return_tensors='tf')   # we are tokenizing before sending into our trained model\\n\",\n    \"tf_outputs = model(tf_batch)                                  \\n\",\n    \"tf_predictions = tf.nn.softmax(tf_outputs[0], axis=-1)       # axis=-1, this means that the index that will be returned by argmax will be taken from the *last* axis.\\n\",\n    \"labels = ['Negative','Positive']\\n\",\n    \"label = tf.argmax(tf_predictions, axis=1)\\n\",\n    \"label = label.numpy()\\n\",\n    \"for i in range(len(pred_sentences)):\\n\",\n    \"    print(pred_sentences[i], \\\": \\\", 
labels[label[i]])\"\n   ]\n  }\n ],\n \"metadata\": {\n  \"interpreter\": {\n   \"hash\": \"9752fa87da8bf164654ccc33a595e9110c8fc9bb15d763374a7037fd32519b1f\"\n  },\n  \"kernelspec\": {\n   \"display_name\": \"Python 3.9.7 ('base')\",\n   \"language\": \"python\",\n   \"name\": \"python3\"\n  },\n  \"language_info\": {\n   \"codemirror_mode\": {\n    \"name\": \"ipython\",\n    \"version\": 3\n   },\n   \"file_extension\": \".py\",\n   \"mimetype\": \"text/x-python\",\n   \"name\": \"python\",\n   \"nbconvert_exporter\": \"python\",\n   \"pygments_lexer\": \"ipython3\",\n   \"version\": \"3.9.7\"\n  },\n  \"orig_nbformat\": 4\n },\n \"nbformat\": 4,\n \"nbformat_minor\": 2\n}\n"
  },
  {
    "path": "examples/jupyter/integrations/matplotlib.ipynb",
    "content": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Demonstrating Matplotlib Modin Interoperability\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"import modin.pandas as pd\\n\",\n    \"import pandas\\n\",\n    \"import numpy as np\\n\",\n    \"import matplotlib.pyplot as plt\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Create a visualization with Modin df\\n\",\n    \"# Example modified from https://matplotlib.org/3.1.1/gallery/lines_bars_and_markers/xcorr_acorr_demo.html#sphx-glr-gallery-lines-bars-and-markers-xcorr-acorr-demo-py\\n\",\n    \"\\n\",\n    \"# Fixing random state for reproducibility\\n\",\n    \"np.random.seed(19680801)\\n\",\n    \"\\n\",\n    \"x = pd.DataFrame(np.random.randn(100, 1),columns=[\\\"Col_1\\\"])\\n\",\n    \"y = pd.DataFrame(np.random.randn(100, 1),columns=[\\\"Col_1\\\"])\\n\",\n    \"\\n\",\n    \"fig, [ax1, ax2] = plt.subplots(2, 1, sharex=True)\\n\",\n    \"ax1.xcorr(x[\\\"Col_1\\\"], y[\\\"Col_1\\\"], usevlines=True, maxlags=50, normed=True, lw=2)\\n\",\n    \"ax1.grid(True)\\n\",\n    \"\\n\",\n    \"ax2.acorr(x[\\\"Col_1\\\"], usevlines=True, normed=True, maxlags=50, lw=2)\\n\",\n    \"ax2.grid(True)\\n\",\n    \"\\n\",\n    \"plt.show()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Create a visualization with pandas df\\n\",\n    \"# Example modified from https://matplotlib.org/3.1.1/gallery/lines_bars_and_markers/xcorr_acorr_demo.html#sphx-glr-gallery-lines-bars-and-markers-xcorr-acorr-demo-py\\n\",\n    \"\\n\",\n    \"# Fixing random state for reproducibility\\n\",\n    \"np.random.seed(19680801)\\n\",\n    \"\\n\",\n    \"x = 
pandas.DataFrame(np.random.randn(100, 1),columns=[\\\"Col_1\\\"])\\n\",\n    \"y = pandas.DataFrame(np.random.randn(100, 1),columns=[\\\"Col_1\\\"])\\n\",\n    \"\\n\",\n    \"fig, [ax1, ax2] = plt.subplots(2, 1, sharex=True)\\n\",\n    \"ax1.xcorr(x[\\\"Col_1\\\"], y[\\\"Col_1\\\"], usevlines=True, maxlags=50, normed=True, lw=2)\\n\",\n    \"ax1.grid(True)\\n\",\n    \"\\n\",\n    \"ax2.acorr(x[\\\"Col_1\\\"], usevlines=True, normed=True, maxlags=50, lw=2)\\n\",\n    \"ax2.grid(True)\\n\",\n    \"\\n\",\n    \"plt.show()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Create a visualization with Modin df\\n\",\n    \"# Example modified from https://matplotlib.org/stable/tutorials/introductory/pyplot.html#sphx-glr-tutorials-introductory-pyplot-py\\n\",\n    \"\\n\",\n    \"names = ['group_a', 'group_b', 'group_c']\\n\",\n    \"values = [1, 10, 100]\\n\",\n    \"\\n\",\n    \"modin_df = pd.DataFrame({'names':['group_a', 'group_b', 'group_c'],'values':[1, 10, 100]})\\n\",\n    \"\\n\",\n    \"plt.figure(figsize=(9, 3))\\n\",\n    \"\\n\",\n    \"plt.subplot(131)\\n\",\n    \"plt.bar(modin_df['names'], modin_df['values'])\\n\",\n    \"plt.subplot(132)\\n\",\n    \"#plt.scatter(df['names'], df['values'])\\n\",\n    \"#plt.subplot(133)\\n\",\n    \"plt.plot(modin_df['names'], modin_df['values'])\\n\",\n    \"plt.suptitle('Categorical Plotting')\\n\",\n    \"plt.show()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Create a visualization with pandas df\\n\",\n    \"# Example modified from https://matplotlib.org/stable/tutorials/introductory/pyplot.html#sphx-glr-tutorials-introductory-pyplot-py\\n\",\n    \"\\n\",\n    \"names = ['group_a', 'group_b', 'group_c']\\n\",\n    \"values = [1, 10, 100]\\n\",\n    \"\\n\",\n    \"pandas_df = pandas.DataFrame({'names':['group_a', 
'group_b', 'group_c'],'values':[1, 10, 100]})\\n\",\n    \"\\n\",\n    \"plt.figure(figsize=(9, 3))\\n\",\n    \"\\n\",\n    \"plt.subplot(131)\\n\",\n    \"plt.bar(pandas_df['names'], pandas_df['values'])\\n\",\n    \"plt.subplot(132)\\n\",\n    \"\\n\",\n    \"plt.plot(pandas_df['names'], pandas_df['values'])\\n\",\n    \"plt.suptitle('Categorical Plotting')\\n\",\n    \"plt.show()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Create a visualization with Modin df\\n\",\n    \"# Example modified from https://matplotlib.org/stable/tutorials/introductory/pyplot.html#sphx-glr-tutorials-introductory-pyplot-py\\n\",\n    \"\\n\",\n    \"names = ['group_a', 'group_b', 'group_c']\\n\",\n    \"values = [1, 10, 100]\\n\",\n    \"\\n\",\n    \"modin_df = pd.DataFrame({'names':['group_a', 'group_b', 'group_c'],'values':[1, 10, 100]})\\n\",\n    \"\\n\",\n    \"plt.figure(figsize=(9, 3))\\n\",\n    \"\\n\",\n    \"plt.subplot(131)\\n\",\n    \"plt.barh(modin_df['names'], modin_df['values'])\\n\",\n    \"plt.subplot(132)\\n\",\n    \"#plt.scatter(df['names'], df['values'])\\n\",\n    \"#plt.subplot(133)\\n\",\n    \"plt.plot(modin_df['names'], modin_df['values'])\\n\",\n    \"plt.suptitle('Categorical Plotting')\\n\",\n    \"plt.show()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Create a visualization with pandas df\\n\",\n    \"# Example modified from https://matplotlib.org/stable/tutorials/introductory/pyplot.html#sphx-glr-tutorials-introductory-pyplot-py\\n\",\n    \"\\n\",\n    \"names = ['group_a', 'group_b', 'group_c']\\n\",\n    \"values = [1, 10, 100]\\n\",\n    \"\\n\",\n    \"pandas_df = pandas.DataFrame({'names':['group_a', 'group_b', 'group_c'],'values':[1, 10, 100]})\\n\",\n    \"\\n\",\n    \"plt.figure(figsize=(9, 3))\\n\",\n    \"\\n\",\n    
\"plt.subplot(131)\\n\",\n    \"plt.barh(pandas_df['names'], pandas_df['values'])\\n\",\n    \"plt.subplot(132)\\n\",\n    \"plt.plot(pandas_df['names'], pandas_df['values'])\\n\",\n    \"plt.suptitle('Categorical Plotting')\\n\",\n    \"plt.show()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"plt.figure(figsize=(9, 3))\\n\",\n    \"\\n\",\n    \"plt.subplot(131)\\n\",\n    \"plt.hlines(pandas_df['values'], 1, 3)\\n\",\n    \"plt.suptitle('Categorical Plotting')\\n\",\n    \"plt.show()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Create a visualization with Modin df\\n\",\n    \"# Example modified from https://matplotlib.org/stable/tutorials/introductory/pyplot.html#sphx-glr-tutorials-introductory-pyplot-py\\n\",\n    \"\\n\",\n    \"names = ['group_a', 'group_b', 'group_c']\\n\",\n    \"values = [1, 10, 100]\\n\",\n    \"\\n\",\n    \"modin_df = pd.DataFrame({'names':['group_a', 'group_b', 'group_c'],'values':[1, 10, 100]})\\n\",\n    \"\\n\",\n    \"plt.figure(figsize=(9, 3))\\n\",\n    \"\\n\",\n    \"plt.subplot(131)\\n\",\n    \"plt.bar(modin_df['names'], modin_df['values'])\\n\",\n    \"plt.subplot(132)\\n\",\n    \"#plt.scatter(df['names'], df['values'])\\n\",\n    \"#plt.subplot(133)\\n\",\n    \"plt.plot(modin_df['names'], modin_df['values'])\\n\",\n    \"plt.suptitle('Categorical Plotting')\\n\",\n    \"plt.show()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Create a visualization with pandas df\\n\",\n    \"# Example modified from https://matplotlib.org/stable/tutorials/introductory/pyplot.html#sphx-glr-tutorials-introductory-pyplot-py\\n\",\n    \"\\n\",\n    \"names = ['group_a', 'group_b', 'group_c']\\n\",\n    \"values = [1, 10, 100]\\n\",\n    
\"\\n\",\n    \"pandas_df = pandas.DataFrame({'names':['group_a', 'group_b', 'group_c'],'values':[1, 10, 100]})\\n\",\n    \"\\n\",\n    \"plt.figure(figsize=(9, 3))\\n\",\n    \"\\n\",\n    \"plt.subplot(131)\\n\",\n    \"plt.bar(pandas_df['names'], pandas_df['values'])\\n\",\n    \"plt.subplot(132)\\n\",\n    \"#plt.scatter(df['names'], df['values'])\\n\",\n    \"#plt.subplot(133)\\n\",\n    \"plt.plot(pandas_df['names'], pandas_df['values'])\\n\",\n    \"plt.suptitle('Categorical Plotting')\\n\",\n    \"plt.show()\"\n   ]\n  }\n ],\n \"metadata\": {\n  \"interpreter\": {\n   \"hash\": \"aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49\"\n  },\n  \"kernelspec\": {\n   \"display_name\": \"Python 3.9.10 64-bit\",\n   \"language\": \"python\",\n   \"name\": \"python3\"\n  },\n  \"language_info\": {\n   \"codemirror_mode\": {\n    \"name\": \"ipython\",\n    \"version\": 3\n   },\n   \"file_extension\": \".py\",\n   \"mimetype\": \"text/x-python\",\n   \"name\": \"python\",\n   \"nbconvert_exporter\": \"python\",\n   \"pygments_lexer\": \"ipython3\",\n   \"version\": \"3.9.7\"\n  },\n  \"orig_nbformat\": 4\n },\n \"nbformat\": 4,\n \"nbformat_minor\": 2\n}\n"
  },
  {
    "path": "examples/jupyter/integrations/plotly.ipynb",
    "content": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Demonstrating Plotly Modin Interoperability\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### Currently Plotly is not completely interoperable with Modin. Each visualization is created with a Modin and then pandas dataframe for comparison.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"import modin.pandas as pd\\n\",\n    \"import pandas\\n\",\n    \"import numpy as np\\n\",\n    \"import plotly.express as px\\n\",\n    \"import plotly.io as pio\\n\",\n    \"pio.renderers.default = \\\"notebook\\\"\\n\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"modin_df = pd.DataFrame(dict(a=[1,3,2,4], b=[3,2,1,0]))\\n\",\n    \"pandas_df = pandas.DataFrame(dict(a=[1,3,2,4], b=[3,2,1,0]))\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Create a visualization with Modin df\\n\",\n    \"fig2 = px.bar(modin_df)\\n\",\n    \"fig2.show()\\n\",\n    \"# py.iplot(fig2 , filename='jupyter-basic_bar')\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Create a visualization with pandas df\\n\",\n    \"fig2 = px.bar(pandas_df)\\n\",\n    \"fig2.show()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Create a visualization with Modin df\\n\",\n    \"fig = px.line(modin_df)\\n\",\n    \"fig.show()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   
\"source\": [\n    \"# Create a visualization with pandas df\\n\",\n    \"fig = px.line(pandas_df)\\n\",\n    \"fig.show()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Create a visualization with Modin df\\n\",\n    \"fig = px.area(modin_df)\\n\",\n    \"fig.show()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Create a visualization with pandas df\\n\",\n    \"fig = px.area(pandas_df)\\n\",\n    \"fig.show()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Create a visualization with Modin df\\n\",\n    \"fig = px.area(modin_df)\\n\",\n    \"fig.show()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Create a visualization with pandas df\\n\",\n    \"fig = px.area(pandas_df)\\n\",\n    \"fig.show()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Create a visualization with Modin df\\n\",\n    \"fig = px.violin(modin_df)\\n\",\n    \"fig.show()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Create a visualization with pandas df\\n\",\n    \"fig = px.violin(pandas_df)\\n\",\n    \"fig.show()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Create a visualization with Modin df\\n\",\n    \"fig = px.box(modin_df)\\n\",\n    \"fig.show()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": 
[\n    \"# Create a visualization with pandas df\\n\",\n    \"fig = px.box(pandas_df)\\n\",\n    \"fig.show()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Create a visualization with Modin df\\n\",\n    \"fig = px.histogram(modin_df, opacity=0.5, orientation='h', nbins=5)\\n\",\n    \"fig.show()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Create a visualization with pandas df\\n\",\n    \"fig = px.histogram(pandas_df, opacity=0.5, orientation='h', nbins=5)\\n\",\n    \"fig.show()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Create a visualization with Modin df\\n\",\n    \"# Example from https://plotly.com/python/mapbox-county-choropleth/#choropleth-map-using-plotlyexpress-and-carto-base-map-no-token-needed\\n\",\n    \"from urllib.request import urlopen\\n\",\n    \"import json\\n\",\n    \"with urlopen('https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json') as response:\\n\",\n    \"    counties = json.load(response)\\n\",\n    \"import modin.pandas as pd\\n\",\n    \"modin_df = pd.read_csv(\\\"https://raw.githubusercontent.com/plotly/datasets/master/fips-unemp-16.csv\\\",\\n\",\n    \"                   dtype={\\\"fips\\\": str})\\n\",\n    \"fig = px.choropleth(modin_df, geojson=counties, locations='fips', color='unemp',\\n\",\n    \"                           color_continuous_scale=\\\"Viridis\\\",\\n\",\n    \"                           range_color=(0, 12),\\n\",\n    \"                           scope=\\\"usa\\\",\\n\",\n    \"                           labels={'unemp':'unemployment rate'}\\n\",\n    \"                          )\\n\",\n    
\"fig.update_layout(margin={\\\"r\\\":0,\\\"t\\\":0,\\\"l\\\":0,\\\"b\\\":0})\\n\",\n    \"fig.show()\\n\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Create a visualization with pandas df\\n\",\n    \"# Example from https://plotly.com/python/mapbox-county-choropleth/#choropleth-map-using-plotlyexpress-and-carto-base-map-no-token-needed\\n\",\n    \"from urllib.request import urlopen\\n\",\n    \"import json\\n\",\n    \"with urlopen('https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json') as response:\\n\",\n    \"    counties = json.load(response)\\n\",\n    \"import pandas\\n\",\n    \"pandas_df = pandas.read_csv(\\\"https://raw.githubusercontent.com/plotly/datasets/master/fips-unemp-16.csv\\\",\\n\",\n    \"                   dtype={\\\"fips\\\": str})\\n\",\n    \"\\n\",\n    \"fig = px.choropleth(pandas_df, geojson=counties, locations='fips', color='unemp',\\n\",\n    \"                           color_continuous_scale=\\\"Viridis\\\",\\n\",\n    \"                           range_color=(0, 12),\\n\",\n    \"                           scope=\\\"usa\\\",\\n\",\n    \"                           labels={'unemp':'unemployment rate'}\\n\",\n    \"                          )\\n\",\n    \"fig.update_layout(margin={\\\"r\\\":0,\\\"t\\\":0,\\\"l\\\":0,\\\"b\\\":0})\\n\",\n    \"fig.show()\\n\"\n   ]\n  }\n ],\n \"metadata\": {\n  \"interpreter\": {\n   \"hash\": \"9752fa87da8bf164654ccc33a595e9110c8fc9bb15d763374a7037fd32519b1f\"\n  },\n  \"kernelspec\": {\n   \"display_name\": \"Python 3.9.7 ('base')\",\n   \"language\": \"python\",\n   \"name\": \"python3\"\n  },\n  \"language_info\": {\n   \"codemirror_mode\": {\n    \"name\": \"ipython\",\n    \"version\": 3\n   },\n   \"file_extension\": \".py\",\n   \"mimetype\": \"text/x-python\",\n   \"name\": \"python\",\n   \"nbconvert_exporter\": \"python\",\n   \"pygments_lexer\": 
\"ipython3\",\n   \"version\": \"3.9.7\"\n  },\n  \"orig_nbformat\": 4\n },\n \"nbformat\": 4,\n \"nbformat_minor\": 2\n}\n"
  },
  {
    "path": "examples/jupyter/integrations/seaborn.ipynb",
    "content": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Demonstrating Seaborn Modin Interoperability\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### All the examples in this section are taken / adapted from https://seaborn.pydata.org/tutorial/introduction.html. Each visualization is created with a Modin and then pandas dataframe for comparison.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 26,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"import seaborn as sns\\n\",\n    \"import modin.pandas as pd\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 27,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"name\": \"stderr\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"UserWarning: Distributing <class 'pandas.core.frame.DataFrame'> object. This may take some time.\\n\"\n     ]\n    },\n    {\n     \"data\": {\n      \"text/plain\": [\n       \"modin.pandas.dataframe.DataFrame\"\n      ]\n     },\n     \"execution_count\": 27,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"# Apply the default theme\\n\",\n    \"sns.set_theme()\\n\",\n    \"\\n\",\n    \"# Load an example dataset\\n\",\n    \"pandas_tips = sns.load_dataset(\\\"tips\\\")\\n\",\n    \"modin_tips = pd.DataFrame(pandas_tips)\\n\",\n    \"\\n\",\n    \"type(modin_tips)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 28,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n      
 \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>total_bill</th>\\n\",\n       \"      <th>tip</th>\\n\",\n       \"      <th>sex</th>\\n\",\n       \"      <th>smoker</th>\\n\",\n       \"      <th>day</th>\\n\",\n       \"      <th>time</th>\\n\",\n       \"      <th>size</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>16.99</td>\\n\",\n       \"      <td>1.01</td>\\n\",\n       \"      <td>Female</td>\\n\",\n       \"      <td>No</td>\\n\",\n       \"      <td>Sun</td>\\n\",\n       \"      <td>Dinner</td>\\n\",\n       \"      <td>2</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>10.34</td>\\n\",\n       \"      <td>1.66</td>\\n\",\n       \"      <td>Male</td>\\n\",\n       \"      <td>No</td>\\n\",\n       \"      <td>Sun</td>\\n\",\n       \"      <td>Dinner</td>\\n\",\n       \"      <td>3</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>21.01</td>\\n\",\n       \"      <td>3.50</td>\\n\",\n       \"      <td>Male</td>\\n\",\n       \"      <td>No</td>\\n\",\n       \"      <td>Sun</td>\\n\",\n       \"      <td>Dinner</td>\\n\",\n       \"      <td>3</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      <td>23.68</td>\\n\",\n       \"      <td>3.31</td>\\n\",\n       \"      <td>Male</td>\\n\",\n       \"      <td>No</td>\\n\",\n       \"      <td>Sun</td>\\n\",\n       \"      <td>Dinner</td>\\n\",\n       \"      <td>2</td>\\n\",\n       \"    </tr>\\n\",\n      
 \"    <tr>\\n\",\n       \"      <th>4</th>\\n\",\n       \"      <td>24.59</td>\\n\",\n       \"      <td>3.61</td>\\n\",\n       \"      <td>Female</td>\\n\",\n       \"      <td>No</td>\\n\",\n       \"      <td>Sun</td>\\n\",\n       \"      <td>Dinner</td>\\n\",\n       \"      <td>4</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"   total_bill   tip     sex smoker  day    time  size\\n\",\n       \"0       16.99  1.01  Female     No  Sun  Dinner     2\\n\",\n       \"1       10.34  1.66    Male     No  Sun  Dinner     3\\n\",\n       \"2       21.01  3.50    Male     No  Sun  Dinner     3\\n\",\n       \"3       23.68  3.31    Male     No  Sun  Dinner     2\\n\",\n       \"4       24.59  3.61  Female     No  Sun  Dinner     4\"\n      ]\n     },\n     \"execution_count\": 28,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"modin_tips.head()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 29,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/plain\": [\n       \"<seaborn.axisgrid.FacetGrid at 0x7fc3bbb1a430>\"\n      ]\n     },\n     \"execution_count\": 29,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    },\n    {\n     \"data\": {\n      \"image/png\": 
\"iVBORw0KGgoAAAANSUhEUgAAAwEAAAFcCAYAAACQkLIVAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAACZvElEQVR4nOzdZ4BU1dnA8f+902dntvdCWXrvKEXBggXEAhp7iRo1+qoxMUaNpvhGo4mvRGOKJiZGYxcQsKNYqErvvSxsYXubXu59PywsLLsLy/bdeX5fYO7MnHvO7O6Z+9xzznMUXdd1hBBCCCGEEBFD7egKCCGEEEIIIdqXBAFCCCGEEEJEGAkChBBCCCGEiDASBAghhBBCCBFhJAgQQgghhBAiwkgQIIQQQgghRISRIEB0GbfeeitlZWUA/OhHP2LPnj0dWp+HH36YV155pc3Pc+ONN/Lpp5+2+XmEEN1TZ+w7zzrrLC677DIuu+wypk+fzq9+9SuKi4sBKCws5JprrunQOgoRCYwdXQEhmmr58uW1///HP/7RgTURQoiuozP2nbfccgu33XYbALqu89JLL3H77bczb948UlJSePvttzu4hkJ0fxIEiC7hkUceAeDmm2/m5Zdf5vrrr+f555/H4/Hw3HPPkZaWxv79+7HZbNxxxx28/vrr7N+/nwsuuIBHH30UgCVLlvC3v/2NYDCI1WrlF7/4BaNGjapznj179vCzn/2s3vlvuukmZs+e3aS65ubmMnPmTNavX1/v8bx581i8eDGqqpKTk4PVauWZZ56hT58+FBcX8+tf/5p9+/ahqirXXHMNN910EwBffvklr7zyCiUlJUyYMIHf/e53qKoM5AkhTq4r9J2KonDXXXcxf/58li9fTnZ2dm2f+ec//5m8vDyKi4vJy8sjJSWFP/7xjyQnJ3PuuedyxRVXsHLlSgoKCrjsssv4yU9+ctI6//nPf2bDhg0UFRUxYMAAnn322Vb4lIXoonQhuoj+/fvrpaWluq7r+jnnnKNv2rRJX7VqlT5o0CB969atuq7r+m233aZfffXVut/v10tLS/UhQ4bohw8f1vfv369fcsklellZma7rur5r1y590qRJutvtbnZ9fvGLX+j//Oc/6x0/dOiQPnLkyAYfz507Vx8zZoxeUFCg67quP/HEE/pDDz2k67qu33PPPfozzzyj67quV1VV6TNmzNAPHDig33DDDfqPf/xjPRQK6R6PR580aZK+evXqZtdbCBFZukrfee+99+r/+Mc/6vSZL7zwgn7eeefp1dXVuq7r+p133qk///zztW15+umndV3X9cOHD+vDhg3TDx48eNI6v/DCC/qFF16oB4PBZtdfiO5CRgJEl5eZmcngwYMB6NGjB06nE7PZTHx8PFFRUVRWVrJ69WqKioq45ZZbat+nKAoHDx5k4MCBtcdaYyTgVIYMGUJqaioAgwcPZvHixQCsWLGCn//85wA4nU4+/PDD2vdMnz4dg8GAzWajV69elJaWtkpdhBCRq7P1nYqiYLPZ6h0fP348DocDqOkzKysra58777zzAEhJSSEhIYHKyko2btzYaJ0BRo4cidEolz9CyF+B6PLMZnOdxw117pqmMWHCBP70pz/VHisoKCA5ObnO6/r27cuCBQtaVB9FUdB1vfZxMBis87zVam3wtUajEUVRap87dOgQcXFxtc81Vr4QQjRHZ+o7dV1n69at3HDDDfWea6zPBLBYLPWeO1mdFy9ejN1ub3Y9hehOZFKx6DIMBgOhUKhZ750wYQLLly9n7969AHzzzTdceuml+Hy+1qwiANHR0QSDwdoMHB999FGT6zh37lwAqqurufnmmzlw4ECr108IEVk6e98ZDof5y1/+QlxcHOPGjWtxee3Z3wvRlclIgOgyLrroIm688Ub+/Oc/n/Z7+/btyxNPPMFPf/pTdF3HaDTyt7/9jaioqBbVac6cObz44ou1j8855xyee+45fv7zn/OjH/2I+Ph4Lr
rooiaV9atf/Yrf/OY3zJw5E13XufPOOxk6dGiL6ieEEJ2x73z11VdZuHAhiqIQDocZNmwYL7/8covKbOs6C9HdKLrMKxBCCCGEECKiyHQgIYQQQgghIowEAUIIIYQQQkQYCQKEEEIIIYSIMBIECCGEEEIIEWEkCBBCCCGEECLCdLkUoaWlLjSt+yY0iouzU17u6ehqtLtIbHckthmk3a0hKcnZrPd19/4TIvP3KxLbDJHZ7khsM7R+u5vbh3Y3MhLQyRiNho6uQoeIxHZHYptB2i3aViR+zpHYZojMdkdimyFy293WJAgQQgghhBAiwkgQIIQQQgghRISRIEAIIYQQQogII0GAEEIIIYQQEUaCACGEEEIIISKMBAFCCCGEEEJEGAkChBBCCCGEiDBtGgS4XC4uueQScnNzAVixYgUzZ87kggsuYM6cOW15aiGEEEII0Y0oCpg1d+1jix55G6e1pjYLAjZu3Mi1117LgQMHAPD5fDz66KP89a9/5eOPP2bLli188803bXV6IYQQQgjRTSgKmMr2UP3Rc1j8pVjceVR+8Hss7vyOrlqX1WZBwLvvvsuvf/1rkpOTAdi0aRM9e/YkKysLo9HIzJkz+fTTT9vq9EIIIYQQoptQtRDBgl0ECvZSsfBZyuY9Q7D4EFp5PorS0bXrmoxtVfCTTz5Z53FRURFJSUm1j5OTkyksLGyr0wshhBBCiG4irBgxDjqfKFc57g2LAYg55ybCmaPQ9Q6uXBfVZkHAiTRNQzkuVNN1vc7jpkpIcLRmtTqlpCRnR1ehQ0RiuyOxzSDt7iiR0H9Cx3/OHSES2wyR2e5IbDPUtNtfsJfKXd/XHvNu+ZrkAWMxJ2R0YM26rnYLAlJTUykuLq59XFxcXDtV6HSUlrrQtO4b8iUlOSkuru7oarS7SGx3JLYZpN2tVVZzdPf+EyLz9ysS2wyR2e5IbDPUtLu8pJzgxq/QPJXETrudcHUZ1avm4d6zkSo9+rRGAyI1kDpRuwUBI0aMYP/+/eTk5JCZmcmHH37I7Nmz2+v0QgghhBCiiwrpRswjZpDUZwyB2N4Y9TBJWQMJxfSU6UDN1G5BgMVi4emnn+bee+/F7/czZcoULrroovY6vRBCCCGE6MKCRgfB2L4AaIqBUFz/Dq5R19bmQcCSJUtq/z9hwgQWLlzY1qcUQgghhBBCnITsGCyEEEIIIUSEkSBACCGEEEKICCNBgBBCCCGEEBFGggAhhBBCCCEijAQBQgghhBBCRBgJAoQQQgghhIgwEgQIIYQQQggRYSQIEEIIIYQQIsJIECCEEEIIIUSEkSBACCGEEEKICCNBgBBCCCGEEBFGggAhhBBCCCEijAQBQgghhBBCRBgJAoQQQgghhIgwEgQIIYQQQggRYSQIEEIIIYQQIsJIECCEEEIIIUSEkSBACCGEEEKICCNBgBBCCCGEEBFGggAhhBBCCNGuFKWjayCMHV0BIYQQQggRGTyBMPsLqjlwuIqEaCv9s2KJd5g7uloRSYIAIYQQQgjR5jz+MC8v2sqmPSW1x6KsRn55y3hSY60dWLPIJNOBhBBCCCFEm9uTX1knAABw+0J8uHw/egfVKZJJECCEEEIIIdqUoijsOlTR4HOrtxfiDYTbt0JCggAhhBBCCNG2dF0nKdbW4HNJsTZMRrkkbW/yiQshhBBCiDY3uFc8FpOh3vEfnNcPkyrpgtqbBAFCCCGEEKLNJcdYeOyH4xjeLxGApFgr9101gsE9Yzu2YhFKsgMJIYQQQog2p+uQEW/nvlnD8QTCmI0qZoOMAHQUCQKEEEIIIUS7URVwWOpPCxLtS6YDCSGEEEIIEWEkCBBCCCGEECLCSBAghBBCCCFEhJEgQAghhBBCiAgjQYAQQgghhBARRoIAIYQQQgghIowEAUIIIYQQQkQYCQKEEEIIIYSIMBIECCGEEEIIEWEkCBBCCCGEECLCSBAghBBCCCFEhJEgQAghhBBCiAgjQYAQQgghhBARRo
IAIYQQQgghIowEAUIIIYQQQkQYCQKEEEIIIYSIMBIECCGEEEIIEWEkCBBCCCGEECLCSBAghBBCCCFEhJEgQAghhBBCiAjTIUHAggULmDFjBjNmzOCZZ57piCoIIYQQQggRsdo9CPB6vTz55JO8/vrrLFiwgDVr1rBixYr2roYQQgghhBARq92DgHA4jKZpeL1eQqEQoVAIi8XS3tUQQgghhBAiYhnb+4QOh4P777+fiy++GJvNxrhx4xg9enR7V0MIIYQQQoiIpei6rrfnCXfs2MHDDz/MK6+8gtPp5MEHH2T48OHcfvvt7VkNIYQQQgghIla7jwQsW7aMCRMmkJCQAMCsWbN48803mxwElJa60LR2jVvaVVKSk+Li6o6uRruLxHZHYptB2t1aZTVHd+8/ITJ/vyKxzRCZ7Y7ENkPrt7u5fWh30+5rAgYOHMiKFSvweDzous6SJUsYNmxYe1dDCCGEEEKIiNXuIwGTJ09m27ZtzJo1C5PJxLBhw7jjjjvauxpCCCGEEEJErHYPAgDuuOMOufAXQgghhBCig8iOwUIIIYQQQkQYCQKEEEIIIYSIMBIECCGEEEII0QkcTaPfHjpkTYAQQgghhGhYSNPJL/OQX+zGYjbQOy2aWLupo6sluhkJAoQQQgghOomQprN4zSHeW7Kn9pjNYuSXN4+T/PadiNvt5pFHHiEnJwdVVRkyZAgzZszgT3/6E2lpaezfvx+bzcYdd9zB66+/zv79+7ngggt49NFHAXjnnXd4/fXXUVWVxMREHn/8cXr37l3nHE899RQ7d+7kr3/9KyaTiWeffZbVq1cTDocZPHgwjz32GA6Hg3PPPZfhw4ezc+dOfvrTnzJt2rQmtUGmAwkhhBBCdBL5ZZ46AQCA1x/i3x9tw+0NdlCtxIkWL16M2+1mwYIFvP/++wDk5uayefNm7rjjDhYsWIDD4eDll1/mpZdeYt68ebz55psUFhaycuVK/vnPf/Laa6+xcOFCLrnkEu655x50vWYzR13XeeKJJ8jPz+cf//gHUVFRvPzyyxgMBubNm8fChQtJTk7m2Wefra1Pv379+OSTT5ocAICMBAghhBBCdBr5Je4Gj+/Nq6Sw3IPTJPdvO4MxY8YwZ84cbrzxRiZOnMjNN99MWVkZmZmZDB48GIAePXrgdDoxm83Ex8cTFRVFZWUlS5cuZfr06cTHxwMwa9YsnnzySXJzcwF49dVXKS0t5YMPPsBsNgPw9ddfU11dzYoVKwAIBoMkJCTU1mfs2LGn3QYJAoQQQgghOgmLydDgcaNBwWKUAKCzyMrKYvHixXz33XesWrWKH/7whzzxxBO1F+1HGY31L7U1Tat3TNd1QqEQAOPGjWP06NE88sgjvPPOO5hMJjRN49FHH2XKlClAzXQkv99f+3673X7abZDfJiGEEEKITqJ3WjQ2S/0Lx4sn9iIt0dEBNRINefPNN3nkkUeYPHkyP//5z5k8eTLbtm1r0nvPOussPv74Y8rKygCYO3cusbGx9OzZE4ChQ4dyww034HQ6efHFFwGYPHkyb7zxBoFAAE3TePzxx3nuueda1AYJAoQQQgghOolYu4lf3jKOvpmxQM0IwMyzejNtbBaqqnRs5UStyy+/nHA4zPTp05k1axbV1dUMGDCgSe+dNGkSt9xyCzfffDMzZszggw8+4KWXXkJVj12WK4rCU089xZtvvsm6deu4++67ycjI4IorrmD69Onous7DDz/cojYo+tFVCF1EaakLTetSVT4tSUlOiourO7oa7S4S2x2JbQZpd2uV1Rzdvf+EyPz9isQ2Q/dvd0jTqXAHMBlVYuwm0Lt/mxvT2u2WLEs1ZE2AEEIIIUQnY1QVEp2WmgfdO3YXHUSmAwkhhBBCCBFhJAgQQgghhBAiwkgQIIQQQgghRISRIEAIIYQQQogII0GAEEIIIYQQEUaCACGEEEIIISKMBAFCCCGEEEJ0Erm5uQwYMIDly5fXOX7uueeSm5vbaueRIE
AIIYQQQohOxGQy8fjjj+NyudrsHLJZmBBCCCGEEKfh67WHeO2T7ZSUe0mMs3HTxYOYOiar1cpPTk5m4sSJPPPMM/zv//5vnef+/ve/s3DhQgwGA5MmTeLnP/85BoPhtM8hIwFCCCGEEEI00ddrD/HiexspLveiA8XlXl58byNfrz3Uqud5+OGHWbZsWZ1pQd9++y1Llixh7ty5zJ8/n5ycHN5+++1mlS9BgBBCCCGEEE302ifb8QfDdY75g2Fe+2R7q57H4XDwv//7v3WmBa1atYoZM2Zgs9kwGo3Mnj2blStXNqt8CQKEEEIIIYRoopJy72kdb4nJkyfXTgsC0DSt3mtCoVCzypYgQAghhBBCiCZKjLOd1vGWOjotqKioiDPPPJOPPvoIn89HKBRi7ty5nHnmmc0qV4IAIYQQQgghmuimiwdhMdVdiGsxGbjp4kFtcr6j04KCwSBTp05l6tSpzJ49mxkzZpCens4NN9zQrHIlO5AQQgghhBBNdDQLUFtlB8rMzGTJkiV1jk2ePJmdO3cCcPfdd3P33Xe3+DwSBAjRzRkMCgDhsN7BNRFCCCFAA0IhDbNJhS761TR1TFarpgTtCBIECNFNufwh9uZVsXp7IaqqMG5QCtlp0URZTj+XsBBCCNFSYV1nV24lC5buo6jMy5lDUzl3TCaJTktHVy0iSRAgRDdU5Q3ywvsb2ZdXVXts2cZ8BvWK567Lh+K0yp++EEKI9rXtQAVz3llf+/jTVTl8t+Uwv7p1PDF2UwfWLDLJwmAhuhlFUVi2qaBOAHDU9gNlrNtVjKJ0QMWEEEJErEBY47+f7ah3vNzlZ09eZQfUSEgQIEQ34/aH+HD5/kafX7h0H75Q/TzDQgghRFvxBzWKKxrJr1/Z+vn1xalJECBENxMK6/gC4Uafr/YEZJGwEEKIdmUzG+ibGdvgcxmJjvatjAAkCBCi27FbDPTJiGn0+aF9ErGa5U9fCCFE+zGqCtdd0B+DWnc+6sCecfROc3ZQrTqfJ554gvvuu6/OsWXLlnHeeefhcrla9VxyJSBEN2NUFa48t2+jz18yqTcqsihACHGMMeTGGD42JcMarmrXtUOyTiky9Ep28Nvbz2TW1D5MGp7G3bOGc/cVw4iySLKKo372s5+xZcsWvvzySwA8Hg+/+c1veOqpp3A4WnfERD51IbqhvunR3HfVCP790TaqPUEAYh0Wbps5mN4pMuwqhDjGFHLjW/0+itGCZezlqK4iyhbNIW7GfQRie6O34exBRQFT0VYwmgkl9MdUeQDNVUYobSS63KzoltLjbWRM6IWqdt39a/RwkMPvPg1AyuwHKZz7LACpP3gYxdCyLEdRUVH87ne/49FHH2XChAm88MILnHvuudhsNq699lp8Ph9xcXH89re/JSsri3//+9/Mnz8fVVUZPnw4TzzxRJPPJUGAEN2QQVEY1TeBfndO5HC5B1WBlDg7drPsESCEqMvgL8OzbSmEQ2ieCvwHNqH5PXg2fYll8o2ElLbL4W4NlFK86HkA4s7/IaVLXkMP+km+6Wm8luQ2O6/oWLquE2586Vqnd/jdp/Ed3AbAwRfuQA+Hao+nXft4i8ufOHEikydP5pFHHmHfvn28+eabXH/99fz9738nPT2dpUuX8vjjj/PKK6/w0ksvsXTpUgwGA7/85S8pLCwkJSWlSeeRIECIbkrXIcpioE+qzLUUQjTO7+hBwuUPUjr/Wbw7VwFg6zsW64QfEGzDAADAa0og/tIHKP3gWco+fQlQiJ95Pz5LUpueV4jWoIcC6KEAAIrR3KplP/zww0ydOpW//OUvFBQUcOjQIX784x/XPu9yuTAYDIwaNYorr7yS8847jx/+8IdNDgBAggAhhBAioum6jmK2o6gq+pHswWpULLraPps3KRY7iqKiEwZFQbU5auYJdc2ZIiICpMx+sGYE4EgAAKAYjKTM/nmrncPhcBAdHU1GRgYul4vMzEwWLFgAQDgcpqSkBIC//vWvbN
iwgW+//Zbbb7+dZ599lvHjxzfpHLIwWAghhIhgVnceZfOeRg8FMCX3BtWIe+MXBNcvwqD72/bcgVJK5z2DroVxnnk5KCql8/6A1VvYpucVoiUK5z5bOwXoKD0conDuH9vkfNnZ2VRWVrJmzRoA5s6dy4MPPkhZWRnTp0+nf//+3H///UyaNImdO3c2uVwZCRBCCCEimGZ2YErri2q0EDX1ZvTyXMoWPIc5axAh1dymd+QD5lhiz7kJxWRFyxhBQvoAwuWHCVgT2u6kQrQSxWhGMRjrBQStzWw28/zzz/Pkk0/i9/txOBw888wzxMfHc/XVV3PllVdis9no3bs3s2fPbnK5iq635br/1lda6kLTulSVT0tSkpPi4uqOrka7i8R2R2KbQdrdWmU1R3fvPyEyf79ao83mQAW6ohI0RaMoCtZACT5zPLre9hl6FMKAWpsNSCWMxqmTGMjPOnK0drub24ce1ZbZgdqTjAQIIYQQES5gjq39v67reE0J7TYnXz/hgr8pAYAQHUkxmOpkAWqNjEAdQdYECCGEEEIIEWEkCBBCCCGEECLCSBAghBBCCCFEhJEgQAghhBBCiAgjQYAQQgghhBARRoIAIYQQQkQERQGjUUVV2z71qRCdXYcEAUuWLGHWrFlcfPHF/O53v+uIKgghhBAigpS6Any2JpenXl/L3G/3UVDuRZFYQLSizZs3c99993V0NZqs3fcJOHToEL/+9a957733SEhI4Oabb+abb75hypQp7V0VIYQQQkSAMleAp15dTbnLD8COnHI+W5XD47eOJyPe3sG1E93FsGHDeOGFFzq6Gk3W7kHA4sWLmT59OqmpqQDMmTMHi8XS3tUQQgghRCdX6Q1iNKg4LEZ0vXm7lykKbNpbUhsAHBUIaXz+/UF+ePHAdtsYTXQfbrebRx55hJycHFRVZciQIcyYMYMnn3ySDz/8kNtuu42SkhIAPB4Phw4d4tNPPyU9PZ1nn32W1atXEw6HGTx4MI899hgOh6Pd29Du04FycnIIh8PcddddXHbZZbz55pvExMS0dzWEEEII0UlVeIK88vF2fvrCUh76y3K+3VRAMKw1qyxVVdl2oLzB57buKyMQal65IrItXrwYt9vNggULeP/99wHIzc2tff6VV15hwYIFvPfee6SkpPDTn/6UXr168fLLL2MwGJg3bx4LFy4kOTmZZ599tkPa0O4jAeFwmDVr1vD6669jt9v58Y9/zPz585k1a1aT3p+Q0P6RUntLSnJ2dBU6RCS2OxLbDNLujhIJ/Sd0/OfcEbpTmwOhMP/4aC0rNxcA4PWH+PdH20hOsDN5REad1za13QN6xrFme2H94z3iSE50YDIaWl7xdtKdftano7O1e8yYMcyZM4cbb7yRiRMncvPNN1NWVlbnNZqm8eCDD5Kdnc0dd9wBwNdff011dTUrVqwAIBgMkpCQ0O71hw4IAhITE5kwYQLx8fEAnH/++WzatKnJQUBpqQtN677jdklJToqLqzu6Gu2uPdtdXOVnT14FBoNK/8xYYu2mdjnvieRnHVlas93N/TLs7v0nRObvV3drc7knWBsAHO+bdbkMzoolfGRE4HTaPaJPAvOtRjy+UO0xVVW48MweVJR7Wqfi7aC7/aybqrXb3RoBRVZWFosXL+a7775j1apV/PCHP+SJJ56o85onn3wSr9fLnDlzao9pmsajjz5auxbW7Xbj99edqtZe2j0IOOecc/jFL35BVVUVUVFRLF26lPPOO6+9qyEiVEm1n9+88h1ef80XQXKcncduGYvD0u5/CkIIIRpgVBUsJgP+YLjO8VinpdnrAhKdFn71w/F8uzGfDbuK6ZMRw/nje5CVaJf1AKJZ3nzzTdauXcuzzz7LWWedRWlpKdu2bat9/uWXX2b9+vW8/vrrGAzHRpomT57MG2+8wYQJEzAajTz++OPY7fYOyZbZ7lc+I0aM4Pbbb+e6664jGAwyadIkZs+e3d7VEBFqf0FVbQAAUF
TuIbfIzcAsWZcihBCdQbTNyJXn9OWNz3fWHjOoChOHprVoJCs5xspVU7KZdXY2BkWpCSgkABDNdPnll/P9998zffp0bDYbaWlpDBgwgE8//ZTCwkKee+45evfuzQ033ICm1Yxe3Xfffdx9990888wzXHHFFYTDYQYNGsTDDz/cIW3okNufV155JVdeeWVHnFpEOJOx/lp4k0n2zBNCiM5C1+Gs4Wkkxtr4dkMeCTFWzhqZQWYr3LXX9ZqMKM0dURDiKLvdzp/+9Kd6x6dNmwbAjh07Gn3vr3/967aq1mmRORAiovRJj6FPRgx78yoBmDA0lczEqA6ulRBCiOOZjSojsuMZ3S8RXddrRgDkul2IViVBgIgoTquRn10zkrwSNwaDSnqCHbNBRgKEEAJAOTpNhpr8+h19wzzczLSgQohTkyBARByryUCftOiOroYQQnQq5mAleuEu9IzhGDzF6D43gYQBHV2tRgVDYUqq/fj8IRKirdjMXSfNpxCdgQQBQgghhECpzKP0478QNXQq3j1rMMam4LzkAQJq59tfwuUP8dbcjXyx+hC6DrEOC3ddMYz+mdEybUiIJpJ5EEIIIYQglDiA6AmzcG/5Gj3oJ/biuztlAKAo8NW6PBZ/f6h2ulKFy88f3lhLYbmvYysnRBfS5CCgsrISl8vVlnURQgghRAcxugpwrf8c1WJHDwfxbluKQet8F9XVvhAfLd9f77im6ewrqOyAGgnRNZ0yCNi3bx+zZ89mwoQJnHHGGdxwww3k5+e3R92EEEII0U4UdExJWSRc9ztip90GWrhTTq3RNJ1gIwuGAyFZSCy6B5fLxSWXXEJubm6bneOUQcAjjzzCVVddxcaNG1m/fj0XXnghv/zlL9usQkIIIYRofz5HFo6L7sdnikfvPRHDiBmEDdaOrlY9TpuJCUPTGnyuV4qznWsjROvbuHEj1157LQcOHGjT85wyCPB6vVxzzTWYTCbMZjM33ngjJSUlbVopIYQQQrS/gFJz0a9hIKx2vgAAQAGumNKHlHh7nePXXtBf9n0R7SoUClFSUkIoFGrVct99911+/etfk5yc3KrlnuiU2YGys7NZt24do0ePBmDXrl1kZma2aaWEEEIIIRqT4DDz9D2T2bG/FI8/RFqCnaQYKwZF6eiqiQixceNG7r//fgKBAGazmeeff54RI0a0StlPPvlkq5RzKqcMAvLz87nxxhsZMGAARqORbdu2kZSUxMyZMwFYtGhRm1dSCCGEEOJ4ibE2BmTGdHQ1RAQKhULcf//9tQlzAoEA999/P19++SUGQ9fZr+KUQcCDDz7YHvUQQgghhBCi06uoqCAQCNQ5FggEKC8vJzExsYNqdfoaDQL27t1Lnz59iIpqeH7dkCFD2qxSQgghhBBCdEaxsbGYzeY6gYDZbCYuLq4Da3X6Gg0C/vCHP/DSSy9x1VVXkZaWhq4fyxPm9XpZuXJlu1RQCCGEEKIzUBTQO2HaVNG+jEYjzz//fL01AV1pKhCcJAh45plnqKiooE+fPrz++uvouo6iKASDQW644Yb2rKMQopWoqoKiKIQbybEthBCivgpPkN25FRSVe8lMdtAnPRqH5ZQzqkU3NmLECL788kvKy8uJi4trkwBgyZIlrV7m8Rr9Df7Zz37G8uXLURSFCRMm1B43GAxceOGFbVopIUTr0tE5WOxh1dbDFJV5OGNIKoN7xuGwypeYEKJ1WL2HCVpiCatWDJoPk78Cny211cpXFHD7wxRVeDEaVGxRllYr+2RKqv089Z/VVLiOTf3okxHDvVcNJ9pqapc6iM7JYDB0qTUAJ2r0CuCVV14BajYL+/3vf99uFRJCtL5duVX84Y21tcPY63cVM7h3PPdcMQybuWsNX4ruRVF0LMFKfMZYjJoXJRwgaJKML12NpTqHkrlP4xg5DevIi/Bt+oTKjV+SMOsX+J09W+EMOlsPVvKPBVuoPHIx3ivNyR2XDSM1tg33M1Dg63V5dQIAgL15lW
w/UM4ZA9s2j7sQbemUm4VJACBE1+YPabz68bZ681i37S/jQJGrYyolxBHm0l2Uvv0rrK5DBNYuwPPNq5jD7o6uljgNigK6z4Ue9FP9/ULK3/kN1d8vQg/60X0uWiN1/8ESD//35rraAADgQEE1T/1nNVXeYMtP0IhASGPV1sMNPrd+VzEGwykvo4TotOS3V4huzuULUVjmbfC50sqGjwvRbqISMUTFUvzm47jXf4p94ERCBltH10qcBl2HYPJg4mf+BIBQZREA8TPvJ5g8uMULaVVVYcXmggbLcXmD7C+obtkJTsJkUOvtTHxUepIDTZNVwqLrkiBAiCYqdwdYv7eUFdsK2ZlXiTcY7ugqNYndYiDW0fDc2Zh2mlN7SgqUuQLklrip9oVQTnHr8PinFYVWudMoOkbYZMec2rfmgcGIIS4NTb6amkeBck/N31GFJ9iufxdq2E/w8O46x4IFe1DD/haXrSgK+/KqGn2+3NXyczR6buCys7LrHTeoCmMGJAMSBJwOX0hjZ14VK7YVsn5vKWXuwKnfJNqMrAoU4lQU2H6okj+9vZ5g6FhWnYwkBw9cPZJ4h7ndq2RQwoR1Q73/N8RuNnD9RQP4y/ub6hxPT4yid5qzTevZFC5/iI9X5fD5dwfRNB2bxcjV5/fjzMEpmBsYajcHK9EObUTpOYaQMQpT6U5AJZjQT1L3dUFqyV7cm5cQe/6tuDd+QcUnfyNm9qP4lYb3qBEN8wTCLFmXx8Kl+wiFNcxGlSum9OHskenYTHX7B0WpuRhToMG/sdNhUMLoihElZy3V3y8C1UD0GZdRteoDqr9fSFxcCmrvyS26Y67rOkOyE9iTW9Hg8zbzsUsZTdc5XOHjcKmHWIeZzKSoFrexT7qTB68bzZuf7yS/xE3/HrGcMyaLtxbvYNygVEb1SyLaJpdTp1LmDvCndzaQe9w0VJNR5SdXj2JQjxiJpzqA/NYKcQqFFT6ee3Md4RO+xPKKXfxj0VYevGYUhna842YKuQhu+gTLoLMA8G9fimn4xQSNjgZfr+swIjuBn18/mg+XH6CsysdZI9KZMDStw1Pc6cDcb/bxzbrc2mNef4hXP9qO1Wxk/ICkOq83aT7cy/6Lb/dq7MP2Y+s7ltKFc1BQSLz2N/jsGe3cAtFSWmJfkq77LQFHFjEZQyAckADgNGmazpJ1ucz7em/tsUBI450vd6OoKheNy6y9CA/rOut3l/LmZztRVYVbpg9icM841Gb0YRZfMf6tS7CMnI4am4IlcxCOsTMIpw4lIaUPrnUfo6YNJtTCKTOapjN2YDIfr9hf50YMQEq8HU3XUFUFTddYsbWIfy3aWvv8xOFp3DCtP1ZT8xMgGBSFwT1i+fWt4zlY7OLDZQd4+YPN6Dps21/Oyh4F3H/ViHrBljgmrOu8smhrnQAAIBjS+L+31vHknRNIiWnDBd6iQRIECHEKO3LK6wUAR+3MKSe/1ENWYsNzRtuCGvLg2b0az47lNQcMZuIGT4VGggAAo6owKCuWgdeOJBzWMRnUOhsAdpSiCl+dAOB4b3+xi2HZ8XW+WEMGK1EjL8S/fyOezV/h2fwVAPZRFxKydK2dGkWNkGol5KjJHuO3JHRwbbqmvGIXC5fub/C5+d/s4czBKbV3qg8Vu/nr3GOjgs+9vZ7/vWMCGfHNWIfhq6R6w2J8B7cSLM0jbur16Am9CGugpQzGMa0XAUPrBHQZCTZuu3QIi787yN68SlQFRg9MoVdaNAo1gUJJdYBXP9pW530rNhUweXg6AzNbnnEqENR44Z0NVHvqLkTedbCCg0UuBmRIVqvG5Jd52X6gvMHnNE1ne04ZKcPT27lWndeLL77IJ598AsCUKVN46KGH2uQ8MvFSiJNQVYX8kpNnKqn2tO+cRr81mfiL7iTsqiDsqiD+orvwW5NO/UZA0WsCgs4QAABUneSzq6j24wvUXXeh6xBK6EvM2dfUHlNtTuxjZxIytF8gJkRnUuUOEGpkA0B/II
zbd+yitbC8fjKA4gpPs84bjO9LzMQrCRYfBEXB1GsUQUPNzQhdV1otAKgpENLi7cQ4zFx6VjaXTM6mqNzDt+tyGdgzHoDyan+D045KK32tUgW3L1gvADiqvKrt1iV0B65TfE/mF7tRmzMc1Q2tWLGCZcuWMX/+fD744AO2bt3K4sWL2+RcEgQIcRKappOZ1PgddoDoqPZdE2DxFVH20YsYnIkYnImUffwiFl9Ru9ahtcQ4zI0uXoyPttaZ6ws1c5mNJTup+ObN2mOatxrPqnmYJK2kiFCxDjNmY8Nf5zaLkajj5qunnpDpRlEgJa55AbSpdDeVy9/DnNYHgOpv38AUaru0w1mJUcya0pdwWGP3oXLOHZPFL24cQ6y9ZsOueKcVQwMXkomxrZNtymEzNdrfx0fLVJaTcdpP/j2Zkdz1Mi2Vl5fzt7/9jXvuuYe///3vlJc3PNJxupKSknj44Ycxm82YTCb69OlDfn5+q5R9IpkOJMQpDOgRi9GgEArX76CGZMeT1pxh9BbQjFFEDZ2KuX/NTt6B3SvRTF1zDnVStJXzxmbxxepD9Z67dlp/rKa6FzbGsBfv5q8gHCJq1IXY+oyh9INn8exYjn3E+QTtXfNzEKIl0pMcXD6lD+9+ubvec1ee25cYm7l29C8z0c79PxjJm4t3YlJVrr9oIKlxzbyAtcfhHHcJ5mEXQHkumquMsKFtL4bT421cNbUPqqoQFxdFcfGx9KAJTjO3XzqElxdsqU0SMHV0Jj2TW6dfiLIYuHn6IP783sY6x4dkx5OVfPKbRZEuPd7GsD4JbN5bWu85o0FhUM+uNZ2zvLycq6++murqaoLBIOvXr2f+/Pm8/fbbxMW1rC39+vWr/f+BAwf45JNPeOutt1pa5QYpemeZF9BEpaWuLhctno6kJGedTi1SdPZ2786v4rm31+M/bnpKr7Ro7r1yOHHNHAloSZsNikZYV+v9vys4sd1uf5gv1h7io+UHCIU1HDYT110wgDEDkjA1cFfPFKpCz98KGSMIG+2YyvagqCqB2OxOnR2oNX/Hk5Kal9Wpu/ef0Pn7kraQlOTkYH4FSzcdZv43e/AHwtgsRq48ty8Th6RiaWCUIKjpKNRMD2yJo/2PogC6ht6OEwwa+lnr1CRzKCr3EOMwkx5vx9SKG3ppus6+w9UsWZtLeZWfs0amMyw7Aae1fe6pduXf73J3gBfnbmJ//rF0rxazgQeuGUX/9OiTvre1293cPvSov/3tb7z22msEg8emh5lMJm666SZ+/OMft7R6AOzevZs777yTe++9lyuuuKJVyjyRjAQI0QT9M6J5+scTOXi4Gk8gTEK0laykqAa/XNvD8Rf9XSkAaEiUxcBlk3oxZWQGXn+IaLsZh9XQ6AV90BiN0nNizZ1NvWZesq4j6eVERLOZDFw0LpMzB6fg9gVrpq7YTI2u/2kowG6Oo/1PzWk6vi9SgNRYK6mxbTMioSoKfdOiGXDZUADCjazFaAtGjl1wKgqoWpCwYmq387dUXJSZh64bTW6xm5IqHzazgZ4pTmKjTJ36Bk5DtmzZUicAAAgGg2zZsqVVyl+7di333Xcfjz76KDNmzGiVMhsiQYAQTaDrEGMzMax3fEdXpXvSIdZuqp3be6ovhOMvbLral4cQbUXTdKJtxtpMQG090K/pUFzpI7/UjcVkoEeKo8PTDreX9rz4BzAFK/Gvno9//HQUUwqm0l0EDm3BOOwiQmrX2WHbYlTpk+akz3F71HTFPnzYsGGsX7+eQODYgmez2cywYcNaXHZBQQH33HMPc+bMYcKECS0u72Qi469VCCGEEK1G03WWby3k1Y+21V7ExTksPHTjmIjL964oCsGwhtGgtMmIpKrohHYtxb3la7z71hN7zo2UffoSejhIQmo2SuqILnkh3ZVdffXVzJ8/n6qqKoLBIGazGafTydVXX93isl955RX8fj9PP/107bFrrrmGa6+9tsVln0iCACGEEEKclsPlPv79Yd
2c/OUuP699up2f/WAkamNpv7qZSm+Q5ZsKWLoxn8xkBzMn96ZHYusmKNB0BXP/yVgObcd/cCtlH70IQNSI89ES+0oA0AHi4uJ4++23efvtt9myZQvDhg3j6quvbvGiYIDHHnuMxx57rBVqeWoSBAghhBDitBwqajgV6Pb95ZS5AiQ6Le1co/YX1nVe/3QH63YWA1BY5mHj7mKe+NGZpLZSWtKjguZYHGOm4z94bDdk+7Bz8LXmXgzitMTFxbXaIuCO0vGreIQQQgjRpZiMDd/pV1UFQytm4+nMiip8tQHAUaGwzq5DFa16HkWp2ZOhbOGfah4batZOlc77AxZP2+SPF5EhMv5ShRBCCNFqeqZGN7hB2TljMomzd52MNS3R2MLrcGun4dU1dG8VejiEc9Q0km7+A5YeQ9ADXgjU3wFaiKaS6UBCCCGEOC2JTjMP3zSWv83bTHGFF0WBScPTmTmxV0dXrVYwrJFX6sHtDZIUZyMp2kprrlRIjrUxsGccO3KO7RSrKtA/K7YVzwI6KqHMUSRd+2tsKVmUeww4zvsRqr8Kv7OHpEcWzSZBgBBCCCFOi65Dr2QHT9x+BsWVPiwmA4nRlla9yG6JSm+QfyzayrZ9ZUDNxfm1Fwxgyoj0Fm+QdpRRVfjRpUP57Psclm7IJz0ximum9Scj3t4q5R9P11V8zl44o5zgqSZgigVTrAQAokUkCBBCCCFEs1iMKpkJrX/R2xKKAl+vy6sNAKBmT4M3PttJr7Ro+qS2bLfY48VFmbj23H5cNjkbs1HF0FmiICGaQNYECCGEEKLb8ATCfPpdToPP7cwpp7Wzl+q6js0kAYBoPc8//zzTp09nxowZ/Pvf/26z88hIgBBCCCFaRFFAVVU0TevwvPUKCoZGpvyorTQVSAgAn89HSUkJiYmJWK2ts0ne999/z6pVq1i4cCGhUIjp06czZcoUsrOzW6X840kQIIQQQohmK6n2s2F3CXvzKhjQI57hfROIjzJ3WH1sZgMzJ/XmnS9313tuUK/4Dg9SRNcXCoV44YUXmDt3bm3wO3v2bO677z6MxpZdWo8fP57XXnsNo9FIYWEh4XAYu71tptxJECCEEEJEAFVV0HW9VS+Ci6v8PPGv73D7QgB8t7WQOIeFR28ZR4KjYwIBXdeZNCyNvBI3yzbW5NG3mAzcMmMwWYmda/2C6JpeeOEF5s2bh9/vrz02b948FEXhgQceaHH5JpOJF154gX/9619cdNFFpKSktLjMhsiaACGEEKKNmANlWMp2oipgqdyPxXu43esQ0nS251byn093sGR9PpWeYKuUaw6UUVFUQFjTeeTSNPql1Vxgl7v8rN1Z1Opz70+Hw2rklosG8NSdE/jlzWN5+scTOXNQMmpHVkp0Cz6fj7lz5+Lz+eodf//99+sdb6777ruPlStXUlBQwLvvvtsqZZ5IggAh2pE3qLE7v4otOeXsPlgu2d2E6MZUFcL711Iy9xnY9iml857Gs/ZDTErrXIQ31a7cSv7437V8tS6P1z/dwT8XbSPUwg2tzJoH11f/Jm3XPH4zI5q075/nx+M1HLaajcI27Czu8J2DVUUhNc5Gn7RoYuymRjf3EuJ0lJSUoKoN/26rqkpJSUmLyt+7dy/bt28HwGazccEFF7Bz584WldkYmQ4kRDtQFNhX6OLP722kotpfe+ycMZlcflY2Dov8KQrR3WgamPtNxpKzmcqlb2OMTsI+4Sr8evvtqGswqHyzIa/Osa37Symt9pMS0/yFjEGDHee4SymZ9wy23E1oSX3YWObA5a25AOrbIxattXfOFaITSExMRNO0Bp/TdZ3ExMQWlZ+bm8sLL7zAW2+9BcCXX37J7NmzW1RmY2QkQHQIgx7E5slDQcccrMBfeKCjq9SmCit9PPPamtoAAGo221myJpdPvztIp9lhRwjRahQFlOoCAnk7QTUSqi5FK9qP0o5jgJqmk5XsqHPMYjZgM7fsxoOuA0YTypHpNbrRgjtQc2
FkMqpMGJIqQYDolqxWK7Nnz66XDaix46drypQpTJ06lcsvv5zZs2czatQoZsyY0aIyGyO3H0WHMJbtpWjeH4i76C5c276lsrqM2FmP4lcdp35zF6MosHlvKYFQw3cOPl2VwzmjMklwdlw2DSFEW9DR3BWYU/sSM+02XKsXEa4sQk0NEVbaZzRA13UmDk1j9fZCDhW6MBlV7rp8WIunx5h1L1VL38KUmo111AyqP/kzZ/WfTPnYLKaOziA93iZZeES3dd9996EoCu+//36d7ED33ntvq5R/7733tlpZJ6PoXWySXGmpq1vfXUhKclJcXN3R1WhzprAb38q38WxbCopK6nW/wuPo1S2/NAwGlefnbmTdjuJGX/PLm8fSJy26HWvVcSLld/xErdnupKTm7Xja3ftP6Hy/Xwo6prCbgMGBMeRGN5gIK60b8Delzf6QRkmljyibibgoU6v0tWZfCagqQUs8Vk8BbnMcqtFKew10dLafdXuIxDZD67e7uX3oidpin4D2JCMBokMoWpCw68iW7rqO5qmCKJ3uOC9G13XS4qOAxoMAm6X95ggLIdqPjkLAUDPCGTJGneR1OsVVfkorfaTE2Ulwmpt9oe4LahwqcWEyqKQn2DEbVCxGlYyEmuw9rXWzJWBNPFp5vLa0mvnF3TvGFKIOq9VKZmZmR1ej2SQIEB1CqS4iULCHxKsexbNjBZXfLcI5oy9+5WgOZx1FUTrlyIAOhDUds1Ft0l1VTdMZPSCZj1YcaPD5Qb3jSI3rencQhBCtQ1Fgy/4K5ryzHl2vmVP/6M3j6JnUeNBwIkv5bjBH4TLEoh3cxLaDFhasKeesEelcf0F/zB2cqUcI0fl0aK/wzDPP8PDDD3dkFcRpMFGT1k5RwKgHTvraoKbjD/ix+Q6jKDoKOjbvYVTCAATi+5F0/ZME4vtjHXcVCRfeRnDHt5hCLhRFx3R4M6aSHbWLzjqDkKaz41Alz769gV/+YxUfLDtAmevkn8NRPZKjuOGiAfWOx0db+OH0wZK7WohmMOjHUm0e7Z86+zmNR96j6iFUvWaDLW9Q45+Lttbe9AiGNN7/aneTb6oHq0qp/Pq/lM1/Bn3dXMJf/52ze2iYjSpLN+aTX+I5ZRlW72GMmhcAi68IU7DytNrlC4bZmVfJ9zuLySl2EW7kBomR9v+ZdUaBsMbew9V8v7OYfYerCYQbXjMmRFvqsJGAlStXMn/+fKZOndpRVRCnwRwow/vdXKLGz0L3VuDbvwHTiOmEVFu91+aXeZj/1S6u7V1EYO3bxF76U1Rdo2jRn4i74EfoPc9ER8FrTgQdFFWlbPEb+HI2Yy/Lw9prJGUf/wVUlaRrn8BnT++AFte39UA5z7+7ofbxB9/uZcXmAn5581ic1pP/KRkUhSnD0+mfGcfOQ+VUuQP0y4qjZ7IDp00G5IQ4XUbNT3j7YszJ2Shx6XhWvod93OVA68z1bfSc2xZjTslGiU3Hs6rmnH5rUpPeb3Hn4t28BNv42YQObqy5ydFjHGFNwesP1XltlSuApoOhCfcHTNEJxM24l6LXHsa/5Uu04Zfw8nfh2mQEgVD4pO+3unMpef8p7EPOJmrYuZQt+D+MCRnYp/6QoPHUn6c3GOZfH21n7Y6i2mPXTuvP+WMy60zwNIVcBNYtwDpoMhgteNZ9hG38bAKm2FM3shsJhjXe/3ovX6w+VHvsojN7csVZ2Zia8gMXopV0yNVHRUUFc+bM4a677mLHjh0dUQVxmpSQD1/OFvyHtqH53JgzBmAO+cFcNwjwBTX+9M4G3L4QroFJxBtNVCx4tqYMsxVDXBqhE+b9Bw024s66ksP5u/BsXYpn61IAnOMuJ2RrWb7d1uILabz+af3f1aJyD/vyqxiRHX/KMgyqQmaincxEO4qikJjoiMgFXkK0BkULECzJpWrlfAzOeDSvC/uoC9vhnIeoWnX8OS9q2nsV0L1VuLd8g//QVkLlhTjHz8SQFcJhsTJral/e+WJX7e
tnTe3bpAAAIOzz4N2xArRQTT+7+1suH/kj/ljgJi3BTmaSA1WtmV7ZUC4QzRSFKTEL97pPca/7FICoUdPQDE2bpnig0FUnAAB4+4tdjOibSPJxexEoYT/+w3txb1uGYjSjGM3Yx/ghwpZE5ZV66gQAUJMl7syhqfRIbPoUMCFaqkOCgF/96lc88MADFBQUnPZ7ExK6XwrJE7XWqvXWNQBl2i0UL3gegMTzbsSSllXvVfvyKimprNky+5XvAjw4/BJYNxeAuLOvwZk9hGil/iw0PWEwsZOvovyr/wJgiIohdtyFmKIT2qpBp+VQYTVlVQ1vBV7lCTT7Z9Y5f9ZtT9rdMbpX/+nEPuUacnd+R7iymIRpt+LsNRhoy8/ZiX3qteTuOnLOC27D2WsQ0U2czqfFjiIw4jyq13+OanUQO346ppiaUYRLz+5DdmYMRWUespKdDOmTgLWJufzD7ioIeki44HZsvYdSuPBFeiXbufeqEQzqlUCFy8+n3x/CZFSYNDyDAT3j6rXLMu0W8l99BABjfDoxI87B6Iht0vm/21k/6YGugycQPuFn4cR8wW0UvPZL9ICXlB88QlRW3yadozEd/TfVHJtzKho87vKGmtSertjm1hCp7W5L7R4EvPfee6SlpTFhwgTmzZt32u/v7inuOmv6L6s7l5KP/oY5rS+hymIKP3ie2MsexG+q+2ViNSj0THVS7fZx97gwrJqPYqxJh1f25WsojiQCyYM5PguQouhYS7ZR/vWbtcfC7kpKl/wX6xlXEzR2/IWLQdfJTHKQW+yq91xCtLVZP7PO+rNua9Lu1imrObpT/2kOVVH9yYsoZivmlN6Ufvkf1PgsnH2Ht9nv17Fz2mrO+cWrqHGZ+GJ6n/K9iqJgzFtL9frPsfYeiS9nM6VfvoF10g0E1Zq75f1SnfRLrfnZVld6aWorkpKiMY68lJDBTKVuJOaSn+FXbIwAiiq9PPbyKkJH5px/ujKH/73jTOKjjqUptXiLKF84BwDV6iBUlk/pt+9hHn0ZIUP9KZ8nSoiuP2JgNChE20x1fhYWfwnl8/8PQ3QiqiWKogXPk3jV4/jsaU1s6Ynt7pp9SVwje8LEOcynbE9XbXNLddYUoW3tmWeeoby8nKeffrpNym/3IODjjz+muLiYyy67jMrKSjweD0899RSPPvpoe1dFnIawOYboCbMw9p2AEnQTPryHUAMX5yaDwj2zh/PlmoMYDSWoZiuxl/8cFZ3SD56lofxxxrAX9/YVoGs4z5yFpccQSuf/Ad/edUSNnt4pggCTQeGm6QN5+rU1HH8NNbBnHL1Su0ZnIkR3EjLYiRoxDUNcKro9AeuuZWj2E+9wt8U5z8cQl3ba59R1HTU+i9ipN6D2P4uo4j2AQkBtncxgIYO99v9+5diF+8HC6toAAMDrD5Fb7CI+6rgpjAqg68RMvQFL9lgqPv87NX110wLGHklRXDKpNx8u3w+AqircecUw4h11L3ZDZifOsTMwZg1DV42E9q0mbI6M/VGOlx5r4/oLB/DGZzuBmqliN140iORYyRLXlbhcLj777DNycnLo2bMnF154IQ5H612vtMfa2Q7dLGzevHl8//33pxXhdKc7WQ3pzFG+qlB7AawoOrp+kiFwBcKahiNUidcUh6KAJVCG3xzX4PviLAHcOdsIpwxCUy2YK/aBwYTfUX/KUUc6VOLm+22FFJS5GT8olUE94065KLgxnfln3Zak3a1TVnN0t/5TQUc/Mqp4tH9q69+vhs55Ok71HgUN/bjEfQZFI6yfPJHfydq8/VAFf3xjXZ1jj90yjuwTbl5YAqWETNGEFRPmQDmawVInsGiwrke6cl2vSZtcWOGlwhUgOc5GgtOM0sC+L8e3vzmf3/G6cl+io1NY4aOsyk9CjJXkaCtNmVXWldvcEp1tJGDNmjU88MAD6LqOz+fDarWiKApz5sxh7NixLa5fRUUFd9xxB9OnT2fHjh3dZyRAdF3Hd9YnDQAAdDAoKt
4j04V0HXym+EZvLBmjEwimjaxJkaeDPya7dSrdyrISo+g5tQ+KohCWlG5CdCj9uIvM9optWnrOk71HJYx6aC3+6EyqjIkkmP2Et3yGeej5zc6g0zvVydTRmXy9LheASyb1IrOB/Qf85mPrrwLmk49uhHWdQ8Vuth8oR9d1BvaMp0dyFOnxdtLjTx44HN/+bhSPnjYFhdRYG6mxp55uJToXl8vFAw88gNfrrT3m89WsGXzggQf45JNPWjwi0JK1s6ejQ4OAWbNmMWvWrI6sguhEOuPGYA2puZPaRSorhOhwvmAYtz9MjN2EUW38BoqlOpedVUb+9vEBCst3MLhHNDcOjCPxu7lYJ99EsBlpdKwmA9dP68eFZ2TVZCVzWjlJFU5Nge+2FfHPhVvrHL5p+iCmjkiFU90gEqKL++yzzxrMsgU10/4+++wzZs+e3ezyW7p29nTISIAQQgjRJnS2Hazknwu3UuHy0zPVyY8uHUp6fMN3f6ttqby8Mp/C8pq7itsOVvFVfCI3nzsCfwvyaBoUhZSY1rnjXO4K8OpH2+sd/++nOxjaO55Ep6VVztNZKGhYqg4SticRNEZhDlag+irxR/fsMjeuROvKycmpvfN/Ip/Px8GDB1tUfnuunZUgQAghhGgD+WU+/u+tdbUXizmHq3nmv2t48s4JOCz1v35dQZX8Em+dY9vy/fjMiQ3Mru8YxZW+OguNj9I0neIKb7cKAhR0jPkbKP7wz9iHTcUx7lIql7xCIHcn8bMeIhDXr6OrKDpAz549sVqtDQYCVquVHj16tKj8f//737X/P7p2tq2S55x8tZEQQgghmuVQUXW9u8XVniCHSz0Nvj7e6GNUn9g6x87ub8NatqvB17cXDQiENdyBMKFw47e/o2zdbNcvRUG1x6AYTXg2f0XRf36OP2cLisWOYpZNvSLVhRdeWLPbdwMUReHCC9t208LWJEGAEEII0QYsZkMjxxsehDf7K7mmTxkzxiTSM9XJjedmMTq0nkDeTgx6oC2r2gidPQXV/PGt9fzy5VV8uHw/+cVuRvRLqvfKkf0SSTvFouCuRtchENeH+EsfqDkQDgGQcMVD+KPSO7BmoiM5HA7mzJmDzWbDaq1J62q1WrHZbMyZM6dV04TOmjWrzTIDgUwHEkIIIdpE77Ro4qMtlFX5a4+NHpBEaiNrAnyOTDL7hbluiB2/NRmT5kUtDRKKzyasNLzBVFs6WOzh96+trh3N+Oy7g4wdlEL/HrEkxdlYvjEfXde5eEIvpozMwNSiFcedkylQgWvtR3WOuTd9gXXclQSNMhoQqcaOHcsnn3zCZ599xsGDB+nRo0er7xPQHiQIEEIIIdpAjM3EIzeNY+3OIvYcqmD0gCSG9E7AoCi4/GEsJrXehXPYmUEII+g6umrEnzi4fsFK7f5e9Z9SQNWC+HUjvqBGlClMTkmASpefuGgr6fH2k2YoOlaOwvrdxfXOsXZHIRlJUazZXsjkEenEx1iZPr5Ht0yZrCoQ2rMKf84WVHs0cefeRNmnL+PZ/BW2XsNR0kdF1OJgI0FCRxaoG7QgmsF46nTh3ZjD4WhRFqDOQIKAbk1H1cNoihEDIcJt8OP2BsPsL6ii3BUgK9lBVmJUp1nAJoQQHS3BYebCsZlcfORCubDSxz8WbqW4wsuFZ/ZEUSAh2krfjGhivLn4dq7EPGomatCNZ91H2MZdUbtHgKLAwRIPC5buw+0JctnZ2QzIOLbjrqKAuXwvnv0b+cw1iPE9jBgPLmdzYATzV5cDcNbIdK45tx92k8b+Ej95RS6yU+0kxDowG06YIXzcBa5BVZh2Rk8sJgOxTgtnj8zg8+9yGDsopdF0iV2dpoO1z3iiynIxJ2TgWr+Y2EmzCbur0FMGRlQAYAq7CWz4EGu/MwlHpxPetQRTfCbBlMERHQh0dRIEdFOKomMq3EbYU46l11gCmz/H0nskfkfLVq3XPQl8vDKHj1YcqD30yE1j6ZceedvACyFEY3Qdwm
ENtz/M06+vodIV4LoLB/D6J9trLyTvubQvI4o/wbtzFTZ3JYHDewlXFmHLHoVyZCPF0uoAT/77ewKhmrvuf/jvWn5163iSk2v6XJPmpWrVPAIHt3JWnxIMS7cQ9FQx+ZwRfLheJRjSWLohn5F9Exjm/R6UDAweIzHb11HW6xzSsrJq66PrOiP7J7Jw2T50Ha6Y2pcvVh+korpmapPNYmT2uf3ITo/uVrtQn8hnjscx4SqK/vtLdJ8LS88hmMZcTkDvZougT8HgLcW17jPcm7/GPmgS7o1fYErMJOayX+A3tGz3XdFxZGFwN6WGgwQP76Xi839SueBpqlbOI1x+GEVpeWd9dFV8hSvAJysP1HnuoxUHULvhvFAhhGip/FI3la4AvdKi2bqvrM6d5L8u2oMy7geYMwfh3bmScGUR0WddSzhlSO3rCso8tQHAUXnFrtr/BxQb/rE3QFwmyt4VaJ4qvKOv4YWlPoLHve+rdflolSUkrvk7Q/b/F33vKnzVlbj94Tpl90iK4uEbxzJuUDKHy9y1AQCA1x9iw64ieiQ1PgfaoPux+IsBUPUQtkAxjSRV6bRMYTdVS99CD3ixZA2iasV8KNrV5drRUn5HFvEz/gc94MW98QsMzgRiZvxEAoAuToKAbiqsmrEMOx9jdBLBwgPY+o9D6TG6RcN2bl+I73cWM2/pPnbmVoKqYDxh+Nhpj6y7I0KIrqW02s/WnAoOV3jbfd9vo6Gm//UHwlhPyBxkNKiYtADhqpLaY6GKAtTwsaxA8Q3k4I+PrrvI2KoEUHxVtY9N7mLSY+v2y3abkcCg89EDPqg8THDcdbyywlPvBo6CQr/0aH506RD251Vxop055fiD4XrHAQyan/DmTymf+xRW32HUg99T/MZjmCv2Nfj6zipksGPrfyYJl/0Mx0X3ET35KpTo5IiaCgSg6kFCFYW1jzW/BwLuDqyRaA0SBHRTBj2Ab8MnhKqKsWQMwLtrNXrOmmaPBOjA3G/38ff5m1m0bD/P/HctBaVubpo+qPY1FrOBi87s2a2HhoUQXVdOkZtH/r6C/3trHY/+fSUb95W26x3d9IQo+mTEUFDqJjsjBttxG4Y9cs1g/KveJlxVTPTkq7H0GoFn89coRTtr65gSa+XHs4ZhsxgxqApXTO1DdtqxO7Ehn4vw8v+ge6swT7oeQ2pfjDu/5KrhKibjsa/7c0akYlj2T1SLHeIyMH7/X359eTI2U8OXBCZVYUS/xHrHB2cnYG3kPbpqxOCIJ+wqp+TNX1H+2csoJiuYrM345DqOjkIobQSBpMEEFBvKkIvwW+qnSO3uzK58qpa9gzE+nbgL70AP+qn4+EWsWnVHV61buvHGG5kxYwaXXXYZl112GRs3bmyT88iagG4qrJgx9xhCXEImao+RWLd/hSEhi1AzRwIqPEG+WZ9b59jCb/fz8+tH0TPVSaUrQFqCnXiHOeLukAghOj8deHfJ7jqbXf1r0TaeuXtSoxe/rc1iVLln9nBWbT3M2h1F3DZzCAZVITHGSlqcDVvSDdgHTEDLHI2j30TseZvRjluAqioK4wckM7hXHOGwTozdVPtcSbWfJ/61jjumziI9u5znvjNyz8W3EFO9j39t1AiGNCwmA9dc0J/+6U4M3omQ0p9qzYotZxmW6FiCx/fdCri8IVRVIcpi4NzRmazZVkhRRc2OxlE2E1ef1w+1kShKw4C5zwRsBzbi3bMGgPjpd+O3d738+vpx6S40PTLvnQYcmcRf/GPU5GwClkQSLnWgOOLxqZE5HSgUCrFw4ULeeustiouLSUpK4tprr+XSSy/FaGzZpbWu6xw4cICvvvqqxWWdigQB3VggYSBKgkYIFXXwBfhb0HmZDSp2ixG3L1R7LDXRjlFRyIi3k3FkkxgJAIQQnZGug8cfrHMsEAyjtXOnFWs3cfH4Hlx8Rg8UqB05VRRw6QlYeiWhhXXCphjU7LPqjazquk7Ukc3Gjq/6pr0luLwh/vRpEdEOCx
XVXn75xh6eu/8sfpAR5uKzg0RHmYmLOhI4DDwPTVeJAgwJVxA87vvBH9L4cm0uH3y7F4vZyG0zBzMiO4HHbx3PoSIXYU0nKymK6JPsEKzqIcIHvq8JAAxGCIco//TvxF3+EH57Wmt9nKKdhBUjes8zCR75fQymjui2WaFOJRQKcd9997Fp0yZ8Ph8ALpeL5557ji+++IIXXnihRRfv+/bVTJm79dZbqaio4Ac/+AE33HBDq9T9RJEZ0kYQ/ciPuKV3L+JNPu65cjhGg8IV4xLonxnNRWc0PPUnpOnkl3nYk1+FO9DwfFEhhGhPqgKzp/atc2z2Of1wWBre1bfJ5arKaSdD0HUdXdNr+89qX4gPlh/gl/9YyWuf7aTcXbMO4HSmVhYfuUOv6dQu4A2GNFzuIHFRZnokRRF73MjB0e+EkKaTU+JjT0EVniP99b6CKt7/ag+hsI7bG+TP722ksMJLlNnAwMwYhvSIPWkAAKDoYcIVhaj2GJKu+19ip92G5nND0Nv0D0p0Ksf/PkZqAACwcOHCOgHAUT6fj02bNrFo0aIWlV9VVcWECRP4y1/+wquvvsrbb7/N8uXLW1RmY2QkQJySxVdM5cd/Zvg5N/HS/4zE/80rXD71HLQYS72FdZqus3hNLu8t2Q1AcqyNh24YQ7yj/Xe7FEKI4w3qEcuvbh1PXombhGgr2WnOFo1eFlb6WL6pALcvyNkjMuiZfPo7yCoKLFmXy4Jva+7+LVmTS4XLz92XD0Vt4q4rIVc5o/on8+36fC4eFc+8VUUA9Ex1Eh9dfzFx7fs0nYUrDvDhsv0AZCRF8bNrR1NSWffiRtehvNpPamzDOx03JKxaMAy/mIRBk/FZklB7J5OYPgC/NZl2X5EtRCt666236gUAR/l8Pt58802uuOKKZpc/atQoRo0aVfv4yiuv5JtvvmHSpEnNLrMxEgSIU/NUECzLp2z+HzDGJhMsySUYl4Y5fTAhte6XQkmVvzYAACiq8LJhTzHnjsxo71oLIUQdBkWhV7KDXsmNp7VsqkpvkN+9uhq3t2aK0Tfrcvntj86snRrZVIGwzopNBXWOrdtRjMcfxmFp/Cs6pOkUVXhJMLop2PAdg/qM5I9XJaGvnUv81Ev5fLufH88aXm9H4uMVVfhqAwCAvGI323PKyUysG8yYjSopcafXLoCwaiVsqVkIrGHAZ2n/AMAcrCRstBFWzFjDlfiNMTJtVbRIcXFxi54/lTVr1hAMBpkwYQJQM+rSVmsDZDqQOCV/fD8SLrkXPRQgWJKLpddwrONn1wsAAPzB+lvHuzzB2r0FhBCiO8gvcdcGAFAzDSfncE2mFIuvEEvlflQFLOW7MAfKGi3HZFAY3rdu5p3+PWKxmRv/0g+GdT5Yto9vN+bzyaZqfvt9InM+LeRwRQCjM55xg9N49KaxJJ9kFABoML2nxxeiR4qDn107iuyMaEb0S+SXt4wnwdn80VxNC1NZWcnGfaWUl5cTDLfPNFFzsBLXFy+jbfsCa3UOpW89jqUqp13OLbqvpKSTZ4c61fOnUl1dzR/+8Af8fj8ul4v58+czbdq0FpXZGBkJEKdkDlbgWv957eNA7g6oOIQS36/eHZWUOBuDe8Wz7UDNl56qKozsnxTR8weFEN2Pw1b/othpN2NSQnjWfYx3x0qiJ86iZOm7xEz+Aergi2hwir8O0yf0pNIdYM32Qvr3iOW2S4ZgOMl9k8PlXvblV2E1G9mwq+auY2EZbN6v8MRNV5Iek9Sku91pCTayM6LZd2QPAKNBYVDPOAyKwpCecQy6cSyqUlPH5nbhgWCQ0L7v0PN2oyRPRt37BZ6sURh7jyHK0rx9Zazew2gGC+DEoAcxewvx29PRTrivGTZFYe0zmsqvX4flYE7rh2apyWajoGMOVeM3RmMMe4CaPQGEOJVrr72W5557rsEpQVarleuuu65F5Z9zzjls3LiRyy+/HE3TuO666+pMD2pNEgSIU1Iq8/Ef2krUiPOx9h
5F6cI5uNd/hn1qJsETRgPMBoU7LhvKrtwKXN4AfTNiyUy0yxxQIUSXp6hKbUaftHgb10wbwLtf7kLTdM4akU6f9GiCuoGoM2YTyN1B5bdvY+kxBOPAKQSO9IEuf4i9eVVUeQKkJUTRM9lBXJSZ+2b0JDQthbAtAXuglLAGIbXhO/m+QIgBPeJZ8O3eOsfDms7m3YVkx+l4LSmnbI/VaODeK0ew61AlHn+QfpmxpMcd69NVaHHffbDITbRPw7bnW/ocWo0W8OFOHcnuncWcPSL9tIMLi6eAsvnPYIhPxz7zHrSdyyn69i3ip9+NnjmmNhkGQBgj1rR+x97bYwhhkwNFAdPhLVR8+xbxl/4E94bPAQXzuFkNjnALcbxLL72UL774ot7iYKvVyvDhw5k5c2aLz/GTn/yEn/zkJy0u51QkCBCnFEwcQNI1vyIUlUrAYCfp6sfQrHEEGukso21Gxh6/sYwEAEKILkxHZ29+NYuWH8BuNTBjYm+yEu1MG5PBmAFJgE5slBmDoqCgEy4+QKiyGFQjgfzdKJX5KHF9qPKGmPPOBg4UHNt99+rz+jF9XCrB9QsJHN5H7Dk3UbroeZxjZxDMnopBVTlxWn96YhT7C+ru4BtlMzF6QDKpGfEErXFN7ndjbCbG9a+/EVhrUFWFZZsLyYxOZEJ8BlpZLmpqf74rcbJsxwHOGJKKxahiCVbgM8YCNXfoLaHK2seNCRzaRt6/foHmc0EjC6it4SrKFs3BnNYPa59RVC17l6SeQ/HF9kV1JqIHvBT95yFAIX7G/xBS228jM4OiET6SoUlFQ1GofXzi86JzMRqNvPDCCyxatIg333yzdp+A66+/nksuuaTNc/u3pq5TU9HuKj1B9uVX4QuG6ZGcSrrBhgL4nL2aXaaCjqoFajJHaD40gwW9kQ3MFEWh2hdE0+puilP/dTV1VVUFp9UkU4+EEK0qt8TLc++sZ8LQdJx2E+98sYtbZgzGZlY5UFBFhTtAdno0PeONOPCgqeAYdwlRQyYRLjqA7qsGXWdvfhUHC6uZPqk3FpNKOKzTK9FMiUcndfBZhN3lFL35K9ToRNzx/Vi4dB+KonDGkFSSYqzYTDXpTB0WI+MGpbJ1Xymb95ZyzphMLGYj3289zJa9JRRP7MWEgQnYbVaMmr/REYX20CfVxijXt2jl+SiDp6Ft+4JzU3dRmNYXg0HFVLqT0o9eJP6ynxGM6YUhfyNlS/5D/OUPErIlElItddrgt6eRMOthiv776JEAAOIuuI1w1hj0Ey6afYZo4i/9KZo5irA5muTMQQQdNZuVha2xWLIG4tmxEtVqx5DUk2ATszG1lMVXRGDXSkxDzidssGI8vIlQWT6WHsPxO7KweAsJ7FmFafA0go1MUTIRIIgZBQ1VCxLuwJ9xJDIajVxxxRUtygLUGUgQIBpU6vLz1H/WUH4k37SiwAPXjGJYr7hmzw1V0DEe3kgwfzfWURfjWbsIa8/hBJIH13ttWNf5fnsRr32ynVBI44opfThvTCYWY91O3h/SWLwmlwXf7sVoVLn54kGMH5jU6C6WQghxug6XebjynH58uHw/FdV+RvZPosLl58udRVQdyemfrpZh08NUbfoKa88hmKMT8G5dij9/L9FnX4uOQkW1j5mTs1m6IY+yKh+3T00ka+d8VOeVKEoVChA18EzcRXkcLguSmeJk0+4SfvPP74h1WLh79nCy0xys3l7Eqx9v58IzejK0TyI7c8pZt/PYju5vfLaTfQcTuGtmPwKr52HtM4ZA4qB2/9w0TSczNY5DhSOIn9iP1zcZuWVSH/ICUYwYlEpUqJKyT/+O5q2mdN7TRE+YTdm3b4GuUb30LaKGnY01fTCeNQuw9h5FIGkwBj2IL2cT6MeSULi3r8CZPoSAOa5eHXyOrNr/e6N7AzU3mMjbiGfHKmLPvZnq7xZS9cU/cUz/CQHadjTAormo/OgFgqW52N3lmNP6ogU8VC57D9XyEfEz7qV88SuEq0
uItUShDjwf7YR8GxZPPq4V7+GcchPhor0ES/MwDL2QcDuOZIjuQcaaRD2KAmt3FNcGAFCzKOyVRVtbuPmXjh7wU73mI8refAz3+s/Rgz6UBsatC8q8/GPBFvyBMGFN5/2v9nDgSOaN4+0/XM28r/cQ1nT8gTAvL9hCQfmxzWg0HQ6VuPlyQz6L1+axv9BF6DQ24BFCiKQ4G/O/3lO7CdeGXcXsya1kxaZ8kuPsrNpcgMmgo3kq8eXuoGr1R5R98W/CrgrMKb3QLNEAZCU78QdClFXVzCO2mRS0inzCHz9NyQd/IuypRut7FoqrhFTPLipdAb7fVghAhcvPn95Zz+EKHy99UNM3Lly6D13XWbezqF6dV24vZf+mTbg3LUEP+hvsZ9tDz2QH1dZU/vh1iG05lTy5JESOL5oR2fH4jDHEXfYgqs2BHvBR+c0boGuYknriHDed0o/+WvNdseEL9FAABR1TZQ5V374JKMSfdxOGqFgCh7bhX/8RRqVp30+6rkPaEJKufhy931Tir3iI6Kk3t3kAABAwOIg574coRjOezV9T8fk/CZbmYes9Es3voWTeM4SrSzCn9cXQa3S9AABA0cL483ZS9u5vKPvoL2h+N4rewAuFOAUZCRD1qKrKnrzKescrXQG8/hB2U/N22NRRMWQNx5Tci2DRAcwZ/VEyhtZZyHXU8QHIUaVVPiCmzrGjX6Z13lvlJyPejqIorN9Twl/e31jn+ZsuHsSUEWntNPArhOjqTAYVty9U59ieQxX07xFHtSeApsOrK108frYRx/BzqF77CYrJgiUtG0NKX3ymWAB6pzlZvOZQbRmvLq/gN+dfgXHZyzXnGTeb+948zAPT/odd5Qb2l9Tthz2+EBXVgTrHTqzX8ar9Oqk9BqOkDULvoB5PVRXOGJjMwB5xVLj8RNmMJDgttWsWgo4MoideScWXr9a+J+bs69ATemBKzCJYcghL1pDaNoSiM3COm4kpqQcxw89GTRuEa+mbWEdehF9v+ndT0OgkGOMEHXz2tFZudeN0HUJxvYgacT6utR8D4MvfTcLFd+Pdv6H2dTFnXYvPHN9gGQFnBtHjL6Ny6VugGogaeSFeyWwkmkFGAkQ94bDG6P7189z2THUSbW9+rmiDEsa/8ROCRTlEDTmbQN4uQlu/QFXq36FKT7BjNNT99cxKdtZ7XWZS3U1/TEaVtCMb3VS4A/xz4ZZ67/nvp9spqaofZAghREMSYyxknNDXnDksjfPHZpEQbUVVFX56bjROc5jqtZ9g6z0CUHBvX4lv5wosnprNwFSo07f+/MJ4TCv/DakDMMQk4/78r9x6dgK/W1jI3OUFDOtTd8FuaoKd1HgbpuOmRaqKgqGBDcFUVSE5MRp/zhZCO7+t7WctngIs/hIURcPqysGoeeu9t7Xpuk60zUiPpCgSHMcCAAUdQ8FGKpa8Vuf1pYvmQP5WQpXFRA05G/+hrYS2f42qaIRUG4YRlxDOGoNiMOK3pxF14f/gt7TN4ubWphKGvStqAwAUFcegSZQseK7O60oXPofFdaiBEsBUupvKpW9j6zcexWSh4tO/YQ672rrqohuSkQDRoKG94xk3KIXV22uGoqOjzNxx2dCT7j55KmHdgKXfGZhSekPGcCy9RqDGpePXFSpcfg5XeHHazUSZDSRGW3jsh+NZuGwfPn+ISydn16QaPUFmYhS/uHEsC77di91m4rLJ2SQ6zeg6lFT58DcwfUnToajcS9IpNtIRQggAi0Hl/h+M4PPvD7Int5ILz+jB0F5xWIwqyXE2EmNt7CovJqVXInFTr0Pze3COuQjN66Jy3WdYhpxXW9aQXnGcOTSVVVsO881uH5ePvxpTz5FEmzUCh/eSm2umb2YMZwxNQ1F0brx4EEs35NE7PZqLz+xJgsPMY7eMZ8GyfXi8QYb3SSDGYea1j7fXqfO15/YirV9PFMs9qAlZ+HUFi+6h8rO/omsaztEXUbz4XyRc+hOUtBEdsouuOVRFxX
FTgGoyI81B87pwb19B4qwHCcf1xtJzGIaELHxHFv6GVUud7EcBuk5fbgh58R7cDEDstNsxJmYSLthVM3UsrS+xF95F9bK38e5ZQ7g8H8WZWS95hu5MIW7arSg9xxLlLkb3VBA0RDV0OiFOStG7WCqV0lIXWjee052U5KS4uP7c944Q0nQOl3vxB8KkxtuJsjRvGtCJFEVB1/Xaf4ur/Tz/znrySzxER5l54JpR9Ew60qEpoKCcMuOPoiig6MevFeNwuZdHX1rZ4Ot/9cPx9EpxNPhce+lMP+v2JO1unbKao7v3n9C2v1+KqtRcLDfQHykqqIqKEvahBL2EzLGoeghjqBr/Cekuj+9b0xPttVl/1CM3WVz+EIeKXOw4UEZ8jJURfROJsZvRj/vZKSqg1/SNDqeNDbsK2bC7BHSdEf2SyE5zYlAUVFWp8zO3evIpefs36KEA9iFTsJx5dYdukmXxFuJe8S5Rk6/Db0nA4s7DvXoh9knXEjTHoet6vTYc1VX7ElOoGrUyj2BCP3TFgLnqIErAheZMJWCOxxSsQq0qIBjfB02pe6/2aJuP/0yOfpd2Z639s25uH3qivLy82hShGRkZrVImwJIlS3jxxRfxer1MmjSJxx57rNXKPp6MBHRnioJywoXx6TCqCpkJrf/lcLSz0nUdFPhw2X7yS2p2bKxyB/jb/E389rYzsBjUmp0qm7CgTdf1enmxk2KtTByWxorNBXWOD+gZS3qi3DURQpwe/SQBlK5BGA0wg8kMuk4YA+EG8t0f7VuDYR1/SEOH2k3IAOwmAwMyYhiUFVt77MRz1/TrNcdsViP9MmLonxkLul7ngvn4/yuKRri8AD0UBCBweA/2kLtDgwC/LQXbeXfip2aqqT8qA9vU2whgrg22ulvgGjQ6IWFgzQMd/M4edZ83RUNC9EnLOP4z6e4BQGe0bds2nnrqKfbv34/JZCIYDNK7d28effRRBg+un/HwdBw6dIhf//rXvPfeeyQkJHDzzTfzzTffMGXKlFaq/TESBHRDIU1n64FyPly+n5R4O5dM6k1qbNtlPWjsLk1TaDrsy6+76U1xuZdAUKsJAlrAoChcfV4/UhPsfLTiAOGwzrTxPZg2PguzQZYFCyEapygK3mAYg6Jgau3+QoE9+VW8+tEO8ktcjB6QxNXn9yfJWXdaS5P6VQXW7Szi/SW70TSdy8/Opk9qw3c5jSEv3l2riBp5PvahUyib/yx6dQmKNalDpgMdFcR80sdCdCbbtm3jjjvuqN0t2O+vWWO4Y8cO7rjjDl5++eUWBQKLFy9m+vTppKamAjBnzhwslraZ8iZBQDe0t6Ca59/dUPP/vEo27y3hyTsn4LC07o+7whNg455Siso9jB6QTO8Ux2nn5zcocMEZPfj3h9tqj501IgOntXXq6rQauXRiL84dnYkONVOa5KaJEOIkAmGNlVsLef+rPURHmfjhjCH0TXe2Wt9RUObl6dfX1l7kr9tZTG6xi9/eeka9vVBOpbDCx2//uaq2rO37S3nijgmkx9Xf0T1oiMI2+UZ01YjPYCf+6t/gN8Z0aAAgRFfz1FNP1QYAJ/L5fPz+97/n9ddfb3b5OTk5mEwm7rrrLgoKCpg6dSo/+clPml3eyUh2oG5GUWDXwfI6x6o9QQrLWzcDRCCs8dd5m/nPx9v5ZGUOT766mv2Fp5+dQNdh/MBk/ueqEYzqn8T1Fw5k1tTsVq2rpunYzQaizBIACCFObW9+Ff/5eDtub5CCEg9/+O8aSk9IzdkSOYer693lLyrzklfiPu2y8kvcdaf/6JBX3HhfHDRF107/8RljOyx1qBBdUV5eHvv37z/pa/bt20deXl6zzxEOh1m5ciVPPfUU77zzDps2bWL+/PnNLu9kJAjoZnSdeqnsDKpCrKN1h5KKK3zsya2bw3r19sLahW2nw2JUufDMXvzkqhGcPzqdaKuptaophBCnLf+Ei/FQWKe0svVupJyY/vioPbmV5JZ6TqusOG
f9vj0+WnaOFaItFBcXYzKd/BrFZDJRXFzc7HMkJiYyYcIE4uPjsVqtnH/++WzatKnZ5Z2MBAHd0MAeMVxwRg9UBZx2E/dfPYoER+vOsbRajPVyUyfF2lq0QCkc1mRYWgjRbCGtZvfc73YWc7DY3aSkAg058UaK0aAS42i9C+vs9GjsJ0x5HNAjjt25FTzz37W4TrIB2ImyEqO4beYQLCYDJqPK9RcMoEdSx2Y+E6K7SkpKIhgMnvQ1wWCQpKT6ey011TnnnMOyZcuoqqoiHA6zdOlShgwZ0uzyTkbWBHRDdrORa87ty8Vn9sRkUImyGFr94jrBYeLWmUN4ZdFWNE2nZ6qT0f07dnFZR1MUsATL8BnjMehBjMFq/I3s+CiEaF2KovD9jkL+uXBr7bGfXjuaoT1jT7usvunRXHfhAD5ZcQCn3cyU0Zks35THrLNaZ6pivMPM47eMZ/Hqg+zLr2Jgzzh04LNVOQAUlHnol37y7DBHGVSFy6b0YUSfBHQg2mZs1X5Y03WqfSHsFmOL9okRojvIyMigd+/e7Nixo9HXZGdntyhd6IgRI7j99tu57rrrCAaDTJo0idmzZze7vJORIKCb8IU0yqp8OO3m2i+BGFvNkFXbXJgrTBiURL/MSXj9QZJjbae9oK27sbjyKHn/SeIuvptweQEVm74k9rKHJBAQoh14AiHe/HxnnWPvfrGLgbeOw3iaCQs0XeebdbkM65uI2xvkjc92oCpw0Rk9sZtbZ7+UlFgrYwcmU1ThZdnGfFzeY3cXDQaFjfvKCGs6WckOEqPNKCeZu68oCo4jIwut2d+XuQL8++NtbN1XRq+0aO66YijJMtWoW1MUMGh+QkrNNDMTAcnWdIJHH320Tnag41mtVh555JEWn+PKK6/kyiuvbHE5pyJBQDdQUu1nztsbKCh1Y7MY+ck1o+iX1nCKuLCms+9wNTmHq0mJt9EvIwarqfEvNVXRsFTloKhGQtZYjJ5iNJMdnzUVUEh0msEpHQRA2BKNtdcIyj54FoDos64hZJJheSHag6Io9ebam4wqiqKgoGNxHSRkT0bRNAy+UvzOrNqdWF3+EAVlHnIOV2NQFcZmR/PoxXHMWVzGpL52pvdP5L2NIaq8QVZuPUyc00L/zNjaC++m1Q9UVSUc1mofa8DhUnedAGDsoBQ+XZnDmh1FAKgK3HvVSEb0iW9SYoOQpuMNhLGZDRhbcudegUXL9rN1XxkABwqqeGXRNh66bjSSYbl7UhQwl+/Du/UbLGdchRqoxr3qfeyTrycgN7NqDR48mJdffpnf//737Nu3r3afgOzsbB555JEW7xPQniQI6OIUFT777iAFpTUL2bz+EH+du5Gn7pyIzVT/zvzmA2W88O7G2sdXTOnDpRN7Nnr3yOIppOitJ7APGA+Kgmf3GhKm3Yol04FflQvc42kGK6akLLw7a3YpNiZkElQkQBKiPdhMKrfMGFTbv6kKXHNBfwyA1VdI0Vu/JWrE+eihAJ5ty0i+8Sm8lmTCus6mvaW8v2QPFS4/qqow+soEQl88xz2jr0TN3YDhYBk/mflL7vnrCo5cwzNuUAp3XDoYQxNGGaq8QdbtKmbXoQrGDUphSM84jEaVd77YxcRh6QRDGkXlHjKTHWQmO/nL+8f6aE2Hv8zdyNM/nkT8SdZ26cDuvCre+HwHhwpd9Ex1cv2FA+mT5jjpKEJjAiGNzftK6xzbk1uBLxButd3jRediCPvxbPkKz7alhDwVhIoOEnaXYx8yBSU1PqKn+55o8ODBvP766222Y3B7kSCgy1M4UFB3s61KVwBfIFwvCNCBBd/uq3Ns0bJ9nDMqo9E7Wn5bCnHTbqV88SsAxE6ajZrQE58EAPWYPYcpWvYu0ROvIliaS/nHfyHx+ifxmeQOihBtTddhRO94nvmfyeQXVZMSH1W7SaLfmkzcBT+i/LOXAIif8T/4LDUL98qq/RRX+Khw1Wz4o2k6/13r50ejpuNf9z4Asdf8hv+uKa
8NAKAmG9qsKX1IOcVGjLoO73y5m5VbDgOwasth7rpiGOMHJpGdHsPCpfuwWYzEOi3kF7soq/LXKyMU1skvcZ80CDhQ6OLp19fUPs45XM3vX1vNr249g55Jp79DusWocvaoDD74Zm/tsTOHpGKXAKDbCqkWrBN+QNhTiX9/TSAae+7NhFKGSADQiIyMjC558X9UZE/i7gZ0TeeCM+puOT5haCqx9voprBQF4k6Yz2m3mjCcZGdeQ9hLIG9X7eNgWQG6qwRVb3r2ikjht6eRdN0TqEOmYZt0HYlX/VLWAwjRjhRFYXDvBEZkJ9TZJd0Q8uLPP9aP+fN3Y9Bq5vOajAbUE7rAoekWwoV7ah+HS3LISKibitOgKpiasA6q0husDQCO+uy7HEBhxsReZCQ58PpDuL1BbrlkCKu2FDRYjvkk0zbDms7n3x+sd1zX4dsNeRiaMX9H1+HcURlceW4/0hOjmD6xF1ed2092FejmDH4XoeJDtY/9uTsxhE4vba3oOmQkoBsY0SeB+68eyfdbC+mXFcPo/kk0OEKtw+wpfdhxoAxfIIyqwI8uG4rdrDYa5RsC1XgPbCT2rB+g2qKpXPkBlqxBGMJeNGPD6w4ilaYY8TlqArKw0UzQ2Ho7jAohms8QdOHft574S+5HDwepWvYOtmHnE7LaiI0y0TstmhH9kti4uxhVVRiRYUDbl0/SNb/Gd2Aznh0rOWPKMOY5LZRX19ypv+GigcQ7Taf8G7eYVGIdltqRBoDs9BgUBRKdFh6/eSyFlTUjERt2F9MnI4YdOXU3fIx3Wsg4yd38UEijoJGNxnKLXIBCczojh9XIjDN6cOG4LAxqs4oQXYiJANUr3yPsLif2nJvx5+3Au2sV9gHjUdJHy2hAN6ToLUns3gFKS131dlrsTpKSnBQXV5/0NRo6Fa4gFpMBh/VY+k+DQUXTTp5rX1Gg3BOkqNxDnMN6yqwTANZQBaAQNEdj8ZcRxEjQFHN6DTuFprS7u4nENoO0u7XKao7u3n9C45+zNVSB3xiDgo45VIXPGFv7nK7rFFcHKKn0YjKq9Ex2EK3XvMaoB1CDHgLm2JoFxKUeHDYTKbFW1CZmHdpzuJo/vb0ejy9EWkIUD1wzksQjm3y5/SH++OY6Dh7Zcf2KqX3ZfaicHQfKuXhiL2xmA3arkd5p0WQmRjXYWyclOfnvJ9t454vd9Z676eKBnDMyvVtewEViX9LWbbYEytDLDhFKHYoh6EEp3kU4dQhhtWOzQrV2u5vbh3Y3MhLQxVT7Qrz9xS5WbjlMlM3Ej68YxuAesQC1WSdORtch1mYi1tb0i3ivMYZ9h118uHwzFpOB6RN70TOprVKPCiFE6zt60a+j1AkAoGYaUXK0heToY1N+fNS8JqSYwVwzF99hMTY5f//x+qU5+f1dE6n2BkhwWuukU95fUF0bAADM/3oPA3vFce9VI3hl0Vaq3IHa5x64ZhTDe8c12PeOH5TCN+vyOFx2bOpGRpKDkf0ie/8WcXr85niUtJpFwJrJiZIxRn5/ujEJArqYTXtLaueXur1Bnn93A8/cPanBNQCtJb/My1P/WV3bEazbWcxTd02ovZMlhBDdgaIoLdr1vE5ZKuSXesk5XI3ZZCA7PZr0OHu915VV1881nl/sZvO+0joBAMC/PtzaaOa3uCgzj9w4ltxSF8GQTpTNRFqcrdX2NRCR4/g/AQkAujcJAroQVVXIOXxsOExVICPRRrUnQKz96I+y9Zdt5Ra563QEobBGfom7UwUBqqqgKEqTRkOEEOJE5kA5evE+9IwRGCprFtkGYrNPOb3SH9IwqirHp+QvrvaxN6+KfyzYUvv+mCgzj94yjqQT+s34BjbfinNaKCqrvxiz0hXA4w9hMzWcJchsUtmTW8nCb/dhMqrcPH0Q4wYkNXnakhAiskh2oC5E03RG9qtJa6cqcN+FSfxs6GF6xqmYCjdjKtyMorR+2B7XwMV+bCcJABQF8su9/HfxLn732hqWbz
2M2y+Zi4QQTacoCnrRHso++jOB79+mdN4zuFYvwqjVv0t/VCCk8e2mAn758ir+750N5B25aHcHwny1Lp/3l+ypE0BUugN8t/UwygkX5L1To+mTUXeKUX6Jm9EDkuuds09GDDEnGfXdV1DN/K/3EtZ0fIEwL32whcPl3qZ8BEKICCQjAV1M/8xo7v/BSJZuzKO/00fwy/n4inbgz9uJffBkLEn9CKm2Vj1nrxQHs6b2ZcG3e1FVhesuGEB6fP1h7Y5QWOHjf//1Pf5gGIDdhyqYMiqDGy8YQEs2yxRCRA5d19GzRhE1/DzcG75AtTqIPucWvErjiyF3HKrg3x9tB6C00sfTr63hqbsm1i4uLquqH0DsOliBYbJCKHQsOoiyGPifK0ewaU8pyzblkxpvZ+roTBJjrew8mMbKzTUpQ+Ojrdw2c8hJdwFu6Jzl1f5O018LIToXCQK6GKOqMiI7nlH9EjCgESi7ANf6z1GtDqImXImvlQMAALNR5ZIJPThreBqKohBjN3aaeYI7D1bUBgBHfbM+j4sn9CS5gWF2IYRoiKHiIJ7ty1CtUWg+F/69qzH0m0JYqX/nXVUVdh6sqHPM7QtRWuXDaTeTX+yid3o0+/PrbuQ4fnAKoVD9KYsxNhNnD09l6sh04FiSh1unD2TGxF74A2FS4+0NrgU4XmZy3TSiZqNKWsLpbxQmhIgMEgR0UbqmoxRtw7XhC0yJmQRLcvGs+xjT6MvbJpWXTu0wdGcJAAC8jUz9aeiLVgghGqXrWDIHEX3OLfh2r0IPh1AaSYyvaTp9M+pmWLOYDSSavCSYPEwclkaFy095tZ+KI/sKjOyfxIi+iSc7fb01TQZFIT2u6Td2shId/OLGsSz4di8Om4mZZ2WT4DR3qj77lBSo9oYwGBSizMZWW6gthKhPgoAuStdBiYrDMepCrGNmouVtBSCsNr6tfHMEwzrBsEaUxdApv0gGHEmPerzs9GiSYlt/REQI0X0F4voQdd6deFUbhoHnAhBWGu9PB/aM5cpz+rBw6X4SYyzcfkEWykf/i3/wZMaNvYpyT5hhfRKpdPkxG1VS4u0EQxpBTcfUSnMVPYEwhuPKUhUYkBHNL64bDYqOrrXspo2igKqF0FQjqh5CUWrWQvg1A1aTodXTUFR5Q3y86gBfrD6E1Wzg6vP6ccbgFMyN7GofKM1DJQqNmgxI1mAZflMcuuxrLESTdEgQ8OKLL/LJJ58AMGXKFB566KGOqEbrU6DCFUCnJl3b6TLrXvSQFT3gxR8MYrcY0U02dL2mQ9OAclcAk0Ehxm7Cb0/HNG42pT4IxwwjyezCEPJgNULQ48ZliCHW4MFrjEdRwBT24FdtlLuCGAwKBlXFFwgT5zBhUBQUNMxlu9EdSWgGC5TsY68vga15AUb3spOUnFgnv3VnkJXk4KfXjOI/H2+ntMrHuEEp/OC8fq32JSuE6CQUqHAH0TSdOIf5pJd5iqJjDHsJqnaMmhdNNddeKJ7sBKUBE/6AnzinhUp3AE33owDJdo3ygJFgWCfFoaOHdaJMJmac2ZOZY+Jxbf2W0OJXMCRmYBk5A7+m4LQacaLj8YXYlVvBlv1lhMM663cVMWtKHwZlxaIe108Zwj50gwkNA2bdS0Bp/EZGSXWAT1YdYOmGPKJsJmZN6Utqgh2zSSUjIQpjE3f3VRQwaTXnMug16UiPBj6KAqbSnYQK92POGIhWeRh/4QFM6QMorAjzVX4054/vcVrTLlU9hKKFCBusmDQvIYO19vsNBT5edYDPv6vJzOTxhfj3R9uJdlgY0Tv+WBlomF25KEYThz/8M/Hn3oAWnQGeckrm/4G4C+4glDZCAgEhmqDdg4AVK1awbNky5s+fj6Io3H777SxevJhp06a1d1VaVTCss2R9Hu8v2Y2u68w8K5uLx/fA3MSLZouvmOov/4l6xqUYq8sJ7F1POGMAhtgUSB+O2x/mva/38c26XMxGlR9eMpjxA1PYfNDNX+Zu5Ipx8Z
x5+F3iz5hO+foviBpwJk6bneq963GOmU44FMa1ehGHsi/jqbkHMKgqF0/sxeY9JaQnRnHNef1INFRRsuh5TAmZGGJT8W5bysDLH8QXYyJhw7uYxs2CpL5t/EmeHlWBob3i+N2dZxIIakRZjZLySohuRtN0lm09zH8/3UlY05g2vgeXTe6NzVT/wl7XNYwFm/Ht/g7HxKvxrl2EMSUbQ+ZQDL5K/I7Mmqk3mk5RpY9gWCM13s6OnDJe+mALg3rFE+s089XaXAyqwvUX9MOgBXn9yxyCIY3bz09nAhswJmRhzhhI1eKXcPYbQ4XRRLD4EHrJfpSUIei6wvbcKp59Y23t3Xir2cBlZ/fh/95az8+vH8OgrJopRcawh8D6RZgSs7BmDKDq85dxTrkRvyOzXvtc/hB/fGMtxRU1WX8qXQH+/dE2LjqzJ99vK2T6hF6cOzq9SUGAuXwf1d/NJ+a82/Dt+R7CQQyDpxFWzFj9JRR/8H/ooQDR42YQLMvHu3c9ht2rSTvnh+z9rpTvtxfxm9vOaNI+NaoeQsn5nmDJIaJGXYxr+VtYB0wkmDIUUKj2hvhi9aF671uy5hCj+yYQDusoChiLtlE8/1kcI87DOWwKRfOfw953DCgKut9DxZf/IuGa3+Izxp36AxAiwrV7EJCUlMTDDz+M+cgOjH369CE/P7+9q9HqckvcvPPFrtrHC77dx+Ce8fTLaOLukoqCFvBQsugF9HAIa9Yg9LJDVFkScSoqu3LL+WZdLlAzHPvygi30SHXywnsb8AfCfLqxkiFnXUR0VQlhVxkVK+aiB3zY+4+neu0nePeuw5wxiF2HqtD1mlz/i5bu44qpfZn/9R6G901k/IAkEi5/kJJ3noD8XURPuZ6qr16ll89NGAhqYFU615qAoywGFUsjQ8ZCiK6toNzLq0cy8QB8/t1BhmUnMKRnQxd6CorBgGfnKnz71qP5vcSfewPBzYspX/cpSTc8RZUxgQXL9vPxigMADOmdQGqiHa8/RK/0aOZ9tQeAUFinyqsx/+v9taVv3F/FuN4GXIv/gWqxg2oASzQFEx4k9dBiAq4K1GQNf1jhtY+31ekvfYEwBwurSU2wM/frPTxywxgMCuiKimo0UvH5y0fKNNbcim9AzuHq2gDgeF+tzeW88T14+4tdjBmQdNJUorVUlUDBbkreeBTN7yH67Os4uteMz5xA3MU/pmzRC1St/qjmkzWaiRk/k0/2m9hfWFOH/QVVjOqTcMpT6YqKajDgXvcJ3q3foAX92AadXXs+g6pgMRnqrfNy2s1HXqPXTIONSceUko1r45c1LzAYsaT2pvK7D1FtTuKv+IUEAEI0UbsHAf369av9/4EDB/jkk0946623mvz+hARHW1SrxbYeqqh3zOULkZTkBCBYWYwpJolgZTFGZzyKWvcOlq47CJ9xBeUfvgBAVL+xlO9cy2qjk0tHWalw+U94Pbi8QfyBmsw4gZCGplpxbVmKY8hZVK5aAIBj9AUUvfMk6BrGkdNZ+GZRnXKOLkQrrvASZw9T/v0KQAFVxbPre+z9x1O1+iP0QdPICcQyKdHZ4s+qMUc/q0gSiW0GaXdH6az956nsPm6TxKOqvaFGP09Hr8G4MwfiO7gVU0I6YVc51d9/SNLMe4lK68H+fWW1AQDA1v2l9D8SUJyYVODExbqr97q49fzJsOEzNL8H+5mz+Tw/hv9+k8P5w8ZRuBp+NtCCIaRRWFb/Yr2wzENijI39+ZWYrUbio22Ak8DI86he8zGa30PsWT/AkdmHaGP9C3nv7pIG2+wPhjEc2fHYajU16XdNi+mLf8jZVK//HNViJ2bIREzxx6behGzDqU7pSbCw5rOyZY/Cu38DYx1lrMnIYFeeG4+/8Z/DiUKWUVTHpREqL8DaeziOXoMw2I+997oLBvDKoq21jxUFLp7Yi/j44zMcOVEnXUHRvP8DwJo1iEDhATSfi9izrsKRkU10I5updRcd3Y90lEhtd1vqsIXBu3
fv5s477+Shhx6iV69eTX5faakLTet8t6KTY2yYjCrBI18gBlUhNcFGcXE11nAl5XOfImbK9VQue5v46f+D15Ze5/1WTx7lH/8VS8+haNVllC9/n5jxMzkrsAtPdV96ptb95XfYTKTE2emR4uRgYTWzz0ggZutrxJ51FSWL/oKt9wgCpbmUfvRX4s+/BdeWb/F++jw/vegn/H5hzciL9bjt5Af0iMNdUUWgNJ/4S+5FsUdTufgVVHs0ev8pKNsXM6jPEEpKEttkJCApyUlxcf0v+u4sEtsM0u7WKqs5Omv/eSpJ0VZsFmPtXWJFgYykqAY/z8QEOxWrP8V3cBuOMRfhWr+YkKsca9Zg/KX5hIrLqaz213vfURazAVWBox+TAkRZjbh9Nee+7qwUtK//hmqxYes7FvequYw65z7eNap8sbkSgPwiF8mxVsYOTGbNjro3Xgb0iOOb9bmMG5xK2B+iuLgai+aicsEcVEsUtr6jqVj6LsaUPvjjB9RvX0zDc/CT42xUuv1cdnYfLAZO+bumKArGvDVUr/+cqJHT8GxbStHHL+G44G4Cig1T2I1v1Ts1AYCigq7j2fUdjnEzCccN5NDKmmAkKcbWpN9rI0ECq94mVFmEY8zFuNZ+QsXaz2DQhbWf9bgBSThsI/lyzSGio8ycPy6L9DhrnfIt5bspWfA8isGEGhWD78BmHEPPxpI5kIql74EtFr3XmU1YA9I1Sf/ZeuWJDgoC1q5dy3333cejjz7KjBkzOqIKrS45xsLjPxzP4u8PEgxrXHBGD9LibKCD3xhL9OSrKV3wHI6R5xO01d8JMmSNJ/7iOzHEpRPyenBXlhOyR2GNTiSkKfRMiuKRm8by2Xc5xEdbOW9sFtFWI/deNYKlG/NZd6iSsefeg+o0E3/h7YRjs7CiYaguRE8dgDNjGHrJPtyWNMYNCmO3mRjeJ5FVWwp46PoxZKc4CKgKjgvuJmiIAkUhdvrd5Be72OwcwDkXD8WQ1Itw17t+EEJ0cbF2E4/dMo4v1hzC5QlywRk9yEpoeOGsohowZY8lISEDY1w6Jmc8poQsAhWFVK9aQFy/CWQkxhIfbaGsqiYYMBpUhvdJIBAIcbjExU+vG8O3G3IxGQxkpTr45TUD+XJLBRWuAP16JhE74BqwxRC2J0LqAD7apxA4cgMoKdZKrMOMCvzg/H4cLvWQW+wCYFifRAIhDUVRuOiMHrV19qsOos++FsXqJGxLxNJjCFp0WoPty0iwc/64rDrz52s2cRyI3WqgR7KjSesBdF1HScwmfsa96Bn/396dx0lVn/ke/5xau3pf6I1eAWn2HRVwAST2NTar0YhmMIbMJLnXV5wk9w4xhmtmMiZRbl5xifEmNyZmcV7jK8YtMmKcIRCNC8giKCog0DQNTdP0vtZ2fvePhoJm7dbqharv+y+quuqc8zTVT53n/H7n+U3BN/YqsG0Cju7vLSvURfB4NZbbS9ZN3ybQ2kjLy4/RVlfD68dH0ekPMWdSPiW5vRtdCuHGN2kBCaOvIJw9hqyCsVgZw/GfdqweV/c6ONNHZwHWWaMwAJY3GVdSBmlzlhFuqaN97xa8hWMI292j4q2bXiSjYAJ+d3qvjksknllmgJvw1tTUsGzZMh566CFmz57d5/cP9StZzhPz0k9PXt5QM00vPIC3YAydezaTdctquhKHn/Xe+jY/2/bUkZeZyIQRmTg5e/690+nAGNPjd2BOdM041tBBXmZid+eMyHxSQ/jEmbt1Yqj45Dag+8vjQj31T8Zj23a/3gsQj1c34jFmUNzR2tYnMdTz58U4HBaWde6Tw5NO/p6tE/cvJYQa8LsycJgw7mAzXZ7u+ev1bQHe23ecrkCYyaOGUZDlw7KsSE7szn0msj/L6t539+/PRPJhVyjMX7Ye5tXNVZTkpXD79WPITT91tT4Qsqmqa8cfDNPY2oXTYVFWlEFWcs8pK9Zp91udzNXn4w/ZVNa2sqeqieREN5MvyyYr2d2rk/
8zXWhf3kADlr8Vf2oJFgZHw34q23xsPRSgrCid0ryUc96cfeH9nWhx/SnuL0sIt2DjJLDtRRInzqPLl4cr2ELXOy/gm34j/oSzL7TFCuXP6G1PBqEIuP/++3n22WcpLj51FWT58uXcdtttvXr/pfglZlk2Ce01BJNycbXXEkrIJHTGyr7tgTD3/b+3aTwx9/8fFk9g9vjcXmwc/vbeUX699gMAEhNcfPfOy8m/xPrkx2Nii8eYQXFHa1ufxKWYP/vqXL9np90JlpOw5cFjtxN0Jp12wv3pmx1YFnQFbdxOBxfqTnyhfVkWBG2D02Fh9fJ4Tm5voP+movE7i4asNDf1zcHIY5cVImRie/kj5c/obU8GYTrQ6tWrWb169UDvdlAZ46AzsQAMhBILzvp5IGRz+Hg7c2cUcriujXc+qOXI8faLXg2C7hGA3637KPK4oyvEX7ZUs6K8LOa/7EVEzsXpdERGC5x2J6HtL+FISMZXdiWNLz9G2twv4E/vbnccjZNZY+jVGirn21cwbLNt73FefH0/2ek+Pr9gNAWZiZ94e/1tKBQAAA5PAnCqCIj1AkAk2vQXM8g6A2F+9R8fsG13HQDjSjOZP72QOZPze7VcejhsEzpjaLy5PXBiOpDBckAoDC7HxQsKEZFLWXNHkG1vV+IPhMnPSqQwKwmnw4UzJYvmjb+nddMLWG4vuIfWSOmewy384oX3ATha38G+w8388KtzSPXpK1pE+o8aqw+yg7VtkQIA4MPKBqaNyen1dJ6MFC/XzTy1oIxlQfkVxYTDNo3tAZ7+y8fc98QmXt5cRdsZ/ZdFRGJFyDZ8cLCRjVur+eWL7/PYH3fy7v56QpYbz4ipWO4ETChA4rg5hJOGzpxxy4KPq5t6PNfRFeJ489ntRUVEokmXGQaZPxg+67lQ+OznztTSGaLqWCsel5NFV49g8qhsmtv9lOSmUJSdhG0M//bq7kiB8cz6vWDgxiuLNSIgIjGnpTPIurcqqT7WRrKvu53o48/u5Gd3zyCw/v9iudwkTy+nddOfyCoch5U/NarTWtzBZhwtRwhlleFqPwrGxp9cdNH3GQPFuT3nJ7ucDjJSvNE7OBGRc1ARMMhK8lJITfLQ0h4AICXRTUnehVcZbmgL8MPfvkPDiV7XM8fm8PcLx+M5bU5qmz/MgcPNFA1L4NDxLsYUJvP2rhquv7wQ13lWohQRuVQ1tfmpPtZGWrKbVfM8BC0Pf620cB/aRur8FYQtF6GkXIYNL8POKI5qAWBZFtTu5vjLj5N61S00bF2HO7uI5Bv+kYB17r7+pxtbnM6yuaN46W/7SUvy8pWlE8lIiu0Fr0Rk8KkIGGTpiW7+952Xs/tQEwaYMjqbZPeFZ2ntrW6KFAAAWz46xo1zSinNOdWv2ed18N3PeLA7mtncXsBV7g8wwyfhdlhD5qYuEZFoCNk2gZDN4mtGMj7PRcpH/w61e1laOJnOv24hcen/JJg7CWMgnD0x6vs3xmAKppAwYS4tbzyDIyEJz9yVBB0JvWrb6XM7WTSnhOtmFOJyWnidmqkrIv1PRcAAuVBLtawUL3NOtAM9sw3W6R2CTv7bPseGzuwE5DUh0v01tG95nrnDSrGPV5I2vAiDoXsNzMExVFrLiUhsMMB/bT3MH9bvBeBVj5MH7/g7rJf+BSq3kDhzIeHcCRfNO582N9ktxwjs2woOF7a/k/rdO3CVXU1a0sVHAk4GkuSJvVVue9PlTkQGh4qAftbUEeS9/fU0tvqZMCKTEXnJOHpxEt7eFWLXwUYO17UxtSyb9s4gew41UZqXyojhaST53LR3drdGG1eayfBhST3eH3Z4SJx8Pf4P/0boeCW+sstxlswgaAanAGjpDPH+gXrqmjqZNHIYpblJODQtSUQ+pca2AH/8y97I44lFPhwfv4Yd7MKdXYhduw9XUxXhtFK8/uMQ6CCUXoLn+EeQkErAm0VlbQs7DraTluRl0qgs0hPdfToGy4JgezMmOQt71p
0MsxppPHIEn78WLy78SWe3ho51YWM4cLSN9/fXk5fpY8KILFISdMohMpToLzLKDBAM2SR4nHQEwjz6zLtU1nRf2X/xtX38w9JJTCzJIPkCydAAz762j43bDuPzunA6Hbzw132Rn980bxTf//srOVDTgsftZEReKgln9Kh2mgBd764j1FKHp6CMzj3vkDByOlbJLMwAFwJB2/DLP+1i14F6AF58bT/3fvFyLsvXYh0i0pPDYbBti+4ux2EM5786HrINbf4Qpw+EdgQMeJNh8b+wsQp2flzP1YfczEgMYG9bS8fut0m/ZjnHNj5F6rRyavOu4p9/vzsyClCcm8w9K2aelVMvxBiodpWwv2A5M1qO0vDaL3BdcQtdr/6UkC+V1MX/hN8aWm1J+9vHh1t48KmtkcfTx2bz35dMxKmLPyJDhoqAKKpr8fPMho/5uLqJz8wsYvLoYZECALq/KPZVN+HAcMWY87eoa2oP8tfthwGYdNkw3n6/psfP//T6fuZPL2T6ZcPOu42w5cFTPIGMrEIcxVMJfbgBZ1YxoUEYCTje3BUpAE56670jlBWM1YJmIhLh9dcT2PMGngkLoOkIdnsDdtFM7HMUAu3+EL97ZTcNrV3MnpTPW+9158mPDnfgXzyPZ/96kLfeP9r9XFUTQdtB+RXL8B/6iMb1v8GTfxkGw+bd9T2mAVXVtnG0oaPHPVa9kZ+ZxNu73PyxxsPnRl1FaPMzWO4E0pauoivOCgCn08GG7dU9ntv2UR318/3kpPVyepSI9DsVAVESCNs8+swODte1AfDHDR/j9bhI8rlo7zzVnz/B42JfdROzxuWe9wTY43KQ6HXR3hWiuc1PZmoCR+s7Ij9PT/bivtDa9CePKWssVpZNCAeO8eX4zcDfbBYMGyzL4pqpw3nrvaORhc2yMxI1T1REeupooOXtF/AcfJ9g7QE8xeNJGj4B23n2CfnHh1t458NaADJTElg27zIyUryMK83E6bQiBcBJ//XOIeaPKCDUchyAYH01SWOuJDN4dl70efr+1ehxObh57igcbTV0vLwbGzDBLkLH9uMozjpnIROrjDHkZfacoupxOfC64+d3IHIpUAuCKGluD0YKgJPe+bCWlQvHc3L0s/DElaVJo7IveAU8OcHJV5ZOwuW02H2wkcvH55J0YvqQx+XgK0sn9WgHeiHmxH+xPQgFwOGGDn74+y3c+/M32XOwkeXXl5Ge7CU/K4krxuXqBmER6SGYNZq0a24lcGQPxhjS5t9J8BwFAEDbiXuioDvXPr/xY4alJTC2NBOfy0FeVmKP15cVpcHxSnwjp5J354O4M4eDZTFt/HBKc7tfa1lw+/VjyE77ZD36XU6LhGALDoeTnC+uIXnGjfgPfYgj7L/4m2OIbRuunpxPdnr3VX+Hw+LLiyeQntS3ey1EpH9Z5hK7HFtf3zYkp5D4wzb3/XITdU2nVnm87foxXD9zOFV1HVTVttLQ3MXw7GQmj8zEc54WcJHuQBbUtwZobQ+Qk+EjbBuON3eRkewh/RLoH93SGeTeX7xFR9epURCnw+K7X7qc3HQfvjOuCJ3ZFSkexGPMoLijta1PYqjmz5O8zfup/+OPcKZlE2o8im/MLHxXfYGgM+ms1x5u6OT//NtWFlzevUK61+NkzsQ8LivOpK6ulUP1HTz89HYaW/2U5qfyP26aTG4yOMJdBF2p+AJ1hNypBC0v4UAn1c02Po+T7DQv1qfooGZZBm+olS5nKi67E8sOE3T1bWpRXw3Vv6nOQJjapk6SfW6GpXiIdme6oRp3f4rHmCH6cX/SHBprNB0oSrxOB99cPo1fr/2Ayppm5s8oYtaEXDAWxcOSKMlJ7r7y3duay0BWsoes5FMn/CkJ/ftFEk1Hjnf0KAAAwrahvrmL0uxLJw4RGUDeZJImziVh5hJMfRXhlmPYjnNflS/I8vH1W6aw5qmtBEPd0wxDIZuS4ekAFGUlcv9XZtHWGSI92YPbYREGwq7unNrpyY
5sy+nxUZJ95h4+GWMsupzdCz6GHL64Hm/3eZx9vrdCRAaOioAoyktP4NtfmI4/GMbncUauefhDNofq2uj0hyjOSSGtj+3nLkXu80xX8ro0J1REzs2fkIN71m34jRMrdzxWzhjs83xNOSyLN9+riRQAcyblY4DXtlczPCuJo/VtJHhdFGUn9+oeKhGReKMiIMqcFiSetuBLyDY8vX5vpNtPSqKb1V+6guyUTzbn9FJRmJ3E6OJ09lY1RZ7LSfdRkqchOBE5v5Dpzp/GWJiLfEWdnNo0a2Ie9S1dvHmiQ1BWWgKzJ+Wz9m8HmD+9kNuuH41LrSlFRHqI44HKgVHb2BkpAABaO4K8v7/+Au+IDV6Xg7uWTeKOz45l4sgsbv3MaFb93QwtFiMiUWHbhutmFOFyWuRlJbH7YGPkZ/XNXbhO3He1YVs1tY1dg3WYMkQ57S4cdI8ieUx83bgtcpLOyPqZfY57AMLhoXtjXjSl+tzMnzqcz8woJBw2agkqIlFVnJ3Ev35lNofr2i/4OuUeOZ3L7iT07lrcOaV4ckfSsv7XpFz7BfyJwwf70EQGlIqAfpaX4WPm2By2fHQMgASPk0kjswb5qAaOMd0364mIRJsxhty0BFITPYwuTGNvdTMAqUmeSA+Gy8flkpveu8W6LMvgCPsJOxJw2n5shwcT5Y42MgQYgx0O0fDy4zh8SZHnROKNioB+5nY6uPPGscybXkinP0RJXgrZqV7lGxGRKPG5HXz95ikcONqCwSI73UfN8Tb+1+3TKc1Lwe3szYm8wV37AV0Hd5I4czFdO9bhzh9NKG+yCoEYE3ImkjS1nPZ3/xO7s420a28nmJQ/2IclMuBUBAyARI+L8cXpkccqAEREois5wcWk0sxIP/H8EwtV9ZaFwQS7aN/+Kl17NhFubyLjhq8BNsTRar/xwBtupfE/HsWRkEhCyWSaX/t3huWOwJ8xerAPTWRAqQgQEZF+4TAhnLafoDMJd7idsMOLbQ29rx3LAne4EwomkjxlAaHGIziS0nEWTyHUDwVAbXMX1cfaSE30UJybjLeXK8BLdARcyaTOuQUrKQPbl4m3dBJ2cu5gH5bIgBt62VhERIYEb8cRcHq6hy/tYJ9unLSwcVS9Q2flTlKuvpXWvz1NQukUTMmVmDMa09U2d/He/nqy03yMK0k/74rq/cGywF2/h9Y3/kD61TeDHcSVkQ8NR/C/uw7ntCWETfQKgUPHO/jXJzcROtEgomJOKTddO+JTrVIsfWOMRTB3YmRU3iqZo5vHJS6pCBARkbM4LAgd3Uvb1nUAJM/4LI5Rw7F7vei5A0diGp17N9O1byvGDpM4/lpCZxQArV0hfvjbd2jtCALwj5+fypSRmVGN5YLHacDyJBJqrKHu+Z9gQgEyPvMlvCOnYyWm449iAeBwWGzcXh0pAABefquSudMKGZbiucA7JdpOP+dXASDxSmOQIiJyFtuAp3AC4dYGwq0NeAon9LoAiMgqwT2sGBMK4B5WDFklZ72k0x+KFAAAxxo7P+WR952dlIOv7EpMKIDDm4iraBLBnIn4kwqivi/HGYuWWXSPRoiIDDQVASIichaHBf4D20iZ/TlSZn8O/4HtOPpwsuq0bAI71hE8XkXqVTcTPF5FYMc6nFbPlsEZyR7+25XdxUGSz82kUQPbQtmygCM7ad/5F1JmLQHLouU/f4nb7oj6vmzbMG96Ae7T7gFYfM1IsjQKICKDQNOBRETkLLYB1+hrMCdv5DUh+rLOYdg48I6bi7d0MsGsMobll0FiBn7T89qT2+ngpmtHMG9aAYkJrgFfVdwYsHJGM2zZPxHKGUvmyMvBhPFbvVtboK8KsxL5wVdnc+hYG6lJHoqyk0CzUURkEKgIEBGRcwo5TjsRttx9fr8/IRsSssEGf2bZeV/ndjrI7WNLz2gKuNMgOw0MhJML+3VfxsCwFC/DUrz9uh8RkYvRdCARERERkTijIkBEREREJM6oCBARERERiTMqAk
RERERE4oyKABERERGROKMiQEREREQkzqgIEBERERGJMyoCRERERETizCW3WJijL+vWX6LiIcZzice44zFmUNzxuv+BEi9xni4eY4b4jDseY4b4jbs/WcYYLVguIiIiIhJHNB1IRERERCTOqAgQEREREYkzKgJEREREROKMigARERERkTijIkBEREREJM6oCBARERERiTMqAkRERERE4oyKABERERGROKMiQEREREQkzqgIGGRtbW0sXLiQ6upqAN58800WLVpEeXk5Dz300CAfXf947LHHqKiooKKigjVr1gDxEfcjjzzCjTfeSEVFBU8++SQQH3EDPPjgg9xzzz1AfMS8YsUKKioqWLJkCUuWLGHHjh1xEfdgiLccqvyp/BnrMSt/DiAjg+bdd981CxcuNBMmTDCHDh0ynZ2dZu7cuaaqqsoEg0GzcuVKs3HjxsE+zKh64403zK233mr8fr8JBALmjjvuMC+99FLMx71p0yazfPlyEwwGTWdnp5k/f7758MMPYz5uY4x58803zZVXXmm+/e1vx8Vn3LZtc/XVV5tgMBh5Lh7iHgzxlkOVP5U/Yz1m5c+BpZGAQfSHP/yB733ve+Tk5ACwc+dOSkpKKCoqwuVysWjRIl555ZVBPsroys7O5p577sHj8eB2uxk1ahSVlZUxH/cVV1zB7373O1wuF/X19YTDYVpaWmI+7qamJh566CG+9rWvAfHxGd+/fz8AK1euZPHixTz11FNxEfdgiLccqvyp/BnrMSt/DiwVAYPoBz/4ATNnzow8PnbsGNnZ2ZHHOTk51NbWDsah9ZvRo0czdepUACorK1m3bh2WZcV83ABut5tHH32UiooKZs+eHRf/3/fddx/f/OY3SU1NBeLjM97S0sLs2bP52c9+xm9+8xuefvppjhw5EvNxD4Z4y6HKn8qfsR6z8ufAUhEwhNi2jWVZkcfGmB6PY8nevXtZuXIlq1atoqioKG7ivvvuu3nrrbeoqamhsrIypuN+5plnyM/PZ/bs2ZHn4uEzPm3aNNasWUNKSgqZmZncfPPNPProozEf91AQD58vUP5U/uwWazGD8udAcw32AcgpeXl51NXVRR7X1dVFhrljydatW7n77ru59957qaioYPPmzTEf9759+wgEAowbNw6fz0d5eTmvvPIKTqcz8ppYi/vll1+mrq6OJUuW0NzcTEdHB4cPH47pmAG2bNlCMBiMfHkbYygoKIj5z/hQEA85VPlT+fOkWIsZlD8HmkYChpApU6Zw4MABDh48SDgcZu3atVx77bWDfVhRVVNTw1133cWPf/xjKioqgPiIu7q6mtWrVxMIBAgEAqxfv57ly5fHdNxPPvkka9eu5cUXX+Tuu+/muuuu44knnojpmAFaW1tZs2YNfr+ftrY2nn/+eb71rW/FfNxDQaznEuVP5c9YjhmUPweaRgKGEK/XywMPPMDXv/51/H4/c+fO5YYbbhjsw4qqX/3qV/j9fh544IHIc8uXL4/5uOfOncvOnTtZunQpTqeT8vJyKioqyMzMjOm4zxQPn/H58+ezY8cOli5dim3b3H777UybNi3m4x4KYv3zpfyp/Bnr/9fKnwPLMsaYwT4IEREREREZOJoOJCIiIiISZ1QEiIiIiIjEGRUBIiIiIiJxRkWAiIiIiEicUREgIiIiIhJnVASIiIiIiMQZFQES01auXElDQ8Onfs2mTZtYuHDhRfc3ZsyYc25r/fr13H///QCsWLGCV155herqaqZNm3bRbYqIDAblT5HYpsXCJKa98cYbUXnNp7VgwQIWLFjQ7/sREYkW5U+R2KaRAIlZ3/nOdwD44he/yObNm1mxYgWLFi1i8eLFvPDCC2e9pqamhg0bNrB8+XJuuukm5s2bx8MPP9zn/T788MMsW7aMJUuWsGHDBgCee+45vvrVr0YlLhGR/qb8KRL7NBIgMetHP/oRzz33HL/97W/5/Oc/z6pVqygvL6e2tpZbbrmFkpKSHq/JyMhg1a
pVPPDAA5SWllJbW8v8+fO54447+rTfwsJCvv/977Nnzx5WrFjBunXr+ilCEZH+ofwpEvtUBEjM27dvH36/n/LycgByc3MpLy/n9ddf7zGn1LIsfv7zn7Nx40bWrl3Lvn37MMbQ2dnZp/3ddtttAJSVlTFq1Ci2b98evWBERAaQ8qdI7NJ0IIl5lmVhWVaP54wxhEKhHs91dHSwbNkydu3axfjx41m1ahUulwtjTJ/253Cc+rOybRuXS7W2iFyalD9FYpeKAIlpTqeTgoICXC4Xr776KgC1tbX8+c9/Zs6cOZHXhEIhDh48SFtbG9/4xje47rrr2LRpE4FAANu2+7TP559/HoBdu3ZRVVXFlClTohuUiMgAUP4UiW0qsSWm3XDDDdx55508/vjj3H///fz0pz8lHA5z1113MWvWrMhrVqxYwSOPPMK8efP47Gc/i8fjoaysjMsuu4yDBw/i8Xh6vc9Dhw6xdOlSLMviJz/5Cenp6f0UnYhI/1H+FIltlunrWJ2IiIiIiFzSNBIg0gdPPPEEL7300jl/9uUvf5nFixcP8BGJiFwalD9FhhaNBIiIiIiIxBndGCwiIiIiEmdUBIiIiIiIxBkVASIiIiIicUZFgIiIiIhInFERICIiIiISZ/4/+yMhMUUXWKIAAAAASUVORK5CYII=\",\n      \"text/plain\": [\n       \"<Figure size 777.475x360 with 2 Axes>\"\n      ]\n     },\n     \"metadata\": {},\n     \"output_type\": \"display_data\"\n    }\n   ],\n   \"source\": [\n    \"# Create a visualization with Modin df\\n\",\n    \"sns.relplot(\\n\",\n    \"    data=modin_tips,\\n\",\n    \"    x=\\\"total_bill\\\", y=\\\"tip\\\", col=\\\"time\\\", col_order=[\\\"Lunch\\\", \\\"Dinner\\\"],\\n\",\n    \"    hue=\\\"smoker\\\", style=\\\"smoker\\\", size=\\\"size\\\",\\n\",\n    \")\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 30,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/plain\": [\n       \"<seaborn.axisgrid.FacetGrid at 0x7fc3bbd7fac0>\"\n      ]\n     },\n     \"execution_count\": 30,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    },\n    {\n     \"data\": {\n      \"image/png\": 
\"iVBORw0KGgoAAAANSUhEUgAAAwEAAAFcCAYAAACQkLIVAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAACckUlEQVR4nOzdeXxU1d348c+9c2fNTPaVfd8RUFEBFVErCiIC7ntbH7X601bbWvXRLj5q9amP1K2ttrZWq+IGgigoihuLCMoi+x4IBJJM1klmudvvj2ggJkDInsz3/XrxejF35p57zmTmzP2eVbFt20YIIYQQQggRN9S2zoAQQgghhBCidUkQIIQQQgghRJyRIEAIIYQQQog4I0GAEEIIIYQQcUaCACGEEEIIIeKMBAFCCCGEEELEGQkCRIfxk5/8hOLiYgD+67/+i+3bt7dpfu655x5eeOGFFr/Otddey8KFC1v8OkKIzqk91p1nnHEGU6dOZerUqUyaNInf/va3FBYWAnDw4EGuuOKKNs2jEPFAa+sMCNFQS5curfn/3//+9zbMiRBCdBztse684YYb+OlPfwqAbds899xz3HjjjcyePZusrCxmzZrVxjkUovOTIEB0CPfeey8A119/Pc8//zxXX301Tz75JFVVVTzxxBPk5OSwa9cuvF4vN910Ey+//DK7du3ivPPO47777gNg8eLF/PWvf0XXdTweD7/5zW8YNWpUrets376dX/7yl3Wuf9111zFjxowG5TUvL48pU6awevXqOo9nz57NokWLUFWV3NxcPB4Pjz32GH379qWwsJDf/e537Ny5E1VVueKKK7juuusA+Pjjj3nhhRcoKipizJgxPPTQQ6iqdOQJIY6uI9SdiqJwyy23MGfOHJYuXUqfPn1q6synn36affv2UVhYyL59+8jKyuJPf/oTmZmZnH322UybNo3ly5eTn5/P1KlT+cUvfnHUPD/99NOsWbOGgoICBg4cyOOPP94M77IQHZQtRAcxYMAAOxgM2rZt2xMmTLDXrVtnf/nll/bgwYPtDRs22LZt2z/96U/tyy+/3I5Go3YwGLSHDh1qHzhwwN61a5d94YUX2sXFxbZt2/bWrVvtcePG2ZWVlY3Oz29+8xv7H//4R53je/futUeOHFnv47fffts+6aST7Pz8fNu2bfvBBx+07777btu2bfu2226zH3vsMdu2bbu8vNyePHmyvXv3bvuaa66xf/azn9mGYdhVVVX2uHHj7JUrVzY630KI+NJR6s7bb7/d/vvf/16rznzqqafsc845x66oqLBt27Zvvvlm+8knn6wpy6OPPmrbtm0fOHDAHj58uL1nz56j5vmpp56yJ06caOu63uj8C9FZSE+A6PC6devGkCFDAOjRoweBQACXy0VqaioJCQmUlZWxcuVKCgoKuOGGG2rOUxSFPXv2MGjQoJpjzdETcCxDhw4lOzsbgCFDhrBo0SIAli1bxq9//WsAAoEA8+fPrzln0qRJOBwOvF4vvXr1IhgMNktehBDxq73VnYqi4PV66xw/5ZRT8Pv9QHWdWVZWVvPcOeecA0BWVhZpaWmUlZWxdu3aI+YZYOTIkWia3P4IId8C0eG5XK5aj+ur3C3LYsyYMfz5z3+uOZafn09mZmat1/Xr14+5c+c2KT+KomDbds1jXddrPe/xeOp9raZpKIpS89zevXtJSUmpee5I6QshRGO0p7rTtm02bNjANddcU+e5I9WZAG63u85zR8vzokWL8Pl8jc6nEJ2JDCoWHYbD4cAwjEadO2bMGJYuXcqOHTsA+Oyzz7jooouIRCLNmUUAEhMT0XW9ZgWO9957r8F5fPvttwGoqKjg+uuvZ/fu3c2ePyFEfGnvdadpmjz77LOkpKQwevToJqfXmvW9EB2Z9ASIDuP888/n2muv5emnnz7uc/v168eDDz7IXXfdhW3baJrGX//6VxISEpqUp5kzZ/LMM8/UPJ4wYQJPPPEEv/71r/mv//ovUl
NTOf/88xuU1m9/+1t+//vfM2XKFGzb5uabb2bYsGFNyp8QQrTHuvPFF19k3rx5KIqCaZoMHz6c559/vklptnSehehsFFvGFQghhBBCCBFXZDiQEEIIIYQQcUaCACGEEEIIIeKMBAFCCCGEEELEGQkChBBCCCGEiDMSBAghhBBCCBFnOtwSocFgCMvqvAsapaT4KCmpautstLp4LHc8lhmk3M0hIyPQqPM6e/0J8fn5iscyQ3yWOx7LDM1f7sbWoZ2N9AS0M5rmaOsstIl4LHc8lhmk3KJlxeP7HI9lhvgsdzyWGeK33C1NggAhhBBCCCHijAQBQgghhBBCxBkJAoQQQgghhIgzEgQIIYQQQggRZyQIEEIIIYQQIs5IECCEEEIIIUSckSBACCGEEEKIONOiQUAoFOLCCy8kLy8PgGXLljFlyhTOO+88Zs6c2ZKXFkIIIYQQnYxl2xSURSiqiNK5tz5seS22Y/DatWu5//772b17NwCRSIT77ruPl19+mZycHG6++WY+++wzxo8f31JZEEIIIYQQnURplc6L721i3Y4iFAXGj+rGjPF9SXDLZmKN0WI9AW+88Qa/+93vyMzMBGDdunX07NmT7t27o2kaU6ZMYeHChS11eSGEEEII0VkoMH/ZLtbtKALAtuHTb/JYtaUARWnjvHVQLdYT8PDDD9d6XFBQQEZGRs3jzMxMDh482FKXF0IIIYQQnURV1OTz1fvqHP/k6zzOHJGDxAHHr8WCgB+yLAvlsFDNtu1ajxsqLc3fnNlqlzIyAm2dhTYRj+WOxzKDlLutxEP9CW3/PreFeCwzxGe547HMAKnJXtKTvRwIVtU63jXTT1pKAg6HrHVzvFotCMjOzqawsLDmcWFhYc1QoeMRDIawrM47FSQjI0BhYUVbZ6PVxWO547HMIOVurrQao7PXnxCfn694LDPEZ7njscxQXe5IVYxrJg7i8Ve/qTnuUBUmntqD4uLK405PtGIQMGLECHbt2kVubi7dunVj/vz5zJgxo7UuL4QQQgghOrBB3ZP57Y9P4dtdQVyag+F90uiS6m3rbHVYrRYEuN1uHn30UW6//Xai0Sjjx4/n/PPPb63LCyGEEEKIDkxVoFeWn97Z1UMb7c7dsdniWjwIWLx4cc3/x4wZw7x581r6kkIIIYQQopOSm//mIbMohBBCCCGEiDMSBAghhBBCCBFnJAgQQgghhBAizkgQIIQQQgghRJyRIEAIIYQQQog4I0GAEEIIIYQQcUaCACGEEEIIIeKMBAFCCCGEEELEGQkChBBCCCGEiDMSBAghhBBCCBFnJAgQQgghhBAizkgQIIQQQgghRJyRIEAIIYQQQog4I0GAEEIIIYQQcUaCACGEEEIIIeKMBAFCCCGEEELEGQkChBBCCCGEiDMSBAghhBBCCBFnJAgQQgghhBAizkgQIIQQQgghRJyRIEAIIYQQQrQKT2Ue7lgQAIcVwVu+CxWjjXMVnyQIEEIIIYQQLc4d2kvRW49Q9v7TePUg5rr3KJj1B5Qdy1BtCQRam9bWGRBCCCGEEHFAc+PwJqIX7Kbgpd+AaYCi4gikEVMcbZ27uCM9AUIIIYQQosVFPZmkTv0lKGp1AACkTroVPWsIoLRt5uKQBAFCCCGEEKLFOawI4U2fg23VHKtY+S6uaHEb5ip+SRAghBBCCCFanLNsLxVfvQuKSuoFt6IlZ6MX5BJZsxBNMds6e3FH5gQIIYQQQogWpyf3JPHMK3GmdkPPGkJyRi+qvp6Pe+QkYrbMCWhtEgQIIYQQQogWZyouHAPPRlc1bFsh6snEffp1xGy5HW0L8q4LIYQQQohWYSpOsA89NiQAaDMyJ0AIIYQQQog4I0GAEEIIIYQQcUaCACGEEEIIIeKMBAFCCCGEEELEGQkChBBCCCGEiDMSBAghhBBCCBFnJAgQQgghhBAizkgQIIQQQgghRJyRIEAIIYQQQog4I0GAEEIIIYQQcUaCACGEEEIIIeKMBAFCCC
GEEELEGQkChBBCCCGEiDMSBAghhBBCCBFnJAgQQgghhBAizkgQIIQQQgghRJyRIEAIIYQQQog4I0GAEEIIIYQQcUaCACGEEEIIIeKMBAFCCCGEEELEmTYJAubOncvkyZOZPHkyjz32WFtkQQghhBBCiLjV6kFAOBzm4Ycf5uWXX2bu3LmsWrWKZcuWtXY2hBBCCCGEiFutHgSYpollWYTDYQzDwDAM3G53a2dDCCGEEEKIuKW19gX9fj8///nPueCCC/B6vYwePZoTTzyxtbMhhBBCCCFE3FJs27Zb84KbN2/mnnvu4YUXXiAQCPCrX/2KE044gRtvvLE1syGEEEIIIUTcavWegCVLljBmzBjS0tIAmD59Oq+++mqDg4BgMIRltWrc0qoyMgIUFla0dTZaXTyWOx7LDFLu5kqrMTp7/Qnx+fmKxzJDfJY7HssMzV/uxtahnU2rzwkYNGgQy5Yto6qqCtu2Wbx4McOHD2/tbAghhBBCCBG3Wr0n4PTTT2fjxo1Mnz4dp9PJ8OHDuemmm1o7G0IIIYQQQsStVg8CAG666Sa58RdCCCGEEKKNyI7BQgghhBBCxBkJAoQQQgghhIgzEgQIIYQQQgjRDny/jH5rkCBACCGEEKIdcUWDuCOFACjYeKv247D1Ns6V6GzaZGKwEEIIIYSoyxUrpnzhs9hGlJQpd2EG91Aw/2mSxl+FlXxBW2dPfKeyspJ7772X3NxcVFVl6NChTJ48mT//+c/k5OSwa9cuvF4vN910Ey+//DK7du3ivPPO47777gPg9ddf5+WXX0ZVVdLT03nggQfo3bt3rWs88sgjbNmyhb/85S84nU4ef/xxVq5ciWmaDBkyhPvvvx+/38/ZZ5/NCSecwJYtW7jrrrv40Y9+1KAySE+AEEIIIUQ7oVgmthHDCO4j+PrvKZ7/NNgWdiyKbVltnT3xnUWLFlFZWcncuXN56623AMjLy+Pbb7/lpptuYu7cufj9fp5//nmee+45Zs+ezauvvsrBgwdZvnw5//jHP3jppZeYN28eF154Ibfddhu2Xb2Zo23bPPjgg+zfv5+///3vJCQk8Pzzz+NwOJg9ezbz5s0jMzOTxx9/vCY//fv3Z8GCBQ0OAEB6AoQQQggh2o2oJ4OUKXcSfP33WOHqXXIDo6fgGHIuDo8PKuJvx+D26KSTTmLmzJlce+21jB07luuvv57i4mK6devGkCFDAOjRoweBQACXy0VqaioJCQmUlZXxxRdfMGnSJFJTUwGYPn06Dz/8MHl5eQC8+OKLBINB3nnnHVwuFwCffvopFRUVLFu2DABd10lLS6vJz8knn3zcZZAgQAghhBCinVCwMYN7sSKVNcfCO78hech4IL3tMiZq6d69O4sWLWLFihV8+eWX/PjHP+bBBx+suWn/nqbVvdW26unRsW0bwzAAGD16NCeeeCL33nsvr7/+Ok6nE8uyuO+++xg/fjxQPRwpGo3WnO/z+Y67DDIcSAghhBCinfBU5VM8/ymwLQKjp6CldcUI7qPi479jVJW3dfbEd1599VXuvfdeTj/9dH79619z+umns3Hjxgade8YZZ/D+++9TXFwMwNtvv01ycjI9e/YEYNiwYVxzzTUEAgGeeeYZAE4//XReeeUVYrEYlmXxwAMP8MQTTzSpDNITIIQQQgjRTsS8GSSNvwo7FsUx5FySh4yn4uO/Exh/HZovESplOFB7cPHFF/PVV18xadIkvF4vOTk5DBw4kIULFx7z3HHjxnHDDTdw/fXXY1kWqampPPfcc6jqobZ5RVF45JFHuPjiixk/fjy33norjz32GNOmTcM0TQYPHsw999zTpDIo9vezEDqIYDCEZXWoLB+XjIwAhYXx9wWPx3LHY5lByt1caTVGZ68/IT4/X/FYZujc5a5eDtTGVKqHlriIEMPTqct8NM1d7sbWoZ2N9AQIIYQQQrQjpuKs9TiGp41yIjozmRMghBBCCCFEnJEgQAghhBBCiDgjQYAQQg
ghhBBxRoIAIYQQQggh4owEAUIIIYQQQsQZCQKEEEIIIYSIMxIECCGEEEII0Q48+OCD3HHHHbWOLVmyhHPOOYdQKNSs15IgQAghhBBCiHbgl7/8JevXr+fjjz8GoKqqit///vc88sgj+P3+Zr2WbBYmhBBCCCHEcfj06728tGATRSVh0lO8XHfBYM46qXuT001ISOChhx7ivvvuY8yYMTz11FOcffbZeL1errzySiKRCCkpKfzhD3+ge/fu/Otf/2LOnDmoqsoJJ5zAgw8+2OBrSRAghBBCCCFEA3369V6eeXMtUd0EoLAkzDNvrgVolkBg7NixnH766dx7773s3LmTV199lauvvpq//e1vdOnShS+++IIHHniAF154geeee44vvvgCh8PBf//3f3Pw4EGysrIadB0JAoQQQgghhGiglxZsqgkAvhfVTV5asKlZggCAe+65h7POOotnn32W/Px89u7dy89+9rOa50OhEA6Hg1GjRnHJJZdwzjnn8OMf/7jBAQBIECCEEEIIIUSDFZWEj+t4Y/j9fhITE+natSuhUIhu3boxd+5cAEzTpKioCIC//OUvrFmzhs8//5wbb7yRxx9/nFNOOaVB15CJwUIIIYQQQjRQeor3uI43VZ8+fSgrK2PVqlUAvP322/zqV7+iuLiYSZMmMWDAAH7+858zbtw4tmzZ0uB0pSdACCGEEEKIBrrugsG15gQAuJ0OrrtgcItcz+Vy8eSTT/Lwww8TjUbx+/089thjpKamcvnll3PJJZfg9Xrp3bs3M2bMaHC6EgQIIYQQQgjRQN+P+2+J1YEOt3jx4pr/jxo1irfeeqvOa2644QZuuOGGRqUvQYAQQgghhGgVqmpjWQoAigLYFnYHHJ1+1kndm/2mv7V1vHddCNEg7uItuCMFNY89Vfm4S7ZXV7pCCCFEK3PpZdjr3sWll6IoCs6iLWj7VqMoVltnLS5JT4AQnZCrcANFc59AC6STPPWXKJZJ8Zz/xYqESJ12N3paf2y7rXMphBAiXjjRCa94i6qNX+DeuwX/iecTnP8kmCYZV/6eSKBnW2cx7kgQIEQn47BjxPK3gWVilB2kZM5j2KaBVVUGgFGwEyWlN7YiX38hhBCtQ8eJ78QLiOxeR3TvBqJ7NwCQMGoihi+jjXMXn2Q4kBCdjKm4cAw7n8ApU6ofVwRrAoDEcZehDJyAJQGAEEKIVqb7u5I8/uqax4rmImHU+RgOXxvmKn5JECBEJ2SqHryDxtY+qGp4+o3GVFxtkykhhBBxS1EUtMJNFH/4fM0x24hR9tE/cOmlbZexOCZBgBCdkKcqn+Db/1v7oGVQ/M7jtSYLCyGEEK1BtaLE9m4E0yBh1EQyrnkE1ZdELH87arSirbPXruTl5TFw4ECWLl1a6/jZZ59NXl5es11HxgQI0ck40QmtmodZWQJAYOwloEepWPkuRtlBIusW4Tz1Cgzb0cY5FUK0F4oC5RGDotIIHpeDrGQvaiuuJKYoYNrgUJBFCzopU3GhnXABaV36Y6X1JeLwkTr9N6BHiAa6g/zda3E6nTzwwAPMmzcPv9/fIteQIECITkbHie+0yzAry/D0GIY65FywLQKKgl6Uh/vEKcQkABBCHGZ9bil/m72OyogBwOjBWVx93gASvc4Wv3YwFOOz1Xl8s6WQYX3TOOek7mQkulv8uqL1GaoXMofXPI76ulT/p4MFALapc+CNRwHImvErDr79OADZl92D4mie70xmZiZjx47lscce43/+539qPfe3v/2NefPm4XA4GDduHL/+9a9xOI7/d12GAwnRCcVcKQTOvRl1yLmYigtT9eA4YRK+8dcT0xLbOntCiHbkQGmEma99UxMAAKzcdJCFX+1p8X1FqmImT7z2DfOX7mZ/USUfrtjDH19aSXnYOPbJQrSRA288SmTPRiJ7NrLnqZtq/v99YNBc7rnnHpYsWVJrWNDnn3/O4sWLefvtt5kzZw65ubnMmjWrUelLECBEJxVzJt
WaBGyqHnRnUhvmSAjRHu0+UI5VT0vsR1/tafGb8bzCSvKDVbWOlYZi7DkoY8RF+2cbMaxoFbYRa5H0/X4///M//8MDDzxAKBQC4Msvv2Ty5Ml4vV40TWPGjBksX768UelLECCEEELEMdOsfyyGadlYLTxA37Tq3ynWqC8qEaKdyJrxKxRH7RH1ikMja8avm/1ap59+es2wIACrnu+MYTQuWJcgQAghhIhjPbMD9R4fO7xLi88J6JbhJ+CrfQ23y0HPrJaZCClEczj49uPYZu0bb9s0OPj2n1rket8PCyooKOC0007jvffeIxKJYBgGb7/9Nqeddlqj0pUgQAghhIhjXVK9/HTKUNTDlgPqnuln2pl9aOkFggIejXuvG83IARloDpUhvVO5/4bRpPplPxPR/imaC9XtQ9Fa9vP6/bAgXdc566yzOOuss5gxYwaTJ0+mS5cuXHPNNY1KV7HtjrUYVzAYwurE3YQZGQEKC+NvLGQ8ljseywxS7uZKqzE6e/0J8fn5ao4y29gEK2LkB6vwuTW6ZiTg0VqvndDCJhKzcDsdOBoYecjfOn40d7kbW4d+rzVWB2oNskSoEEIIEecUFNIDbtIDbbM0p4qCzyVLF4uOQXE4ybnygZrHh/+/I5HhQEIIIYQQQsQZCQKEEEIIIYSIMxIECCGEEEIIEWckCBBCCCGEECLOSBAghBBCCCFEnJEgQAghhBCdnqL88HFL74IgRPvWJkHA4sWLmT59OhdccAEPPfRQW2RBCCGEEHFCUSycB9bhjhQA4NJL0fJW4rBjbZwz0Zl8++233HHHHW2djQZr9SBg7969/O53v+Mvf/kL8+bNY+PGjXz22WetnQ0hhBBCxAFFAWfBJoJzZ1I69//wVu0n9NHfKX7vGdixBFXp3BvoidYzfPhwnnrqqbbORoO1+mZhixYtYtKkSWRnZwMwc+ZM3O622ZxECCGEEO2PooDDimIo1fcHTmLouBqVlm2DkpiNM7MXesEuCv5zHwCqLwmty2CitgwLEsevsrKSe++9l9zcXFRVZejQoUyePJmHH36Y+fPn89Of/pSioiIAqqqq2Lt3LwsXLqRLly48/vjjrFy5EtM0GTJkCPfffz9+v7/Vy9DqPQG5ubmYpsktt9zC1KlTefXVV0lKSmrtbAghhBCiHVIUcJXsILb8NVxmJe6q/YQXP48rVtzoNKPuNFIuuKXWsbQLbyfqy2lqdkWcWrRoEZWVlcydO5e33noLgLy8vJrnX3jhBebOncubb75JVlYWd911F7169eL555/H4XAwe/Zs5s2bR2ZmJo8//niblKHVewJM02TVqlW8/PLL+Hw+fvaznzFnzhymT5/eoPPT0lo/UmptGRmBts5Cm4jHcsdjmUHK3Vbiof6Etn+f20JnKrNZFaLoi8VUblyKFS4jmr8Tq6qMwIgJZAzqWeu1DS23XnKQgg9ernWs5MO/k3XpPbgzezRb3ltDZ/pbH4/2Vu6TTjqJmTNncu211zJ27Fiuv/56iotrB6qWZfGrX/2KPn36cNNNNwHw6aefUlFRwbJlywDQdZ20tLRWzz+0QRCQnp7OmDFjSE1NBeDcc89l3bp1DQ4CgsEQltV5x+9lZAQoLKxo62y0utYqt4qFK5SH4ctCsXQc0RKi/u7YbfCRkr91fGnOcjf2x7Cz158Qn5+vzlhmz2mXY4RKCe9YDUDyOT8mmjqQ8GHlbGi5FQWcB3cQzV2P6ksi7aJfULr4JfSCXYS2rCSkJmN1kCFBnfFv3RDNXe7mCCi6d+/OokWLWLFiBV9++SU//vGPefDBB2u95uGHHyYcDjNz5syaY5Zlcd999zF+/HigelhRNBptcn4ao9WDgAkTJvCb3/yG8vJyEhIS+OKLLzjnnHNaOxsiTrmr8il49bcETp2KWREkvPUrMq55hLArva2zJoQQ4jtKrBK96NDQiti+rXh6noSlHX9vlm2DkTmYlPNvwZHeg4ivC0kX/D+MvetQ+pyG0U
ECANG+vPrqq3z99dc8/vjjnHHGGQSDQTZu3Fjz/PPPP8/q1at5+eWXcTgcNcdPP/10XnnlFcaMGYOmaTzwwAP4fL42WS2z1YOAESNGcOONN3LVVVeh6zrjxo1jxowZrZ0NEaeiviySzrqWsk+ru4VTp/yCiDsNOnfjqBBCdBhOooSWzsKqKiP5nBuI5m6gavNSvANGo+SMbFTPrYWG3fM0jO/OjbrTUPpPaJNeYNE5XHzxxXz11VdMmjQJr9dLTk4OAwcOZOHChRw8eJAnnniC3r17c80112BZFgB33HEHt956K4899hjTpk3DNE0GDx7MPffc0yZlUGy7Y30FOnt3tnT1tSynESKy4nWqNnwBQGDMdLShEzHU1l+hSv7W8UWGA7WOePx8dcYyu2NB7OI8jKyhOMwqlIJtmDnDMJVDdXVnLPexxGOZoX0OB+oMWr0nQIi25IhVENnxDakX3YlVWULFyvdIGTgWwyXL1AohRHsRdaWh5KRh22BpiShdT5JWeyGamQQBIq5EfDmkX/UQEWcyim2S0mMEUWdKW2dLCCHaDU1TsSy7zXuNDr/plwBAiOYnQYCIO2EtBWyw0SQAEEKI7+imzcY9JXy0ci+9sgOcdWI30vyN26CrNRjlQfhuAzHNioJtYjh8bZspITqQVt8sTAghhBDtz5a8Up58fQ0bdgZ5b9lunnxjDVHDauts1csd2sv+l3+Lu3Ifmh3D2PABsW/moplVbZ01IToM6QkQQggh4pzDobByU0GtY3kFIQrLInRLa1+t6xoxKlcvwCg9QPHsP+LtfyqV6z4GRcU3eByGv2Nt/iVEW2lwT0BZWRmhUKgl8yKEEEKINmBZNr1zaq+Y4nY5CHidbZSjIzNw4Tvtcrx9RmGFQzUBQOqUXxD1d2/r7AnRYRwzCNi5cyczZsxgzJgxnHrqqVxzzTXs37+/NfImhBBCiFZg2zCqfwajBmQAkODRuP3SESQntM85AZbDgyvzUIu/4tBwBNKwkY2/ROcQCoW48MILycvLO/aLG+mYQcC9997LpZdeytq1a1m9ejUTJ07kv//7v1ssQ0IIIYRofUk+J7dOG8b/3jaOP/5sLEN7JNMetxJyWDHMDR9S9uVcUFS01C7YRozg7D/irpJGStHxrV27liuvvJLdu3e36HWOGQSEw2GuuOIKnE4nLpeLa6+9lqKiohbNlBBCCCFan0NRSA+48bu1drssp6U60dK7geogdcovSJ52L+7eI9GSc7A1T1tnT8QRwzAoKirCMIxmTfeNN97gd7/7HZmZmc2a7g8dc2Jwnz59+OabbzjxxBMB2Lp1K926dWvRTAkhhBBC1MdGwcgZSdef/C8Vaio2Cv6zbgDbIuZKbevsiTixdu1afv7znxOLxXC5XDz55JOMGDGiWdJ++OGHmyWdYzlmELB//36uvfZaBg4ciKZpbNy4kYyMDKZMmQLAu+++2+KZFEIIIYT4no2CO6sX5YUVAMScyW2bIRFXDMPg5z//ec2CObFYjJ///Od8/PHHOByONs5dwx0zCPjVr37VGvkQQgghhBCi3SstLSUWi9U6FovFKCkpIT09vY1ydfyOGATs2LGDvn37kpCQUO/zQ4cObbFMCSGEEEII0R4lJyfjcrlqBQIul4uUlJQ2zNXxO2IQ8L//+78899xzXHrppeTk5NRaISAcDrN8+fJWyaAQQgghhBDthaZpPPnkk3XmBHSkoUBwlCDgscceo7S0lL59+/Lyyy9j2zaKoqDrOtdcc01r5lEI0QwUhXa72ocQQrRXqmLhKsvFTMhEdyTgipWgxiqIBnpInRrHRowYwccff0xJSQkpKSktEgAsXry42dM83BGDgF/+8pcsXboURVEYM2ZMzXGHw8HEiRNbNFNCiOblNCtRCrZgZQ3GUL24y3MBm2hir7bOmhCik4gaFnmFlRRXREgJuOme4cetHXMl8gZzGhVYDhem4gZALyvEgYZ57OmNjaZg4dj7DYXvP0vC8A
kkjJ5C2cf/IJa/jbTpvyGa3LfFri3aP4fD0aHmAPzQEb85L7zwAlC9Wdgf//jHVsuQEKJ5aVaE6FdvUfntJ/hPmoS3/ykUzX4MsEm/7AEivq5tnUUR50JRg/xgFQleJ1nJHhyK7Pra0YQiBi/M38ja7Yf2ERrZP4OfXDgYv7vpN+lOo4LIslk4kjNxDD8fhxGmaNE/cfUcgaP/eEylhQIBRUVJSEJxaFR+u5jKjZ+DaeBISAaXr2WuKUQrOea3RgIAITo2Q/Xg6XcKlRu+IPT1+4S+fh8A74BTMV2JbZw7Ee+KKqL88d+rKAlFAbj2gkFMGNkFZJhFh7JuZ7BWAACwZlsh63dmctrgrCalrSgK9v71VG1eCkDA0AkX5hLN/ZbwrnVkdBmA6e/epGsciW2Dntqf1It+QXDOn8Cs3hQqddrdRHw5LXJNIVpL8/XTCSHaLT1zMMlnX1fzWPUkEDjzanQt0Ia5EvFOURS+3HCgJgAAeOWDLQQrokc5S7Q3qqrw6Td59T73ydf7UNSm9ezYto3S5QQShp8FQMWq+URzvwUgZcK1GP6mBRnH4oyWEFr1fq1jVd9+jNOsatHrCtHSJAgQogGcRghP1f6axw7bwFu1D1Wx2jBXDecqy6Xs89dqHluRSqpWL0Szwm2Yq0PcVmXN/xVFwW2GjnmOogKKgowc6bgUBcpCtdfatiwb3egY36v25vDvUfXjY3+PmksgwVX/cb+zWb6jltNLwsDTUBzOmmOu7N4YJftxFO9q+gWOQFVs9G1Lie7dgCMhmdQLb0fRXFSu/RilaBuKYksd1ECKYuGt2ofDNmqOear24zRa73MqapMgQIhjcBohIstfo+jNh3BX5OLAhB1LKPjP/Tj2ft0mgYCiQNS0iJnWMX+AnHaUyq/fxY6F8Q44lbSpd4GqEVr9AVo42DoZPgp3+W5KZz+EO3wARVFwFm2mbN6fcEcL6329adls3V/O02+v5w//+opP1uynPKy3cq5Fc7Asm7En5NT6DJ88KJP0RE/bZaqDclftp/Tth3CH9lQH0uW7KZ39MO6q/DqvdShmvf9vDEWByqjBuSfXPxzn3JN7YJlNH9vliJVTvvJdbPPQdz12YBegQCCj5phTL8MVra7XNLMKd6T+eqShLFtBG3AmCcMnkDLtN6hZ/Um74BYCp07FyhiAs3AzrpIdEggcg6JYaHnfUPCf+7G3f44DA3fFHorefIjIstckEGgjim13rAWugsEQltWhsnxcMjICFH63DXo8ac/ldpdspejNRwBQ3D78J5xNxcr5AKgeP2lXPkjEmXrc6Ta2zKZts25nMS8v3IxC9RjqE3qnoh7lV8ill2LuWIHabyy6FsBVuAmw0DOHtvoSd4eX22OUEZz1AFZVOQ5/Cknjr6Z44d/ANPD0HonvR7eic6iFUVHgm+1Bnn5zba00+3ZN4s7LR+Jztd81mpvzM56R0bhhXO2x/rSx2VtYxY79ZSQnuOnfLQm/p/GTPNtzXdJSUjw6+1/+LUZJPqrbR/KPbqTkw+exYxGcmT1Jmvobokr1JFanUYG+9n3cQyeAZRDd+iXO4RPRHfVvDHo0VTGTxd/ksWB5LqMHZdK3ezKzFm0lHDXwujWumTiI0YMy0Jo4HEhRFLTc5dV1A5B48gUYZYVUbVsFKGRe/SDhhO449TKqPvs3ZlUZKZNuo3L1B0S2ryTl4t8Q9TZtyJCGjmrpVH78PLH8bQRGT0FL60pw3pMoqkr6Fb8n4uvSpGs0REf9fHv0kuq6Plx9sx8YfSGhdYuxo9VDqtIvuZdo6sAjnt/c5W5sHdrZSBDQznTUL3hTtedyq5gou5ZTuugftY97Ekidfi9Rf7dGpdvYMu8tquR3/1hR69gfbjyN7ulHX6lCVaA9fHUOL7eigLN4O8Vz/hfbODQsxJGYQcrFvybqyax1bli3uO+5ZXWGkAD8+uqTGNw9qWUz3wQSBLSO9lyXtJSMjADluzZQ/PajWNFD49RVr7
+6jko4tAKYO3yQknceAxRsQ0f1+kmechdR9/Evc7hiSwHPzVlf87hLegJ3XTGKSMwg4HOR6HXSXLcYDiuCtfZdFIeDqu3f4EztAti4u/RH7X0yMWcqmhXGWPMeFavmg6qBZeDqNojAuTcTczV9J1dFAWdwK8Vz/lSrR8J/0iSco6ZgqN4mX+NYOvLn212ZV/0ZjdRu9U8+9yfYfcZhceRGnHgLAp555hkWLFgAwPjx47n77rtb5DoyHEiIY7BwoPY5FU+vEbWOp/zoRmKBxgUATXGwpO44/oKSY09Qa4/3frYNRlo/ks68otbx1AtuqRMAAFRG9HoDAICSikiL5FGIjiAW6Enyj26sdSzlvJuI+WsvARz1ZpEy8WbMiiBWuJyUiTc3KgBQVYUNO4trHdtfVElV1KBLqo+AR2u2AADAdrhwpHahatvXaElZJJwyFcXhpvybRSih6uE/hurFM+oCHP4UsKrHnSef85NmCQDgu/oqfSCJp19ac8yRkIz3xMmtEgB0dDF/N1LO+69axzy9huPoO+aoAUC8WbZsGUuWLGHOnDm88847bNiwgUWLFrXItSQIEOIYHLaBtW0pkd21h6AUf/h3XOW5rZ6frJS6Lf5ZqR1zvWpFAa1oC6WfvVrrePF7z+AJH6jz+gSPRnLAXW9aqTKOXMQxV9kuSj58vtaxkoV/w1Wxt9YxT/ggxe//pXq9fX8Kxe8/e8T5N0djWTbD+6bVOpaY4CLlCN/PprJsFbv7SQROm4b3zOuJ+LqSPPZiks+5gVhqPwA0K0zVqnmYoRIUrXoYYemHz+OKFR8t6eOiFWyk7Is3ah6blaVUffUOmqwUdEyu8j0Uf/BcrWOR3d9ibP2i1mThjqKkpIS//vWv3Hbbbfztb3+jpKSkWdLNyMjgnnvuweVy4XQ66du3L/v37z/2iY0gQYAQx6CV7qB08YtA9RCgxDHTAbCjVRTPfgyP3nw/MA3RNc3HXVeMIjvVR3aqj7uuHEWXDhoEuPUySt5/tnrzncQMUqf8HEVzYYZKqFj6Ok5qt/p7nQ5+PGlInXQG9UyhZ5a/tbItRLtiVBRT+uFz2LEIqtdP2tS7UN0+rGgVZR+9gNs+dINqOhNIGH4WKVPvJmXab0gYegamdvzzAQCG9U7lih8NJDHBxeBeKdx9zUkktOC8HFP1YHQ7Gd1Zvb+JO7v3d/OaquccKKaOVVmKq9sgMq57jMDJF2JWlaOYzbNwgNOqonLdx2AZ+E+aRPqMe1AcTqo2fYEWK2uWa3RWHr2E4jmP1cwBSBwzHdVTXWeXffISWsmOtszecSspKeHyyy/npZdeYsWKFbz00ktcccUVzRII9O/fn5EjRwKwe/duFixYwPjx45ucbn1kTkA705HH+zVFey6306gksvJNIttWkjr9Xgx/DuxaTulH/yT1wtsxckZgNyKebmqZ9e++B84mTrprbT8st7tiD2Ufv0DSxFuJ+bJwFm+jYukbJJ53C1FXWp3zLdtm18EQn3ydR2FpmDNHduWEvmkEmjCZtDXInIDW0Z7rkpaSkRGgYs82Shc+S9J5txDzd8VVsYeyj/5J0vm31hla51AsTFut8//GUBSFiG6iOVQcrVwV1fe3durlKJZBzF09R8ChVzZquNORuPQyrPxN0G0EpsOLM7gVRXMRTezVbNc4mo76+Vaw0fLXUjz/KZLP+TH0GYtWmU/x24/i6XsSntMuO+rk9PY2J+Cvf/0rL730Erp+KMB0Op1cd911/OxnP2tq9gDYtm0bN998M7fffjvTpk1rljR/SIKAdqajfsGbqr2X22lU4tAriHizgerJwu5wARFvVqMCAGj/ZW4p9ZXbZVcR+271EkUBl1lFVD1674bDUf2+m2bHWFNegoDWEY/fq+/L7LaralYBAuo87mza6m+tKErNfAdFoVVXWHP7XARLqvC6tFYPuppKwcYTPkDUm1kzB8AdPoDlChxzdar2FgTcdtttrFixos7xU089lWeffb
ZJaQN8/fXX3HHHHdx3331Mnjy5yekdSftuOhOindC1BPTDuswtHIS9smV8c4kddqNi2xwzAICOc/MvRGv54Q1/SwcALr0ULIuYu3qJZG+0gKgrFUvp3LcWh7edtlYAYNo2m/eU8sqHWzgQrKJ/92Su/NEAemV2nGGQNkqd383odw1rHc3w4cNZvXo1sdihIasul4vhw4c3Oe38/Hxuu+02Zs6cyZgxY5qc3tHInAAhhBBCHBeXXkbok39S8eFfceuluIKbKfjPfah7VqJ2wEmeTaFwKBJoqdGZ2/eX83+vreZAsHpM/ba9pTz84kr2F7ePXd/jzeWXX04gEMDprN7B2uVyEQgEuPzyy5uc9gsvvEA0GuXRRx9l6tSpTJ06lddee63J6danc4frQgghhGh2ihHBCO7DrAhSOvthjIoSsAz0oj1o3UdCJ+8N+J67PBezvACr24k4w0UY+zah9RmDoTbfKkk2MO+LXXWOm5bNys0HuXhc72ZdjlUcW0pKCrNmzWLWrFmsX7+e4cOHc/nll5OS0vTlaO+//37uv//+ZsjlscXHt1QIIYQQzSbqzSJ12t0E33gQo6x6idGEEefiHHVR3KyZ77FClCx4FqOsgKSzrqHkm4WY5UVkZvXCCPRqtuvopsXB4vqXIM0vqkRVFUxTgoDWlpKS0myTgNuKDAcSQgghxHGzKkuwYtGax3pBLqoRP5v2RVQ/yRf+HNXrp+zT/2CWF5E04TpijdxF/khcmsppw+ofOz9qQKbMjxKNJkGAEEIIIY6Lp2o/wTmPg2XgGzYeRyCNWP42Qp+8gMuOo3HqDg3F4ax5qLi82EozTwyw4axRXetsxNavWxJDejXPbsgiPslwICGEEEIcF92bRuCUi7BiVbhOnIpvVDll7z9J4LQZxFQvtPHoFEUBdyxI1JWKbStoZhWqGSXmar6bZrcZonT+k5ihEgKnXkzluo8p/eA5Mq/sQjjQs9muA5AecPPbH5/CrgMV5BdV0jUzgT45ifjdchsnGk8+PUIIIYQ4LqbixjFsIpptYqheDK+XpGn3EXP4W3Xd/PooCrhKdlD0zp9IueBWlMwBRFbNRi/IJTDx1mYLBGKan6RzfoxZvB/6jiO132hiu75B92Ue++RGSPI5+dGpPeNuHwzRciQIEEIIIcRxM3+wAk5U9bd5DwCAahvE9m3GjkUonvdnXF36E9u3BcXhRI2WQTMFAbYNemp/lOTeWGiYCV1xDMvG+G4jLCHaO5kTIIQQQohOw0RDHXIugVMuAtsitm8LKCqp0+4mmtirWa9l29TaHM2UAEA0gyeffJJJkyYxefJk/vWvf7XYdaQnQAghhBCNpiitt3NuQymWiRU9bFlN28bWw1R3VbTQjl4i7kQiEYqKikhPT8fj8TRLml999RVffvkl8+bNwzAMJk2axPjx4+nTp0+zpH846QkQQgghRKOomLgKNuDSSwFwxYpxF21Coe2WrXRgYKx7n8q1H6E4nHgHngbYFM/7M+7SuptuCXG8DMPgiSee4JxzzuHKK6/knHPO4YknnsAwmr5b9imnnMJLL72EpmkEg0FM08Tn8zVDruuSngAhhBCihamqgmW1s+byJlIUUPesomjBX3H3GErShOso+eA59AO7SJt6J3r2CW3SQ2Ci4e5/KlUbl5B8wa3Yqb3QUnLQC3IxfWmtnyHR6Tz11FPMnj2baPTQPhmzZ89GURTuvPPOJqfvdDp56qmn+Oc//8n5559PVlZWk9Osj/QECCGEEC0kalis2Rnkn+9vZtnGg1RGmt5SeLycRgXu4CYcmLir8nFX5DZLujHDZoejH9tG/pxg5skUzfoD+oGdOBLTUZKy23SIUNTfndQr/oCeNgAdF47h5+Mbfz26M6ntMiU6hUgkwttvv00kEqlz/K233qpzvLHuuOMOli9fTn5+Pm+88UazpPlD0hMgRCvRrDCqESbmSgXANg08epCIU1qmhOiMFAVWbSnkhXc3APD5mn2cd2
oPrpjQr1XzoBTtoGjen0kcM53itR+jJaaReOGviKqNH2JgAx9/k8cbH28Dqne1vXfiT0hd/gxpF/2CsKdllsk8HhFHUs1qRabixnS6j36CEA1QVFSEqtbfhq6qKkVFRXTr1vhdo3fs2EEsFmPw4MF4vV7OO+88tmzZ0uj0jkZ6AoRoBZoVJvbNXMrfexJ3NIiqWIQ2LKHolftxl+9u6+wJIVqAYcH7y3fXOvbRV3uoiLZeb4Btg5U1hISR51G+fDa2HiX5/FubFAAAlFbGeOuT7TWPY4bFp7kaWko2ZV+8VjNHQIjOJj09Hcuqf86Lbdukp6c3Kf28vDzuv/9+YrEYsViMjz/+mJNOOqlJaR6JBAGizRwoDbNicwHrc0sIlnXebeYVBdSCLVR+sxC9MJey959C2bGEwnefwY6FKXl3Jh6zvK2zKYRoZg4FemT6ax1LS/LidrTuT68jHCS8dQWoGrYeIbZ3PardtEDEsqtveA5nqi4cviSiueuxD25DUWQVHtH5eDweZsyYUWc1oCMdP17jx4/nrLPO4uKLL2bGjBmMGjWKyZMnNynNI5HhQKJN5AWrePCfKzDM6h+RsSfkcMMFg9A64Y+GbYOdMYCEEedQufZj9MJcSj/6JwCK5iLlwp8T1RLbxSY7QojmdfH4vmzLK6O4PILXrXHztOG4tFZuf4uGcCQkkX7ZA4S3LEcvzsfVS8dyNP4WINXvYtKYXry3bDcADlXhnNE9Cfiux7NvI3aXE+oECUJ0FnfccQeKovDWW2+hqiqWZTFjxgxuv/32Zkn/9ttvb7a0jkaxO9i3NBgMdboVFg6XkRHo9FuCK4rC25/vZP7S2ku1PXzzGHJSvG2Uq5bntsOUzXsM/eDummNp036Nnjm03a2x3ZLi4TNen+Ysd0ZGoFHndfb6E9rn5yscMwmWR0jyu0n0as3+fT9WmRUFXGaIqOpHs6JgmxiOpi85GDUscg+GKK+M0i3TT06KF9sGVbGx7JZv0GmPf+uWFo9lhuYvd2Pr0B9qiX0CWpMMBxJtwMbtqrurokPtfL0A31MVC2vfevSDtVflKF/yBq5IsI1yJYRoDV6Xg27pCQQ8Rw4ANLMKV6wUAHesBIetH/d1FEVB01Q8RjGKYuOwddyxEmwbomr1sCRDdTdLAADg1lQGdE3k5AEZZCd7a8rWGgGAEO2Bx+OhW7duHTIAAAkCRBuwbRg9KJMEr7Pm2NTxfUlP9GADhm2327Gkh8cp1Vk8dpOeooBWsJHi9/8C2CiaC3evEQDVcwQWPIVbL2uR/Aoh2j/NjqGvmU9o8d/xhvMpnfsY7FrO8bSLBEMx3lm6m6feXMvK9fugaCf2ts8ofecx3LHilsu8EKLDatM5AY899hglJSU8+uijbZkN0UCKAqGoiUNV8DrVI7ZofT/hzFY1Sioi7DpYCUCv7ERSEqpv/DOTPDz0X6eRV1RJgkejT9ck1m4rYv7SXZSFoow/sRtjhmaT7HPWf5E24NLLMLYvw9lvHIYrCWfRJmw9hpEzHPsoLV+2DUpSDs7MHhjF+aTNuAc7MQfXV29TsfYjEkach+70H/F8IcSRRU0L3bCO2srenq5pA6GogdfpQPvuLt9UXXj6nkTl2o8oePleHAnJaNn9iTYw7dKKCE++sYa8ghAAq7bALyem0WPlKySefhlGA+qXsG6SeyBEeVWMLmkJ5KR5cRxHY4wnfBDTmYCu+XFHi7AVBzFXSp3XKQpUxSws2ybg0Tr98LQjcUcKsB1uYs4kXHopiqkT9WS0dbZEnGmzIGD58uXMmTOHs846q62yII5D1LD4dPU+Zn+2A59H48aLhjGkRxIKtX8kVNtAyf0KRVHICwzjwRe/IRIzAfC6NR748SlkJ1d3myX5nCT1SEZRYPn6Azw359uadN78eBsrNhzg7qtOxFfP0KHW5iRG1fJZhDcvx5u/A9+w8QTnPQm2ReaVvyfs73HU86OuNJIm3YESDRFN7IltQ8qEK/EMHE
MsuRcWbV9GITqabfkVPD/nW0pDUSaN68XE0Uf/HrbUNRtaR5WHdd7+dAdL1u2nW6afmy8eTpfvxtHjTUJ1J2AaMdRAOraz4UN28ouqagKA732zz6ZPUgZaWjcM1XnUTsvKqMFf5qxn0+5DPQY/nTKUccOyGrRggacqn+DsR/H0HoF/9EWUvP8UittH4NxbiLmSa15n2zZfbw/y4nubMEyLK84dwNih2Tgd7bPnt6W4I4WUvvsEjuQsks66jrLF/8IMlZB84Z1E3U1bXlKI49Emw4FKS0uZOXMmt9xyS1tcXjTC9n1lvP7xNnTDoiwUY+Zr31BYHq3zOsU2MEvyKVv3KW8t3loTAACEowbvLtlZ5zelImLwnwWb6qS150AFuQfbxwQoHRcJJ05C9SQQ3r6K4Dv/B5ZBwqjz0H0Na72JutKIBHrWtBxqCclEkvtKACBEI5RUxvi/V74mWB7BtGze/WIXm3JLWvyaj//gmpv3lDboXEVRWLr+AF+s3Y9tw96DIf48azURw8Kp6FR9NRuwSZvyc4yiPZg7vmzwcKCAz4nzBysOdc/y4+o2iJIFf8EdO/r7sn1/ea0AAODf72+iJBRr0PUtpw9ndh+qNnxOwYu/Qi/cg7vbEKwfBDL7SyI8+9Y6KsM60ZjJv9/fxO6C9lHHtybL6cPVdRDRXWsp+Ncvieaux91tMKbWeRfGEO1Tm/QE/Pa3v+XOO+8kPz//uM9NS+v8wyaaa9Z6cwquP1DrsWVDKGIwtO8P8xpAP2USFTHYtqKqTjpb9pTi9rpI8h/aubE8r5TKSP1rVofCRvt5PzKGYE+4luCCvwGguDyknjYFZ0rjd8ZsN2VrZVLuttGZ6s/9JYXEjNob9uw5WN0a3lLv8/6SQvQfXDP3YAUTx/Q65rmWZbNma2GtY0VlEQwLktNTiZ11JXYsiju7N+60bLRAGlpiw8ph2zZ3XD6SZ95cSzRmMrxvKmNO7E16Qk/0k8/HqgqSkhXANg1iRXvx9ToBRTs01LJ4bd3fYsO0iOhWA9/LAJ6zr2HfztUAOBKSSR49EWdi7d3QN+XVnftUFoo16e/V1t+pxgkQO30GVZuWgGmgaC5Sx16MMzW7QWd3zDI3XbyWuyW1ehDw5ptvkpOTw5gxY5g9e/Zxn9/Zl7hrr8t/dUmr3aLj0lRSElx18uq0wkSWvIwjuJ8T+0zns/VFtZ4/aVAmekSnMHyohcmpQkrATUlF3Z6FZH/da7QFRVFwFm0i+OE/ao7ZsQhFH/4L7xnXoTsTjzvN9vq3bmlS7uZJqzE6U/2Z6NVI8DqpDB9aRWdAjySAFvt81XvN7kkNup6iwLgTurBtb2nNsV45ibg15bvzA+AIQGEFuHIgSvX/GyAjI8CwHik89rOxRGImaYluHIpCScSB11IoeOOPePuPxoqFieVtJuPqhwm7D/VgZqfWbYH2ujUS3FqDyuaOFlH2/pMAqB4/ZmUpwcWv4Dn1MnTtUOCZFnCjqkqtz2BGsrfRf6+OWpe4YiWEFv8DTAPV48eKhChY+Hf8439ca/hUfTpqmZuqvS4R2tJaeu5sqwcB77//PoWFhUydOpWysjKqqqp45JFHuO+++1o7K+I49MoKcMelI5j92Q6S/C4uPbs/qX5XnUlxuurFO3gcXhQu9PRmw94Kisqqb+6zUn38aHT3OhvIeJ0Obpw6jD/95+tax08cmEGPjPbRcqmaYSI7vgHTIGHU+fgGjyU4+zEiud/iP7W8UUGAEKLx/B6Ne687mbc+2c7B4iouOr03/bsmtdtr2jacNDCdaGwgi7/eS//uyVx0em+czbQ0sm3bJHqdJHprL6YQ8WWTeuHtFL9bfZOePuM3RDwZtcb6984OMOGkbnzydR4AmkPh1hknkORr4MRnu/pfYMx0vEPOovyTF8G2+OGEgsxkD/ddfzJvfLyNqG5y6YT+9EhvnuVKOxwbEkacS8LoqYRWvI1ZISs4dTShUIgPPviA3NxcevbsycSJE/
H7m++epTXmzrbpZmGzZ8/mq6++Oq4IpzO1ZNWnvUf5pl29TGZDf7bCukleYSUo0DU9AZ+z/vHvySkJrNlykC83HKC4PMppQ7MZ2COZhHYwKfh7TrMKpXArVuZADNWLO7QXTINYcu9GrUrS3v/WLUXK3TxpNUZnrD9twLJtNFXBtlvn8/XDax4PRVHQLat6ZaAjnatAJGbi1Bw0ZM7s0crs0ssIffpPorurF17wDjwN79gr0bXanyHDsskvDhMKx8hK8ZEacB3XLubuWDGm04+huHDqZaA4avUCHO77AVVNnZTYkesSV6wUy+HEcCTgNEIolnHMXgDo2GVuivbWE7Bq1SruvPNObNsmEong8XhQFIWZM2dy8sknNzl/paWl3HTTTUyaNInNmzd3np4A0bEd7yIOXqeD/l2O3Uru1FR6Zfrpk90fRVEwTeuY57Q23eFDyRlZ86Mf9Xev/k/nuqcSokNRAIdy/DfjbXVN27bRlCMHAKWhKr7ZeIA5y/I4b1QG/fvm0K9LUs1yosedVzOKVVFM2iX3gh6j4ss5KGaszq+/pip0T/cB37XMH6VsLr0MNVxMNKkXtq3gKd+N5UnCUFwA6M6j947IBkXUuuE/UrAk2qdQKMSdd95JOByuORaJRAC48847WbBgQZN7BJoyd/Z4tOl3cfr06bJHgKjFsux2GQB8r+36zYQQHZFmRVCU6orDYcdQqX8RBIBQJIK1fTkjzbVcOz6b8cYX+Mt2sPmweQTHK+rJJOnie9BT+6NnDSFw4S+IudOOfeIROPUyQp+8QOEbD+E8uBFXcDOFbzxExUfP4TrGKkQdXTvdw1K0sg8++KDOsObv2bbNBx980KT0D58729KkJ0AIIYRoAU6zkuiqOXh6j8TOGoixaTGq24faZyxWPT+/+w5WkB4L4/p2LsMSPsMMV6BljWD2p9sYcPXJuLTGtdtF1YTvWvYVYmrTWihthxtXZk+iu9cRfOfx6jtj28KZ2QtL8zQp7faqrEpna14pBSVhumX46dslEb9Hbp/iVW5ubk3L/w9FIhH27NnTpPRbc+6sfIqFEEKIFuCIllG1cQmV6xbj7T+a8NYVaKk5JHc/gagzuc7rdxVEKFb7MyKQillRjNLrZBbu9rA7P0jUsBodBDQnQ/XgHHkhnuC+6sUSbBt3j6F4Tr6YGJ0vCCiqiPLHl1bVWr2ud5dEfn7pSBK9cgsVj3r27InH46k3EPB4PPTo0bRNC//1r3/V/P/7ubMttXhO29coQgghRCcUTehC+ozfgG0R3roC1Rsg5aJf1hsAAIzolcCw8s8xK8tQ+o7B3r2Kad0LOG1oFj532yySoCqHhj14w/twmpUowV1Edq2rOR7N2wwFO2qGPXUWigJfrN1fZ/nqXfvL2bA72Ea5Em1t4sSJKEcYG6YoChMnTmzlHDWeBAFCCCFEC1CtGNF9W2oeW5FKrNIDR7xZTk5KItJjDKHTbuLVilMxx1zPbiuH88f0wtEGA9I1swo2LsQdLcRdup3oli9xVuZTsuAvYBn4Bp5C4MTzwDIpWfDsMXcm7mhips2XP9go83tfby7A4ZBbqHjk9/uZOXMmXq8Xj6e698vj8eD1epk5c2azLhPa0nNnpS9LCCGEaAGucBEFS96oHgI0/hqC85+m5MPnSbviQSJacp3Xe10OrB6D2Lm/Am9CId8q2fTvnUROiu+IExFbkqNsLyVfvI624XN8/U+h/Kt3ieRtJvXs64js2YAZCeEdfDqKx4+r+3CirtROtVqaU1XITvNRWBqu81zXDH+b/E1E+3DyySezYMECPvjgA/bs2UOPHj2afZ+A1iBBgBBCCNECIgk5pE27GyWQRsyTQdr036A43cQOW6PfYeuYyqENvvxuJ326JDKgezJep4JlUetmU1EOrVJ2+P8Pp2JQEQPNoZJsBjG0ALrqwWFHccbKiRy2W/DRGCm9STz9MsqXvEH5irk4EtPx9j6B4MK/48rsTuD0Kwkn9MAxrA
sxpYEbi3UwF47rzbc7ag/9cagKpwzJ7nR7bjREzLTQDRuf29Hg/YI6K7/fz4wZM9o6G00iQUAnZ9k2UcPG61JbpIXGHS0EG2LeTNyVezHcqRiOON0BUgghDmPbCnrGoOqbYxv0pB6oe1ehFubi7D4CR8V+wgd24+k5HBuFQjuVhStyWfTVHhK8Tq780UBOGpBes0eAwzZQcr/CkdUP2+HG2rMGpdfJwKGgoqwywtzPd/D5uoMk+d1cPSaFUYmb8PY7mci3H1K+7hNSp9+D5fKj6wZRZxJJZhBL0Yi5Un5QAgXU6rkIjsR0ksdOxwpXgEMjYfBYorvW4ByeVb3OfSe9H+6akcDV5w9i8aq95BdV0qdrEhed0Yfu6V6s9ruadbOzsdmSV86L72+ksCTMqUOymTGhH2l+V1tnTTSBDGjrxIoqojw1+1vufPJzXvpgK2VhvVnTd9lRKj55kdL5M9H2f0Pha7+HfWuPOGFGCCHizeGt4y69jLLPXqXs81exdn/FwTf/iMPSKX3/GUIr5zNvyU4WfpmLadmUV8Z47p1v2ZpXVnO+Uy+jfOmblMx9nIqP/kbpp//BESmted6y4eUPt/Hp2oNYNpRURHnmwwNsC/kpnfMoFV++gxWuwK4sZuv+EPe9vIlfP7uUhasLKd+yEie1fyO0kp2Uf/4azpy+JJ16EcEP/wmqSsaFt1Hy2SwULDi4GbWRG5l1BOWVMWZ9uIWe2YlMOb0PSQkuPlm1B1WNr9unvKIwf3rlawqKw9g2fLnhAM+8tRbd7KTRX5yIr09xHDFteOHdjazbVkTMsPj0mzw+XpVHc/bfxRQ3gTOvwQyVUPzuk7i7D0HpMlTGSQohRD0izlRSp/8G2zSI5e/A03UAJV+8jlFWSGzEdD79Jq/OOZ+v3VczATXiSiNt6i8xywqI7d1EyqTbiPq71ry2uCLKN1sK66Sx5qCjugUfSJ1yBwW+fjwyazMHiiOEwjovfXqA3KQT0XHWOs9K6k7i2EtIOu9naKldUDSNkk9epWD249iWiTM1B7IHH3FYjKIoFIdifLu7mG37y4noZqPfu7aS7HeTnuzhy/X5vLtkJ6u3FjJ6SDaGEUfdAMD2faV1hnvlHqigoJ75EqLjkCCgk6qMGGzZU3ulhpWbDmI0YQyjO3wAd9V+VMXGU7YTlxnCDpdhG9WtR2Z5IaoRPUYqQgjRNhx2DHeseny3K1aC0wi1eh7scDm2oeMIpGJWllYfM3UUI4pWz2ozCR5nTcOKUy+lbNmbKA4nqsdP2eev4gofrHmtw6FQX6O816lgm9X1tFFaQFVlGPMHvwX5wao65+kOH+rwyUTd6URT+pNy9vWADbZF4ikXQa9T0R0JRyzr9vxy7vnrMmbOWsMfX1rF/81aQ1lV8/ZItzSPpnLnFSdy0qBMUhM9XHHuAEb1S2/rbLU6l1b/ErWyQlLHJn+9TirB42Bwr9rjO08dmo2zkd22LqKEPn+F4tmPoWz7lMLX/wcluJPwxs/x9hlJxlX/gxWpwgrmynAgIUS7oyg27FxOyexH8VbmUfHRc8RWv4tG692UeowSiuc/hcMbQEvJwQyHSPvRj/H1Own31g+ZflafH+QZzhzZ5VBLu6KhJaSQOv03pF32AM60rqBUT+1TMck0D3D+qd1rpeFQFU5Ii5AwaCygUP75q3SPbiM10V3rdTk+o973wrQVFAWcxTso+ehfoKjg0ChfMRf7wKYjLncaMSyem/MthnmoxXzHvjK+3dnx1tfPTHRz68XD+OPNY5g4ujteV9vs2dCW+ndPwu2sXe5Th2aRmdT5NohrD6699lomT57M1KlTmTp1KmvXrm2R68jE4E7KoSj85MKhvLV4G+t3FXP6iBzOPrFbo1dviOEmMOEGgq//ntLF/8bb/xSs9H54UnuBbRNxJpF22e+IuRJlOJAQot2xbQWtyyCw51Lwyv0oTjeJ4y4npjhbbVJr1JlC6kV3gW1jpvYh45LuGGVFeE6/Gm
w4kwApiT4+WrmHlICb80/tSY+MQy3tuubHNeZKdNWNbYPv3J8RpXpiprNkJ0VvPsz5p/+EbpP68fHaQrJT3JzTT6F/so7d5QpSc/oTWrMIR1Y/bpvh4KUFW6io0rnkrN7075OEcdhwICc6Ok4UBRxWDDtWBbZN2sW/RHH5CM55DCtcgWKZ2ErdW4lQWKeorO6Oqpt2lzB+RBdMs2MNp1EAp0OJ29+3rCQPv/3JKXy0ai+788s5Y2QXThqQWW/PUzwwDIN58+bx2muvUVhYSEZGBldeeSUXXXQRmta0W2vbttm9ezeffPJJk9M6FgkCOrE0v4ubLhpK1LDwOh1NqrwULMzgHqxwdfd5dN8WEiKlRH1dal4TcaU2Oc9CCNFiHC7UhBTMimIUpwfcCa26rKVtg57ar+b/Vd6uKL5u2LaNqoDHhlMGpHHq4AywqXesvaG4a4IWnUMrs1j+bLz9RxNe8k+GaS5Gdh1M4tgZKAkp6Jofy1JQup9EIHsQMS1A7yz47+tOxrJt3JpaewJzNEjVsln4x1yKFS4nsm0F7pMuJuPaPxJ2VQ+FSb/qIWKuFCzqbxX3e52kJXoIltcOBAb1TO5wAYCo/rzmpHi5fuJAbOzqUWHxGQ9hGAZ33HEH69atIxKp/nyHQiGeeOIJPvroI5566qkm3bzv3LkTgJ/85CeUlpZy2WWXcc011zRL3n9IhgN1cgrVYxqb2nqh2Dr5US9lE35N4IancCQkY5cX1jv0xxUrxWFVfzE8RufaQVII0TGpKhi7VmEE95F28V0oDo2qle/gVBo/HOjw6q+hoyDtH9w82baNuyofZftnOBUdbd/XOIM7jrvO1p0BfEPPrE7TiKHoYUhII6oGsCzlu2uptfYo0FQFv1mGasVQlOr6WlFAsXRiB3YQfOshiuf8L0bpQbD0mgAAIOJKP2IAANW/OzdPG47mOPTG9OqSyAl94288fWdiWTa2Fb8BAMC8efNqBQDfi0QirFu3jnfffbdJ6ZeXlzNmzBieffZZXnzxRWbNmsXSpUublOaRSE+AOKaIbvLe8n0sWL4fy4auGSF+ccndeJPq7mLpMiqo+Ph53F364+kziuA7j5N60V1Ek/ocIXUhhGh5lgXOvmNI7z6UaEIXUi7+NbbDRcx2HvvkeiiKhbNwMyRmYzvcKEU7MDMHYiruY598GFUB48A2Sj/+F55da4jsXIN/9GScyd1rDc85Gt2wKC0OEt5bSGJWPzTVJrZ/K+Elr+AZd/URJ++69FLKFz6Nt+/JuLoMIPjun0mbfjfRQA+Sz7qW4LtPApB0+hWEncnHVS6AfjkB/njLWPIKK/G4HPTI8uN1xt94etG5vPbaa3UCgO9FIhFeffVVpk2b1uj0R40axahRo2oeX3LJJXz22WeMGzeu0WkeiQQB4pg27y3jvWW7ax7vKwzx3LtbuPuqUWg/6EsyNB++weMo/fDvVKyYiyunP7b3hxvQCCFE69M1f83GVlFvdpPScsdKCb73DI6UbLTkbMJbviTzqgcJJ3SreY2qKsfcVdayq4MTz641RHZ8g5aShWfkZKLHCADceimG5idmK/zn/Q3M/WIn2WleRvW7nKljuuJY+iKevidhaN4jznkwnX58g8ZR9unL1Wn2HIblTsQdyqNowV9xdR2IWV5IcN5MUqbfS7QRQz7TAm7SAscXGDUn07YpKI0QjhrkpPkkCBFNVlhYdxne43n+WFatWoWu64wZMwao7i1sqbkBEgSIo3I4FL7efLDO8e15pZRWxkj/QeVu4cCT1q16BQnbwpnTD1PztlZ2hRCiVUScqaRN+zWFs/6AfmAnKRfcSiShes3+yqjBup1BNueWMrJ/OkN6puD+YYvJdxRsyFtLZMdqtJRsjJID6FuX4BhwFqZSfyDg0kso/+CvJJ1yIUaogtWbHdx2bgZ9yOOVnRo7i7IYcub1GJoX2z7yqF9L0XBl9jqUbk5/TM2HrWgknXE5jl4noxhhzIM7MJyBI6
ZzLO5oISE1kZIqmxS/k0SjmIgno8WHlFi2zYcr83hz8Tagevffu64cRYpPdrkVjZeRkUEodOTlhTMyMpqUfkVFBU899RSzZs1C13XmzJnDH/7whyaleSQSBIijsiybLhn+Ose9bg1PPS0qHquC4vlP4crui3fgaZR9+jIZfUdhpgxojewKIUSrcJqVVH77SXWDh6oSWr2QxKx+xNypvP9lLguW5wLwxZp9/PjCIZw5PLv+m15FQU1IxT96Mp6Rk6sDgKQsjHpW3Pme5fCReMpkSj9/Db04n9tHTUfbugS7vIArzr2LnZUxdEf6MVc9cutlBOc/ibvnMNzdh1K+5HUye59AONAbZcDZmLYCziSU3lnYduOWgXFHCtiZe5CnPtlGYWmE7DQft4910KtbiFhy70YFAlHDYm9hiC83F+JxqfTJSSLRW/f9KiyL1gQAAPsKK9mws5jTh1X3AlVGTQpKwyT6XKQFJDAQDXPllVfyxBNP1DskyOPxcNVVVzUp/QkTJrB27VouvvhiLMviqquuqjU8qDlJECCOyrbhpIEZvLd0F1URo+b41RMHEfA668wJiKgBUi+6C8vpw3AlkpnTDz2had3uQgjR1g4f2qMoCoqpY1aWkDrlF6gJyZR99A8UUycUNfhgxZ5a576/bDdjhmbhVBXcoTxMdxK6FsBTlY+leYil9EFJ6sHOEp1i14lkeRPIVJQjD+NxuHE4ffiHnkHp1wtxrJ6NDURPuJiXV0S4alLdhpv6RLQk0qbdjeXyYzr9ZHYfQsxfveLb4Tf9jQ0AAGK6wYtfhiksrb5hOhCs4s0NifwivRJsi+NdnySim7yyaBtL1+2vOZbid3PPdSeT8YO9DyKxujsUl1fpKAocLIvy2MurKKmI4lAV/t8lIxjZNy1ulwAVDXfRRRfx0Ucf1Zkc7PF4OOGEE5gyZUqTr/GLX/yCX/ziF01O51gkCBDHlJno4fc/PZXNuSWUV8YY0COF3ln+I1aW33eJA4QDPVsrm0II0SI0Kwy5q9G6DEWxYliFuzC7jsJ/zi3oDh82CkkX30NU8eEyLTKSPRwsDtec3y3Tj/ZdAFD0+oN4+40m4aQLCM75E1pSJoFJd7B8V5Rn31oHVAcc910/mj5Z1TfzDsXCPGxYjydWQtG8P5M8/nJUhxPL4URLzUFNCHDNBUPISnbR0M3hI4fNYWiJ+rrKk82ewq21ju0uiBJJG4nruwDAsuFAaZhQOEaK30NGkvuIAdDO/IpaAQBASSjK+1/mcv3EAbXOy0710rdrEjv2lQHV7+vwPmkoisJHK/dQUlG9w71p2Tw/dz2P3jqWgLv1bot0y8aybLwuR/WqO1T3crgcatyuv98RaJrGU089xbvvvsurr75as0/A1VdfzYUXXtjia/s3p46TU9GqFGw8lfuIeTOwHG66qUXkDE7G0Jq+rrZu2YTCOn6v86g7GGt2FPO7TXGcxGqtiX24759TFHCYUQy17SahCSE6Hy1STNm3n6DlbSJ2YCfuLv3xZfbDtgxc4RLsQAZK+UEcCZlEdRc3XTyc4vIIBSVhqiIG407IARtMdyLegWPxdu1DxeevYOtRks68gvywxp6DRST5XZSFYliWzZuLt/KbK0fi04NUbVqKa+i5mIoTU3VjaD4yL7+f4Pt/waoqJ23SLcTytxHb9zXpvfsR3bQEV9ehFDiy8HucaG14R5nggnNGZbNw5aEb93NHppOgmeholIV1XvlwK6s2Vc89UxU4/7ReTBrTE6dDJRTRCXiry6AoClv21L/s9NK1+7n0rL74DtvN162p/L8ZJ7B5TwnllTqDeqXQPd2HolT3SBwuHDXQdQta4ecjopus3h5k7uc7iMRMJo/tRf/uKby/fDebdgXp1SWRaeP70TsrAYX6/3Y2UFal43Y68LnUuF6ysy1omsa0adOatApQeyBBgKhDUcBZtJmC2X8iccwMXN0HUzT7MXyDxuI69XIMtfHbhBeUR/nL2+vYc7CCPl
0SuWXa8DqTiwGcRjmRL9/EN+wscPkILXuDhDOuIeZOq/U6VzRI5Rf/wT/uMuxIJeGNn+M55VL0JkxiE0KIwxneDLy9hoOp4xo8luiBnSixEKGv38OT1Yvwnk0YQyay4UAlX23azapNB0kOuJk8rjfrthVy3sndAdC1RBIGnEThOzPxDzsTZeQUXv+6ivmrvsSpqUwa15uvNhwgryBETDfxlO2kfOV8fP1OxNr5JVYshmvAWMJfzUYZdhbJZ12FYpkULXgOK1xBYNR5RHaspnzFO6heP3tG/D/mr6vipouHku5vm8YRZ8EmzvFsJG3cAL7ZZzG6u4MTjJWYG3fCkInMWnQoAIDqXoH3l+8m0e9izdZCNueW0K9bEjdNHUZ6wE3yEcqR5HehOeoOLUryOTl1UOahAzaYps3E03qyfmew5vDpI7qQktDy8wIUBT76Oo/Zn+6oOWZa8Mi/V2J8t4nahp3FbNz1FQ/8+BR6ZdYd2hWKGLz16Q4+X7OPxAQXt04fzsBuyTKUSRw32SxM1GHboPgz0FK7UL7sTYpefxDb0PH0PRnT0fgfEtOGlxZsYs/BCgB27i/njcXbqW/vSMUyMStLCM5+jOI3/wej9CCKXfeVim1ilORT/Mb/UDznfzErS1HsQ+NAnXoFnordKIpdvRlOxR5cMdnATAjRcFq0FKMiSNmKd6lYvYhI7npsI4Z/7BWgOYkV7WVDgcKBkkjNDW1pRZR3Pt1O7y5JbNtXCoC7Kh+9MBdnSjahbz9l074wc78qxLRsIjGT2Z9s55Sh1XOoZpzoh3AFkbzNlC6dTckn/8GOVqJYBmZlKcVv/ZHiD/5B+ablpP3oJ6CoVKz+kPIV74CiEhp5Ff9ZWsT2vFLe+WznseYItxg7uRuByr1M7Bnm/mtHcd4QD97CjTj7nMSB0hgrNtZdfQ5g7uc76ZmTCMD2vDLmfLYDGxjcK6XWBmTfu/Ts/rjqOX4kA7slce91JzP1zD7cMm04l07o1+AN35qiOBRj3hc7ax6nBNwUlFTVBADfs21Y+GUu9XUErN1RxOdr9gFQXhlj5qw1lFbGWjTfonOSIEDUK+ZJJ/nMK2see/udBFkDmzRBLKqbbN9bWuvYltxidKPuzX3MlULiuMuwjRhWtIqkM64g5q277FbMm0nSmVdhRauwjRiJYy8l5koGQNPLCS99mcJZD+I8uBFXyQ6K3nyIio+ek0BACNFgtuLA23MYitODWVmKf9h4SMzELMmjYtUHJJ44kdKYk9355bXOq4wYOJ0OCkq+mx+guaja9S3J42YAUKrXbXlOS1C5f2ICQ9wHqdi4lMDIc6sbNzQX3hPOJepOJ3HspdimjlkRJGHAqST0PxlP7xE1aajdhvLJ/gBFZdU3hptzS9DNtgkDYs4kEs7+L6weo4nqoKcOJHnafUQ9mVRGjrxbczhq4DpsBbpNuSXohkV2spd7rxtN9+/mS/g8Gj++cDAj+qYdKal6aapC/y6JXDyuF6cMzCDgaZ2BEZVhHeOwv0VqooeCkqp6X5t7oAKjnskdO/fX/pxFdZPyKgkCxPGTIEDUy1W6k+D8p0BRUX2JhLetRF//IQ6r8RWNz+XgjFFdax0bP6obbmfdj6EnVkTwnf9DS8nG1XUQxe89g7tyf53XuSv3U/zeM7i6DUZLyaZ47v/hiVV38doOF1pyNtgWwbn/R9Fbj2AbMbSUbGy1cbuECiHij+30EPr2M8DGN3AMFas/xC7ajVG4h5Txl1P6xRt0DVgM7V37RjQnLYGSigg9s6uHJ0ZdaSSffhlFC/8OikpXv1Hr9S5Npa+5k275iwkte4PA4NMo/2o+3r4ngqJStujveGMFFM89VDdWrllEybK3iexcXZOOtfdbJiZs5OS+1dc968SuuLXqBpxQ1OCb7UE+Xr2P3IJQq/QQ6FoAi+obetu2iTqqb+BTAu56W/UBMpK9lH43cRfgrFFda34remf5+e/rRvP0L8/isZ+N48zhOUfch+
FYWnsETXLATYL30O/P/qJKemYn1vvakwZm1jufY+SA2g1i6Uke0pMaP0xXxC+ZEyDqp7lRXR6SJ9+BmpxNybtPoPoC2GrTdlucMrY3SQkuVm0u4LSh2YwbnoOCjREqBRw4rDAoTnRnEklnXoEjqx+26sTK+xbDnVwnPcOdTPKEa1C7jUAxY5gF29Gd1RWqqXpwjrwQX2kBVVu/BBtcXfrjO+1yoopsYCaEaJiYmkDCSZPwn+bFSuyCu8dQ7OTuqOkDQA+Rcs4NJHXLwllkc92kwXy+eh/ZaQl0y/QTqorRO/vQHCXL6UV1eUmadBspKT34tXqAd9dV4fc5mTo6g16JVZjp52CGSnAkppNy9rXECveQPvlWLMVBzJVM4hmX48jsh+1woZbuoXzFPFBUUqf8AhQonvdn3EVbOaXvCfTvk8PYYTnYdvWQzFc+3MKKDYeG4Pz3DaPpm902c6jS/C4uHt+Ptw5by/97V503kH2FIXpkBxg7PIexw2rvs+ByKHTNTqSwsKJDTYoNeDR+OmUoT72xBqju8dAcKhnJnpplVAH8XidnjupS7ypJg7olcduME1iwfDc9sgNccFov2QlZNIpid7CZJMFg6JjbsHdkGRkBCgsr2jobAHiMUqLOJGxbwW2U1mrNaQpFUbCpHupo2zaugvVUfbuYwFnXUbXmQxyBNOh/JjhcNX/rw9forpveodacH/7fVbKD4OzHsI3vejAUlbSpv0TPGtKkoU3NoT39rVuTlLt50mqMzl5/Qst9vg6vW35YH9V6zqEQipgUlYZxaiqZyR4cPxhs7jHKiDoTsW0Fl16K4UrEVtTvbvhsXGW7wZdCVEvGbVdB8R70tP7YigPbrn19VVUIKBVEivYTS+kPCriKt4M/nagrtaaeBSgoi3DPX5fVyssZI7pw44WDMdtouFDUsPh6ayFvfLyN8soYXdITuPJHAxjUIxmnQ8WyqVWGw3XUusSyYW9hJWu2F1IZ1hk1IIPMVB9b95SwfW8pPXISGdY7jTR/3eFitcqsKNWfvU7+nYbm/1s3tg79oX379tUsEdq1a9djn9BAixcv5plnniEcDjNu3Djuv//+Zkv7cNIT0Ekd6cb4eES05JpWiKiW3FxZq6nMbarzhtNDJHc9kZfuwdYjpJz/MyzFUetH9mg3LoeX7fD/u41yit97GtuI4Rs2HtXjJ7TqPYrfe5qMax4m7Dy+MaRCiPh1eN3yw/qo1nOmjc+p0iMj4YhpRbQkHJaOpTqJOZNRbQNssFEBhWhi75rXRhUfpA367kJ1r29ZNs70bMrM6iE2ChBN6V+TscNz6tRUNIdSa0x6SqKnTVvS3ZrKuKFZnNg/nYhu4XM7cH23ys/35exst7iqAj0zE+iV5UdRDpVzzOAsxg3NbnigbtsdqhekM9m4cSOPPPIIu3btwul0ous6vXv35r777mPIkCFNSnvv3r387ne/48033yQtLY3rr7+ezz77jPHjxzdT7g+ROQGdkNMox7H9M1xmJe7KfWj7vkatdw2etmfbYCd3x9trOLYewRFIR+s6GKsZPpoRRyIpF92J/8Tz8Zx6GdrIKSSOmUbq1LuIuCQAEEIcmaYcWmXMgXGUVzYibSuCuW4+rpIdaIqJsn0JWv5alEbU05oVpfzrD3BHg7hiJTh2fI5mRep9bUqCi2vPH1zzONnvYtzwnDbvHbJt8DgdJPucNQFAPLBtu85739Z/C3FsGzdu5KabbmLz5s1Eo1FCoRDRaJTNmzdz0003sXHjxialv2jRIiZNmkR2djZOp5OZM2cyYsSIY5/YCNIT0Ak5qoopXPxv3L3XoOfvQPUlkjy1H1EtqdmuoSjgKs8FpxfDnYJWshMzqRuG48itX/WnY2NtX0rV9q8JnDqV0NcLqFz2Op4zbkCn6ZN3o4GeOE/uUpOWY9gkdNUprSdCiCNyxUqIrl2AZ8QFEC4ltm8TjsHnYCrNs9a+Fi6iZOV8Ql8vIGH4BEKrP8CRlEnKjD
7HXU9rsTKCn72Gw58KDgdGyQHSuwzE8NQ/UXTcsCz6d0+iokonJ82HvxV3yBWiM3jkkUeIROoPtCORCH/84x95+eWXG51+bm4uTqeTW265hfz8fM466yx+8YtfNDq9o5FvfycUTepJ8jk3UPrRPwFIv/wBws0YAAC4zBClC/4Kto1vyDiKls8mfcZvMNIGH/vkw9i2gtZjBFkz7iaaPoi0nsPB6SXaDAHA94zD0jIVZ+frWxZCNCtVr6Ry4xIiu9dhhkpxZvYk0H8sprN5goBIQjfSpt5FcM6fCK3+ANUbIHXqL9FVDw5br66nGpqWJ5OsGb8i/5XfA5B+yb1EPFlHfL2qKGQne8lOlsURhDhe+/btY9euXUd9zc6dO9m3b1+j5wiYpsmqVat4+eWX8fl8/OxnP2POnDlMnz69UekdTfz0u8URV2U+5UvfQvUlVm8as2IuTqN5J09FVT8pU36BURGkfPlsAqdchJHat3FpudJIGHQqpq0STe5HNKH5JtcIIcTxivq7k3LuTzBKDmDrEZLP/QkxZ3Kzpe+wDcyygprHth6FSAX2lk+xt31+XMOPXNEgwU/+g+L0oLi8lH32Kq5oUbPlVQhxSGFhIU7n0YN0p9NJYWFho6+Rnp7OmDFjSE1NxePxcO6557Ju3bpGp3c0EgR0RqqGK6cvaZf9jtSL7kRNSMRWmrfTx2HrxPauB8sARSW89Su0aGmzXkMIIY6HSy8jWpALgCd8AKdZ2ah03BV7KPnwHzjTu6G4fZQs/Bveqrzmy2f4AKWL/40jMZ3k0y/BtkyK33saFZPKbz9B00MNT0zVcCZlknbJvaRd+t9oKZmgSie/EC0hIyMDXT/yJncAuq6TkVF3c9OGmjBhAkuWLKG8vBzTNPniiy8YOnRoo9M7GqkpOqGoJxPfuT8jggslOxUtcwBGM41l/Z5qRokc2EXi6Zfj6TOK4nkzIVoJcb5fSVXMZF+wEqdDpUuqD1cjN7ARQhwfDYPI13Mp3f0tKefdSHD+0/hPvgB16CSs41wO2HYH8J9wForqwJXVi9iBncT2bkIblI1hN/1nM+rLIXXy7ahp3VBMnRRVA1Wl/JsPSZ1+L5Hj6HWIOZNIv+BmikPV4xy9428k1kz1vaKAw4phKC4cdgxLcWLTtksrC9GWunbtSu/evdm8efMRX9OnT58mLRc6YsQIbrzxRq666ip0XWfcuHHMmDGj0ekdjewT0M40Zi1cRQGnWUlMTcBhhkFRMdXmvemvj9MIYTlcmIoLj1lBVAs0esJtR13v+XDlYZ0nZq1mz8HqVrwzRnbh6nMHHDEQ6Axlbgwpd/Ok1Ridvf50hw9SPPuPWJWlaGldSb7wLqLuxq0E5lEilC94htj+bbh7DcN/1k+Jqr5my6uiKLijhUQ3f0FozUfYho5tGqRNuQM1pSsRd3VLojtSgKX50DX/EdNqie+UooAzuJXozm/wnjSF6IZP0NK6YeSMaDeBQDzWJa1RZpvq3zPNoeB3O+vdo6G1tad9Ar5fHai+ycEej4fnn3++ycuEthZppuzgFAWcBRuoWPg0XqMYY+18rI0f4bBi9b7eaVTgCe1FVRXcVfm4GzB2tLA8yqdr9/PRN/vYur+cikj1eFVd82Mq1ZuZRByNDwA6i+37ymoCAIAv1uxnf3FVG+ZIiDhj6tX/ADsWrR6u+J3CiihfrD/A0g0HCVZEa53miR7EV7ETlxHCW7kXT+UeduZX8IX/fDYM+3+Uebtj7V+PtyoPVbFwR4twV+U3KouH9g2zUUKFlH81H9WXRMr4K1Gcbko++ifG7m9wl+/GEzlIyZzHiCx/Dadx7CFCGrWHKWh2/b8DDWLb2JEQoW8+oHjW7yhf9hZWVRkK5rHPFR1WWZXOvxZs5q6nvuDuZ5fx6dr96Gb7XGK8rQwZMoTnn3+ewYMH43a78fv9uN1uBg8e3KECAJDhQB2ebYPiTkAv2E3hS/dgGzFSzr8ZS627s6+qgrVzBcVfzC
L1vJso/uwV3N0G4T3rxiMuxxkMxfjDP1dQ9d2Nf5LfxdQz+zJuWDZOtX20BrUXMb1uRakbUnkK0Ro0DCKbl6AF0kmadjfF859Cz12DOvg8iip0HnxhBZXf12MJLn7/01NJ8jlx22HsolwOLnweX7+TUYA9KSfz0MKKml6TAd268es+AWLzn8Z/8mTK1iwCbJKm3Ve9mdcxuCv3galjJPVAK9iEndSFmCsFI60Paef9BKOskPJvPiRz+q8wyosIfvB3FFVDcbqxqsowSgtQzNhRf7GdRojYN3PxDDmDWKAnrrJdRLZ+iWvkFHTt+JZuBrBRULoMw9NrOJHd69BSc9D6jCYqtw2dlg289ekOlq7bD0A4avDv9zeRmuhheK+Uts1cOzNkyBBefvnlFtsxuLVIT0AnYAWy8fYehW3EUH2JaF0GY1E3CLAscPQ5FVdOf4oX/AVFUfGPveyo6/HvPlBeEwAAlIVilIWiFJSEW6QsHVmfrol4XIfe964ZfrqmH/+PrxDi+BlouEZeSOaMXxIJ9CRl+r2oA87EshV2H6ioCQAAyipj5BZUDy2IKl6U9J4kj7mYqi1fEs79lg0VybWGTW3NK2e/ko2r+xBKP/onRskBks+/tUEBgJMYlStmE3z7Ucy171I0509Y+9ajqqAU76Xk09co/3ohRkk+sbIilJwheHuNwNYjWFVlaKldSDz/NmLu1CNew7Zt7D1fE1qziOK3H0XLW0Vw9qOEvlmIvX8dinL8DTaqYmNu+4LI7nV4B56GUXyAyOr30Jp54zTRfpSFdZZ9u7/O8S/W7sMRR5u4HY+uXbsycuTIDhkAgPQEdHiqqmDvXEXVluUkjplOxdfvU/HZS/jOuRmduvMClFgFRnH1l9yMVGCFilDS0o44lMftrK9HQcFVz/F4l5Xk4fc/PZUd+8twaQ76dU3C55L3SYjWomt+XGkBKKwg6k6vOe6p53vo1qqPKVjYoWL0YHW9aBs6fmfdHjyPBnr+9urXmAZm8T7UhGws++g3Rzou/GdeS+z131Hx5Rx8A09D6XUKlgUYUVxdB5B41nWE136EFQ6h6VXECnbXnG9WFEO4FI4yWVhRFJQeo/D2W094+0qK33saAO+gsShdhjVqTLdlK3i6DiFl4k0ovU7B2/8UVH+a9AR0Ypqq4HY6iMRqD/lK9rvbxbwA0fwktOvgLMtG7TqM9Ol3o55wIWnT78V/2oz6AwDFxizMRfX6ybz+MTw9TyC6ex0OM1pPytV6ZwcY1ufQxLoT+qfTNd1PemLLTzzuaGwbMpM8jBmcxUn900nyNd+GZ0KIxuuVFeCEfoeCglOGZNEjq3qirWaGsStLiORtJfW8n5I8ZhqDXflkpx7aTOuSCf3IVkswq8rIuPohEkadR3jrVziMY/eIqpgY+zdjVpahaC7CO9egVlTPJ9Azh5Aw4UbM0gN4+5+Mu+/JlMz7P8yKIK6cfngHnIKtRwjOfhRv7Ojzt3QtkYQTJ9Y65h95HrrW+AmQEV8OZq8x6LaG3mUU0cSejU5LtH8Bj5NLz+lf65iqKpwxokunXlAgnsnqQO1MQ2bAu+woMcWNioVixTDVhq/LqWLg1ENEnck49TJshwtDPfrOkVHDIq+okqhukpbkISPgobmnA8gqD/FDyt08aTVGZ68/4cjvc9SwyC+uQlEgJ9WH67DhDU69As2OEHWl4jJDKLZF0EokP1iF162RmezB6QC3XkZES0Yzq1AsHd157J3YHVYUa9PHWNFKfCecS9miv+M/eTJ65lBsGzyVeyl87Q8oqkratLuxy/Kp+PZTAqMmoiVnEN71LVpKNlpKDoY7pd5VgjIyApTv+Jbg7EexYxEUhxPb1FHdPlJn3EPU36NJ72l7FY91SUuXOWZYbN5bymer95Ga6OHMkV3onpFQPWGgDbWn1YE6E+nX62Dclfuo+OJVEs/5KcaBbRhFe3GMmIx5jBv571loRL/rVm7IDxhAghplYPRb1C5DUKwQVt
5arG4nHtfW9kII0ZbcmkqvzPqX2dSdAXSqbwoijup60Qf0zTl0o2DbENGSATAcPuqZdlUvU3XjGHIuqmUSdnjxT7wN3ZFQMwTTdKfiG3o6Vd9+StGbD6H6EvEPOwPV6eLgqw+SeNpUVF8ShbMexDdkHK5Tr8D4QcOP/d1KPrYewztoLIFxl1Hx2SuEd36DHalECRD3q7eJhnFpKif0TmVUv3Rs265uNJDPTqclQUAHpBfkEpz1W6xwiIQTz0dr4S+oFiml6OMXcWb3xY6EMEMlpF3eB9Pd+B3xhBCiPdI0FdO0mzQGWlHAYVShRUuI+buBCa5IkEhCV2Jq7VZVXUvAf+p0wlu/wo5WYVWV40jugp3SHWdGN8q/fAd4B1QNz4DTiNXT86soCnrWUDKu+B0kpBBWE/GNv47EMRdT5e0qN3HiuJmyLGhckDkBHYzuzyEw+kKscAgUlYSRE6my3S1ax0cTckidcgexvE3oRXtJu/hXNRvZtAeHr3zRiEUwhBCCqGGxcmsRj/zna+Yt201p5bHX2FfV6ppXUaonGAMo2LiLt6F/8w5Fs/6AK3819pbFFLzyAM6D6+vUUU4jRGjZG9jRQ3uKlC7+N3a4nMSxl9Yc8/Y7CTu97xHzYtsKtstPxeJ/4QkfgPIDhL75EKcRX8NlhBANJz0BHYyzeDtFX8zCN3gs5Y5U3ly6n8XrN9KvWzKXTuhHRgtM2HVFiihb8jqKywOmSfnyt/BPuJGYK7nZr3W8HLaBum8NanpvYu7U6h9ZfzpRX05bZ00I0YGs3Rnkb7O/BWDz7hK27CnhzstG4DhCy4IrVoy+ZQmuIRNQQoWYJflYvU7DaYQo/XwW/sGnYlsWhXP/DICWnI2SlF1nWI4jWkp4ywoUp5u0ab+matNSwpuWopQfIPjhP0HVcPgSCW9dgTO9G44h5x15R3jLxCjOJ/jGg9iGjqv7ENyWbO4lhKifBAEdjO3PIuX8m1G6n8iStQXMXrgNgFWbDnKgqJL7bzi51oS3ZrkmCo6EZJLOuwX0CJVrPsRuJ03uroq9FLz3DM6MnvhPuoDgB8+jJWWSfMn9RNX6x/8KIcThHA6VT1fl1Tq2cVcxJaEY6YH6b7iVcCnlK+bh2v0tetEenNl9CXQbTlRLIuX8m6lYMgtf35FUbVsFQNKZVxLzZtQZmhP1dyN12t0oDo1Ycm88o3Nwdx2ImtoF1ZdI8nn/hRrIpPS9mTgSM7DUI/9sx3yZJJ15JcF5MwFIHHsJkXbQWCOEaJ9kOFAHE3MmYfYci6V5WbahsNZzeYUhyir1I5zZhGu60/Cd9/+IJnQlltIX7/gfN3hScUuLBboQOG06emEuJQv/huJ0k3z+zyQAEEI0mG3bDOhZe0fUxAQXPvdRbriT+5A84Rpi+duw9RgpP7qRmJaIZlYR2b4SLSmdqm2rUH2JoGoUv/8srpLt9Vwb9LT+RJN6V/9fS8DsOYZwQg9SL/sdsbRBRFypJE35FVaP0fVuBPk9d+W+6ut0G4yWlEXx3CfwxIKNf2OEEJ2a9AR0QN/P2D9pUCbb9pbWHM9M9RJoobXpv993wLZBVxq+JGlLsxQXzvRuNY8d3kTwyNJfQoiGsyyb8SO7sGVPCVtyS0hMcHHHpSOPutmfqyyX4OevVS/dWVZI4eL/UDrkEtLSkvEYOs6UHLTkLJJOuRDVl0Tww3+AVn+vwg+HCH0/KTniSKw5FtWO3fBiuFNInnANarcRKGYUq2Anuqt9NNg0lGbHMBQXAE70o+5oL4RoGgkCOijbhjFDswmWRfjk6730ykniJxcOwaM1vXNHUZTqiW6WjqVo2DZoGBjt8OPirsil8L1nUFxefP1PoXLDZ5QteJqkC39J1CG9AUKIhklJcPHLy0dSGorh82gkuB1HX1bTnUDC4HEc7HEOVvF+PFX5vP5FHv176Uw/7XwsRSO554nYDjc6DtKufKjmpl5TTAy78buJK1gotoWlVNfJll694aPh8KH2HY
9h2eAEpWdm05cGVaAqaqIoCqpi4XRoqN9NhFawj9ozcbzcFXsIr1+Me/QMVCNM5YrZ+MZcRsyVWu/rY7rJgdIwxeVRfB6NLmkJuBztY6iqEB1Bm9zVPfPMMyxYsACA8ePHc/fdd7dFNpqdy6pCd/iwbXDZYWJKw9bu/56Nze59JURjJjaQ7AHHYS1HLiLE8KBgo1lhAh4fV0/ozsWn98TldKKYEXYcqMC2LNxuJ163Rk4ACkKgAMl+Fy67Og3NDqPaFjE1oSZdgMLyKCs2HqAqojN2QCLpAQdVlovU8F7cKVlE3Wn1Z76NmN50Ek44G++QMzADOTiSMnBm9iamtf3mJkKI5uO0wujf7YdyeJ11JFHToiwUIzHB1aDGEc0Ko6ou0hPdeO0KorYXh22i2BaqrRM0PER1yPFb6IqHIpKJDLmUdz7dyebcCJmBDLYfCLHjQIRzT+6G1+lAUTRcoX2YB3ZjZ/XGpwRRNCehdZ/iPnEKscOGVSqKQnlEJxo1SQm4jjghWcFCy1+LFa1C7XUaTr2Mog/m4xlxPrbqBJSaRRsaEgAYlk3Jd4GP/weBT2mVzser9rLoqz04NZXTR3Yl2e/i5EGZZJetxzZj0OOU4woEwrpJRZVe/Xt0+IZtdozwtx9Tuf4zzMoy9OA+zLICvP1ORumWVmvJ1uJQjK15pRSXVwc/X67PJ68gRJ+uidw6/QRSE1wNzo8Q8azVg4Bly5axZMkS5syZg6Io3HjjjSxatIgf/ehHrZ2VZuWKlRD69EX8o6egOJxULH0d/9k/JdbAm+aIbjJ3yS4+/GoPDlXl/DE9ORis5LJx2aRnpOOqOkD5xy+QdM5PMcsOEtm2Et9p04lu+AR/IB16jOKVxblEcFEZ1vl6cwFup4PLzh3AohW5FJaGuWRCH840lpE6fBxmcA9V21eTOGY6JYtfJPHsn1CoZvDQi19RUVU9r+DDrxQuO6c/sxatZWjPRG6eBP7mX3yoSXTNj+vUy4l+12XsGHoeuurEtqU1SIjOwqmXEf7iP/iGT0D1JVH2yb9JPPdGop7Mel9fWB7lyTfWsr8oRHqSh59cNJRQlU7/rkkkfTdk0m2UVW/apWp49GIq136EK6MHnvRulC7+N0ljp2NWlmGWB6ks3ENl+kjKCeDf/xHbB1zDX+ZsJBw1GNY3jdOG5fDhij0AdM1IwKWpaOgYa+ZT8s0HJI+bTnTLcipWLyL5zMsJ71iF4vLiOHEapq2iKPDt7mL+OvtbwlGDEwdlcN3EQSR66w6F8YQPUjD/abAtkifECO38mkjuemIFuSQMPJVw3lZ8Z92I3oCd5EsqdZ6ft54tuSUEfE5uv3Qk/bsEsO3qJVP/Onsd2/LKgOqdZD/4MpfThuVQVhFhctVirH0byLiqK5EG7ki8v7iKJ2atobg8QrcMP7dfOqJmRTtdceE+5RLMyjIiO1cDkHj65VhdRtQKAAorovzPP78iFD40/+3Sc/rz3tJd7NxXzvNz1/OrK0ahNfe29kJ0Qq0+MTgjI4N77rkHl8uF0+mkb9++7N+/v7Wz0QJUwCY4+1GCbz2MbejHtWj9zvwKPlixB9sGw7SYv2QX3bISeXNpfnWDtqJiVVVQ9PrvKX73SVDV6ms6nJR+/C/2rF/HR2sKSU308PXmAgCiuskrCzdx2vAcTMvm9Y93kO8fTOGbDxNc9C9sI0po7SLMiiAoCvsKQzUBAFSPk62KGABsyC1n6dbKdrkOv3HYmFFTcUkAIESno4CqEpz7fxS98SB2LAxK/T9fumHy5ifb2V8UAqCoLMKsD7fy1caDPPqfrwlFDVyxYsreexJ1z1e4ircTfON/8HbtR8nCvxHevhIrFqbwnZkEP/gHlm1hWgq+5X8nZ9UzlGeN4um31hOOVteN63cEcTkdeFwOkvwufnxON5yKhYETT7/RKJqL0iVvUbF6EVpSJhg6akIq7qETMO3qMhRXxnj6zbU1aX6zuZC12+uf0Bv1ZpB8zg0AlH
7ybyK561G9fhL6j6Z02Rz8J07CcBw7AFAUhSXr9rMltwSAiiqdp99cQyhavaRoXlFlTQBwuBUb8vF63ejp/Uk84wr0hOxjXguqexyem7ue4vJIdfqFId75fEetDlvVCKMXH7of0At2o5qRWnleufFgrQAAYOHy3Yw9oQsAW/eUkl9chRDi2Fq9J6B///41/9+9ezcLFizgtddea/D5aWntdZx3AOeYizmwex02kHL6dBK69qx5tqi0iv1FlaQmeuiWWXfiasWWwjrHTNNi+74KXB4XgdReMPZiit7/GwDJp0zG06U7Mdd4QivnY1ombpdGVaR25WjZYFqHqtmQK52M78aPJvQ/meAn/yFtwrUEuvYmVa9b4TsO667dllfGNZNabtJtRkb8TeiNxzKDlLuttN/681gCeE6bQnjrCmzLJHnsdPxde5FYT6tEeSjGzn2167K8whDD+6WzatNB8ovD9OifitFrGCUfPA+At/cIXKldQFEJrf2YlPFXEVxY/ZyV1psyT18Stn+JHYsQCvQiZuyolX6ooorfXZj8/9u788Co6nPh499zZp/JZJnsCZusYgREREVBFiWiQRZFBb2oxdtXrS2320ut5XbxtS16e0Wteu29Vmtr+7oU0UoFrQhqQUGQRZEdExISkpB9JrOcOefcPyKBQIQEJwszz+cfMoeZ3znPZObJPPPbcNTsI6WsmNTBt2BxezFShhIquJymrasBSDp/Ak3b15A+9RskHff3oepADVq07Q6tpVVNZGYOoT2abSxNH72K7q9vafe88QR2bwDTwO5ykpzRsdfZnuMWloCWQsCg5XW6s50CAFqGGHmdFuzle0m+7B7svo71dlfWBjhU5W9zbF9ZAy63Ha/HgR5upvr9V9Ebqki7Yh6RygMEdm/AM+QiMi+Y0vqYksMnb37W1KzhOm4lp6jZ8++1rhTPsZ1KosbdlXpspufevXu56667WLRoEQMGDOjw42pq/BhG7xvs7QxXcWTZw9hzB6PYHFQtX0rGvF8QcuUSCOv89q/b2FNaT5LLxuI7LiYrpe24mvwMD4pybAyn025BUWDqhdlEwxr+6n0cWfnfuIZeglZZTOUrS8i4cTF1q/4LxWoj16Vx4TlppHrbfgOUkmRH01q+2bFZVTKb9+McNBq97jB16/5KxjV3c2TFk6jpfUn3nsONU4bw1zV7MU24YEgG1fXHvlG5YmQW1dVds/tkZqa3y9rurRIxZpC4Y9XWmeit+fN0nFoNNa8swZbRF0tKFtUrnoDkLELu/JPum5npZerFfXnpnb2txy4tyGH7viNAy8o7dQGw9x0BG/7W0v7AC6j94CVUl5f0qXdQveJJnP3PR/fXYWssx7vp75gpOejJuaSXryMvYyDlR47lxosGenCv/QWK3YF7/kPUBlQs/lrYvZqmrauxJKVihEPUr19O2oSbOPLW79FVO1r6MEwTkl1WctM9VNQEWtscPaz9fGvXGvCvfbalAFAtYOg0bl5F2pW3ESzdTe17/x/P1HuJdGAVt0mj89lx4FiPw7n903BZVaqrm0j7irGfLoeV4b4Q+sYdVL32GN6r7/nKibvHUxWYeGEf1mw+th/D1LH9CAc1Qs0tuzO7L70J56AxGHkX4Bh8GY6Bo9FzCto8D2POzeLjnZVt2h6Qm9xaYKiqgtdpjdscI/kzdu2JHioCNm/ezMKFC7n//vspKirqiUuIOc2eRlrhN1EyBoKiYlbtJepoSYxV9cHWb1z8QY3iw41kpWS2eXyfdBc/uX0sb20owWG3cu4AH2o0woWDfBiGSdSVju+aeyD3PDxaM2ZDBSGHD+/lN6HY3RieLOZnlFGmJ/HtOSP5YFs5vmQnl4/MZc3mMiaMymPq2Hz6R/fBiG9gCVRhBOoxc4bju/Zb6J4srKrCtIv7MHZ4Fno0ipMQ7+9sZMQgH1NGZnLeCetoCyFEd4jYUkmb+q8oaX0xLHbcw3YRdX71B88JI3PxOG18+FkFA/NTCE
V0SiubuHBYJn0zPTgitRz52yM4+o/Amp5P3ernyb75J+g2D4bDS/pV38Ca2Q8zGsEIh0iedDulWioH63TG+Wr5/sgRrN1WScnhRqaOzqLPzj9hzxtEpOIA/vUv47zsVqJWF7bUXKy+PJIKxqM6PDRs+BuWpDTMUACj8QiKbzAmFpxWle/NvYD3t5a3tHlxP4bkJbcbmxINEa0pbylYbvgx0cN7qXvnObTaKrzj5mCgdqgAABgx0Me354xi7SdlDO2XyuUj8lrH0melOLjpyiG8vHpvm8d885qBDMh1UOP1EW2oQomGoSPzcE2YNWEgueketuyp4rIReVw4JKPNeP+w3dc6CVi32lD6jWvz/wAFA3xMHJ3Pe1sOAZCZ6uLykXn85e1dAMy6YiAZyQ5ZGEKIDlDME99hXayiooLZs2ezdOlSxo0b1+nH9+ZvshRFaU1Yx/9c64/w7//zUet4z8XfGMvAbO8JjzVxNBTjzO5HuKEGbG7CliQM46vab+k1UBSwRhpBUYnavDgiNWj2VKx2O7puoOsGqtoyX0HXzdY2jvait7ShnJRoj55PUVq+WdF1s937xEoifruRiDGDxB2rts5Eb86fp3N8T+nxP5/o6POsKC3DGf2hKAcr/VhUhfwMN05by0o2zsYvMFw+DIsda+Mhoqn9W+cXqaqCYRzLky3/KihKy3HTNFFVBVVVsPgriexZj31EIWb1AUwtRLTPaExTRVFMXFodRnMDpisVi6KgGwZKsI5IyoCTVtU52mb0hKFBJ3KEqkDXCHvyUYniDpQScmYTtbjP6Lm1WtV2c3zUMCmp9LP7YB12q8rwfAd9s1MxFBuOYCWYOmF3XqfOdfT3croYT8UwTaoaQtQHNIoPNbBs7T7SvA7mTBnMyIHprb/jeCT5M3btiR4oAh588EGWLVtGv37HVhOYO3cu8+bN69Djz9Y/YofrgxRXNJKV5mZAtpcTFy6wGiEiG15CO1JKtLYc38wfEE4ddNp2bUYzwfefR7FY8Vw4jZrlvyFt6p1oOSO//vrQ3SgRE1sixgwSd6zaOhNna/7sjPaeZ0VRqA9ECISiZKQ42ixNGQsWxfhykq+JgoF5Bmvnq0rLHC4ABROTji9w0NXvqVMVXT0pM9PL4apGQhEDm1WJ+e+1N5L8Gbv2RA8MB1q8eDGLFy/u7tP2uJxUFzmp7e8b4NTqiNhTcBVcga3scwJ6FLztL313oqjFhfu8CdS8/gjNO9dhTc9HSc3rlQlbCCG6y7HeUoVPi2t58q/bCWs6A3K9fHvOqJiuJX90lZ+Wj++dLwDsWj3RfR9iHzoBpbkWvfYQRv/Orb/flXrz3xOLouBx9I7nSYizTfyXzb2cI1RF7SsPYCndhH54L01b/kHKxUXQWNGhx5umgprkQ7F+uc26Lw/jy+XhWrqwjw4f6sVZXAghYsARKCN0aC8WRcde/TnOaD0NzRpPLWspAACKK5r48LPDKL1ovWM1VE/DumU0vfUENa8uwb/tHaya//QPFEKIr0GKgB6m25JwDhlL7ZtPUbf2L7iHjoXMIWhpgzv0eIcZouHdZ7Ekp+O75lsE921GqdyFqoK9/gC2w9uxKRqWA+taxnAKIUQcshNEr9hN05Z/oH7xEXrNQYIfv0YkFCQU0dvct7iiEbWLNpM6k3bDyQNIu+oOImW7MMPNpE27p81uwkII0RV6bIlQ0UK3uLDnDCbA2wA48oYQsSdjmqeuz1zhakKOdHTTJG3y7ehWJxFnBpnzcom6M1D1CKE9H+Hf+g6uIWMI7vmY1KsWoAzKks20hBBxx6qHqP3sfbSqEtxDGojUV2N6fGTb/Qzvn8bOLzfFApg4Oh9dP/OJqe2JGga7yxrZtKuSATnJjB6S0e6Ov+1xNJVx5L2/YEnJRG+qo+mfL+GaMB/N1v4KQUIIEQtSBPQwZ7iKqrd+h3vkFBQUjrz5NFnzf03Qmf3Vj/GXUvXSL/BdfTdadQmNu9bhm/PvmKZCKKlv6/
1cF80kFAjgd2XgvfAaLIMuQ5cCQAgRh0IWL2njb6Rm9fNUpY1idbOX/eUaVx9UuHNGAZ/srqbkcCOXnp/LsD6x/3D9eUk9j760FYD3OMSn52Zyz8wRWDqQck2bA9egMbgvuQGz8TBaxV4Ma/vr9AshRKxIEdDDQo4sMuf+jKgrA4DkCyYTcJ56UrDu8uEePp7aN58AILXwm0SsbWe6WwyNssO1vHB4NLvK/IwblsL11VV4M3KhE6tOCCFEb2eP1KEoJmY0gmPSnfzXG/WUHakF4L9f/5x7bxjJ1DH5qGofdD3286OsVpV3Pi5tc+yTXdXUXxki3Xv6NfvDjkycE24nbFohPRWLbxC60rFeBCGEOFMyJ6CHmSiEkvoRtbiJWty4+g4/7dJwpqKiOo6tB63aXZhK219lRLHw+7VVbC9uJBI1eG9HHf/Y0YzSReNghRCiJ9iNZvxrnqN53YsYIT/lZZWUHbebL8CHn1WgqmprAXD8pOBYzA82DJN+2Z42x7J9LjxKqMNtaOax7+SkABBCdAcpArqQooCzqQRnuBorEZz1+3AYHVvxwRk8jKO5HFUxcTcV42rYj6KAw1+KPVCBf/NKUq9agHvEZOpXP4dDa2jz+EDIYPfBtsc+3n0ErQu+BesIp78MR6gaFR1n/T6sevPpHySEEKcRUd0kjZlG895N1Lz9LMl2gySXDZtVxeVo+WA9cnAmhmEQ1HS27K9h2QcH2Hqglq0HatiyswxLzR4sZgRHuAZnY3GnCwPDMJk8pi95GW765XiZVziMMedmsbU0TFMo2gVR9342PYCzfh8qOo5gJY7AoZ6+JCHECWQ4UAwpivnlesoKqmJgjwaoXfEYqsODa+Ao6jauIKPoXhxp+afcZdFOmKb3/0yk+iApk24lWFVM4ydvkzZlPjXvv4hnxCSyb/sVIUcmzv5jcF9wNSFbWps2PE4LwweksbP42GS4SwpysKlKt6/57DAD1L/1FKZu4BkxieoPXiR91g8h6/zuvRAhRK+nKArhqIHVAuppekUdhp+wmoRqd6OoFkzdIKl+Dz+YNxelthSrHuKzQDqTfeVYw1ZWbGlm2Zp9rY+fMWEgk/sEqF/2H7gvnkWgZDtGoBbfzQ8QsnRu3kCOU+MnE032kcfSZbtbj194bhb3zCzA0ouWJO1qigJK1R6q33iMlIm3UrflLSwuL8kzFhFW2t8vRwjR/aQnIEYUxcRa8Sm2ys+wE4YdqzC1IOnXfRftSCmNG1eQNGoKTZ+uBf3U3wxFcOCdfAeYOnVvPoUlOQN7/mDq3nkO1ZWEc9TVNDtzMRQrmsVD2JVzUhsWRWHB9AIuOS8bj8vG1Zf2Y8qFfXpk05ew4iH12u+gN9XQ8MGLJF04DT2jY0ugCiESR0gzeHfrIe7/3Yc8+sp2KuqCX3lfR3MF9ct+iTtUTsOa57Fl5JM+/ds0797IMEc1+TUb8H70NFPVj6j/++NoBz9j4462+698+GkF/zzkJHr+dJo3voZWeQDfjB90ugAAUPQQ9vKtrPu8rs3xT3ZVUdMY7nR7ZzPTBCN7OJ4Rk2l4788YQT8pV98jBYAQvYz0BMSIRQ8R2r+J5s//iSN/GOGynaT78gj761vvEy7bTcr4G9GS+xzdw6tdCgZ6zUGMYADF5kAxDbTKEgD0pjrM+grISD3tNaUn2fk/MwoIRw1cNgtmN1cAqqnBl3MVokdKsOcMJHxoN6EDW0k9/0p05+knzAkhEsfnJbX8aeUuAGobQzx0eDO/vGscHvvJO8KaVgeqy0vlCz8l5eIiLKnZGDkF5N3+K4KWVDzjbiJcugv/J2/hHHwRDLyUyLtb2rSRluykv0/BuuPT1mPh0h1Yzs3p9Lj8iCMDzxW3kb2uqs1xu1XFYUu8HW0t4QZCJS3Pq6mFiFYdQO2X3mt2QRZCSE9AzERVF0njbkR1uAmX7SRp9NWoWYMJf7GF5IuuwTd1AUYkiNF4BFvN3lO2pRoRIof24Bkxmcx/+T
WGFsKWnkf2N/4T1+AxaJUHsJhah65LAZxWtdsLAIsZhX0foHyxHrsZQgk3Y8/uT/ZtSzANHYL13Xo9QojeTVUVPjtQ0+ZYYyBCbWP7k2s1eyqe8yeCEaXho9dRU3KIqi6ceYMxFAvawe3ojdWoTg+hL7ZirS/h7tkjWj+Qu51WLj4vmxQzgCXSRNrcB0iZeCvh0s9Ro1/dA3EqIcXDhFF5ZKY6W2P61xkFpHoSa6KvogDNdSiqhazbH8Z7URHh0p2oemL1iAjR2ylmd386/JpqavwYRu+7ZKsRJLL5dQJbVmHL6INWU076rB+Arx9KQznYXVicXiKHdqHmDSdiS223ncxML9XVTVj1IJg6UWsSTr0R0zQJW1OwRhtBdRBVe/ca0s5IDbV/fRDdX4en4AoCn/8TxeYg4+afYdrdhE/obj8adyJJxJhB4o5VW2eit+bPozbsruJ3yz9rve1xWvnVPZfhdZzcae0KlFK/+llSLplB49Z3safn4SqYSFK/YdRU16HsX4feVItrxBSa3v8z7hGT0TLPpS4Qpa4pjC/ZQSSiE44anJOsodlSsJgaqh5Es369fQSCEZ3K+iBJLhsZXjtdvSxzb3xPKYqJI9pEyJKM1QiiGDqaNSmm5+iNcXe1RIwZYh/3mebQeCNFQIyoGKilH6MoKmpeAaFP3sA5fAJhT9sJwKqqnPL64+kN7gwe5shLP8eMhACFzHk/J+Tt3+594ynujkrEmEHijlVbZ6K35s+jmiNR/rGpjDfXF5Pjc3PnjAL6Z3ranctkMyOo5duoeet/yJjxbzRtXolp6GRf/0PqghZUoihGFF11YjOCaGr8jkeX91TiSMSYQYqAriJzAmLEQMXsOxYAExXb2BtaNn4BHOEj6FYXUYsHR7CKkCPjtHsBnO0UTPSGw5ha5MsjJlr1QVRvHxkTKoRol9tuZeblA5g6ti92i4r1FKuZaYode9YQbLmDqX7tEdLGz8GaPxzV4cJT+wXN7j5YzDDWcDVhR2b3BiKEEGcBmRMQQyYq5pdPafRoARBtoP5v/0lk03JsldupeuHHOBqLe/Aqu4dDq6Vu1dMoNgfps36AJSmN+nf/gCNU3dOXJoTozUxw2yxYO7SxoYKiWki7/AYaNq0kvH8TTVtXU/mXn+Oq20144zLq31iKI9rY5ZcthBBnG+kJ6GIRazLJl82h9u+/JbDtHVxDL0F3Z/T0ZXW5sD2d9NmLQLEQSemPb/aPMAN1hFzZp1wZSQghOkJRgJoviBzajXfsDLwjIzR89BoAnuGX07h5FcH9n+CbvpBIjMeii7OfokBTKEpEM0jz2GOyc7QQZxspArqYiYJiP7YUpuJwYyrx3wFjmhBOGfjlDQi5clDcOT2yT4EQIv6YJujZBWTe8v8IuzKx+I+0/p9id2IG6o79LJ3e4jiKAp8V1/P08u0EQlEuG5HL3CuHkOSUj0QisUhm7GKOaAP17zyLa9g40qbdQ/Ona7EGKnv6snqEFABCiFjSFRtBZzb2SD31a/6EZ3QhaZNuwb99DSmXzMQ19BLq33kWh97x4UCaYVIXiKD14gnU4uup9Ud47OWtBEItG3eu/7SC7ScsTytEIpCyt4uFrSmkzVqEYXMTtSaRNb8fYVdWT1+WEELEjZDNR/pNPyVqT8abmoQj/1yCnnxc4/PwaM0nLUn8VY40hXl6+accKG+kX7aXb90wkqzk3r0cs+i8pmaNqG60OXawsonx5+f06tWzhIg16QnoBmFXNprVi4lC0JUrq+MIIUSMhVw5RC1urE4PAU8/DCxoVi9hV3aHHm8AL7+7jwPlLb0GByub+NOqnejymTDuZKY4yUhpu2P9qMEZUgCIhCM9AUIIIbpMc0SnrilMmteB2957vwAJajr9crxk+dz8c+shGgMR9h6sJ6zpMb1ui6lhD9cQcuVgC9eBqRNxxP9iEb2Jy27hh7eO4Z2PD1JWFWDapf0Ykv/1NogT4mwkRYAQQoh2NQajbN/fMl
Z65KB0kl2d+5NRXhfkP/68mQZ/hJQkO//31jHkpbXdtEvBxNF0ECMpCzVQjeFIIWJLiVkMHVHjD/P4K9sorfTjsFu4ftJg/vbBAS49PyemBYCigOXw51T//Ql8M/4N/+aVmFGN5Gu/S1h1x+w84vSykh38y9ShKIqCfsLQICEShRQBQggh2vXKmr2s214BwOUjc7mzaHiHl/jVTZPn39xJg79lw8AGf4Q/rdzFD+ddgOW49RitejP+TSswQgG0qmLSr/8RdGMRoKoK728tp7TSD0A4ovP6+/u5c0YBg3Jjex2mCaQPwN7nXGqW/wYsVjLm3E/E4palk3tAy/AfeeJF4pI5AUIIIU4SNU32H2povX3gUCN6J8ZMa7pJebW/zbGyqia0EwbZR60e3COmEC79HFtWf4yk7l04QVUViivarh7UHIrSNzOp0z0fHWNiGvrRHzF1vQvOIYQQpydFgBBCiJPYVIW5Vw1FVUBV4OarhnRwF98WLpvKNeMGtDl2zbgBuGxt/+zY9Gb8m/5O+qwfojqSUJsOx+LyOywaNZhyUd82x0YOziAtyR7zcykKUFuCVrGPjBvuw9H/fPwfLcOmN8f8XEIIcToyHEgIIcRJTBNGnuPjoW+NByDda+/UXh+mCZNG5+NLcbJ93xFGDs5g5MD0k9qIqG6SCu9Bs3hwpZ9D2NL9Y+PP65fG9+eN5qPPDnNOXjIXDctqM2QpVkwT9KzzyLj1QcLOLJIm5YChE5H5AEKIHiBFgBBCiK+U7j3zb8TddguXnpvF5QU5p5x8GVE9YIJm8Zzxub4Om0Xh/P5pjBrowzDMLt3YUFds6I4sMCFiS+26EwkhxGlIESCEEKJLnS2rr+iyKYAQIoHInAAhhBBCCCESjBQBQgghhBBCJBgpAoQQQgghhEgwUgQIIYQQQgiRYKQIEEIIIYQQIsFIESCEEEIIIUSCkSJACCGEEEKIBHPW7ROgdmLb+rNVIsTYnkSMOxFjBok7Uc/fXRIlzuMlYsyQmHEnYsyQuHF3JcU0u3JvRCGEEEIIIURvI8OBhBBCCCGESDBSBAghhBBCCJFgpAgQQgghhBAiwUgRIIQQQgghRIKRIkAIIYQQQogEI0WAEEIIIYQQCUaKACGEEEIIIRKMFAFCCCGEEEIkGCkChBBCCCGESDBSBPQwv9/P9OnTKSsrA2D9+vVcd911FBYWsnTp0h6+uq7xxBNPUFRURFFREQ8//DCQGHE/9thjXHvttRQVFfHcc88BiRE3wEMPPcR9990HJEbM8+fPp6ioiJkzZzJz5ky2bduWEHH3hETLoZI/JX/Ge8ySP7uRKXrM1q1bzenTp5sFBQVmaWmpGQwGzYkTJ5oHDx40NU0zFyxYYK5du7anLzOm1q1bZ958881mOBw2I5GIedttt5lvvPFG3Me9YcMGc+7cuaamaWYwGDQnT55s7ty5M+7jNk3TXL9+vXnJJZeYP/rRjxLiNW4Yhjl+/HhT07TWY4kQd09ItBwq+VPyZ7zHLPmze0lPQA96+eWX+dnPfkZWVhYA27dvp3///vTt2xer1cp1113HqlWrevgqYyszM5P77rsPu92OzWZj0KBBFBcXx33cF198MX/84x+xWq3U1NSg6zqNjY1xH3d9fT1Lly7l7rvvBhLjNX7gwAEAFixYwIwZM3jhhRcSIu6ekGg5VPKn5M94j1nyZ/eSIqAH/fKXv+Siiy5qvV1VVUVmZmbr7aysLCorK3vi0rrMkCFDuOCCCwAoLi5m5cqVKIoS93ED2Gw2Hn/8cYqKihg3blxC/L5/+tOf8r3vfY/k5GQgMV7jjY2NjBs3jieffJI//OEPvPjii5SXl8d93D0h0XKo5E/Jn/Ees+TP7iVFQC9iGAaKorTeNk2zze14snfvXhYsWMCiRYvo27dvwsS9cOFCPvzwQyoqKiguLo7ruF955RVyc3MZN25c67FEeI2PHj2ahx9+GK/Xi8/nY86cOTz++ONxH3dvkAivL5
D8KfmzRbzFDJI/u5u1py9AHJOTk0N1dXXr7erq6tZu7niyefNmFi5cyP33309RUREbN26M+7j3799PJBJh+PDhuFwuCgsLWbVqFRaLpfU+8Rb3m2++SXV1NTNnzqShoYHm5mYOHToU1zEDbNq0CU3TWv94m6ZJfn5+3L/Ge4NEyKGSPyV/HhVvMYPkz+4mPQG9yKhRo/jiiy8oKSlB13VWrFjBFVdc0dOXFVMVFRXce++9/OY3v6GoqAhIjLjLyspYvHgxkUiESCTC6tWrmTt3blzH/dxzz7FixQpef/11Fi5cyJQpU3jmmWfiOmaApqYmHn74YcLhMH6/n+XLl/P9738/7uPuDeI9l0j+lPwZzzGD5M/uJj0BvYjD4WDJkiV85zvfIRwOM3HiRKZNm9bTlxVTv//97wmHwyxZsqT12Ny5c+M+7okTJ7J9+3ZmzZqFxWKhsLCQoqIifD5fXMd9okR4jU+ePJlt27Yxa9YsDMPglltuYfTo0XEfd28Q768vyZ+SP+P9dy35s3sppmmaPX0RQgghhBBCiO4jw4GEEEIIIYRIMFIECCGEEEIIkWCkCBBCCCGEECLBSBEghBBCCCFEgpEiQAghhBBCiAQjRYAQQgghhBAJRooAEdcWLFhAbW3t177Phg0bmD59+mnPN2zYsHbbWr16NQ8++CAA8+fPZ9WqVZSVlTF69OjTtimEED1B8qcQ8U02CxNxbd26dTG5z9d15ZVXcuWVV3b5eYQQIlYkfwoR36QnQMStH//4xwDcfvvtbNy4kfnz53PdddcxY8YMXnvttZPuU1FRwZo1a5g7dy7XX389kyZN4tFHH+30eR999FFmz57NzJkzWbNmDQCvvvoqd911V0ziEkKIrib5U4j4Jz0BIm79+te/5tVXX+X555/npptuYtGiRRQWFlJZWcmNN95I//7929wnLS2NRYsWsWTJEgYMGEBlZSWTJ0/mtttu69R5+/TpwwMPPMCePXuYP38+K1eu7KIIhRCia0j+FCL+SREg4t7+/fsJh8MUFhYCkJ2dTWFhIR988EGbMaWKovD000+zdu1aVqxYwf79+zFNk2Aw2KnzzZs3D4ChQ4cyaNAgtmzZErtghBCiG0n+FCJ+yXAgEfcURUFRlDbHTNMkGo22Odbc3Mzs2bPZsWMH5513HosWLcJqtWKaZqfOp6rH3laGYWC1Sq0thDg7Sf4UIn5JESDimsViIT8/H6vVyttvvw1AZWUlb731FpdddlnrfaLRKCUlJfj9fr773e8yZcoUNmzYQCQSwTCMTp1z+fLlAOzYsYODBw8yatSo2AYlhBDdQPKnEPFNSmwR16ZNm8Ydd9zBU089xYMPPshvf/tbdF3n3nvv5dJLL229z/z583nssceYNGkS11xzDXa7naFDhzJ48GBKSkqw2+0dPmdpaSmzZs1CURQeeeQRUlNTuyg6IYToOpI/hYhvitnZvjohhBBCCCHEWU16AoTohGeeeYY33nij3f+78847mTFjRjdfkRBCnB0kfwrRu0hPgBBCCCGEEAlGJgYLIYQQQgiRYKQIEEIIIYQQIsFIESCEEEIIIUSCkSJACCGEEEKIBCNFgBBCCCGEEAnmfwEf1V+EKeAwDAAAAABJRU5ErkJggg==\",\n      \"text/plain\": [\n       \"<Figure size 777.475x360 with 2 Axes>\"\n      ]\n     },\n     \"metadata\": {},\n     \"output_type\": \"display_data\"\n    }\n   ],\n   \"source\": [\n    \"# Create a visualization with pandas df\\n\",\n    \"sns.relplot(\\n\",\n    \"    data=pandas_tips,\\n\",\n    \"    x=\\\"total_bill\\\", y=\\\"tip\\\", 
col=\\\"time\\\", col_order=[\\\"Lunch\\\", \\\"Dinner\\\"],\\n\",\n    \"    hue=\\\"smoker\\\", style=\\\"smoker\\\", size=\\\"size\\\",\\n\",\n    \")\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 31,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/plain\": [\n       \"<AxesSubplot:xlabel='total_bill', ylabel='tip'>\"\n      ]\n     },\n     \"execution_count\": 31,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    },\n    {\n     \"data\": {\n      \"image/png\": \"iVBORw0KGgoAAAANSUhEUgAAAYAAAAEJCAYAAACdePCvAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAA4f0lEQVR4nO2de3wU5fX/P3tPdjebkBsESEADAVTuogaUi5KgxBCIAoGKWqWKl9JiLVDqr/bnCyva/kqlSu23+rWtV6AFEYsIilokFAUF1IKBcAmRkCy5bXazt+zO74/NDnuZ2Uv2Mrs75/16+ZLdnZnnnJnJc57nPM85R8IwDAOCIAhCdEiFFoAgCIIQBjIABEEQIoUMAEEQhEghA0AQBCFSyAAQBEGIFDIABEEQIoUMAEEQhEiRCy1AuLS3m+B0Bg5dyMnRorXVGCeJhEMsegLi0VUsegLi0VVoPaVSCfr103D+lnQGwOlkghoA93FiQCx6AuLRVSx6AuLRNVH1JBcQQRCESCEDQBAEIVLIABAEQYgUMgAEQRAihQwAQRCESCEDQBAEIVLIABAEQYgUMgAEQRAihQwAQRCESCEDQBAEIVLIABAEQYiUmBoAo9GI22+/HY2NjQCA2tpaVFZWory8HOvXr49l0wRBEEQQYmYAjh49ikWLFuHs2bMAAIvFgjVr1mDjxo3YuXMnvvnmG3z66aexap4gCCLxkAAGsx0NehMMlh5AIqw4MTMAmzdvxpNPPon8/HwAwLFjxzBkyBAUFhZCLpejsrISu3btilXzBEEQiYUEON7QiVUba/HrVw5i1Yv7cbyhU1AjEDMD8PTTT+Paa69lP7e0tCAvL4/9nJ+fj+bm5lg1TxAEkVAYuu3YsPkIrHYHAMBqd2DD5iMwdNsFkylu9QCcTickksumjmEYr8+hsvJPtWhpN3P+tqh8BBbPGgkAyMvL6JugSYZY9ATEo6tY9ATEo2teXgYuntKznb8bq92BbrsDxUXZgsgVNwMwYMAA6PV69rNer2fdQ+Hw3EOTAxZX0Ou7kJeXAb2+q09yJhNi0RMQj65i0RMQj65uPdUqOVQKmZcRUClkUCtkMb0PUqkEOTla7t9i1qoPY8eOxZkzZ3Du3Dk4HA689957mDp1aryaJwiCEBRduhzLF4yDSiED4Or8ly8YB51aIZhMcZsBqFQqrFu3Dj/+8Y9htVoxbdo03HrrrfFqniAIQlgYYFRRJp59eDI6TDZkaZSuzl/AapExNwB79+5l/11aWop333031k0SBEEkJgygS1dAl65gPwsJRQITBEGIFDIABEEQIoUMAEEQhEghA0AQBCFSyAAQBEGIFDIABEEQIoUMAEEQhEghA0AQBCFSyAAQBEGIFDIABEEQIoUMAEEQRDgkWFWvSIhbMjiCIIikp7eql7uwizuj56iiTMHz+vQFmgEQBEGESC
JW9YqEpJsBBKoINmfKUMy96co4S0QQhFjoMNo4q3p1mGyXM3wmEUlnAIJVBCMIgogVWRkqzqpeWRqlgFL1HXIBEQRBhEgiVvWKhKSbARAEQQhGAlb1igQyAARBEOGQYFW9IoFcQARBECKFDABBEIRIIQNAEAQhUsgAEARBiBQyAARBECKFDABBEIRIIQNAEAQhUsgAEARBiBQyAARBECKFDABBEIRIEcQAbN++HRUVFaioqMCzzz4rhAgEQRCiJ+4GwGw24+mnn8Zrr72G7du349ChQ6itrY23GARBEKIn7gbA4XDA6XTCbDajp6cHPT09UKlU8RaDIAhC9MQ9G6hWq8VPfvIT3HbbbUhPT8ekSZMwYcKEeItBEAQheiQMw8Q1memJEyewevVqvPLKK8jIyMDjjz+OMWPGYOnSpfEUgyAIQvTEfQbw2WefobS0FDk5OQCA6upqvPnmmyEbgNZWY9CSkHl5GdDruyKWNdERi56AeHQVi56AeHQVWk+pVIKcHC33b3GWBSNHjkRtbS26u7vBMAz27t2L0aNHx1sMgiAI0RP3GcCNN96I//73v6iuroZCocDo0aPxwAMPxFsMgiAI0SNIScgHHniAOn2CEAsSwNBtR4fRhqwMFXTp8qQuo5hKUE1ggiBihwQ43tCJDZuPwGp3QKWQYfmCcRhVlCm0ZAQoFQRBEDHE0G1nO38AsNod2LD5CAzddoElI4AknAGs/FMtWtrNnL/NmTIUc2+6Ms4SEQTBR4fRxnb+bqx2BzpMNoEkIjxJOgPw3EOTg24DJQgiMcjKUEGlkHkZAZVChiyNUkCpCDfkAiIIImbo0uVYvmAcVAoZALBrADq1QmDJCCAJZwAEQSQRDDCqKBPPPjwZHSYbsjRKV+dPk/iEgAwAQRCxhQF06Qro0hXsZyIxIBcQQRCESCEDQBAEIVLIABAEQYgUMgAEQRAihQwAQRCESCEDQBAEIVLIABAEQYgUMgAEQRAihQwAQYgFCWAw29GgN8Fg6QEkQguUpKTQfaRIYIIQA4Hy8lNkbuik2H2kGQBBiADKyx8dUu0+Jt0MIFA9gJLCTKz+wcQ4S0QQiU+gvPxsjh4iKKl2H5POAFA9AKJPCFWXNkHq4VJe/uiQavcx6QwAQYSNUH7bBPIXu/Py+8pCqZnDI9Xuo4RhmKQSu7XVGHQGkJeXAb2+K04SCYdY9AQi09VgtmPVxlq/UduzD0+O6bS9L+3G9Jm6ZyMJkpc/ad/fMO+j0HpKpRLk5Gg5f0u6GQCtARDhIpTfNuH8xZSXPzqk0H1MOgNAawBEuAjlt001fzGRetA2UCLlEaouLdXDJRKdpJsBEETYCFWXlurhEgkOGQBCHAjlt00hfzGRepALiCAIQqQIYgD27t2L6upq3HbbbVi7dq0QIhBEapBCicmI+BN3F9D58+fx5JNPYsuWLcjJycE999yDTz/9FNOmTYu3KASR3CRQoBmRnMR9BrBnzx7Mnj0bAwYMgEKhwPr16zF27Nh4i0EQwhGlUXuqJSYj4k/cZwDnzp2DQqHAsmXL0NTUhOnTp+OnP/1pyOcHCgRbVD4Ci2eNBOCKvhMDYtETiL2uTieDpksmtBnMyNaloyBXA6k0uj4Vp5PBga+bsP6tL9lR+4pFE1A6uoBtK1Q9L57ScwaaddsdKC7KjqrcsUIs72+i6hl3A+BwOHDo0CG89tprUKvVeOihh7Bt2zZUV1eHdH6wQDC9vkvw0Ot4IRY9gTjoGid3isFsZzt/wNVhr3/rSwzo50oPEY6eapWcM9BMrZAlxXshlvdXaD0TKhVEbm4uSktLkZ3tGqHMnDkTx44dC9kABJoBzJkyFHNvujJqshLigc+dEu18QdFMD5FqicmI+BN3AzBjxgysWrUKBoMBGo0G+/btwy233BJvMQjCi3jl7YlqeggKNCMiJO4GYOzYsVi6dCkWL14Mu92OKVOm4I477gj5fMoFRPSZALn545W3J+qjdgo0IyKA0k
EnMWLRE4iCrsF8/PHcUhkgnXBeXgb0l7oSoohMrBHL+yu0ngm1BkAQQhDUxx9Pd0qAUbvTydDefiJuJJ0BoEVgoi+E5ONPAHdK0yVTXBajCQKgXECESHD7+D1JxNz8bQYzr6EiiGiTdDMAWgQm+kKybJnM1qVTERkibiSdASCIPpEkWyYLcjVJYaiI1IAMACEeEsDHHwypVJIUhopIDcgAEESiEQ9DJQGMlh4YzD2wWHuQm5mWsttNCX5CNgCdnZ2QyWTQarn3kxJhECAgiUhQYvnMPK5tYyRQShHb90EC1F/oQqPeiLf31NF2UxET1ACcPn0aP//5z3H8+HFIJBKMHz8ezz33HAYOHBgP+VIPyuGefMTymXFc+9H5Y3H10CzAGQ3h/TF021F/wYBtn5yi7aYiJ6gB+MUvfoH58+fjjjvuAMMw2LRpE375y1/i1VdfjYd8fgSKAygpzMTqH0yMs0ThEa+kYylLKCPxaI3We69jtDoCP7MI2uN6H17YchRr7p2Ewlz15esEayMMGTqMNjgZhnO76aUuq7BrDuHcSyFm0ik2ew9qAMxmM2pqatjPS5YswebNm2MqVF+51GkRWoSgxCvpWEoSykg8WqN1j+tUTSvmf2ZqRUTt8b0Px8+2IVOtYA1MNNNYZGWoIJVIOLebnjrfCavVIcyMNBw9hJhJp+DsPWgg2JVXXokvv/yS/VxXV4fBgwfHVKi+cuPoAqFFCEqyBCQlIqFUwIpWlSzf6/A9s0jb43sfnE6wwV/B2ghXBl26HMUDdagpK2HbVilkWDxrBD784pxgVcXC0UOIamipWIEtqAG4cOEClixZgurqaixYsADV1dU4ceIEKisrUVlZGQ8ZQ+bd/WeFFiEo7oAkzz88dp83EZBAs6dwjgm3rb2HGrDQp7N0P7NI29Oly/Ho/LFe115YVoJ9RxrZQUGwNsKWgQGKB2Zg4og8/PKHk/Dj+eOwYOZw7PjsNC51WASLPA5Hj2g951jJlywEdQE9/vjj8ZAjZJI+EjhJApISkVBSNkcrrbPndS51WLCz9gzmTR+GYYMzkatTsc8s4vYY4OqhWVhz7yQcP9sGpxPYc/Ac7rp1VMht9EkGBtCq5HAywNOvfpEQkcfh6BGv9N1CtxlreGcA9fX1AACNRsP533XXXYfrrrsuboKmFL37vItyNWwmSiI4ocyeojXD8r1Ol8mOwnwtrhyg9XpmUWnPCRTmqnHDVf0xbkQuVt010cuvHKyNSGQQbEYqAb5vMaJBb4LB0gNIwpNFCLlTcfbOWw/gwQcfxJ///GdcddVVKCgogOdhZrMZBw4ciJuQnlA9gMuIRU/AQ9cAufRZQjkmFKRAq8GGVoMFObo05OiU3Fszo9UeAjzTYG1EIkMU5Q+1Pd7FVIQhS7zl7mObQv+dBqoHwGsAOjo6ALh2/bz22mtgGAYSiQR2ux133XUXPvjgg5gJHAgyAJcRi56AALoKtOND8Gcah22OBrMdqzbW+rlSUnUrtNDPtE8FYX72s59h//79kEgkKC0tZb+XyWSYNWtW9KUMEaoHQMQDUcZrxMno0VboxIHXALzyyisAXIFgzzzzTNwEIohEQIydVLyMXioupiYrQbeBUudPiJGEjdeQuFwonoun0SJe2xxTcTE1WUm6bKBJvw2USAoSsoBMjF00cRuZ926Ffv6x6bjYaqSt0AKSdAaAIOJCAsZrxNpFE1ejxwCD8rVQShj2MxF/ks4A0CJwgiJ0kqxot++x3U+TpoDRbAckEkGTf8V8XSIBjR4RW5LOAJALKAEROklWtNvnuN7CshI2Qleo5F9xcdEkQdU0InoEXQQmiGDELEmWx4Ln9y1G3gXPaLfPdb1Ne+pw07jBsUn+FaKetHhKRJukmwEEcgGlKWXY+Ni0OEtExMQ1EcaoPtrt810PkhhsBQ1n9kIuGiLKJJ0BIBdQ4hGWayJEX33ABU+1wusa2ZlpUXWN8OkDJvoul7AXdslFQ0QRQQ3As8
8+i/b2dqxbty7kc5K9IlgqEvLukSiM6i8ZrPi+tRvnm4348Itz6DLZsXzBODy+eAJ+9+aXUdm9wqWPew0g2rtixBhwRiQOghmAAwcOYNu2bZg+fXpY54l6BuAzes6J9X0IdWeNh2vCaLFDpZSj02hDU7sFGWoFtCrX6NlztJublYabJxbhUmc3WgxpsFh7kKW93AbXKLwgRw2b3YlzF7sglUhQPX0Ytn5yih0xB3SNhLNLyMfVkqFWwmp3oKh/BnIz07jP8Uwcl5mGnAyexHE+pERUrNA7wIg+I4gB6OjowPr167Fs2TKcOHEirHNFOwPgGD2vWDQBJYMyYvPHFu7OGgbQqRX4/lI3Nmz+gj2npqwEg/O0KB6YwY52c7PSMHvyFdhz8BzKrh+CJ//nP35t+I7CC3LUuGPGcDy/6Suva1dMvgJ/23kclwxWyKUS3nq5Ye8ScrtaQin5KAWO1rfhpa1fs8csqx6NscXZQY1AQgachYPQO8CIiBBkF9CvfvUrrFixAjqdLqrXHVnUL6rXSyS4fMXr3/oyZuXo+rKzhuuct/fUof6CAYZuOzvavXliEburZtOeOu42PEbhv156PZYvHI+/bP/G79r52WpXLdvGTvz6lYNY9eJ+HG/o9NpJE8kuoVDObTXY2M7ffcxLW79GqyGEFAo+ej7/2PSk6jxTsUyimIj7DGDLli0oKChAaWkptm7dGtVrv7v/LH5UPRaAKwVrKnHxlJ7TV9xtd6C4KDsh2uM7x8kw6LY7cPUVuVixaALONnV67aoJ1EZe7/eH/nuR81irzYGashL8q/YM+92GzUfw/GPTMShf22ddwrkPJy9e4Dym3WjFyCtyAl7fTV7wQxKSSN/LVPs75SNR9Yy7Adi5cyf0ej2qqqrQ2dmJ7u5u/OY3v8GaNWsivvacKUOh13cJnn87FqhVck5fsVohi4mufWmP7xypRAK1QobWViNKBmUgR6fCp182YuiAjJDbGJCj4Ty2IFeDN3efwKUOC/u91e7AxVYjm2YgbF08fNoatQIFOWo0tXbznpvN48fvp1WF/WyS7d2N5L1MNl37itB69qkgTDzYunUrPv/887B2AYm2IEyirwFIAKOlB2cudmHjP45xrgGw58mAIyfbsOWjOpRdP4R1AwVqIydHi8+OfO8nT2F/LX7+x88CFxcJRxeOY5dVj8bmD+vQ1Nod9TUAX5Lu3Y1gDSDpdO0jQutJBiBV8ClHd8XgfmhtNcatPd6FSY9OIEOjwOzJV2BgnhbadAV06XJo07wXZVuNNjzx5wNeu4GkUmBcSR7ydSrONvLyMqC/1OUvD0LsgELUha9a1doHS2Gy2PnPDbV8ZBDi8u7GMG9SOMFpKft36oPQevapIlg8qK6uRnV1dVjnBNoFlK1T4XcPT4mGaImJTxCQVBrFZPAhtMf3R+25EGjtcODvO4+zo3Ctyr9zaTVY2A72UocFmz+qAwAMLdAhP0MVtjwhRcfy6eLTGbZ2WTl92iaLHUW5Gv774ARytErkaJXs54QkFrt2KDgtaUm6SOBA3Di6QGgRREm4wUw5PJG7OTqePfbB6GsHxNEZPrZ4QvLvyw+AKEtdEryklAEg+kiELgG+oC1NmgIX2rqRnqaA2eaAxdqD3Mw05GQqserua1HX0A4nA+z7qhELZpZcdptwycMh7yWDFekqOXRaBRx2p/fxkuCBWVyBaY0tXVi5ZCJe3v6Nl88/afblB6HDJPLIYwpa8yKlDMCJhnahRUg+ouAS4AraWjCzBL9/60tU3nQlrDbXnn3P3zwXTB+ZPxbXXJEFOPjlycnW8sr7o6pr8M+PT3p12JAw2LDpaMBFWd/ANM/F6PvnXA2TxY7hhf0wsF9aynQSmjQF5wxHkyaOzp+C1rxJqXTQqRwI1idCqB8blUAej2CmtQ/egJ/UjEdzWzfuum0UDCY72/kDwE3jBvsFTb245SgMRntAeZoumXh//8v2b3DTuMFex59vNgUNzPINTP
M8/pV3v4XV5mTTWPTl3iYiVlsPFpaVeKWUXlhWAqu9R2DJYg8FrfmTUjOAd/efpYpgbkIc7UQtGRlHKoiashI4GZ9gL57gL3d7fPK0dZkxQJfG68Lw7IDdwWe+x7QaLK5F2l43gNFsx6Pzx+J7vZHzmoX9tdyunyQeSWrVSuw5eA5VU4td94wB9hw8hwnDUzSFigeUeM+fpDMAok4GFwahLvZFMxmZb5tOBpBKJJzX52uPT57sjHRAwkDCcz3PjtcdfOYJu8js03kX5Kjx6Pxx2PZJvd81B+Wow09VneAdiS5djrtuHZW8uYciICUS70WZpDMAot4GGgahjnZCSkYW4sKZ0Wy/PLIE8NV3zZgxsRA1ZSWsG2jfV4340dxr8Jd3vmHbe3T+WOg0CsDJL0//bDVOnW/H2aZO3D/napeLpvf3B+aNxj96t5J6rgG4/9jdawAqpQxGS49X593U2o0XthzBo/PH4oUtR0PqFJN6JCniojJJn3gvBiSdAQgEb6peERLyaCdYhxDI3YHLhiE7Mw3tJhu2/7uePW5hWQn2fdWI++ZcgycKr4O+3Yz2Lgv+8/UFNieQ0wm8+cEJr1q7vqmYbT0O1H59ga0BoJBJsWLRBDS2dMFmd6J/dhpW3TXRW34JsHZZKZrbzGhsMeKND06gy2THQ3eMQYZGAWvH5fvS1NqNfhnKkDtFwUeSke5kEeu+fREbPz5SygDQIvBlwhrtBOgQ+Nwdv310Cs63mNjfaspGYNsnp7yO27SnDmvunYQcrRINvccCwIJbSrD+rS+9OlAvF0rveoK+w4Jjpy6xswe3UdlZewbr3/oSVVOLsf3f9Zh8TX9/+RlAIZXgjx6yA8Cf/nkM86YPw9t7vmO/Uylk0KYpQu4Uue7tsuoxcACubRV9DQKTAN+3GHHxkom/Yw/RINMWRx7Eavx4SCkD8K8D52gR2E1fRzs+o0uj2c7p7jCYe3z8/QzncRab61h32cYMjQL9s9VBXSiGbjvqLxg4jUrV1GJs/qgOUin65Kop7K/1cg+F7QZw39tHJqNRb0JjiwlvfHAcXSZ7n3MAhbqwHKpBTqaFaUI4UsoAKOQptas1csId7fR2Qq/vOo6bxg2GVAqMHJKNUUOyoO+0oGLyFcjPVoNhGNh7HFgwcziydWnI1KqgkEmguf0q7PjsNJuZU6WQocfhxNFTegwb3A+PLR4PQIKLrSYvF0puVhpmThqCHgcDg6UHOrUc3TYHBuVpUDWtGHsPNQAAbp5YBEiAoQUZGDUkCxNG5MNg8q885obPVTMoRx25G4AB7D0M/rj5qNf1X9r6NdY+WOq128g3oI1rlB7qwjKfUfM1yHFdmPbVUy2HwRTiTIQCswQlpQwAERmGbjte33XcL0Pnw3eOgcPhRKfRhtffP47Km66ExebA5g9PssfUlJUgTSnDHdOH4Z+fnEKXyY6lVdfgvX31mHR1AZ565SB77OJZI3BPxSj87V/HkaFRoGLyFV5uHt/sm/dUjIK9x4k3P/iOPebB6tFY/9aX7DFcWUd1ajmWVY/2y9J5ecE5MjeAZ04jN+x20wwl54heKZf61S4eVZQZ8sIyn1GzWHuEWZjm2FXlG+jHNxNxOpmk3U6bKqTUkFmdRvYsEjqMNs4qXRv/cQxpSgXe7q3iZTDZ2V087mPe3lOHTpMdnb1F2pcvHIetH5/E6GH5ftd784PvYLb2oGpqMX54+9VegWLuoC3PwC6Dyc52/u7v/uxzjLvymNHSwwZotRps2P2fs6iaWowFM0tcrqMP62AweQT+RBDQ5c5p5Il7uynfiL7+goEzEMndsftey3dh2b3+4BnItXzBOOTyyBLrhWlfPbkC/fiCrZoumTjvkZgDs+JNSvWYbQar0CIkNVkZKkil3IFaZlsPG3DF5+93B1/Zepw439zlKqLCE/hltTux+aM61JSVBA3sksskIQV/qZRSnG7qwp/+ebkegXvR2LNgDDsqjjCgKydDyTnDyNEp0e
ARicx1jzy/6zDZUJSnCW3Rnm9tBxBki6PfzCVIoJ8nbQZz8m6nTRGSzgBQIFjs0KXLMWpoNqeLIV0pZ0eYfMFd7uCr3AwVFL3Xcf/GFbilUshwxcDMgIFduVlpKOzPXTnMN/irfz8NWzQe8F80dh/nHhVHHNDlBMYWZ2Ptg6V+dQD4XDVcAWpZGiXbsT//2HRcbDUGXpuIJC12lOHTM5Qtstm6dArMEpikcwGt/FMt7lu3l/O/dW8cFlq85EMCGK09uNBhwemLRmRqlXhs0Xg/F4Na5fKz7/uqETqNAotnjfQ6pqasBJkaBYoH6qBTK1CYp8aj88fi3IUOVxCWx7FLq65BepoM86YPg9Fs87vWsurR2HekEQAwc9IQ/PW9b/3y1zx05xj2GHf7Ep7Rp7T3LfcaFSNwQFfI9NYBKBmocy389u7+4XPVFA/U+X3nlgcMMChfi6JcDbsdNix6DUOfz+8Dvnru+6rR73l76ehBQa6G8x5xHUvEBkErgvWF+9fu5o0EnjNlKObedKXgFXjiRcR6SoD6C11o1Bu9FmGXLxiHwnwN2rqsXi4Gl3+9Bw6HA9p0Jax2B0yWHqQpZZDLpdAoZd7Vv2TA0VNt2PxhHburqKSoH/Kz03Cp3QJNmgLdVjuUSjnkUglM5t6KWxoFDCZXumeHk8Ezf/uCTdfszl9TUpSF3EwVLnVakaaSQ5cuh5MBVr24329EyVfNy2Dp4Tw+ajtnuCplIXD1rKR8d3317H1+wWYivFXekqpHCo7QzzRhS0L2BVGXhPQhJ0eLM43tfd5CZzDb8Z//tnjttQei1wl6ln70vPb/feAG3tKPfjLydNLzpg/DDVfle8vYhzrGibYLRSzvLiAeXYXWM2FLQvaFQLmAZFIJ/rJyRpwlEggJcODrJjaiti+dV4fRxrvAGo2FOL5tkhf0JrR2WEKSVZcux0N3jOFc2B05JMtbxj4EvxXma/B/7r8eFlsPcjNUsRmB0l53IkFJOgNAi8AuDN12r3QKXguYagV/hyMF2o12mCx22BxODC/K4lyIS1PJcbrZiHSVnDPIyg+fKl0ZagXy+nEv8qmUMmzYdMTlmjHbA3eKDHDlQB3mTR/m2kHDADtrz6DLZOdeLAw1+I1n9K/rTakQNRJwliEKyOiGRNIZAMoG6oJvAdNosffm5OfocCTA8YYOtLSZWZ//qCFZeGDeaPzPtstbGZdWXYNzTZ3409ZveIOsvODo5NznLF841qsy19Kqa/DOpy6X0+HvWvD2nrqgnaJWJcPQAh3Wv/UlMjQKzJw0BIX9tYBEwq4JhEu8Ujonc+ropIWMbsgk3S6gQIgpGyhf4JBKIecNrmk12HC+2eQVeDV6WD7+8VGdV7DU1o9PQqtWsee7g6z4AnS4Ojn3Of2z1Vhz7yQ2OGz3wbM4eb4TKoUMzt4dM0EDgBigdHQBfvvoFNx16yhs++QUfvv6Yax6cT+ON3T2qRpXVHYAJVA7xGWo8lfopJQBqDvfKbQIcUOXLseKRRP8ttCZLNzJ2zpMrgLpfkFcElc65M0f1WHzh3XY/JErBYO7BKP7fCfD8HZafJ2ck2HQ2mnFH7ccQYfRig2bjrCd//1zrsbeww1+MvIhlUrgdDLsWoD7nL7+YYcaeRsp8WqHuAwZ3dBJOhdQIEoKo+y/TWR6R8UD+nkveBrMPazf3b11Uip1FQPXpCtwtqkrpMAdm93p9VkqkfB2WoGCntRpcnSZ7NhZe4YtFiOVSGC29uBSh8VPxkAunWgWYolXcRAqQhJ/BK/XkETQNtAkhlNPj4yevkndHl88AXanE82t3dh98BxuGjcYmnQZMjPS8Kd/XN5l45uMjXcNoHehzWzrgb7Tihc9KmrVlJUgNzMNQwfq0HDR6NUBLp41Agq5FDv2nfaTkc9Xm5eXgfrz7Xj2tUOuHEC9bp99XzVi1V0T++ZP59qnH4u/hjDaEcu7C8RQ1wRbAxD6maZUHAAFgl2GV08J0NrlvQffnX
J5eGEmMjRKtHaa8Ye3LmdwXFY9BgzD+AViuYOsAFeuJc+0xp6GJk0pg8XmQKZGhTSVDC1tLiOz6q6J0KkVuNDuKu4CBqzr54e3X40Nm474jdS4Fkjz8jKgb+3C0fo2v9w7AfPvJ9luELG8u0CMdY2XcQ8BoZ9pSsUBBOKzr5uoIAwAMIDJo5BLblYaZk++wmukXVNWwpZGbGrtxm/++sXljtfpsZWSZzRVmK/Bhs1HUDW12JVvZ5or06YvbveMNk2O7Z96F14/39wVlkvHYLL7ZZp8aevX/DtqEmwkSMQRqvwVEim1CNxt6RFahITBc/Hx5olFfimZ395T50qt0AvfIhnfjgqjxYEMjcIr+2OgxU6u3DijPBLGcZ3jS7iLewm1GySCtNMEESsEmQG88MILeP/99wEA06ZNw8qVK6Ny3aSrBxDIPRHMdSFxjaC/b+mCSimDWiWHxdYDbboSOrUcYBgsXzgO55uNIaVTdgd/ndEboU1TwGSxQ5OmgM3egzX3TkKbwQqVUoptn5xCe5cVHV0W/GDWKGTrlFi7rBQ2mwNX/3ASWjssSFPJoG83Y2CO5vLUuzdK97ePToHB3AOLtQeZGiUeXzzBr0CKV86cXv3teiMUCllYi3tGs51deAaAvYcacKnD4pUO2u0mcOclkstkoQW+hfksQ5qJuGsCt5qgSVPAauuBVq1MeLcVkbzEvcesra3FZ599hm3btkEikWDp0qXYs2cPysrKQjo/ZSKBgxT3Dthh8AReqZQy7Nh3GvNnlmCLxyLu6nuuDZia2H3+me872WpeC8tK8MW3Tbh50hC8vP0btp1H7hwDg8mG5zd5LuqORJpSiv/d8V8vebjgqlv720en+CWe49Kv9usLqCkr8Utcx+nflQDtRhu2/7veK4XEnoPnXAaD4x66fy+/fkjgwLdwnmWo5R4DyHPXraPIbUXEhLgvAp88eRImkwnjxo0DADz11FMYMmQI7rnnnpDOD7QIXFKYidU/mCj4oksoGMx2rNpYy7kAKpVKUPtNM1s8ZO+hBnSZ7GyHwXfuvOnD4HQy2P7veq8c+K4yfSPw0tbLO31+VHUNTBY7TJYedmG2y2Rnz1MpZFi+cJzfIm1N2QjO5HHzpg/D23u+8/vOM2FbIJ09ffh8x1VNLcbeww2YOWkIhg3ORK6OP3cP3zXW3DsJhblqGLr529j+73ruZHN9eJa6dAUa9Cb8+pWDfuf9eun1KMrVBNV5+7/rUzJyOBn+TqOB0Hom1CLw8OHD2X+fPXsW77//Pt56662oXLvufCfy8jIAgP1/onLxlJ7TLWN1OnGxpZvtZD2Tn3XbHSguyuY918kwl33yHu6dptZuKGQSVE0tRv8cNZpbu9FltuFv/zruL5jk8vUsNodfO8Gqgfl+55Y5kM6exwQ6DhLgUocFb+/5Dr95eLLXOb7wXUMulyIvNyNgG1yyByKYXjaGu4DOgBwt8vK0QXXmukepQqL/nUaLRNVTMKf5yZMn8eCDD2LlypUYOnRoVK6ZrVNBr+8S3OKGglol53bLMBJs/Id3tOumPXWYN30Y1AoZ9Pou/nMlrmhZlcK/WpZUKnHNDKYVs//nuob7PJVChjSlv889WDUw3+/cMgfS2fOYQMd5yuZ7Tqj3V6MMfA/dlcp8ZQ9EML2UUu5yjUopE/TeuOUJVZZkIhn+TqOB0HoGmgEIsgvo8OHDuPfee/Gzn/0M8+bNi9p1bxxdELVrxRq+ilF8qRwK+2td7g4JIJUAD90xxuvcmrIS6DQK7DvSiKVV1/hVy9r26SksnjUC+75qxMLeyl6+VbZqykqw93ADO+t4b189llZd43XM4HwNHpjr/d3iWSPRL0Ppdy13dTBWZ7Uca+6dhJqyEiyYWYKCHDVnBSiue+MpWyhVo/jur/s8rt8XlpVg35FGTtkjacszTfWvl16PZx+e7OfTDyQPVckiYkXc1wCampowb948rF+/HqWlpWGfn1KRwBzBKgZzgCpVag
W7UJihUWD25CtQkKtBmlIGhVwKm92JuoYOHD5xERNHDkD/bDW6La5dKRs2He0twO6ESilFfj81urqtGJibgW6LHTqtCjIZ0GWyQ5uuQLelB+o0ORjGCYlEik6jDdp0BV5//7/Qd1rYjJyD8tSw9zDoNFqhVSthMNmQrpIjU63wrg7Gscj56PyxuHpoFncQl8+9SU9XoulSkFq5Idxfrkhmz11AMpkMunS5t+x9fJZhL9pKAJtTgoutRtcuIHsPtGmKlE0bkTR/pxEitJ4JFQm8du1a/POf/0RR0eU96DU1NVi0aFFI56fKIjAvAXaUBFq43PxRHX5+10TkZ6Xj8HctcDovR9zOnnwFCvO1rjz8m4/gUoeFPT83Kw133TrKq+AKb7BUbydntNihUshdZRa1oUXXhroAzEdSP9MwEIuegHh0FVrPhFoEfuKJJ/DEE0/E5NqXOi3BD0p0AlS14guESk+ToaZsBKw2Byx2Bz78ooFNtMYVAfyv2jOsEZg5aQg27fnOa7/867uOc+fXYQCdWtFbb+CLsKJro5nIjSCI6JBkkVMigSeMnSvLYUGOGtp0Jd7e/Y1fJ88XAezesqlSyFA8SAelwjsh28KyEhgtdu50DH0scEIZGgki8SADkGhwlFZ0R6Xq1HI8Mn+sV9bNpXOuwXOvH0aGRoGqicW92wadWDZ3NCw2B2ckbPHgTKy++1rk6FwFdH73hndpyU176rD2Qe71mUsGq1eOoZsnFgESwGjpCeirDpoWOZmStiWTrAQRgJQyAEmXC6i343SnRsjNTIO+3eyVGoFNxTwoA8fPdaLdYL7cqTPA+ZYudkHYcxT/o7nXQCmXckbC1jd2sjOAn9aM53TNmCx25Gj9R+fpvdsVfdvc/mkQV1Cggu3JlLQtmWQliCAkXTrolNkFJAHqL3ShUW/0Sm2weNZI7PisnvXRuyNqJ47IwxN/PuDax++RVXPlXRPR0GwMOTp3xaIJePndr9nr80X28rl0jDYHDp9ogdXuDOu8QPehtcvmWrhmLs9SfK+VKM800sXsYCSKnvFALLoKrWdCLQJHSqCi8ADwv6tvjqM0fcfQbUf9BYNXJ2q1O/DmBye80ji4o1JbDRZY7Q589V0z7p9zNV5591tY7Q7oO7rRP1sdcnRuY0uX1y6gD784h4fuGOO3C4jPnaNVyTA4T4suM3/pyZA7Qp78Nzt7F6kTcYGYFrOJVCLpDEAgkqkkZIfRxptWwS9Lp1KGHF0aCnLUKB09ENs+OYWashL0z1ZDJpcCjGsxuKm12+s8rujcHod3r95lsuPKggxu1wwXDFA8MAOtXbaIF3W5FpQ37alj898k4gIxLWYTqURK1QMYWdRPaBFCJitDxaZV8IQrS2f/7HTkZCqxrHoMNu2pg93hBAPg+U1H8Mxfv8Dv3/wSd8wYjoIcNXve4lkjkKlR+ETs+n+3fME4aNPk0KUrUJSrcY1igzkFGSAnQxk4+jUE+EbT0t7UCYkY/Ro06pcgkoiUmgEkE7p0OYoH6vzSGz+6YCzaOi1YMLMEYIB/1Z5hM4EyvTOGqonFfts7/7L9G6y5dxLau6wYkKNGT48DOrWSHdm7I0s9v4uoVF6gRd0Q4RtNTxyRj5wMZWIuqkZB75hBu5OIMEkpA5BUJSF7XSn9s9MxvKifaxeQTgWj2Y7/98aXfod3mGzI0vZW+fKowuXGanfg2zNt2PxhnVeaYQAevunLbgqdWgFDtx0NLaa+dxYRlt3j2xqasJ2/m0QsN0i7k4g+kHQGIGUKwgAAA2hVcmhVHo+BJ9tmlkbJdpjnW4wBM0cG9UcnSmeRyKPpJKOvAXqEuEk6AxBoF9CcKUOjMwOIdCodQjlHvt+DBUyNKspEYb4GA3LUXjt33Hv8A+3gcWO09OB8ixFV04oBuLZeRr2zCPUeJuJoOgmh3UlEX0g6AxBzIh0dBzs/2O/BRsW9s4YxV/TD849N98ocOWH4xOAjaAlwuqmLs+BM1DqLRJ
lhiAjanUT0BQoE8yHSQJ9g50czkKgvegYqJxlqCcS+thHJDEPoYJp40Wc9k9Do0jOND6IJBJNJJfjLyhkRXT/SqXSw84WeqvO1zxaciUJnIbSOooTWU4g+kHQGINaLwJFOpYOdL/RUna/9QTnqqHUWQusoWmg9hQiTlAoEiwaRBvr0pRRhPAOJ4tG+0DoSBBEatAbARaTl/cIoRRjJVD0Sf3E02o9nG0L7UeOFWPQExKOr0Hqm1BpAXIh0Kh3sfKGn6vFoX2gdCYIICrmACIIgRAoZAIIgCJFCBoAgCEKkkAEgCIIQKWQACIIgRAoZAIIgCJFCBoAgCEKkkAEgCIIQKWQACIIgRIogBmDHjh2YPXs2ysvL8cYbbwghAkEQhOiJeyqI5uZmrF+/Hlu3boVSqURNTQ2uv/56DBs2LN6iEARBiJq4zwBqa2txww03ICsrC2q1GrNmzcKuXbviLQZBEIToibsBaGlpQV5eHvs5Pz8fzc3N8RaDIAhC9MTdBeR0OiGRSNjPDMN4fQ4GX1pTX/LyMsKWLRkRi56AeHQVi56AeHRNVD3jbgAGDBiAQ4cOsZ/1ej3y8/NDPj8u9QCSBLHoCYhHV7HoCYhHV6H1DFQPIO4uoMmTJ+PAgQNoa2uD2WzG7t27MXXq1HiLQRAEIXriPgPo378/VqxYgbvvvht2ux133nknxowZE28xCIIgRI8gFcEqKytRWVkpRNMEQRBELxQJTBAEIVLIABAEQYgUMgAEQRAihQwAQRCESCEDQBAEIVLIABAEQYgUQbaBJgvv7DuNuTddGbPz171xGKt/MBHr3jiMkUX9vI59Z99pfPZ1E24cXeB3jXVvHAYATBw1AB/85yzaDVZUThnKHu/ms6+b8LuHp+Dxjftx4+gCnGhox8iifgCA3V+cBwAU9dfiUqcFuZlpqDvfiTlThmLuTVfi8Y37kZuZBgAYWdQPJxra2X+7r/vw7z9F+aRC9lrlkwrxrwPnAADFg3TssTeOLsBnXzcBAPvvbksPyicVYsf+s36yu/V16+nW9fDxi6ysDc1GFPXXsjLNvelKr/v4zr7TAMDq7JbZ9xj3eb56rv7BRL9ruHHL5/7ds213O77vgSee8nk+23VvHGb1dL8Xq38w0e9aXOd7fn+iod3vPK5ruGX2lMn3euHgK6/vNbiu6Sl3sGOD6RPOb9H42w52n4OdH0n70YIMQADe3X82oocU7Py6853s/+vOd3od++7+s7zX8DyP63hf2gxW9nvPczw/txmsXu21Gazsd57neP7bYnN4tef5b7dOvt9z/dv3/259+dr1lct97zzvo68svnK5j3Gfx3Vv+K7hls9TXk99ffF9Jp7yeT5b32twXc8ts+/5vt8HwlNvdxue98PzmHDgun++77TvNT3lDnZsMH3C+S0af9uREGn70YJcQARBECKFDABBEIRISToXkFQaWuroUI8LRH6/9IiuE+x89+/5/dIBeMvs/s73e9/fguF5/Wie05frhtM+0Hc9+WTjOiaUY4PJ53tssOcV6jPnen/43plA75IvXHr7tteXd9/3nGCffWUJdmyo7YbyWzT+toG+90eRth8OgdqRMAwTOLcyQRAEkZKQC4ggCEKkkAEgCIIQKWQACIIgRAoZAIIgCJFCBoAgCEKkkAEgCIIQKWQACIIgRAoZAIIgCJFCBoAgCEKkpJQB2LFjB2bPno3y8nK88cYbQosTdYxGI26//XY0NjYCAGpra1FZWYny8nKsX79eYOmixwsvvICKigpUVFTgueeeA5C6uj7//POYPXs2Kioq8OqrrwJIXV0B4Nlnn8Xq1asBpKaeS5YsQUVFBaqqqlBVVYWjR48mtp5MinDx4kVmxowZTHt7O2MymZjKykrm5MmTQosVNY4cOcLcfvvtzNVXX82cP3+eMZvNzLRp05iGhgbGbrcz9913H/PJJ58ILWbE7N+/n1m4cCFjtVoZm83G3H333cyOHTtSUteDBw8yNTU1jN
1uZ8xmMzNjxgzm+PHjKakrwzBMbW0tc/311zOrVq1KyffX6XQyN954I2O329nvEl3PlJkB1NbW4oYbbkBWVhbUajVmzZqFXbt2CS1W1Ni8eTOefPJJ5OfnAwCOHTuGIUOGoLCwEHK5HJWVlSmhb15eHlavXg2lUgmFQoHi4mKcPXs2JXW97rrr8Pe//x1yuRytra1wOBwwGAwpqWtHRwfWr1+PZcuWAUjN9/f0aVeRnvvuuw9z5szB66+/nvB6powBaGlpQV5eHvs5Pz8fzc3NAkoUXZ5++mlce+217OdU1Xf48OEYN24cAODs2bN4//33IZFIUlJXAFAoFNiwYQMqKipQWlqass/1V7/6FVasWAGdTgcgNd9fg8GA0tJSvPjii/jrX/+Kt99+GxcuXEhoPVPGADidTkgkl9OeMgzj9TnVSHV9T548ifvuuw8rV65EYWFhSuu6fPlyHDhwAE1NTTh79mzK6bplyxYUFBSgtLSU/S4V39/x48fjueeeQ0ZGBrKzs3HnnXdiw4YNCa1n0tUD4GPAgAE4dOgQ+1mv17PuklRkwIAB0Ov17OdU0vfw4cNYvnw51qxZg4qKCnz++ecpqWt9fT1sNhtGjRqF9PR0lJeXY9euXZDJZOwxqaDrzp07odfrUVVVhc7OTnR3d+P7779POT0PHToEu93OGjqGYTBo0KCEfndTZgYwefJkHDhwAG1tbTCbzdi9ezemTp0qtFgxY+zYsThz5gzOnTsHh8OB9957LyX0bWpqwiOPPILf/e53qKioAJC6ujY2NuKJJ56AzWaDzWbDRx99hJqampTT9dVXX8V7772H7du3Y/ny5bj55pvx8ssvp5yeXV1deO6552C1WmE0GrFt2zY89thjCa1nyswA+vfvjxUrVuDuu++G3W7HnXfeiTFjxggtVsxQqVRYt24dfvzjH8NqtWLatGm49dZbhRYrYl555RVYrVasW7eO/a6mpiYldZ02bRqOHTuGuXPnQiaToby8HBUVFcjOzk45XX1Jxfd3xowZOHr0KObOnQun04nFixdj/PjxCa0nVQQjCIIQKSnjAiIIgiDCgwwAQRCESCEDQBAEIVLIABAEQYgUMgAEQRAihQwAQRCESCEDQIiS++67D21tbREfc/DgQdx+++1B2xsxYgTntT766COsXbsWgCuV8K5du9DY2Ijx48cHvSZBRErKBIIRRDjs378/KsdEyi233IJbbrkl5u0QBBc0AyBExy9+8QsAwD333IPPP/8cS5YsQWVlJebMmYN33nnH75impiZ8/PHHqKmpQXV1NaZPn44//OEPYbf7hz/8AfPmzUNVVRU+/vhjAMDWrVvx4IMPRkUvgggXmgEQouOZZ57B1q1b8be//Q0LFizAypUrUV5ejubmZsyfPx9DhgzxOqZfv35YuXIl1q1bh6FDh6K5uRkzZszA3XffHVa7gwcPxlNPPYW6ujosWbIE77//fow0JIjQIANAiJb6+npYrVaUl5cDcOWTKi8vx759+7x88BKJBC+99BI++eQTvPfee6ivrwfDMDCbzWG1t2jRIgBASUkJiouL8dVXX0VPGYLoA+QCIkSLRCLxy83OMAx6enq8vuvu7sa8efPw7bff4qqrrsLKlSshl8sRbhotqfTyn5vT6YRcTuMvQljIABCiRCaTYdCgQZDL5di9ezcAoLm5GR988AEmT57MHtPT04Nz587BaDTipz/9KW6++WYcPHgQNpsNTqczrDa3bdsGAPj222/R0NCAsWPHRlcpgggTGoIQouTWW2/Fvffei40bN2Lt2rX44x//CIfDgUceeQQ33HADe8ySJUvw/PPPY/r06bjtttugVCpRUlKCYcOG4dy5c1AqlSG3ef78ecydOxcSiQS///3vkZWVFSPtCCI0KB00QRCESKEZAEFEgZdffhk7duzg/O3+++/HnDlz4iwRQQSHZgAEQRAihRaBCYIgRAoZAIIgCJFCBoAgCEKkkAEgCIIQKWQACI
IgRMr/B0x5tiafPYALAAAAAElFTkSuQmCC\",\n      \"text/plain\": [\n       \"<Figure size 432x288 with 1 Axes>\"\n      ]\n     },\n     \"metadata\": {},\n     \"output_type\": \"display_data\"\n    }\n   ],\n   \"source\": [\n    \"# Create a visualization with Modin df\\n\",\n    \"sns.scatterplot(data=modin_tips, x=\\\"total_bill\\\", y=\\\"tip\\\")\\n\",\n    \"sns.rugplot(data=modin_tips, x=\\\"total_bill\\\", y=\\\"tip\\\")\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 32,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/plain\": [\n       \"<AxesSubplot:xlabel='total_bill', ylabel='tip'>\"\n      ]\n     },\n     \"execution_count\": 32,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    },\n    {\n     \"data\": {\n      \"image/png\": \"iVBORw0KGgoAAAANSUhEUgAAAYAAAAEJCAYAAACdePCvAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAA4f0lEQVR4nO2de3wU5fX/P3tPdjebkBsESEADAVTuogaUi5KgxBCIAoGKWqWKl9JiLVDqr/bnCyva/kqlSu23+rWtV6AFEYsIilokFAUF1IKBcAmRkCy5bXazt+zO74/NDnuZ2Uv2Mrs75/16+ZLdnZnnnJnJc57nPM85R8IwDAOCIAhCdEiFFoAgCIIQBjIABEEQIoUMAEEQhEghA0AQBCFSyAAQBEGIFDIABEEQIoUMAEEQhEiRCy1AuLS3m+B0Bg5dyMnRorXVGCeJhEMsegLi0VUsegLi0VVoPaVSCfr103D+lnQGwOlkghoA93FiQCx6AuLRVSx6AuLRNVH1JBcQQRCESCEDQBAEIVLIABAEQYgUMgAEQRAihQwAQRCESCEDQBAEIVLIABAEQYgUMgAEQRAihQwAQRCESCEDQBAEIVLIABAEQYiUmBoAo9GI22+/HY2NjQCA2tpaVFZWory8HOvXr49l0wRBEEQQYmYAjh49ikWLFuHs2bMAAIvFgjVr1mDjxo3YuXMnvvnmG3z66aexap4gCCLxkAAGsx0NehMMlh5AIqw4MTMAmzdvxpNPPon8/HwAwLFjxzBkyBAUFhZCLpejsrISu3btilXzBEEQiYUEON7QiVUba/HrVw5i1Yv7cbyhU1AjEDMD8PTTT+Paa69lP7e0tCAvL4/9nJ+fj+bm5lg1TxAEkVAYuu3YsPkIrHYHAMBqd2DD5iMwdNsFkylu9QCcTickksumjmEYr8+hsvJPtWhpN3P+tqh8BBbPGgkAyMvL6JugSYZY9ATEo6tY9ATEo2teXgYuntKznb8bq92BbrsDxUXZgsgVNwMwYMAA6PV69rNer2fdQ+Hw3EOTAxZX0Ou7kJeXAb2+q09yJhNi0RMQj65i0RMQj65uPdUqOVQKmZcRUClkUCtkMb0PUqkEOTla7t9i1qoPY8eOxZkzZ3Du3Dk4HA689957mDp1aryaJwiCEBRduhzLF4yDSiED4Or8l
y8YB51aIZhMcZsBqFQqrFu3Dj/+8Y9htVoxbdo03HrrrfFqniAIQlgYYFRRJp59eDI6TDZkaZSuzl/AapExNwB79+5l/11aWop333031k0SBEEkJgygS1dAl65gPwsJRQITBEGIFDIABEEQIoUMAEEQhEghA0AQBCFSyAAQBEGIFDIABEEQIoUMAEEQhEghA0AQBCFSyAAQBEGIFDIABEEQIoUMAEEQRDgkWFWvSIhbMjiCIIikp7eql7uwizuj56iiTMHz+vQFmgEQBEGESCJW9YqEpJsBBKoINmfKUMy96co4S0QQhFjoMNo4q3p1mGyXM3wmEUlnAIJVBCMIgogVWRkqzqpeWRqlgFL1HXIBEQRBhEgiVvWKhKSbARAEQQhGAlb1igQyAARBEOGQYFW9IoFcQARBECKFDABBEIRIIQNAEAQhUsgAEARBiBQyAARBECKFDABBEIRIIQNAEAQhUsgAEARBiBQyAARBECKFDABBEIRIEcQAbN++HRUVFaioqMCzzz4rhAgEQRCiJ+4GwGw24+mnn8Zrr72G7du349ChQ6itrY23GARBEKIn7gbA4XDA6XTCbDajp6cHPT09UKlU8RaDIAhC9MQ9G6hWq8VPfvIT3HbbbUhPT8ekSZMwYcKEeItBEAQheiQMw8Q1memJEyewevVqvPLKK8jIyMDjjz+OMWPGYOnSpfEUgyAIQvTEfQbw2WefobS0FDk5OQCA6upqvPnmmyEbgNZWY9CSkHl5GdDruyKWNdERi56AeHQVi56AeHQVWk+pVIKcHC33b3GWBSNHjkRtbS26u7vBMAz27t2L0aNHx1sMgiAI0RP3GcCNN96I//73v6iuroZCocDo0aPxwAMPxFsMgiAI0SNIScgHHniAOn2CEAsSwNBtR4fRhqwMFXTp8qQuo5hKUE1ggiBihwQ43tCJDZuPwGp3QKWQYfmCcRhVlCm0ZAQoFQRBEDHE0G1nO38AsNod2LD5CAzddoElI4AknAGs/FMtWtrNnL/NmTIUc2+6Ms4SEQTBR4fRxnb+bqx2BzpMNoEkIjxJOgPw3EOTg24DJQgiMcjKUEGlkHkZAZVChiyNUkCpCDfkAiIIImbo0uVYvmAcVAoZALBrADq1QmDJCCAJZwAEQSQRDDCqKBPPPjwZHSYbsjRKV+dPk/iEgAwAQRCxhQF06Qro0hXsZyIxIBcQQRCESCEDQBAEIVLIABAEQYgUMgAEQRAihQwAQRCESCEDQBAEIVLIABAEQYgUMgAEQRAihQwAQYgFCWAw29GgN8Fg6QEkQguUpKTQfaRIYIIQA4Hy8lNkbuik2H2kGQBBiADKyx8dUu0+Jt0MIFA9gJLCTKz+wcQ4S0QQiU+gvPxsjh4iKKl2H5POAFA9AKJPCFWXNkHq4VJe/uiQavcx6QwAQYSNUH7bBPIXu/Py+8pCqZnDI9Xuo4RhmKQSu7XVGHQGkJeXAb2+K04SCYdY9AQi09VgtmPVxlq/UduzD0+O6bS9L+3G9Jm6ZyMJkpc/ad/fMO+j0HpKpRLk5Gg5f0u6GQCtARDhIpTfNuH8xZSXPzqk0H1MOgNAawBEuAjlt001fzGRetA2UCLlEaouLdXDJRKdpJsBEETYCFWXlurhEgkOGQBCHAjlt00hfzGRepALiCAIQqQIYgD27t2L6upq3HbbbVi7dq0QIhBEapBCicmI+BN3F9D58+fx5JNPYsuWLcjJycE999yDTz/9FNOmTYu3KASR3CRQoBmRnMR9BrBnzx7Mnj0bAwYMgEKhwPr16zF27Nh4i0EQwhGlUXuqJSYj4k/cZwDnzp2DQqHAsmXL0NTUhOnTp+OnP/1pyOcHCgRbVD4Ci2eNBOCKvhMDYtETiL2uTieDpksmtBnMyNaloyBXA6k0uj4Vp5PBga+bsP6tL9lR+4pFE1A6uoBtK1Q9L57ScwaaddsdKC7KjqrcsUIs72+i6hl3A+BwOHDo0CG89tprUKvVeOihh7Bt2zZUV
1eHdH6wQDC9vkvw0Ot4IRY9gTjoGid3isFsZzt/wNVhr3/rSwzo50oPEY6eapWcM9BMrZAlxXshlvdXaD0TKhVEbm4uSktLkZ3tGqHMnDkTx44dC9kABJoBzJkyFHNvujJqshLigc+dEu18QdFMD5FqicmI+BN3AzBjxgysWrUKBoMBGo0G+/btwy233BJvMQjCi3jl7YlqeggKNCMiJO4GYOzYsVi6dCkWL14Mu92OKVOm4I477gj5fMoFRPSZALn545W3J+qjdgo0IyKA0kEnMWLRE4iCrsF8/PHcUhkgnXBeXgb0l7oSoohMrBHL+yu0ngm1BkAQQhDUxx9Pd0qAUbvTydDefiJuJJ0BoEVgoi+E5ONPAHdK0yVTXBajCQKgXECESHD7+D1JxNz8bQYzr6EiiGiTdDMAWgQm+kKybJnM1qVTERkibiSdASCIPpEkWyYLcjVJYaiI1IAMACEeEsDHHwypVJIUhopIDcgAEESiEQ9DJQGMlh4YzD2wWHuQm5mWsttNCX5CNgCdnZ2QyWTQarn3kxJhECAgiUhQYvnMPK5tYyRQShHb90EC1F/oQqPeiLf31NF2UxET1ACcPn0aP//5z3H8+HFIJBKMHz8ezz33HAYOHBgP+VIPyuGefMTymXFc+9H5Y3H10CzAGQ3h/TF021F/wYBtn5yi7aYiJ6gB+MUvfoH58+fjjjvuAMMw2LRpE375y1/i1VdfjYd8fgSKAygpzMTqH0yMs0ThEa+kYylLKCPxaI3We69jtDoCP7MI2uN6H17YchRr7p2Ewlz15esEayMMGTqMNjgZhnO76aUuq7BrDuHcSyFm0ik2ew9qAMxmM2pqatjPS5YswebNm2MqVF+51GkRWoSgxCvpWEoSykg8WqN1j+tUTSvmf2ZqRUTt8b0Px8+2IVOtYA1MNNNYZGWoIJVIOLebnjrfCavVIcyMNBw9hJhJp+DsPWgg2JVXXokvv/yS/VxXV4fBgwfHVKi+cuPoAqFFCEqyBCQlIqFUwIpWlSzf6/A9s0jb43sfnE6wwV/B2ghXBl26HMUDdagpK2HbVilkWDxrBD784pxgVcXC0UOIamipWIEtqAG4cOEClixZgurqaixYsADV1dU4ceIEKisrUVlZGQ8ZQ+bd/WeFFiEo7oAkzz88dp83EZBAs6dwjgm3rb2HGrDQp7N0P7NI29Oly/Ho/LFe115YVoJ9RxrZQUGwNsKWgQGKB2Zg4og8/PKHk/Dj+eOwYOZw7PjsNC51WASLPA5Hj2g951jJlywEdQE9/vjj8ZAjZJI+EjhJApISkVBSNkcrrbPndS51WLCz9gzmTR+GYYMzkatTsc8s4vYY4OqhWVhz7yQcP9sGpxPYc/Ac7rp1VMht9EkGBtCq5HAywNOvfpEQkcfh6BGv9N1CtxlreGcA9fX1AACNRsP533XXXYfrrrsuboKmFL37vItyNWwmSiI4ocyeojXD8r1Ol8mOwnwtrhyg9XpmUWnPCRTmqnHDVf0xbkQuVt010cuvHKyNSGQQbEYqAb5vMaJBb4LB0gNIwpNFCLlTcfbOWw/gwQcfxJ///GdcddVVKCgogOdhZrMZBw4ciJuQnlA9gMuIRU/AQ9cAufRZQjkmFKRAq8GGVoMFObo05OiU3Fszo9UeAjzTYG1EIkMU5Q+1Pd7FVIQhS7zl7mObQv+dBqoHwGsAOjo6ALh2/bz22mtgGAYSiQR2ux133XUXPvjgg5gJHAgyAJcRi56AALoKtOND8Gcah22OBrMdqzbW+rlSUnUrtNDPtE8FYX72s59h//79kEgkKC0tZb+XyWSYNWtW9KUMEaoHQMQDUcZrxMno0VboxIHXALzyyisAXIFgzzzzTNwEIohEQIydVLyMXioupiYrQbeBUudPiJGEjdeQuFwonoun0SJe2xxTcTE1WUm6bKBJvw2USAoSsoBMjF00cRuZ926Ffv6x6bjYaqSt0AKSdAaAIOJCAsZrxNpFE1ejxwCD8
rVQShj2MxF/ks4A0CJwgiJ0kqxot++x3U+TpoDRbAckEkGTf8V8XSIBjR4RW5LOAJALKAEROklWtNvnuN7CshI2Qleo5F9xcdEkQdU0InoEXQQmiGDELEmWx4Ln9y1G3gXPaLfPdb1Ne+pw07jBsUn+FaKetHhKRJukmwEEcgGlKWXY+Ni0OEtExMQ1EcaoPtrt810PkhhsBQ1n9kIuGiLKJJ0BIBdQ4hGWayJEX33ABU+1wusa2ZlpUXWN8OkDJvoul7AXdslFQ0QRQQ3As88+i/b2dqxbty7kc5K9IlgqEvLukSiM6i8ZrPi+tRvnm4348Itz6DLZsXzBODy+eAJ+9+aXUdm9wqWPew0g2rtixBhwRiQOghmAAwcOYNu2bZg+fXpY54l6BuAzes6J9X0IdWeNh2vCaLFDpZSj02hDU7sFGWoFtCrX6NlztJublYabJxbhUmc3WgxpsFh7kKW93AbXKLwgRw2b3YlzF7sglUhQPX0Ytn5yih0xB3SNhLNLyMfVkqFWwmp3oKh/BnIz07jP8Uwcl5mGnAyexHE+pERUrNA7wIg+I4gB6OjowPr167Fs2TKcOHEirHNFOwPgGD2vWDQBJYMyYvPHFu7OGgbQqRX4/lI3Nmz+gj2npqwEg/O0KB6YwY52c7PSMHvyFdhz8BzKrh+CJ//nP35t+I7CC3LUuGPGcDy/6Suva1dMvgJ/23kclwxWyKUS3nq5Ye8ScrtaQin5KAWO1rfhpa1fs8csqx6NscXZQY1AQgachYPQO8CIiBBkF9CvfvUrrFixAjqdLqrXHVnUL6rXSyS4fMXr3/oyZuXo+rKzhuuct/fUof6CAYZuOzvavXliEburZtOeOu42PEbhv156PZYvHI+/bP/G79r52WpXLdvGTvz6lYNY9eJ+HG/o9NpJE8kuoVDObTXY2M7ffcxLW79GqyGEFAo+ej7/2PSk6jxTsUyimIj7DGDLli0oKChAaWkptm7dGtVrv7v/LH5UPRaAKwVrKnHxlJ7TV9xtd6C4KDsh2uM7x8kw6LY7cPUVuVixaALONnV67aoJ1EZe7/eH/nuR81irzYGashL8q/YM+92GzUfw/GPTMShf22ddwrkPJy9e4Dym3WjFyCtyAl7fTV7wQxKSSN/LVPs75SNR9Yy7Adi5cyf0ej2qqqrQ2dmJ7u5u/OY3v8GaNWsivvacKUOh13cJnn87FqhVck5fsVohi4mufWmP7xypRAK1QobWViNKBmUgR6fCp182YuiAjJDbGJCj4Ty2IFeDN3efwKUOC/u91e7AxVYjm2YgbF08fNoatQIFOWo0tXbznpvN48fvp1WF/WyS7d2N5L1MNl37itB69qkgTDzYunUrPv/887B2AYm2IEyirwFIAKOlB2cudmHjP45xrgGw58mAIyfbsOWjOpRdP4R1AwVqIydHi8+OfO8nT2F/LX7+x88CFxcJRxeOY5dVj8bmD+vQ1Nod9TUAX5Lu3Y1gDSDpdO0jQutJBiBV8ClHd8XgfmhtNcatPd6FSY9OIEOjwOzJV2BgnhbadAV06XJo07wXZVuNNjzx5wNeu4GkUmBcSR7ydSrONvLyMqC/1OUvD0LsgELUha9a1doHS2Gy2PnPDbV8ZBDi8u7GMG9SOMFpKft36oPQevapIlg8qK6uRnV1dVjnBNoFlK1T4XcPT4mGaImJTxCQVBrFZPAhtMf3R+25EGjtcODvO4+zo3Ctyr9zaTVY2A72UocFmz+qAwAMLdAhP0MVtjwhRcfy6eLTGbZ2WTl92iaLHUW5Gv774ARytErkaJXs54QkFrt2KDgtaUm6SOBA3Di6QGgRREm4wUw5PJG7OTqePfbB6GsHxNEZPrZ4QvLvyw+AKEtdEryklAEg+kiELgG+oC1NmgIX2rqRnqaA2eaAxdqD3Mw05GQqserua1HX0A4nA+z7qhELZpZcdptwycMh7yWDFekqOXRaBRx2p/fxkuCBWVyBaY0tXVi5ZCJe3v6Nl
88/afblB6HDJPLIYwpa8yKlDMCJhnahRUg+ouAS4AraWjCzBL9/60tU3nQlrDbXnn3P3zwXTB+ZPxbXXJEFOPjlycnW8sr7o6pr8M+PT3p12JAw2LDpaMBFWd/ANM/F6PvnXA2TxY7hhf0wsF9aynQSmjQF5wxHkyaOzp+C1rxJqXTQqRwI1idCqB8blUAej2CmtQ/egJ/UjEdzWzfuum0UDCY72/kDwE3jBvsFTb245SgMRntAeZoumXh//8v2b3DTuMFex59vNgUNzPINTPM8/pV3v4XV5mTTWPTl3iYiVlsPFpaVeKWUXlhWAqu9R2DJYg8FrfmTUjOAd/efpYpgbkIc7UQtGRlHKoiashI4GZ9gL57gL3d7fPK0dZkxQJfG68Lw7IDdwWe+x7QaLK5F2l43gNFsx6Pzx+J7vZHzmoX9tdyunyQeSWrVSuw5eA5VU4td94wB9hw8hwnDUzSFigeUeM+fpDMAok4GFwahLvZFMxmZb5tOBpBKJJzX52uPT57sjHRAwkDCcz3PjtcdfOYJu8js03kX5Kjx6Pxx2PZJvd81B+Wow09VneAdiS5djrtuHZW8uYciICUS70WZpDMAot4GGgahjnZCSkYW4sKZ0Wy/PLIE8NV3zZgxsRA1ZSWsG2jfV4340dxr8Jd3vmHbe3T+WOg0CsDJL0//bDVOnW/H2aZO3D/napeLpvf3B+aNxj96t5J6rgG4/9jdawAqpQxGS49X593U2o0XthzBo/PH4oUtR0PqFJN6JCniojJJn3gvBiSdAQgEb6peERLyaCdYhxDI3YHLhiE7Mw3tJhu2/7uePW5hWQn2fdWI++ZcgycKr4O+3Yz2Lgv+8/UFNieQ0wm8+cEJr1q7vqmYbT0O1H59ga0BoJBJsWLRBDS2dMFmd6J/dhpW3TXRW34JsHZZKZrbzGhsMeKND06gy2THQ3eMQYZGAWvH5fvS1NqNfhnKkDtFwUeSke5kEeu+fREbPz5SygDQIvBlwhrtBOgQ+Nwdv310Cs63mNjfaspGYNsnp7yO27SnDmvunYQcrRINvccCwIJbSrD+rS+9OlAvF0rveoK+w4Jjpy6xswe3UdlZewbr3/oSVVOLsf3f9Zh8TX9/+RlAIZXgjx6yA8Cf/nkM86YPw9t7vmO/Uylk0KYpQu4Uue7tsuoxcACubRV9DQKTAN+3GHHxkom/Yw/RINMWRx7Eavx4SCkD8K8D52gR2E1fRzs+o0uj2c7p7jCYe3z8/QzncRab61h32cYMjQL9s9VBXSiGbjvqLxg4jUrV1GJs/qgOUin65Kop7K/1cg+F7QZw39tHJqNRb0JjiwlvfHAcXSZ7n3MAhbqwHKpBTqaFaUI4UsoAKOQptas1csId7fR2Qq/vOo6bxg2GVAqMHJKNUUOyoO+0oGLyFcjPVoNhGNh7HFgwcziydWnI1KqgkEmguf0q7PjsNJuZU6WQocfhxNFTegwb3A+PLR4PQIKLrSYvF0puVhpmThqCHgcDg6UHOrUc3TYHBuVpUDWtGHsPNQAAbp5YBEiAoQUZGDUkCxNG5MNg8q885obPVTMoRx25G4AB7D0M/rj5qNf1X9r6NdY+WOq128g3oI1rlB7qwjKfUfM1yHFdmPbVUy2HwRTiTIQCswQlpQwAERmGbjte33XcL0Pnw3eOgcPhRKfRhtffP47Km66ExebA5g9PssfUlJUgTSnDHdOH4Z+fnEKXyY6lVdfgvX31mHR1AZ565SB77OJZI3BPxSj87V/HkaFRoGLyFV5uHt/sm/dUjIK9x4k3P/iOPebB6tFY/9aX7DFcWUd1ajmWVY/2y9J5ecE5MjeAZ04jN+x20wwl54heKZf61S4eVZQZ8sIyn1GzWHuEWZjm2FXlG+jHNxNxOpmk3U6bKqTUkFmdRvYsEjqMNs4qXRv/cQxpSgXe7q3iZTDZ2V087mPe3lOHTpMdnb1F2pcvHIetH5/E6GH5ftd784PvYLb2oGpqMX54+9Veg
WLuoC3PwC6Dyc52/u7v/uxzjLvymNHSwwZotRps2P2fs6iaWowFM0tcrqMP62AweQT+RBDQ5c5p5Il7uynfiL7+goEzEMndsftey3dh2b3+4BnItXzBOOTyyBLrhWlfPbkC/fiCrZoumTjvkZgDs+JNSvWYbQar0CIkNVkZKkil3IFaZlsPG3DF5+93B1/Zepw439zlKqLCE/hltTux+aM61JSVBA3sksskIQV/qZRSnG7qwp/+ebkegXvR2LNgDDsqjjCgKydDyTnDyNEp0eARicx1jzy/6zDZUJSnCW3Rnm9tBxBki6PfzCVIoJ8nbQZz8m6nTRGSzgBQIFjs0KXLMWpoNqeLIV0pZ0eYfMFd7uCr3AwVFL3Xcf/GFbilUshwxcDMgIFduVlpKOzPXTnMN/irfz8NWzQe8F80dh/nHhVHHNDlBMYWZ2Ptg6V+dQD4XDVcAWpZGiXbsT//2HRcbDUGXpuIJC12lOHTM5Qtstm6dArMEpikcwGt/FMt7lu3l/O/dW8cFlq85EMCGK09uNBhwemLRmRqlXhs0Xg/F4Na5fKz7/uqETqNAotnjfQ6pqasBJkaBYoH6qBTK1CYp8aj88fi3IUOVxCWx7FLq65BepoM86YPg9Fs87vWsurR2HekEQAwc9IQ/PW9b/3y1zx05xj2GHf7Ep7Rp7T3LfcaFSNwQFfI9NYBKBmocy389u7+4XPVFA/U+X3nlgcMMChfi6JcDbsdNix6DUOfz+8Dvnru+6rR73l76ehBQa6G8x5xHUvEBkErgvWF+9fu5o0EnjNlKObedKXgFXjiRcR6SoD6C11o1Bu9FmGXLxiHwnwN2rqsXi4Gl3+9Bw6HA9p0Jax2B0yWHqQpZZDLpdAoZd7Vv2TA0VNt2PxhHburqKSoH/Kz03Cp3QJNmgLdVjuUSjnkUglM5t6KWxoFDCZXumeHk8Ezf/uCTdfszl9TUpSF3EwVLnVakaaSQ5cuh5MBVr24329EyVfNy2Dp4Tw+ajtnuCplIXD1rKR8d3317H1+wWYivFXekqpHCo7QzzRhS0L2BVGXhPQhJ0eLM43tfd5CZzDb8Z//tnjttQei1wl6ln70vPb/feAG3tKPfjLydNLzpg/DDVfle8vYhzrGibYLRSzvLiAeXYXWM2FLQvaFQLmAZFIJ/rJyRpwlEggJcODrJjaiti+dV4fRxrvAGo2FOL5tkhf0JrR2WEKSVZcux0N3jOFc2B05JMtbxj4EvxXma/B/7r8eFlsPcjNUsRmB0l53IkFJOgNAi8AuDN12r3QKXguYagV/hyMF2o12mCx22BxODC/K4lyIS1PJcbrZiHSVnDPIyg+fKl0ZagXy+nEv8qmUMmzYdMTlmjHbA3eKDHDlQB3mTR/m2kHDADtrz6DLZOdeLAw1+I1n9K/rTakQNRJwliEKyOiGRNIZAMoG6oJvAdNosffm5OfocCTA8YYOtLSZWZ//qCFZeGDeaPzPtstbGZdWXYNzTZ3409ZveIOsvODo5NznLF841qsy19Kqa/DOpy6X0+HvWvD2nrqgnaJWJcPQAh3Wv/UlMjQKzJw0BIX9tYBEwq4JhEu8Ujonc+ropIWMbsgk3S6gQIgpGyhf4JBKIecNrmk12HC+2eQVeDV6WD7+8VGdV7DU1o9PQqtWsee7g6z4AnS4Ojn3Of2z1Vhz7yQ2OGz3wbM4eb4TKoUMzt4dM0EDgBigdHQBfvvoFNx16yhs++QUfvv6Yax6cT+ON3T2qRpXVHYAJVA7xGWo8lfopJQBqDvfKbQIcUOXLseKRRP8ttCZLNzJ2zpMrgLpfkFcElc65M0f1WHzh3XY/JErBYO7BKP7fCfD8HZafJ2ck2HQ2mnFH7ccQYfRig2bjrCd//1zrsbeww1+MvIhlUrgdDLsWoD7nL7+YYcaeRsp8WqHuAwZ3dBJOhdQIEoKo+y/TWR6R8UD+nkveBrMPazf3b11Uip1FQPXpCtwtqkrpMAdm93p9VkqkfB2W
oGCntRpcnSZ7NhZe4YtFiOVSGC29uBSh8VPxkAunWgWYolXcRAqQhJ/BK/XkETQNtAkhlNPj4yevkndHl88AXanE82t3dh98BxuGjcYmnQZMjPS8Kd/XN5l45uMjXcNoHehzWzrgb7Tihc9KmrVlJUgNzMNQwfq0HDR6NUBLp41Agq5FDv2nfaTkc9Xm5eXgfrz7Xj2tUOuHEC9bp99XzVi1V0T++ZP59qnH4u/hjDaEcu7C8RQ1wRbAxD6maZUHAAFgl2GV08J0NrlvQffnXJ5eGEmMjRKtHaa8Ye3LmdwXFY9BgzD+AViuYOsAFeuJc+0xp6GJk0pg8XmQKZGhTSVDC1tLiOz6q6J0KkVuNDuKu4CBqzr54e3X40Nm474jdS4Fkjz8jKgb+3C0fo2v9w7AfPvJ9luELG8u0CMdY2XcQ8BoZ9pSsUBBOKzr5uoIAwAMIDJo5BLblYaZk++wmukXVNWwpZGbGrtxm/++sXljtfpsZWSZzRVmK/Bhs1HUDW12JVvZ5or06YvbveMNk2O7Z96F14/39wVlkvHYLL7ZZp8aevX/DtqEmwkSMQRqvwVEim1CNxt6RFahITBc/Hx5olFfimZ395T50qt0AvfIhnfjgqjxYEMjcIr+2OgxU6u3DijPBLGcZ3jS7iLewm1GySCtNMEESsEmQG88MILeP/99wEA06ZNw8qVK6Ny3aSrBxDIPRHMdSFxjaC/b+mCSimDWiWHxdYDbboSOrUcYBgsXzgO55uNIaVTdgd/ndEboU1TwGSxQ5OmgM3egzX3TkKbwQqVUoptn5xCe5cVHV0W/GDWKGTrlFi7rBQ2mwNX/3ASWjssSFPJoG83Y2CO5vLUuzdK97ePToHB3AOLtQeZGiUeXzzBr0CKV86cXv3teiMUCllYi3tGs51deAaAvYcacKnD4pUO2u0mcOclkstkoQW+hfksQ5qJuGsCt5qgSVPAauuBVq1MeLcVkbzEvcesra3FZ599hm3btkEikWDp0qXYs2cPysrKQjo/ZSKBgxT3Dthh8AReqZQy7Nh3GvNnlmCLxyLu6nuuDZia2H3+me872WpeC8tK8MW3Tbh50hC8vP0btp1H7hwDg8mG5zd5LuqORJpSiv/d8V8vebjgqlv720en+CWe49Kv9usLqCkr8Utcx+nflQDtRhu2/7veK4XEnoPnXAaD4x66fy+/fkjgwLdwnmWo5R4DyHPXraPIbUXEhLgvAp88eRImkwnjxo0DADz11FMYMmQI7rnnnpDOD7QIXFKYidU/mCj4oksoGMx2rNpYy7kAKpVKUPtNM1s8ZO+hBnSZ7GyHwXfuvOnD4HQy2P7veq8c+K4yfSPw0tbLO31+VHUNTBY7TJYedmG2y2Rnz1MpZFi+cJzfIm1N2QjO5HHzpg/D23u+8/vOM2FbIJ09ffh8x1VNLcbeww2YOWkIhg3ORK6OP3cP3zXW3DsJhblqGLr529j+73ruZHN9eJa6dAUa9Cb8+pWDfuf9eun1KMrVBNV5+7/rUzJyOBn+TqOB0Hom1CLw8OHD2X+fPXsW77//Pt56662oXLvufCfy8jIAgP1/onLxlJ7TLWN1OnGxpZvtZD2Tn3XbHSguyuY918kwl33yHu6dptZuKGQSVE0tRv8cNZpbu9FltuFv/zruL5jk8vUsNodfO8Gqgfl+55Y5kM6exwQ6DhLgUocFb+/5Dr95eLLXOb7wXUMulyIvNyNgG1yyByKYXjaGu4DOgBwt8vK0QXXmukepQqL/nUaLRNVTMKf5yZMn8eCDD2LlypUYOnRoVK6ZrVNBr+8S3OKGglol53bLMBJs/Id3tOumPXWYN30Y1AoZ9Pou/nMlrmhZlcK/WpZUKnHNDKYVs//nuob7PJVChjSlv889WDUw3+/cMgfS2fOYQMd5yuZ7Tqj3V6MMfA/dlcp8ZQ9EML2UUu5yjUopE/TeuOUJVZZkIhn+TqOB0HoGmgEIsgvo8OHDuPfee/Gzn/0M8+bNi
9p1bxxdELVrxRq+ilF8qRwK+2td7g4JIJUAD90xxuvcmrIS6DQK7DvSiKVV1/hVy9r26SksnjUC+75qxMLeyl6+VbZqykqw93ADO+t4b189llZd43XM4HwNHpjr/d3iWSPRL0Ppdy13dTBWZ7Uca+6dhJqyEiyYWYKCHDVnBSiue+MpWyhVo/jur/s8rt8XlpVg35FGTtkjacszTfWvl16PZx+e7OfTDyQPVckiYkXc1wCampowb948rF+/HqWlpWGfn1KRwBzBKgZzgCpVagW7UJihUWD25CtQkKtBmlIGhVwKm92JuoYOHD5xERNHDkD/bDW6La5dKRs2He0twO6ESilFfj81urqtGJibgW6LHTqtCjIZ0GWyQ5uuQLelB+o0ORjGCYlEik6jDdp0BV5//7/Qd1rYjJyD8tSw9zDoNFqhVSthMNmQrpIjU63wrg7Gscj56PyxuHpoFncQl8+9SU9XoulSkFq5Idxfrkhmz11AMpkMunS5t+x9fJZhL9pKAJtTgoutRtcuIHsPtGmKlE0bkTR/pxEitJ4JFQm8du1a/POf/0RR0eU96DU1NVi0aFFI56fKIjAvAXaUBFq43PxRHX5+10TkZ6Xj8HctcDovR9zOnnwFCvO1rjz8m4/gUoeFPT83Kw133TrKq+AKb7BUbydntNihUshdZRa1oUXXhroAzEdSP9MwEIuegHh0FVrPhFoEfuKJJ/DEE0/E5NqXOi3BD0p0AlS14guESk+ToaZsBKw2Byx2Bz78ooFNtMYVAfyv2jOsEZg5aQg27fnOa7/867uOc+fXYQCdWtFbb+CLsKJro5nIjSCI6JBkkVMigSeMnSvLYUGOGtp0Jd7e/Y1fJ88XAezesqlSyFA8SAelwjsh28KyEhgtdu50DH0scEIZGgki8SADkGhwlFZ0R6Xq1HI8Mn+sV9bNpXOuwXOvH0aGRoGqicW92wadWDZ3NCw2B2ckbPHgTKy++1rk6FwFdH73hndpyU176rD2Qe71mUsGq1eOoZsnFgESwGjpCeirDpoWOZmStiWTrAQRgJQyAEmXC6i343SnRsjNTIO+3eyVGoFNxTwoA8fPdaLdYL7cqTPA+ZYudkHYcxT/o7nXQCmXckbC1jd2sjOAn9aM53TNmCx25Gj9R+fpvdsVfdvc/mkQV1Cggu3JlLQtmWQliCAkXTrolNkFJAHqL3ShUW/0Sm2weNZI7PisnvXRuyNqJ47IwxN/PuDax++RVXPlXRPR0GwMOTp3xaIJePndr9nr80X28rl0jDYHDp9ogdXuDOu8QPehtcvmWrhmLs9SfK+VKM800sXsYCSKnvFALLoKrWdCLQJHSqCi8ADwv6tvjqM0fcfQbUf9BYNXJ2q1O/DmBye80ji4o1JbDRZY7Q589V0z7p9zNV5591tY7Q7oO7rRP1sdcnRuY0uX1y6gD784h4fuGOO3C4jPnaNVyTA4T4suM3/pyZA7Qp78Nzt7F6kTcYGYFrOJVCLpDEAgkqkkZIfRxptWwS9Lp1KGHF0aCnLUKB09ENs+OYWashL0z1ZDJpcCjGsxuKm12+s8rujcHod3r95lsuPKggxu1wwXDFA8MAOtXbaIF3W5FpQ37alj898k4gIxLWYTqURK1QMYWdRPaBFCJitDxaZV8IQrS2f/7HTkZCqxrHoMNu2pg93hBAPg+U1H8Mxfv8Dv3/wSd8wYjoIcNXve4lkjkKlR+ETs+n+3fME4aNPk0KUrUJSrcY1igzkFGSAnQxk4+jUE+EbT0t7UCYkY/Ro06pcgkoiUmgEkE7p0OYoH6vzSGz+6YCzaOi1YMLMEYIB/1Z5hM4EyvTOGqonFfts7/7L9G6y5dxLau6wYkKNGT48DOrWSHdm7I0s9v4uoVF6gRd0Q4RtNTxyRj5wMZWIuqkZB75hBu5OIMEkpA5BUJSF7XSn9s9MxvKifaxeQTgWj2Y7/98aXfod3mGzI0vZW+fKowuXGanfg2zNt2PxhnVeaYQAev
unLbgqdWgFDtx0NLaa+dxYRlt3j2xqasJ2/m0QsN0i7k4g+kHQGIGUKwgAAA2hVcmhVHo+BJ9tmlkbJdpjnW4wBM0cG9UcnSmeRyKPpJKOvAXqEuEk6AxBoF9CcKUOjMwOIdCodQjlHvt+DBUyNKspEYb4GA3LUXjt33Hv8A+3gcWO09OB8ixFV04oBuLZeRr2zCPUeJuJoOgmh3UlEX0g6AxBzIh0dBzs/2O/BRsW9s4YxV/TD849N98ocOWH4xOAjaAlwuqmLs+BM1DqLRJlhiAjanUT0BQoE8yHSQJ9g50czkKgvegYqJxlqCcS+thHJDEPoYJp40Wc9k9Do0jOND6IJBJNJJfjLyhkRXT/SqXSw84WeqvO1zxaciUJnIbSOooTWU4g+kHQGINaLwJFOpYOdL/RUna/9QTnqqHUWQusoWmg9hQiTlAoEiwaRBvr0pRRhPAOJ4tG+0DoSBBEatAbARaTl/cIoRRjJVD0Sf3E02o9nG0L7UeOFWPQExKOr0Hqm1BpAXIh0Kh3sfKGn6vFoX2gdCYIICrmACIIgRAoZAIIgCJFCBoAgCEKkkAEgCIIQKWQACIIgRAoZAIIgCJFCBoAgCEKkkAEgCIIQKWQACIIgRIogBmDHjh2YPXs2ysvL8cYbbwghAkEQhOiJeyqI5uZmrF+/Hlu3boVSqURNTQ2uv/56DBs2LN6iEARBiJq4zwBqa2txww03ICsrC2q1GrNmzcKuXbviLQZBEIToibsBaGlpQV5eHvs5Pz8fzc3N8RaDIAhC9MTdBeR0OiGRSNjPDMN4fQ4GX1pTX/LyMsKWLRkRi56AeHQVi56AeHRNVD3jbgAGDBiAQ4cOsZ/1ej3y8/NDPj8u9QCSBLHoCYhHV7HoCYhHV6H1DFQPIO4uoMmTJ+PAgQNoa2uD2WzG7t27MXXq1HiLQRAEIXriPgPo378/VqxYgbvvvht2ux133nknxowZE28xCIIgRI8gFcEqKytRWVkpRNMEQRBELxQJTBAEIVLIABAEQYgUMgAEQRAihQwAQRCESCEDQBAEIVLIABAEQYgUQbaBJgvv7DuNuTddGbPz171xGKt/MBHr3jiMkUX9vI59Z99pfPZ1E24cXeB3jXVvHAYATBw1AB/85yzaDVZUThnKHu/ms6+b8LuHp+Dxjftx4+gCnGhox8iifgCA3V+cBwAU9dfiUqcFuZlpqDvfiTlThmLuTVfi8Y37kZuZBgAYWdQPJxra2X+7r/vw7z9F+aRC9lrlkwrxrwPnAADFg3TssTeOLsBnXzcBAPvvbksPyicVYsf+s36yu/V16+nW9fDxi6ysDc1GFPXXsjLNvelKr/v4zr7TAMDq7JbZ9xj3eb56rv7BRL9ruHHL5/7ds213O77vgSee8nk+23VvHGb1dL8Xq38w0e9aXOd7fn+iod3vPK5ruGX2lMn3euHgK6/vNbiu6Sl3sGOD6RPOb9H42w52n4OdH0n70YIMQADe3X82oocU7Py6853s/+vOd3od++7+s7zX8DyP63hf2gxW9nvPczw/txmsXu21Gazsd57neP7bYnN4tef5b7dOvt9z/dv3/259+dr1lct97zzvo68svnK5j3Gfx3Vv+K7hls9TXk99ffF9Jp7yeT5b32twXc8ts+/5vt8HwlNvdxue98PzmHDgun++77TvNT3lDnZsMH3C+S0af9uREGn70YJcQARBECKFDABBEIRISToXkFQaWuroUI8LRH6/9IiuE+x89+/5/dIBeMvs/s73e9/fguF5/Wie05frhtM+0Hc9+WTjOiaUY4PJ53tssOcV6jPnen/43plA75IvXHr7tteXd9/3nGCffWUJdmyo7YbyWzT+toG+90eRth8OgdqRMAwTOLcyQRAEkZKQC4ggCEKkkAEgCIIQKWQACIIgRAoZAIIgCJFCBoAgCEKkkAEgCIIQKWQACIIgRAoZAIIgCJFCBoAgCEKkpJQB2LFjB2bPno3y8nK88cYbQosTd
YxGI26//XY0NjYCAGpra1FZWYny8nKsX79eYOmixwsvvICKigpUVFTgueeeA5C6uj7//POYPXs2Kioq8OqrrwJIXV0B4Nlnn8Xq1asBpKaeS5YsQUVFBaqqqlBVVYWjR48mtp5MinDx4kVmxowZTHt7O2MymZjKykrm5MmTQosVNY4cOcLcfvvtzNVXX82cP3+eMZvNzLRp05iGhgbGbrcz9913H/PJJ58ILWbE7N+/n1m4cCFjtVoZm83G3H333cyOHTtSUteDBw8yNTU1jN1uZ8xmMzNjxgzm+PHjKakrwzBMbW0tc/311zOrVq1KyffX6XQyN954I2O329nvEl3PlJkB1NbW4oYbbkBWVhbUajVmzZqFXbt2CS1W1Ni8eTOefPJJ5OfnAwCOHTuGIUOGoLCwEHK5HJWVlSmhb15eHlavXg2lUgmFQoHi4mKcPXs2JXW97rrr8Pe//x1yuRytra1wOBwwGAwpqWtHRwfWr1+PZcuWAUjN9/f0aVeRnvvuuw9z5szB66+/nvB6powBaGlpQV5eHvs5Pz8fzc3NAkoUXZ5++mlce+217OdU1Xf48OEYN24cAODs2bN4//33IZFIUlJXAFAoFNiwYQMqKipQWlqass/1V7/6FVasWAGdTgcgNd9fg8GA0tJSvPjii/jrX/+Kt99+GxcuXEhoPVPGADidTkgkl9OeMgzj9TnVSHV9T548ifvuuw8rV65EYWFhSuu6fPlyHDhwAE1NTTh79mzK6bplyxYUFBSgtLSU/S4V39/x48fjueeeQ0ZGBrKzs3HnnXdiw4YNCa1n0tUD4GPAgAE4dOgQ+1mv17PuklRkwIAB0Ov17OdU0vfw4cNYvnw51qxZg4qKCnz++ecpqWt9fT1sNhtGjRqF9PR0lJeXY9euXZDJZOwxqaDrzp07odfrUVVVhc7OTnR3d+P7779POT0PHToEu93OGjqGYTBo0KCEfndTZgYwefJkHDhwAG1tbTCbzdi9ezemTp0qtFgxY+zYsThz5gzOnTsHh8OB9957LyX0bWpqwiOPPILf/e53qKioAJC6ujY2NuKJJ56AzWaDzWbDRx99hJqampTT9dVXX8V7772H7du3Y/ny5bj55pvx8ssvp5yeXV1deO6552C1WmE0GrFt2zY89thjCa1nyswA+vfvjxUrVuDuu++G3W7HnXfeiTFjxggtVsxQqVRYt24dfvzjH8NqtWLatGm49dZbhRYrYl555RVYrVasW7eO/a6mpiYldZ02bRqOHTuGuXPnQiaToby8HBUVFcjOzk45XX1Jxfd3xowZOHr0KObOnQun04nFixdj/PjxCa0nVQQjCIIQKSnjAiIIgiDCgwwAQRCESCEDQBAEIVLIABAEQYgUMgAEQRAihQwAQRCESCEDQIiS++67D21tbREfc/DgQdx+++1B2xsxYgTntT766COsXbsWgCuV8K5du9DY2Ijx48cHvSZBRErKBIIRRDjs378/KsdEyi233IJbbrkl5u0QBBc0AyBExy9+8QsAwD333IPPP/8cS5YsQWVlJebMmYN33nnH75impiZ8/PHHqKmpQXV1NaZPn44//OEPYbf7hz/8AfPmzUNVVRU+/vhjAMDWrVvx4IMPRkUvgggXmgEQouOZZ57B1q1b8be//Q0LFizAypUrUV5ejubmZsyfPx9DhgzxOqZfv35YuXIl1q1bh6FDh6K5uRkzZszA3XffHVa7gwcPxlNPPYW6ujosWbIE77//fow0JIjQIANAiJb6+npYrVaUl5cDcOWTKi8vx759+7x88BKJBC+99BI++eQTvPfee6ivrwfDMDCbzWG1t2jRIgBASUkJiouL8dVXX0VPGYLoA+QCIkSLRCLxy83OMAx6enq8vuvu7sa8efPw7bff4qqrrsLKlSshl8sRbhotqfTyn5vT6YRcTuMvQljIABCiRCaTYdCgQZDL5di9ezcAoLm5GR988AEmT57MHtPT04Nz587BaDTipz/9K
W6++WYcPHgQNpsNTqczrDa3bdsGAPj222/R0NCAsWPHRlcpgggTGoIQouTWW2/Fvffei40bN2Lt2rX44x//CIfDgUceeQQ33HADe8ySJUvw/PPPY/r06bjtttugVCpRUlKCYcOG4dy5c1AqlSG3ef78ecydOxcSiQS///3vkZWVFSPtCCI0KB00QRCESKEZAEFEgZdffhk7duzg/O3+++/HnDlz4iwRQQSHZgAEQRAihRaBCYIgRAoZAIIgCJFCBoAgCEKkkAEgCIIQKWQACIIgRMr/B0x5tiafPYALAAAAAElFTkSuQmCC\",\n      \"text/plain\": [\n       \"<Figure size 432x288 with 1 Axes>\"\n      ]\n     },\n     \"metadata\": {},\n     \"output_type\": \"display_data\"\n    }\n   ],\n   \"source\": [\n    \"# Create a visualization with pandas df\\n\",\n    \"sns.scatterplot(data=pandas_tips, x=\\\"total_bill\\\", y=\\\"tip\\\")\\n\",\n    \"sns.rugplot(data=pandas_tips, x=\\\"total_bill\\\", y=\\\"tip\\\")\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 33,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/plain\": [\n       \"<AxesSubplot:xlabel='total_bill', ylabel='tip'>\"\n      ]\n     },\n     \"execution_count\": 33,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    },\n    {\n     \"data\": {\n      \"image/png\": 
\"iVBORw0KGgoAAAANSUhEUgAAAYAAAAEJCAYAAACdePCvAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAABOIElEQVR4nO29eZhcZZ33/T1Lbd3VW7qrk86+kQQSSAIINAJh8Q1LEwIIDvoYx0FH8eIRRX0Q0Uvf10s0cDmDMuroCKLiwgMj+yDCsA0hkQAhCUmArJ210/tS1bWe5f3j1H3qnFPn1Na1ddfv8093V5865757ub/3b705VVVVEARBEDUHX+kBEARBEJWBBIAgCKJGIQEgCIKoUUgACIIgahQSAIIgiBqFBIAgCKJGIQEgCIKoUcRKDyBfhofHoShTv3ShtdWPwcFQpYdRMWj+NP9anX+x587zHFpa6m2/N+kEQFHUmhAAADUzTydo/jT/WqVccycXEEEQRI1CAkAQBFGjkAAQBEHUKCUVgFAohKuvvhrHjh0DAGzevBnr1q3D2rVrcd9995Xy0QRBEEQWSiYAO3bswCc/+Ul0d3cDAKLRKO666y784he/wHPPPYddu3bhtddeK9XjCYIgiCyUTAAeffRRfO9730N7ezsAYOfOnZg3bx7mzJkDURSxbt06PP/886V6PEEQxJShVF37SyYAd999N84++2z9676+PgQCAf3r9vZ29Pb2lurxBEEQU4Luk2P45i+3oGdwvOj3LlsdgKIo4DhO/1pVVdPXudLa6i/msKqaQKCh0kOoKDR/mn+twuYeiUl44IE3oQJYOK8Vfp+rqM8pmwDMmDED/f39+tf9/f26eygfBgdDNVEgEgg0oL8/WOlhVAyaP82/VudvnPtvnnsfPQPj+D+fXI1IKIpIKJr3/Xiec9w4ly0NdOXKlTh06BAOHz4MWZbx7LPP4qKLLirX4wmCICYVW9/vxaadPbiqcx6WzWspyTPKZgF4PB5s3LgRX/7ylxGLxbBmzRpcccUV5Xo8QRDEpGFgNILfPf8hFs5sxPoLFpTsOSUXgJdffln/vLOzE08//XSpH0kQBDFpkWUF//HMHqiqii9csxyiUDpHDVUCEwRBVBGPvrQP+4+NYsPapWhv9pX0WSQABEEQVcK+YyN45IUPcN7y6ehcMaPkzyMBIAiCqALC0QT+4+k9CLTUYcPapWV5JgkAQRBEhVFVFb//24cYDsbwjU+fBZ+nPPk5JAAEQRAVZvOuk9j6fh/WX7gAy+ZNK9tzSQAIgiAqSO9wGH94YS+WzmlG13nzyvpsEgCCIIgKIckKfvXUbogCh39edxp4Pv/2OBOBBIAgCKJCPPH6QXSfDOIfr1iGaY3esj+fBIAgCKIC7OkewvN/P4KLVs7E2cvy74tWDEgACIIgykwwHMcDz+7BjNY6fPKyUyo2DhIAgiCIMqKqKh567gOEIgl8Yd1yeNxCxcZCAkAQBFFGXn33OLbvH8ANaxZh3ozKnnlAAkAQBFEmjveH8MjL+7Fi4TR87CNzKj0cEgCCIIhykJBk/Orp3fC5BXyu6zTwBZyIWGxIAAiCIMrAo68cwLH+cdzcdRqa6t2VHg4AEgCCIIiSs2P/AF565xj+n7Pn4IxFrZUejg4JAEEQRIn5/d8+xOyAHzdcvLDSQzFBAkAQBFFiRkIxrD6lDS6xcimfdpAAEARBlBhVBaog5psGCQBBEEQJUVUVAMBVoQKQABAEQZSQ5PpPFgBBEEStoZAFQBAEUZswC6DMrf5zggSAIAiihFAMgCAIokahGABBEESNoscAUH0KQAJAEARRQigGQBAEUaOooBgAQRBETUIxAIIgiBqF6gAIgiBqFIoBEARB1ChUB0AQBFGjUAyAIAiiRiELgCAIokZJBYErPBAbKiIATz31FLq6utDV1YV77rmnEkMgCIIoD3oQuPoUoOwCEIlEcPfdd+Phhx/GU089hbfffhubN28u9zAIgi
DKgpL8WIXrf/kFQJZlKIqCSCQCSZIgSRI8Hk+5h0EQBFEWqjkGIJb7gX6/H1/5yldw5ZVXwufz4SMf+QjOPPPMcg+DIAiiLFRzFlDZBeCDDz7AX/7yF7zyyitoaGjAN77xDTz44IP4/Oc/n9P7W1v9JR5h9RAINFR6CBWF5k/znwpEkz6g5sa6nOdUrrmXXQA2bdqEzs5OtLa2AgCuv/56/OlPf8pZAAYHQ1AUtZRDrAoCgQb09wcrPYyKQfOn+U+V+Q8OjQMAgqFoTnMq9tx5nnPcOJc9BrBs2TJs3rwZ4XAYqqri5Zdfxumnn17uYRAEQZQFPQZQ4XHYUXYL4IILLsCePXtw/fXXw+Vy4fTTT8cXvvCFcg+DIAiiLFAMwMIXvvAFWvQJgqgJqjkLiCqBCYIgSkg1WwAkAARBECWEzgMgCIKoUeg8AIIgCAee2dyN/377aKWHUTIoBkAQBOHAW+/34a0P+io9jJJRzTGAimQBEQRBMOIJGbKiZL9wklLNMQASAIIgKkosIUOJT93qfuYCqkZ3CwkAQRAVJZqQkUgoUFS1KnvmT5SUC6j65laNokQQRI2gqiricRmKqiISkyo9nJKg0olgBEEQ6SQkhR2YhVAkUdGxlIrUgTDVpwAkAARBVIxYQtY/D4WnpgDoMQASAIIgiBQmAZiiFkA1p4GSABAEUTFi8VoQgOpNAyUBIAiiYsQSqfz/4BR1ASlkARAEQaRjdAGNR6emAFAMgCAIwgajAExVC4BiAARBEDbEkwIg8BzGKQZQdkgACIKoGNFkELilwYPglBUA7WMVrv8kAARBVA7mAmpt9E5ZC6Cam8GRABAEUTGYC6i1yTv1LYDKDsMWEgCCICpGLCGD5zg0+z0YjyR0f/lUQgX1AiIIgkgjFlfgcfPw+1yQFRWRmJz9TZOM1JGQ1acAJAAEUcMEw3Fs2XWyYs+PJSS4XQIa6lwAgFAkXrGxlApFIQuAIIgqZNPOHvz62T0Va8MQSyjwuATU+5gATL2W0EwA+Co8FZ4EgCBqmIGxKIBUMLbcxOIyvC4BDb6pawHISR+QwFffclt9IyIIomwMj8UAAAm5MmfyxhIy3G4Bfl0Apl4mELMABLIACKL8qKqKhFS54KKiqpAqtMBmYyhpAUhSZcYXT8jwuAT4WQxgCraDkGVyARFExXh523F88cevYTgYq8jzX9t+Anf+aktFnp2NoWBlLYBoUgB8HhEcB4SmYEM4mSwAgqgc2/b2AwB6Bscr8vzhYBRDYzG9IrRaiCVk3eWSqJAFEIvL8Lh48BwHv881JS0ARSUBIIiK4XEJAMyHj5QTtu4zV0C1YLSIKiUA8YQMj1sEAE0ApmAMQE5aV+QCIogK4HFrAhCtUKYLQ1aqKw7A/P8AKhaj0NJAtWVoygoAuYAIonLoFkDFBaC6LIChscpaAIqq6kFgQBOAqdgPSFFV8BxHzeAIohJ43ZV1ATGqzQU0FExZAJUQgERCgQqYBGBKWgCyWpXuH4AEgKgBKh0DYJAFYIZZZG4mAHVaEHiqNYSTFbUq3T8ACQBRA1Q6BpAKAldZDCAYRUuDB0BlYgBMAJiFxhrCRSss1MVGIQEw8/LLL+P666/HlVdeiR/84AeVGAJRQ7AujGQBmBkei2F6iw9AZS0AowsImHrVwLJKLiCdo0eP4nvf+x5+8Ytf4Omnn8aePXvw2muvlXsYRA3BXAqV3llKVSYAQ8Eo2lvqAFSmECzNBTRVBUCuXgtALPcDX3zxRVx11VWYMWMGAOC+++6Dx+Mp9zCIGoIV4lQqC4gdCFIOF5CiaG0n2KLqRCQmIRKTK2sBxJkFoO1DG3xuAFNPABRFhSBUpwCU3QI4fPgwZFnGLbfcgvXr1+NPf/oTmpqayj0MooZgPvhYvLKthsvhAnrhraP4zgNvZr2O1QBMa/RCFPjKuoDcqSAwMPUEQFbUqjwMBqiABSDLMt5++208/PDDqKurw5e+9CU88cQTuP7663N6f2
urv8QjrB4CgYZKD6GiFGv+dXXazlJWK/MzrUvubBsafKbnv39oCPM6GlDnddm+r5CxjkYSGBiNoqm5LqMVcGQwDABYOLcFHhcPl1ss+8/Gc3QUANAxvRGBQAM8dUlPQLJt8lT5+3e5BLhdQl7zKdfcyy4AbW1t6OzsxLRp0wAAH/vYx7Bz586cBWBwMKS3V53KBAIN6O8PVnoYFaOY8w+GtHTHUDhekZ9pOKz1uB8cCqHfry32kqzgW7/YhPUXLMDV589Pe0+h8x8ejQAADh0ZwrRGr+N1h44OAwB4WYHAcxgLRsv+sxkY0nozjQej6Oc0VwnHAT39IQCYMn//4eQZB7nOp9j/+zzPOW6cy+4CuuSSS7Bp0yaMjY1BlmW8/vrrWL58ebmHQdQQuQaBt+3tx+//9mHxn5/8aAwCK4oKWVHROxQu6rPYmbrBLE3VhsZi4DigucENl1ghF1Dc7ALieQ71XhfGyQVUNsouACtXrsTnP/95fOpTn8JVV12FmTNn4uMf/3i5h0HUEGzdzSYAu7uHSno+rrESmMUl+kejDlcXRiSmxTmCWU7WGgpG0ez3QOB5iKJQkSygqCUNFJia7SCquRCs7C4gALjhhhtwww03VOLRRA3CLIBITIKqqo49WRRF602T6ZqJYGwGxzKT+kciRX1GJBnozsUCmJYsAnNVKAgcT8gQeA6ikNqH+n1TzwKgLCCCqCB6Ja6S+WQuWdESNou+GGZoBz0SjBX1eboFkE0AgjG0JGMELpGvTB1AXE4LVPt9rqxjn2zIyhQoBBsdHUUoFCrlWAiiJBh7yzAfuR0suSBeot2wMQ2UjUmFuS3zREnFAJxdQKqqYngsarAAuIocCRlLyHobCIa/zoXxKXYqmKyoECZrDODgwYP4+Mc/js7OTpx77rn49Kc/jRMnTpRjbARRFIy9xSIZagF0AShywZheCGZwARltgf5RezeQoqrYe3Qk5+coqopoDhbAeFRCXFL0LKGKBYETzhbAVGoIN6ldQN/61rdw4403YseOHXj33Xdx+eWX49vf/nY5xkYQRcF4FGOmfkBsh16qimG7IDAA9I/YWwAfHB7Gxj9uw/H+3CzvWFzWhSWTBaAXgTELQBQqlgXEqoAZDT4XJFmpeN+mYjKpXUCRSAQ33XQTXC4X3G43NmzYgIGBgXKMjSCKgnGxjSecF7qUBVDcxdAYg0i9lvp8wCEQHI5qu/lYjuNh/n8gczUtawPNLACxUjEAw2EwjPpkP6Cx8cxZTJMJWVEmrwto4cKF2LZtm/713r17MXv27JIOiiCKiXGxzbS7L7UFYAxAmywAh1RQtitXkZs7JJLcNfMcl9EFxA6CmdZY2Swg7ThIswA0MAHIYMFMNjQXUHXm22RNAz1x4gQ2bNiApUuXQhRF7NmzB4FAAOvWrQMAPPPMMyUfJEFMBCVHAWDXxaUSuYCMFoDhdScLgO3Kc3WHMwugtcmTxQUUg8BzaKzXWlS4RK4iFkA8IesixDBaAE0eX9nHVAqq2QWUVQC+8Y1vlGMcBFEyjAtoLhZAsV1A1vtrY0odFO5UC6DvynMUABYAbm/2YXf3sOZ64NN3nuwgGFad6hKEimUBpVkAdQYX0LSpIQDVfCCMowAcOHAAixYtQn19ve33qX0DMVnI1QWklDwInO4Cam30om8kgkhMgs9j/nfM1wUUTgpAoKUO6B5GKCKhKbnLN2IsAgMqVwcQjacLADsTIDilYgDV2wrCUQDuvfde/OpXv8KNN96Ijo4Ocy51JIItW7aUZYAEMVEUFXC7eMQTmbNL5FKlgWYIAgdafOgbiaB/JIK5080dIPN1AbFWF+3N2s45GI47CEAUi2elWrCLyTTQUlVAOxG3CwJ7XeAw1YLA1ZsG6igA99xzD0ZGRrBo0SI8/PDD+h9HIpHApz/96XKOkSAmhKqq8LpFxBPxnCyAcriAGO3NPuwGMDAaTRcAZgHkqAAsa6g9echLyCYQrKgqhoMx/SxgQL
MAVFUbn1imhUpRVcQlBW5LGijPc6jzihljGJONSekC+vrXv4433ngDHMehs7NTf10QBFx++eVlGRxBFANVBUSBgyhwucUAihwETp0IZugGyiyA5G7dLg6Q70Ht0bgEDkBbk5beaddULTgeh6yoplbRrmSGSjwh48W3j+LiVbPS3FHFJq4fCJ/+HL/PNeUsgEkXBH7wwQcBaIVgP/rRj8o2IIIoNqqqggMHj0tAPJ69DsBOJFRVxeP/cxDnr5iBjlb7uFg2JEMlMHPr1/tE+DwCBpLFYGPjcQgChwCMFkBu9w/HJHg9gu72sdtFDwWTNQAWCwAAdh0awmOvHMD7h4fxtU+symNm+cNqG6yFYIDWDmKqxQCq1QLImpxKi3/+DCf/yYjqQFEBjtMOHy80C2g8KuG/thzGtr39BY/D6AJiT+A5DoEmn94O4vZ/24Qv/+R1AMYgcG5EYzK8blFPpbSrBTAeBclgAsDYdXAo5+rjQmHHc9qdWub3Ti0LoJpdQNVZnTDJGR0nAagmVGhZGJ4sAqDXAdhcw4LHuVblWgYAwNINNPksjgPamn0YSBaDsSuMXUJzjQFEYhLqPCJEgUedx96PzqqAWwz598wFZKwk/uVTu4seDDeSsgBsBKDONaUKwWRFmXwuIIKYbKiqivGopKcSpl7XFtpsApCqBNYWp0hMgkvkIQq8/j6nRTEhyZAV1danrd/fJg2U4zi0NXmx6+CgaaE/OTSuxwCsy38sIQNq6iQtRiSuuYAALZ/e1gIIRuESeb3iFkhZACyN9JqPzsfTb3TjkZf34zOXL3Wcz0SwHghvJFMMQFFUHB8Yz6tZnMctYHpLXWEDLQKaC6g699okAMSUYceBQfz7k7vwL7d+1CQCLIPN4xYy7mqVpI+eXXPrff+D0xe24vZPrNQXLCcBefSVAzjSG8S3Pn1W2vfYUmVyATELAFogOC4ppkVPUVTHQrBfP7MH8YSMr/3DKtPrkZiku3/8dS7bfkBDY1oGkDHdU7RYAGcuCSAuKXj+zSM499R2LJ3bYjvniaALgJ0F4HMhnpBtC8VeeOsoHn1lf97P+95nP4J5M8p/yPzQWBSqCrhFEgCCmBDW9EUrIyHNbRIMx00CwA4b97gEhDIclagHgQ1ZQO8dHNRei2e2AIaDMYyGMrstJJs0UI7jEGjW/PHGnkCKoqbqACwK0DsUxsmhsNZN07CDjsRktDVpWUUiz9umnQ4Fo6YAMJCyACJRWf/62gsWYMvuk3hq0yHc8aniC0A87iwADXVaEHs8kkj7fu9wGPVeEf901ak5PScYjuN3z3+Io32higjAE68fhChwOG/59LI/OxdIAIhJw+h4ZgFgC7i1sZmqIhkD4DEwmlsQ2OpiiCYyxwAkWTH1HLK9v8EFpOguIOiLtjEVVFHVVHsGy22DkQRkRcW+4yNYsaBVf91YTczz9v19hsZiOHWeeUFPuYA0i8El8HC7BFx13jz8+b/34YPDw1g2r7giEM3gAqr3poLYxmA1oGVJtTR4ceaSQE7PkWQFD/9tL/qKfPRmLhztC2Hzeydx+Tlz9d9xtVGddglBFEAqj9+88CnMBeTK5gJKBYEly/GN2SyAhJRBAGwqgVNBYE7P2zc2hTNaAOa3qXqB1weHR0yPicQl+JIxAI4DVIsFICsKRkKxtAZsugAkC8nY12tWzkST342nNh2yn9cEyOQCYv2AQjYng42Ox9FU70p73QlR4NHa5Cn62cu58Nir++HziLiqc17Zn50rJADElMHpRC/NAgDcbiFjFo+xHbS1CCtbDECWFVuXi9392ZgALQbgdglo8rstLiCjJWM80lLSheaDI8P665KsIJ5QUhYAx6UJ0mgoDlUFpjWYd9UsC4gFgcWkALhdArrOm4cPj47g/cPDKCYpF1D6EsTiGHaVzKOhOBrrna1AO9qbfegbDhcwysLZ0z2EXQeHcPX589OSEqoJEgBiyuB0pq9qsABySwNV0twnKQvAXkASsp
q249afr1cCpx8JyWKxgSaf2QJQUwfYG9dxVt3b2uhFd09QD9yyPkA+d8oFZB1O6iAY8wIqWrKAXIbe9WtWpayAYh7TyH4PdnUALEPJGsRWVVWzAPzp/Y0yEWipQ99w+SwARVXx2CsH0NrowWVnzSrbcwuBBICYMrAFPC0GgFQaaEJSdKGwYmwFYW2PnM0C0GIAmcdnfyKYpgBtzV7T0ZDGLCDjbdmu+CPL2k1nBjMhMFoAVkHSD4JxsAAiFgsA0I6L7DpvHvYeHcEHRbQCYgkl2Z7DzgLQ5mAVgEhMgiQrtg3uMtHe7MN4VCrbYfNb9/TicG8Q11+0CC4xXeCqCRIAYsrg1M3TGAMAnBdxYzO4NAuA1QE49AmSZGdhsUsDZes/qw9qa/LpC7R2rWJbCMYsgJWLWyEKnO6aSQlAKgZgdQE5WQB6FlBMgihwaa2L16yaiWa/G89s7radXyHEbFpBMwSeh88j6jEJxmgyTbYxXwFINscrhxWQkBQ8/j8HMbfdj3OrNPPHCAkAMWVwdgFpCy3zNzsFco0CYrQAZCXVRtqpnXRuWUDGE8EMQQAAgWavydVjigEYXmcWwLRGLxbPatLjAEwAvJ5MLqAoPG4hrdFbSgDktLYQ2vcFXHbWbHxwZAS9Q8XxpccSsq37h1Hvc+lZSQxWJ1GIBQDYN9wrNq9sO4aB0ShuvGRx1Z4BYIQEgJgysAU8kRYE1iwAdwYLQFFVqGqyNTJS/nAAON4/nkMaqOpoAejXKPaVwIAWAzCNR1FtK4GZW8Tvc2HZ3BYc7Q0hFEno5wHXGYPAlvEMB7WDYKw9/42Lvp1LBgDOX9EBjgM2vdeTcY65YlfkZcTvczlaAPkKAOu42ltiC2A8msAzm7uxfME0LF8wraTPKhYkAMSUIZMFwHGA180EIH0RZ+/1Ja8Zj6QWn4M9Y7rV4OTqySkNVE53AbGluK3Z7JeXVdW2G2gwEococPC6BSyb1wIVwIdHRlIWQHL8mgWQHgOw5tUDMDUqs7MAAKClwYMVC1qxedfJrEKXC9kEoN5OAJKFdk3+/LKAPO5kllWJBeC5LYcRjkq48eJFJX1OMSEBIKYMTv38VWsMwMaNw/zszIViDBgeOjGmZ9kA9haErChQnDNM9Wv05yFVBwBogVnjQpxIyPrO3xgDCIUT8Ptc4DgOC2c2QhQ4HDwxqp8HnLIAkLZQW4+CZHAcpy/8LgcLAAAuOKMDw8EY9nQPZZ5oDsQTsm0RGMPvc5msMAAYC8chJA+MyZf2Zl9Ji8EGR6N48e1jOG/5jLSDfaoZEgBiyuB0opeiqFodQAYXEBMP5h9nu886j2iyAIB0CwMAEpKadCOl744zBYGZN4bnObQadudRh1hDKJKA36e5QERBC5ZG47LuAvIas4AMY5FkrdeQnQUApBZ+JwsAAFYtbkO9VyyKG8juPGAjThZAY727IN96e0tpawGefP0gAOC6ixaU7BmlgASAmDI4pYEqQNYsoDQXUNICWDKnGSf6x3X/s937VYecfSvmdtDaB+NaZnQDReOS9VIAWhYQq5QFkge6SwoiMQkCz+lNxzhLEHg4GIMK2FoA7D7Gj07XnHfaDGzbOzDhlErNBeT8LL9NEHh0PJ53BhCjvdmHkVC8JC2uj/QGsXnXSXzsrNlV2/LBCRIAYsrgXAmsWQAed+4WAIsBLJnTDBVAz2BYX6yt97fr8mkZgON1HFIKYFw8jBaAnQuI4RJ4JGRNALxuQXcpWYPAdgfBGMnFBQRobiBJVvDmnt6M12XD7kB4I/U+FyIx2TSH0fFY3gFgRnuyHXQpMoH+89UDqPOK6Dq/els+OEECQOSNoqi4/z93Yv/x0UoPBS+8dRQvvXMMgHMvIC0InJsFwPr5jyd3nwtnNurXsApV6/uNbSMyBUhlmyCB0QIIGCwAp3TTUCQBv4MFYEzv5C11APpRkI32FgDL/hGztC2eO92P2QE/Nu2cmBsollDgzhIDALT+RgytD1ChAl
CaWoDd3UPYdWgIXZ3z9SZ2kwkSACJvhoMxbN8/gH9/clelh4JHXtqHP764F0CmbqAsCJysA7BZXFMWgLYojSQXzOnT6vTFsSG5+FhjDJLNYe92mLOAzEFgIJWuCJhdQOyeiqJiPJJIO8xFEwDZJAAcz5ncUboF0DAxC4DjOFxwRge6TwZxbALHRsYSMrxZLABAO4oT0H4GwfFEwS4g9rMtZiBYa/mwH62N3qpv+eAECQCRN+x4u2yFT4XQMziOgdHC/kll1stHslYCp84EBrLEAJKL6MmhMESBR2OdC63Jbp2NyT711vcbBccuE8guCKy3gzZcZ3QBmSyA5LWhaAIqUv3yAW3nnpBkGwvA4gIKxlDvFR0zb3KJATDOWz4dAs8VbAWwNhfZXEAAEEkKQCiSgKKqBVsAfp8L9V6xqBbAm3t6caQ3hOsvWlj1LR+cIAEg8oZlKzo1P5sI3/71m7jj37fkdK0prVJVHbOAtBiA1ndG4DnbOgAmHmwRHQnF9aIpFjhlwde0GICpz38mF5AhS8hwJjBjVqAeCzo0l5NRZNgdWRWw32IBSLKqtYI2LO7WbqDDYzG0OOz+gdyygBiNdW6sXNyGv+8+mdY1NRcyNYJjMAEIJ4PNYwXWABgJFDEVNCEpePy1ydPywYmKCsA999yDO++8s5JDIAqA0y2Ayo7D2C44lpAz1AGkFlqv274jqDULCEj5y9nC72gBZIkBGDWBjZG9w+gC8rgE3PHJ1QCg5/VrN9A+6FXAdZYgMIsBGPLjed4SAxiLOvr/gfwsAAC44PQOjIUTeO/AYE7XG8l0HjDDb3EBFVoFbKS9xVe0YrCXtx3D4FgUN146OVo+OFExAdiyZQueeOKJSj2emADsD74YFaHZGBqLYsvukwCA//vyfvzLI+/q3zOmZo6FE1ljAIC267QLsLKF2Wtwo7AdM3O5sMWx0BgAYIgD2KSBAtrCDZizgNg92SHvaTEAORkDcFtdQKn7DgVjjhlAgCEInCUGwDh90TQ01rsLqglIHQbj/CzdAogxAdBiMoXGAABNAAZGowVZLUbGowk8y1o+zJ8cLR+cqIgAjIyM4L777sMtt9xSicdXLaFIoiR5yrk8N1OffCdkRcVIKJb1/cPBGBKSrO9gw9GEY5aLlQee3YNfP7MHQ2NRfHhkBLu7Uy2JjYeoB8fjGQ+EYQut05kA7L1uUdCvtVoAbOG33t+4oHT3BG2KwVJfM7eVapMGCqQsAmMQmMHOM7a6gOIJmxgAnyoEiyW0n71TDQC7j/FjNgSex/nLZ2DngUHT78GKoqgYTgbUGbEM5wEz/LoLSPs5jI1rfzsTsQACzT4oqqoHxAtlMrZ8cKIiAvDd734Xt99+OxobG7NfXEPc9tPX8aM/bKvIc7//27fyfl8sIeNrP3sDt/30dfzw4Xccrxsdj+Ff/u8O3PbT1wEA//snr+P//PvmvJ51YnDc5r5GCyBuCAKbd3iyoupWi5MAMAtAEFLpomzHfMqsJgDA3Bl+AJnTQO//y05s3zfgOA92MDyThHQLQHshZqoD0D4aG8ExXCKPSEyCrKh6BpN231QQeDhLCii7j/FjLnz0jA7IiqpbaHZs29uPb/5ys0kEcnEB+TwiOC51TvHoeAxukdd7HRVCexEygVjLh84Vk6vlgxNlPxT+scceQ0dHBzo7O/H444/n/f7WVn8JRlVcRqMyAoHC/jgO9wb19xZ6j0LoGQzn/DxvOH3Hd7Qv5Pj+0aisH1zCrglFElmfFwg0YFqzDzgyApfHZXodAGSkFh6VFyAK2uIgyYrp3pKiorHBg0CgAf56N1Rwac8eZG2WW+rgTbZXWDC7GYFAAwKBBiyYOw2z2/3444v7ILhE0/tPjJh3lPtPBrH2owv1r73e1K61ubkOrU0+NA5obQlaWurSxsJxZhdQQ3LsMrQmcLNmNqe+5/fqfvJAq1+/V4PfAxVAW5sfJ4a18S2cM83xZ16fdHM1N/py/jsIBBqwZG
4z/r6nF//rqtPSuowCgLJ3AJKsYiicwJKFbQCAo0PaAjw9+bN1wu9zQeV4BAINiEkqWhq9aG8vfNPIJ11k4YRa8P/WH/57HzgO+Ny1pyOQLC4rBeX63y+7ADz33HPo7+/H+vXrMTo6inA4jB/+8Ie46667cnr/4GCoLL7niTA8Mo5+b+E7lf5+TQT6+4NFHFVuz80F60lN2d4/PDJue0225/X3B6EmfeZ9hpxz9r6TA6nXTvSOIZrcLcbiCvr6xvQFKRJNALKK/v4gOKgIheNpzx5MWhjBYBRichcuqKp+nZcHBgZCcIs8RkYjpvcPWKyT7R/2mb4fiaQEs68/CCUuYWREE4BRy70AzX8fMxZAjUXR3x9E3+A46r0u0/VSInWdHJf077Fn9vaN4eBRzW3GK7LjzzySzLaJx6S8/u7OPXU6Hv7bh3jrvRN6BpOR0WRK73t7+7Bouj/5M9B+b5HxmOOzAoEGeN0CBkfC6O8PondwHH6fOKH/CVVV4RZ5HDw6jP7+trzff6Q3iFfePorLz50LTnL+WU6UYv/v8zznuHEuuwA89NBD+uePP/44tm7dmvPiT9QezE0QsYkZRGISGuvdiCdkzQXEMmxUFbKiQhSS7pSEDLdbc214XIJ+MpYRtqkQOKMLKN1l4nHxGWMAgGZNjYZitimLbIyZYsUcx9m6qaxVwIDZZWOtA9DmlToKsiVDDID5pPJxAQHAuae245GX9mHTez22AsDme6Q3JdbxRPYYAADUeVMN4cbCcd2FUygcxyHQ4iu4FkBv+dA5+Vo+OEF1AERVw5qbRWPpQdFITMt9b6xzIxhOmDJwWMBWkhVIsqovNh6nLKDke3leOzjG4xL01sqm8djEEOyySj5MurwAczM3lgXE2kHbpRDyvDmzSDVkARkzgABz5a4xBmAs1hsai6GhzpVTsRITzVyp87pw5pIA3tzdi4TNcZnsd3KkL7WjzaUOANA6sTIBGA3FJ1QDwGhv9hXUD2j3ocnd8sGJigrA9ddfj40bN1ZyCEQB2LU8LhVsfRywydyIxmV4PSIa6l0YM2QBAdAXI7bb9BoFIEMWkMBrLSOmNaafnAUwAbCmgaa+ZgfPOB2gnosFYBUFvRAsErexAFKLqKkVBCvWU1XtIJgMRWBG7OacjQtO70A4JuFdm+A3K5IbGovprkMW38gW0K3ziggnD4IPRRITygBiBJICkE8Vu6KqeOzVyd3ywQmyAIi8sab1lRKWy25sO8AWa7MFEDe1WoglM4HYYs0aj2XLAuJ5Dh89vQMfO2u27Xg8LsHGBZR6LgcOS+Y04/0jI7bvT6WBJq+3WW+NB8NoF2sftLMAnF1AXgcX0PBYLGMGkJFCSppOndeCaY0e29YQxt/J4V7NCsjVBVTvFTEeTej1D8UQgOktPsQlRT9dLBf0lg9rJm/LBydIAIi8eW37ibTXMv0zTyRob7dTY5W3rAFaQ53bVAgGpM4FZvn0ugXgFrTjGy1jMloAHz29A5ecaS8Abhef0QWkqiqWzW1B71A4JZTGSmDmArJpBsewvsbOG4jE5HQXkEEA6uwEIE8LoJDfFM9zOH9FB3YfGkrLsTcKwJGkAMQSMkSB191UTtR5XIhEpaIUgTECelfQ3A6HSUiy1vJhuh/nnjZ5Wz44QQJA5I3dIpEpeDiRw0NsBSC5u4/GJXjdIhrrXQiFE5BkNVWtK7GiLe2jMQYAaItQz+A4jvVpwUnFYAFkwuMS0uoMJMPXKoBl85oBAB8eSXcDWeMFdhaAdQwqjG0gzIug0WdvdKmwe4SjCURiclYLgP2c06yPHLng9BlQAWzeZa4JUBTtdzKt0aMHgrMdBsPweUXEJQWDo5oAFMMCyLcW4KV3jmstHy6Z3C0fnCABIIqC0y7/rv/4O/704r6C72vnqmUCEIlJqEtaAIqqIhiO64ugUSSAVDaR3hI6IePbv34T3/3NVm38GXbkRtxiehA5YVnU57Y3wOcR8UFSAFRTJXAqUw
mwd7mwNZgFwFU11fco3QIQ9GuNbRzYPQZHkxlA2QQgRwF0or2lDkvmNGPTez2mGJGsqOB5DnPbG0wWQKYiMEa9l3Vm1dJsiyEArU3a2cu5ZAKNRxP4ry3dWDEFWj44QQJA5I/NqhyOSYglZISjCQyMRvRd/8mhMHpz+Gc7cHwUvTZmuZ2wJCQZqqoiEpPh9Qh6k7bxqKQLwGDSFRGzWABOLaHZgpxt/fO4hbRmc8YYAKAtokvnNOMDmziAbJmPneCwRdhoVQXD6W0ggFQWkNeSscQa9g2MZj4HgHHGolYAwNz2wgstLzi9A33DEew7ljooSFZUCByHudP9ODkYRiwuI5blPGAGc2n1DGp/F8VwAQk8j9ZGb06ZQP+VbPlwwxRo+eBE2esAiMmPk5/43j9tw6GeVLrfb+68NOd73u3QSsLJBRSJSVBUFXVeEY2GzBjWEO3Xz+xB5/IZhsZjVheQedf+wtajAOzTMo14ssQAGMvmNmP7/gHNJ24TA0hZHOnPYGNwuwSMR7V5Bm06gQIpkfBZBIDdQxeALBbAectnYOXitrT75MPZywL444t7sem9HiyZ0wxAE3BB4DBvegNUAEf7Q4glMp8FwKjzpgTA5xGzpo3mSnuLL+umJBqX8NI7x3De8qnR8sEJsgBqjFJm8BgX/2Jhd+bA4FhM9yfPDvj1k7oAYFZA28EKlp46ugvI4Vzg7pPa2LO5ed2igHhCgaKqiMYlDI3Zd5dcOrcFAHQ3ECM9C8jGAuAsFoCaigE4BYF9FpeKUQA4AM055NBPZPEHtOM0P7KsHW990Kf/3GVF0VxAyUX0SG8wGQPIRQC0uZ4cGi/K7p8RyKEt9O5DQ0hICi48o6Noz61GSAAmMZKs4MePvIuDJ8Zyfg/LqGCUM6e/EOxCCyOhqJ5SOG96g+4CArQd+jmntqMtGexztgDsu5FmiwF4DDGGZ97oxv/327dsD5iZM92Peq+Y5gbSXUAZ0kCZC0iPASAVA6h3EgCrBZD8zx4cjaDJ7865zfNEueCMDsTiMt7+sA+ANl+R5zCt0YN6r5gSgBxiAMwFFInJRfH/M9qbfQjHJMeWJgCwfd8A6r0iFs9uKtpzqxESgEnMycEw9nQP46Hn3i/4HoUc62gVjXxK4/N9np1ASbKKwyeDaGnwoLHeDb/PpQdTeZ5LHpKiLfDWzpNMAOzOBQay58GzRTmWkBEMaznqdtk+PKfVA3xweNi2EjhjEFiPASQXyaQLqM4jpi3kLAbg6AIai2Y8B6DYnDK7Ce0tPr0mgAWBOU6zAg73hhBPyDm5c+oMB9wUVQCyHBCvKCp2HBjE6YtayyaclWJqz26KIyRTAKUJ5Nkb19dcrQHjZU31btNh5kZsWy7I+Y3VTjAkWcHh3iDmz9DcCjzP6b5xnufgcgl6+mcsLoNDauFmQhB1EoCsMYCkgCRk3fXDgpRWls1twcBoVM/EAQBJsaaB2rmAtI8mC8CmDxAAiLoFYHEBJW8yNBbLeA5AseE4rY7iw6Mj6BuJaDGApDkyb3oDjveHtGB9DgJQXyoB0FNB7X9v+4+PIhRJYNXi/BvGTTZIACYxzM8tT+CEI2OWjayoGBiJ5JW3r8J51/ylf30t7TXrOb5AZqtAUdIPAg9HJZwcDGOeITjH3EACz8Et8ohLMv74wl4c6hmD2y3oCy27zniWgHFMWWMAhiCyNf3TyrJ5WhzgwImx1O8qpzRQcwxASwONp/n/AYMF4LZkARkmUk4LAAA+umIGOABv7OyBLKv63OdO90OSVYyNx3OKAbhEQd+BFzUG0JzZAti+fwACz2HFgtaiPbNaIQGoEgrxxRsbfhX+3NTnsqzijl9uwbd+9ffM7zF8PjYez6t/jGwRHO25zgupqqYfU7jv2ChUAPNmpASgwWgBiDziCQUvbTuGXYeGTIuNzyPA7eIxEkrFQozWQF4WgJRZOGYF6uH3uSDJij6H9CMhbSqBWQwg+SwVmgvImgIKZMoCSn1eTg
sA0ATntAXTsHlXDyRZMQhA6vfFurNmg7mBimkBuF0CWho8joHg7fsGsGxus8kFNVUhAagSCirB55gFUJxWC8w9YRccGwnF8MundmluHcvj8imQNI6VCUBCSh//D3+7FcPBGKIJOa1DJTtcxigAbIfILAAjxqpTjuPQ7PeYBMDoqspaB2AoJJNkRXdT2Pm0eU6rBwBSFbtM7JxOBGPvA1IuIJYFZOcC8rgFuEQ+rdUzX0ELANBqAgbHYth7bFTfqMyYVge3K9WWOxdYILjJXzwBADQrwK4auGdwHCeHwlh1SqCoz6tWSACqhQkk41iLi/J6rFEAMgjJX147gK3v92Hr+71p38unRN5kAcipls1WtrzXg217+xFPyLqf23iPJr/blNrIDm7nOS6tYZfHZd7Jtfg9GDGkw0bysADMLiAVswJ+tDV54XFohcHcQAKzAPRuoM6VxyyDx2XJAmrwpS+CHpeA73/uHFxgSVfkDEqWrQq4FJy5pA11HhGRmKTHqniew5xkoVmuAlCvWwDFnUN7s/25ADv2DwIAVi6e+u4fgASg6Dzw7B5s29uf9/sm4saRlfxjAJKs4Lm/Hza5PyTJ+T7MhSEpqqm1AZAKRueC0d0j6RaA+bkXrdQWs3hCq/gV+dSfKSv6mmcpzmGvCzyn7zIZHou7obnBgxFDN0jTAexZLQBzENglcPjM5Uux/sKFttcvm9sMIGUB6GcCZ/h1C5YYQDQuIy4pthYAAExvqUtzkxlF2c51VGpcoqA3TxMMY2FuoFwFwJcUgGLGAAAtE2h0PJ6WqLB9Xz/mtPvR1jSxw2cmCyQARSQSk7Bl18m88vInAltECrEAduwfwH++egB/finVp8eYodKfzOAYHI3i5FBYX1AkWUmzVs5copnLs9rqsz53yLDzZu4gqwXAdtlxSYGimgVmfvLUqfkzzALAisFYDMCINeOkxe/BsEMMIJs1w8QllowBiAKPFQtbsXKR/Y5xZls9GupcaQF79iO0czkxlwlb1EMObSAyYdDMijUxY1aJYBAnJty51AEAKRdQg4P4FQpLBTW2hAiG49h3fLQmsn8YUz/KUUa6TwahQhOCfJlIQVYhMQDmJtnTPaS/ZnQBffvXf8dNl52CP7ywN+1Z1iIqtlAdHzCfjWukPXkU38Y/bjM8L9mx02IB8Jzmx39q0yFwnNZvnrFkTjN2HhjEKbObTe9h2T08z8FtcQFZ/fPNfrfJ6ojGjC4gxykAMFsACVnR3VNOriOO43D6wlacGBjHIBfTxZpZW3bN1ziLBRB0aASXCeOiXykBmD+jAXOn+03Fa4tmNuZcmQwAbU0+BJq9Rc/HZwLQOxzB7KRbaueBQagqsOqU2hEAsgCKyKEebefvlGOeiULWf+aKKcQCSJ2YZXDJGBZFSVZtF3RJVtIW7GyctTSAG9akN9RiP6ewJe2U41KLdmujF5+/+jT9ews6GnHPLZ1YvsDcnVFPA+XSg8DWk6eaLQFTowuIy+IDMsYANBeQ9qxMXTQ/vXYJbv/ESggCp/+uBseicIu87a6ebQbYoqc3gstjF2xc9CvVxZjjOHzjptX47BXL9NdmBfy450udumssG+s+Oh/f3nB20cfGagGMFsD2/QNo9rtNyQVTHRKAIsIEoDALoIAHTiBwzBZxYx2AtUjJbh52AdtsTGuw38Gx+48nz31lu2uO43QXQWO9OdjrFnnbwrOGemMhmDULyGoBmAXAaNHkYwFIsqrPK1P2kNettawWeE631gZGo2ht8tpaDuxvgcUNxpIWQH4uIM7283Lj97nS0inbmnw5pw57XELR/f+A1meo3ivqB8MkJBm7Dg5h1eK2Kdn33wkSgCKSEoACLIACVvOJdPFhx/LZtSlgRKLpAlCItcFx9oeNh5kAJNNOm5OpfhxSC631gBKng2daG704Y1ErFs1qyu4CslgARqHL9s/P8xxEQesImpBSLqBcDlIReE4P2A+MRBwrqJ0sgIa63BfCanABVT
vtLXV6KugHR0YQS8g15f4BSACKxkgohqGxGOq9ohYgzHOnXJgLqHDs3DjWPvdhGwtAltW84xUcZ79AWi0AtjM3WgDWxctJAESBx1dvXIkFHY3pQWC3NQhsXkhN58PmsFZ6XFqhmVbgxeljzoYg8LqADoxG0dZkn5/PfjNMAMajEjjOfORjNjjDj4DWf3tYXArQir/cLt4Ub6oFSACKBNv9M9+03eKZiVI25YzGJfzq6d2mVtDW1EvAHA8AiucC4sCZMkGs9x+PJsBznJ7Jw3EpC8Dqvsh09CQjvRDMLAAuUTD1mTFmJuXiLXEnD5bPNQbAYC6gcDSBcExyTDVkAmvMfqr3uvJy5ZgsgAq6gKqZQLMPg8l23tv3D2DFgtYpd+h7NkgAisShniB4jsNpyaPjxjO0mrWnEBMgt/ds3zeAN/f04us/f0P3d8dt2iFbX7MTMa0OID9ysQDqfaKpmIq5bay791z+Qa0iYZdyaKycNR5knstOPiUAxhhA7i6g/hHteY4WgJK6nt013zRIcgFlZ3qLD6oKbNvbj+FgrKbSPxkkAEXiUM8YZgXq9YVl3MZ/nolCinlzfYuxj0o4OS5bC0DK0QLI8OBrL1yQ/iJnXyxmjAHUe11w2QSBvZYmZ64c0gHTK4HTBcAYCDZaRrmslR4Xr4+dxQD4HP6TBIGHJKsYGNXcDjnFAJLjybeYyzgPWv/tYT//F946Cg7AGTVS/WuEBKAIqKqK7p4xLOhoRH3yFKP8LYB0Rsfj2HVwMMODc7uP0QWw84B2P9sYgMUCsAtmZ6s56Fw+I+01DpzeEtju/uPRBOp9ou66MQWBhQJcQFmygACzAIwZOoPmagGw368rDwtA5LU0UN0CaHawAJgLiOf0++YrAKYsIFIAW1gtwMETY1g0u8l0sFCtQAJQBPpGIhiPSlg4sxH1Pm3Hmk9LZcC+FcQ9f9yGf310x4RP7TK+/bXtJwCk6gCM5JLfLytKRt0xLjbnnNoOQNuB2s1BdwFFJM0C0IuqUm4bq+vILpvISloMwMYFZMwEMo4sl6XS4xL0329+QWAOsqxgYDQCn0fUNwtWUmmgqXnkkwEEUAwgF5rq3fpmYXWNZf8wSACKwKFk64f5MxoMFkCetQA2q+rJIS1H2S71MhiO6xWiVvYeHcGfXtyLE8lCLjtxycUCsGPr+30ZewYZFxu26+c42PbONwaB672iLgCKquoxAKsA5LLQWmsO7CwAp/bCudzf4xJ0V5q1UV0m3KKAuKRgYDSKgIP/HzAEgXlOd9/kHQMw/NzIALCH4zi9IKwW/f8AtYIoCgd7xuAWecwK1OuVpPlaAHY7ZJ7joKiqdq6qZQ37yv2bHO/1/JtHsH3/AP77nWO495ZOWwFI2Jxja80CcuLDZDtmO3gOuPdLnRAFHn959QAAzQVkt+CagsBel+5OkSQV3qQlZec6yoZ1EbcTAKuVkA9uF68LQC4xCUadV8TYeBwjIRkzptU5Xme2ADgAav4uIM74OSmAE3PatarfjtbsfaymImQBFIFDPWOYN6MBAs+D5zl43ULeFoCdW4WtfUqeEWLjgj8ajsOuWaidBRCzcQvlC8dzaGvyodnv0RdijtOqP79+0yrTtZGYBFlREIlJqPe59J17QlZSdQBF+Au1cwHlEktwvJ9L0H9f+fSoqfOICEclDI5GHQPAgDUGoL2WvwAYLQASACc2XL4Ed3zqzEoPo2KQAEwQSVZwpDeEBckulYB2OlP+FkD6a/qBL3kKgOnfXbV3AdnGAHJwAWXDtNtMfsoWoHbLoheJSfpO2ugCkmRF37UXoz7CzgKYiAAYK43zEQCfV8TgWBRxSXFMAQVSGWHGLKCJuIAIZ7xusSLtsqsFEoAJcrx/HAlJsQiAkHcaqJ0LiB3qke+Zv8Ydn6oCqo2AxCUlbRHM1QWUCTt3A3vF6nYJx2T951TvSwWBE1JKAAopPLNiLwCFF/wYzxdwibkvtHUeURfztgwWgLEQjLkU/T
aHwWSCdv1ELpAATBBWAbxgpkEA3BW2AAz/+ypU+yBwQk7b+RTFAjC2ILB8Yu3JE41JCCUD2fVegwvIIE4TOe2MYT0QBqiMBWBsipYpCMxcfgLPpeoAyAIgSgAJwAQ51DMGv89l+of2ecS86wDs6mvZ/7CUwyJoXOSNu7/BsahtkVlcUkz95X0eIe82z3bYtSG29rdn16kAhoJaTrzVBaQfoTiB844ZdoHkicYAGPnGABiZTpxiv0pB4PW/gXzOAgBya2lBECQAE+RQzxjmdzSYFt1CXEB2UWCWAmkNAtsFhSMxCXuPjuCFrUdMFsB/PL0n7XpZUZCQFJNfWRT4IlkAzi4gUeD1OfmTWT6sH7vVBaQfop6MYJ+/YgbOX5FeZFYo+WTvWDEWmuUjJHXJFOGGOlfGE7HYZkDkOQAchGRiQT6QC4jIBRKACRCNSzg+MI6FBv8/AHiTQeB8zvm123s7xQCsJ3IBQCiSwMY/bsMjL+9PcydZxzEaimsuIENxkSjw6D4ZBJBqy2zkR184L5dpWGIAqSwgBlsw2SlRA6MpC8DoAmK7duYC+vzVp5kOhpko+eTvW5moBZDtvNmUBaBFAPx1rrwXdEr9JHKhIgLws5/9DF1dXejq6sK9995biSEUhcMng1BVmALAgBYDUFXNx50zDnUAQLof3FYADEVhxuZmQLrFMByKaRaAz2gBaG0Kmv1uLJnTnHb/XA9+t1t3jIsXWzB1AUhaAHVeMVUHICv6dRN1Af3ktgtsX3flcZC9FbMA5BEETsYAAg4tIBipNFAeHJe/+wcoTvosMfUp+5/J5s2bsWnTJjzxxBN48sknsXv3brz44ovlHkZRONSj7ZjTBMCjLRChPNxA+QSB7QQgGEnorgnW45xhtQCGx2KISwp8Bp80u2TVKQHbvvO5FmRxdjEAw/fZgulPukP6R6PweUQIPA8xmVGTkBRdcCaaBeTU32UiZ8wag9l5FYLlagEkf988p/08C0lTJAuAyIWyC0AgEMCdd94Jt9sNl8uFRYsW4cSJE+UeRlE41DOG1kZv2pF1bGF1CgSrqoqewXFT6qd1/R8ai+qZRJKs4Hh/SP9ezObM4fFIQq+2tbZx/uDwiOnrXYe0hnBGXzZzxaxa3JbWgRMoLKjI3mIUBSYkbFEbHI3qvfldhkIw3QIoQhaQHXbnE+SKyQLIw5XU5HfD5xGxcGZjxuvYlJkL0F9AkzLKAiJyoeytIE455RT98+7ubvz1r3/Fn//855zf39rqL8WwCuJwXwjLFkxDIGA+RDowTRujy+NK+x4A/O3vh/Gzx7bjK/+wGh87Zy4AIGrY6AYCDVj39af0r194+xje+aAP9311DV5795itsBwfiuhdJq1s2X3S9PX/7OgBAMyari1ES+e2QBA47Dk0hAvPmoO+sVjaPaZPb8Q5p83A1j3me51/RodpjsbP3Z5k0LPBq7/Ogp9tyVYIsqKiqcGDQKAB3npP8p4zcUrSDXXDx5bY/gyzcfqiNrx3YMDxvY3NmohefOZsLF/YipfeOoIPDg/n9KxgPPXLmjG90dSora3Jm/Eef/7BVfrO3omLVs/Cc5u7MaujCS2NXiyY1ZT3z8AonIX8/KqByTruYlCuuVesF9C+ffvwxS9+EXfccQfmz5+f8/sGB0N5t0YoBWPhOPqGwrh45Uz09wdN34tEtAV0YGg87XsDIxH8+qn3AABPvrofZ8xvBsdxGBoa16+xvue9AwMAgINHhvDXLd22zdie39Kdcbz/fPVpWHVKGz44MoxmvwcCz2F2ux8/++qFcIk8EpKCWELB6EgYizs0AWtr8uqWQXA0gss/MgvrPzoPzX4PxqMJeN0ivG7BNF7j54MjWjM7yLL+Olv2BIPN4xF5/fv3f+VC1HlENPk9+M2dl9r+PHLhf1+3AglJzvje+79yIXweAQLP44z5LUhISk7PCodSQjs6HEZ0XPt9//z2iyDwXEHjNXLdBfPxv644FcHRCL75yVVwiU
Le9zRalxMdTyUIBBom5biLQbHnzvOc48a5IqGid955B5/97Gfx9a9/Hdddd10lhjBhulkBWEe6UrNUR7uFeseBQcTiMj521mwc7g3qcYRMLZ9Z47ZQNIFYXC7ILTKjtQ4+j4iWBg8WdDRi7vQG8ByHOq8LLlFAndelH2Yzf0YjfnPnpaYDskWBA8dx6Gith88joq3JB7+hf48dzFIx7pCZb9/rFnU/tbEtst+X39GHTrhEXk+7dMLvc+kuKe363PZDbpMLyJj+K6YVuxWCwPO6W7HO0CY7HygNlMiFsgtAT08Pbr31Vvz4xz9GV1dXuR9fNA6eGAPHAfNm2AgAy2ax6cLGcu2v6pwHj0vAK+8eA2COAVjFgH01OGrv4jGyaJa9f7mQvHfjwlzIghLSBSB1H7bgCgKnB8vrJ1kvFo8r1ea6kG6lBFEtlP2v98EHH0QsFsPGjRuxfv16rF+/Pq8YQLVwqCeImW31tgFTUbcA0nfqLIOnsd6NzuXTsfX9PoQiCVMWkKoC0xo9ae/NRQCWzmmxfd3lyv9XbZcNlA92AsCygASO04Pl9TnuvKsFtsufSDEZQVQDZf/P+853voPvfOc75X5sUVFVFYd6xhwPkciUwsiasPEch4tXz8Kr20/gjfd6cOq81MLdNxKBz+MCYA7GDjgIgChwkJL58k6LaSGLVa4uESdYrKbB0MiMucd43igAk8sCYBXNE0klJYhqgP6CC2BgNIpQJGFqAGeEuQVsBSAh62mEc6c3YPGsJrz67nGTBXDXf/zdNtDdNxJJew0w94p3ajFQiB95ogLw9ZtW47oLF5jGxBZNgZ+8FgCgWQETqSYmiGqA/oILgHUAtbaAYKQsALsunIop//6SM2ehdziCPd1DpuuGg9G0itHhYMz03saka8XYKtipZ0xBAjBBF9Ccdj/WfXSB6TWjBcDuP9liAIBWQzGRamKCqAZIAArgUM8YREE7AtIOFgOwOwc3lpBN7YTPXtoOv8+FV949brouGpcxq60eM1vNRwcumtmkfz6tURMI4+EiHpeDC6gAAWhvcT62sFCYBcBxhiDwJLQAPC6BXEDEpIf+ggvg0IkxzJvud1wA+AwHucQTclo3yQvP6LD176sq0OQ3B4NPmZ0SgNbkwn/W0oAeaHWyAArJVmFpocVEMHT59E1mC0AkFxAx+Zl8W68KIysKunuDuOiMmY7XcBwHUeBMFoCiqjh8MoihYCzNT79m9Sw8/+aRtHYQdV4RHksswNh3qDVpAYyNxxFo9iEYTmRsMwwATfX5Leq3XrfCNtOpUPQun7I6aYPAgHbIjJ2LjyAmEyQAedIzEEY8oTgGgBmiwEOSVBw4Poqt7/fh7Q/7MBzUsnqWzzenarY3+7BiYSveO6j16PnEJYtx8UfmovvYELZ9OGC6dkayhcKyuc1YubgNL7x1FLMC9ZBkBQdPjCHQ5DVV8FrJd1d/1tJ2/fN8xcOOlAWgItDsQ51ncp7J2tbkK8r5CQRRSUgA8uSgXgGcWQDcIo8X3z6KF98+ClHgsGJBK/w+F472hWwbkV121mxdAOq8IuZMb8DJvjE0GBrNnXNqO9qavfj57RfBJfIQBR73f+VC+H0urFjYikvPmo16rwt3//O5+OKPXyvirDWK4RISDS2fL141C2cvDUzodK5K8U9XLrM7w4cgJhUkAHlyqGcMdR4R7S2ZW/r+4xXLcLg3iECzD6tPaUOd14UnXz+Io30h29jAGYtaMWNaHU4OhU3ZN8y3v3x+C25ZvwIA4POkFky2e+Y5TnelTOTA81Kjt8mQVS0TaBK6f4D0840JYjJCApAnh06MYUFHQ9Z+66uXBLB6ScD02px2rSHTicGw7XtaGjw4ORSGz5AVw/rZl6otcrlZPKsJL71zDDPbip9hRBBEfpAA5EE8IeNY/ziu6pxb0PvnTNf6BrFYgJUzlwTw/uFhzGxNpZeyFMmpEnA897TpWNDRUJIUU4Ig8oMEIA+O9IagqCoWzM
js/3fCmK9vx6VnzsJFKztMLhy978wk9JM7QYs/QVQHJAB5oAeAs2QAOcFzHD575TIEHISA4zjT4t9U70GT342uznm4eNWsgp5JEAThBAlAHnT3jKGlwYNmf+HZMBetdK4fsMKybj6+ZlHBzyMIgnCCBCAPDvaMZU3/JIhq4fs3n1NQG3CidiAByJFQJIG+4QguPKOj0kPJi7nT/ViSPF+XqC1mt1fP+dlEdVITAhBLyNh1cBATyaQ8MaCd2evUAbRa+X//6ZxKD4EgiCqlJgRgy+6T+P3zH074Pm6Rx/wcBKAYLROKQTlbLKxa3IZ6X038ORHElIFTM51GXoUMDoZsD0vJhKqqODkUzvt9Vvw+V1p3zlIRCDSgvz9Y8PsTkgyAm7TpoxOd/2SH5l+78y/23HmeQ2urvTuwJrZsHMeho9W+d/9UpZrbQRAEUR1Mzu0hQRAEMWFIAAiCIGoUEgCCIIgahQSAIAiiRiEBIAiCqFFIAAiCIGqUSZcGyvOZD2KZStTSXO2g+dP8a5Vizj3TvSZdIRhBEARRHMgFRBAEUaOQABAEQdQoJAAEQRA1CgkAQRBEjUICQBAEUaOQABAEQdQoJAAEQRA1CgkAQRBEjUICQBAEUaOQAFQJoVAIV199NY4dOwYA2Lx5M9atW4e1a9fivvvuq/DoSsvPfvYzdHV1oaurC/feey+A2pr/T3/6U1x11VXo6urCQw89BKC25s+45557cOeddwKorflv2LABXV1dWL9+PdavX48dO3aUb/4qUXG2b9+uXn311ery5cvVo0ePqpFIRF2zZo165MgRNZFIqDfffLP66quvVnqYJeGNN95Q/+Ef/kGNxWJqPB5XP/OZz6jPPPNMzcz/zTffVG+66SY1kUiokUhEveSSS9T333+/ZubP2Lx5s3ruueeq3/zmN2vq719RFPWCCy5QE4mE/lo5508WQBXw6KOP4nvf+x7a29sBADt37sS8efMwZ84ciKKIdevW4fnnn6/wKEtDIBDAnXfeCbfbDZfLhUWLFqG7u7tm5n/OOefg97//PURRxODgIGRZxtjYWM3MHwBGRkZw33334ZZbbgFQW3//Bw8eBADcfPPNuOaaa/CHP/yhrPMnAagC7r77bpx99tn61319fQgEAvrX7e3t6O3trcTQSs4pp5yCVatWAQC6u7vx17/+FRzH1cz8AcDlcuH+++9HV1cXOjs7a+r3DwDf/e53cfvtt6OxsRFAbf39j42NobOzEz//+c/x29/+Fo888ghOnDhRtvmTAFQhiqKA41ItXFVVNX09Fdm3bx9uvvlm3HHHHZgzZ07Nzf+2227Dli1b0NPTg+7u7pqZ/2OPPYaOjg50dnbqr9XS3//q1atx7733oqGhAdOmTcMNN9yA+++/v2zzn3TnAdQCM2bMQH9/v/51f3+/7h6airzzzju47bbbcNddd6Grqwtbt26tmfkfOHAA8Xgcp556Knw+H9auXYvnn38egiDo10zl+T/33HPo7+/H+vXrMTo6inA4jOPHj9fM/N9++20kEgldAFVVxaxZs8r2908WQBWycuVKHDp0CIcPH4Ysy3j22Wdx0UUXVXpYJaGnpwe33norfvzjH6OrqwtAbc3/2LFj+M53voN4PI54PI6XXnoJN910U83M/6GHHsKzzz6Lp556CrfddhsuvfRSPPDAAzUz/2AwiHvvvRexWAyhUAhPPPEEvva1r5Vt/mQBVCEejwcbN27El7/8ZcRiMaxZswZXXHFFpYdVEh588EHEYjFs3LhRf+2mm26qmfmvWbMGO3fuxLXXXgtBELB27Vp0dXVh2rRpNTF/O2rp7/+SSy7Bjh07cO2110JRFHzqU5/C6tWryzZ/OhGMIAiiRiEXEEEQRI1CAkAQBFGjkAAQBEHUKCQABEEQNQoJAEEQRI1CAkAQBFGjkAAQNcnNN9+MoaGhCV/z5ptv4uqrr876vKVLl9re66WXXsIPfvADAFpb4Oeffx7Hjh3D6t
Wrs96TICYKFYIRNckbb7xRlGsmymWXXYbLLrus5M8hCDvIAiBqjm9961sAgH/8x3/E1q1bsWHDBqxbtw7XXHMNnnzyybRrenp68Morr+Cmm27C9ddfj4svvhg/+clP8n7uT37yE1x33XVYv349XnnlFQDA448/ji9+8YtFmRdB5AtZAETN8aMf/QiPP/44fve73+ETn/gE7rjjDqxduxa9vb248cYbMW/ePNM1LS0tuOOOO7Bx40bMnz8fvb29uOSSS/CZz3wmr+fOnj0b3//+97F3715s2LABf/3rX0s0Q4LIDRIAomY5cOAAYrEY1q5dCwCYPn061q5di9dff93kg+c4Dr/85S/x6quv4tlnn8WBAwegqioikUhez/vkJz8JAFiyZAkWLVqEd999t3iTIYgCIBcQUbNwHJfWZ11VVUiSZHotHA7juuuuw+7du3HaaafhjjvugCiKyLeNFs+n/t0URYEo0v6LqCwkAERNIggCZs2aBVEU8cILLwAAent78be//Q3nn3++fo0kSTh8+DBCoRC++tWv4tJLL8Wbb76JeDwORVHyeuYTTzwBANi9ezeOHDmClStXFndSBJEntAUhapIrrrgCn/3sZ/GLX/wCP/jBD/Bv//ZvkGUZt956K8477zz9mg0bNuCnP/0pLr74Ylx55ZVwu91YsmQJFi9ejMOHD8Ptduf8zKNHj+Laa68Fx3H413/9VzQ3N5dodgSRG9QOmiAIokYhC4AgisADDzyAZ555xvZ7n/vc53DNNdeUeUQEkR2yAAiCIGoUCgITBEHUKCQABEEQNQoJAEEQRI1CAkAQBFGjkAAQBEHUKP8/4tJAb6qNkeYAAAAASUVORK5CYII=\",\n      \"text/plain\": [\n       \"<Figure size 432x288 with 1 Axes>\"\n      ]\n     },\n     \"metadata\": {},\n     \"output_type\": \"display_data\"\n    }\n   ],\n   \"source\": [\n    \"# Create a visualization with Modin df\\n\",\n    \"sns.lineplot(data=modin_tips, x=\\\"total_bill\\\", y=\\\"tip\\\")\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 34,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/plain\": [\n       \"<AxesSubplot:xlabel='total_bill', ylabel='tip'>\"\n      ]\n     },\n     \"execution_count\": 34,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    },\n    {\n     \"data\": {\n      \"image/png\": 
\"iVBORw0KGgoAAAANSUhEUgAAAYAAAAEJCAYAAACdePCvAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAABOIElEQVR4nO29eZhcZZ33/T1Lbd3VW7qrk86+kQQSSAIINAJh8Q1LEwIIDvoYx0FH8eIRRX0Q0Uvf10s0cDmDMuroCKLiwgMj+yDCsA0hkQAhCUmArJ210/tS1bWe5f3j1H3qnFPn1Na1ddfv8093V5865757ub/3b705VVVVEARBEDUHX+kBEARBEJWBBIAgCKJGIQEgCIKoUUgACIIgahQSAIIgiBqFBIAgCKJGIQEgCIKoUcRKDyBfhofHoShTv3ShtdWPwcFQpYdRMWj+NP9anX+x587zHFpa6m2/N+kEQFHUmhAAADUzTydo/jT/WqVccycXEEEQRI1CAkAQBFGjkAAQBEHUKCUVgFAohKuvvhrHjh0DAGzevBnr1q3D2rVrcd9995Xy0QRBEEQWSiYAO3bswCc/+Ul0d3cDAKLRKO666y784he/wHPPPYddu3bhtddeK9XjCYIgiCyUTAAeffRRfO9730N7ezsAYOfOnZg3bx7mzJkDURSxbt06PP/886V6PEEQxJShVF37SyYAd999N84++2z9676+PgQCAf3r9vZ29Pb2lurxBEEQU4Luk2P45i+3oGdwvOj3LlsdgKIo4DhO/1pVVdPXudLa6i/msKqaQKCh0kOoKDR/mn+twuYeiUl44IE3oQJYOK8Vfp+rqM8pmwDMmDED/f39+tf9/f26eygfBgdDNVEgEgg0oL8/WOlhVAyaP82/VudvnPtvnnsfPQPj+D+fXI1IKIpIKJr3/Xiec9w4ly0NdOXKlTh06BAOHz4MWZbx7LPP4qKLLirX4wmCICYVW9/vxaadPbiqcx6WzWspyTPKZgF4PB5s3LgRX/7ylxGLxbBmzRpcccUV5Xo8QRDEpGFgNILfPf8hFs5sxPoLFpTsOSUXgJdffln/vLOzE08//XSpH0kQBDFpkWUF//HMHqiqii9csxyiUDpHDVUCEwRBVBGPvrQP+4+NYsPapWhv9pX0WSQABEEQVcK+YyN45IUPcN7y6ehcMaPkzyMBIAiCqALC0QT+4+k9CLTUYcPapWV5JgkAQRBEhVFVFb//24cYDsbwjU+fBZ+nPPk5JAAEQRAVZvOuk9j6fh/WX7gAy+ZNK9tzSQAIgiAqSO9wGH94YS+WzmlG13nzyvpsEgCCIIgKIckKfvXUbogCh39edxp4Pv/2OBOBBIAgCKJCPPH6QXSfDOIfr1iGaY3esj+fBIAgCKIC7OkewvN/P4KLVs7E2cvy74tWDEgACIIgykwwHMcDz+7BjNY6fPKyUyo2DhIAgiCIMqKqKh567gOEIgl8Yd1yeNxCxcZCAkAQBFFGXn33OLbvH8ANaxZh3ozKnnlAAkAQBFEmjveH8MjL+7Fi4TR87CNzKj0cEgCCIIhykJBk/Orp3fC5BXyu6zTwBZyIWGxIAAiCIMrAo68cwLH+cdzcdRqa6t2VHg4AEgCCIIiSs2P/AF565xj+n7Pn4IxFrZUejg4JAEEQRIn5/d8+xOyAHzdcvLDSQzFBAkAQBFFiRkIxrD6lDS6xcimfdpAAEARBlBhVBaog5psGCQBBEEQJUVUVAMBVoQKQABAEQZSQ5PpPFgBBEEStoZAFQBAEUZswC6DMrf5zggSAIAiihFAMgCAIokahGABBEESNoscAUH0KQAJAEARRQigGQBAEUaOooBgAQRBETUIxAIIgiBqF6gAIgiBqFIoBEARB1ChUB0AQBFGjUAyAIAiiRiELgCAIokZJBYErPBAbKiIATz31FLq6utDV1YV77rmnEkMgCIIoD3oQuPoUoOwCEIlEcPfdd+Phhx/GU089hbfffhubN28u9zAIgi
DKgpL8WIXrf/kFQJZlKIqCSCQCSZIgSRI8Hk+5h0EQBFEWqjkGIJb7gX6/H1/5yldw5ZVXwufz4SMf+QjOPPPMcg+DIAiiLFRzFlDZBeCDDz7AX/7yF7zyyitoaGjAN77xDTz44IP4/Oc/n9P7W1v9JR5h9RAINFR6CBWF5k/znwpEkz6g5sa6nOdUrrmXXQA2bdqEzs5OtLa2AgCuv/56/OlPf8pZAAYHQ1AUtZRDrAoCgQb09wcrPYyKQfOn+U+V+Q8OjQMAgqFoTnMq9tx5nnPcOJc9BrBs2TJs3rwZ4XAYqqri5Zdfxumnn17uYRAEQZQFPQZQ4XHYUXYL4IILLsCePXtw/fXXw+Vy4fTTT8cXvvCFcg+DIAiiLFAMwMIXvvAFWvQJgqgJqjkLiCqBCYIgSkg1WwAkAARBECWEzgMgCIKoUeg8AIIgCAee2dyN/377aKWHUTIoBkAQBOHAW+/34a0P+io9jJJRzTGAimQBEQRBMOIJGbKiZL9wklLNMQASAIIgKkosIUOJT93qfuYCqkZ3CwkAQRAVJZqQkUgoUFS1KnvmT5SUC6j65laNokQQRI2gqiricRmKqiISkyo9nJKg0olgBEEQ6SQkhR2YhVAkUdGxlIrUgTDVpwAkAARBVIxYQtY/D4WnpgDoMQASAIIgiBQmAZiiFkA1p4GSABAEUTFi8VoQgOpNAyUBIAiiYsQSqfz/4BR1ASlkARAEQaRjdAGNR6emAFAMgCAIwgajAExVC4BiAARBEDbEkwIg8BzGKQZQdkgACIKoGNFkELilwYPglBUA7WMVrv8kAARBVA7mAmpt9E5ZC6Cam8GRABAEUTGYC6i1yTv1LYDKDsMWEgCCICpGLCGD5zg0+z0YjyR0f/lUQgX1AiIIgkgjFlfgcfPw+1yQFRWRmJz9TZOM1JGQ1acAJAAEUcMEw3Fs2XWyYs+PJSS4XQIa6lwAgFAkXrGxlApFIQuAIIgqZNPOHvz62T0Va8MQSyjwuATU+5gATL2W0EwA+Co8FZ4EgCBqmIGxKIBUMLbcxOIyvC4BDb6pawHISR+QwFffclt9IyIIomwMj8UAAAm5MmfyxhIy3G4Bfl0Apl4mELMABLIACKL8qKqKhFS54KKiqpAqtMBmYyhpAUhSZcYXT8jwuAT4WQxgCraDkGVyARFExXh523F88cevYTgYq8jzX9t+Anf+aktFnp2NoWBlLYBoUgB8HhEcB4SmYEM4mSwAgqgc2/b2AwB6Bscr8vzhYBRDYzG9IrRaiCVk3eWSqJAFEIvL8Lh48BwHv881JS0ARSUBIIiK4XEJAMyHj5QTtu4zV0C1YLSIKiUA8YQMj1sEAE0ApmAMQE5aV+QCIogK4HFrAhCtUKYLQ1aqKw7A/P8AKhaj0NJAtWVoygoAuYAIonLoFkDFBaC6LIChscpaAIqq6kFgQBOAqdgPSFFV8BxHzeAIohJ43ZV1ATGqzQU0FExZAJUQgERCgQqYBGBKWgCyWpXuH4AEgKgBKh0DYJAFYIZZZG4mAHVaEHiqNYSTFbUq3T8ACQBRA1Q6BpAKAldZDCAYRUuDB0BlYgBMAJiFxhrCRSss1MVGIQEw8/LLL+P666/HlVdeiR/84AeVGAJRQ7AujGQBmBkei2F6iw9AZS0AowsImHrVwLJKLiCdo0eP4nvf+x5+8Ytf4Omnn8aePXvw2muvlXsYRA3BXAqV3llKVSYAQ8Eo2lvqAFSmECzNBTRVBUCuXgtALPcDX3zxRVx11VWYMWMGAOC+++6Dx+Mp9zCIGoIV4lQqC4gdCFIOF5CiaG0n2KLqRCQmIRKTK2sBxJkFoO1DG3xuAFNPABRFhSBUpwCU3QI4fPgwZFnGLbfcgvXr1+NPf/oTmpqayj0MooZgPvhYvLKthsvhAnrhraP4zgNvZr2O1QBMa/RCFPjKuoDcqSAwMPUEQFbUqjwMBqiABSDLMt5++208/PDDqKurw5e+9CU88cQTuP7663N6f2
urv8QjrB4CgYZKD6GiFGv+dXXazlJWK/MzrUvubBsafKbnv39oCPM6GlDnddm+r5CxjkYSGBiNoqm5LqMVcGQwDABYOLcFHhcPl1ss+8/Gc3QUANAxvRGBQAM8dUlPQLJt8lT5+3e5BLhdQl7zKdfcyy4AbW1t6OzsxLRp0wAAH/vYx7Bz586cBWBwMKS3V53KBAIN6O8PVnoYFaOY8w+GtHTHUDhekZ9pOKz1uB8cCqHfry32kqzgW7/YhPUXLMDV589Pe0+h8x8ejQAADh0ZwrRGr+N1h44OAwB4WYHAcxgLRsv+sxkY0nozjQej6Oc0VwnHAT39IQCYMn//4eQZB7nOp9j/+zzPOW6cy+4CuuSSS7Bp0yaMjY1BlmW8/vrrWL58ebmHQdQQuQaBt+3tx+//9mHxn5/8aAwCK4oKWVHROxQu6rPYmbrBLE3VhsZi4DigucENl1ghF1Dc7ALieQ71XhfGyQVUNsouACtXrsTnP/95fOpTn8JVV12FmTNn4uMf/3i5h0HUEGzdzSYAu7uHSno+rrESmMUl+kejDlcXRiSmxTmCWU7WGgpG0ez3QOB5iKJQkSygqCUNFJia7SCquRCs7C4gALjhhhtwww03VOLRRA3CLIBITIKqqo49WRRF602T6ZqJYGwGxzKT+kciRX1GJBnozsUCmJYsAnNVKAgcT8gQeA6ikNqH+n1TzwKgLCCCqCB6Ja6S+WQuWdESNou+GGZoBz0SjBX1eboFkE0AgjG0JGMELpGvTB1AXE4LVPt9rqxjn2zIyhQoBBsdHUUoFCrlWAiiJBh7yzAfuR0suSBeot2wMQ2UjUmFuS3zREnFAJxdQKqqYngsarAAuIocCRlLyHobCIa/zoXxKXYqmKyoECZrDODgwYP4+Mc/js7OTpx77rn49Kc/jRMnTpRjbARRFIy9xSIZagF0AShywZheCGZwARltgf5RezeQoqrYe3Qk5+coqopoDhbAeFRCXFL0LKGKBYETzhbAVGoIN6ldQN/61rdw4403YseOHXj33Xdx+eWX49vf/nY5xkYQRcF4FGOmfkBsh16qimG7IDAA9I/YWwAfHB7Gxj9uw/H+3CzvWFzWhSWTBaAXgTELQBQqlgXEqoAZDT4XJFmpeN+mYjKpXUCRSAQ33XQTXC4X3G43NmzYgIGBgXKMjSCKgnGxjSecF7qUBVDcxdAYg0i9lvp8wCEQHI5qu/lYjuNh/n8gczUtawPNLACxUjEAw2EwjPpkP6Cx8cxZTJMJWVEmrwto4cKF2LZtm/713r17MXv27JIOiiCKiXGxzbS7L7UFYAxAmywAh1RQtitXkZs7JJLcNfMcl9EFxA6CmdZY2Swg7ThIswA0MAHIYMFMNjQXUHXm22RNAz1x4gQ2bNiApUuXQhRF7NmzB4FAAOvWrQMAPPPMMyUfJEFMBCVHAWDXxaUSuYCMFoDhdScLgO3Kc3WHMwugtcmTxQUUg8BzaKzXWlS4RK4iFkA8IesixDBaAE0eX9nHVAqq2QWUVQC+8Y1vlGMcBFEyjAtoLhZAsV1A1vtrY0odFO5UC6DvynMUABYAbm/2YXf3sOZ64NN3nuwgGFad6hKEimUBpVkAdQYX0LSpIQDVfCCMowAcOHAAixYtQn19ve33qX0DMVnI1QWklDwInO4Cam30om8kgkhMgs9j/nfM1wUUTgpAoKUO6B5GKCKhKbnLN2IsAgMqVwcQjacLADsTIDilYgDV2wrCUQDuvfde/OpXv8KNN96Ijo4Ocy51JIItW7aUZYAEMVEUFXC7eMQTmbNL5FKlgWYIAgdafOgbiaB/JIK5080dIPN1AbFWF+3N2s45GI47CEAUi2elWrCLyTTQUlVAOxG3CwJ7XeAw1YLA1ZsG6igA99xzD0ZGRrBo0SI8/PDD+h9HIpHApz/96XKOkSAmhKqq8LpFxBPxnCyAcriAGO3NPuwGMDAaTRcAZgHkqAAsa6g9echLyCYQrKgqhoMx/SxgQL
MAVFUbn1imhUpRVcQlBW5LGijPc6jzihljGJONSekC+vrXv4433ngDHMehs7NTf10QBFx++eVlGRxBFANVBUSBgyhwucUAihwETp0IZugGyiyA5G7dLg6Q70Ht0bgEDkBbk5beaddULTgeh6yoplbRrmSGSjwh48W3j+LiVbPS3FHFJq4fCJ/+HL/PNeUsgEkXBH7wwQcBaIVgP/rRj8o2IIIoNqqqggMHj0tAPJ69DsBOJFRVxeP/cxDnr5iBjlb7uFg2JEMlMHPr1/tE+DwCBpLFYGPjcQgChwCMFkBu9w/HJHg9gu72sdtFDwWTNQAWCwAAdh0awmOvHMD7h4fxtU+symNm+cNqG6yFYIDWDmKqxQCq1QLImpxKi3/+DCf/yYjqQFEBjtMOHy80C2g8KuG/thzGtr39BY/D6AJiT+A5DoEmn94O4vZ/24Qv/+R1AMYgcG5EYzK8blFPpbSrBTAeBclgAsDYdXAo5+rjQmHHc9qdWub3Ti0LoJpdQNVZnTDJGR0nAagmVGhZGJ4sAqDXAdhcw4LHuVblWgYAwNINNPksjgPamn0YSBaDsSuMXUJzjQFEYhLqPCJEgUedx96PzqqAWwz598wFZKwk/uVTu4seDDeSsgBsBKDONaUKwWRFmXwuIIKYbKiqivGopKcSpl7XFtpsApCqBNYWp0hMgkvkIQq8/j6nRTEhyZAV1danrd/fJg2U4zi0NXmx6+CgaaE/OTSuxwCsy38sIQNq6iQtRiSuuYAALZ/e1gIIRuESeb3iFkhZACyN9JqPzsfTb3TjkZf34zOXL3Wcz0SwHghvJFMMQFFUHB8Yz6tZnMctYHpLXWEDLQKaC6g699okAMSUYceBQfz7k7vwL7d+1CQCLIPN4xYy7mqVpI+eXXPrff+D0xe24vZPrNQXLCcBefSVAzjSG8S3Pn1W2vfYUmVyATELAFogOC4ppkVPUVTHQrBfP7MH8YSMr/3DKtPrkZiku3/8dS7bfkBDY1oGkDHdU7RYAGcuCSAuKXj+zSM499R2LJ3bYjvniaALgJ0F4HMhnpBtC8VeeOsoHn1lf97P+95nP4J5M8p/yPzQWBSqCrhFEgCCmBDW9EUrIyHNbRIMx00CwA4b97gEhDIclagHgQ1ZQO8dHNRei2e2AIaDMYyGMrstJJs0UI7jEGjW/PHGnkCKoqbqACwK0DsUxsmhsNZN07CDjsRktDVpWUUiz9umnQ4Fo6YAMJCyACJRWf/62gsWYMvuk3hq0yHc8aniC0A87iwADXVaEHs8kkj7fu9wGPVeEf901ak5PScYjuN3z3+Io32higjAE68fhChwOG/59LI/OxdIAIhJw+h4ZgFgC7i1sZmqIhkD4DEwmlsQ2OpiiCYyxwAkWTH1HLK9v8EFpOguIOiLtjEVVFHVVHsGy22DkQRkRcW+4yNYsaBVf91YTczz9v19hsZiOHWeeUFPuYA0i8El8HC7BFx13jz8+b/34YPDw1g2r7giEM3gAqr3poLYxmA1oGVJtTR4ceaSQE7PkWQFD/9tL/qKfPRmLhztC2Hzeydx+Tlz9d9xtVGddglBFEAqj9+88CnMBeTK5gJKBYEly/GN2SyAhJRBAGwqgVNBYE7P2zc2hTNaAOa3qXqB1weHR0yPicQl+JIxAI4DVIsFICsKRkKxtAZsugAkC8nY12tWzkST342nNh2yn9cEyOQCYv2AQjYng42Ox9FU70p73QlR4NHa5Cn62cu58Nir++HziLiqc17Zn50rJADElMHpRC/NAgDcbiFjFo+xHbS1CCtbDECWFVuXi9392ZgALQbgdglo8rstLiCjJWM80lLSheaDI8P665KsIJ5QUhYAx6UJ0mgoDlUFpjWYd9UsC4gFgcWkALhdArrOm4cPj47g/cPDKCYpF1D6EsTiGHaVzKOhOBrrna1AO9qbfegbDhcwysLZ0z2EXQeHcPX589OSEqoJEgBiyuB0pq9qsABySwNV0twnKQvAXkASsp
q249afr1cCpx8JyWKxgSaf2QJQUwfYG9dxVt3b2uhFd09QD9yyPkA+d8oFZB1O6iAY8wIqWrKAXIbe9WtWpayAYh7TyH4PdnUALEPJGsRWVVWzAPzp/Y0yEWipQ99w+SwARVXx2CsH0NrowWVnzSrbcwuBBICYMrAFPC0GgFQaaEJSdKGwYmwFYW2PnM0C0GIAmcdnfyKYpgBtzV7T0ZDGLCDjbdmu+CPL2k1nBjMhMFoAVkHSD4JxsAAiFgsA0I6L7DpvHvYeHcEHRbQCYgkl2Z7DzgLQ5mAVgEhMgiQrtg3uMtHe7MN4VCrbYfNb9/TicG8Q11+0CC4xXeCqCRIAYsrg1M3TGAMAnBdxYzO4NAuA1QE49AmSZGdhsUsDZes/qw9qa/LpC7R2rWJbCMYsgJWLWyEKnO6aSQlAKgZgdQE5WQB6FlBMgihwaa2L16yaiWa/G89s7radXyHEbFpBMwSeh88j6jEJxmgyTbYxXwFINscrhxWQkBQ8/j8HMbfdj3OrNPPHCAkAMWVwdgFpCy3zNzsFco0CYrQAZCXVRtqpnXRuWUDGE8EMQQAAgWavydVjigEYXmcWwLRGLxbPatLjAEwAvJ5MLqAoPG4hrdFbSgDktLYQ2vcFXHbWbHxwZAS9Q8XxpccSsq37h1Hvc+lZSQxWJ1GIBQDYN9wrNq9sO4aB0ShuvGRx1Z4BYIQEgJgysAU8kRYE1iwAdwYLQFFVqGqyNTJS/nAAON4/nkMaqOpoAejXKPaVwIAWAzCNR1FtK4GZW8Tvc2HZ3BYc7Q0hFEno5wHXGYPAlvEMB7WDYKw9/42Lvp1LBgDOX9EBjgM2vdeTcY65YlfkZcTvczlaAPkKAOu42ltiC2A8msAzm7uxfME0LF8wraTPKhYkAMSUIZMFwHGA180EIH0RZ+/1Ja8Zj6QWn4M9Y7rV4OTqySkNVE53AbGluK3Z7JeXVdW2G2gwEococPC6BSyb1wIVwIdHRlIWQHL8mgWQHgOw5tUDMDUqs7MAAKClwYMVC1qxedfJrEKXC9kEoN5OAJKFdk3+/LKAPO5kllWJBeC5LYcRjkq48eJFJX1OMSEBIKYMTv38VWsMwMaNw/zszIViDBgeOjGmZ9kA9haErChQnDNM9Wv05yFVBwBogVnjQpxIyPrO3xgDCIUT8Ptc4DgOC2c2QhQ4HDwxqp8HnLIAkLZQW4+CZHAcpy/8LgcLAAAuOKMDw8EY9nQPZZ5oDsQTsm0RGMPvc5msMAAYC8chJA+MyZf2Zl9Ji8EGR6N48e1jOG/5jLSDfaoZEgBiyuB0opeiqFodQAYXEBMP5h9nu886j2iyAIB0CwMAEpKadCOl744zBYGZN4bnObQadudRh1hDKJKA36e5QERBC5ZG47LuAvIas4AMY5FkrdeQnQUApBZ+JwsAAFYtbkO9VyyKG8juPGAjThZAY727IN96e0tpawGefP0gAOC6ixaU7BmlgASAmDI4pYEqQNYsoDQXUNICWDKnGSf6x3X/s937VYecfSvmdtDaB+NaZnQDReOS9VIAWhYQq5QFkge6SwoiMQkCz+lNxzhLEHg4GIMK2FoA7D7Gj07XnHfaDGzbOzDhlErNBeT8LL9NEHh0PJ53BhCjvdmHkVC8JC2uj/QGsXnXSXzsrNlV2/LBCRIAYsrgXAmsWQAed+4WAIsBLJnTDBVAz2BYX6yt97fr8mkZgON1HFIKYFw8jBaAnQuI4RJ4JGRNALxuQXcpWYPAdgfBGMnFBQRobiBJVvDmnt6M12XD7kB4I/U+FyIx2TSH0fFY3gFgRnuyHXQpMoH+89UDqPOK6Dq/els+OEECQOSNoqi4/z93Yv/x0UoPBS+8dRQvvXMMgHMvIC0InJsFwPr5jyd3nwtnNurXsApV6/uNbSMyBUhlmyCB0QIIGCwAp3TTUCQBv4MFYEzv5C11APpRkI32FgDL/hGztC2eO92P2QE/Nu2cmBsollDgzhIDALT+RgytD1ChAl
CaWoDd3UPYdWgIXZ3z9SZ2kwkSACJvhoMxbN8/gH9/clelh4JHXtqHP764F0CmbqAsCJysA7BZXFMWgLYojSQXzOnT6vTFsSG5+FhjDJLNYe92mLOAzEFgIJWuCJhdQOyeiqJiPJJIO8xFEwDZJAAcz5ncUboF0DAxC4DjOFxwRge6TwZxbALHRsYSMrxZLABAO4oT0H4GwfFEwS4g9rMtZiBYa/mwH62N3qpv+eAECQCRN+x4u2yFT4XQMziOgdHC/kll1stHslYCp84EBrLEAJKL6MmhMESBR2OdC63Jbp2NyT711vcbBccuE8guCKy3gzZcZ3QBmSyA5LWhaAIqUv3yAW3nnpBkGwvA4gIKxlDvFR0zb3KJATDOWz4dAs8VbAWwNhfZXEAAEEkKQCiSgKKqBVsAfp8L9V6xqBbAm3t6caQ3hOsvWlj1LR+cIAEg8oZlKzo1P5sI3/71m7jj37fkdK0prVJVHbOAtBiA1ndG4DnbOgAmHmwRHQnF9aIpFjhlwde0GICpz38mF5AhS8hwJjBjVqAeCzo0l5NRZNgdWRWw32IBSLKqtYI2LO7WbqDDYzG0OOz+gdyygBiNdW6sXNyGv+8+mdY1NRcyNYJjMAEIJ4PNYwXWABgJFDEVNCEpePy1ydPywYmKCsA999yDO++8s5JDIAqA0y2Ayo7D2C44lpAz1AGkFlqv274jqDULCEj5y9nC72gBZIkBGDWBjZG9w+gC8rgE3PHJ1QCg5/VrN9A+6FXAdZYgMIsBGPLjed4SAxiLOvr/gfwsAAC44PQOjIUTeO/AYE7XG8l0HjDDb3EBFVoFbKS9xVe0YrCXtx3D4FgUN146OVo+OFExAdiyZQueeOKJSj2emADsD74YFaHZGBqLYsvukwCA//vyfvzLI+/q3zOmZo6FE1ljAIC267QLsLKF2Wtwo7AdM3O5sMWx0BgAYIgD2KSBAtrCDZizgNg92SHvaTEAORkDcFtdQKn7DgVjjhlAgCEInCUGwDh90TQ01rsLqglIHQbj/CzdAogxAdBiMoXGAABNAAZGowVZLUbGowk8y1o+zJ8cLR+cqIgAjIyM4L777sMtt9xSicdXLaFIoiR5yrk8N1OffCdkRcVIKJb1/cPBGBKSrO9gw9GEY5aLlQee3YNfP7MHQ2NRfHhkBLu7Uy2JjYeoB8fjGQ+EYQut05kA7L1uUdCvtVoAbOG33t+4oHT3BG2KwVJfM7eVapMGCqQsAmMQmMHOM7a6gOIJmxgAnyoEiyW0n71TDQC7j/FjNgSex/nLZ2DngUHT78GKoqgYTgbUGbEM5wEz/LoLSPs5jI1rfzsTsQACzT4oqqoHxAtlMrZ8cKIiAvDd734Xt99+OxobG7NfXEPc9tPX8aM/bKvIc7//27fyfl8sIeNrP3sDt/30dfzw4Xccrxsdj+Ff/u8O3PbT1wEA//snr+P//PvmvJ51YnDc5r5GCyBuCAKbd3iyoupWi5MAMAtAEFLpomzHfMqsJgDA3Bl+AJnTQO//y05s3zfgOA92MDyThHQLQHshZqoD0D4aG8ExXCKPSEyCrKh6BpN231QQeDhLCii7j/FjLnz0jA7IiqpbaHZs29uPb/5ys0kEcnEB+TwiOC51TvHoeAxukdd7HRVCexEygVjLh84Vk6vlgxNlPxT+scceQ0dHBzo7O/H444/n/f7WVn8JRlVcRqMyAoHC/jgO9wb19xZ6j0LoGQzn/DxvOH3Hd7Qv5Pj+0aisH1zCrglFElmfFwg0YFqzDzgyApfHZXodAGSkFh6VFyAK2uIgyYrp3pKiorHBg0CgAf56N1Rwac8eZG2WW+rgTbZXWDC7GYFAAwKBBiyYOw2z2/3444v7ILhE0/tPjJh3lPtPBrH2owv1r73e1K61ubkOrU0+NA5obQlaWurSxsJxZhdQQ3LsMrQmcLNmNqe+5/fqfvJAq1+/V4PfAxVAW5sfJ4a18S2cM83xZ16fdHM1N/py/jsIBBqwZG
4z/r6nF//rqtPSuowCgLJ3AJKsYiicwJKFbQCAo0PaAjw9+bN1wu9zQeV4BAINiEkqWhq9aG8vfNPIJ11k4YRa8P/WH/57HzgO+Ny1pyOQLC4rBeX63y+7ADz33HPo7+/H+vXrMTo6inA4jB/+8Ie46667cnr/4GCoLL7niTA8Mo5+b+E7lf5+TQT6+4NFHFVuz80F60lN2d4/PDJue0225/X3B6EmfeZ9hpxz9r6TA6nXTvSOIZrcLcbiCvr6xvQFKRJNALKK/v4gOKgIheNpzx5MWhjBYBRichcuqKp+nZcHBgZCcIs8RkYjpvcPWKyT7R/2mb4fiaQEs68/CCUuYWREE4BRy70AzX8fMxZAjUXR3x9E3+A46r0u0/VSInWdHJf077Fn9vaN4eBRzW3GK7LjzzySzLaJx6S8/u7OPXU6Hv7bh3jrvRN6BpOR0WRK73t7+7Bouj/5M9B+b5HxmOOzAoEGeN0CBkfC6O8PondwHH6fOKH/CVVV4RZ5HDw6jP7+trzff6Q3iFfePorLz50LTnL+WU6UYv/v8zznuHEuuwA89NBD+uePP/44tm7dmvPiT9QezE0QsYkZRGISGuvdiCdkzQXEMmxUFbKiQhSS7pSEDLdbc214XIJ+MpYRtqkQOKMLKN1l4nHxGWMAgGZNjYZitimLbIyZYsUcx9m6qaxVwIDZZWOtA9DmlToKsiVDDID5pPJxAQHAuae245GX9mHTez22AsDme6Q3JdbxRPYYAADUeVMN4cbCcd2FUygcxyHQ4iu4FkBv+dA5+Vo+OEF1AERVw5qbRWPpQdFITMt9b6xzIxhOmDJwWMBWkhVIsqovNh6nLKDke3leOzjG4xL01sqm8djEEOyySj5MurwAczM3lgXE2kHbpRDyvDmzSDVkARkzgABz5a4xBmAs1hsai6GhzpVTsRITzVyp87pw5pIA3tzdi4TNcZnsd3KkL7WjzaUOANA6sTIBGA3FJ1QDwGhv9hXUD2j3ocnd8sGJigrA9ddfj40bN1ZyCEQB2LU8LhVsfRywydyIxmV4PSIa6l0YM2QBAdAXI7bb9BoFIEMWkMBrLSOmNaafnAUwAbCmgaa+ZgfPOB2gnosFYBUFvRAsErexAFKLqKkVBCvWU1XtIJgMRWBG7OacjQtO70A4JuFdm+A3K5IbGovprkMW38gW0K3ziggnD4IPRRITygBiBJICkE8Vu6KqeOzVyd3ywQmyAIi8sab1lRKWy25sO8AWa7MFEDe1WoglM4HYYs0aj2XLAuJ5Dh89vQMfO2u27Xg8LsHGBZR6LgcOS+Y04/0jI7bvT6WBJq+3WW+NB8NoF2sftLMAnF1AXgcX0PBYLGMGkJFCSppOndeCaY0e29YQxt/J4V7NCsjVBVTvFTEeTej1D8UQgOktPsQlRT9dLBf0lg9rJm/LBydIAIi8eW37ibTXMv0zTyRob7dTY5W3rAFaQ53bVAgGpM4FZvn0ugXgFrTjGy1jMloAHz29A5ecaS8Abhef0QWkqiqWzW1B71A4JZTGSmDmArJpBsewvsbOG4jE5HQXkEEA6uwEIE8LoJDfFM9zOH9FB3YfGkrLsTcKwJGkAMQSMkSB191UTtR5XIhEpaIUgTECelfQ3A6HSUiy1vJhuh/nnjZ5Wz44QQJA5I3dIpEpeDiRw0NsBSC5u4/GJXjdIhrrXQiFE5BkNVWtK7GiLe2jMQYAaItQz+A4jvVpwUnFYAFkwuMS0uoMJMPXKoBl85oBAB8eSXcDWeMFdhaAdQwqjG0gzIug0WdvdKmwe4SjCURiclYLgP2c06yPHLng9BlQAWzeZa4JUBTtdzKt0aMHgrMdBsPweUXEJQWDo5oAFMMCyLcW4KV3jmstHy6Z3C0fnCABIIqC0y7/rv/4O/704r6C72vnqmUCEIlJqEtaAIqqIhiO64ugUSSAVDaR3hI6IePbv34T3/3NVm38GXbkRtxiehA5YVnU57Y3wOcR8UFSAFRTJXAqUw
mwd7mwNZgFwFU11fco3QIQ9GuNbRzYPQZHkxlA2QQgRwF0or2lDkvmNGPTez2mGJGsqOB5DnPbG0wWQKYiMEa9l3Vm1dJsiyEArU3a2cu5ZAKNRxP4ry3dWDEFWj44QQJA5I/NqhyOSYglZISjCQyMRvRd/8mhMHpz+Gc7cHwUvTZmuZ2wJCQZqqoiEpPh9Qh6k7bxqKQLwGDSFRGzWABOLaHZgpxt/fO4hbRmc8YYAKAtokvnNOMDmziAbJmPneCwRdhoVQXD6W0ggFQWkNeSscQa9g2MZj4HgHHGolYAwNz2wgstLzi9A33DEew7ljooSFZUCByHudP9ODkYRiwuI5blPGAGc2n1DGp/F8VwAQk8j9ZGb06ZQP+VbPlwwxRo+eBE2esAiMmPk5/43j9tw6GeVLrfb+68NOd73u3QSsLJBRSJSVBUFXVeEY2GzBjWEO3Xz+xB5/IZhsZjVheQedf+wtajAOzTMo14ssQAGMvmNmP7/gHNJ24TA0hZHOnPYGNwuwSMR7V5Bm06gQIpkfBZBIDdQxeALBbAectnYOXitrT75MPZywL444t7sem9HiyZ0wxAE3BB4DBvegNUAEf7Q4glMp8FwKjzpgTA5xGzpo3mSnuLL+umJBqX8NI7x3De8qnR8sEJsgBqjFJm8BgX/2Jhd+bA4FhM9yfPDvj1k7oAYFZA28EKlp46ugvI4Vzg7pPa2LO5ed2igHhCgaKqiMYlDI3Zd5dcOrcFAHQ3ECM9C8jGAuAsFoCaigE4BYF9FpeKUQA4AM055NBPZPEHtOM0P7KsHW990Kf/3GVF0VxAyUX0SG8wGQPIRQC0uZ4cGi/K7p8RyKEt9O5DQ0hICi48o6Noz61GSAAmMZKs4MePvIuDJ8Zyfg/LqGCUM6e/EOxCCyOhqJ5SOG96g+4CArQd+jmntqMtGexztgDsu5FmiwF4DDGGZ97oxv/327dsD5iZM92Peq+Y5gbSXUAZ0kCZC0iPASAVA6h3EgCrBZD8zx4cjaDJ7865zfNEueCMDsTiMt7+sA+ANl+R5zCt0YN6r5gSgBxiAMwFFInJRfH/M9qbfQjHJMeWJgCwfd8A6r0iFs9uKtpzqxESgEnMycEw9nQP46Hn3i/4HoUc62gVjXxK4/N9np1ASbKKwyeDaGnwoLHeDb/PpQdTeZ5LHpKiLfDWzpNMAOzOBQay58GzRTmWkBEMaznqdtk+PKfVA3xweNi2EjhjEFiPASQXyaQLqM4jpi3kLAbg6AIai2Y8B6DYnDK7Ce0tPr0mgAWBOU6zAg73hhBPyDm5c+oMB9wUVQCyHBCvKCp2HBjE6YtayyaclWJqz26KIyRTAKUJ5Nkb19dcrQHjZU31btNh5kZsWy7I+Y3VTjAkWcHh3iDmz9DcCjzP6b5xnufgcgl6+mcsLoNDauFmQhB1EoCsMYCkgCRk3fXDgpRWls1twcBoVM/EAQBJsaaB2rmAtI8mC8CmDxAAiLoFYHEBJW8yNBbLeA5AseE4rY7iw6Mj6BuJaDGApDkyb3oDjveHtGB9DgJQXyoB0FNB7X9v+4+PIhRJYNXi/BvGTTZIACYxzM8tT+CEI2OWjayoGBiJ5JW3r8J51/ylf30t7TXrOb5AZqtAUdIPAg9HJZwcDGOeITjH3EACz8Et8ohLMv74wl4c6hmD2y3oCy27zniWgHFMWWMAhiCyNf3TyrJ5WhzgwImx1O8qpzRQcwxASwONp/n/AYMF4LZkARkmUk4LAAA+umIGOABv7OyBLKv63OdO90OSVYyNx3OKAbhEQd+BFzUG0JzZAti+fwACz2HFgtaiPbNaIQGoEgrxxRsbfhX+3NTnsqzijl9uwbd+9ffM7zF8PjYez6t/jGwRHO25zgupqqYfU7jv2ChUAPNmpASgwWgBiDziCQUvbTuGXYeGTIuNzyPA7eIxEkrFQozWQF4WgJRZOGYF6uH3uSDJij6H9CMhbSqBWQwg+SwVmgvImgIKZMoCSn1eTg
sA0ATntAXTsHlXDyRZMQhA6vfFurNmg7mBimkBuF0CWho8joHg7fsGsGxus8kFNVUhAagSCirB55gFUJxWC8w9YRccGwnF8MundmluHcvj8imQNI6VCUBCSh//D3+7FcPBGKIJOa1DJTtcxigAbIfILAAjxqpTjuPQ7PeYBMDoqspaB2AoJJNkRXdT2Pm0eU6rBwBSFbtM7JxOBGPvA1IuIJYFZOcC8rgFuEQ+rdUzX0ELANBqAgbHYth7bFTfqMyYVge3K9WWOxdYILjJXzwBADQrwK4auGdwHCeHwlh1SqCoz6tWSACqhQkk41iLi/J6rFEAMgjJX147gK3v92Hr+71p38unRN5kAcipls1WtrzXg217+xFPyLqf23iPJr/blNrIDm7nOS6tYZfHZd7Jtfg9GDGkw0bysADMLiAVswJ+tDV54XFohcHcQAKzAPRuoM6VxyyDx2XJAmrwpS+CHpeA73/uHFxgSVfkDEqWrQq4FJy5pA11HhGRmKTHqniew5xkoVmuAlCvWwDFnUN7s/25ADv2DwIAVi6e+u4fgASg6Dzw7B5s29uf9/sm4saRlfxjAJKs4Lm/Hza5PyTJ+T7MhSEpqqm1AZAKRueC0d0j6RaA+bkXrdQWs3hCq/gV+dSfKSv6mmcpzmGvCzyn7zIZHou7obnBgxFDN0jTAexZLQBzENglcPjM5Uux/sKFttcvm9sMIGUB6GcCZ/h1C5YYQDQuIy4pthYAAExvqUtzkxlF2c51VGpcoqA3TxMMY2FuoFwFwJcUgGLGAAAtE2h0PJ6WqLB9Xz/mtPvR1jSxw2cmCyQARSQSk7Bl18m88vInAltECrEAduwfwH++egB/finVp8eYodKfzOAYHI3i5FBYX1AkWUmzVs5copnLs9rqsz53yLDzZu4gqwXAdtlxSYGimgVmfvLUqfkzzALAisFYDMCINeOkxe/BsEMMIJs1w8QllowBiAKPFQtbsXKR/Y5xZls9GupcaQF79iO0czkxlwlb1EMObSAyYdDMijUxY1aJYBAnJty51AEAKRdQg4P4FQpLBTW2hAiG49h3fLQmsn8YUz/KUUa6TwahQhOCfJlIQVYhMQDmJtnTPaS/ZnQBffvXf8dNl52CP7ywN+1Z1iIqtlAdHzCfjWukPXkU38Y/bjM8L9mx02IB8Jzmx39q0yFwnNZvnrFkTjN2HhjEKbObTe9h2T08z8FtcQFZ/fPNfrfJ6ojGjC4gxykAMFsACVnR3VNOriOO43D6wlacGBjHIBfTxZpZW3bN1ziLBRB0aASXCeOiXykBmD+jAXOn+03Fa4tmNuZcmQwAbU0+BJq9Rc/HZwLQOxzB7KRbaueBQagqsOqU2hEAsgCKyKEebefvlGOeiULWf+aKKcQCSJ2YZXDJGBZFSVZtF3RJVtIW7GyctTSAG9akN9RiP6ewJe2U41KLdmujF5+/+jT9ews6GnHPLZ1YvsDcnVFPA+XSg8DWk6eaLQFTowuIy+IDMsYANBeQ9qxMXTQ/vXYJbv/ESggCp/+uBseicIu87a6ebQbYoqc3gstjF2xc9CvVxZjjOHzjptX47BXL9NdmBfy450udumssG+s+Oh/f3nB20cfGagGMFsD2/QNo9rtNyQVTHRKAIsIEoDALoIAHTiBwzBZxYx2AtUjJbh52AdtsTGuw38Gx+48nz31lu2uO43QXQWO9OdjrFnnbwrOGemMhmDULyGoBmAXAaNHkYwFIsqrPK1P2kNettawWeE631gZGo2ht8tpaDuxvgcUNxpIWQH4uIM7283Lj97nS0inbmnw5pw57XELR/f+A1meo3ivqB8MkJBm7Dg5h1eK2Kdn33wkSgCKSEoACLIACVvOJdPFhx/LZtSlgRKLpAlCItcFx9oeNh5kAJNNOm5OpfhxSC631gBKng2daG704Y1ErFs1qyu4CslgARqHL9s/P8xxEQesImpBSLqBcDlIReE4P2A+MRBwrqJ0sgIa63BfCanABVT
vtLXV6KugHR0YQS8g15f4BSACKxkgohqGxGOq9ohYgzHOnXJgLqHDs3DjWPvdhGwtAltW84xUcZ79AWi0AtjM3WgDWxctJAESBx1dvXIkFHY3pQWC3NQhsXkhN58PmsFZ6XFqhmVbgxeljzoYg8LqADoxG0dZkn5/PfjNMAMajEjjOfORjNjjDj4DWf3tYXArQir/cLt4Ub6oFSACKBNv9M9+03eKZiVI25YzGJfzq6d2mVtDW1EvAHA8AiucC4sCZMkGs9x+PJsBznJ7Jw3EpC8Dqvsh09CQjvRDMLAAuUTD1mTFmJuXiLXEnD5bPNQbAYC6gcDSBcExyTDVkAmvMfqr3uvJy5ZgsgAq6gKqZQLMPg8l23tv3D2DFgtYpd+h7NkgAisShniB4jsNpyaPjxjO0mrWnEBMgt/ds3zeAN/f04us/f0P3d8dt2iFbX7MTMa0OID9ysQDqfaKpmIq5bay791z+Qa0iYZdyaKycNR5knstOPiUAxhhA7i6g/hHteY4WgJK6nt013zRIcgFlZ3qLD6oKbNvbj+FgrKbSPxkkAEXiUM8YZgXq9YVl3MZ/nolCinlzfYuxj0o4OS5bC0DK0QLI8OBrL1yQ/iJnXyxmjAHUe11w2QSBvZYmZ64c0gHTK4HTBcAYCDZaRrmslR4Xr4+dxQD4HP6TBIGHJKsYGNXcDjnFAJLjybeYyzgPWv/tYT//F946Cg7AGTVS/WuEBKAIqKqK7p4xLOhoRH3yFKP8LYB0Rsfj2HVwMMODc7uP0QWw84B2P9sYgMUCsAtmZ6s56Fw+I+01DpzeEtju/uPRBOp9ou66MQWBhQJcQFmygACzAIwZOoPmagGw368rDwtA5LU0UN0CaHawAJgLiOf0++YrAKYsIFIAW1gtwMETY1g0u8l0sFCtQAJQBPpGIhiPSlg4sxH1Pm3Hmk9LZcC+FcQ9f9yGf310x4RP7TK+/bXtJwCk6gCM5JLfLytKRt0xLjbnnNoOQNuB2s1BdwFFJM0C0IuqUm4bq+vILpvISloMwMYFZMwEMo4sl6XS4xL0329+QWAOsqxgYDQCn0fUNwtWUmmgqXnkkwEEUAwgF5rq3fpmYXWNZf8wSACKwKFk64f5MxoMFkCetQA2q+rJIS1H2S71MhiO6xWiVvYeHcGfXtyLE8lCLjtxycUCsGPr+30ZewYZFxu26+c42PbONwaB672iLgCKquoxAKsA5LLQWmsO7CwAp/bCudzf4xJ0V5q1UV0m3KKAuKRgYDSKgIP/HzAEgXlOd9/kHQMw/NzIALCH4zi9IKwW/f8AtYIoCgd7xuAWecwK1OuVpPlaAHY7ZJ7joKiqdq6qZQ37yv2bHO/1/JtHsH3/AP77nWO495ZOWwFI2Jxja80CcuLDZDtmO3gOuPdLnRAFHn959QAAzQVkt+CagsBel+5OkSQV3qQlZec6yoZ1EbcTAKuVkA9uF68LQC4xCUadV8TYeBwjIRkzptU5Xme2ADgAav4uIM74OSmAE3PatarfjtbsfaymImQBFIFDPWOYN6MBAs+D5zl43ULeFoCdW4WtfUqeEWLjgj8ajsOuWaidBRCzcQvlC8dzaGvyodnv0RdijtOqP79+0yrTtZGYBFlREIlJqPe59J17QlZSdQBF+Au1cwHlEktwvJ9L0H9f+fSoqfOICEclDI5GHQPAgDUGoL2WvwAYLQASACc2XL4Ed3zqzEoPo2KQAEwQSVZwpDeEBckulYB2OlP+FkD6a/qBL3kKgOnfXbV3AdnGAHJwAWXDtNtMfsoWoHbLoheJSfpO2ugCkmRF37UXoz7CzgKYiAAYK43zEQCfV8TgWBRxSXFMAQVSGWHGLKCJuIAIZ7xusSLtsqsFEoAJcrx/HAlJsQiAkHcaqJ0LiB3qke+Zv8Ydn6oCqo2AxCUlbRHM1QWUCTt3A3vF6nYJx2T951TvSwWBE1JKAAopPLNiLwCFF/wYzxdwibkvtHUeURfztgwWgLEQjLkU/T
aHwWSCdv1ELpAATBBWAbxgpkEA3BW2AAz/+ypU+yBwQk7b+RTFAjC2ILB8Yu3JE41JCCUD2fVegwvIIE4TOe2MYT0QBqiMBWBsipYpCMxcfgLPpeoAyAIgSgAJwAQ51DMGv89l+of2ecS86wDs6mvZ/7CUwyJoXOSNu7/BsahtkVlcUkz95X0eIe82z3bYtSG29rdn16kAhoJaTrzVBaQfoTiB844ZdoHkicYAGPnGABiZTpxiv0pB4PW/gXzOAgBya2lBECQAE+RQzxjmdzSYFt1CXEB2UWCWAmkNAtsFhSMxCXuPjuCFrUdMFsB/PL0n7XpZUZCQFJNfWRT4IlkAzi4gUeD1OfmTWT6sH7vVBaQfop6MYJ+/YgbOX5FeZFYo+WTvWDEWmuUjJHXJFOGGOlfGE7HYZkDkOQAchGRiQT6QC4jIBRKACRCNSzg+MI6FBv8/AHiTQeB8zvm123s7xQCsJ3IBQCiSwMY/bsMjL+9PcydZxzEaimsuIENxkSjw6D4ZBJBqy2zkR184L5dpWGIAqSwgBlsw2SlRA6MpC8DoAmK7duYC+vzVp5kOhpko+eTvW5moBZDtvNmUBaBFAPx1rrwXdEr9JHKhIgLws5/9DF1dXejq6sK9995biSEUhcMng1BVmALAgBYDUFXNx50zDnUAQLof3FYADEVhxuZmQLrFMByKaRaAz2gBaG0Kmv1uLJnTnHb/XA9+t1t3jIsXWzB1AUhaAHVeMVUHICv6dRN1Af3ktgtsX3flcZC9FbMA5BEETsYAAg4tIBipNFAeHJe/+wcoTvosMfUp+5/J5s2bsWnTJjzxxBN48sknsXv3brz44ovlHkZRONSj7ZjTBMCjLRChPNxA+QSB7QQgGEnorgnW45xhtQCGx2KISwp8Bp80u2TVKQHbvvO5FmRxdjEAw/fZgulPukP6R6PweUQIPA8xmVGTkBRdcCaaBeTU32UiZ8wag9l5FYLlagEkf988p/08C0lTJAuAyIWyC0AgEMCdd94Jt9sNl8uFRYsW4cSJE+UeRlE41DOG1kZv2pF1bGF1CgSrqoqewXFT6qd1/R8ai+qZRJKs4Hh/SP9ezObM4fFIQq+2tbZx/uDwiOnrXYe0hnBGXzZzxaxa3JbWgRMoLKjI3mIUBSYkbFEbHI3qvfldhkIw3QIoQhaQHXbnE+SKyQLIw5XU5HfD5xGxcGZjxuvYlJkL0F9AkzLKAiJyoeytIE455RT98+7ubvz1r3/Fn//855zf39rqL8WwCuJwXwjLFkxDIGA+RDowTRujy+NK+x4A/O3vh/Gzx7bjK/+wGh87Zy4AIGrY6AYCDVj39af0r194+xje+aAP9311DV5795itsBwfiuhdJq1s2X3S9PX/7OgBAMyari1ES+e2QBA47Dk0hAvPmoO+sVjaPaZPb8Q5p83A1j3me51/RodpjsbP3Z5k0LPBq7/Ogp9tyVYIsqKiqcGDQKAB3npP8p4zcUrSDXXDx5bY/gyzcfqiNrx3YMDxvY3NmohefOZsLF/YipfeOoIPDg/n9KxgPPXLmjG90dSora3Jm/Eef/7BVfrO3omLVs/Cc5u7MaujCS2NXiyY1ZT3z8AonIX8/KqByTruYlCuuVesF9C+ffvwxS9+EXfccQfmz5+f8/sGB0N5t0YoBWPhOPqGwrh45Uz09wdN34tEtAV0YGg87XsDIxH8+qn3AABPvrofZ8xvBsdxGBoa16+xvue9AwMAgINHhvDXLd22zdie39Kdcbz/fPVpWHVKGz44MoxmvwcCz2F2ux8/++qFcIk8EpKCWELB6EgYizs0AWtr8uqWQXA0gss/MgvrPzoPzX4PxqMJeN0ivG7BNF7j54MjWjM7yLL+Olv2BIPN4xF5/fv3f+VC1HlENPk9+M2dl9r+PHLhf1+3AglJzvje+79yIXweAQLP44z5LUhISk7PCodSQjs6HEZ0XPt9//z2iyDwXEHjNXLdBfPxv644FcHRCL75yVVwiU
Le9zRalxMdTyUIBBom5biLQbHnzvOc48a5IqGid955B5/97Gfx9a9/Hdddd10lhjBhulkBWEe6UrNUR7uFeseBQcTiMj521mwc7g3qcYRMLZ9Z47ZQNIFYXC7ILTKjtQ4+j4iWBg8WdDRi7vQG8ByHOq8LLlFAndelH2Yzf0YjfnPnpaYDskWBA8dx6Gith88joq3JB7+hf48dzFIx7pCZb9/rFnU/tbEtst+X39GHTrhEXk+7dMLvc+kuKe363PZDbpMLyJj+K6YVuxWCwPO6W7HO0CY7HygNlMiFsgtAT08Pbr31Vvz4xz9GV1dXuR9fNA6eGAPHAfNm2AgAy2ax6cLGcu2v6pwHj0vAK+8eA2COAVjFgH01OGrv4jGyaJa9f7mQvHfjwlzIghLSBSB1H7bgCgKnB8vrJ1kvFo8r1ea6kG6lBFEtlP2v98EHH0QsFsPGjRuxfv16rF+/Pq8YQLVwqCeImW31tgFTUbcA0nfqLIOnsd6NzuXTsfX9PoQiCVMWkKoC0xo9ae/NRQCWzmmxfd3lyv9XbZcNlA92AsCygASO04Pl9TnuvKsFtsufSDEZQVQDZf/P+853voPvfOc75X5sUVFVFYd6xhwPkciUwsiasPEch4tXz8Kr20/gjfd6cOq81MLdNxKBz+MCYA7GDjgIgChwkJL58k6LaSGLVa4uESdYrKbB0MiMucd43igAk8sCYBXNE0klJYhqgP6CC2BgNIpQJGFqAGeEuQVsBSAh62mEc6c3YPGsJrz67nGTBXDXf/zdNtDdNxJJew0w94p3ajFQiB95ogLw9ZtW47oLF5jGxBZNgZ+8FgCgWQETqSYmiGqA/oILgHUAtbaAYKQsALsunIop//6SM2ehdziCPd1DpuuGg9G0itHhYMz03saka8XYKtipZ0xBAjBBF9Ccdj/WfXSB6TWjBcDuP9liAIBWQzGRamKCqAZIAArgUM8YREE7AtIOFgOwOwc3lpBN7YTPXtoOv8+FV949brouGpcxq60eM1vNRwcumtmkfz6tURMI4+EiHpeDC6gAAWhvcT62sFCYBcBxhiDwJLQAPC6BXEDEpIf+ggvg0IkxzJvud1wA+AwHucQTclo3yQvP6LD176sq0OQ3B4NPmZ0SgNbkwn/W0oAeaHWyAArJVmFpocVEMHT59E1mC0AkFxAx+Zl8W68KIysKunuDuOiMmY7XcBwHUeBMFoCiqjh8MoihYCzNT79m9Sw8/+aRtHYQdV4RHksswNh3qDVpAYyNxxFo9iEYTmRsMwwATfX5Leq3XrfCNtOpUPQun7I6aYPAgHbIjJ2LjyAmEyQAedIzEEY8oTgGgBmiwEOSVBw4Poqt7/fh7Q/7MBzUsnqWzzenarY3+7BiYSveO6j16PnEJYtx8UfmovvYELZ9OGC6dkayhcKyuc1YubgNL7x1FLMC9ZBkBQdPjCHQ5DVV8FrJd1d/1tJ2/fN8xcOOlAWgItDsQ51ncp7J2tbkK8r5CQRRSUgA8uSgXgGcWQDcIo8X3z6KF98+ClHgsGJBK/w+F472hWwbkV121mxdAOq8IuZMb8DJvjE0GBrNnXNqO9qavfj57RfBJfIQBR73f+VC+H0urFjYikvPmo16rwt3//O5+OKPXyvirDWK4RISDS2fL141C2cvDUzodK5K8U9XLrM7w4cgJhUkAHlyqGcMdR4R7S2ZW/r+4xXLcLg3iECzD6tPaUOd14UnXz+Io30h29jAGYtaMWNaHU4OhU3ZN8y3v3x+C25ZvwIA4POkFky2e+Y5TnelTOTA81Kjt8mQVS0TaBK6f4D0840JYjJCApAnh06MYUFHQ9Z+66uXBLB6ScD02px2rSHTicGw7XtaGjw4ORSGz5AVw/rZl6otcrlZPKsJL71zDDPbip9hRBBEfpAA5EE8IeNY/ziu6pxb0PvnTNf6BrFYgJUzlwTw/uFhzGxNpZeyFMmpEnA897TpWNDRUJIUU4Ig8oMEIA+O9IagqCoWzM
js/3fCmK9vx6VnzsJFKztMLhy978wk9JM7QYs/QVQHJAB5oAeAs2QAOcFzHD575TIEHISA4zjT4t9U70GT342uznm4eNWsgp5JEAThBAlAHnT3jKGlwYNmf+HZMBetdK4fsMKybj6+ZlHBzyMIgnCCBCAPDvaMZU3/JIhq4fs3n1NQG3CidiAByJFQJIG+4QguPKOj0kPJi7nT/ViSPF+XqC1mt1fP+dlEdVITAhBLyNh1cBATyaQ8MaCd2evUAbRa+X//6ZxKD4EgiCqlJgRgy+6T+P3zH074Pm6Rx/wcBKAYLROKQTlbLKxa3IZ6X038ORHElIFTM51GXoUMDoZsD0vJhKqqODkUzvt9Vvw+V1p3zlIRCDSgvz9Y8PsTkgyAm7TpoxOd/2SH5l+78y/23HmeQ2urvTuwJrZsHMeho9W+d/9UpZrbQRAEUR1Mzu0hQRAEMWFIAAiCIGoUEgCCIIgahQSAIAiiRiEBIAiCqFFIAAiCIGqUSZcGyvOZD2KZStTSXO2g+dP8a5Vizj3TvSZdIRhBEARRHMgFRBAEUaOQABAEQdQoJAAEQRA1CgkAQRBEjUICQBAEUaOQABAEQdQoJAAEQRA1CgkAQRBEjUICQBAEUaOQAFQJoVAIV199NY4dOwYA2Lx5M9atW4e1a9fivvvuq/DoSsvPfvYzdHV1oaurC/feey+A2pr/T3/6U1x11VXo6urCQw89BKC25s+45557cOeddwKorflv2LABXV1dWL9+PdavX48dO3aUb/4qUXG2b9+uXn311ery5cvVo0ePqpFIRF2zZo165MgRNZFIqDfffLP66quvVnqYJeGNN95Q/+Ef/kGNxWJqPB5XP/OZz6jPPPNMzcz/zTffVG+66SY1kUiokUhEveSSS9T333+/ZubP2Lx5s3ruueeq3/zmN2vq719RFPWCCy5QE4mE/lo5508WQBXw6KOP4nvf+x7a29sBADt37sS8efMwZ84ciKKIdevW4fnnn6/wKEtDIBDAnXfeCbfbDZfLhUWLFqG7u7tm5n/OOefg97//PURRxODgIGRZxtjYWM3MHwBGRkZw33334ZZbbgFQW3//Bw8eBADcfPPNuOaaa/CHP/yhrPMnAagC7r77bpx99tn61319fQgEAvrX7e3t6O3trcTQSs4pp5yCVatWAQC6u7vx17/+FRzH1cz8AcDlcuH+++9HV1cXOjs7a+r3DwDf/e53cfvtt6OxsRFAbf39j42NobOzEz//+c/x29/+Fo888ghOnDhRtvmTAFQhiqKA41ItXFVVNX09Fdm3bx9uvvlm3HHHHZgzZ07Nzf+2227Dli1b0NPTg+7u7pqZ/2OPPYaOjg50dnbqr9XS3//q1atx7733oqGhAdOmTcMNN9yA+++/v2zzn3TnAdQCM2bMQH9/v/51f3+/7h6airzzzju47bbbcNddd6Grqwtbt26tmfkfOHAA8Xgcp556Knw+H9auXYvnn38egiDo10zl+T/33HPo7+/H+vXrMTo6inA4jOPHj9fM/N9++20kEgldAFVVxaxZs8r2908WQBWycuVKHDp0CIcPH4Ysy3j22Wdx0UUXVXpYJaGnpwe33norfvzjH6OrqwtAbc3/2LFj+M53voN4PI54PI6XXnoJN910U83M/6GHHsKzzz6Lp556CrfddhsuvfRSPPDAAzUz/2AwiHvvvRexWAyhUAhPPPEEvva1r5Vt/mQBVCEejwcbN27El7/8ZcRiMaxZswZXXHFFpYdVEh588EHEYjFs3LhRf+2mm26qmfmvWbMGO3fuxLXXXgtBELB27Vp0dXVh2rRpNTF/O2rp7/+SSy7Bjh07cO2110JRFHzqU5/C6tWryzZ/OhGMIAiiRiEXEEEQRI1CAkAQBFGjkAAQBEHUKCQABEEQNQoJAEEQRI1CAkAQBFGjkAAQNcnNN9+MoaGhCV/z5ptv4uqrr876vKVLl9re66WXXsIPfvADAFpb4Oeffx7Hjh3D6t
Wrs96TICYKFYIRNckbb7xRlGsmymWXXYbLLrus5M8hCDvIAiBqjm9961sAgH/8x3/E1q1bsWHDBqxbtw7XXHMNnnzyybRrenp68Morr+Cmm27C9ddfj4svvhg/+clP8n7uT37yE1x33XVYv349XnnlFQDA448/ji9+8YtFmRdB5AtZAETN8aMf/QiPP/44fve73+ETn/gE7rjjDqxduxa9vb248cYbMW/ePNM1LS0tuOOOO7Bx40bMnz8fvb29uOSSS/CZz3wmr+fOnj0b3//+97F3715s2LABf/3rX0s0Q4LIDRIAomY5cOAAYrEY1q5dCwCYPn061q5di9dff93kg+c4Dr/85S/x6quv4tlnn8WBAwegqioikUhez/vkJz8JAFiyZAkWLVqEd999t3iTIYgCIBcQUbNwHJfWZ11VVUiSZHotHA7juuuuw+7du3HaaafhjjvugCiKyLeNFs+n/t0URYEo0v6LqCwkAERNIggCZs2aBVEU8cILLwAAent78be//Q3nn3++fo0kSTh8+DBCoRC++tWv4tJLL8Wbb76JeDwORVHyeuYTTzwBANi9ezeOHDmClStXFndSBJEntAUhapIrrrgCn/3sZ/GLX/wCP/jBD/Bv//ZvkGUZt956K8477zz9mg0bNuCnP/0pLr74Ylx55ZVwu91YsmQJFi9ejMOHD8Ptduf8zKNHj+Laa68Fx3H413/9VzQ3N5dodgSRG9QOmiAIokYhC4AgisADDzyAZ555xvZ7n/vc53DNNdeUeUQEkR2yAAiCIGoUCgITBEHUKCQABEEQNQoJAEEQRI1CAkAQBFGjkAAQBEHUKP8/4tJAb6qNkeYAAAAASUVORK5CYII=\",\n      \"text/plain\": [\n       \"<Figure size 432x288 with 1 Axes>\"\n      ]\n     },\n     \"metadata\": {},\n     \"output_type\": \"display_data\"\n    }\n   ],\n   \"source\": [\n    \"# Create a visualization with pandas df\\n\",\n    \"sns.lineplot(data=pandas_tips, x=\\\"total_bill\\\", y=\\\"tip\\\")\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 35,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/plain\": [\n       \"<seaborn.axisgrid.FacetGrid at 0x7fc3bbd7f7c0>\"\n      ]\n     },\n     \"execution_count\": 35,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    },\n    {\n     \"data\": {\n      \"image/png\": 
\"iVBORw0KGgoAAAANSUhEUgAAAVwAAAFcCAYAAACEFgYsAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAA09klEQVR4nO3deXyU9b0v8M/s+2SZzGQjCVvYIYlVBLVE0RPRGEWKFnsvevRcX3qvhVt6XrUeDkfvrV0or/ZQl3Laq61tbe/tS1tQjEg9RcUlLIpCIGwBspNMJsskM5lk1uf+ERIBWTLJPPPMPPN5v159lSST3/P9meSTJ7/ntygEQRBARESiU0pdABFRqmDgEhHFCQOXiChOGLhERHHCwCUiihMGLhFRnKjFbHz16tXo6emBWj18mR/84AcYGBjAT37yE/j9ftxxxx1Yt26dmCUQESUM0QJXEAQ0Njbi/fffHw3coaEhLFu2DK+++ipyc3Px2GOPYffu3SgvLx9zu93dXkQi8pk6nJFhRG+vT+oyRCX3Psq9fwD7GA273XLZj4kWuGfOnAEAPPLII3C73bj//vsxY8YMFBUVoaCgAABQVVWFnTt3RhW4cqNWq6QuQXRy76Pc+wewj7Ei2hhuf38/Fi9ejF/+8pf43e9+hz//+c84e/Ys7Hb76GscDgecTqdYJRARJRTR7nDLyspQVlY2+vbKlSvx/PPP42tf+9ro+wRBgEKhiKpdm80csxoTxZX+BJELufdR7v0D2MdYEC1wP/vsMwSDQSxevBjAcLjm5+fD5XKNvsblcsHhcETVrtzGcO12C1wuj9RliErufZR7/wD2Mdp2Lke0IQWPx4NNmzbB7/fD6/Vi27Zt+O53v4uGhgY0NTUhHA6juroaS5YsEasEIqKEItod7i233IJDhw5h+fLliEQi+Na3voWysjJs3LgRa9asgd/vR3l5OZYtWyZWCURECUWRbNszckgh+ci9j3LvH8A+RtvO5XClGRFRnDBwiYjihIFLRBQnDFwiojhh4BIRxQkDl4goTkTdnpHiKy3dCK0mNhtwBIJh9LnlvTsUUbwxcGVEq1Hhpa2HYtLWoytKYtIOEX2JQwpERHHCwCUiihMGLhFRnDBwiYjihIFLRBQnDFwiojhh4BIRxQkDl4goThi4RERxwsAlIooTBi4RUZwwcImI4oSBS0QUJwxcIqI4YeASEcUJA5eIKE64AbmE0tKNAAC73SJxJUQUDwxcCWk1Kvzfvx3HwIA/Ju3xlAaixMYhBSKiOOEdboqKRAT4/CGolArotSooFAqpSyKSPQZuinF7/DjZ6kZn7yAiwvD79FoVChxmTM9Pg0bNP3qIxMLATRGCIOB4sxv1rX3QqJWYnGOF1aRBOCKgs3cQ9a19aHZ6cO1MB2xpeqnLJZIlBm4KiAgCPj/pwtkuHwodZsybkgn1eXeyU3Kt6PX48UW9CzV1Hbim2C5htUTyxb8fZU4QBBw61Y2zXT7MKcpAaXHWBWE7IsOiw00LcpFh0eHzehc+P94pQbVE8sbAlbnGDg9aOr2YMSkN0yelXfG1WrUK18/OhsWowcY/7IezxxenKolSAwNXxvoGAjjS0ANHhgEzC9PH9DkatRLXz86GSqnEr96sQzAUEbdIohTCwJWp4aGELmjUSlxTnBXVtC+DTo3vrCpDk9ODt/c0ilckUYph4MpUQ7sHbm8A86fYoNWoov786+flYtHcbLy9pwlnuwZEqJAo9TBwZSgQCuNEixv2dD3ysozjbmfV0mLotSr86T9PQhCEGFZIlJoYuDJ0qrUPwVAEc4oyJrSCzGrS4u6bpuBYUy+ONPTEsEKi1MTAlZmhQBhn2j2YZDchzaybcHu3lOXDnq7H6++fQoR3uUQTwsCVmTNn+xGJCJhRkB6T9tQqJe5dMhWtrgF8fsIVkzaJUhUDV0a8g0E0dvQjz2aE2aCJWbsLZ2UjJ9OIt2oaOZZLNAEMXBl5d28jQmEBxVdZ4B
AtpVKBysVFaOn04tCp7pi2TZRKGLgyIQgC3q5phM2qi8nY7cUWzc1GhkWHvx9oiXnbRKmCgSsTzp5BdPb4MCXXKkr7KqUSt5Tl42hjL+flEo0TA1cmGjs8sKXpkWMb/7zbq1lSmge1Soldn7eKdg0iOWPgysCgP4RO9yBuW1gIpYgnN1iNWlw/24Gawx3wDYVEuw6RXDFwZaCl0wsAuO26QtGvtfRrk+APhvHJkXbRr0UkNwzcJCcIApo7vchK0yPHZhL9elNyrZiWZ8V7B1q5EIIoSgzcJNfr8cM3FEKBwxy3ay792iQ4ewdxotkdt2sSyQEDN8m1dQ1AqVQgJ1O8h2UXu2aGHXqtCjUcViCKCgM3iUUEAW1dA8jOMMT1tF2dRoXrZjnw2QkX/IFw3K5LlOxE/yn96U9/iqeeegoAUFNTg6qqKlRUVGDz5s1iX1r2uvqGEAhGkG8Xf+z2YjfOz4U/EMaBkzz7jGisRA3cPXv2YNu2bQCAoaEhrF+/Hlu2bMGOHTtw5MgR7N69W8zLy15Htw8qpQLZ6Ya4X7t4Uhqy0vSoOdIR92sTJSvRAtftdmPz5s14/PHHAQC1tbUoKipCQUEB1Go1qqqqsHPnTrEuL3uCIKCjxwdHhgEqVfxHhhQKBW6Yl4Njjb3o6R+K+/WJkpFarIaffvpprFu3Du3tww9WOjs7YbfbRz/ucDjgdDqjbtdmi9/T+HgxmaLf+6C7bxBDgTCKcq0XfP542rocu91yxY9XlU/H9k8aUdvYi/tunTGhtpKd3PsHsI+xIErgvv7668jNzcXixYuxdetWAEAkErng9AFBEMZ1GkF3txeRiDzmf458cQcG/FF/bkNbHxQA0o2aCz5/PG1djsvlueLHVQBmTErDf+5rQvn8nMt+Pe12y1XbSmZy7x/APkbbzuWIErg7duyAy+XCPffcg76+Pvh8PrS1tUGl+vIwQ5fLBYfDIcblU0JHjw+ZVt24DoiMpevnZOPVd0+irWsAk+zy++uDKJZEGfx75ZVXUF1djTfffBNr167F0qVL8fLLL6OhoQFNTU0Ih8Oorq7GkiVLxLi87A0MBeHxBeM69/ZyrpnpgEIBfHqMsxWIriZuT1t0Oh02btyINWvW4M4778TUqVOxbNmyeF1eVjq6fQAg6s5gY5Vm0mJmQTo+O9HJ0yCIrkK0h2YjVqxYgRUrVgAAFi9ejO3bt4t9Sdnr6PHBYtTApI/dMToTcd3sbLz6txNocw1gUhyXGBMlG9EDl2IrEAyju9+PGTE+Rudi4XBkzE9sKxZPwZ/ePYG6FjfK5uZesi0iYuAmna6+4TmvjgxxFzuoVEq8tPXQmF+fadXj7Y/PwDfg/8pshUdXlMS6PKKkxL0UkozLPQi1SoF0S+zPLZuIvCwTvIMh9PuCUpdClLAYuEnG5R5CVppe1JMdxiP33IyJdp53RnRZDNwkMjAYhM8fgl2CvROuRqdVwWbVo73HJ3UpRAmLgZtEXO5BAEjIwAWAHJsBHl8QA4McViC6FAZuEnH1DcGgU8GkT8xnnSMLMTp4l0t0SQzcJCEIArrcQ7CnGca1B0U8mPQaWIwaBi7RZTBwk4TbG0AwHIE9XS91KVeUm2lEd78f/iBPgiC6GAM3SYyM32Yl6PjtiJHlxs7eQYkrIUo8DNwk4XIPIs2khU7i3cGuJs2khV6rGt3vgYi+xMBNAuFIBL0eP2xpiT2cAAyfBJGTaYTLPcglvUQXYeAmAbcngIgA2KyJH7jA8GyFcESAq49H7xCdj4GbBLrPnRlmsybWct7LyUrTQ61ScFiB6CIM3CTQ3T8Ei1Ej+ekOY6VUKpCdYURHr4975BKdh4Gb4CKCMDx+myTDCSNyMg0IBCPo8cTujDWiZMfATXD9AwGEwkLSDCeMcGQYoVBw1RnR+Ri4Ca773IOnzCS7w9WolbBZ9XAycIlGMXATXHe/H0a9GgZdYu6fcCU5mUZ4B0M46/JKXQpRQmDgJjBBENDTP5
R047cjsjOHV8XtP+qUuBKixMDATWDewSACoUjSjd+OGNnM5tOjHVKXQpQQGLgJrLt/+Al/so3fni87w4i6M93wDXGPXCIGbgLr6R+CTqNM2P1vxyIn04BwRMDhMz1Sl0IkOQZuAuv1+JFh0Sfs/rdjkWHRwWrS4tCpLqlLIZIcAzdBBYJhDAyFkJFgp/NGS6FQ4NrZ2Th8phvhCDezodTGwE1QvedWaCV74ALAwjk5GBgK4VRrn9SlEEmKgZugRgI33ayVuJKJK5tph0qpwKFT3VKXQiQpBm6C6vH4YTVpoVYl/5fIqNdgVmE6DnIcl1Jc8v80y5AgCHB7/ciUwXDCiJLpWejo8XGpL6U0Bm4C8gwGEQoLshi/HVEyPQsAeJdLKY2Bm4Dk9MBshD3dgPwsE6eHUUpj4Cag3n4/NOrkXvBwKSXTs3CypY+rzihlMXATUK/XjwyLLqkXPFxK6fQsRASuOqPUxcBNMMFQBB5fEBlm+QwnjJiaZ4XZoOGwAqUsBm6CcXvlN347QqlUoGSajavOKGUxcBPMSOCmW5J/wcOllEzP4qozSlkM3ATj9gZg1KuhVSfHCb3Rmjslk6vOKGUxcBOM2+uXxXLeyzHo1Fx1RimLgZtA/IEwBv1hpMvwgdn5uOqMUhUDN4G4B0Y2rJF34JZy1RmlKAZuAnF7AwCANJN8hxQAICvdgHw7V51R6mHgJhC3xw+zQQONWv5fltJzq84GuOqMUoj8f7KTiNsbkPUDs/OVnFt1doSrziiFMHATxJA/BH9Q/g/MRkzNtcJi5KozSi0M3AQxMn6bKne4SqUCC6Zy1RmlFgZughhZYWaV+QOz83HVGaUaBm6CcHsDsBg1sjhSZ6zmTsmEWqXg9DBKGanz053ARo7UkeMOYVdi0KkxszADB7nMl1IEAzcBDAXCCIQiSEuR8dvzlU7PgrPHhw6uOqMUwMBNAH0Dww/MUmn8dkTJNBsAcLYCpQRRA/e5557DnXfeicrKSrzyyisAgJqaGlRVVaGiogKbN28W8/JJo/9c4KYZUy9wueqMUologbt//37s3bsX27dvx1//+le8+uqrOH78ONavX48tW7Zgx44dOHLkCHbv3i1WCUmjbyAAk14NdQqsMLsUrjqjVCHaT/jChQvxhz/8AWq1Gt3d3QiHw+jv70dRUREKCgqgVqtRVVWFnTt3ilVC0ugfCKTkcMKIktGzzvjwjORN1GNhNRoNnn/+efz2t7/FsmXL0NnZCbvdPvpxh8MBp9MZVZs2mznWZUoqGIpgYCiEqZPSYTJNfJZCLNqIdVvhcAR2u+WyH8+0mZFmPozjLX2oKi8eU3uqBJs+d6X+yQX7OHGin8O9du1aPProo3j88cfR2Nh4wUm0giBEfTJtd7cXkYgQ6zIlYbdb4PYMAQAMGiUGzm3POBGxaCPWbalUSry09dAVX2M1arGnth2//utBKK/yPfHoihK4XJ6Y1BYLdrsloeoRA/sYXTuXI9ptwunTp3Hs2DEAgMFgQEVFBfbt2weXyzX6GpfLBYfDIVYJSaHXMxxqct+S8WqyMw0IhiPo6Y/dLwyiRCNa4La2tmLDhg0IBAIIBALYtWsXVq1ahYaGBjQ1NSEcDqO6uhpLliwRq4Sk4Pb4oVUrodfK8wyzsXKkGaBUgKdAkKyJNqRQXl6O2tpaLF++HCqVChUVFaisrERmZibWrFkDv9+P8vJyLFu2TKwSkkKvxw+rSRv10IrcqNVK2NL06OjxYc7kjJT/70HyJOoY7po1a7BmzZoL3rd48WJs375dzMsmjXA4ArfXjyk58n8YMRY5mUYcPtMDz2AQ1hSck0zyl1iPelNMm2v4AWAqTwk7X67NCABo7+KwAskTA1dCZ872A+ADsxF6rRo2qw5nuwekLoVIFAxcCTW09UGpUMBs0EhdSsLItZng8QXh9XHVGckPA1dCZ872Ic2shVLJB0QjRoYVeJdLcsTAlYggCGg424cMq17qUhKKQadGhk
WH9m6O45L8MHAl4vYG0OcNIMOSWpuOj0WuzYi+gQA3syHZYeBKpKVzeAlhOgP3K/I4W4FkioErkZZOLwCkzLHo0TDqNUgzaTmOS7LDwJVIs9OLHJsRWk1qL+m9nLwsI9zeAHz+kNSlEMXMmAJ3/fr1X3nf2rVrY15MKmnu9GJKXprUZSSsXJsJANDOu1ySkSsu7X3mmWfgdDpx4MAB9PT0jL4/FAqhpaVF9OLkaigQQmePD7deVwhBkMdWk7FmNmhgNWrQ3uXDNP5iIpm4YuCuXLkS9fX1OHHiBG6//fbR96tUKpSWlopdm2y1ugYgAJiaZ8Xptj6py0lYuVkmnGh2Y9AfgkEn+tbNRKK74nfx/PnzMX/+fNxwww3IycmJV02y1+IcnqEwJT+NgXsF+ecCt61rANPzeZdLyW9Mtw3t7e343ve+h76+vgv+BH7rrbdEK0zOmju9MOnVsKcbpC4loZkNGqSbtWhzMXBJHsYUuE8//TRWrFiBOXPmcJ/SGGh2elHgMPO/5RhMsptxpKEHHl8AFm7ZSEluTIGrVqvx8MMPi11LSohEBLS5vLi5LF/qUpJCXpYRRxp60OYawKwiBi4ltzFNCysuLsaJEyfEriUlOHt9CIQiKHDI6/Rhsei1amSl6dHaNcAZHZT0xnSH29LSgm984xvIy8uDTvflyiiO4Uav2Tm8woyBO3aT7CYcPNUNN/eeoCQ3psBdt26d2HWkjOZOD1RKBfKyTFKXkjRybSbUnu5Gq8vLwKWkNqbAnTFjhth1pIwWpxf5WSaoVVxVPVYatRLZmUa0dQ1g7pRMqcshGrcxBe6iRYugUCggCMLok3W73Y4PP/xQ1OLkqLnTi/lTGRrRyreb0N7tQ5d7SOpSiMZtTIF7/Pjx0X8HAgFUV1ejoaFBtKLkqs/rR/9AAIUOntIbrewMA9QqBdq6vFKXQjRuUf9dq9VqsWLFCnzyySdi1CNrI1syFmbzgVm0VEolcm3Dd7n+YFjqcojGZUx3uG63e/TfgiDgyJEj6O/vF6sm2Wru5AyFiSiwm9DS6cWew+2YW8CVZ5R8oh7DBQCbzYZ//dd/FbUwOWp2emCz6mHU85Te8bCl6WHUqfH3/U2YW7BA6nKIohb1GC6NX0unl8MJE6BQKFCQbcah+i50uQeRxb0oKMmMaQw3EongpZdewurVq/HAAw/gxRdfRCjEnfij4Q+E0dHt43DCBA3vQQF8fLhd6lKIojamwP35z3+OvXv34qGHHsLDDz+ML774Aps2bRK7Nllp7fJCAFCYzRkKE2HUqVFabMcnh9sR4VJfSjJjCtyPPvoIv/rVr3DbbbehoqIC//Ef/8E5uFFqObekt5B3uBP2DwuL0N3vx7GmXqlLIYrKmAJXEARoNF8+6NFqtRe8TVfX0umFQaeGLU0vdSlJ7/p5OTDp1fi4lsMKlFzGFLizZs3Cj3/8YzQ3N6OlpQU//vGPudw3Ss2dHhRyD9yY0GpUWDQnBwdOuDAwFJS6HKIxG1PgPvPMM+jv78eqVatw3333obe3F//2b/8mdm2yEYkIaO0cQAFnKMTMTQtyEQpHsO+oU+pSiMbsioEbCATw/e9/H3v27MHGjRtRU1ODBQsWQKVSwWxmeIxVp3sQ/mCYMxRiqCjHgkKHGR9xWIGSyBUD9/nnn4fX68U111wz+r5nn30W/f39eOGFF0QvTi6azx0ayT0UYuvrJXlo6vCM/vclSnRXDNwPPvgAP//5z2Gz2Ubfl52djU2bNuHvf/+76MXJRUunl3vgimDR3Gxo1Uq893mb1KUQjckVA1ej0UCv/+pTdbPZDK2W50uNVbPTi1ybCRo198CNJZNeg0Vzs7H3aAd8fHhGSeCKCaBUKuH1fnU7PK/Xy5VmUWjp9HBJr0iWXjMJgWAEHx/ukLoUoqu6YuDedddd2LBhA3w+3+j7fD4fNmzYgIqKCtGLk4P+gQDc3gAXPIikMNuCaflWvP95K1eeUcK7YuA+9NBDsF
gsuPHGG3H//fdj5cqVuPHGG2G1WvHEE0/Eq8akNrIHbgGX9Ipm6TWT4OwdxLFGrjyjxHbF3cKUSiWeffZZPP7446irq4NSqcSCBQvgcDjiVV/Sa+4cfoLOKWHiuXamA3/eVY/3Pm/lmWeU0Ma0PWN+fj7y8/PFrkWWWpxeZFp1MBu4FFosGrUSS0rysGNvE7r7hrh8mhIWH5uLrLnTy/m3cXBz6fANwQcHOUWMEhcDV0SBYBjt3QMcTogDW5oepdOz8OGhswiGIlKXQ3RJDFwRtXUNQBB4aGS8LL1mEjy+IPYf4/4KlJgYuCLiDIX4mjM5A/l2E/62v2X0/D2iRMLAFVGz0wODToUsPsSJC4VCgduvK0Sry4ujnCJGCYiBK6LmTi8K7GYouQdu3Fw/JxtpZi127m+WuhSir2DgiiQiCGjp9KKAMxTiSqNW4ravTUJdQ8/okA5RohjTPFyKnss9CH8gzE3HRRAOR2C3X/4X2crbZuLtPU3YXduOdQ9cc9nXAcMzSfrcviu+hihWGLgiGT00koEbcyqVEi9tPXTF1+TZjHj/QAtUEGDQXf7b/NEVJbEuj+iyRB1SePHFF1FZWYnKysrRY9VrampQVVWFiooKbN68WczLS6q50wulQoF87oErial5VggC0NDeL3UpRKNEC9yamhp8/PHH2LZtG9544w3U1dWhuroa69evx5YtW7Bjxw4cOXIEu3fvFqsESbU4PcjNMkKjVkldSkoy6jXIyzKiscPDhRCUMEQLXLvdjqeeemr0SPVp06ahsbERRUVFKCgogFqtRlVVFXbu3ClWCZIaXtLL4QQpTc9LQygsoKmDR/BQYhAtcIuLi1FaWgoAaGxsxDvvvAOFQgG73T76GofDAadTfquCPL4Aej1+zlCQWLpFB3u6HqfO9iEU5l0uSU/0h2b19fV47LHH8OSTT0KlUqGxsXH0Y4IgQBHlHFWbLfHvGs+edAEA5s+wX/Fp+giTSReza6dCW9G0V1Jsx98/bUFH7xBmFmVc8jVj+RpdTSzaSHTs48SJGrgHDhzA2rVrsX79elRWVmL//v1wuVyjH3e5XFHvrdvd7UUkktjLNg+dHL5rT9Or4XJd/s/ZkS/uwIA/ZtdOhbaiac+oVcFm1aPuTDdyM/VQKb/6R92VvkZjYbdbJtxGomMfo2vnckQbUmhvb8cTTzyBn/3sZ6isrAQAlJSUoKGhAU1NTQiHw6iursaSJUvEKkEyzU4vbNwDN2HMLEiDPxhGs5MLIUhaot3h/uY3v4Hf78fGjRtH37dq1Sps3LgRa9asgd/vR3l5OZYtWyZWCZJp6vCgkBvWJAxbmh6ZFh3qW/tQmG2BSsml1iQN0QJ3w4YN2LBhwyU/tn37drEuK7mhQAjOHh8WzcmWuhQ6R6FQYEZBOvYedaKl04vJOfxlSNLgXgox1tLphQDwDjfB2NP1SDdrcaq1L+GfAZB8MXBjbGScsIh3UQlFoVBgZkE6fP4QWlwcyyVpMHBjrKnDA4tRg3SzVupS6CKODAPSzVqcbHYjHOG8XIo/Bm6MNTs9KMq2RD2/mMSnUCgwuygDg4EwGtvlPcWJEhMDN4aCoQjaugY4fpvA7OkG2NP1qG/t4x4LFHcM3Bg62zWAcETg+G2Cm12YgUAogtNn+6QuhVIMAzeGmpzDf6ZyD9zElm7RIddmxOm2frg9sV0BR3QlDNwYajp3aKQ93SB1KXQVswszEIkIeH3XSalLoRTCwI2h5g4PChwWHhqZBMxGDQqyzdhR04gu96DU5VCKYODGSCQyfGhkER+YJY2ZBelQKICtH52RuhRKEQzcGGnv8SEQinD8NokYdGosL5+GvXVOnG7jAzQSHwM3RprPPTDjHW5yue/WGUgzafH/dtUjInDJL4mLgRsjTR0eaNRK5GYZpS6FomDQqfGN8mk4c7Yf+47K7/QRSiwM3BhpdnowyW665AbXlNhumJ+DyTkW/OWD0/AHwlKXQzLGdIgBQRDQ5O
QDs2SlVCjwwG3F6PX48c6+JqnLIRlj4MaAq28Ig/4Ql/QmseJJ6Vg424F39jWju29I6nJIphi4MdDcMbLCjIGbzO67eToA4LX3T0lcCckVAzcGGjr6oVIqUOAwSV0KTYAtTY/KxUX49HgnjpzplrockiEGbgw0tnswyW6GRq2SuhSaoDuuL0J2phF/fPckAkE+QKPYYuBOUEQQ0NjRjym5HE6QA41aiQcrZqDTPYi39/ABGsUWA3eCnD0+DPrDmJxrlboUipHZkzOxeG42duxtQnv3gNTlkIwwcCdo5OSAKQxcWbl/aTF0GhVe/dsJCFyBRjHCwJ2gho5+aNVK5HGFmaykmbRYefM0HG92Y09dh9TlkEwwcCeosd2DwhwLV5jJ0JLSPEzLs+LPu06hfyAgdTkkA2qpC0hm4UgEzU4PykvzpS6FxikcjsBuv/wDz3/+r9fif/77B3h99xk89dB1l32d3W5BIBhGn9snRpkkEwzcCWhzDSAQinCGQhJTqZR4aeuhK75men4aPqk9i//9f2qQl/XVudYmkw4DA348uqJErDJJJvh38AQ0nlthxhkK8jYt34p0sxa1p7vh59xcmgAG7gQ0tPfDoFPDkcEzzORMqVCgtDgLoXAEh09zBRqNHwN3Ahra+zE5h2eYpQKrUYuZhek42+3D2S7OzaXxYeCOUzAURptrgPNvU8i0/DSkmYaHFoYCIanLoSTEh2ZRSks3QqtR4WhDN8IRAaWzsq/4lJvkQ6lQ4JoZWfjwUDu+qO/CojnZUPCvG4oCAzdKWo0KL209hFPnDh38rK4dR+o7x9UWn2onH4tRi7mTM1B7pgcN7R5MzeNfODR2HFIYp16PH0adGnotdwhLNUU5FmRnGHC0sYcLIigqDNxxEAQBPf1+ZFh0UpdCElAoFCidngWNWokDJ10IhyNSl0RJgoE7DoP+MPzBMDKtDNxUpdOqUFacBY8viIP1LqnLoSTBMdxx6PUMn3nFO9zU5sgwYkquBSeb3UgzaqUuh5IAA3ccejx+qJQKWPlDlvLmTM5E30AQX9S70NE9AI7o05VwSGEcej1+pJu1UCo5JSjVqZQK3LggFwDw01c/QzDE8Vy6PAZulPzBMPoGAsiw6KUuhRKE2ahFWXEWTrW48dp7PPGXLo+BG6VTLW4IApDJ8Vs6T67NhOXl07Dr81bsP+aUuhxKUAzcKJ1o6gHAB2b0VQ9VzsG0PCteeec42rjfAl0CAzdKx5t6YdSroeOCB7qIWqXEf18+Dzq1Ei/+tRa+oaDUJVGCYeBGQRAEHG3o5nACXVamVY//ce98dPUN4dfbjyIS4QGU9CUGbhTau33o8wZgS+MDM7q8GQXp+NZtxTh8phvbPjojdTmUQDgPNwonWtwAAJuVgUtXdnNZPpqcHry9pwmF2RZcN8shdUmUAHiHG4WTLW5kWnUw6fl7iq5MoVDgv/zDTEzLt+I3bx9FS6dX6pIoATBwx0gQBJxo7sW8qVncA5XGRKNW4ol758OgU+OFv9bCO8iHaKmOgTtGLvcg3N4A5k6zSV0KJZF0sw7fvnc+3F4/frn1MELcWSylMXDH6ESzGwAwbyoDl6IzLT8ND985Gyda3PjdO8chCJy5kKo4GDlGJ1rcMBs0KMjmcToUvcVzc9DZO4g3P25AdoYBVTdOkbokkgADd4xOtrgxsyCd47c0bnffOBmdvT5s+6gB9gwDFs3JkbokijNRhxS8Xi/uuusutLa2AgBqampQVVWFiooKbN68WcxLx1R33xC6+oYwozBd6lIoiSkUCvzjHbMxoyAdv337OOpb3VKXRHEmWuAeOnQIDzzwABobGwEAQ0NDWL9+PbZs2YIdO3bgyJEj2L17t1iXj6kTLb0AgJkF6dIWQklPo1bi2yvmw2bV4YW/HkZnr0/qkiiORAvc1157Dc888wwcjuEJ37W1tSgqKkJBQQHUajWqqqqwc+dOsS4fU8eb3DDp1ZhkN0tdCsmA2aDBd+4rgS
AI2Px6Lfp9PIgyVYg2hvujH/3ogrc7Oztht9tH33Y4HHA6o9/GzmaLb+gJgoBjzb0onelAdvbwkdgmU2z3Uohle6nQVqzbi0VbI23Y7WN7qGq3W/D0f1uEf/tVDba8cQQ/fPxGGHSJ/UhlrH1LZmL3MW5f4UgkcsEDJ0EQxvUAqrvbG9cNQdq6BtDdN4TpuRa4XB7Y7RYMDPhjeo1YtpcKbcW6vYm2ZTLpRttwuTxj/jy7WYvH7pmLF7cexg9e2oO1KxdArUrMmZp2uyWqviWjWPXxSqEdt69uTk4OXK4vTzd1uVyjww2JrK5heP/buZMzJa6E5Kis2I6Hls3CkYYe/HbHMUQ4R1fW4ha4JSUlaGhoQFNTE8LhMKqrq7FkyZJ4XX7c6hp6kJ1pRFa6QepSSKaWlOThG+VTsbfOidfeO8WFETIWtyEFnU6HjRs3Ys2aNfD7/SgvL8eyZcvidflxCYYiONHci68vyJO6FJK5OxcVoc8bwLuftiDNrMUd1xdJXRKJQPTAfe+990b/vXjxYmzfvl3sS8bMqbY+BEIRzJ3C4QQSl0KhwKrbiuEZDOL190/DpNdgSQl/0ctNYj8WlVhdQw9USgVmcsEDjUE4HJnwU+7vP7QQP3plH36/8zisVj1K+cteVhi4V1DX0INp+WkJP12HEoNKpcRLWw9NuJ2cdD1sVj1efO0gHr9nHq7l5uWykZhzUBJAvy+AJqeHwwkUdyqVEgtnOzCzKBO/3l6Hg/VdUpdEMcLAvYzDp7sBAPMYuCQBtUqJ//XoIhRmm7HljcOj0xMpuTFwL+NgfRfSzVoU5ch/dQ0lJqNeg3X3lyLXZsILf63FieZeqUuiCWLgXkIgGMbhhm6UFduh5HaMJCGzQYN/XlWKrHQDNr9+CMcaeaebzBi4l3C0qReBYARlxVlSl0IEq1GL7z1QBke6Ab/4Sy0On+mWuiQaJwbuJRysd8GgU2FWUYbUpRABANJMWjz5rWuQZzPh+b/U4ouTrqt/EiUcBu5FIhEBB+u7MH+qLWE3EqHUZDZo8L0HSlGUY8GWN45g/7Hod9sjaTFRLnLmbD/6fUGUFduv/mKiODPqNfjnb5ZiWp4Vv36zDrsOtEpdEkWBgXuRz+tdUCkVmM/TeSlBGXRqrPtmKUqmZ+FP/3kSf/ngNDe8SRIM3PMIgoAv6rswqygDRj1Xl1Hi0mlUeGLFPNxcmocde5vwm7ePIRSOSF0WXQVT5TzNTi+cPT7cfl2B1KUQjWlvhu/+12sxKeck/rjzOHyBML6/+lqYjdqvvC4QDKPPzfPTpMbAPc+eug6olAquXaeEEM3eDKXTbThU78J/+9F/YuFsBywXhe6jK0rEKJGixCGFcyIRAfuOOrFgmg1mg0bqcoiiUphtwQ3zchAKR/DhoXa0d/NuNhExcM851tSLvoEAFs/NkboUonGxWfVYUpIHi1GDT4934nhzLx+mJRgG7jl76jpg0KlRMp2zEyh5GXRq3Dg/B5PsJpxs6UPNkQ4M+kNSl0XnMHAB+ANhHDjpwnWz7NCoVVKXQzQhKqUSZcVZKJ1ug9sbwAcHz+KT2rNSl0XgQzMAwBenXPAHwhxOINlQKBQozLYg06rH5ydd2Pj7T/EPCwvxT3fPg2mczyjOnzHBWQ/jw8AFUHO4A5lWHYoL0qUuhSimzAYNbpqfC4NRi9d31eOjg22YNzUTuZlGKKLYCc9k0mFgwD/6Nmc9jE/KDyk4e3w40tCDJQvyuBUjyZJSqcCDd87B1xfkQqtR4rPjLnx6vJNjuxJI+cB97/M2qJQKlJfyhFSStwyLDktK8jBncgZc7iG893kbTra4EeYKtbhJ6SGFoUAIHx9ux7WzHEgz66Quh0h0SoUC0/PTkGszoq6hB8eb3Wjq8GBWUQYm2U1RDTNQ9FL6DndvnROD/hBuvWaS1KUQxZVJr8HC2dm4YV4OtBoVvqjvwoeH2tHZO8i5uyJK2cAVBAG7Pm9FYbYZ0/KtUpdDJI
msND2WlOSirDgLgVAYe4868fHhDnT2+hi8IkjZIYUTzW60uQbw8B2z+GcUpTSFQoEChxn5WSY0d3pR3+rG3qOdyDBrMbMwHfZ0g9QlykbKBu47+5phNmhw/ZxsqUshSghKpQKTcywodJjR0unFyXPBm27WYs4UGzItWs7kmaCUDNymDg8On+nGiiVTodVwZRnR+ZRKBYpyLCg4F7yn2vpQc7gdBp0KU3OtKMy+8paRdHkpGbjVNY0w6NRYyodlRJc1EryF2Wa4fSEcPdOFusZenGhxQ2/Q4sY5DmRa9VKXmVRSLnAb2vtx4KQLd984mac6EI2BQqHAJIcZGSYNej1+nD7bhzc/PI3tH57G12bacUtZPmYUpPNZyBikXOJs3X0aZoMGty8slLoUoqSTYdHh2pkO3H1zMV579zg+rm3H/mOdyMsy4ebSPNwwL1fSG5m0dOOEhgkvPmEj1ntGpFTgHmnoRl1jL765dDoMupTqOlFMZWcaserWYty7ZCr2H3Pigy/a8H//Xo+/7D6N62dn48b5uSielBb3u16tRjXmUzIudvF+EUDs94xImdQJhSP4f3+vhyPdwLFbohjRaVT4+oI8fH1BHho7+vH+523Yd8yJj2rbkZWmx+K5ObhhXg6yM41Sl5oQUiZw3zvQivZuH9Z8Yz406pRd70Ekmsk5Vjx8pxUP3FaMz0+6sOdIB6prGvFWTSMm2U0oK7bjmhl2FGabU3a8NyUCt8s9iG0fNWDBNBtKp2dJXQ6RrOm1atwwLxc3zMtFr8ePT4858UV9F6r3DIdvplWH2UUZmFWYgdlFGSk100H2gSsIAv7wtxMAgNUVM1P2NyuRFDIsOlQsLETFwkJ4fAEcOtWNQ6e6cLC+C58c7gAA2NP1mJJrRVG2BYXZw9PQLj51WC5kH7jvfd6GIw09+C//MAO2tNT5TUqUaCxGLW5akIubFuQiIgho7fTieLMbJ1vcON3Wj/3HOkdfazZo4MgwDP8v3YDsDCPs6QbY0vRIMyfvijdZB67HF8Br75/C/Kk2LL0mX+pyiOgc5bkjgAqzLai4rgAA4B0MotnpQbPTC2evD529g6hv6cO+OifO30ZHpVTAZtXDlqYf/f+sc/+OqFQQBCFh/5KVdeBq1EosW1iIW782KWG/AEQ0zGzQYM7kTMyZnHnB+4OhCLr6BuFyD6K7bwhd/UPo7hv+3+GGbvR5Axe8XqVUwGLUwGrUwmLSIMOsQ7pZB6VS+gyQdeDqtWrcu2Sq1GUQ0QRo1Erk2kzItZku+fFgKIyefj+6+ofgDwvY8fEZ9PuC6Oj1oblz+DQLpVKBDLMWNqse9nQDMqw6SYYlZB24RCR/GrUK2ZlGZGcaYbdb0Hy2b/RjQ4EQevr96On3o7t/CCdb+3CytQ8atRKODANyMoxwZBjiNlU0JQJ3osv9iOhC4XDkK8tgxysUjkCtEifw9Fo18rLUyMsavjsOhiJwuQfR0TM8RtzmGoBSqUBOhgHTCtKRZtCIOvSQEoE7keV+F+Px0ESASqWM6c9UvH4+NWol8rJMyMsyQRAE9Hj8ONs1gLauAZw9eHb040UOM9LMsZ+alhKBS0R0MYXi3GwHqx5zJ2fCMxTCqVY3Wju9aOrwwGrU4M6vT4Muhje8DFwiSnlKpQJ5djPSjBoEQxG0ubzo6B3EwGAQOqMmdteJWUtERDKgUSsxOdeKRXOyMbMo8+qfEAUGLhFRnDBwiYjihIFLRBQnDFwiojiRJHDfeust3HnnnaioqMCf/vQnKUogIoq7uE8Lczqd2Lx5M7Zu3QqtVotVq1bh+uuvx/Tp0+NdChFRXMU9cGtqarBo0SKkp6cDAG6//Xbs3LkT3/72t8f0+eNddmeO4Vy6WLZl0mugECIxay9R+xnLtmLd3kTbMp73NZRrP42X+D5NlNpi1dal+giMP3MuRSEIgnD1l8XOr3/9a/h8Pqxbtw4A8Prrr6O2thbPPvtsPMsgIoq7uI/hRiKRC/amTe
TNgomIYinugZuTkwOXyzX6tsvlgsPhiHcZRERxF/fAveGGG7Bnzx709PRgcHAQ7777LpYsWRLvMoiI4i7uD82ys7Oxbt06PPjggwgGg1i5ciUWLFgQ7zKIiOIu7g/NiIhSFVeaERHFCQOXiChOGLhERHHCwCUiihMGbpx5vV7cddddaG1tBTC81LmqqgoVFRXYvHmzxNVN3IsvvojKykpUVlZi06ZNAOTXx+eeew533nknKisr8corrwCQXx8B4Kc//SmeeuopAPLr3+rVq1FZWYl77rkH99xzDw4dOhSfPgoUNwcPHhTuuusuYe7cuUJLS4swODgolJeXC83NzUIwGBQeeeQR4YMPPpC6zHH75JNPhG9+85uC3+8XAoGA8OCDDwpvvfWWrPq4b98+YdWqVUIwGBQGBweFW265RTh27Jis+igIglBTUyNcf/31wve//33ZfZ9GIhHhpptuEoLB4Oj74tVH3uHG0WuvvYZnnnlmdGVdbW0tioqKUFBQALVajaqqKuzcuVPiKsfPbrfjqaeeglarhUajwbRp09DY2CirPi5cuBB/+MMfoFar0d3djXA4jP7+fln10e12Y/PmzXj88ccByO/79MyZMwCARx55BHfffTf++Mc/xq2PDNw4+tGPfoRrr7129O3Ozk7Y7fbRtx0OB5xOpxSlxURxcTFKS0sBAI2NjXjnnXegUChk1UcA0Gg0eP7551FZWYnFixfL7uv49NNPY926dbBarQDk933a39+PxYsX45e//CV+97vf4c9//jPOnj0blz4ycCUk14186uvr8cgjj+DJJ59EQUGBLPu4du1a7NmzB+3t7WhsbJRNH19//XXk5uZi8eLFo++T2/dpWVkZNm3aBIvFgszMTKxcuRLPP/98XPoY96W99CU5buRz4MABrF27FuvXr0dlZSX2798vqz6ePn0agUAAs2fPhsFgQEVFBXbu3AmVSjX6mmTu444dO+ByuXDPPfegr68PPp8PbW1tsukfAHz22WcIBoOjv1QEQUB+fn5cvk95hyuhkpISNDQ0oKmpCeFwGNXV1Um9kU97ezueeOIJ/OxnP0NlZSUA+fWxtbUVGzZsQCAQQCAQwK5du7Bq1SrZ9PGVV15BdXU13nzzTaxduxZLly7Fyy+/LJv+AYDH48GmTZvg9/vh9Xqxbds2fPe7341LH3mHKyGdToeNGzdizZo18Pv9KC8vx7Jly6Qua9x+85vfwO/3Y+PGjaPvW7Vqlaz6WF5ejtraWixfvhwqlQoVFRWorKxEZmambPp4Mbl9n95yyy04dOgQli9fjkgkgm9961soKyuLSx+5eQ0RUZxwSIGIKE4YuEREccLAJSKKEwYuEVGcMHCJiOKEgUtEFCcMXEoajzzyCHp6eib8mn379uGuu+666vVmzpx5ybZ27dqFH/7whwCGt/nbuXMnWltbUVZWdtU2KbVx4QMljU8++SQmr5moW2+9Fbfeeqvo1yH54R0uJYV/+Zd/AQA89NBD2L9/P1avXo2qqircfffdeOONN77ymvb2drz//vtYtWoVVqxYgZtvvhm/+MUvor7uL37xC9x7772455578P777wMAtm7disceeywm/aLUwjtcSgo/+clPsHXrVvz+97/H/fffjyeffBIVFRVwOp247777UFRUdMFrMjIy8OSTT2Ljxo2YPHkynE4nbrnlFjz44INRXXfSpEn4wQ9+gJMnT2L16tV45513ROohpQIGLiWV06dPw+/3o6KiAgCQnZ2NiooKfPTRRxeMoSoUCvzqV7/CBx98gOrqapw+fRqCIGBwcDCq6z3wwAMAgBkzZmDatGn44osvYtcZSjkcUqCkolAovrJPqSAICIVCF7zP5/Ph3nvvRV1dHebMmYMnn3wSarUa0W4dolR++SMSiUSgVvMehcaPgUtJQ6VSIT8/H2q1Gu+++y4AwOl04m9/+xtuuOGG0deEQiE0NTXB6/XiO9/5DpYuXYp9+/YhEAggEolEdc
1t27YBAOrq6tDc3IySkpLYdopSCn9dU9JYtmwZ/vEf/xFbtmzBD3/4Q7zwwgsIh8N44oknsGjRotHXrF69Gs899xxuvvlm3HHHHdBqtZgxYwamT5+OpqYmaLXaMV+zpaUFy5cvh0KhwL//+78jPT1dpN5RKuD2jEREccI7XEpZL7/8Mt56661Lfuyf/umfcPfdd8e5IpI73uESEcUJH5oREcUJA5eIKE4YuEREccLAJSKKEwYuEVGc/H/38wAJadXY2gAAAABJRU5ErkJggg==\",\n      \"text/plain\": [\n       \"<Figure size 360x360 with 1 Axes>\"\n      ]\n     },\n     \"metadata\": {},\n     \"output_type\": \"display_data\"\n    }\n   ],\n   \"source\": [\n    \"# Create a visualization with Modin df\\n\",\n    \"sns.displot(data=modin_tips, x=\\\"total_bill\\\", kde=True)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 36,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/plain\": [\n       \"<seaborn.axisgrid.FacetGrid at 0x7fc3bd078340>\"\n      ]\n     },\n     \"execution_count\": 36,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    },\n    {\n     \"data\": {\n      \"image/png\": \"iVBORw0KGgoAAAANSUhEUgAAAVwAAAFcCAYAAACEFgYsAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAA09klEQVR4nO3deXyU9b0v8M/s+2SZzGQjCVvYIYlVBLVE0RPRGEWKFnsvevRcX3qvhVt6XrUeDkfvrV0or/ZQl3Laq61tbe/tS1tQjEg9RcUlLIpCIGwBspNMJsskM5lk1uf+ERIBWTLJPPPMPPN5v159lSST3/P9meSTJ7/ntygEQRBARESiU0pdABFRqmDgEhHFCQOXiChOGLhERHHCwCUiihMGLhFRnKjFbHz16tXo6emBWj18mR/84AcYGBjAT37yE/j9ftxxxx1Yt26dmCUQESUM0QJXEAQ0Njbi/fffHw3coaEhLFu2DK+++ipyc3Px2GOPYffu3SgvLx9zu93dXkQi8pk6nJFhRG+vT+oyRCX3Psq9fwD7GA273XLZj4kWuGfOnAEAPPLII3C73bj//vsxY8YMFBUVoaCgAABQVVWFnTt3RhW4cqNWq6QuQXRy76Pc+wewj7Ei2hhuf38/Fi9ejF/+8pf43e9+hz//+c84e/Ys7Hb76GscDgecTqdYJRARJRTR7nDLyspQVlY2+vbKlSvx/PPP42tf+9ro+wRBgEKhiKpdm80csxoTxZX+BJELufdR7v0D2MdYEC1wP/vsMwSDQSxevBjAcLjm5+fD5XKNvsblcsHhcETVrtzGcO12C1wuj9RliErufZR7/wD2Mdp2Lke0IQWPx4NNmzbB7/fD6/Vi27Zt+O53v4uGhgY0NTUhHA6juroaS5YsEasEIqKEItod7i233IJDhw5h+fLliEQi+Na3voWysjJs3LgRa9asgd/vR3l5OZYtWyZWCURECUWRbNszckgh+ci9j3LvH8A+RtvO5XClGRFRnDBwiYjihI
FLRBQnDFwiojhh4BIRxQkDl4goTkTdnpHiKy3dCK0mNhtwBIJh9LnlvTsUUbwxcGVEq1Hhpa2HYtLWoytKYtIOEX2JQwpERHHCwCUiihMGLhFRnDBwiYjihIFLRBQnDFwiojhh4BIRxQkDl4goThi4RERxwsAlIooTBi4RUZwwcImI4oSBS0QUJwxcIqI4YeASEcUJA5eIKE64AbmE0tKNAAC73SJxJUQUDwxcCWk1Kvzfvx3HwIA/Ju3xlAaixMYhBSKiOOEdboqKRAT4/CGolArotSooFAqpSyKSPQZuinF7/DjZ6kZn7yAiwvD79FoVChxmTM9Pg0bNP3qIxMLATRGCIOB4sxv1rX3QqJWYnGOF1aRBOCKgs3cQ9a19aHZ6cO1MB2xpeqnLJZIlBm4KiAgCPj/pwtkuHwodZsybkgn1eXeyU3Kt6PX48UW9CzV1Hbim2C5htUTyxb8fZU4QBBw61Y2zXT7MKcpAaXHWBWE7IsOiw00LcpFh0eHzehc+P94pQbVE8sbAlbnGDg9aOr2YMSkN0yelXfG1WrUK18/OhsWowcY/7IezxxenKolSAwNXxvoGAjjS0ANHhgEzC9PH9DkatRLXz86GSqnEr96sQzAUEbdIohTCwJWp4aGELmjUSlxTnBXVtC+DTo3vrCpDk9ODt/c0ilckUYph4MpUQ7sHbm8A86fYoNWoov786+flYtHcbLy9pwlnuwZEqJAo9TBwZSgQCuNEixv2dD3ysozjbmfV0mLotSr86T9PQhCEGFZIlJoYuDJ0qrUPwVAEc4oyJrSCzGrS4u6bpuBYUy+ONPTEsEKi1MTAlZmhQBhn2j2YZDchzaybcHu3lOXDnq7H6++fQoR3uUQTwsCVmTNn+xGJCJhRkB6T9tQqJe5dMhWtrgF8fsIVkzaJUhUDV0a8g0E0dvQjz2aE2aCJWbsLZ2UjJ9OIt2oaOZZLNAEMXBl5d28jQmEBxVdZ4BAtpVKBysVFaOn04tCp7pi2TZRKGLgyIQgC3q5phM2qi8nY7cUWzc1GhkWHvx9oiXnbRKmCgSsTzp5BdPb4MCXXKkr7KqUSt5Tl42hjL+flEo0TA1cmGjs8sKXpkWMb/7zbq1lSmge1Soldn7eKdg0iOWPgysCgP4RO9yBuW1gIpYgnN1iNWlw/24Gawx3wDYVEuw6RXDFwZaCl0wsAuO26QtGvtfRrk+APhvHJkXbRr0UkNwzcJCcIApo7vchK0yPHZhL9elNyrZiWZ8V7B1q5EIIoSgzcJNfr8cM3FEKBwxy3ay792iQ4ewdxotkdt2sSyQEDN8m1dQ1AqVQgJ1O8h2UXu2aGHXqtCjUcViCKCgM3iUUEAW1dA8jOMMT1tF2dRoXrZjnw2QkX/IFw3K5LlOxE/yn96U9/iqeeegoAUFNTg6qqKlRUVGDz5s1iX1r2uvqGEAhGkG8Xf+z2YjfOz4U/EMaBkzz7jGisRA3cPXv2YNu2bQCAoaEhrF+/Hlu2bMGOHTtw5MgR7N69W8zLy15Htw8qpQLZ6Ya4X7t4Uhqy0vSoOdIR92sTJSvRAtftdmPz5s14/PHHAQC1tbUoKipCQUEB1Go1qqqqsHPnTrEuL3uCIKCjxwdHhgEqVfxHhhQKBW6Yl4Njjb3o6R+K+/WJkpFarIaffvpprFu3Du3tww9WOjs7YbfbRz/ucDjgdDqjbtdmi9/T+HgxmaLf+6C7bxBDgTCKcq0XfP542rocu91yxY9XlU/H9k8aUdvYi/tunTGhtpKd3PsHsI+xIErgvv7668jNzcXixYuxdetWAEAkErng9AFBEMZ1GkF3txeRiDzmf458cQcG/FF/bkNbHxQA0o2aCz5/PG1djsvlueLHVQBmTErDf+5rQvn8nMt+Pe12y1XbSmZy7x/APkbbzuWIErg7duyAy+XCPffcg76+Pvh8PrS1tUGl+vIwQ5fLBYfDIcblU0JHjw+ZVt24DoiMpevnZOPVd0+irWsAk+zy++uDKJ
ZEGfx75ZVXUF1djTfffBNr167F0qVL8fLLL6OhoQFNTU0Ih8Oorq7GkiVLxLi87A0MBeHxBeM69/ZyrpnpgEIBfHqMsxWIriZuT1t0Oh02btyINWvW4M4778TUqVOxbNmyeF1eVjq6fQAg6s5gY5Vm0mJmQTo+O9HJ0yCIrkK0h2YjVqxYgRUrVgAAFi9ejO3bt4t9Sdnr6PHBYtTApI/dMToTcd3sbLz6txNocw1gUhyXGBMlG9EDl2IrEAyju9+PGTE+Rudi4XBkzE9sKxZPwZ/ePYG6FjfK5uZesi0iYuAmna6+4TmvjgxxFzuoVEq8tPXQmF+fadXj7Y/PwDfg/8pshUdXlMS6PKKkxL0UkozLPQi1SoF0S+zPLZuIvCwTvIMh9PuCUpdClLAYuEnG5R5CVppe1JMdxiP33IyJdp53RnRZDNwkMjAYhM8fgl2CvROuRqdVwWbVo73HJ3UpRAmLgZtEXO5BAEjIwAWAHJsBHl8QA4McViC6FAZuEnH1DcGgU8GkT8xnnSMLMTp4l0t0SQzcJCEIArrcQ7CnGca1B0U8mPQaWIwaBi7RZTBwk4TbG0AwHIE9XS91KVeUm2lEd78f/iBPgiC6GAM3SYyM32Yl6PjtiJHlxs7eQYkrIUo8DNwk4XIPIs2khU7i3cGuJs2khV6rGt3vgYi+xMBNAuFIBL0eP2xpiT2cAAyfBJGTaYTLPcglvUQXYeAmAbcngIgA2KyJH7jA8GyFcESAq49H7xCdj4GbBLrPnRlmsybWct7LyUrTQ61ScFiB6CIM3CTQ3T8Ei1Ej+ekOY6VUKpCdYURHr4975BKdh4Gb4CKCMDx+myTDCSNyMg0IBCPo8cTujDWiZMfATXD9AwGEwkLSDCeMcGQYoVBw1RnR+Ri4Ca773IOnzCS7w9WolbBZ9XAycIlGMXATXHe/H0a9GgZdYu6fcCU5mUZ4B0M46/JKXQpRQmDgJjBBENDTP5R047cjsjOHV8XtP+qUuBKixMDATWDewSACoUjSjd+OGNnM5tOjHVKXQpQQGLgJrLt/+Al/so3fni87w4i6M93wDXGPXCIGbgLr6R+CTqNM2P1vxyIn04BwRMDhMz1Sl0IkOQZuAuv1+JFh0Sfs/rdjkWHRwWrS4tCpLqlLIZIcAzdBBYJhDAyFkJFgp/NGS6FQ4NrZ2Th8phvhCDezodTGwE1QvedWaCV74ALAwjk5GBgK4VRrn9SlEEmKgZugRgI33ayVuJKJK5tph0qpwKFT3VKXQiQpBm6C6vH4YTVpoVYl/5fIqNdgVmE6DnIcl1Jc8v80y5AgCHB7/ciUwXDCiJLpWejo8XGpL6U0Bm4C8gwGEQoLshi/HVEyPQsAeJdLKY2Bm4Dk9MBshD3dgPwsE6eHUUpj4Cag3n4/NOrkXvBwKSXTs3CypY+rzihlMXATUK/XjwyLLqkXPFxK6fQsRASuOqPUxcBNMMFQBB5fEBlm+QwnjJiaZ4XZoOGwAqUsBm6CcXvlN347QqlUoGSajavOKGUxcBPMSOCmW5J/wcOllEzP4qozSlkM3ATj9gZg1KuhVSfHCb3Rmjslk6vOKGUxcBOM2+uXxXLeyzHo1Fx1RimLgZtA/IEwBv1hpMvwgdn5uOqMUhUDN4G4B0Y2rJF34JZy1RmlKAZuAnF7AwCANJN8hxQAICvdgHw7V51R6mHgJhC3xw+zQQONWv5fltJzq84GuOqMUoj8f7KTiNsbkPUDs/OVnFt1doSrziiFMHATxJA/BH9Q/g/MRkzNtcJi5KozSi0M3AQxMn6bKne4SqUCC6Zy1RmlFgZughhZYWaV+QOz83HVGaUaBm6CcHsDsBg1sjhSZ6zmTsmEWqXg9DBKGanz053ARo7UkeMOYVdi0KkxszADB7nMl1IEAzcBDAXCCIQiSEuR8dvzlU7PgrPHhw6uOqMUwMBNAH0Dww/MUmn8dkTJNBsAcLYCpQRRA/e5557DnXfeicrKSrzyyisAgJqaGl
RVVaGiogKbN28W8/JJo/9c4KYZUy9wueqMUologbt//37s3bsX27dvx1//+le8+uqrOH78ONavX48tW7Zgx44dOHLkCHbv3i1WCUmjbyAAk14NdQqsMLsUrjqjVCHaT/jChQvxhz/8AWq1Gt3d3QiHw+jv70dRUREKCgqgVqtRVVWFnTt3ilVC0ugfCKTkcMKIktGzzvjwjORN1GNhNRoNnn/+efz2t7/FsmXL0NnZCbvdPvpxh8MBp9MZVZs2mznWZUoqGIpgYCiEqZPSYTJNfJZCLNqIdVvhcAR2u+WyH8+0mZFmPozjLX2oKi8eU3uqBJs+d6X+yQX7OHGin8O9du1aPProo3j88cfR2Nh4wUm0giBEfTJtd7cXkYgQ6zIlYbdb4PYMAQAMGiUGzm3POBGxaCPWbalUSry09dAVX2M1arGnth2//utBKK/yPfHoihK4XJ6Y1BYLdrsloeoRA/sYXTuXI9ptwunTp3Hs2DEAgMFgQEVFBfbt2weXyzX6GpfLBYfDIVYJSaHXMxxqct+S8WqyMw0IhiPo6Y/dLwyiRCNa4La2tmLDhg0IBAIIBALYtWsXVq1ahYaGBjQ1NSEcDqO6uhpLliwRq4Sk4Pb4oVUrodfK8wyzsXKkGaBUgKdAkKyJNqRQXl6O2tpaLF++HCqVChUVFaisrERmZibWrFkDv9+P8vJyLFu2TKwSkkKvxw+rSRv10IrcqNVK2NL06OjxYc7kjJT/70HyJOoY7po1a7BmzZoL3rd48WJs375dzMsmjXA4ArfXjyk58n8YMRY5mUYcPtMDz2AQ1hSck0zyl1iPelNMm2v4AWAqTwk7X67NCABo7+KwAskTA1dCZ872A+ADsxF6rRo2qw5nuwekLoVIFAxcCTW09UGpUMBs0EhdSsLItZng8QXh9XHVGckPA1dCZ872Ic2shVLJB0QjRoYVeJdLcsTAlYggCGg424cMq17qUhKKQadGhkWH9m6O45L8MHAl4vYG0OcNIMOSWpuOj0WuzYi+gQA3syHZYeBKpKVzeAlhOgP3K/I4W4FkioErkZZOLwCkzLHo0TDqNUgzaTmOS7LDwJVIs9OLHJsRWk1qL+m9nLwsI9zeAHz+kNSlEMXMmAJ3/fr1X3nf2rVrY15MKmnu9GJKXprUZSSsXJsJANDOu1ySkSsu7X3mmWfgdDpx4MAB9PT0jL4/FAqhpaVF9OLkaigQQmePD7deVwhBkMdWk7FmNmhgNWrQ3uXDNP5iIpm4YuCuXLkS9fX1OHHiBG6//fbR96tUKpSWlopdm2y1ugYgAJiaZ8Xptj6py0lYuVkmnGh2Y9AfgkEn+tbNRKK74nfx/PnzMX/+fNxwww3IycmJV02y1+IcnqEwJT+NgXsF+ecCt61rANPzeZdLyW9Mtw3t7e343ve+h76+vgv+BH7rrbdEK0zOmju9MOnVsKcbpC4loZkNGqSbtWhzMXBJHsYUuE8//TRWrFiBOXPmcJ/SGGh2elHgMPO/5RhMsptxpKEHHl8AFm7ZSEluTIGrVqvx8MMPi11LSohEBLS5vLi5LF/qUpJCXpYRRxp60OYawKwiBi4ltzFNCysuLsaJEyfEriUlOHt9CIQiKHDI6/Rhsei1amSl6dHaNcAZHZT0xnSH29LSgm984xvIy8uDTvflyiiO4Uav2Tm8woyBO3aT7CYcPNUNN/eeoCQ3psBdt26d2HWkjOZOD1RKBfKyTFKXkjRybSbUnu5Gq8vLwKWkNqbAnTFjhth1pIwWpxf5WSaoVVxVPVYatRLZmUa0dQ1g7pRMqcshGrcxBe6iRYugUCggCMLok3W73Y4PP/xQ1OLkqLnTi/lTGRrRyreb0N7tQ5d7SOpSiMZtTIF7/Pjx0X8HAgFUV1ejoaFBtKLkqs/rR/9AAIUOntIbrewMA9QqBdq6vFKXQjRuUf9dq9VqsWLFCnzyySdi1CNrI1syFmbzgVm0VEolcm3Dd7n+YFjqcojGZUx3uG63e/
TfgiDgyJEj6O/vF6sm2Wru5AyFiSiwm9DS6cWew+2YW8CVZ5R8oh7DBQCbzYZ//dd/FbUwOWp2emCz6mHU85Te8bCl6WHUqfH3/U2YW7BA6nKIohb1GC6NX0unl8MJE6BQKFCQbcah+i50uQeRxb0oKMmMaQw3EongpZdewurVq/HAAw/gxRdfRCjEnfij4Q+E0dHt43DCBA3vQQF8fLhd6lKIojamwP35z3+OvXv34qGHHsLDDz+ML774Aps2bRK7Nllp7fJCAFCYzRkKE2HUqVFabMcnh9sR4VJfSjJjCtyPPvoIv/rVr3DbbbehoqIC//Ef/8E5uFFqObekt5B3uBP2DwuL0N3vx7GmXqlLIYrKmAJXEARoNF8+6NFqtRe8TVfX0umFQaeGLU0vdSlJ7/p5OTDp1fi4lsMKlFzGFLizZs3Cj3/8YzQ3N6OlpQU//vGPudw3Ss2dHhRyD9yY0GpUWDQnBwdOuDAwFJS6HKIxG1PgPvPMM+jv78eqVatw3333obe3F//2b/8mdm2yEYkIaO0cQAFnKMTMTQtyEQpHsO+oU+pSiMbsioEbCATw/e9/H3v27MHGjRtRU1ODBQsWQKVSwWxmeIxVp3sQ/mCYMxRiqCjHgkKHGR9xWIGSyBUD9/nnn4fX68U111wz+r5nn30W/f39eOGFF0QvTi6azx0ayT0UYuvrJXlo6vCM/vclSnRXDNwPPvgAP//5z2Gz2Ubfl52djU2bNuHvf/+76MXJRUunl3vgimDR3Gxo1Uq893mb1KUQjckVA1ej0UCv/+pTdbPZDK2W50uNVbPTi1ybCRo198CNJZNeg0Vzs7H3aAd8fHhGSeCKCaBUKuH1fnU7PK/Xy5VmUWjp9HBJr0iWXjMJgWAEHx/ukLoUoqu6YuDedddd2LBhA3w+3+j7fD4fNmzYgIqKCtGLk4P+gQDc3gAXPIikMNuCaflWvP95K1eeUcK7YuA+9NBDsFgsuPHGG3H//fdj5cqVuPHGG2G1WvHEE0/Eq8akNrIHbgGX9Ipm6TWT4OwdxLFGrjyjxHbF3cKUSiWeffZZPP7446irq4NSqcSCBQvgcDjiVV/Sa+4cfoLOKWHiuXamA3/eVY/3Pm/lmWeU0Ma0PWN+fj7y8/PFrkWWWpxeZFp1MBu4FFosGrUSS0rysGNvE7r7hrh8mhIWH5uLrLnTy/m3cXBz6fANwQcHOUWMEhcDV0SBYBjt3QMcTogDW5oepdOz8OGhswiGIlKXQ3RJDFwRtXUNQBB4aGS8LL1mEjy+IPYf4/4KlJgYuCLiDIX4mjM5A/l2E/62v2X0/D2iRMLAFVGz0wODToUsPsSJC4VCgduvK0Sry4ujnCJGCYiBK6LmTi8K7GYouQdu3Fw/JxtpZi127m+WuhSir2DgiiQiCGjp9KKAMxTiSqNW4ravTUJdQ8/okA5RohjTPFyKnss9CH8gzE3HRRAOR2C3X/4X2crbZuLtPU3YXduOdQ9cc9nXAcMzSfrcviu+hihWGLgiGT00koEbcyqVEi9tPXTF1+TZjHj/QAtUEGDQXf7b/NEVJbEuj+iyRB1SePHFF1FZWYnKysrRY9VrampQVVWFiooKbN68WczLS6q50wulQoF87oErial5VggC0NDeL3UpRKNEC9yamhp8/PHH2LZtG9544w3U1dWhuroa69evx5YtW7Bjxw4cOXIEu3fvFqsESbU4PcjNMkKjVkldSkoy6jXIyzKiscPDhRCUMEQLXLvdjqeeemr0SPVp06ahsbERRUVFKCgogFqtRlVVFXbu3ClWCZIaXtLL4QQpTc9LQygsoKmDR/BQYhAtcIuLi1FaWgoAaGxsxDvvvAOFQgG73T76GofDAadTfquCPL4Aej1+zlCQWLpFB3u6HqfO9iEU5l0uSU/0h2b19fV47LHH8OSTT0KlUqGxsXH0Y4IgQBHlHFWbLfHvGs+edAEA5s+wX/Fp+giTSReza6dCW9G0V1Jsx98/bUFH7x
BmFmVc8jVj+RpdTSzaSHTs48SJGrgHDhzA2rVrsX79elRWVmL//v1wuVyjH3e5XFHvrdvd7UUkktjLNg+dHL5rT9Or4XJd/s/ZkS/uwIA/ZtdOhbaiac+oVcFm1aPuTDdyM/VQKb/6R92VvkZjYbdbJtxGomMfo2vnckQbUmhvb8cTTzyBn/3sZ6isrAQAlJSUoKGhAU1NTQiHw6iursaSJUvEKkEyzU4vbNwDN2HMLEiDPxhGs5MLIUhaot3h/uY3v4Hf78fGjRtH37dq1Sps3LgRa9asgd/vR3l5OZYtWyZWCZJp6vCgkBvWJAxbmh6ZFh3qW/tQmG2BSsml1iQN0QJ3w4YN2LBhwyU/tn37drEuK7mhQAjOHh8WzcmWuhQ6R6FQYEZBOvYedaKl04vJOfxlSNLgXgox1tLphQDwDjfB2NP1SDdrcaq1L+GfAZB8MXBjbGScsIh3UQlFoVBgZkE6fP4QWlwcyyVpMHBjrKnDA4tRg3SzVupS6CKODAPSzVqcbHYjHOG8XIo/Bm6MNTs9KMq2RD2/mMSnUCgwuygDg4EwGtvlPcWJEhMDN4aCoQjaugY4fpvA7OkG2NP1qG/t4x4LFHcM3Bg62zWAcETg+G2Cm12YgUAogtNn+6QuhVIMAzeGmpzDf6ZyD9zElm7RIddmxOm2frg9sV0BR3QlDNwYajp3aKQ93SB1KXQVswszEIkIeH3XSalLoRTCwI2h5g4PChwWHhqZBMxGDQqyzdhR04gu96DU5VCKYODGSCQyfGhkER+YJY2ZBelQKICtH52RuhRKEQzcGGnv8SEQinD8NokYdGosL5+GvXVOnG7jAzQSHwM3RprPPTDjHW5yue/WGUgzafH/dtUjInDJL4mLgRsjTR0eaNRK5GYZpS6FomDQqfGN8mk4c7Yf+47K7/QRSiwM3BhpdnowyW665AbXlNhumJ+DyTkW/OWD0/AHwlKXQzLGdIgBQRDQ5OQDs2SlVCjwwG3F6PX48c6+JqnLIRlj4MaAq28Ig/4Ql/QmseJJ6Vg424F39jWju29I6nJIphi4MdDcMbLCjIGbzO67eToA4LX3T0lcCckVAzcGGjr6oVIqUOAwSV0KTYAtTY/KxUX49HgnjpzplrockiEGbgw0tnswyW6GRq2SuhSaoDuuL0J2phF/fPckAkE+QKPYYuBOUEQQ0NjRjym5HE6QA41aiQcrZqDTPYi39/ABGsUWA3eCnD0+DPrDmJxrlboUipHZkzOxeG42duxtQnv3gNTlkIwwcCdo5OSAKQxcWbl/aTF0GhVe/dsJCFyBRjHCwJ2gho5+aNVK5HGFmaykmbRYefM0HG92Y09dh9TlkEwwcCeosd2DwhwLV5jJ0JLSPEzLs+LPu06hfyAgdTkkA2qpC0hm4UgEzU4PykvzpS6FxikcjsBuv/wDz3/+r9fif/77B3h99xk89dB1l32d3W5BIBhGn9snRpkkEwzcCWhzDSAQinCGQhJTqZR4aeuhK75men4aPqk9i//9f2qQl/XVudYmkw4DA348uqJErDJJJvh38AQ0nlthxhkK8jYt34p0sxa1p7vh59xcmgAG7gQ0tPfDoFPDkcEzzORMqVCgtDgLoXAEh09zBRqNHwN3Ahra+zE5h2eYpQKrUYuZhek42+3D2S7OzaXxYeCOUzAURptrgPNvU8i0/DSkmYaHFoYCIanLoSTEh2ZRSks3QqtR4WhDN8IRAaWzsq/4lJvkQ6lQ4JoZWfjwUDu+qO/CojnZUPCvG4oCAzdKWo0KL209hFPnDh38rK4dR+o7x9UWn2onH4tRi7mTM1B7pgcN7R5MzeNfODR2HFIYp16PH0adGnotdwhLNUU5FmRnGHC0sYcLIigqDNxxEAQBPf1+ZFh0UpdCElAoFCidngWNWokDJ10IhyNSl0RJgoE7DoP+MPzBMDKtDNxUpdOqUFacBY8viIP1LqnLoSTBMdxx6PUMn3nFO9zU5sgwYkquBSeb3Ugzaq
Uuh5IAA3ccejx+qJQKWPlDlvLmTM5E30AQX9S70NE9AI7o05VwSGEcej1+pJu1UCo5JSjVqZQK3LggFwDw01c/QzDE8Vy6PAZulPzBMPoGAsiw6KUuhRKE2ahFWXEWTrW48dp7PPGXLo+BG6VTLW4IApDJ8Vs6T67NhOXl07Dr81bsP+aUuhxKUAzcKJ1o6gHAB2b0VQ9VzsG0PCteeec42rjfAl0CAzdKx5t6YdSroeOCB7qIWqXEf18+Dzq1Ei/+tRa+oaDUJVGCYeBGQRAEHG3o5nACXVamVY//ce98dPUN4dfbjyIS4QGU9CUGbhTau33o8wZgS+MDM7q8GQXp+NZtxTh8phvbPjojdTmUQDgPNwonWtwAAJuVgUtXdnNZPpqcHry9pwmF2RZcN8shdUmUAHiHG4WTLW5kWnUw6fl7iq5MoVDgv/zDTEzLt+I3bx9FS6dX6pIoATBwx0gQBJxo7sW8qVncA5XGRKNW4ol758OgU+OFv9bCO8iHaKmOgTtGLvcg3N4A5k6zSV0KJZF0sw7fvnc+3F4/frn1MELcWSylMXDH6ESzGwAwbyoDl6IzLT8ND985Gyda3PjdO8chCJy5kKo4GDlGJ1rcMBs0KMjmcToUvcVzc9DZO4g3P25AdoYBVTdOkbokkgADd4xOtrgxsyCd47c0bnffOBmdvT5s+6gB9gwDFs3JkbokijNRhxS8Xi/uuusutLa2AgBqampQVVWFiooKbN68WcxLx1R33xC6+oYwozBd6lIoiSkUCvzjHbMxoyAdv337OOpb3VKXRHEmWuAeOnQIDzzwABobGwEAQ0NDWL9+PbZs2YIdO3bgyJEj2L17t1iXj6kTLb0AgJkF6dIWQklPo1bi2yvmw2bV4YW/HkZnr0/qkiiORAvc1157Dc888wwcjuEJ37W1tSgqKkJBQQHUajWqqqqwc+dOsS4fU8eb3DDp1ZhkN0tdCsmA2aDBd+4rgSAI2Px6Lfp9PIgyVYg2hvujH/3ogrc7Oztht9tH33Y4HHA6o9/GzmaLb+gJgoBjzb0onelAdvbwkdgmU2z3Uohle6nQVqzbi0VbI23Y7WN7qGq3W/D0f1uEf/tVDba8cQQ/fPxGGHSJ/UhlrH1LZmL3MW5f4UgkcsEDJ0EQxvUAqrvbG9cNQdq6BtDdN4TpuRa4XB7Y7RYMDPhjeo1YtpcKbcW6vYm2ZTLpRttwuTxj/jy7WYvH7pmLF7cexg9e2oO1KxdArUrMmZp2uyWqviWjWPXxSqEdt69uTk4OXK4vTzd1uVyjww2JrK5heP/buZMzJa6E5Kis2I6Hls3CkYYe/HbHMUQ4R1fW4ha4JSUlaGhoQFNTE8LhMKqrq7FkyZJ4XX7c6hp6kJ1pRFa6QepSSKaWlOThG+VTsbfOidfeO8WFETIWtyEFnU6HjRs3Ys2aNfD7/SgvL8eyZcvidflxCYYiONHci68vyJO6FJK5OxcVoc8bwLuftiDNrMUd1xdJXRKJQPTAfe+990b/vXjxYmzfvl3sS8bMqbY+BEIRzJ3C4QQSl0KhwKrbiuEZDOL190/DpNdgSQl/0ctNYj8WlVhdQw9USgVmcsEDjUE4HJnwU+7vP7QQP3plH36/8zisVj1K+cteVhi4V1DX0INp+WkJP12HEoNKpcRLWw9NuJ2cdD1sVj1efO0gHr9nHq7l5uWykZhzUBJAvy+AJqeHwwkUdyqVEgtnOzCzKBO/3l6Hg/VdUpdEMcLAvYzDp7sBAPMYuCQBtUqJ//XoIhRmm7HljcOj0xMpuTFwL+NgfRfSzVoU5ch/dQ0lJqNeg3X3lyLXZsILf63FieZeqUuiCWLgXkIgGMbhhm6UFduh5HaMJCGzQYN/XlWKrHQDNr9+CMcaeaebzBi4l3C0qReBYARlxVlSl0IEq1GL7z1QBke6Ab/4Sy0On+mWuiQaJwbuJRysd8GgU2FWUYbUpRABANJMWjz5rWuQZzPh+b/U4ouTrqt/EiUcBu
5FIhEBB+u7MH+qLWE3EqHUZDZo8L0HSlGUY8GWN45g/7Hod9sjaTFRLnLmbD/6fUGUFduv/mKiODPqNfjnb5ZiWp4Vv36zDrsOtEpdEkWBgXuRz+tdUCkVmM/TeSlBGXRqrPtmKUqmZ+FP/3kSf/ngNDe8SRIM3PMIgoAv6rswqygDRj1Xl1Hi0mlUeGLFPNxcmocde5vwm7ePIRSOSF0WXQVT5TzNTi+cPT7cfl2B1KUQjWlvhu/+12sxKeck/rjzOHyBML6/+lqYjdqvvC4QDKPPzfPTpMbAPc+eug6olAquXaeEEM3eDKXTbThU78J/+9F/YuFsBywXhe6jK0rEKJGixCGFcyIRAfuOOrFgmg1mg0bqcoiiUphtwQ3zchAKR/DhoXa0d/NuNhExcM851tSLvoEAFs/NkboUonGxWfVYUpIHi1GDT4934nhzLx+mJRgG7jl76jpg0KlRMp2zEyh5GXRq3Dg/B5PsJpxs6UPNkQ4M+kNSl0XnMHAB+ANhHDjpwnWz7NCoVVKXQzQhKqUSZcVZKJ1ug9sbwAcHz+KT2rNSl0XgQzMAwBenXPAHwhxOINlQKBQozLYg06rH5ydd2Pj7T/EPCwvxT3fPg2mczyjOnzHBWQ/jw8AFUHO4A5lWHYoL0qUuhSimzAYNbpqfC4NRi9d31eOjg22YNzUTuZlGKKLYCc9k0mFgwD/6Nmc9jE/KDyk4e3w40tCDJQvyuBUjyZJSqcCDd87B1xfkQqtR4rPjLnx6vJNjuxJI+cB97/M2qJQKlJfyhFSStwyLDktK8jBncgZc7iG893kbTra4EeYKtbhJ6SGFoUAIHx9ux7WzHEgz66Quh0h0SoUC0/PTkGszoq6hB8eb3Wjq8GBWUQYm2U1RDTNQ9FL6DndvnROD/hBuvWaS1KUQxZVJr8HC2dm4YV4OtBoVvqjvwoeH2tHZO8i5uyJK2cAVBAG7Pm9FYbYZ0/KtUpdDJImsND2WlOSirDgLgVAYe4868fHhDnT2+hi8IkjZIYUTzW60uQbw8B2z+GcUpTSFQoEChxn5WSY0d3pR3+rG3qOdyDBrMbMwHfZ0g9QlykbKBu47+5phNmhw/ZxsqUshSghKpQKTcywodJjR0unFyXPBm27WYs4UGzItWs7kmaCUDNymDg8On+nGiiVTodVwZRnR+ZRKBYpyLCg4F7yn2vpQc7gdBp0KU3OtKMy+8paRdHkpGbjVNY0w6NRYyodlRJc1EryF2Wa4fSEcPdOFusZenGhxQ2/Q4sY5DmRa9VKXmVRSLnAb2vtx4KQLd984mac6EI2BQqHAJIcZGSYNej1+nD7bhzc/PI3tH57G12bacUtZPmYUpPNZyBikXOJs3X0aZoMGty8slLoUoqSTYdHh2pkO3H1zMV579zg+rm3H/mOdyMsy4ebSPNwwL1fSG5m0dOOEhgkvPmEj1ntGpFTgHmnoRl1jL765dDoMupTqOlFMZWcaserWYty7ZCr2H3Pigy/a8H//Xo+/7D6N62dn48b5uSielBb3u16tRjXmUzIudvF+EUDs94xImdQJhSP4f3+vhyPdwLFbohjRaVT4+oI8fH1BHho7+vH+523Yd8yJj2rbkZWmx+K5ObhhXg6yM41Sl5oQUiZw3zvQivZuH9Z8Yz406pRd70Ekmsk5Vjx8pxUP3FaMz0+6sOdIB6prGvFWTSMm2U0oK7bjmhl2FGabU3a8NyUCt8s9iG0fNWDBNBtKp2dJXQ6RrOm1atwwLxc3zMtFr8ePT4858UV9F6r3DIdvplWH2UUZmFWYgdlFGSk100H2gSsIAv7wtxMAgNUVM1P2NyuRFDIsOlQsLETFwkJ4fAEcOtWNQ6e6cLC+C58c7gAA2NP1mJJrRVG2BYXZw9PQLj51WC5kH7jvfd6GIw09+C//MAO2tNT5TUqUaCxGLW5akIubFuQiIgho7fTieLMbJ1vcON3Wj/3HOkdfazZo4MgwDP8v3YDsDCPs6QbY0vRIMy
fvijdZB67HF8Br75/C/Kk2LL0mX+pyiOgc5bkjgAqzLai4rgAA4B0MotnpQbPTC2evD529g6hv6cO+OifO30ZHpVTAZtXDlqYf/f+sc/+OqFQQBCFh/5KVdeBq1EosW1iIW782KWG/AEQ0zGzQYM7kTMyZnHnB+4OhCLr6BuFyD6K7bwhd/UPo7hv+3+GGbvR5Axe8XqVUwGLUwGrUwmLSIMOsQ7pZB6VS+gyQdeDqtWrcu2Sq1GUQ0QRo1Erk2kzItZku+fFgKIyefj+6+ofgDwvY8fEZ9PuC6Oj1oblz+DQLpVKBDLMWNqse9nQDMqw6SYYlZB24RCR/GrUK2ZlGZGcaYbdb0Hy2b/RjQ4EQevr96On3o7t/CCdb+3CytQ8atRKODANyMoxwZBjiNlU0JQJ3osv9iOhC4XDkK8tgxysUjkCtEifw9Fo18rLUyMsavjsOhiJwuQfR0TM8RtzmGoBSqUBOhgHTCtKRZtCIOvSQEoE7keV+F+Px0ESASqWM6c9UvH4+NWol8rJMyMsyQRAE9Hj8ONs1gLauAZw9eHb040UOM9LMsZ+alhKBS0R0MYXi3GwHqx5zJ2fCMxTCqVY3Wju9aOrwwGrU4M6vT4Muhje8DFwiSnlKpQJ5djPSjBoEQxG0ubzo6B3EwGAQOqMmdteJWUtERDKgUSsxOdeKRXOyMbMo8+qfEAUGLhFRnDBwiYjihIFLRBQnDFwiojiRJHDfeust3HnnnaioqMCf/vQnKUogIoq7uE8Lczqd2Lx5M7Zu3QqtVotVq1bh+uuvx/Tp0+NdChFRXMU9cGtqarBo0SKkp6cDAG6//Xbs3LkT3/72t8f0+eNddmeO4Vy6WLZl0mugECIxay9R+xnLtmLd3kTbMp73NZRrP42X+D5NlNpi1dal+giMP3MuRSEIgnD1l8XOr3/9a/h8Pqxbtw4A8Prrr6O2thbPPvtsPMsgIoq7uI/hRiKRC/amTeTNgomIYinugZuTkwOXyzX6tsvlgsPhiHcZRERxF/fAveGGG7Bnzx709PRgcHAQ7777LpYsWRLvMoiI4i7uD82ys7Oxbt06PPjggwgGg1i5ciUWLFgQ7zKIiOIu7g/NiIhSFVeaERHFCQOXiChOGLhERHHCwCUiihMGbpx5vV7cddddaG1tBTC81LmqqgoVFRXYvHmzxNVN3IsvvojKykpUVlZi06ZNAOTXx+eeew533nknKisr8corrwCQXx8B4Kc//SmeeuopAPLr3+rVq1FZWYl77rkH99xzDw4dOhSfPgoUNwcPHhTuuusuYe7cuUJLS4swODgolJeXC83NzUIwGBQeeeQR4YMPPpC6zHH75JNPhG9+85uC3+8XAoGA8OCDDwpvvfWWrPq4b98+YdWqVUIwGBQGBweFW265RTh27Jis+igIglBTUyNcf/31wve//33ZfZ9GIhHhpptuEoLB4Oj74tVH3uHG0WuvvYZnnnlmdGVdbW0tioqKUFBQALVajaqqKuzcuVPiKsfPbrfjqaeeglarhUajwbRp09DY2CirPi5cuBB/+MMfoFar0d3djXA4jP7+fln10e12Y/PmzXj88ccByO/79MyZMwCARx55BHfffTf++Mc/xq2PDNw4+tGPfoRrr7129O3Ozk7Y7fbRtx0OB5xOpxSlxURxcTFKS0sBAI2NjXjnnXegUChk1UcA0Gg0eP7551FZWYnFixfL7uv49NNPY926dbBarQDk933a39+PxYsX45e//CV+97vf4c9//jPOnj0blz4ycCUk14186uvr8cgjj+DJJ59EQUGBLPu4du1a7NmzB+3t7WhsbJRNH19//XXk5uZi8eLFo++T2/dpWVkZNm3aBIvFgszMTKxcuRLPP/98XPoY96W99CU5buRz4MABrF27FuvXr0dlZSX2798vqz6ePn0agUAAs2fPhsFgQEVFBXbu3AmVSjX6mmTu444dO+ByuXDPPfegr68PPp8PbW1tsukfAHz22WcIBo
Ojv1QEQUB+fn5cvk95hyuhkpISNDQ0oKmpCeFwGNXV1Um9kU97ezueeOIJ/OxnP0NlZSUA+fWxtbUVGzZsQCAQQCAQwK5du7Bq1SrZ9PGVV15BdXU13nzzTaxduxZLly7Fyy+/LJv+AYDH48GmTZvg9/vh9Xqxbds2fPe7341LH3mHKyGdToeNGzdizZo18Pv9KC8vx7Jly6Qua9x+85vfwO/3Y+PGjaPvW7Vqlaz6WF5ejtraWixfvhwqlQoVFRWorKxEZmambPp4Mbl9n95yyy04dOgQli9fjkgkgm9961soKyuLSx+5eQ0RUZxwSIGIKE4YuEREccLAJSKKEwYuEVGcMHCJiOKEgUtEFCcMXEoajzzyCHp6eib8mn379uGuu+666vVmzpx5ybZ27dqFH/7whwCGt/nbuXMnWltbUVZWdtU2KbVx4QMljU8++SQmr5moW2+9Fbfeeqvo1yH54R0uJYV/+Zd/AQA89NBD2L9/P1avXo2qqircfffdeOONN77ymvb2drz//vtYtWoVVqxYgZtvvhm/+MUvor7uL37xC9x7772455578P777wMAtm7disceeywm/aLUwjtcSgo/+clPsHXrVvz+97/H/fffjyeffBIVFRVwOp247777UFRUdMFrMjIy8OSTT2Ljxo2YPHkynE4nbrnlFjz44INRXXfSpEn4wQ9+gJMnT2L16tV45513ROohpQIGLiWV06dPw+/3o6KiAgCQnZ2NiooKfPTRRxeMoSoUCvzqV7/CBx98gOrqapw+fRqCIGBwcDCq6z3wwAMAgBkzZmDatGn44osvYtcZSjkcUqCkolAovrJPqSAICIVCF7zP5/Ph3nvvRV1dHebMmYMnn3wSarUa0W4dolR++SMSiUSgVvMehcaPgUtJQ6VSIT8/H2q1Gu+++y4AwOl04m9/+xtuuOGG0deEQiE0NTXB6/XiO9/5DpYuXYp9+/YhEAggEolEdc1t27YBAOrq6tDc3IySkpLYdopSCn9dU9JYtmwZ/vEf/xFbtmzBD3/4Q7zwwgsIh8N44oknsGjRotHXrF69Gs899xxuvvlm3HHHHdBqtZgxYwamT5+OpqYmaLXaMV+zpaUFy5cvh0KhwL//+78jPT1dpN5RKuD2jEREccI7XEpZL7/8Mt56661Lfuyf/umfcPfdd8e5IpI73uESEcUJH5oREcUJA5eIKE4YuEREccLAJSKKEwYuEVGc/H/38wAJadXY2gAAAABJRU5ErkJggg==\",\n      \"text/plain\": [\n       \"<Figure size 360x360 with 1 Axes>\"\n      ]\n     },\n     \"metadata\": {},\n     \"output_type\": \"display_data\"\n    }\n   ],\n   \"source\": [\n    \"# Create a visualization with pandas df\\n\",\n    \"sns.displot(data=pandas_tips, x=\\\"total_bill\\\", kde=True)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 37,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/plain\": [\n       \"<AxesSubplot:xlabel='total_bill', ylabel='Frequency'>\"\n      ]\n     },\n     \"execution_count\": 37,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    },\n    {\n     \"data\": {\n      \"image/png\": 
\"iVBORw0KGgoAAAANSUhEUgAAAYAAAAEJCAYAAACdePCvAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAAZRUlEQVR4nO3de3BU9fnH8c8uu8lgE0TSBSla7aBo6SXSiyGWEsAakCQEA8WEmUQKjFKtXDoaICKMchVsIxaZ1sogCiihgkAkqZbbKFCoaEEdFEoJEMnEGMolJiS72fP7wx+pWIFNOOck6/f9+iu7yT7f5yFhP3vO7jnHY1mWJQCAcbyt3QAAoHUQAABgKAIAAAxFAACAoQgAADAUAQAAhiIAAMBQvtZuoLn+85/PFA5//Q9dSEiIU3V1TWu30WqYn/lNnd/u2b1ej6666htf+b2oC4Bw2DIiACQZM+eFMD/zm8qt2dkFBACGIgAAwFAEAAAYigAAAEMRAABgKEcDoKamRunp6SovLz/v/uXLlys3N9fJpQEAl+BYAOzdu1c5OTkqKys77/5//etfevbZZ51aFgAQIceOAygqKtKMGTOUn5/fdF9DQ4OmT5+u8ePHa926dU4tjQvocGV7xcbY/yuvbwjp9Kk62+sCcJZjATB79uz/ue93v/udhg0bpmuuuabFdRMS4i6nragSCMTbXrNg8Xbba865/2eO9OpEzWjC/ObO79bsrh0JvH37dlVUVGjq1KnatWtXi+tUV9cYcYRgIBCvqqozttcMBkO21jzHiV7trhlNmN/c+e2e3ev1XPCFs2sBUFxcrIMHDyozM1O1tbX69NNPNXHiRD311FNutQAA+ALXAmDu3LlNX+/atUuLFi3iyR8AWhHHAQCAoRzfAti8efP/3JeUlKSkpCSnlwYAXARbAABgKAIAAAxFAACAoQgAADAUAQAAhiIAAMBQUXdReFMEQ2Gjz4UCwHkEQBvl93n18MJtttZcMCHF1noAohu7gADAUAQAABiKAAAAQxEAAGAoAgAADEUAAIChCAAAMBQBAACGIgAAwFAEAAAYigAAAEMRAABgKEcDoKamRunp6SovL5ckrVq1Sunp6crIyNDUqVPV0NDg5PIAgItwLAD27t2rnJwclZWVSZIOHz6sJUuW6OWXX9b69esVDoe1cuVKp5YHAFyCYwFQVFSkGTNmqHPnzpKkmJgYzZgxQ3FxcfJ4POrRo4eOHz/u1PIAgEtw7HoAs2fPPu92t27d1K1bN0nSiRMntGLFCs2dO9ep5QEAl+D6BWEqKys1duxYDRs2TElJSc1+fEJCnANdtU1+v/2/HidqSnLk6mWmXxGN+c2d363ZXQ2AQ4cOaezYscrNzdXo0aNbVKO6ukbhsGVzZ21PIBCvYDBke10nakpSVdUZW+sFAvG214wmzG/u/HbP7vV6LvjC2bUAqKmp0ZgxYzRx4kQNHTrUrWUBABfg2nEAf/nLX/Tpp59q6dKlyszMVGZmphYuXOjW8gCAL3F8C2Dz5s2SpFGjRmnUqFFOLwcAiBBHAgOAoQgAADAUAQAAhnL9OAB8/QRDYUc+t9zhyvY6farO9roAPkcA4LL5fV49vHCbvTX9Ps25/2e21gRwPnYBAYChCAAAMBQBAACGIgAAwFAEAAAYigAAAEMRAABgKAIAAAxFAACAoQgAADAUAQAAhiIAAMBQBAAAGIoAAABDEQAAYCgCAAAM5WgA1NTUKD09XeXl5ZKkHTt2KCMjQ6mpqSosLHRyaQDAJTgWAHv37lVOTo7KysokSWfPnlVBQYEWL16sjRs36v3339e2bfZeRQoAEDnHAqCoqEgzZsxQ586dJUn79u3Tddddp2uvvVY+n08ZGRkqLS11ankAwCU4dk3g2bNnn3f7k08+USAQaLrduXNnVVZWNrtuQkLcZfcWLfx++389TtR0sq4TF5uPFibPLpk9v1uzu3ZR+HA4LI/H03TbsqzzbkequrpG4bBlZ2
ttUiAQr2AwZHtdJ2o6UfdcoFRVnbG1brQIBOKNnV0ye367Z/d6PRd84ezap4CuvvpqVVVVNd2uqqpq2j0EAHCfawGQmJiow4cP68iRI2psbFRxcbH69u3r1vIAgC9xbRdQbGys5s2bpwcffFD19fVKSUnRoEGD3FoeAPAljgfA5s2bm75OTk7W+vXrnV4SABABjgQGAEMRAABgKAIAAAxFAACAoQgAADAUAQAAhoooAF588UXV1NQ43QsAwEURBcBHH32kgQMH6pFHHtF7773ndE8AABdEdCDYrFmzVFNTow0bNuixxx6TZVnKyclRRkaGYmNjne4RAOCAiN8DiIuL06BBg5Senq6TJ09q5cqVGjRo0HlH+gIAokdEWwA7d+7UqlWrtHPnTg0cOFDPPPOMbr75Zh09elQjR47UgAEDnO4TAGCziALgscce08iRIzVz5kzFx//3QgXf/va3NWLECMeaAwA4J6IAWL9+vUpLSxUfH6+qqiq99tprysvLk9fr1fjx453uEYYKhsKOXBmpviGk06fqbK8LRJuIAmDmzJn67LPPNGTIEHm9Xu3Zs0fl5eWaNm2a0/3BYH6fVw8v3GZ73QUTUmyvCUSjiALg3XffVXFxsSQpISFBCxcuVGZmpqONAQCcFdGngILBoBoaGppuh0LOXFcWAOCeiLYA+vXrpzFjxigzM1Mej0fFxcVKSWEzGgCiWUQBkJ+frxUrVmjTpk3y+Xy64447lJ2d7XRvAAAHRRQA7dq1U15envLy8pzuBwDgkogC4G9/+5vmzJmjU6dOybKspvvfeecdxxoDADgrogBYsGCBpkyZop49e8rj8Vz2ouvWrdOzzz4rSerbt68mT5582TUBAM0TUQB06NBBqamptixYV1en2bNnq7S0VB06dFBOTo527Nih2267zZb6AIDIRPQx0MTERG3bZs8BOY2NjQqHw6qrq1MoFFIoFOKMogDQCiLaAti2bZuWL18uv98vv98vy7Lk8Xha9B5AXFycJkyYoDvvvFPt27fXT3/6U/3oRz9qdh0AwOWJKACef/552xb88MMP9corr2jLli2Kj4/XQw89pCVLlmjs2LERPT4hIc62Xto6vz+iX0+r14zGuk6cY8hu0dCjk0ye363ZI/rf1a1bN5WWlmr//v0aN26cNm3apPT09BYt+NZbbyk5OVkJCQmSpKysLK1cuTLiAKiurlE4bF36B6NcIBCvYND+I66dqOlE3XNP/E71W1V1xpG6dgkE4tt8j04yeX67Z/d6PRd84RzRewDPPvusXnrpJZWWlurs2bNatGiRnnnmmRY1c/PNN2vHjh2qra2VZVnavHmzfvCDH7SoFgCg5SIKgNdee01//vOf1b59e1111VUqKipqOjlcc/Xp00dpaWnKysrSkCFDFAqFdO+997aoFgCg5SLaBeTz+RQTE9N0u0OHDvL5Wr5v9t577+VJHwBaWUTP4l27dtXWrVvl8XjU0NCgJUuWqFu3bk73BgBwUEQB8Oijjyo/P18fffSRbrnlFiUmJurJJ590ujcAgIMiCoAuXbpo2bJlqqurU2Njo+LizPkoJgB8XUUUAEuXLv3K+3/1q1/Z2gwAwD0RBcCBAweavm5oaNA//vEPJScnO9YUAMB5EQXA3Llzz7tdWVmpRx55xJGGAADuiOg4gC/r0qWLPv74Y7t7AQC4qNnvAViWpffff7/pVA4AgOjU7PcApM+PC8jPz3ekIQCAO1r0HgAAIPpFFAC5ubkXvRTkCy+8YFtDAAB3RBQA3//+93Xo0CGNGDFCfr9f69atUygUUlpamtP9AQAcElEAvPPOO1q5cqXatWsnSfr5z3+uESNGaODAgY42BwBwTkQfAz1x4oTq6+ubbn/22Wc6e/asY00BAJwX0RZAenq67r77bt1xxx2yLEslJSXKy8tzujcAgIMiCoAJEyaoZ8+e+vvf/67Y2Fg9/vjjuvXWW53uDQDgoIiPBO7SpYtuvPFGTZw4UX
6/38meAAAuiCgAXnnlFU2dOlXPPfeczpw5o/vvv19FRUVO9wYAcFBEAbB8+XKtWrVKcXFxSkhI0Jo1a7Rs2TKnewMAOCiiAPB6veddBKZr165NHwkFAESniAKgY8eO2r9/f9PRwOvXr9eVV17Z4kU3b96srKws3XnnnZo1a1aL6wAAWi6iTwEVFBRowoQJOnr0qPr06aPY2FgtXry4RQseO3ZMM2bM0OrVq5WQkKB77rlH27ZtU0pKSovqAQBaJqIAOHv2rNatW6eysjI1NjbqO9/5Tos/CfTGG29o8ODBuvrqqyVJhYWFio2NbVEtoCWCobACgXhba9Y3hHT6VJ2tNQGnRRQADz30kEpKStS9e/fLXvDIkSPy+/0aN26cKioq1K9fP02cOPGy6wKR8vu8enjhNltrLpjAFiyiT0QBcNNNN2nDhg368Y9/rCuuuKLp/o4dOzZ7wcbGRr399tt68cUXdcUVV+jXv/611q5dq6ysrIgen5AQd+kf+prw+yP69bR6Tep+zu6tCrvrRRuT53dr9oj+F2zatEmlpaXn3efxeLR///5mL/jNb35TycnJ6tSpkyTpF7/4hfbt2xdxAFRX1ygctpq9brQJBOIVDIZsr+tETSfqnnuCjpZ+Jamq6oxttQKBeFvrRRuT57d7dq/Xc8EXzhEFwHvvvWdbM/3799fkyZN1+vRpfeMb39Cbb76p22+/3bb6AIDIXPRjoI8++mjT1ydOnLBlwcTERI0dO1YjR47U4MGD9a1vfUvDhg2zpTYAIHIX3QJ4//33m74eM2aM1q5da8uiw4cP1/Dhw22pBQBomYtuAViW9ZVfAwCiX8RnA73YNYEBANHnoruAwuGwTp06Jcuy1NjY2PT1OS35GCgAoG24aAAcOHBAvXv3bnrST0pKavpeSz8GCgBoGy4aAB9++KFbfQAAXBbxewAAgK8XAgAADEUAAIChCAAAMBQBAACGIgAAwFAEAAAYigAAAEMRAABgKAIAAAxFAACAoQgAADAUAQAAhiIAAMBQFz0dNIDIBENhBQLxttYMBOJV3xDS6VN1ttYFziEAABv4fV49vHCbffX8PgWDIS2YkGJbTeDLWnUX0BNPPKEpU6a0ZgsAYKxWC4CdO3dq7dq1rbU8ABivVXYBnTx5UoWFhRo3blzUX3ayw5XtFRvDnjQA0adVnrmmT5+uSZMmqaKiotmPTUiIc6Cjy1OweLvtNefc/zP5/fb/epyoSV1nap6rZ/eby9HC1Lkl92Z3PQBWr16trl27Kjk5WWvWrGn246uraxQOWw501jKBQLyCwZAjtZ2oGy29nnvyi5Z+7a557k1gSaqqOmNb3WgRCMQbObdk/+xer+eCL5xdD4CNGzeqqqpKmZmZOnXqlGprazVnzhwVFBS43QoAGM31AFi6dGnT12vWrNHu3bt58geAVsCRwABgqFb9+EpWVpaysrJaswUAMBZbAABgKAIAAAxFAACAoQgAADAUAQAAhiIAAMBQBAAAGIoAAABDEQAAYCgCAAAMRQAAgKEIAAAwFAEAAIYiAADAUFzNHGjDgqGwI9eHrW8I6fSpOtvrIroQAEAb5vd59fDCbbbXXTAhxfaaiD7sAgIAQxEAAGAoAgAADEUAAIChWuVN4EWLFqmkpESSlJKSovz8/NZoAwCM5voWwI4dO/TWW29p7dq1evXVV/XBBx/ojTfecLsNADCe61sAgUBAU6ZMUUxMjCSpe/fuOn78uNttAIDxXA+AG2+8senrsrIylZSU6KWXXnK7DcBoHGAGqRUPBDt48KDuu+8+5efn6/rrr4/4cQkJcc411UJ+vzP/jE7UjaZeo62u3TXP1XOkV59XBYu32153zv0/sy1YnAioaOHW7K0SAHv27NH48eNVUFCgtLS0Zj22urpG4bDlUGfNFwjEKxgMOVLbibrR0uu5J71o6dfumn6/r6leNP0bSFJV1ZnLrhEIxNtSJxrZPbvX67ngC2fXA6CiokIPPPCACg
sLlZyc7PbyAID/53oALFmyRPX19Zo3b17TfdnZ2crJyXG7FQA2s/O9hXN1eF/BOa4HwLRp0zRt2jS3lwXgArtOXvfFXWCcuM45HAkMAIYiAADAUAQAABiKAAAAQxEAAGAoAgAADEUAAIChuCg8ANiow5XtFRtzeU+tX3UwnRMHxBEAAGCj2BjfZR0M98WD4L7IiQPi2AUEAIYiAADAUAQAABiKAAAAQxEAAGAoAgAADEUAAIChjDkOwI6DMwC4z86rjJ1ft1F+Xzvb60YTY54RL/fgjAvhakWAs+y6ytiXLZiQYvxzAruAAMBQBAAAGIoAAABDtUoAbNiwQYMHD1ZqaqpWrFjRGi0AgPFcfxO4srJShYWFWrNmjWJiYpSdna2kpCTdcMMNbrcCAEZzPQB27Nih3r17q2PHjpKkgQMHqrS0VL/5zW8ierzX62nx2lfFx7b4sV+XutHSq8/vc6TuOW3939bn9ykUbGd73S9qy3W/OL9dNb9KW6z75dm/qCXPfxd7jMeyLKvZFS/Dn/70J9XW1mrSpEmSpNWrV2vfvn2aOXOmm20AgPFcfw8gHA7L4/lvIlmWdd5tAIA7XA+Aq6++WlVVVU23q6qq1LlzZ7fbAADjuR4At912m3bu3KkTJ06orq5Or7/+uvr27et2GwBgPNffBO7SpYsmTZqkvLw8BYNBDR8+XD/84Q/dbgMAjOf6m8AAgLaBI4EBwFAEAAAYigAAAEMRAABgKAKgjaipqVF6errKy8slfX7KjIyMDKWmpqqwsLCVu3PWokWLlJaWprS0NM2fP1+SWfMvXLhQgwcPVlpampYuXSrJrPnPeeKJJzRlyhRJZs2fm5urtLQ0ZWZmKjMzU3v37nVvfgut7p///KeVnp5ufe9737OOHTtm1dXVWSkpKdbRo0etYDBojR492tq6dWtrt+mI7du3W3fffbdVX19vNTQ0WHl5edaGDRuMmX/Xrl1Wdna2FQwGrbq6Oqt///7W/v37jZn/nB07dlhJSUnW5MmTjfr7D4fDVp8+faxgMNh0n5vzswXQBhQVFWnGjBlNR0Tv27dP1113na699lr5fD5lZGSotLS0lbt0RiAQ0JQpUxQTEyO/36/u3burrKzMmPlvvfVWvfDCC/L5fKqurlZjY6NOnz5tzPySdPLkSRUWFmrcuHGSzPr7//e//y1JGj16tIYMGaLly5e7Oj8B0AbMnj1bP/nJT5puf/LJJwoEAk23O3furMrKytZozXE33nijbrnlFklSWVmZSkpK5PF4jJlfkvx+v55++mmlpaUpOTnZqN+/JE2fPl2TJk1Shw4dJJn193/69GklJyfrmWee0fPPP6+XX35Zx48fd21+AqANMvGEeQcPHtTo0aOVn5+va6+91rj5x48fr507d6qiokJlZWXGzL969Wp17dpVycnJTfeZ9Pffq1cvzZ8/X/Hx8erUqZOGDx+up59+2rX5XT8VBC7NtBPm7dmzR+PHj1dBQYHS0tK0e/duY+Y/dOiQGhoa9N3vflft27dXamqqSktL1a7df88H/3Wef+PGjaqqqlJmZqZOnTql2tpaffzxx8bM//bbbysYDDYFoGVZ6tatm2t//2wBtEGJiYk6fPiwjhw5osbGRhUXF39tT5hXUVGhBx54QE8++aTS0tIkmTV/eXm5pk2bpoaGBjU0NGjTpk3Kzs42Zv6lS5equLhY69at0/jx4zVgwAA999xzxsx/5swZzZ8/X/X19aqpqdHatWv129/+1rX52QJog2JjYzVv3jw9+OCDqq+vV0pKigYNGtTabTliyZIlqq+v17x585ruy87ONmb+lJQU7du3T0OHDlW7du2UmpqqtLQ0derUyYj5v4pJf//9+/fX3r17NXToUIXDYY0cOVK9evVybX5OBgcAhmIXEAAYigAAAEMRAABgKAIAAAxFAACAoQgAADAUAQAjjR49WidOnLjsn9m1a5fS09Mvud5NN930lbU2bdqkWbNmSfr8tM
ClpaUqLy9Xr169LlkTuFwcCAYjbd++3ZafuVy33367br/9dsfXAb4KWwAwztSpUyVJ99xzj3bv3q3c3FxlZGRoyJAhevXVV//nZyoqKrRlyxZlZ2crKytL/fr101NPPdXsdZ966indddddyszM1JYtWyRJa9as0X333WfLXEBzsQUA48ydO1dr1qzRsmXLNGLECOXn5ys1NVWVlZX65S9/qeuuu+68n7nqqquUn5+vefPm6frrr1dlZaX69++vvLy8Zq17zTXX6PHHH9eBAweUm5urkpIShyYEIkMAwFiHDh1SfX29UlNTJUldunRRamqq3nzzzfP2wXs8Hv3xj3/U1q1bVVxcrEOHDsmyLNXV1TVrvZycHElSjx491L17d7377rv2DQO0ALuAYCyPx/M/51m3LEuhUOi8+2pra3XXXXfpgw8+UM+ePZWfny+fz6fmnkbL6/3vf7dwOCyfj9dfaF0EAIzUrl07devWTT6fT6+//rokqbKyUn/961912223Nf1MKBTSkSNHVFNTo4kTJ2rAgAHatWuXGhoaFA6Hm7Xm2rVrJUkffPCBjh49qsTERHuHApqJlyAw0qBBgzRq1CgtXrxYs2bN0h/+8Ac1NjbqgQceUO/evZt+Jjc3VwsXLlS/fv105513KiYmRj169NANN9ygI0eOKCYmJuI1jx07pqFDh8rj8ej3v/+9Onbs6NB0QGQ4HTQAGIotAMAGzz33nDZs2PCV3xszZoyGDBnickfApbEFAACG4k1gADAUAQAAhiIAAMBQBAAAGIoAAABD/R+g2LBfFQqybgAAAABJRU5ErkJggg==\",\n      \"text/plain\": [\n       \"<Figure size 432x288 with 1 Axes>\"\n      ]\n     },\n     \"metadata\": {},\n     \"output_type\": \"display_data\"\n    }\n   ],\n   \"source\": [\n    \"# Create a visualization with Modin df\\n\",\n    \"sns.histplot(data=modin_tips, x=\\\"total_bill\\\", stat='frequency')\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 38,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/plain\": [\n       \"<AxesSubplot:xlabel='total_bill', ylabel='Frequency'>\"\n      ]\n     },\n     \"execution_count\": 38,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    },\n    {\n     \"data\": {\n      \"image/png\": 
\"iVBORw0KGgoAAAANSUhEUgAAAYAAAAEJCAYAAACdePCvAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAAZRUlEQVR4nO3de3BU9fnH8c8uu8lgE0TSBSla7aBo6SXSiyGWEsAakCQEA8WEmUQKjFKtXDoaICKMchVsIxaZ1sogCiihgkAkqZbbKFCoaEEdFEoJEMnEGMolJiS72fP7wx+pWIFNOOck6/f9+iu7yT7f5yFhP3vO7jnHY1mWJQCAcbyt3QAAoHUQAABgKAIAAAxFAACAoQgAADAUAQAAhiIAAMBQvtZuoLn+85/PFA5//Q9dSEiIU3V1TWu30WqYn/lNnd/u2b1ej6666htf+b2oC4Bw2DIiACQZM+eFMD/zm8qt2dkFBACGIgAAwFAEAAAYigAAAEMRAABgKEcDoKamRunp6SovLz/v/uXLlys3N9fJpQEAl+BYAOzdu1c5OTkqKys77/5//etfevbZZ51aFgAQIceOAygqKtKMGTOUn5/fdF9DQ4OmT5+u8ePHa926dU4tjQvocGV7xcbY/yuvbwjp9Kk62+sCcJZjATB79uz/ue93v/udhg0bpmuuuabFdRMS4i6nragSCMTbXrNg8Xbba865/2eO9OpEzWjC/ObO79bsrh0JvH37dlVUVGjq1KnatWtXi+tUV9cYcYRgIBCvqqozttcMBkO21jzHiV7trhlNmN/c+e2e3ev1XPCFs2sBUFxcrIMHDyozM1O1tbX69NNPNXHiRD311FNutQAA+ALXAmDu3LlNX+/atUuLFi3iyR8AWhHHAQCAoRzfAti8efP/3JeUlKSkpCSnlwYAXARbAABgKAIAAAxFAACAoQgAADAUAQAAhiIAAMBQUXdReFMEQ2Gjz4UCwHkEQBvl93n18MJtttZcMCHF1noAohu7gADAUAQAABiKAAAAQxEAAGAoAgAADEUAAIChCAAAMBQBAACGIgAAwFAEAAAYigAAAEMRAABgKEcDoKamRunp6SovL5ckrVq1Sunp6crIyNDUqVPV0NDg5PIAgItwLAD27t2rnJwclZWVSZIOHz6sJUuW6OWXX9b69esVDoe1cuVKp5YHAFyCYwFQVFSkGTNmqHPnzpKkmJgYzZgxQ3FxcfJ4POrRo4eOHz/u1PIAgEtw7HoAs2fPPu92t27d1K1bN0nSiRMntGLFCs2dO9ep5QEAl+D6BWEqKys1duxYDRs2TElJSc1+fEJCnANdtU1+v/2/HidqSnLk6mWmXxGN+c2d363ZXQ2AQ4cOaezYscrNzdXo0aNbVKO6ukbhsGVzZ21PIBCvYDBke10nakpSVdUZW+sFAvG214wmzG/u/HbP7vV6LvjC2bUAqKmp0ZgxYzRx4kQNHTrUrWUBABfg2nEAf/nLX/Tpp59q6dKlyszMVGZmphYuXOjW8gCAL3F8C2Dz5s2SpFGjRmnUqFFOLwcAiBBHAgOAoQgAADAUAQAAhnL9OAB8/QRDYUc+t9zhyvY6farO9roAPkcA4LL5fV49vHCbvTX9Ps25/2e21gRwPnYBAYChCAAAMBQBAACGIgAAwFAEAAAYigAAAEMRAABgKAIAAAxFAACAoQgAADAUAQAAhiIAAMBQBAAAGIoAAABDEQAAYCgCAAAM5WgA1NTUKD09XeXl5ZKkHTt2KCMjQ6mpqSosLHRyaQDAJTgWAHv37lVOTo7KysokSWfPnlVBQYEWL16sjRs36v3339e2bfZeRQoAEDnHAqCoqEgzZsxQ586dJUn79u3Tddddp2uvvVY+n08ZGRkqLS11ankAwCU4dk3g2bNnn3f7k08+USAQaLrduXNnVVZWNrtuQkLcZfcWLfx++389TtR0sq4TF5uPFibPLpk9v1uzu3ZR+HA4LI/H03TbsqzzbkequrpG4bBlZ2
ttUiAQr2AwZHtdJ2o6UfdcoFRVnbG1brQIBOKNnV0ye367Z/d6PRd84ezap4CuvvpqVVVVNd2uqqpq2j0EAHCfawGQmJiow4cP68iRI2psbFRxcbH69u3r1vIAgC9xbRdQbGys5s2bpwcffFD19fVKSUnRoEGD3FoeAPAljgfA5s2bm75OTk7W+vXrnV4SABABjgQGAEMRAABgKAIAAAxFAACAoQgAADAUAQAAhoooAF588UXV1NQ43QsAwEURBcBHH32kgQMH6pFHHtF7773ndE8AABdEdCDYrFmzVFNTow0bNuixxx6TZVnKyclRRkaGYmNjne4RAOCAiN8DiIuL06BBg5Senq6TJ09q5cqVGjRo0HlH+gIAokdEWwA7d+7UqlWrtHPnTg0cOFDPPPOMbr75Zh09elQjR47UgAEDnO4TAGCziALgscce08iRIzVz5kzFx//3QgXf/va3NWLECMeaAwA4J6IAWL9+vUpLSxUfH6+qqiq99tprysvLk9fr1fjx453uEYYKhsKOXBmpviGk06fqbK8LRJuIAmDmzJn67LPPNGTIEHm9Xu3Zs0fl5eWaNm2a0/3BYH6fVw8v3GZ73QUTUmyvCUSjiALg3XffVXFxsSQpISFBCxcuVGZmpqONAQCcFdGngILBoBoaGppuh0LOXFcWAOCeiLYA+vXrpzFjxigzM1Mej0fFxcVKSWEzGgCiWUQBkJ+frxUrVmjTpk3y+Xy64447lJ2d7XRvAAAHRRQA7dq1U15envLy8pzuBwDgkogC4G9/+5vmzJmjU6dOybKspvvfeecdxxoDADgrogBYsGCBpkyZop49e8rj8Vz2ouvWrdOzzz4rSerbt68mT5582TUBAM0TUQB06NBBqamptixYV1en2bNnq7S0VB06dFBOTo527Nih2267zZb6AIDIRPQx0MTERG3bZs8BOY2NjQqHw6qrq1MoFFIoFOKMogDQCiLaAti2bZuWL18uv98vv98vy7Lk8Xha9B5AXFycJkyYoDvvvFPt27fXT3/6U/3oRz9qdh0AwOWJKACef/552xb88MMP9corr2jLli2Kj4/XQw89pCVLlmjs2LERPT4hIc62Xto6vz+iX0+r14zGuk6cY8hu0dCjk0ye363ZI/rf1a1bN5WWlmr//v0aN26cNm3apPT09BYt+NZbbyk5OVkJCQmSpKysLK1cuTLiAKiurlE4bF36B6NcIBCvYND+I66dqOlE3XNP/E71W1V1xpG6dgkE4tt8j04yeX67Z/d6PRd84RzRewDPPvusXnrpJZWWlurs2bNatGiRnnnmmRY1c/PNN2vHjh2qra2VZVnavHmzfvCDH7SoFgCg5SIKgNdee01//vOf1b59e1111VUqKipqOjlcc/Xp00dpaWnKysrSkCFDFAqFdO+997aoFgCg5SLaBeTz+RQTE9N0u0OHDvL5Wr5v9t577+VJHwBaWUTP4l27dtXWrVvl8XjU0NCgJUuWqFu3bk73BgBwUEQB8Oijjyo/P18fffSRbrnlFiUmJurJJ590ujcAgIMiCoAuXbpo2bJlqqurU2Njo+LizPkoJgB8XUUUAEuXLv3K+3/1q1/Z2gwAwD0RBcCBAweavm5oaNA//vEPJScnO9YUAMB5EQXA3Llzz7tdWVmpRx55xJGGAADuiOg4gC/r0qWLPv74Y7t7AQC4qNnvAViWpffff7/pVA4AgOjU7PcApM+PC8jPz3ekIQCAO1r0HgAAIPpFFAC5ubkXvRTkCy+8YFtDAAB3RBQA3//+93Xo0CGNGDFCfr9f69atUygUUlpamtP9AQAcElEAvPPOO1q5cqXatWsnSfr5z3+uESNGaODAgY42BwBwTkQfAz1x4oTq6+ubbn/22Wc6e/asY00BAJwX0RZAenq67r77bt1xxx2yLEslJSXKy8tzujcAgIMiCoAJEyaoZ8+e+vvf/67Y2Fg9/vjjuvXWW53uDQDgoIiPBO7SpYtuvPFGTZw4UX
6/38meAAAuiCgAXnnlFU2dOlXPPfeczpw5o/vvv19FRUVO9wYAcFBEAbB8+XKtWrVKcXFxSkhI0Jo1a7Rs2TKnewMAOCiiAPB6veddBKZr165NHwkFAESniAKgY8eO2r9/f9PRwOvXr9eVV17Z4kU3b96srKws3XnnnZo1a1aL6wAAWi6iTwEVFBRowoQJOnr0qPr06aPY2FgtXry4RQseO3ZMM2bM0OrVq5WQkKB77rlH27ZtU0pKSovqAQBaJqIAOHv2rNatW6eysjI1NjbqO9/5Tos/CfTGG29o8ODBuvrqqyVJhYWFio2NbVEtoCWCobACgXhba9Y3hHT6VJ2tNQGnRRQADz30kEpKStS9e/fLXvDIkSPy+/0aN26cKioq1K9fP02cOPGy6wKR8vu8enjhNltrLpjAFiyiT0QBcNNNN2nDhg368Y9/rCuuuKLp/o4dOzZ7wcbGRr399tt68cUXdcUVV+jXv/611q5dq6ysrIgen5AQd+kf+prw+yP69bR6Tep+zu6tCrvrRRuT53dr9oj+F2zatEmlpaXn3efxeLR///5mL/jNb35TycnJ6tSpkyTpF7/4hfbt2xdxAFRX1ygctpq9brQJBOIVDIZsr+tETSfqnnuCjpZ+Jamq6oxttQKBeFvrRRuT57d7dq/Xc8EXzhEFwHvvvWdbM/3799fkyZN1+vRpfeMb39Cbb76p22+/3bb6AIDIXPRjoI8++mjT1ydOnLBlwcTERI0dO1YjR47U4MGD9a1vfUvDhg2zpTYAIHIX3QJ4//33m74eM2aM1q5da8uiw4cP1/Dhw22pBQBomYtuAViW9ZVfAwCiX8RnA73YNYEBANHnoruAwuGwTp06Jcuy1NjY2PT1OS35GCgAoG24aAAcOHBAvXv3bnrST0pKavpeSz8GCgBoGy4aAB9++KFbfQAAXBbxewAAgK8XAgAADEUAAIChCAAAMBQBAACGIgAAwFAEAAAYigAAAEMRAABgKAIAAAxFAACAoQgAADAUAQAAhiIAAMBQFz0dNIDIBENhBQLxttYMBOJV3xDS6VN1ttYFziEAABv4fV49vHCbffX8PgWDIS2YkGJbTeDLWnUX0BNPPKEpU6a0ZgsAYKxWC4CdO3dq7dq1rbU8ABivVXYBnTx5UoWFhRo3blzUX3ayw5XtFRvDnjQA0adVnrmmT5+uSZMmqaKiotmPTUiIc6Cjy1OweLvtNefc/zP5/fb/epyoSV1nap6rZ/eby9HC1Lkl92Z3PQBWr16trl27Kjk5WWvWrGn246uraxQOWw501jKBQLyCwZAjtZ2oGy29nnvyi5Z+7a557k1gSaqqOmNb3WgRCMQbObdk/+xer+eCL5xdD4CNGzeqqqpKmZmZOnXqlGprazVnzhwVFBS43QoAGM31AFi6dGnT12vWrNHu3bt58geAVsCRwABgqFb9+EpWVpaysrJaswUAMBZbAABgKAIAAAxFAACAoQgAADAUAQAAhiIAAMBQBAAAGIoAAABDEQAAYCgCAAAMRQAAgKEIAAAwFAEAAIYiAADAUFzNHGjDgqGwI9eHrW8I6fSpOtvrIroQAEAb5vd59fDCbbbXXTAhxfaaiD7sAgIAQxEAAGAoAgAADEUAAIChWuVN4EWLFqmkpESSlJKSovz8/NZoAwCM5voWwI4dO/TWW29p7dq1evXVV/XBBx/ojTfecLsNADCe61sAgUBAU6ZMUUxMjCSpe/fuOn78uNttAIDxXA+AG2+8senrsrIylZSU6KWXXnK7DcBoHGAGqRUPBDt48KDuu+8+5efn6/rrr4/4cQkJcc411UJ+vzP/jE7UjaZeo62u3TXP1XOkV59XBYu32153zv0/sy1YnAioaOHW7K0SAHv27NH48eNVUFCgtLS0Zj22urpG4bDlUGfNFwjEKxgMOVLbibrR0uu5J71o6dfumn6/r6leNP0bSFJV1ZnLrhEIxNtSJxrZPbvX67ngC2fXA6CiokIPPPCACg
sLlZyc7PbyAID/53oALFmyRPX19Zo3b17TfdnZ2crJyXG7FQA2s/O9hXN1eF/BOa4HwLRp0zRt2jS3lwXgArtOXvfFXWCcuM45HAkMAIYiAADAUAQAABiKAAAAQxEAAGAoAgAADEUAAIChuCg8ANiow5XtFRtzeU+tX3UwnRMHxBEAAGCj2BjfZR0M98WD4L7IiQPi2AUEAIYiAADAUAQAABiKAAAAQxEAAGAoAgAADEUAAIChjDkOwI6DMwC4z86rjJ1ft1F+Xzvb60YTY54RL/fgjAvhakWAs+y6ytiXLZiQYvxzAruAAMBQBAAAGIoAAABDtUoAbNiwQYMHD1ZqaqpWrFjRGi0AgPFcfxO4srJShYWFWrNmjWJiYpSdna2kpCTdcMMNbrcCAEZzPQB27Nih3r17q2PHjpKkgQMHqrS0VL/5zW8ierzX62nx2lfFx7b4sV+XutHSq8/vc6TuOW3939bn9ykUbGd73S9qy3W/OL9dNb9KW6z75dm/qCXPfxd7jMeyLKvZFS/Dn/70J9XW1mrSpEmSpNWrV2vfvn2aOXOmm20AgPFcfw8gHA7L4/lvIlmWdd5tAIA7XA+Aq6++WlVVVU23q6qq1LlzZ7fbAADjuR4At912m3bu3KkTJ06orq5Or7/+uvr27et2GwBgPNffBO7SpYsmTZqkvLw8BYNBDR8+XD/84Q/dbgMAjOf6m8AAgLaBI4EBwFAEAAAYigAAAEMRAABgKAKgjaipqVF6errKy8slfX7KjIyMDKWmpqqwsLCVu3PWokWLlJaWprS0NM2fP1+SWfMvXLhQgwcPVlpampYuXSrJrPnPeeKJJzRlyhRJZs2fm5urtLQ0ZWZmKjMzU3v37nVvfgut7p///KeVnp5ufe9737OOHTtm1dXVWSkpKdbRo0etYDBojR492tq6dWtrt+mI7du3W3fffbdVX19vNTQ0WHl5edaGDRuMmX/Xrl1Wdna2FQwGrbq6Oqt///7W/v37jZn/nB07dlhJSUnW5MmTjfr7D4fDVp8+faxgMNh0n5vzswXQBhQVFWnGjBlNR0Tv27dP1113na699lr5fD5lZGSotLS0lbt0RiAQ0JQpUxQTEyO/36/u3burrKzMmPlvvfVWvfDCC/L5fKqurlZjY6NOnz5tzPySdPLkSRUWFmrcuHGSzPr7//e//y1JGj16tIYMGaLly5e7Oj8B0AbMnj1bP/nJT5puf/LJJwoEAk23O3furMrKytZozXE33nijbrnlFklSWVmZSkpK5PF4jJlfkvx+v55++mmlpaUpOTnZqN+/JE2fPl2TJk1Shw4dJJn193/69GklJyfrmWee0fPPP6+XX35Zx48fd21+AqANMvGEeQcPHtTo0aOVn5+va6+91rj5x48fr507d6qiokJlZWXGzL969Wp17dpVycnJTfeZ9Pffq1cvzZ8/X/Hx8erUqZOGDx+up59+2rX5XT8VBC7NtBPm7dmzR+PHj1dBQYHS0tK0e/duY+Y/dOiQGhoa9N3vflft27dXamqqSktL1a7df88H/3Wef+PGjaqqqlJmZqZOnTql2tpaffzxx8bM//bbbysYDDYFoGVZ6tatm2t//2wBtEGJiYk6fPiwjhw5osbGRhUXF39tT5hXUVGhBx54QE8++aTS0tIkmTV/eXm5pk2bpoaGBjU0NGjTpk3Kzs42Zv6lS5equLhY69at0/jx4zVgwAA999xzxsx/5swZzZ8/X/X19aqpqdHatWv129/+1rX52QJog2JjYzVv3jw9+OCDqq+vV0pKigYNGtTabTliyZIlqq+v17x585ruy87ONmb+lJQU7du3T0OHDlW7du2UmpqqtLQ0derUyYj5v4pJf//9+/fX3r17NXToUIXDYY0cOVK9evVybX5OBgcAhmIXEAAYigAAAEMRAABgKAIAAAxFAACAoQgAADAUAQAjjR49WidOnLjsn9m1a5fS09Mvud5NN930lbU2bdqkWbNmSfr8tM
ClpaUqLy9Xr169LlkTuFwcCAYjbd++3ZafuVy33367br/9dsfXAb4KWwAwztSpUyVJ99xzj3bv3q3c3FxlZGRoyJAhevXVV//nZyoqKrRlyxZlZ2crKytL/fr101NPPdXsdZ966indddddyszM1JYtWyRJa9as0X333WfLXEBzsQUA48ydO1dr1qzRsmXLNGLECOXn5ys1NVWVlZX65S9/qeuuu+68n7nqqquUn5+vefPm6frrr1dlZaX69++vvLy8Zq17zTXX6PHHH9eBAweUm5urkpIShyYEIkMAwFiHDh1SfX29UlNTJUldunRRamqq3nzzzfP2wXs8Hv3xj3/U1q1bVVxcrEOHDsmyLNXV1TVrvZycHElSjx491L17d7377rv2DQO0ALuAYCyPx/M/51m3LEuhUOi8+2pra3XXXXfpgw8+UM+ePZWfny+fz6fmnkbL6/3vf7dwOCyfj9dfaF0EAIzUrl07devWTT6fT6+//rokqbKyUn/961912223Nf1MKBTSkSNHVFNTo4kTJ2rAgAHatWuXGhoaFA6Hm7Xm2rVrJUkffPCBjh49qsTERHuHApqJlyAw0qBBgzRq1CgtXrxYs2bN0h/+8Ac1NjbqgQceUO/evZt+Jjc3VwsXLlS/fv105513KiYmRj169NANN9ygI0eOKCYmJuI1jx07pqFDh8rj8ej3v/+9Onbs6NB0QGQ4HTQAGIotAMAGzz33nDZs2PCV3xszZoyGDBnickfApbEFAACG4k1gADAUAQAAhiIAAMBQBAAAGIoAAABD/R+g2LBfFQqybgAAAABJRU5ErkJggg==\",\n      \"text/plain\": [\n       \"<Figure size 432x288 with 1 Axes>\"\n      ]\n     },\n     \"metadata\": {},\n     \"output_type\": \"display_data\"\n    }\n   ],\n   \"source\": [\n    \"# Create a visualization with pandas df\\n\",\n    \"sns.histplot(data=pandas_tips, x=\\\"total_bill\\\", stat='frequency')\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 39,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/plain\": [\n       \"<AxesSubplot:xlabel='total_bill', ylabel='tip'>\"\n      ]\n     },\n     \"execution_count\": 39,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    },\n    {\n     \"data\": {\n      \"image/png\": 
\"iVBORw0KGgoAAAANSUhEUgAAAYEAAAEKCAYAAAD0Luk/AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAABA7ElEQVR4nO29eZhU5Zn//T1bbb1vNItBQjOIWUg0TBAnV1wYGbAjKMEJ0ZBMzO991Z+jwcmko8YxE18TtN+8cUkMZoxijBI1pCVuEAhKflGwDaJ2UBBpAw3SNL1X13q25/3j1Dld1V3VXdVdVaeqz/25Li851VXnPM+pU/d9P/dzLxxjjIEgCIJwJLzdAyAIgiDsg5QAQRCEgyElQBAE4WBICRAEQTgYUgIEQRAOhpQAQRCEg7FVCdxzzz245ZZb7BwCQRCEo7FNCezduxfPPvusXZcnCIIgAIh2XHRgYAD33nsvrrvuOhw6dCijz/b3B6Hrzshvq6kpRW9vwO5h2IKT5w7Q/Gn+2Zs/z3OoqipJ+XdblMAdd9yBm2++GZ2dnRl/dqzJTEVqakrtHoJtOHnuAM2f5p+f+eddCfzud7/DjBkzsGTJErS0tGT8+d7egGNWAnV1ZejuHrJ7GLbg5LkDNH+af/bmz/PcmAol70rgpZdeQnd3N1atWoXBwUGEQiH8+Mc/xm233ZbvoRAEQTievCuBTZs2Wf9uaWnBG2+8QQqAIAjCJihPgCAIwsHYsjFssnr1aqxevdrOIRAEQRQ0be092N7agZ7BCGorPFi+eDYWNtRm7fy2KgGCIAgiNW3tPXhy52EIAg+fR8RAUMaTOw8DQNYUAbmDCIIgCpTtrR0QBB5uSQDHcXBLAgSBx/bWjqxdg5QAQRBEgdIzGIFLTBTTLpFHz2Aka9cgJUAQBFGg1FZ4IKt6wmuyqqO2wpO1a5ASIAiCKFCWL54NTdMRVTQwxhBVNGiajuWLZ2ftGrQxTBAEUaCYm78UHUQQBOFQFjbUZlXoj4SUQBbJdTwvQRBEtiElkCXyEc9LEASRbWhjOEvkI56XIAgi25ASyBL5iOclCILINqQEskQ+4nkJgiCyDSmBLJGPeF6CIIhsQxvDWSIf8bwEQRDZhpRAFsl1PC9BEES2IXcQQRCEg6GVADHloSQ+gkgNKQFiSkNJfAQxNuQOIqY0lMRHEGNDSoCY0lASH0GMjS3uoPvvvx9//OMfwXEc1qxZg29+85t2DINwALUVHgwEZbglwXqNkviIfFLoe1J5Xwm88cYbeP311/Hcc8/h97//PX7zm9/gww8/zPcwCIdASXyEnZh7UgNBOWFPqq29x+6hWeRdCXz+85/H448/DlEU0dvbC03T4PP58j0MwiEsbKjF1ZfMR2WJC6GIisoSF66+ZH5BWWLE1KUY9qRscQdJkoQHHngAjz76KJYvX476+vq0P1tTU5rDkRUedXVldg/BNrI196V1ZVh63sezcq584uTvHpga8+8LyCjziuA4znpNFDj0B+Rx55ev+XOMMZaXKyUhHA7juuuuw6WXXoqvfOUraX2mtzcAXbdtyHmlrq4M3d1Ddg/DFpw8d4DmP1Xm37x5/6g9qaiiobLEhaarzk35uWzOn+e5MY3nvLuD2tvbcfDgQQCA1+vFsmXL8P777+d7GARBEDmnGPak8q4ETpw4gdtvvx2yLEOWZezatQuf+9zn8j0MgiCInFMMe1J53xO44IIL0NbWhssvvxyCIGDZsmVobGzM9zAIgiDyQqEXlrRlY/jGG2/EjTfeaMelCYIgiDgoY5ggCMLBkBIgCIJwMFRFlCBySKGXDCAIUgIEkSOojDVRDJA7iCByRDGUDCAIWgkQRI7oGYzA50n8iVEZ60TIXWY/tBIgiBxRW+GBrOoJr1EZ62GKocKmEyAlQBA5ohhKBtgJucsKA3IHEUSOMN0a5O5IDrnLCgNSAgSRQwq9ZICdUNe3woCUAEGANijtYPni2Xhy52FEYawAZF
Und5kNkBIgHMFYQp7i+e2B3GWFASkBYsoznpCP36AEALckIApDOJFAyi3kLrMfig4ipjzjRaH0DEbgEhN/CrRBSTgFWgkQU57xolCcvEFJeyEErQSIKc94SVtOjeenZC0CICVAOIDxhHwxtADMBZSsRQDkDioYki3Ll9aV2T2sKUE6UShO3KCkZC0CICVQEKSKXqmo8OHMWp/dw5sSOFHIj4eT90KIYWxxB/385z9HY2MjGhsb0dzcbMcQCopUy/KW3UfsHhoxhXHqXgiRSN6VwJ49e/Dqq6/i2WefxdatW/Huu+9i586d+R5GQZEqRPF0X8imERFOwKl7IUQieXcH1dXV4ZZbboHL5QIANDQ04OTJk/keRkGRalk+rZpcQVOVQgnNJDcZwTHGmF0XP3r0KL761a/it7/9LebMmWPXMGxn38Eu/LKlDaJouIKiigZVZbh29UIsOrve7uHZzr6DXWjZfQRdfSHUV/uw+sJ5RX1f6PsmCgnblMAHH3yAa6+9FjfeeCOuuOKKtD/X2xuArtumt3JG0uig8z6O7u4hu4dmC3V1ZejuHkrYNI8vMlbMbovmzftHrfyiiobKEhearjoXwPD8nQrNP3vz53kONTWlKf9uS3TQm2++iZtuugm33XYbGhsb7RhCwUHL8uRMxbo+FJpJFBJ5VwKdnZ244YYbcO+992LJkiX5vjxRZExFgUmhmUQhkffooEceeQTRaBR33303Vq1ahVWrVuG3v/1tvodBFAlTsU8vhWYShUTeVwK33347br/99nxflihSMmk8UigRN+NBdfSJQoIyhomCJl2BWWyNYWgPiCgUSAkQGZNvizsdgTkVN5AzpVhWQkRhQUqAyIhCtbin4gZyJhTq90IUPqQEHE6m1mOhWtxOj7gp1O+FKHyon4CDmUhTkUJtxej0iJtC/V6IwodWAg5mItZjoVrcuY64KXR/e6F+L0ThQ0rAwUzEj55JyGa+yVXEjV3+9kwUTyF/L0RhQ0rAwUzEenRijLsd/vZ9B7syUjxO/F6I7EBKwMFM1Hp0Qox7vBU+GIiisswNxCnLXPvbW3YfyVjxOOF7IbIPKQEHQ9Zjcka6f/xBGX3+CDiOg9dt/GRy7W/v6gvBI9FGL5F7SAk4nMlYj4W+WTpRRrp/Kkpd6PNHMTAUhccl5MXfXl/tQ3d/iDZ6iZxDIaLEhDCt5a7+MIZCMj44MYAHWw7guVc/tHtok2ZkuKXPI6GqzAXGkLc2jKsvnOfokFcif9BKgJgQ21s7oGoMQyEZAAeB56HpDC++3oE5M8qxsKG2aFcKyTbMRVFAwyyv1fQl1yw6ux5XXzK/KO8fUVyQEihi7BSyPYMRhCIKAA48Z7zGc4CmM2xv7QCAoi1jkOmGea6+B9roJfIBKYEixe5aMbUVHvQPRSDww24TBkASjM3LiYRVxgvTGXWlWHrOTNuarwPpbZjb/T0QxGQhJVCk2F0rZvni2Wj/yA9NZ+A5QwEwAF63gNoKT0IiWiiiwB9SoKo6egciaGvvGbcUdL8/bKswTdcKt/t7IIjJQkqgSMk02zfbLouFDbVoXDIbL77eAU1nkAQeXrcASeSxfPFsbG/twEBQhqbp6BuKIuYxAschqXAfKUw9kgBVY7YJ03Tvl9OrlxLFDymBIiWTbN9cuSxWfmEu5swoTyksn9x5GIMBOaYAOHAcg9ctYDAg48GWv6FhVoX1/okI01z54jO5X9ms2VMo7jDCWZASKFIy2bzMtssiHeFrHj/Y8jcwxiCJxvUDERUcAMZYgnDNVJjm0hefyf3KVs2eQnOHEc6B8gSKlIUNtbj6kvmoLHGNG7uezTLDmZSfXthQi4ZZFair8qG+2oeIolurAkkU4JYECAKP7a0do0pBR2R1TGEaL6g5jks412TJ5H5l8j2Mxcj5eFxi1uZDEGNh20ogEAhg7dq1eOihh3DGGWfYNYyiJt3Ny2y6LDJdVcRbyqqqAwA4jqG8xA1gWLiOjMgx3S
EA0Lx5/6hVRy598Zner2yEcprzCUdV+IMyVI1B4IFQWJnUeQliPGxRAu+88w5uv/12HD161I7LO45slhnOVPjGC/fegQg4DqgscyetwRMvTOvqyrDr9b9bLhKOAz486ccDW9ows8YHj2TMIxdlFewoy1xb4bGyr43kO0DVGHSmJY2mIohsYYs76JlnnsEPfvADTJs2zY7LO45suSwAQ1jJMYveZDzhe7TTj46uABRNh6LqCEWUtEohbG/tgKLq6B2MoHsgAiV23dMDEfjDKkJhJSdlFbJ5v9Jl+eLZCIQUMAZwYNB1ABxQ4hHJJUTkFI4xxuy6+MUXX4zHH3+c3EFFxL6DXfhlSxtE0fDDRxUNqspw7eqFWHR2/aj3/3bHITy98zDAAQIHKBoDY0CJV8LcWRVYfeG8pJ8DgK/9YBuCYQWqlviI8hwws64EoiCgrMSF030hTKv2jXmuYuBrP9iGqKxB1XRIIo/KUjd8HhGBsIpfff8Su4dHTFGKLjqotzcAXbdNb+WVuroydHcP2T2MBM6s9WHt0nmjooPOrPUlHevW3e0AAIEztoQlgYOmMzCd4eY1CwEg6efq6sqgKDpME8XMMzCT0niOQyAk445vLEr4XKHdr0yYUe2z9iIkkYei6ghGVFSVuop6XhOhEJ/9fJLN+fM8h5qa0pR/LzolQNhPJhuhEVmFYBYXisFzxuvjIQocogozQkrjXucwNcsqx+9FGHOnyqFE7iElQEyasfIGPC4RUUWDEKcHdGa8Ph4za0vQ1R9GIKxAUY3wUp6HUbF0CgrH+E30/oCMqlLXqByMYq3MShQupAQIi4kImPgkJ4DhgxMDONQxAIHnMKPGh880VKP10GlourEC0BnAwLDsH8ffBzIt45oKDzRNx2BAhqrpmFbpwaIF07C9tQNP7DicU2GYb6FrrrKSuQOoWF1xUuiK21Yl8PLLL9t5eSKOiQiYtvYe/M9z7yGiaOA5JGzgajpDZ28Q/pCCxQum4Z32PkRkFR6XiGX/eAZWfmHuuGMamTswd2a5Zf3nWhi2tfdgy+52nOwJQhR4lJdItgtdKlZXfBSD4qaVgEMZaZ0EQnJGAsZ8uKOKCoHjEhSA6cPnOB6RqIrjpwOYXV9qXWvOjPK0x5ls/6F58/6cCkNzboMBGRzHQWcM/QEZ1WVuK4vXjh8wFasrPopBcZMScCDJrJPTfSFUl7sTkq/GEjDmwy2JRrVPFrd1aygAw/2jqDpO9oYwjSFrllAmwnAiS3Fzbjpj1qa2zhj8IQX1VV7bhG66mcyF7n5wEsWguEkJFCht7T3YtaUNnd2BrP+QzSQss8a/KBoZuf6gghKvy3rfWBE45sNdXuJCnz8yKoJH4DnoDAAHiFm2hDIRhhNZiptzEwUeqmb0S+BglL2wMyopnUzmybgfnnv1Q+z464mM3XZEarJZsiVXkBIoQMwfstsl5MSP+FFPEKGoUc2T5wBN08FgPJxRRUsqYEZal0zXcao3ZDSV4Y0y0WZMv8Aj9m8jw7ei1JVw/claQumWdZjoUtz84ZoKTmdcbF6crVFJ6XQ8m+icn3v1Qzy35yg4cBB4Izz1uT1HAYAUwSSwowRJppASKEDMH7LHJUKJ1cfJph9R0xjAjCQSIObDZ0Yilz8gJ1iCZsP4eOvyVF8Ig0EZgJG0xRgDx3HwShx8Hgn+oAKAob66BACgjkjuG2kJJXNfLK0rSzn+dNs/TnQpbv5wBYFHVZk7FpXEML3GizUXzbPVtTJejsZE57zjrycsBQAY2d2abrxOSmDiZNKq1C5ICRQgufYjmklYOuOssE2dMeiMoabUA58qwB9U8NxrR7Hv/W4ASLAuw7IGnuPA84bQUDUdAs+hpsKLO7+1OOFa1gYykltCqdwXFRU+nFnrSzmHdBLWJroUTxWVVEg/3FRMdM6TSeojxiYbVWZzCSmBAsT8Ibty5Ec0k7DCURWqpkMUeDBwEDjD3dEf6wbGmOE6YsxQHNXlHnjdIl
RVN/oKs2FrnzGGUGRYYMRb9x6JBzgOoYg6yhJK5b5o2X3EKisxUSazFC/0H24qJjrnyST1EcUNfcMFiPlDjsgqeI6btB9xpLtlwexK9PojqCxzW4LidF8I5eUu+EOK1fmLAdZur6oxnO4Pg+cNF5DOACmu8Uq8khpp3cuqDk3V8LVloytxplr1nO4LTWiu8RTDUjzbTHTOy/7xDDy35+iEkvqI4oaUQAFi/mB3vXUyaXRQJiGApkBWVB3hqIa+oSjaT/qxaH4t+gOydQ6RA1QGqKpsbBanqNGn64Ae0wwe3tgPGKmkRlr3ZrbvyL7CQGr3xbTqRFfQRMMei9WinwwTmbPp96foIOdhaynpieD0KqLxVnb8cj9Vvfvmzftxqi+EobBh4XMwrDye43DD6k8lKBYzQUpnbFT55pEIPAee4+BxC6gocQGMIaIYq4GPeoKoKnOD4ziEIgr6hqLW6qKuypcw3lTz+d9rPmvtCZjvUTWGUESBqungOR6NS2bnRUjZEXdvdxVNu3MN7J6/3eSziij1GC4yMu2t2zMYQTiqxcJBOXAcF4vh1xM+YzZSmVbpga4zcFzS0yVQU+lBRYnL6CkQlwwWiWrwx6KHTPdSsr7C8dcd2cAlvi/A9tYOqBrDUEiGpscKyOk6/vDaUax/4C9o3rw/aY/jbJBJT+WpghPn7GTSdgcNDg5CEASUlqbWKIRBLq2o8SKHRl7b4xJiljiDorNY5ypAELhR0UamG6GtvQdbXjmCEz3J/fIcAFE0LPfOniBqKr3QNB1dQ1Goqm4knoUMJRCVtdhnGEpjiWgjI53SCXsMRRQAZjQTg7kYjCpaTuuxFEPaf7Zx4pydzLhK4MMPP8R3v/tdHDx4EBzH4ZxzzkFzczNmzpyZj/EVHbkuGDVWCGC822QoGE0ZUspgFHjzuISkfzeF8nOvfojn9xyDNtL9xgHlPinWZpKDqmpWRBHPGefWGTAQkK2P8DyHQFiBSxLA81xGkU4el4CeQbNJPazNai52rVwKqUJO+8+VsVHIcyayz7hK4NZbb8WVV16JL3/5y2CM4emnn8b3v/99bNq0KR/jKzpybUWNFQJouk0GA1GMtW0i8ADAAeNsB638wlzMmVGOLbvb0RnLDhZ4oCpWSE3TdNRXeXB6IGK5mzRdT7i2wBtJR8bWE4eBoSgqYnXy06GtvcdyLQGJQ+ZglKQAhoVUtgVjJiUqtrxyBF39ERiJcj6subAhZ5ZzLo2NYih1QGSPcZVAOBzG2rVrreN169bhmWeeyemgiplcW1FjhQA+seNwrIm78d74lozx8DyPihIJEUXHeMS7auIFbGXJsCB/YEsbOM4osKOPOCUXM911BkuCp9rEjj//jLpSLD1nJra3dsDnlazaRvFz4XkO5SWGi0lWdXhcAh598SAisgZNZ/AHZTz64kFc03h22oIxWTjtawdOjVuv59EXDyIYUcHFdGtnbxCPvnQI11y6ICeKIJfGRjGUOiCyx7hKYO7cudi/fz/OPfdcAMDhw4epMfwYpGNFTdZajffdDzdW6YDHJaB/SE8QlCMVgChwVvTP9GpvRnNL5bufWVuC0/1h6LHcAoE3lAEDRkUZaTrDb7YfwrrlicJxpGV7snsIDz57AKqmwyUKKC+RUFclwh+UEZU1MABlPhc8LsFqw6jIuiWIzb2DYETFlleOjBp3su8AGN2n4LUDp/BPn5qOQx0DY9bricha7LoxZcg4RKJqRkI5mRJM9dlcGhtOzK9wMuMqgZMnT2LdunU466yzIIoi3nvvPdTV1eGyyy4DADz//PM5H2QxMZ4Vla1lfLLzhMLG5qkp+kcqgGGXjSEcF8yuRPPm/ZP+oa+5sMEaS78/ClXTwfEAS+GT6h2S8bPf/w0lHhEza0ssV5Zp2fb7IxgKDVv9sqKhb0hHdZkb9dU+RBUNIgeU+lwJY3+w5cCwII7NVweLuWjGvnePvnQIUdnIoJZEAeUlLnjdIqIADnUMoOmqc1
POv2cwYhTSi4uoMvZG9LSF8sgx9fvDYz4XuXbZODG/wqmMqwT+8z//Mx/jmDKMZ0VteeUIBgMyNJ1BFHmU+6QJNSpJ5g4AjCze/kB0lFuG52DVyBcFI0z0tQOnrNj7/qEI2j/yTyj2Pn7OwbACXWco9UkYCERTbjtoOkuI7InIGqrK3AhFFPhDSsJ7GQBdY/AHFWsvYm1Sl1KskmmcMGYM4LjEQYy8d7rOEAwb4aeSYDTI6fNHUF3uiW1Kjy3Iays88AeN/Arz0jozQlnTFcojx+SRjD4NqZ4LctkQ2SKlEmhvb0dDQwNKSkqS/v2Tn/xkzgZV7KSyotrae3CyNwSe56wSzn1DUVSVujJexqdyB6iqjpu+vNDapFQ1HbzAoSZW9wcwwir9scqYQyEZABeLvWd48fUOzJlRnrEVmGzvIBCSocTcQcn2J8zIHr+sIRhWEYyoMHMXh7uTDZcxUFTN2otINr76ah86e4MJhfHiq5mmunfGxrNxRQbzehz8QRk87x5XkC9fPNvaE9BNRQQGjyf9DfBM3TvksiGyRUol0NzcjF/+8pe48sorMWPGDMQnFofDYezdu3fCF33++eexceNGqKqKb3zjG7j66qsnfK5iYntrB0SBh84M4WZk7xoW7tyZwy0XTSF6qi8MWdEgCBxmxdwmqcothCIKBgMyGDOsR7Pkselm4EeUeBAELiH2HhgO75zo5uJIP3vjkjPx4t4OKJqe1DUlCjzCURX+YBQMAItbvZjvF3jOUgjzz6gY0y2z5sIGPPrSIUSiKjRdh8Dz8HhcWHNhQ8L7Rt47VTNyG0SBM1YdMYteSdO6XthQi2saz7YUL8cZiieT6KCJuHfIZUNkg5RK4J577sHAwAAaGhrwm9/8xqoZrygKvva1r034gl1dXbj33nvR0tICl8uFtWvXYvHixZg3b96Ez1ks9AxGUFHqQv9Q1LJWGQNUffSegaox+IPDoZ6HOgZwqGMAZ9QZwiXeHaCqGvqHZAAM1eUey8dd7hURUfSkVTy3t3bggxMDEPjhpHEGQBImtrnY1t6TIID9QRkne0NYdFYt3jjUPSrXgItF9viDsmE5s1jTlrj38dzwHobAc+kJ40sXjGsdj3SlMMaswmmCEDtmgEcSU0YyJbv2ZATyyDFFZLVg3Dt2l5AgckvKshHf+c53cN555+HIkSNYsmQJzj//fCxZsgQXXXQRPvWpT034gnv27MF5552HyspK+Hw+/Mu//Au2b9+e9udfffXPAABVVdHUtB4vv7wTABCJRNDUtB5//vPLAIBgMICmpvV47bX/A8DIeG5qWo/XX98DAOjr60NT03rs2/cGAKC7+zSamtbjrbfeBAB0dp5EU9N6tLW9DQA4caIDTU3r8d57BwAAR4/+HU1N6/H++4cAAO3tR9DUtB7t7UcAAO+/fwhNTetx9OjfAQDvvXcAx1sfhRLqRXW5B8rgMZx68zHo0QHMrC2B5j+Gpqb12PryOxAEHt0nDqL7rcehyQFjfj2H0fv24zjR2Y0ndx7GgbdbEXj3SZRKGgYDCiLd76K/7Qm4RQZdZzj9931o2/UQPC4OKgM62/+K4HtPoumqc7GwoRZV6mF0v/0ENJ2BMYbB42/g9NtPwusWUFvhwdatW/Df//19675v2fI07rrrDuv4mWc2Y8OGO63jB37xPzj+5tOWL7y//RUc378Fx7uDuPHLnwY79Rf4P3jRELQ8h/CxXej82/NQVA06AwaP/BGDR/4IMVbLePCDbRg4stNYuegMQ0dexLY/PGld76c/vQePP/6oddzc/CNs3vw4FjbUoumqc8GdeBHHDuzCEzsOo3nzfnz3lluwZcvTAAyBrbS3YKjjdfQPRQFw6PvbZgRP7jOqpzKgv+0JfKbmpCXsmprWY+fO7Tl79p745V34/MeM0NuB3tM48pdfYckcBQsbarPy7DU1rceJE0apjra2t9HUtB6dnScBAG+99Saamtaju/s0AGDfvjfQ1LQefX
19aGvvwcbf/AF/3fZzSFwUA0EZv/j1s7jhxhsQDBrP5p///DKamtYjEjGMh5df3ommpvVQVaO0+M6d29HUtN76rrZtewG33vod6/iFF7biv/7re9bx1q1bcPPNN6f97G3e/Diam39kHT/++KP46U/vsY43bXoY99//E+v44Yc34sEH77OOH3ro53jooZ9bxw8+eB8efnijdXz//T/Bpk0PW8epnj2TDRvuxDPPbLaO77rrDuvZA4D//u/vY+vWLdbxf/3X9/DCC1ut41tv/Q6effZZ63iyz178vUpGypXAI488EhvQrdiwYcOYJ8mE06dPo66uzjqeNm0a2tra0v58SYkbdXVlUFUVkiSgrMyDuroyRCISJElAebkXdXVl8Hq5hGNJ0iBJAioqjGOOiyYca1ow4TgaLYEkCais9KGurgzBYOLx4KAPkiSgqso47utLPD59OvG4stKHuiofwACXxKOu0oOgxKOy1I1vrfo0NP9RSJKAgaCCmhoRmpY8hl9nQJ8/ip3HTqBMFPD/XP8FfPv+vfBGPTh1moMk8ugdUmLx+YBLEuDhBQyIHPoDMupiHbvOnluLfdU+cPywpS2JRjezryxbgA/ePgmXS7DeX1rqhsslWsclJW643cPHgZhryWxMYtYe6uoPY+l5H8fBfXMwODiI739/FfYd7MKGe/6CYDAMj1tCKKxYXc4EngeL8wu5JQFV5W6c6OBx8Fg/jvWEsOjseng8kvUsAEg43newCx1dAZRWl2NmqQuBiILj3QHMD8o41hNCy+4j+KgnhBln8qiu8KKiVINfEmKrIg6iYNyLs+fWWuePf9Zy9eydc/Z0XLd4MU6cOIE773wF535iRtaePeO4JOG4uto4rqjwJj2uqSnBr19+H5LIQ+A5uEQBkkeCX+TRF5BRW1uG0tJSlJcb76+rK4PH40FZmcc6FkUx4RgAyso8Cc9SaenoYwBpP3slJW54PFLCcSAwfOzzuaAoroRjQWAJx/HX83pdcLtdCcc+nyvps5bs2O0WE45dLhGlpfHHAkpLPSP+nngcP55sPHtjkfcqohs3bkQ0GsX69esBAM888wwOHDiAO+8cW1uZFHsV0fGW1s2b92MgKKOrNzTKj25iZuHynBGjDxgtHE1/8kfdhoUmCDymx0oym01fmq8/P6PxJCNZQ/IX93aAgcV6DbDYCsPw/X/7yoVjlrp+sOUAdGaEWMY8Q1Y464za4U1df1BGKKLEvn8O9VWepO0ezXsY71+PKhrEWO/c+Iqlp/tCqKnwwOeRrPemulf5pFCqaDZt3AOfR7SMCiA/96dQ5m8X+awimvd+AtOnT8e+ffus4+7ubkybNi3fw7CN8XzHyxfPxqMvHUqpAABDAXAwsnFP94eNGkCm5S/ylh+93Dcs2FJtMmbqyzYbkoMZq5JQVMXWV4/CLfFQNUBjOuIXMTyPMePdFzbUonHJbLz4egc0nUESeHjdAvwhBVVlww3qw1HVCjkVYo3tO/vCSTOCU0XamMXu4sNqRYHHYEBOUAJUImEYKiEx9cl7Kenzzz8fe/fuRV9fH8LhMHbs2IEvfvGL+R5GYROL5R8LnkesJDSDzytBEjj0+yPo6ApAVnXoupEVzBizMmqzscm4468nLAUADId+RhUdLonHSC9WqVcas9Q1YNQouuGKT2H+GRUo80mYXu3D7PoyCMLw42luIHMY7mXAcUBE1kadu7bCEytuN4xZ7M4lJj7y5SUSVE1HVNGyfq+mAssXz4ZG92dKk/eVQH19PW6++WZ8/etfh6IoWLNmDRYunFwv2bEopsiGtvYe/M9z7yGiaJBEHmVeASFZg6xoCUlXxuaqEWoqxpKnev1RCLzhz2bMCPXsH4piIBCFN+ayyca8I7I6SgGY+NwiwtFY6ehYCGwwosIljZ9wNXJF8qe3PsLv/vQBdGb0QDaFelwwEzgY8xx57lSJVPVVhnKIt2pFUcDMGt+o7ONCfUbyTSHlIxTTb7mYsKW95G
WXXWaVncgluS7rPNZ1M31YzbFGFdVq+D4U1lFV5obX7cXAUBRul2B1/jITocpL3BgYihpC1wypjNtYZXEum22tHVixOHVGcDrj9rhEhKLqKAVgFHiT4RL5mH/feIfOgMGAnJAHkc79e/mvx1HiNZSKohmFiAwXWGJIq5CkLHUqwQUgqXJInn1MmBRCPoJdv2UnMKV7DNvRHCN5T98Do3r6JitCJgg8JFGIFV0z3Dk9AxFIIo9pVV6subABW145gpO9IYgCh4pSN3ieM5KdMGyZjywZYRJVdGx99Si6+kL4v1Ymhvmm+yNb9o9nYOurR0ftWXhdAhSVobxEQn8gsYSCqrGM3AfbW40EM6MaqA6XyEMUOISjGhjToTHOqtbpcQtpnzubVi1ZpfmFGt3kjimtBOxojrG9tQOKqls9fYVYeYi9751GiUeEqulJ6/SYYy0vMdwSZgQUg+HPPtEdROu7p3Dn/zrPis7pHYzA4xJRVebGUEixBO94sVN73zuNDz7ag68tm58gGFP9yMy/W+WVP1aBQ8cHrfOZ2b/VZRJUBlSXueEPKVBVHQLPYXqNUa003WJ1J3uCsWqghu9f1RgUVYNbElBd7kFXXwgAh+nVyaODxlNokxUaZJXmH2p0kzumtBKwI7JhZE9fwKxIA4QiKgSBT1qnxxyr1y0aG75JwmD3vncawAEcOelHeakLtaLh4w6FFQgCB01hGF2gIfU47/tdG1wij0vPm53yR3ayJ2gJPIDhw5N+yKoOnjOyfsGYEV/PcVi0YBpeO3AKgsCjvspruVsWLZiWkdBUNcP3k1DOghlrnTu/tTjpfOIt81BEhUvirYifbFuNZJXmH4pSyh1TutG8HZENtRUeKDH3jIm5qWsWJwMS6/TEj9UflKGoKfw5MBTByEbzPq+EmjI3ZlR7x2sWNgpZNVxEA0MRq4NXOKqiqy+Ej7oDCIRVq4aO6eYBDF8/B6CmwosZtSXweUQc6hhI2jT+UMfAqDGPFTEkCJxVV4kxZlyTM15PxsjG6FFFxVBYidVGMsim1dgzGBkVZURWafZoa+/BbRtfQ9PGPWjevB9t7T0UpZRDprQSWNhQm1Qo5dJaW754dix00zg2hSUwnEULjK7Ts7ChFv/0qekIhBPLKCdDVbWEY5fII6LoWHPRPFSXu1FV5h61cTvuOXWjJ/CxU0M43R82FBFnFJ0bCivoH4omrG5iM7IUhykEzbINX1s2HwDwxI7DaP9ocFQG9FhCc1ZtCSpKXbHS10bSW5lXwqza5BVt4y1zjuMgiQLAkFCSOptWY6oQVLJKJ4+p0Pv94VGrxon8ltvae9C8eX+CQiESmdLuICD/kQ0LG2rReN5svLi3A5puhDd6XCICYRVgRralmRVr1ukBjId1x19PQNd1SCI/5mrAH1RQ4h1OpDIFkCkMfR7B6CswFB3zPGPBGENNuQdDIQWKqkPVGSRhuFmLuapRY8I9XgiO9Jn7gzL6/BFwHGeVsx5LaC5fPBtP7TqCqjIhrVr5I11Z5SUu9A4aiiy+cmq2rEaq5Z87zGfY4xKhxMJ5TVebWfcqXWjvJj2mvBLIFplEg5gN2uPfX1Xqwr7DPQlZsZLIY/ni2dbDGlE0CBwXa4SSvA+8zy0gImuIKtooAfTEjsOWMPS6RXjdIkIRBT0DkbR2CuI3lRkAn0eCrGiIyMbKQ9UYOI5Z4aiabiS1jVyaj/SZV5S60OePYmAoCo9LGFdoLmyoRUWFD0/vOJTW/R7pL/a6RZSXuBGVtYTKqSPbWU40uqeQYuenGtncAKa9m/QgJZAGE7Eokq1A6uNq7gg8hws/a/SQbd68PxYeaiR+8RwA3vh3QmN1zhDE1eUeVJYMJzctmF2J7a0dGAxE4Q/KqCh1WZuigsDjrNmVWL54thVemqr0UkJvYgYc7xqy3Fnm6oUxoNwngec5BCMq3JIwqtHLyB+yzyMZlUoDSkqhPJJFZ9fjzFpfyr/Hk8wyFwUO31j5iZTNfS
ZrIRZC7PxUxFTorixsAFNEUXqQEkiDbFgUbe09eO3AqYSontcOnMKcGeXD4aE+CX1D0YQYe8sFzwwBLKtG39pgWMaZ08uxYHalFZFTWeZGnz+CPn8UjDGIomBZ3PFC67lXP8S21g5EldGuouGOXIkb2pWlLkSiGiKKZvnaF3ysAk1Xf27UOZJFcoiigIZZXquXwRM7DqO2oiMrFnSmlnk2LUTKF8gupkKPyCp4jpuUq40iitKDlEAaZMOiGEvwmA8rYFjdaqwlo9sloNQjYiAQhTbCeg/LOjq6hvD+8QFwMMrNlvsk1FQY2cWDAcUSuiOF0sovzMXKL8xFW3sPfvPH99HrjwIYrk4KJCoDgediPXQTx3Do+CCu//9ewfWXfzrhGql85gtmV+bMR5uJZZ4tCzFZI51HXzqEay5dQIpggpj3bddbJ9HZHZiUYqW9m/QgJZAG2bAoxhI8X1s23+pRy8WargAMPrexOSYKAjhOTyjPzACEYnV6GIb7FVeXuTG9xodQRE1oxZjKYv1//3dtQmloDkCpT0R1uRen+kIx9xQHWU3uQ4oqDA/8vg0rz59jJb6lsswnaoFn29rOloW4ZXc7gmEZHMdbtZyCYRlbdreTEpgECxtqsfS8j0+6lDLt3aQHKYE0SNeiGEtYjSV4FjbUorzEhYisQWcMksijvMQFl8SjbzAKRdMhxDaKR2YEx28gayPKTMSPaywL3FwZAMO1+AFY7qmRrSFHouvAi3sTG9Qns8zjN65NxrPAcxHhkS0L0cxcHpnUZrxOFAK0dzM+UzpPIFukk28wMmHJFFZmXPJ4yS4RWcP0Gh9m1ZWizCdhMCjjVG8IUUUDwKzm9AkKAMNx+6YLKb7MxB2PtFqKKd1krfhxet0iyryS1fVrLHSmj1kuGphYfH0mY0+X7OWPcAm5H4C5h5NplgZB2AetBNJkPItiPFfHeEtTc6VgunUsMcIB0AEtLl403l9vuo90nVkKgueGG848ufMworIGl2TkDaiqDlHkUeYVk1rgI8c5vdqH5Ytno/XdU7GyFaMxaweN51OfiAWeqwiPbFiI9VUedPaFoWO4ZhNjwPRq2ngkigdSAlkiHWE1luAxBeRgQIaZkmUmlnEcB4k3/q9oRjIZxwGaBpT6JPiDUQiCUWiN5wyBDBhlFwSBR0SWrf0GPlbQrn9Ixoxqb9KxJBvnwoZa9A+9mVA4zoTjjDDQkRZ9MvfY1ZfMz8hHW8gRHmsumodHXzwYq3Zq9Gn2uAWsuWie3UMjiLQhJZAlJiusTEH4YMvfwGL7Ajoz/O1czB10Rl0JooqGyhIXmq461xKygZAMjuPAgVmN3s2GMy6Rj/n0GQA+bg9Bxyhfxjg0Xf05tLX34Ikdhy3lJvJAWYkbosAlWPSpfPlXXzI/YcN6PAo5wmNhQy2uaTybNh6JooaUQJbIhrBa2FCLhlkVljL5qDtouX7EWMGy+NWFabGbAtdsOMPiGs6YFT8ry4wSEKpmlLIo83msTOBMWNhQi+bra8eN2MlWLP7Chloc7fSPamxfKIJ2PLdSseQRFMs4iexDSiBLZCscLV6ZiAIXK+QGq2l8stWFeY1kDWc0TUd9tQ+qzlBfPZyBa64oxmIswTCe8MtmLH6qJLvJCKl8CL1iqV2Ty3GScil8SAlkkWxsNsYrk0hUg6YrKPEYdYDGKp8bvyowf3RmOQcgeVvFsVYpkxUM2fLl56L+S76Ec7HUrsnVOItFCTodUgIFiCnQ6+rKsOv1v2dkSY2liDI5z/bWDqgaw1AoarmQvG4xbcGQLV9+LqKD8iWcxxp7IVnIuYrAKhYlmEsK6XtOhW1K4L777oMgCLjxxhvtGkJRkK1kl0zPY7R4VMBxvNXi0R+MjuoLMNb1gMwUT/wPZkZdKZaeMzMn0UH5KiyWauwelzCmhdzW3oNdW9omXTYhk3F29YcRjqoJCr++Knn0WLo4vYBbsayE8p4sNjQ0hNtuuw2bNm3K96WJDDCSzx
KzYQHOSkrLNiOT7U52B/BgywEcPeVH74DR9SxbHaXy1RQmVYIgYqG7yRLgUjVVyWUzlAWzKzEYNHpPcAAUVcdgMIoFsysndV6nN9/JRaJjLsi7Eti1axfmzJmDb37zm/m+NJEBgsABnJFroGq60WJSNwRZOgJpvAzqkcT/YCKyFhfpxKHEKyIQVjAwFM1Kd7h8tSpMlZkcUfSU7Snjm6qkIziy0TnrUMcAyn0uiLGwZFHkUe5z4VDHwESmbeH0lpDF0oY07+6gyy+/HADws5/9bEKfr6kpzeJoCp+6ujJbrjtnZgVOdg8lVA/leSP34KldR1BR4cOis+tTfn7Xlja4XQI8LuMRc0kCIrKKXW+dxNLzPj7q/X0BGWVeMZbprMQS2zhouo7aylKUyiqqyr348fX/NOm5La0rQ0WFDy27j+B0XwjTqn1YfeG8MeczmWuNnO+ut06i3x+GJ85NFJFVzKgrRVdfCGVe455JMQEiChz6A/KoZ2HfwS48tesIRJFDRakLgYiS1nczkr6AjJoKD7i4vBHGWNJrZsJk77Ndz362mFFXmvJ7Tmdu+Zp/zpTAtm3bsGHDhoTX5s6di8cee2xS5+3tDUAfp6DZVKGurmzSlRQnytJzZuLJnYfBgYMoAGbpuopSF8ABT+84NGbTl87uAHweMaG9Jc9x6OwOJJ1TdanL8p8rqgaB54wsXMFotTnWZ5Mx3obcmbU+3LxmYcJn8nWvzXuraixh03zpOTOxvbUDA0EZpV7JundRRUNVqWvU+J7ecQjgAIHnoWrM+D+njfvdjCT+3pukumamTPQ+2/nsZ4uxvufx5pbN+fM8N6bxnDMlsGLFCqxYsSJXpyfSIBstFOMzmMtL3PC6RTDGxl3SZrqhGx9NJPAc1JiiHys/IhWFviE33qZ5uk1VsrXxWshZ2cVMsZSyphDRKUq2WijGZzCbpCOQMxUs8T+YUFgBkzX40siPSEYxhCamitbKpKlKtiKnikVYFSPFUMqalMAUpK29B//z3HuIKJphwfsk+DzShAThRK3EiQiW+B/MsZ5Q2o3mR1LsoYnpNlUZ77vJZCVYDMKKyA22KQHKD8gN5gogqqgQOM4qTQ0AXnfy8tFjMRkrcTKCJZNG8yPxSDxO9Yag6SwW6SJBEPgpF5o41ndT6C4xonCglcAUw3SFSKJglZbWGYM/pExYEBaTldjW3gN/2Oj5C8SUoD+KEo+ItRdPvRLPqb6bYnCJEYUBKYEphukKKS9xoc8fgc44KwHICZt921s74POI8LgE+IMyVE2HwHMoL3HZKvzyXT6g2F1iRP4gJTDFMDcLvW4R1eUe+IMyFFWHRxInnWRVDJjCj+M4eN3G480YQyii2jYmO1wzhdyMhygsSAlMMeI3Cz0uATzvhqbpBakAcmEdpyv8zGuf7AkaMfYCh1m1JTmx0O1wzVDYJ5EupASmGMUS7pcr6zgd4WdeW9UYghEFAAeowKm+UE4sdDtcM8XyHBD2Q0pgClIMG7m5so7TEX7mtYdCUXAcD4BB1RgGAzIkkceW3e1ZvX92uWaK4Tkg7IeUAJFXTDfM4eMDkEQB5SVGDgOQPes43a5naqwstlkZlQHQdIaTPUG0tfdkTYCSa4YoZPJeRZRwLvGVRSWRhxrLYQhFFAD527g0SxyLAg8trjQ2xxn/iVku95uqmihZ6UQhQCsBIm/Eu4AqSt3o80fAGOAPGjkM+bKOTcvc6xYRkTXrdaNEHlBeImXdX0+uGaJQISVA5I34DVIzhHUwEIWialY/5HwIyvh9A6NUNgMYIEmClV1cWeLK+TgIohAgJUDkjZEbpF63CJ7nUFniQtNV5+Z1LKZlHh+lRP56womQEiAmTbrx/oW4QUqhlITTISVATIpM4v0LVeCSv55wMqQEiEmRabz/VBW4+a4NRBDZgpQAMSmoUFnhdzIjiLGgPAFiUpgx9/E4rVBZ/GqI4zi4JQFClnMNCCJX0EqAmBSFuNmbb9JdDZHLiC
hESAkQk6JQN3vzSTq1gZzoMiKlVxyQEiAmzVTd7E2XdFZDTuv05USlV6zkXQm8+eab2LBhAxRFQWVlJX784x9j1qxZ+R4GQWSNdFZDTttAT6X0trxyhFYHBUbelcB3v/td/OIXv8CCBQuwZcsW3HXXXdi4cWO+h0EQWWW81ZDTOn0lU3qqquG0P4ppDLQ6KCDyGh0kyzK+/e1vY8GCBQCAs846C52dnfkcAkHYwvLFs6FpOqKKBsYYooqWcgO9rb0Ht218DU0b96B58360tffYMOLJkSxqzB9UIFIUVcHBMcbY+G/LPrqu4/rrr8enP/1p/Pu//7sdQyCIvLLvYBdadh/B6b4QplX7sPrCeVh0dv2o9/yypQ2iaAjJqKJBVRmuXb1w1HsLmWTzONkTwrQqL0q9kvU+xhgCYRW/+v4lNo7W2eRMCWzbtg0bNmxIeG3u3Ll47LHHIMsybrnlFgwODuKhhx6CJEkpzjKa3t4AdN0WvZV36urK0N09ZPcwbMGpc2/evB8DQRmlXgn+oAx/UIaianBLIv7vlZ8oKrfJyOigQFiBqrMEl1hU0ZIWEHTq92+SzfnzPIeamtKUf8/ZnsCKFSuwYsWKUa8Hg0Fcf/31qKysxMaNGzNSAAQx1TF96cGIij5/BAAHnuMQUbSi85+P3CcxI4bSzSmhENP8YMvG8Jlnnokf/vCH4HlKWCaKj1wKJ3MDeTAgw1AAgM4ASeQt/3mxCsJMckr2HeyyNcTUSQoor0rgvffew65duzBv3jxcccUVAIBp06bh4YcfzucwCGLC5Dr+3cw5kBXNUgAMQLlPmhIhpenmlLTsPmJbXoXTchzyqgQ+8YlP4P3338/nJQkiq+Q66cs8x8MvHEQ4qkISeZT7JPg8EqKKNmVDSkfS1ReCR0r0FORLCTotsY/8MQSRAT2DEbjE3AqnhQ21+M+rP4eacjeqytzwusUxQ0qnIvXVPtsKE+bjOy4kSAkQRAbkq2rqorPrcfUl81FZ4kIooqKyxIWrL5k/JS3RZKy+cF7aeRXZxmmVcal2EEFkQD6rpjq5JpOpBO3YnHVaZVxSAgSRAVQ1NX/YpQSd9h2TEiCIDHGyhe4UnPQd054AQRCEgyElQBAE4WBICRAEQTgYUgIEQRAOhpQAQRCEgyElQBAE4WBICRAEQTgYUgIEQRAOhpQAQRCEgyElQBAE4WBICRAEQTgYUgIEQRAOhpQAQRCEgyElQBAE4WBICRAEQTiYvCuBffv2YfXq1bjssstw3XXXYXBwMN9DIAiCIGLkXQnceuutaG5uxvPPP4958+bhkUceyfcQCIIgiBh57yz20ksvQZIkKIqCrq4unHXWWfkeAkEQBNraexzTQnIsOMYYy/dF33//fXzzm9+EKIp4+umnMWPGjHwPgSAIB7PvYBd+2dIGUeTglgREFQ2qynDt6oVYdHa93cPLKzlTAtu2bcOGDRsSXps7dy4ee+wx6/ipp57C1q1b8dRTT6V93t7eAHQ973rLFurqytDdPWT3MGzByXMHaP65nn/z5v0YCMpwS4L1WlTRUFniQtNV5+bsuumSzfnzPIeamtKUf8+ZO2jFihVYsWJFwmvRaBR/+tOf8M///M8AgJUrV+Kee+7J1RAIwrGQq2NsegYj8HkSxZ9L5NEzGLFpRPaR141hURTxwx/+EAcOHABgrBbOPdd+rUsQU4m29h48ufMwBoIyfB4RA0EZT+48jLb2HruHVjDUVnggq3rCa7Kqo7bCY9OI7COvG8OCIODee+/FHXfcAU3TUF9fjx/96Ef5HAJBTHm2t3ZAEHjL1eGWBERjr9NqwGD54tl4cudhRGGsAGRVh6bpWL54tt1Dyzt5jw5atGgRWlpa8n1ZgnAM5OoYH1MZksvMBiVAEERuqa3wjNr0dKqrYywWNtQ6UuiPhMpGEMQUY/ni2dA0HVFFA2MMUUVzrKuDGB9aCRDEFINcHUQmkBIgiCkIuTqIdCF3EEEQhIMhJUAQBO
FgSAkQBEE4GFICBEEQDqboNoZ5nrN7CHnFafONx8lzB2j+NP/szH+889hSSpogCIIoDMgdRBAE4WBICRAEQTgYUgIEQRAOhpQAQRCEgyElQBAE4WBICRAEQTgYUgIEQRAOhpQAQRCEgyElQBAE4WBICRAEQTgYUgIFRCAQwJe+9CWcOHECALBnzx5cdtllWLZsGe69916bR5dbfv7zn6OxsRGNjY1obm4G4Jz533///bj00kvR2NiITZs2AXDO3OO55557cMsttwBw1vzXrVuHxsZGrFq1CqtWrcI777yT3/kzoiB4++232Ze+9CX2yU9+kh0/fpyFw2F2wQUXsI6ODqYoCrvmmmvY7t277R5mTnjttdfYV77yFRaNRpksy+zrX/86e/755x0x/9bWVrZ27VqmKAoLh8PsoosuYgcPHnTE3OPZs2cPW7x4Mfve977nqGdf13X2hS98gSmKYr2W7/nTSqBAeOaZZ/CDH/wA06ZNAwC0tbXhzDPPxMc+9jGIoojLLrsM27dvt3mUuaGurg633HILXC4XJElCQ0MDjh496oj5f/7zn8fjjz8OURTR29sLTdPg9/sdMXeTgYEB3HvvvbjuuusAOOvZ//DDDwEA11xzDVauXIknnngi7/MnJVAg/OhHP8KiRYus49OnT6Ours46njZtGrq6uuwYWs75h3/4B3z2s58FABw9ehTbtm0Dx3GOmb8kSXjggQfQ2NiIJUuWOOq7B4A77rgDN998M8rLywE469n3+/1YsmQJHnzwQTz22GN46qmncPLkybzOn5RAgaLrOjhuuA44YyzheCrywQcf4JprrkFTUxM+9rGPOWr+N910E/bu3YvOzk4cPXrUMXP/3e9+hxkzZmDJkiXWa0569s855xw0NzejrKwM1dXVWLNmDR544IG8zr/omso4henTp6O7u9s67u7utlxFU5E333wTN910E2677TY0NjbijTfecMT829vbIcsyzj77bHi9Xixbtgzbt2+HIAjWe6bq3AHgpZdeQnd3N1atWoXBwUGEQiF89NFHjpn/vn37oCiKpQQZY5g1a1Zen31aCRQon/nMZ/D3v/8dx44dg6ZpeOGFF/DFL37R7mHlhM7OTtxwww34yU9+gsbGRgDOmf+JEydw++23Q5ZlyLKMXbt2Ye3atY6YOwBs2rQJL7zwAv7whz/gpptuwsUXX4xf/epXjpn/0NAQmpubEY1GEQgE8Oyzz+I//uM/8jp/WgkUKG63G3fffTduvPFGRKNRXHDBBVi+fLndw8oJjzzyCKLRKO6++27rtbVr1zpi/hdccAHa2tpw+eWXQxAELFu2DI2Njaiurp7yc0+Fk579iy66CO+88w4uv/xy6LqOq666Cuecc05e50/tJQmCIBwMuYMIgiAcDCkBgiAIB0NKgCAIwsGQEiAIgnAwpAQIgiAcDCkBwpFcc8016Ovrm/R7Wltb8aUvfWnc65111llJz7Vr1y7cddddAIxqktu3b8eJEydwzjnnjHtOgsgGlCdAOJLXXnstK++ZLEuXLsXSpUtzfh2CSAWtBAjHceuttwIAvvGNb+CNN97AunXrcNlll2HlypXYunXrqPd0dnbilVdewdq1a7F69WpceOGFuO+++zK+7n333YcrrrgCq1atwiuvvAIAaGlpwbXXXpuVeRHERKCVAOE4NmzYgJaWFvz617/Gv/7rv6KpqQnLli1DV1cXrrzySpx55pkJ76mqqkJTUxPuvvtuzJkzB11dXbjooovw9a9/PaPrnnHGGbjzzjtx+PBhrFu3Dtu2bcvRDAkifUgJEI6lvb0d0WgUy5YtAwDU19dj2bJl+Mtf/pLgk+c4Dg899BB2796NF154Ae3t7WCMIRwOZ3S9r371qwCA+fPno6GhAW+99Vb2JkMQE4TcQYRj4ThuVIlexhhUVU14LRQK4YorrsC7776LT3ziE2hqaoIoisi04grPD//cdF2HKJINRtgPKQHCkQiCgFmzZkEURe
zYsQMA0NXVhT/+8Y84//zzrfeoqopjx44hEAhg/fr1uPjii9Ha2gpZlqHrekbXfPbZZwEA7777Ljo6OvCZz3wmu5MiiAlApgjhSJYvX45/+7d/wy9+8Qvcdddd+NnPfgZN03DDDTfgvPPOs96zbt063H///bjwwguxYsUKuFwuzJ8/H/PmzcOxY8fgcrnSvubx48dx+eWXg+M4/PSnP0VlZWWOZkcQ6UNVRAmCIBwMrQQIIgv86le/wvPPP5/0b9/61rewcuXKPI+IINKDVgIEQRAOhjaGCYIgHAwpAYIgCAdDSoAgCMLBkBIgCIJwMKQECIIgHMz/D7RrkTIqFAnbAAAAAElFTkSuQmCC\",\n      \"text/plain\": [\n       \"<Figure size 432x288 with 1 Axes>\"\n      ]\n     },\n     \"metadata\": {},\n     \"output_type\": \"display_data\"\n    }\n   ],\n   \"source\": [\n    \"# Create a visualization with Modin df\\n\",\n    \"sns.residplot(data=modin_tips, x=\\\"total_bill\\\", y=\\\"tip\\\")\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 40,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/plain\": [\n       \"<AxesSubplot:xlabel='total_bill', ylabel='tip'>\"\n      ]\n     },\n     \"execution_count\": 40,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    },\n    {\n     \"data\": {\n      \"image/png\": 
\"iVBORw0KGgoAAAANSUhEUgAAAYEAAAEKCAYAAAD0Luk/AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAABA7ElEQVR4nO29eZhU5Zn//T1bbb1vNItBQjOIWUg0TBAnV1wYGbAjKMEJ0ZBMzO991Z+jwcmko8YxE18TtN+8cUkMZoxijBI1pCVuEAhKflGwDaJ2UBBpAw3SNL1X13q25/3j1Dld1V3VXdVdVaeqz/25Li851VXnPM+pU/d9P/dzLxxjjIEgCIJwJLzdAyAIgiDsg5QAQRCEgyElQBAE4WBICRAEQTgYUgIEQRAOhpQAQRCEg7FVCdxzzz245ZZb7BwCQRCEo7FNCezduxfPPvusXZcnCIIgAIh2XHRgYAD33nsvrrvuOhw6dCijz/b3B6Hrzshvq6kpRW9vwO5h2IKT5w7Q/Gn+2Zs/z3OoqipJ+XdblMAdd9yBm2++GZ2dnRl/dqzJTEVqakrtHoJtOHnuAM2f5p+f+eddCfzud7/DjBkzsGTJErS0tGT8+d7egGNWAnV1ZejuHrJ7GLbg5LkDNH+af/bmz/PcmAol70rgpZdeQnd3N1atWoXBwUGEQiH8+Mc/xm233ZbvoRAEQTievCuBTZs2Wf9uaWnBG2+8QQqAIAjCJihPgCAIwsHYsjFssnr1aqxevdrOIRAEQRQ0be092N7agZ7BCGorPFi+eDYWNtRm7fy2KgGCIAgiNW3tPXhy52EIAg+fR8RAUMaTOw8DQNYUAbmDCIIgCpTtrR0QBB5uSQDHcXBLAgSBx/bWjqxdg5QAQRBEgdIzGIFLTBTTLpFHz2Aka9cgJUAQBFGg1FZ4IKt6wmuyqqO2wpO1a5ASIAiCKFCWL54NTdMRVTQwxhBVNGiajuWLZ2ftGrQxTBAEUaCYm78UHUQQBOFQFjbUZlXoj4SUQBbJdTwvQRBEtiElkCXyEc9LEASRbWhjOEvkI56XIAgi25ASyBL5iOclCILINqQEskQ+4nkJgiCyDSmBLJGPeF6CIIhsQxvDWSIf8bwEQRDZhpRAFsl1PC9BEES2IXcQQRCEg6GVADHloSQ+gkgNKQFiSkNJfAQxNuQOIqY0lMRHEGNDSoCY0lASH0GMjS3uoPvvvx9//OMfwXEc1qxZg29+85t2DINwALUVHgwEZbglwXqNkviIfFLoe1J5Xwm88cYbeP311/Hcc8/h97//PX7zm9/gww8/zPcwCIdASXyEnZh7UgNBOWFPqq29x+6hWeRdCXz+85/H448/DlEU0dvbC03T4PP58j0MwiEsbKjF1ZfMR2WJC6GIisoSF66+ZH5BWWLE1KUY9qRscQdJkoQHHngAjz76KJYvX476+vq0P1tTU5rDkRUedXVldg/BNrI196V1ZVh63sezcq584uTvHpga8+8LyCjziuA4znpNFDj0B+Rx55ev+XOMMZaXKyUhHA7juuuuw6WXXoqvfOUraX2mtzcAXbdtyHmlrq4M3d1Ddg/DFpw8d4DmP1Xm37x5/6g9qaiiobLEhaarzk35uWzOn+e5MY3nvLuD2tvbcfDgQQCA1+vFsmXL8P777+d7GARBEDmnGPak8q4ETpw4gdtvvx2yLEOWZezatQuf+9zn8j0MgiCInFMMe1J53xO44IIL0NbWhssvvxyCIGDZsmVobGzM9zAIgiDyQqEXlrRlY/jGG2/EjTfeaMelCYIgiDgoY5ggCMLBkBIgCIJwMFRFlCBySKGXDCAIUgIEkSOojDVRDJA7iCByRDGUDCAIWgkQRI7oGYzA50n8iVEZ60TIXWY/tBIgiBxRW+GBrOoJr1EZ62GKocKmEyAlQBA5ohhKBtgJucsKA3IHEUSOMN0a5O5IDrnLCgNSAgSRQwq9ZICdUNe3woCUAEGANijtYPni2Xhy52FEYawAZF
Und5kNkBIgHMFYQp7i+e2B3GWFASkBYsoznpCP36AEALckIApDOJFAyi3kLrMfig4ipjzjRaH0DEbgEhN/CrRBSTgFWgkQU57xolCcvEFJeyEErQSIKc94SVtOjeenZC0CICVAOIDxhHwxtADMBZSsRQDkDioYki3Ll9aV2T2sKUE6UShO3KCkZC0CICVQEKSKXqmo8OHMWp/dw5sSOFHIj4eT90KIYWxxB/385z9HY2MjGhsb0dzcbMcQCopUy/KW3UfsHhoxhXHqXgiRSN6VwJ49e/Dqq6/i2WefxdatW/Huu+9i586d+R5GQZEqRPF0X8imERFOwKl7IUQieXcH1dXV4ZZbboHL5QIANDQ04OTJk/keRkGRalk+rZpcQVOVQgnNJDcZwTHGmF0XP3r0KL761a/it7/9LebMmWPXMGxn38Eu/LKlDaJouIKiigZVZbh29UIsOrve7uHZzr6DXWjZfQRdfSHUV/uw+sJ5RX1f6PsmCgnblMAHH3yAa6+9FjfeeCOuuOKKtD/X2xuArtumt3JG0uig8z6O7u4hu4dmC3V1ZejuHkrYNI8vMlbMbovmzftHrfyiiobKEhearjoXwPD8nQrNP3vz53kONTWlKf9uS3TQm2++iZtuugm33XYbGhsb7RhCwUHL8uRMxbo+FJpJFBJ5VwKdnZ244YYbcO+992LJkiX5vjxRZExFgUmhmUQhkffooEceeQTRaBR33303Vq1ahVWrVuG3v/1tvodBFAlTsU8vhWYShUTeVwK33347br/99nxflihSMmk8UigRN+NBdfSJQoIyhomCJl2BWWyNYWgPiCgUSAkQGZNvizsdgTkVN5AzpVhWQkRhQUqAyIhCtbin4gZyJhTq90IUPqQEHE6m1mOhWtxOj7gp1O+FKHyon4CDmUhTkUJtxej0iJtC/V6IwodWAg5mItZjoVrcuY64KXR/e6F+L0ThQ0rAwUzEj55JyGa+yVXEjV3+9kwUTyF/L0RhQ0rAwUzEenRijLsd/vZ9B7syUjxO/F6I7EBKwMFM1Hp0Qox7vBU+GIiisswNxCnLXPvbW3YfyVjxOOF7IbIPKQEHQ9Zjcka6f/xBGX3+CDiOg9dt/GRy7W/v6gvBI9FGL5F7SAk4nMlYj4W+WTpRRrp/Kkpd6PNHMTAUhccl5MXfXl/tQ3d/iDZ6iZxDIaLEhDCt5a7+MIZCMj44MYAHWw7guVc/tHtok2ZkuKXPI6GqzAXGkLc2jKsvnOfokFcif9BKgJgQ21s7oGoMQyEZAAeB56HpDC++3oE5M8qxsKG2aFcKyTbMRVFAwyyv1fQl1yw6ux5XXzK/KO8fUVyQEihi7BSyPYMRhCIKAA48Z7zGc4CmM2xv7QCAoi1jkOmGea6+B9roJfIBKYEixe5aMbUVHvQPRSDww24TBkASjM3LiYRVxgvTGXWlWHrOTNuarwPpbZjb/T0QxGQhJVCk2F0rZvni2Wj/yA9NZ+A5QwEwAF63gNoKT0IiWiiiwB9SoKo6egciaGvvGbcUdL8/bKswTdcKt/t7IIjJQkqgSMk02zfbLouFDbVoXDIbL77eAU1nkAQeXrcASeSxfPFsbG/twEBQhqbp6BuKIuYxAschqXAfKUw9kgBVY7YJ03Tvl9OrlxLFDymBIiWTbN9cuSxWfmEu5swoTyksn9x5GIMBOaYAOHAcg9ctYDAg48GWv6FhVoX1/okI01z54jO5X9ms2VMo7jDCWZASKFIy2bzMtssiHeFrHj/Y8jcwxiCJxvUDERUcAMZYgnDNVJjm0hefyf3KVs2eQnOHEc6B8gSKlIUNtbj6kvmoLHGNG7uezTLDmZSfXthQi4ZZFair8qG+2oeIolurAkkU4JYECAKP7a0do0pBR2R1TGEaL6g5jks412TJ5H5l8j2Mxcj5eFxi1uZDEGNh20ogEAhg7dq1eOihh3DGGWfYNYyiJt3Ny2y6LDJdVcRbyqqqAwA4jqG8xA1gWLiOjMgx3S
EA0Lx5/6hVRy598Zner2yEcprzCUdV+IMyVI1B4IFQWJnUeQliPGxRAu+88w5uv/12HD161I7LO45slhnOVPjGC/fegQg4DqgscyetwRMvTOvqyrDr9b9bLhKOAz486ccDW9ows8YHj2TMIxdlFewoy1xb4bGyr43kO0DVGHSmJY2mIohsYYs76JlnnsEPfvADTJs2zY7LO45suSwAQ1jJMYveZDzhe7TTj46uABRNh6LqCEWUtEohbG/tgKLq6B2MoHsgAiV23dMDEfjDKkJhJSdlFbJ5v9Jl+eLZCIQUMAZwYNB1ABxQ4hHJJUTkFI4xxuy6+MUXX4zHH3+c3EFFxL6DXfhlSxtE0fDDRxUNqspw7eqFWHR2/aj3/3bHITy98zDAAQIHKBoDY0CJV8LcWRVYfeG8pJ8DgK/9YBuCYQWqlviI8hwws64EoiCgrMSF030hTKv2jXmuYuBrP9iGqKxB1XRIIo/KUjd8HhGBsIpfff8Su4dHTFGKLjqotzcAXbdNb+WVuroydHcP2T2MBM6s9WHt0nmjooPOrPUlHevW3e0AAIEztoQlgYOmMzCd4eY1CwEg6efq6sqgKDpME8XMMzCT0niOQyAk445vLEr4XKHdr0yYUe2z9iIkkYei6ghGVFSVuop6XhOhEJ/9fJLN+fM8h5qa0pR/LzolQNhPJhuhEVmFYBYXisFzxuvjIQocogozQkrjXucwNcsqx+9FGHOnyqFE7iElQEyasfIGPC4RUUWDEKcHdGa8Ph4za0vQ1R9GIKxAUY3wUp6HUbF0CgrH+E30/oCMqlLXqByMYq3MShQupAQIi4kImPgkJ4DhgxMDONQxAIHnMKPGh880VKP10GlourEC0BnAwLDsH8ffBzIt45oKDzRNx2BAhqrpmFbpwaIF07C9tQNP7DicU2GYb6FrrrKSuQOoWF1xUuiK21Yl8PLLL9t5eSKOiQiYtvYe/M9z7yGiaOA5JGzgajpDZ28Q/pCCxQum4Z32PkRkFR6XiGX/eAZWfmHuuGMamTswd2a5Zf3nWhi2tfdgy+52nOwJQhR4lJdItgtdKlZXfBSD4qaVgEMZaZ0EQnJGAsZ8uKOKCoHjEhSA6cPnOB6RqIrjpwOYXV9qXWvOjPK0x5ls/6F58/6cCkNzboMBGRzHQWcM/QEZ1WVuK4vXjh8wFasrPopBcZMScCDJrJPTfSFUl7sTkq/GEjDmwy2JRrVPFrd1aygAw/2jqDpO9oYwjSFrllAmwnAiS3Fzbjpj1qa2zhj8IQX1VV7bhG66mcyF7n5wEsWguEkJFCht7T3YtaUNnd2BrP+QzSQss8a/KBoZuf6gghKvy3rfWBE45sNdXuJCnz8yKoJH4DnoDAAHiFm2hDIRhhNZiptzEwUeqmb0S+BglL2wMyopnUzmybgfnnv1Q+z464mM3XZEarJZsiVXkBIoQMwfstsl5MSP+FFPEKGoUc2T5wBN08FgPJxRRUsqYEZal0zXcao3ZDSV4Y0y0WZMv8Aj9m8jw7ei1JVw/claQumWdZjoUtz84ZoKTmdcbF6crVFJ6XQ8m+icn3v1Qzy35yg4cBB4Izz1uT1HAYAUwSSwowRJppASKEDMH7LHJUKJ1cfJph9R0xjAjCQSIObDZ0Yilz8gJ1iCZsP4eOvyVF8Ig0EZgJG0xRgDx3HwShx8Hgn+oAKAob66BACgjkjuG2kJJXNfLK0rSzn+dNs/TnQpbv5wBYFHVZk7FpXEML3GizUXzbPVtTJejsZE57zjrycsBQAY2d2abrxOSmDiZNKq1C5ICRQgufYjmklYOuOssE2dMeiMoabUA58qwB9U8NxrR7Hv/W4ASLAuw7IGnuPA84bQUDUdAs+hpsKLO7+1OOFa1gYykltCqdwXFRU+nFnrSzmHdBLWJroUTxWVVEg/3FRMdM6TSeojxiYbVWZzCSmBAsT8Ibty5Ec0k7DCURWqpkMUeDBwEDjD3dEf6wbGmOE6YsxQHNXlHnjdIl
RVN/oKs2FrnzGGUGRYYMRb9x6JBzgOoYg6yhJK5b5o2X3EKisxUSazFC/0H24qJjrnyST1EcUNfcMFiPlDjsgqeI6btB9xpLtlwexK9PojqCxzW4LidF8I5eUu+EOK1fmLAdZur6oxnO4Pg+cNF5DOACmu8Uq8khpp3cuqDk3V8LVloytxplr1nO4LTWiu8RTDUjzbTHTOy/7xDDy35+iEkvqI4oaUQAFi/mB3vXUyaXRQJiGApkBWVB3hqIa+oSjaT/qxaH4t+gOydQ6RA1QGqKpsbBanqNGn64Ae0wwe3tgPGKmkRlr3ZrbvyL7CQGr3xbTqRFfQRMMei9WinwwTmbPp96foIOdhaynpieD0KqLxVnb8cj9Vvfvmzftxqi+EobBh4XMwrDye43DD6k8lKBYzQUpnbFT55pEIPAee4+BxC6gocQGMIaIYq4GPeoKoKnOD4ziEIgr6hqLW6qKuypcw3lTz+d9rPmvtCZjvUTWGUESBqungOR6NS2bnRUjZEXdvdxVNu3MN7J6/3eSziij1GC4yMu2t2zMYQTiqxcJBOXAcF4vh1xM+YzZSmVbpga4zcFzS0yVQU+lBRYnL6CkQlwwWiWrwx6KHTPdSsr7C8dcd2cAlvi/A9tYOqBrDUEiGpscKyOk6/vDaUax/4C9o3rw/aY/jbJBJT+WpghPn7GTSdgcNDg5CEASUlqbWKIRBLq2o8SKHRl7b4xJiljiDorNY5ypAELhR0UamG6GtvQdbXjmCEz3J/fIcAFE0LPfOniBqKr3QNB1dQ1Goqm4knoUMJRCVtdhnGEpjiWgjI53SCXsMRRQAZjQTg7kYjCpaTuuxFEPaf7Zx4pydzLhK4MMPP8R3v/tdHDx4EBzH4ZxzzkFzczNmzpyZj/EVHbkuGDVWCGC822QoGE0ZUspgFHjzuISkfzeF8nOvfojn9xyDNtL9xgHlPinWZpKDqmpWRBHPGefWGTAQkK2P8DyHQFiBSxLA81xGkU4el4CeQbNJPazNai52rVwKqUJO+8+VsVHIcyayz7hK4NZbb8WVV16JL3/5y2CM4emnn8b3v/99bNq0KR/jKzpybUWNFQJouk0GA1GMtW0i8ADAAeNsB638wlzMmVGOLbvb0RnLDhZ4oCpWSE3TdNRXeXB6IGK5mzRdT7i2wBtJR8bWE4eBoSgqYnXy06GtvcdyLQGJQ+ZglKQAhoVUtgVjJiUqtrxyBF39ERiJcj6subAhZ5ZzLo2NYih1QGSPcZVAOBzG2rVrreN169bhmWeeyemgiplcW1FjhQA+seNwrIm78d74lozx8DyPihIJEUXHeMS7auIFbGXJsCB/YEsbOM4osKOPOCUXM911BkuCp9rEjj//jLpSLD1nJra3dsDnlazaRvFz4XkO5SWGi0lWdXhcAh598SAisgZNZ/AHZTz64kFc03h22oIxWTjtawdOjVuv59EXDyIYUcHFdGtnbxCPvnQI11y6ICeKIJfGRjGUOiCyx7hKYO7cudi/fz/OPfdcAMDhw4epMfwYpGNFTdZajffdDzdW6YDHJaB/SE8QlCMVgChwVvTP9GpvRnNL5bufWVuC0/1h6LHcAoE3lAEDRkUZaTrDb7YfwrrlicJxpGV7snsIDz57AKqmwyUKKC+RUFclwh+UEZU1MABlPhc8LsFqw6jIuiWIzb2DYETFlleOjBp3su8AGN2n4LUDp/BPn5qOQx0DY9bricha7LoxZcg4RKJqRkI5mRJM9dlcGhtOzK9wMuMqgZMnT2LdunU466yzIIoi3nvvPdTV1eGyyy4DADz//PM5H2QxMZ4Vla1lfLLzhMLG5qkp+kcqgGGXjSEcF8yuRPPm/ZP+oa+5sMEaS78/ClXTwfEAS+GT6h2S8bPf/w0lHhEza0ssV5Zp2fb7IxgKDVv9sqKhb0hHdZkb9dU+RBUNIgeU+lwJY3+w5cCwII7NVweLuWjGvnePvnQIUdnIoJZEAeUlLnjdIqIADnUMoOmqc1
POv2cwYhTSi4uoMvZG9LSF8sgx9fvDYz4XuXbZODG/wqmMqwT+8z//Mx/jmDKMZ0VteeUIBgMyNJ1BFHmU+6QJNSpJ5g4AjCze/kB0lFuG52DVyBcFI0z0tQOnrNj7/qEI2j/yTyj2Pn7OwbACXWco9UkYCERTbjtoOkuI7InIGqrK3AhFFPhDSsJ7GQBdY/AHFWsvYm1Sl1KskmmcMGYM4LjEQYy8d7rOEAwb4aeSYDTI6fNHUF3uiW1Kjy3Iays88AeN/Arz0jozQlnTFcojx+SRjD4NqZ4LctkQ2SKlEmhvb0dDQwNKSkqS/v2Tn/xkzgZV7KSyotrae3CyNwSe56wSzn1DUVSVujJexqdyB6iqjpu+vNDapFQ1HbzAoSZW9wcwwir9scqYQyEZABeLvWd48fUOzJlRnrEVmGzvIBCSocTcQcn2J8zIHr+sIRhWEYyoMHMXh7uTDZcxUFTN2otINr76ah86e4MJhfHiq5mmunfGxrNxRQbzehz8QRk87x5XkC9fPNvaE9BNRQQGjyf9DfBM3TvksiGyRUol0NzcjF/+8pe48sorMWPGDMQnFofDYezdu3fCF33++eexceNGqKqKb3zjG7j66qsnfK5iYntrB0SBh84M4WZk7xoW7tyZwy0XTSF6qi8MWdEgCBxmxdwmqcothCIKBgMyGDOsR7Pkselm4EeUeBAELiH2HhgO75zo5uJIP3vjkjPx4t4OKJqe1DUlCjzCURX+YBQMAItbvZjvF3jOUgjzz6gY0y2z5sIGPPrSIUSiKjRdh8Dz8HhcWHNhQ8L7Rt47VTNyG0SBM1YdMYteSdO6XthQi2saz7YUL8cZiieT6KCJuHfIZUNkg5RK4J577sHAwAAaGhrwm9/8xqoZrygKvva1r034gl1dXbj33nvR0tICl8uFtWvXYvHixZg3b96Ez1ks9AxGUFHqQv9Q1LJWGQNUffSegaox+IPDoZ6HOgZwqGMAZ9QZwiXeHaCqGvqHZAAM1eUey8dd7hURUfSkVTy3t3bggxMDEPjhpHEGQBImtrnY1t6TIID9QRkne0NYdFYt3jjUPSrXgItF9viDsmE5s1jTlrj38dzwHobAc+kJ40sXjGsdj3SlMMaswmmCEDtmgEcSU0YyJbv2ZATyyDFFZLVg3Dt2l5AgckvKshHf+c53cN555+HIkSNYsmQJzj//fCxZsgQXXXQRPvWpT034gnv27MF5552HyspK+Hw+/Mu//Au2b9+e9udfffXPAABVVdHUtB4vv7wTABCJRNDUtB5//vPLAIBgMICmpvV47bX/A8DIeG5qWo/XX98DAOjr60NT03rs2/cGAKC7+zSamtbjrbfeBAB0dp5EU9N6tLW9DQA4caIDTU3r8d57BwAAR4/+HU1N6/H++4cAAO3tR9DUtB7t7UcAAO+/fwhNTetx9OjfAQDvvXcAx1sfhRLqRXW5B8rgMZx68zHo0QHMrC2B5j+Gpqb12PryOxAEHt0nDqL7rcehyQFjfj2H0fv24zjR2Y0ndx7GgbdbEXj3SZRKGgYDCiLd76K/7Qm4RQZdZzj9931o2/UQPC4OKgM62/+K4HtPoumqc7GwoRZV6mF0v/0ENJ2BMYbB42/g9NtPwusWUFvhwdatW/Df//19675v2fI07rrrDuv4mWc2Y8OGO63jB37xPzj+5tOWL7y//RUc378Fx7uDuPHLnwY79Rf4P3jRELQ8h/CxXej82/NQVA06AwaP/BGDR/4IMVbLePCDbRg4stNYuegMQ0dexLY/PGld76c/vQePP/6oddzc/CNs3vw4FjbUoumqc8GdeBHHDuzCEzsOo3nzfnz3lluwZcvTAAyBrbS3YKjjdfQPRQFw6PvbZgRP7jOqpzKgv+0JfKbmpCXsmprWY+fO7Tl79p745V34/MeM0NuB3tM48pdfYckcBQsbarPy7DU1rceJE0apjra2t9HUtB6dnScBAG+99Saamtaju/s0AGDfvjfQ1LQefX
19aGvvwcbf/AF/3fZzSFwUA0EZv/j1s7jhxhsQDBrP5p///DKamtYjEjGMh5df3ommpvVQVaO0+M6d29HUtN76rrZtewG33vod6/iFF7biv/7re9bx1q1bcPPNN6f97G3e/Diam39kHT/++KP46U/vsY43bXoY99//E+v44Yc34sEH77OOH3ro53jooZ9bxw8+eB8efnijdXz//T/Bpk0PW8epnj2TDRvuxDPPbLaO77rrDuvZA4D//u/vY+vWLdbxf/3X9/DCC1ut41tv/Q6effZZ63iyz178vUpGypXAI488EhvQrdiwYcOYJ8mE06dPo66uzjqeNm0a2tra0v58SYkbdXVlUFUVkiSgrMyDuroyRCISJElAebkXdXVl8Hq5hGNJ0iBJAioqjGOOiyYca1ow4TgaLYEkCais9KGurgzBYOLx4KAPkiSgqso47utLPD59OvG4stKHuiofwACXxKOu0oOgxKOy1I1vrfo0NP9RSJKAgaCCmhoRmpY8hl9nQJ8/ip3HTqBMFPD/XP8FfPv+vfBGPTh1moMk8ugdUmLx+YBLEuDhBQyIHPoDMupiHbvOnluLfdU+cPywpS2JRjezryxbgA/ePgmXS7DeX1rqhsslWsclJW643cPHgZhryWxMYtYe6uoPY+l5H8fBfXMwODiI739/FfYd7MKGe/6CYDAMj1tCKKxYXc4EngeL8wu5JQFV5W6c6OBx8Fg/jvWEsOjseng8kvUsAEg43newCx1dAZRWl2NmqQuBiILj3QHMD8o41hNCy+4j+KgnhBln8qiu8KKiVINfEmKrIg6iYNyLs+fWWuePf9Zy9eydc/Z0XLd4MU6cOIE773wF535iRtaePeO4JOG4uto4rqjwJj2uqSnBr19+H5LIQ+A5uEQBkkeCX+TRF5BRW1uG0tJSlJcb76+rK4PH40FZmcc6FkUx4RgAyso8Cc9SaenoYwBpP3slJW54PFLCcSAwfOzzuaAoroRjQWAJx/HX83pdcLtdCcc+nyvps5bs2O0WE45dLhGlpfHHAkpLPSP+nngcP55sPHtjkfcqohs3bkQ0GsX69esBAM888wwOHDiAO+8cW1uZFHsV0fGW1s2b92MgKKOrNzTKj25iZuHynBGjDxgtHE1/8kfdhoUmCDymx0oym01fmq8/P6PxJCNZQ/IX93aAgcV6DbDYCsPw/X/7yoVjlrp+sOUAdGaEWMY8Q1Y464za4U1df1BGKKLEvn8O9VWepO0ezXsY71+PKhrEWO/c+Iqlp/tCqKnwwOeRrPemulf5pFCqaDZt3AOfR7SMCiA/96dQ5m8X+awimvd+AtOnT8e+ffus4+7ubkybNi3fw7CN8XzHyxfPxqMvHUqpAABDAXAwsnFP94eNGkCm5S/ylh+93Dcs2FJtMmbqyzYbkoMZq5JQVMXWV4/CLfFQNUBjOuIXMTyPMePdFzbUonHJbLz4egc0nUESeHjdAvwhBVVlww3qw1HVCjkVYo3tO/vCSTOCU0XamMXu4sNqRYHHYEBOUAJUImEYKiEx9cl7Kenzzz8fe/fuRV9fH8LhMHbs2IEvfvGL+R5GYROL5R8LnkesJDSDzytBEjj0+yPo6ApAVnXoupEVzBizMmqzscm4468nLAUADId+RhUdLonHSC9WqVcas9Q1YNQouuGKT2H+GRUo80mYXu3D7PoyCMLw42luIHMY7mXAcUBE1kadu7bCEytuN4xZ7M4lJj7y5SUSVE1HVNGyfq+mAssXz4ZG92dKk/eVQH19PW6++WZ8/etfh6IoWLNmDRYunFwv2bEopsiGtvYe/M9z7yGiaJBEHmVeASFZg6xoCUlXxuaqEWoqxpKnev1RCLzhz2bMCPXsH4piIBCFN+ayyca8I7I6SgGY+NwiwtFY6ehYCGwwosIljZ9wNXJF8qe3PsLv/vQBdGb0QDaFelwwEzgY8xx57lSJVPVVhnKIt2pFUcDMGt+o7ONCfUbyTSHlIxTTb7mYsKW95G
WXXWaVncgluS7rPNZ1M31YzbFGFdVq+D4U1lFV5obX7cXAUBRul2B1/jITocpL3BgYihpC1wypjNtYZXEum22tHVixOHVGcDrj9rhEhKLqKAVgFHiT4RL5mH/feIfOgMGAnJAHkc79e/mvx1HiNZSKohmFiAwXWGJIq5CkLHUqwQUgqXJInn1MmBRCPoJdv2UnMKV7DNvRHCN5T98Do3r6JitCJgg8JFGIFV0z3Dk9AxFIIo9pVV6subABW145gpO9IYgCh4pSN3ieM5KdMGyZjywZYRJVdGx99Si6+kL4v1Ymhvmm+yNb9o9nYOurR0ftWXhdAhSVobxEQn8gsYSCqrGM3AfbW40EM6MaqA6XyEMUOISjGhjToTHOqtbpcQtpnzubVi1ZpfmFGt3kjimtBOxojrG9tQOKqls9fYVYeYi9751GiUeEqulJ6/SYYy0vMdwSZgQUg+HPPtEdROu7p3Dn/zrPis7pHYzA4xJRVebGUEixBO94sVN73zuNDz7ag68tm58gGFP9yMy/W+WVP1aBQ8cHrfOZ2b/VZRJUBlSXueEPKVBVHQLPYXqNUa003WJ1J3uCsWqghu9f1RgUVYNbElBd7kFXXwgAh+nVyaODxlNokxUaZJXmH2p0kzumtBKwI7JhZE9fwKxIA4QiKgSBT1qnxxyr1y0aG75JwmD3vncawAEcOelHeakLtaLh4w6FFQgCB01hGF2gIfU47/tdG1wij0vPm53yR3ayJ2gJPIDhw5N+yKoOnjOyfsGYEV/PcVi0YBpeO3AKgsCjvspruVsWLZiWkdBUNcP3k1DOghlrnTu/tTjpfOIt81BEhUvirYifbFuNZJXmH4pSyh1TutG8HZENtRUeKDH3jIm5qWsWJwMS6/TEj9UflKGoKfw5MBTByEbzPq+EmjI3ZlR7x2sWNgpZNVxEA0MRq4NXOKqiqy+Ej7oDCIRVq4aO6eYBDF8/B6CmwosZtSXweUQc6hhI2jT+UMfAqDGPFTEkCJxVV4kxZlyTM15PxsjG6FFFxVBYidVGMsim1dgzGBkVZURWafZoa+/BbRtfQ9PGPWjevB9t7T0UpZRDprQSWNhQm1Qo5dJaW754dix00zg2hSUwnEULjK7Ts7ChFv/0qekIhBPLKCdDVbWEY5fII6LoWHPRPFSXu1FV5h61cTvuOXWjJ/CxU0M43R82FBFnFJ0bCivoH4omrG5iM7IUhykEzbINX1s2HwDwxI7DaP9ocFQG9FhCc1ZtCSpKXbHS10bSW5lXwqza5BVt4y1zjuMgiQLAkFCSOptWY6oQVLJKJ4+p0Pv94VGrxon8ltvae9C8eX+CQiESmdLuICD/kQ0LG2rReN5svLi3A5puhDd6XCICYRVgRralmRVr1ukBjId1x19PQNd1SCI/5mrAH1RQ4h1OpDIFkCkMfR7B6CswFB3zPGPBGENNuQdDIQWKqkPVGSRhuFmLuapRY8I9XgiO9Jn7gzL6/BFwHGeVsx5LaC5fPBtP7TqCqjIhrVr5I11Z5SUu9A4aiiy+cmq2rEaq5Z87zGfY4xKhxMJ5TVebWfcqXWjvJj2mvBLIFplEg5gN2uPfX1Xqwr7DPQlZsZLIY/ni2dbDGlE0CBwXa4SSvA+8zy0gImuIKtooAfTEjsOWMPS6RXjdIkIRBT0DkbR2CuI3lRkAn0eCrGiIyMbKQ9UYOI5Z4aiabiS1jVyaj/SZV5S60OePYmAoCo9LGFdoLmyoRUWFD0/vOJTW/R7pL/a6RZSXuBGVtYTKqSPbWU40uqeQYuenGtncAKa9m/QgJZAGE7Eokq1A6uNq7gg8hws/a/SQbd68PxYeaiR+8RwA3vh3QmN1zhDE1eUeVJYMJzctmF2J7a0dGAxE4Q/KqCh1WZuigsDjrNmVWL54thVemqr0UkJvYgYc7xqy3Fnm6oUxoNwngec5BCMq3JIwqtHLyB+yzyMZlUoDSkqhPJJFZ9fjzFpfyr/Hk8wyFwUO31j5iZTNfS
ZrIRZC7PxUxFTorixsAFNEUXqQEkiDbFgUbe09eO3AqYSontcOnMKcGeXD4aE+CX1D0YQYe8sFzwwBLKtG39pgWMaZ08uxYHalFZFTWeZGnz+CPn8UjDGIomBZ3PFC67lXP8S21g5EldGuouGOXIkb2pWlLkSiGiKKZvnaF3ysAk1Xf27UOZJFcoiigIZZXquXwRM7DqO2oiMrFnSmlnk2LUTKF8gupkKPyCp4jpuUq40iitKDlEAaZMOiGEvwmA8rYFjdaqwlo9sloNQjYiAQhTbCeg/LOjq6hvD+8QFwMMrNlvsk1FQY2cWDAcUSuiOF0sovzMXKL8xFW3sPfvPH99HrjwIYrk4KJCoDgediPXQTx3Do+CCu//9ewfWXfzrhGql85gtmV+bMR5uJZZ4tCzFZI51HXzqEay5dQIpggpj3bddbJ9HZHZiUYqW9m/QgJZAG2bAoxhI8X1s23+pRy8WargAMPrexOSYKAjhOTyjPzACEYnV6GIb7FVeXuTG9xodQRE1oxZjKYv1//3dtQmloDkCpT0R1uRen+kIx9xQHWU3uQ4oqDA/8vg0rz59jJb6lsswnaoFn29rOloW4ZXc7gmEZHMdbtZyCYRlbdreTEpgECxtqsfS8j0+6lDLt3aQHKYE0SNeiGEtYjSV4FjbUorzEhYisQWcMksijvMQFl8SjbzAKRdMhxDaKR2YEx28gayPKTMSPaywL3FwZAMO1+AFY7qmRrSFHouvAi3sTG9Qns8zjN65NxrPAcxHhkS0L0cxcHpnUZrxOFAK0dzM+UzpPIFukk28wMmHJFFZmXPJ4yS4RWcP0Gh9m1ZWizCdhMCjjVG8IUUUDwKzm9AkKAMNx+6YLKb7MxB2PtFqKKd1krfhxet0iyryS1fVrLHSmj1kuGphYfH0mY0+X7OWPcAm5H4C5h5NplgZB2AetBNJkPItiPFfHeEtTc6VgunUsMcIB0AEtLl403l9vuo90nVkKgueGG848ufMworIGl2TkDaiqDlHkUeYVk1rgI8c5vdqH5Ytno/XdU7GyFaMxaweN51OfiAWeqwiPbFiI9VUedPaFoWO4ZhNjwPRq2ngkigdSAlkiHWE1luAxBeRgQIaZkmUmlnEcB4k3/q9oRjIZxwGaBpT6JPiDUQiCUWiN5wyBDBhlFwSBR0SWrf0GPlbQrn9Ixoxqb9KxJBvnwoZa9A+9mVA4zoTjjDDQkRZ9MvfY1ZfMz8hHW8gRHmsumodHXzwYq3Zq9Gn2uAWsuWie3UMjiLQhJZAlJiusTEH4YMvfwGL7Ajoz/O1czB10Rl0JooqGyhIXmq461xKygZAMjuPAgVmN3s2GMy6Rj/n0GQA+bg9Bxyhfxjg0Xf05tLX34Ikdhy3lJvJAWYkbosAlWPSpfPlXXzI/YcN6PAo5wmNhQy2uaTybNh6JooaUQJbIhrBa2FCLhlkVljL5qDtouX7EWMGy+NWFabGbAtdsOMPiGs6YFT8ry4wSEKpmlLIo83msTOBMWNhQi+bra8eN2MlWLP7Chloc7fSPamxfKIJ2PLdSseQRFMs4iexDSiBLZCscLV6ZiAIXK+QGq2l8stWFeY1kDWc0TUd9tQ+qzlBfPZyBa64oxmIswTCe8MtmLH6qJLvJCKl8CL1iqV2Ty3GScil8SAlkkWxsNsYrk0hUg6YrKPEYdYDGKp8bvyowf3RmOQcgeVvFsVYpkxUM2fLl56L+S76Ec7HUrsnVOItFCTodUgIFiCnQ6+rKsOv1v2dkSY2liDI5z/bWDqgaw1AoarmQvG4xbcGQLV9+LqKD8iWcxxp7IVnIuYrAKhYlmEsK6XtOhW1K4L777oMgCLjxxhvtGkJRkK1kl0zPY7R4VMBxvNXi0R+MjuoLMNb1gMwUT/wPZkZdKZaeMzMn0UH5KiyWauwelzCmhdzW3oNdW9omXTYhk3F29YcRjqoJCr++Knn0WLo4vYBbsayE8p4sNjQ0hNtuuw2bNm3K96WJDDCSzx
KzYQHOSkrLNiOT7U52B/BgywEcPeVH74DR9SxbHaXy1RQmVYIgYqG7yRLgUjVVyWUzlAWzKzEYNHpPcAAUVcdgMIoFsysndV6nN9/JRaJjLsi7Eti1axfmzJmDb37zm/m+NJEBgsABnJFroGq60WJSNwRZOgJpvAzqkcT/YCKyFhfpxKHEKyIQVjAwFM1Kd7h8tSpMlZkcUfSU7Snjm6qkIziy0TnrUMcAyn0uiLGwZFHkUe5z4VDHwESmbeH0lpDF0oY07+6gyy+/HADws5/9bEKfr6kpzeJoCp+6ujJbrjtnZgVOdg8lVA/leSP34KldR1BR4cOis+tTfn7Xlja4XQI8LuMRc0kCIrKKXW+dxNLzPj7q/X0BGWVeMZbprMQS2zhouo7aylKUyiqqyr348fX/NOm5La0rQ0WFDy27j+B0XwjTqn1YfeG8MeczmWuNnO+ut06i3x+GJ85NFJFVzKgrRVdfCGVe455JMQEiChz6A/KoZ2HfwS48tesIRJFDRakLgYiS1nczkr6AjJoKD7i4vBHGWNJrZsJk77Ndz362mFFXmvJ7Tmdu+Zp/zpTAtm3bsGHDhoTX5s6di8cee2xS5+3tDUAfp6DZVKGurmzSlRQnytJzZuLJnYfBgYMoAGbpuopSF8ABT+84NGbTl87uAHweMaG9Jc9x6OwOJJ1TdanL8p8rqgaB54wsXMFotTnWZ5Mx3obcmbU+3LxmYcJn8nWvzXuraixh03zpOTOxvbUDA0EZpV7JundRRUNVqWvU+J7ecQjgAIHnoWrM+D+njfvdjCT+3pukumamTPQ+2/nsZ4uxvufx5pbN+fM8N6bxnDMlsGLFCqxYsSJXpyfSIBstFOMzmMtL3PC6RTDGxl3SZrqhGx9NJPAc1JiiHys/IhWFviE33qZ5uk1VsrXxWshZ2cVMsZSyphDRKUq2WijGZzCbpCOQMxUs8T+YUFgBkzX40siPSEYxhCamitbKpKlKtiKnikVYFSPFUMqalMAUpK29B//z3HuIKJphwfsk+DzShAThRK3EiQiW+B/MsZ5Q2o3mR1LsoYnpNlUZ77vJZCVYDMKKyA22KQHKD8gN5gogqqgQOM4qTQ0AXnfy8tFjMRkrcTKCJZNG8yPxSDxO9Yag6SwW6SJBEPgpF5o41ndT6C4xonCglcAUw3SFSKJglZbWGYM/pExYEBaTldjW3gN/2Oj5C8SUoD+KEo+ItRdPvRLPqb6bYnCJEYUBKYEphukKKS9xoc8fgc44KwHICZt921s74POI8LgE+IMyVE2HwHMoL3HZKvzyXT6g2F1iRP4gJTDFMDcLvW4R1eUe+IMyFFWHRxInnWRVDJjCj+M4eN3G480YQyii2jYmO1wzhdyMhygsSAlMMeI3Cz0uATzvhqbpBakAcmEdpyv8zGuf7AkaMfYCh1m1JTmx0O1wzVDYJ5EupASmGMUS7pcr6zgd4WdeW9UYghEFAAeowKm+UE4sdDtcM8XyHBD2Q0pgClIMG7m5so7TEX7mtYdCUXAcD4BB1RgGAzIkkceW3e1ZvX92uWaK4Tkg7IeUAJFXTDfM4eMDkEQB5SVGDgOQPes43a5naqwstlkZlQHQdIaTPUG0tfdkTYCSa4YoZPJeRZRwLvGVRSWRhxrLYQhFFAD527g0SxyLAg8trjQ2xxn/iVku95uqmihZ6UQhQCsBIm/Eu4AqSt3o80fAGOAPGjkM+bKOTcvc6xYRkTXrdaNEHlBeImXdX0+uGaJQISVA5I34DVIzhHUwEIWialY/5HwIyvh9A6NUNgMYIEmClV1cWeLK+TgIohAgJUDkjZEbpF63CJ7nUFniQtNV5+Z1LKZlHh+lRP56womQEiAmTbrx/oW4QUqhlITTISVATIpM4v0LVeCSv55wMqQEiEmRabz/VBW4+a4NRBDZgpQAMSmoUFnhdzIjiLGgPAFiUpgx9/E4rVBZ/GqI4zi4JQFClnMNCCJX0EqAmBSFuNmbb9JdDZHLiC
hESAkQk6JQN3vzSTq1gZzoMiKlVxyQEiAmzVTd7E2XdFZDTuv05USlV6zkXQm8+eab2LBhAxRFQWVlJX784x9j1qxZ+R4GQWSNdFZDTttAT6X0trxyhFYHBUbelcB3v/td/OIXv8CCBQuwZcsW3HXXXdi4cWO+h0EQWWW81ZDTOn0lU3qqquG0P4ppDLQ6KCDyGh0kyzK+/e1vY8GCBQCAs846C52dnfkcAkHYwvLFs6FpOqKKBsYYooqWcgO9rb0Ht218DU0b96B58360tffYMOLJkSxqzB9UIFIUVcHBMcbY+G/LPrqu4/rrr8enP/1p/Pu//7sdQyCIvLLvYBdadh/B6b4QplX7sPrCeVh0dv2o9/yypQ2iaAjJqKJBVRmuXb1w1HsLmWTzONkTwrQqL0q9kvU+xhgCYRW/+v4lNo7W2eRMCWzbtg0bNmxIeG3u3Ll47LHHIMsybrnlFgwODuKhhx6CJEkpzjKa3t4AdN0WvZV36urK0N09ZPcwbMGpc2/evB8DQRmlXgn+oAx/UIaianBLIv7vlZ8oKrfJyOigQFiBqrMEl1hU0ZIWEHTq92+SzfnzPIeamtKUf8/ZnsCKFSuwYsWKUa8Hg0Fcf/31qKysxMaNGzNSAAQx1TF96cGIij5/BAAHnuMQUbSi85+P3CcxI4bSzSmhENP8YMvG8Jlnnokf/vCH4HlKWCaKj1wKJ3MDeTAgw1AAgM4ASeQt/3mxCsJMckr2HeyyNcTUSQoor0rgvffew65duzBv3jxcccUVAIBp06bh4YcfzucwCGLC5Dr+3cw5kBXNUgAMQLlPmhIhpenmlLTsPmJbXoXTchzyqgQ+8YlP4P3338/nJQkiq+Q66cs8x8MvHEQ4qkISeZT7JPg8EqKKNmVDSkfS1ReCR0r0FORLCTotsY/8MQSRAT2DEbjE3AqnhQ21+M+rP4eacjeqytzwusUxQ0qnIvXVPtsKE+bjOy4kSAkQRAbkq2rqorPrcfUl81FZ4kIooqKyxIWrL5k/JS3RZKy+cF7aeRXZxmmVcal2EEFkQD6rpjq5JpOpBO3YnHVaZVxSAgSRAVQ1NX/YpQSd9h2TEiCIDHGyhe4UnPQd054AQRCEgyElQBAE4WBICRAEQTgYUgIEQRAOhpQAQRCEgyElQBAE4WBICRAEQTgYUgIEQRAOhpQAQRCEgyElQBAE4WBICRAEQTgYUgIEQRAOhpQAQRCEgyElQBAE4WBICRAEQTiYvCuBffv2YfXq1bjssstw3XXXYXBwMN9DIAiCIGLkXQnceuutaG5uxvPPP4958+bhkUceyfcQCIIgiBh57yz20ksvQZIkKIqCrq4unHXWWfkeAkEQBNraexzTQnIsOMYYy/dF33//fXzzm9+EKIp4+umnMWPGjHwPgSAIB7PvYBd+2dIGUeTglgREFQ2qynDt6oVYdHa93cPLKzlTAtu2bcOGDRsSXps7dy4ee+wx6/ipp57C1q1b8dRTT6V93t7eAHQ973rLFurqytDdPWT3MGzByXMHaP65nn/z5v0YCMpwS4L1WlTRUFniQtNV5+bsuumSzfnzPIeamtKUf8+ZO2jFihVYsWJFwmvRaBR/+tOf8M///M8AgJUrV+Kee+7J1RAIwrGQq2NsegYj8HkSxZ9L5NEzGLFpRPaR141hURTxwx/+EAcOHABgrBbOPdd+rUsQU4m29h48ufMwBoIyfB4RA0EZT+48jLb2HruHVjDUVnggq3rCa7Kqo7bCY9OI7COvG8OCIODee+/FHXfcAU3TUF9fjx/96Ef5HAJBTHm2t3ZAEHjL1eGWBERjr9NqwGD54tl4cudhRGGsAGRVh6bpWL54tt1Dyzt5jw5atGgRWlpa8n1ZgnAM5OoYH1MZksvMBiVAEERuqa3wjNr0dKqrYywWNtQ6UuiPhMpGEMQUY/ni2dA0HVFFA2MMUUVzrKuDGB9aCRDEFINcHUQmkBIgiCkIuTqIdCF3EEEQhIMhJUAQBO
FgSAkQBEE4GFICBEEQDqboNoZ5nrN7CHnFafONx8lzB2j+NP/szH+889hSSpogCIIoDMgdRBAE4WBICRAEQTgYUgIEQRAOhpQAQRCEgyElQBAE4WBICRAEQTgYUgIEQRAOhpQAQRCEgyElQBAE4WBICRAEQTgYUgIFRCAQwJe+9CWcOHECALBnzx5cdtllWLZsGe69916bR5dbfv7zn6OxsRGNjY1obm4G4Jz533///bj00kvR2NiITZs2AXDO3OO55557cMsttwBw1vzXrVuHxsZGrFq1CqtWrcI777yT3/kzoiB4++232Ze+9CX2yU9+kh0/fpyFw2F2wQUXsI6ODqYoCrvmmmvY7t277R5mTnjttdfYV77yFRaNRpksy+zrX/86e/755x0x/9bWVrZ27VqmKAoLh8PsoosuYgcPHnTE3OPZs2cPW7x4Mfve977nqGdf13X2hS98gSmKYr2W7/nTSqBAeOaZZ/CDH/wA06ZNAwC0tbXhzDPPxMc+9jGIoojLLrsM27dvt3mUuaGurg633HILXC4XJElCQ0MDjh496oj5f/7zn8fjjz8OURTR29sLTdPg9/sdMXeTgYEB3HvvvbjuuusAOOvZ//DDDwEA11xzDVauXIknnngi7/MnJVAg/OhHP8KiRYus49OnT6Ours46njZtGrq6uuwYWs75h3/4B3z2s58FABw9ehTbtm0Dx3GOmb8kSXjggQfQ2NiIJUuWOOq7B4A77rgDN998M8rLywE469n3+/1YsmQJHnzwQTz22GN46qmncPLkybzOn5RAgaLrOjhuuA44YyzheCrywQcf4JprrkFTUxM+9rGPOWr+N910E/bu3YvOzk4cPXrUMXP/3e9+hxkzZmDJkiXWa0569s855xw0NzejrKwM1dXVWLNmDR544IG8zr/omso4henTp6O7u9s67u7utlxFU5E333wTN910E2677TY0NjbijTfecMT829vbIcsyzj77bHi9Xixbtgzbt2+HIAjWe6bq3AHgpZdeQnd3N1atWoXBwUGEQiF89NFHjpn/vn37oCiKpQQZY5g1a1Zen31aCRQon/nMZ/D3v/8dx44dg6ZpeOGFF/DFL37R7mHlhM7OTtxwww34yU9+gsbGRgDOmf+JEydw++23Q5ZlyLKMXbt2Ye3atY6YOwBs2rQJL7zwAv7whz/gpptuwsUXX4xf/epXjpn/0NAQmpubEY1GEQgE8Oyzz+I//uM/8jp/WgkUKG63G3fffTduvPFGRKNRXHDBBVi+fLndw8oJjzzyCKLRKO6++27rtbVr1zpi/hdccAHa2tpw+eWXQxAELFu2DI2Njaiurp7yc0+Fk579iy66CO+88w4uv/xy6LqOq666Cuecc05e50/tJQmCIBwMuYMIgiAcDCkBgiAIB0NKgCAIwsGQEiAIgnAwpAQIgiAcDCkBwpFcc8016Ovrm/R7Wltb8aUvfWnc65111llJz7Vr1y7cddddAIxqktu3b8eJEydwzjnnjHtOgsgGlCdAOJLXXnstK++ZLEuXLsXSpUtzfh2CSAWtBAjHceuttwIAvvGNb+CNN97AunXrcNlll2HlypXYunXrqPd0dnbilVdewdq1a7F69WpceOGFuO+++zK+7n333YcrrrgCq1atwiuvvAIAaGlpwbXXXpuVeRHERKCVAOE4NmzYgJaWFvz617/Gv/7rv6KpqQnLli1DV1cXrrzySpx55pkJ76mqqkJTUxPuvvtuzJkzB11dXbjooovw9a9/PaPrnnHGGbjzzjtx+PBhrFu3Dtu2bcvRDAkifUgJEI6lvb0d0WgUy5YtAwDU19dj2bJl+Mtf/pLgk+c4Dg899BB2796NF154Ae3t7WCMIRwOZ3S9r371qwCA+fPno6GhAW+99Vb2JkMQE4TcQYRj4ThuVIlexhhUVU14LRQK4YorrsC7776LT3ziE2hqaoIoisi04grPD//cdF2HKJINRtgPKQHCkQiCgFmzZkEURe
zYsQMA0NXVhT/+8Y84//zzrfeoqopjx44hEAhg/fr1uPjii9Ha2gpZlqHrekbXfPbZZwEA7777Ljo6OvCZz3wmu5MiiAlApgjhSJYvX45/+7d/wy9+8Qvcdddd+NnPfgZN03DDDTfgvPPOs96zbt063H///bjwwguxYsUKuFwuzJ8/H/PmzcOxY8fgcrnSvubx48dx+eWXg+M4/PSnP0VlZWWOZkcQ6UNVRAmCIBwMrQQIIgv86le/wvPPP5/0b9/61rewcuXKPI+IINKDVgIEQRAOhjaGCYIgHAwpAYIgCAdDSoAgCMLBkBIgCIJwMKQECIIgHMz/D7RrkTIqFAnbAAAAAElFTkSuQmCC\",\n      \"text/plain\": [\n       \"<Figure size 432x288 with 1 Axes>\"\n      ]\n     },\n     \"metadata\": {},\n     \"output_type\": \"display_data\"\n    }\n   ],\n   \"source\": [\n    \"# Create a visualization with pandas df\\n\",\n    \"sns.residplot(data=pandas_tips, x=\\\"total_bill\\\", y=\\\"tip\\\")\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 41,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/plain\": [\n       \"<seaborn.axisgrid.JointGrid at 0x7fc3bd50aa90>\"\n      ]\n     },\n     \"execution_count\": 41,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    },\n    {\n     \"data\": {\n      \"image/png\": 
\"iVBORw0KGgoAAAANSUhEUgAAAaEAAAGkCAYAAACYZZpxAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAACHGElEQVR4nOzddXxcx7nw8d8cWl6tmM3sGGKmxGFmTpqUb/EWb2+5TUopv7ntLd5yUwo3zA6ZYrZjZpQsptXigfePtWUrK9uSLGkF8/180lqj3XOelVb7nJkz84xwHMdBkiRJkjJAyXQAkiRJ0tAlk5AkSZKUMTIJSZIkSRkjk5AkSZKUMTIJSZIkSRkjk5AkSZKUMVqmA+iKmpoWsrO9NDREMh3KKfX3+EDG2FNkjD1jqMSYnx/ooWgGlwHXE9I0NdMhnFZ/jw9kjD1FxtgzZIxD24DqCUn9n5OIYh3diVWzH7vpKE60GWwTVB3hyULJLkHNH4laOBqhuTIdriRJGSaTkHTWHNvE3LeW5PY3sap2o2SXoGQVIrzZKNmloKhgmzixMHbtAczdK7Gbq1FLJqCPPx9t+DSEIt+KkjQUyb98qdscK0li6+skNz6D8Gajlp2DPuUyhGac+bmJGFbVThLr/k182YPoU6/EmHRhp54rSdLgIZOQ1C3mgQ3Elv0N4QthzLgBJVTUpecLw41WPhWtfCp2YyXm7pW0bnoe15xb0cYu6KWoJUnqb2QSkrrEiYWJLf0LVvUe9MmXoOaPPOtjKqFijFk3YjUcIbHhWRLbXiNxw6eArLMPWJKkfm3AzY6TMses3EHro9/AAVznvb9HEtDJ1OxSjIXvQc0bScVfvkZ843M4jt2j55AkqX+RPSHpjBzHIbH5JRLrnsaYdgVqweheO5cQCtrIGYTGTKJu6RNYBzfhvvhjKN5Qr51TkqTMkT0h6bQc2yL21p9JbnkV18L39GoCOpkWyMGYezsikEfksW9iVmzrk/NKktS3ZE9IOiXHTBB95Zc4sTCu+Xch9L5d1yMUBX3cQpTsUmKv/Ap92pUYU69ECNGncUiS1HtkT0jqkJOIEnnuJ2BZGLNu7PMEdDI1fwTGwrtJ7niL2JLf4JiJjMUiSVLPkklISuMkokSe/THC5UOffhVCyXzJEsUTxDX/TpxYmMhT92NHGjMdkiRJPUAmIakdJxkn8txPEN4Q+jmX9quhL6Hq6NOvRsktJ/LEt7DqDmY6JEmSzpJMQlIbx0oSffF/Uj2gcy7pVwnoOCEE+tgFaOPPI/LMjzAPbsp0SJIknQWZhCQAHNsm+upvwHHQp17eLxPQybSSiRgzryf2+u9IbF2S6XAkSeommYQkHMchvvzvOOFa9OlXI8TAeFuoOWUY8+8kseFZYisfkgtbJWkAGhifNlKvSmx6AfPwOxgzb0SoA2vWvuLLxrXgPViHNxN75Vdy5pwkDTAyCQ1xyf1rSW56HmP2zRmdhn02hOHBmHsbTiJC5JkfYEebMx2SJEmdNLAue6UeZdUeIPbGH3HNvgXFE+zUc0zbobLR5GiTSXPMJmk5GKog6FEoDGqUhDRUpe/vJwlVQ59+DeaOt4g88W08V30eNVTS53FIktQ1MgkNUXakieiL/4Mx+ZIzbsOQtBzWHYjx9r4Iu6qSBNwKuX4Vn6GgKqnE1Bp3qG+1aI7ZjC3QmTncw4zhbrxG33W2hRDoE85H+LKJPHk/nos/jlY2uc/OL0lS18kkNAQ5ZpLoSz9DLZ2EWjLhlI+LJmxe2dbKku2tFAQ0JhQZnD/Wi+c0iSWatDlQm2Tl3igPr2lm1gg3V0z2UxDsu7eaVj4F4c0ituTXGDNvxJh8cZ+dW5KkrpFJaAiqfeF3CEVHG7uww+/bjsOyXRGeWB9meK7GbbOC5Pg6VzXBoytMKHYxodhFa9xm4+EY9z9Xy7QyNzecGyC7k8c5W2ruMMT8u0iseQKr7gDuhe8dcJMuJGkokBMThpjE1iVED2xGn95xId
DasMlPXqzj1e0RbjjXzxXn+DudgN7N51JYMNrLBxZm4QD3PV3DMxtbSFrOWb6KzlF82bgWvgen8SiRp76H3drQJ+eVJKnzZBIaQqyq3cRXP0b2+bcjtPSZcGv2R/neM7WUhjRunx2gsIeG0Ny6wsIxHt4zN8jWyjjfeqqG3dV9M5VaaC70mTeg5JQTefxezCNb++S8kiR1jhyfGCLsSCPRl3+BMfUKtGAuNEbavmfZDo+uaWbdwRg3zui55PNuWR6V66cH2FmV4NevNTBnpJsbZwQxtN6dTZcq9TMfJbuY2Ku/RpuwGNesG/tFYVZJGupkT2gIcGyT6Mu/QC2bjFo4pt33ogmbn79az57aJHfNDfZaAjrZuEKDe+YHOdJo8p1najhYl+z1cwKoeSNwLXov1pGtRJ78HnZzdZ+cV5KkU5NJaAiIr/gXQNpEhMaIxQ9fqMOlCW6c7set993bwWMoXDXFx8zhbh54uY4XNoexnd6/VyTcfow5t6Dmj6T18fuIb35ZlvuRpAySw3GDXHL3Csz963AtvKfdRISqZpMHXq5jcomL2SPcGSlYKoRgYrGLkpDGC5tb2VIR50OLQoS8vTtMJoRAGzULpWAkyU0vYu5eifv8D6LmlPbqeSVJSid7QoOYVXeQ2LK/Ycy8HmG429oP1Mb58Qt1zBzuZs5IT8YrZmd5VG6dGSDXp/Ltp2vZcCjWJ+dV/LkY8+9ELRhN5On7iS3/B068tU/OLUlSiuwJDVJOLEz0xZ+jT7oIJVjQ1r6/NsH/Lmng/HEeJhT1n1pxiiKYP9rDsByNf7zdxI5qkxumenH18hChEAJtxLmoxeNI7lhK60NfRj/3WoxJFyJUvVfPLUmS7AkNSo5tEX3ll6gFo9BKJ7W1761J8PNX67lqela/SkAnK83WuXtukLqwybefrmVPX03ldvkwpl6OMecWzL2raP3Xl0hsex3H6ptJE5I0VMkkNAjF334IJxlDm7C4rW1PdYL/XVLPpZN8TCjxZDC6M3PpCtfPzGb+aA+/fK2Bh1Y3ETf7aIFrsADX7JvRp11JcsdbtP7zC8Q3PCeH6SSpl8gkNMgkdizF3Lsa49xrEUrq17u7OsEvXqvn8sk+RuUbGY6w88Yem8pd0Why35M1bDkS77NzqzlluObcgjHzRqyKLYT/+QVib/0Fq/5wn8UgSUOBvCc0iFhVu4mv/CeuebcjjFRvZ1dVgl+9Xs8Vk/2MyBt49zi8hsJVU/zsq0nw1xWNDMvVuXVWkIJA37x1laxCjOnX4MRaMA9uIvrMDxHBfIyJF2JnXdgnMUjSYCaT0CBht9QSfennGFOvRAnkA7DjaJzfvN7AlVP8DM8deAnoZCPzDcpzdNYejHH/s7XMGenh6ql+sjx9U/VAuAPo4xaijZmPXb2H5I63OLDyn6jDpqOPPw+1ePyA2RZdkvoTmYQGASfeSvS5n6CNmoNaOBqArRVxfvdmA1dN9TMsZ2AnoOM0VTB3pIcpJS7e3hfjm/+uYd4oD5dN9pHr75u3slAU1KKxqEVjCbhs6retJfbWn8FMoI1ZgD5uAWq2XG8kSZ0lk9AA55gJoi/+DJFThjZyJgAbDsb4y/JGrpnmpyx7cCSgk3ldChdO8DJ7hJt1B2N8++laxhUaXDDex8QSA6WP1j2pHj/6qNnoo2ZjN1djHdmaGq7zZKGNW4g+Zj6KN6tPYpGkgUomoQHMsW2iS34Lioo+KXV/YvnuCI+ubeaGcwMUZQ3uX6/frXD+OC/zRnnYVhnnoTVNRBMOc0a4mT3Sw/Bcvc8W4irBApRgAdqE87FrD2Id2UJi7b9RC8agj1+ENuLcDiuXS9JQN7g/pQYxx3GIvflHnNZ6jFk3AYIXNod5ZVsrt8wMkusfOhWiDU0wrdzNtHI3tS0mO6oS/PaNRkzbYUqpiyllbsYXGX2y1bgQCmr+CNT8ETiTE1hHd5HY/DKxt/6CNuJc9HGLUIsntM1clKShTiahAchxHOLLHs
Su2Y8x91ZsofLQqma2VMS5fXaAoHvoJKB3ywto5AU0Fo6BulaLfTUJXtwc5g9LTYqDGhOKDcYXGYwpMHq9YKvQDLSyyWhlk3FiYcyKbcSWPQiJCNro+antJXKHZbxskiRlkkxCA4zj2MSXPoh1dCfGnFuJ2hr/91o9kaTDbbMCfVoJu7/L9ank+jzMGgGm5VDRZHKoPsm/17dwtMmkIKgxvtBgfJGLsYUGPlfv/eyE+6T7Ry01WEe2E33xf0DV0UbPQR81ByWnXCYkaciRSWgAcWyT2Bt/xK4/jDHnVo6EFX79Wi3lOTqXT/agKPID7FQ0VTAsR2+bKWjaDkebTA43mDx/rKeU61eZUGQwqcTFuMLe6ykpgXyUCflo4xfhNB3FqtxB9IUHQChow89FG56qZSdr10lDgUxCA4STiBJ95Zc4yRj6rJt4c6/JE+tbWDzOy6QSecO7qzRFUJatt80etGyHqmaTg/Umz2wMU9FkUhrSmFziYnKJixF5OmoPJ3khBCJUjBIqRpuwGKelpm3Bsd1cg1o4BrV0ElrxeJS8EQhV/rlKg498Vw8AdksN0Rf+BxHIJzLxWh58M0xti8lts4bWBITepCqCkpBOSSiVlJKWw5FGk4N1SVbvj9IctRlbaDCl1MXEEhf5PfxzF0Igjs2wY+wCnEQUu+4gVs1ekjvewgnXo+SUohaMQs0fiZI7DCVULHtL0oAnk1A/Zx7cSOz136OMmsvS+HiefqaeqeVuLpkY7PErc+kEXRWMyNUZcazSRGvc5kBdkg2HYjy1IYymwrThPkblKowtcPX4xYAwPKjF41GLxwPgmHHsxqPYTUdJ7l6BvfZJnEgDwp+Lkl2Kkl2Gml2CEipGCRXJ6eDSgCGTUD/lJOPE336Y5N417Cq9jsfecaGpUW6ZFSCvj6oDSCf4XAqTSlxMKnHhOA51rRbVrYJlu6I8tKoZXRWMyjcYXaAzPDc1zNeTU8KF5kLNG46aN7ytzbFMnNZ67JZanHAtyapd2OE6nNZ6hDuIklVEbekIEu4ClOwS1FAJwu3vsZgkqSfIT7N+yDy4kchbf2WbOoGX4jfRsg0WjHYzpqDvFl9KpyaEIM+vMaLIzaRCFcdxaIjYVDaa7K5OsHx3lJoWE59LoSSkUZylUZilke/XyA+oZHtVNPXsf49C1U4M4Z3EcWycSBNOuA7HbMY8sB5nyyvYLTUI1Uj1lLJLUbPLUEJFqWE9X7asfSdlhExC/YhVu5+apU/wdqXBssSVaLrOzBFuxhX2XSkaqeuEEOT4VHJ8KpNJDYPZjkNTxKau1aKu1WLT4TjNkQhNUZuWmI3PpZDtVcn2KW3PTX2d+neWR+n2cKsQCsKXDb5s/CEvZmMESK0vI9aS6i211GEe2YKzc2kqWSWjqaG9QD4ikIfiz0XxhhCeLITbn+pBGZ7UMJ8qL4akniOTUIY5jk319k2sX72BdbUeDltTGFPo5pJxLkpCmvxjH6AUIcj2pZLKmHd9z7YdwnGblngqIYVjNvtqkryTiBOO2TRHbSIJm6AnlaDy/Cr5AZW8QKo3ledXyfIqXb4wEUKAJ4jqCUL+yHbfc8xEqvcUacSJNmPXH8Gq3IGTiOAkYpCM4SRjYMbBtkFRQdVAKCCU1LEVBRCpNkUBRUMoGmg6QjNAdyMM77GkFkB4gijeLOJmKXbSnWqT7/chRyahPmbbDpV1Yfbs3M/O3YfYUW0RdXRGZBUycUI2V+W70HtgqEbqvxRFEPSoBE+zDYVlO7QcS0hNMZvasMW+2iRNUZvGiE0saZPlSfWacv0qOb5UzyroUQm6FfxuBa8h8Fud25FWaAYimA/B/DM+1nFssK3Uf46dSko44Dip3pbjpNodCywLxzbBSoKZxDFjOIkYTmMlTvUenHgr1asjJMONYCUQvhyUQB5KsBAlq/DYcGMhSjA/lcikQUcmoV5g2w5NrQnqm2PUNsWorm2i4mgdFbURqsIOXmIUGq0UBTWuOCebgv
wQiqwlJp1EVQQhr0rI23GiSlrHklTMpjlq0dBqcaTBJJKwiSQdogmbaMIhblajiNRsP00VaErq2KoCiqCt5yEAIU60KYK2x+pq6j9DE7h1gUcXeAwFjyHw6gpeQ8fnEnhdCr5j7V3ppYVCXhobI6neWLQp1SNrbcSs2o2zby1OpAGntRHh8qWGCoMFiGABaiAvNYToy0nd09LljMCBaEAloeMVAbpaGSAcTZJMWhy/JnRIjY87zvF/g2M72I6DZaeu5izbwbJsTBvMSDPxRAIzaZMwLZJJi1jSIhY3U3/scYvWhE0k4dCaFCQsUIVDQE3gI4JfRMlz2YwpVMkZ58UI5CGMYT36s+kK23b6/U1oGePpGRrk+lVyzzDZze3RCUcSWFaqSoR1rKNiO6m/AfvYH4XT9j/HvwcWDo4Nlk3q78BOlT9KWA6tUUi2OCQsk4TpEDchZjrEkw4OqWTm1gQe46TEpQtcxxKZoQlcaqr4bKDWxkya6IpAVUJoWjZKNqg5AlUAAhRAmBGIhxHxME5zK6JuJ068FScRwW82ogsLXH4Utw9cPjB8CMOTGgLU3QjdANVI/b9iINTjQ4oqQlFTQ4xCtA0xpv6d+jrZFINI5FgwSupemRw67BHCcZzO9dclSZIkqYf170tNSZIkaVCTSUiSJEnKGJmEJEmSpIyRSUiSJEnKGJmEJEmSpIyRSUiSJEnKGJmEJEmSpIyRSUiSJEnKGJmEJEmSpIyRSUiSJEnKmAFVOw7ANC0aGiKZDuOUsrO9/To+kDH2FBljzxgqMebnB7r0+Lq6MLY9OKqqne61D7iekKaduvx9f9Df4wMZY0+RMfYMGePQ1qtJKBwOc80113D48GEAHnroIa655hquvfZavvKVr5BIJHrz9JIkSVI/12tJaOPGjdx5553s378fgH379vGHP/yBf/3rXzz11FPYts0//vGP3jq9JEmSNAD0WhJ6+OGHuffeeykoKADAMAzuvfde/H4/QgjGjRtHRUVFb51ekiRJGgB6fT+hiy66iL/+9a+UlZW1tdXX13PLLbfw/e9/n7lz557V8W3bpra2lvr6BizLOttwBz2Px015eTm6rmc6FEmSpL6fHVdVVcWHP/xhbr755m4noJqalrZ/19dXI4QgKysfVdUyvtuhpimYpp3RGE7FcRxaW5s5dOgQgUB+psM5rfz8QLvfc38kY+wZQyVGOTuuY306O27Pnj3ccccd3HjjjXzyk5/skWMmEjFCoVw0Tc94AurvhBD4fEGi0VimQ5EkSQL6sCcUDof50Ic+xGc/+1luuOGGHjyygxADbqZ5xshELUlSf9Jnn96PPvootbW1/OlPf+L666/n+uuv52c/+1lfnV6SJEnqh3q9J7RkyRIA3v/+9/P+97+/t08nSZIkDSByHEuSJEnKmAFXO66/iEQi3H//tzh8+BCKIhg/fiL//d9f5a233uKPf/w9ppnE7XbzyU9+lnPOmcr993+LaDTKd77zA/bu3cOnP/0xfvGL/2PEiJGZfimSJEkZI5NQN7355mtEIhH+/Od/YFkWP/nJ9zly5DC/+c0v+fnPf0NWVoi9e/fwuc99gn/969987nNf5IMffA/PP/8M//jHX/n0pz8vE5AkSUOeTELdNHXqdP7v/37Ff/7nR5g9ey633nonq1e/TW1tLZ/5zCfaHieEwuHDhxg7dhzf+tb9fOQj7+fyy6/issuuzGD0kiRJ/YNMQt1UUlLKv/71BOvXr2Xt2tV87nOf4J57PsCsWbP51re+3/a4qqqj5OWlFoYePHiArKwsdu3aQTKZlFULJEka8uTEhG564olHuf/+bzFnzjw+8YlPM2fOfJqbm1m1aiUHDuwHYMWKpbzvfXcSj8eprKzgZz/7KQ888EuGDRvBr3/988y+AEmSpH5A9oS66Yorrmb9+rXcffetuFxuCguLuOWWOxg9ejT33vtVHMdBVVV++MP/h2EY3Hff17jzznsYNWoMn//8l3jf++
5g1qy5LFiwKNMvRZIkKWN6vYBpbzi5htPRowcoKhqewWja68+1446rrj5EQUF5psM4raFST6y3yRh7hqwdd3b6Te04SZIkSTqZTEKSJElSxsgkJEmSJGWMTEKSJElSxsgkJEmSJGWMTEKSJElSxsh1Qr2gsrKCO++8iREjRrVr/+EP/x+FhUU9fq5PfeqjPPro0z16XEmSpL4gk1AvycvL589//kemw5AkSerXhmwSWrHlKI+/sYe65ji5QRc3LR7N/Mk920t5t/r6On784/upqqpCURQ++tFPMnv2XP7wh99SVXWUQ4cO0tjYwHvf+0HWrl3N1q2bGTMmVfjUsix++tPUNhD19fWMGTOG++77XqeOL0mS1F8NySS0YstR/vL8dhLHKhvUNcf5y/PbAXosEdXW1vD+99/V9vVll13Bjh3buPrq61i0aDG1tbV84hMfaust7d27h9/85o+8885GPvOZj/OXv/yL8vJh3H33rezevYvW1jCapvPb3/4J27b59Kc/xooVyxg/fmLbOX72s590eHyv19cjr0mSJKmnDckk9Pgbe9oS0HEJ0+bxN/b0WBLqaDju6qsv5sCBA/z+978FwDRNjhw5DMDs2XPRNI2iomJyc/MYOXJU23FaWpqZMWMWwWAWjz32MAcP7ufw4UNEo9F2x1+zZlWHxx87dnyPvCZJkqSeNiSTUF1zvEvtPcWybH7+818TDGYBUFtbS3Z2Nm+++TqaduJXoapq2nOXLn2D3//+t9x66x1cddV1NDY28u6yf6c6viRJUn81JKdo5wZdXWrvKTNnzuLxxx8BYN++vbz3vbcTj8c69dw1a1Zx0UWXcPXV1+H3+1m/fi22bfXY8SVJkjJhSPaEblo8ut09IQBDU7hp8ehePe/nPvdFfvSj7/G+992B4zh84xvf7vT9mmuvvZFvfetrvPLKi2iazpQpU6moqGDmzJ45viRJUiYM2a0cemt2nNzKoWcMlfL+vU3G2DPkVg5n53SvfUj2hCA1C663p2RLkiR1l2PbgMh0GL1uSN4TkiRJ6vfsZKYj6BMyCUmSJPVDjimTkCRJkpQplkxCkiRJUqaYiUxH0CdkEpIkSeqPLJmEJEmSpAxxkjIJSd1UWVnBokWz+NGP2le53rVrB4sWzeK55069988tt1xLZWVFb4coSVJ/J2fHSWcjKyuLt99egWWdKK3z6qsvEwrJWm6SJJ3ZUOkJDdnFqoldy0msfgwnXIfw52LMvhlj7IIeO77H42Xs2HFs3LieGTNmAbBq1UpmzZoDwGOPPcQLLzxHLBZF13Xuu+97DBs2ou35lmXxq1/9jPXr12JZNldddQ233/6eHotPkqR+Ts6OG7wSu5YTf+vPOOE6AJxwHfG3/kxi1/IePc+FF17Ka6+9CsC2bVsYM2Ysuq7T2trKm2++wS9+8VsefPBhFiw4j8cee7jdc59++gkA/vjHv/O73/2Ft956g40b1/dofJIk9V/OEElCQ7InlFj9WPr0RzNBYvVjPdobWrTofH73u19j2zavvvoyF110Ka+++hI+n4/77vsur7zyEocOHeTtt5en7fmzZs0qdu3aydq1awCIRiPs2bObadPO7bH4JEnqx4bI7LghmYSO94A6295dXq+XMWPGsmnTBtatW83HPvafvPrqS1RXV/HRj36Am2++jXnzFpCTk8uuXTvaPdeybD7xiU+zePFFADQ2NuLxeHo0PkmS+i/HNDMdQp/o1eG4cDjMNddcw+HDqd1Dly9fzrXXXstll13GAw880JunPi3hz+1S+9m46KJL+M1vfsH48ZPaNq5zu92UlZVz++3vYeLESbz55msd7g301FP/xjRNIpEIn/jEh9iy5Z0ej0+S+gudJIYTR4jBX7SzMxx7aPSEei0Jbdy4kTvvvJP9+/cDEIvF+OpXv8qvfvUrnnvuOTZv3swbb7zRW6c/LWP2zaAZ7Rs1I9XewxYuPJ9du3Zw8cWXtrXpuo5t29x996188I
N3M3z4CCoq2k/LvuGGWygvL+cDH7iLD3/4Hq666tq2CQ6SNJg4VhJX/U7CT/+Qpke+ibLnDXSrNdNhZd4Q6Qn12nDcww8/zL333ssXv/hFADZt2sTw4cMpL0/tY3PttdfywgsvsHjx4t4K4ZSO3/fprdlxxcUlPPpoai2Q1+vl1VeXtX3va1+7D4Cbb769w+cefx7AZz/73z0SjyT1Z/GKPdQ++n0gtXdO4yt/JHQpiFGL07awH1JsmYTOyve+136hZnV1Nfn5+W1fFxQUUFVV1VunPyNj7IIenYQgSVLXCQHRQ1s5noCOC695jqyRc0ngykxg/YCcHdfDbNtuN9brOE63x35P3qWvulpB0/rXTPP+Fk9HurrLYybIGHtGf4+x+UD6FvSqN4g/y49quDMQUcf6+ufo0hXy+vnvrif0WRIqKiqipqam7euamhoKCgq6dayTt9m1bbtfbac9ELb3BobEdsq9TcbYM7KGTULxBrEjzcdaBMEFt1LflAT6R28gE9t7x6Pxfv+766x+sb33tGnT2LdvHwcOHKCsrIxnnnmGm2/uiYkAAsexEaL/9z76gyE9xi71S0Z+Obm3fhPz6A7seASjZDyJQNm7R+iGHsc682MGgT5LQi6Xix/84Ad86lOfIh6Ps3jxYq644oqzPq5huGlsrCUQyEZVNTm98zQcx6G1tRmPp/8McUgSQMyVB8PzEAJiDjIBAQyRC8ZeT0JLlixp+/f8+fN56qmnevT42dn5hMNN1NdXpa21yQRFUbDt/jscp2kGY8aMpLExlulQJCnNEPnc7RTH6b+fIz1pwFdMEEIQCIQIBEKZDgUYGGPwuq4DMglJUr/Wjy9me5K8kSJJktQvDY1uoUxCkiRJ/dEQGZuUSUiSJEnKGJmEJEmS+qOh0RGSSUiSJEnKHJmEJEmSpIyRSUiSJEnKGJmEJEmSpIyRSUiSJKk/GiIVyGQSkiRJkjJGJiFJkiQpY2QSkiRJ6o+GyI4AMglJkiRJGSOTkCRJvUZRBC47jMsOoyhD48q+xwyRjToH/FYOkiT1T5odw9n7NvXLHgUcggtuQR85l6TiyXRoA8JQ2S16aLxKSZL6nFK9g8ZX/oQdbcGOhml89c+Iqu2ZDmvgkElIkiSpezRNIbL1rbT2yOY30DT5sdMpQ2T0Ur4bJEnqcbbtoOWWpLVruaXY9hApD322hJrpCPqETEKSJPU423Zwj1uA4va1tSkuL+6Ji2QS6ixlaCQhOTFBkqReEfcWkXP7t7DrDgKg5g4j7s4bMvvknDVlaPQRZBKSJKlXOA7EXXlQkgdAEmQC6gIxRHpCQyPVSpIkDTTynpAkSZKUMapMQpIkSVKmKEPjbolMQpIkSf2QkElIkiRJyhg5MUGSJEnKFDFEpmgPjVcpSZI00MiekCRJkpQxsoCpJEmSlDEyCUmSJEkZI7f3liRJkjJFDJG9HGQSkiRJ6o9kT0iSJEnKGGdoVHuVSUiSJKkfcoZIyXGZhCRJkvoj2850BH0iI0noySef5Oqrr+bqq6/mhz/8YSZCkCRJ6udkEuoV0WiU733vezz44IM8+eSTrFmzhuXLl/d1GJIkSf2bZWU6gj7R50nIsixs2yYajWKaJqZp4nK5+joMSZKkfs2xh0YS6vNa4X6/n8985jNceeWVeDweZs+ezYwZM/o6DEmSpP7NTmY6gj4hHKdv5wFu376dL3/5y/zhD38gEAjwhS98galTp/LhD3+4L8OQJEnq1xqWP0b2gpszHUav6/Oe0NKlS5k/fz65ubkA3HTTTfzjH//oUhKqqWnprfDOWn5+oF/HBzLGniJj7BlDJcb8/ECXHh9tacXs5z+Xzjrda+/ze0ITJkxg+fLlRCIRHMdhyZIlTJkypa/DkCRJ6tccM5HpEPpEn/eEFi1axNatW7npppvQdZ0pU6bwkY98pK/DkCRJ6t+S8UxH0Ccyson5Rz7yEZl4JEmSTsMxY5kOoU/IigmSJEn9UUL2hCRJGk
BUTLRwJXZTNYovhJVVCnTtZrjUfwyVnpBMQpI0CAjhIPa/Te2Lv2tr8597OdZFd2cwKumsJKOZjqBPyOE4SRoEjEQDja/+pV1beP2LJGsPZSgi6Ww5iaHRE5JJSJIGg0Sswym9ViycgWCkHuHYONbgr5ogk5AkDQKONxs9r7Rdm9Bd6KGiDEUknS2he3CGwEWETEKSNAgkhIfsqz6Nq3wiAHpuCXk3fxnjXYlJGkB015BIQnJigiQNElF3Ib4rP08wGcbW3MSER86NG8BSPaHBUbbndGQSkqRBJIlOUs/OdBhSTzA8ONHmTEfR6+RwnCRJUn9kuGUSkiRJkjJD6B7sSGOmw+h1MglJkiT1Q8Lw4sgkJEmSJGWCcPlwWhsyHUavk0lIkiSpH3IMD3ZrY6bD6HUyCUmSJPVDlu7HiciekCRJkpQBCXSwLZzE4C5kKpOQJElSP5SwQHizsMN1mQ6lV8kkJEmS1A8lLAfhycKRSUiSJEnqa4mkg+IJyp6QJEmS1PfipgOeAHZTdaZD6VUyCUmS1GsUxcFltWAwNDZo60lx004Nx7XUZDqUXiULmEpSBgghMBJ1OC21CHcA01uAhZrpsHqUy2wktv55mt95DTWQS+ii95HMG4ftyGvfzkiYoHizSB7cmOlQepVMQpKUAa6mPdQ+/qNjWzgLshbeijbpEkxhZDq0HqEqDrH1zxNe/yIAZkMltY//iPy7vkPMJ/c46oyE6SC8IeyW2kyH0qvkJYkk9TGXE6Hh+d8cS0AADk3LHkYNV2Y0rp6kJcO0vvNa+0bHxqo/kpmABqCE5YDhBdvEibdmOpxeI5OQJPW1RASzg5vN9iCqE+YoOqo/J61dGJ4MRDMwJUwbIQTCl409iO8LySQkDQqKIlDVnn07q6qCEKJHjwnguPzoHWy7rQTyevxcmZIQbkIXvx848fPTi0YickdkKqQBJ2Gl/l/xhrCbB28SkveEpAFNCHBFKontWoXVUodn/ALs3FFndW8llrTYfrCJNTuqGFceYvrYfILunvtTSeAmdPknqH/qp1gt9QhVJ3Tx+zB9RT12jv4gmTeO/Lu+g9VwBGF4EbnDiWtyw/HOSloOAMKThd1cleFoeo9MQtKA5opWUfvQt9vqa0W2vEnOtZ9BlJyL43TjgAKeXr6f51ccAGD5pkqGFR7my/fMxK31XE8r5isl+/ZvQ2s9wuUj4crF7k68/ZjtKMT8ZeAvy3QoA5J17A0hfCHsxsGbhORwnDSgmVV70go8tix7GN2Jd+t4ja1JXlx5oF3bwaoWKusj3Y7xVOKKn3hgGDFj8CUg6eyZx3tC3mzspqMZjqb3yCQkDWy2ndbk2DbQvU915xTPdGSWkPqYeeytLXzZOM2Dt2qCTELSgKYVjUFo7e//BOfdSFK4u3W8kFdn8bnth48KcjwU53q7HaMkdcfx6x7hCeIkWnGSg7PqhLwnJA1ocW8Rebd/k8jGl7Fa6vBNuwS7cEL37geRmst1ywWjGV2axbJ3Kpg0IodF00rw6IOrmoHU/1nHe0JCIHy52E1VqHnDMxtUL5BJSBrQHAdivjL08z6IwbEFfmfJa6gsOqeI86cW4Thgy6E4KQPsk66kFH8OdmOlTEKS1F9ZPZB8TuY4DpbVo4eUpC45+f0nfNlYDRXomQun18h7QpIkSf2QdVJPSPhzsesPZzCa3iOTkCRJUj9kntQTUgJ52I2Ds+6eTEKSJEn9UNI6sfxA+HJwwnU4ZiKDEfWOjCShJUuWcNNNN3HllVfy3e9+NxMhSFKvM+wIRvMBXOHDaN1cPCsNXXHzpOE4VUvNkGuoyGBEvaPPJyYcOnSIe++9l0ceeYTc3Fze97738cYbb7B48eK+DkWSeo07UUvjsz8nWXMQAM/4eXgXvYeEKmunSZ1zchICULIKsGr3o+aPyExAvaTPe0Ivv/wyV111FUVFRei6zgMPPMC0adP6OgxJ6jSXFcao3Y5etQl3oo4zFdZWFIi9s6QtAQ
FEd6zEqdrZy5FKg0nCdNpK9wAowQKsmn0ZjKh39HlP6MCBA+i6zsc+9jEqKyu54IIL+OxnP9ulY+Tn9++ryf4eHwydGB3HwUnGUYzuVVBINhzl6L9/TLI6VU9OGB6K33Mv7pKxp4zRirVSsT99S2azai/50y7oVhxnY6j8rntbX8foc6tgGIQCqYnZibKRtKx/eUD8rLqiz5OQZVmsWbOGBx98EK/Xy8c//nGeeOIJbrrppk4fo6ampRcjPDv5+YF+HR8MnRjdiTpiW98gvn8j7jGzcI1fSExP32jtdLSDm9sSEICTiNKw9HHcF32M7LysDmNUFHCPOpdkXfvZTFrh6D7/uQ+V33Vv64kYu5o8fIZgb0UYrdAFgKNkkairoLqiFqG7ziqWvna6197nw3F5eXnMnz+fnJwc3G43l1xyCZs2berrMKRBzmVHaHzmZ7SseopE9QGalz9G88v/16Xq2ooiMBvTqxcnaw6gOMlTPs+2wT35QozCkW1t3knnIQrGde1FSENa0KNS1XxinrZQNZSsIqyavRmMquf1eU/owgsv5Etf+hLNzc34fD7eeustLr744r4OQxrknJYqkrUH27XFD28n0FrT6f1tbNvBVTqed1//+iafj6mcfngvZuQSuO6LiHANKBq2L5/EoFzvLvWWkEdwuKH9lGwluwSrYjtaycQMRdXz+rwnNG3aND784Q9z1113cdVVV1FSUsLNN9/c12FIg5xQTlFw9FTtp2DljCR08fsRhhuEgm/KhejjF3WqnlxCeIgHhhH3lZCUCUjqoly/xoHa9j1uJaccq2JrhiLqHRmpHXfLLbdwyy23ZOLU0iDlsiMQrgbNheXNx/YX4B45ndi+DW2P8U5chOXN69JxTeFCjLmA3OHnIhyLpBEi7sg13lLvyw+oHG5IYloOmpqakqnklpFY/xSOGUdoA+u+0KnIAqbSgOeJV1P/9AOY9ZUA+M+9DGPGDfgv/ACe8dtJVOzAVT4JUTS+W0NijgNxLevYFyfarXgEV7wGNBdJPUtW25Z6lEsV5PpV9tUmGVuY2jNLaK7UfaHKnWjlUzIcYc+QSUga0FRhE179VFsCAgivf4nckdOJ502CYXPRR84j2cNVtt2JWqoe/hOxg1tQ3H5CF38AUTodC7nvkNRzynN0Nh+JtSUhACV3OOahTYMmCXV6XKGpqYlwONybsUhSl2lWjNj+9NmVZt2RtkWlPb3NgyZMwkv/SezgFgDsWJj6Z/8XPVx5hmdKUteMzNPZcKj9jE61YBTmwfR1aAPVGZPQ3r17ufnmm5k/fz5z587l7rvvpqJi8NUvkgYmU3XjHp5+RajllHR7d9Uz0ZJhorvXprVbjTIJST2rJKTRErOobjbb2kRWIU4iit2UvnxgIDpjEvrKV77CrbfeysaNG1m/fj2XX345X/va1/oiNkk6I8tR8M25Hi2rsK3NN+1inNyRp3nW2bFVF1p2cVq78Gb12jmloUkRgjEFBqv3R9vahBCohaNJ7l+fwch6zhmTUDQa5Y477kDXdQzD4J577qG2trYvYpOkTom5Csi69V5yb7+XvPfcjzH3DpKKt9fOlxAesi/9ECgnbql6x8+DUOfWH0lSV4wvMnh7bxTnpK69WjAGc396b3wgOuPEhFGjRrFu3TpmzJgBwM6dOykrk39sUv+SULyQ1Xu9n3eLZ4+m7EM/IlJ9GMXlww6WkFA8fXZ+aegoDWlEkw6H6k2G5aZmdyp5w7E3PIMdaUIZ4D3wMyahiooK7rnnHsaPH4+maWzdupX8/HyuvfZaAJ5++uleD1KS+hvHERgFw2kSXatFJ0ldJYRgYrHBij0RhuWmEo5QtdQEhQPrMSZekNkAz9IZk9AXvvCFvohDkiRJOoWJxS4eWdPCLbOCqEpq2qdaOBZzz9uDNwnt2bOH0aNH4/P5Ovz+5MmTey0oSRpKFAWiSRtdVTKz1bHU7+X4VIIehe2VCSaXpiolKAWjSLzzIk4sjHD7Mx
xh950yCf3oRz/it7/9LbfeeivFxcXtbopFo1FWrFjRJwFKZ6aqAgHtNsCSOiYE6HYUYSWw9ABmhkvwtCYs9lY0U9MYxePSGFUapDDL3a4ygyQBjCs0eHtftC0JCc1AzRuBeWA9+vjzMhxd950yCf3whz+ksbGR0aNH8+CDD+I4DkIIkskkd999d1/GKJ2CwMHVcpDW9S9ix8L4zr0MO388piyW2SEhHIy6nTS+/EfM5hq8E+bjm3dzl/YYips2h2rCHK2LUF4UoTjkxtC6l8iEAodqwvzr5Z1U1rUCsGh6CTecN4ocn3GGZ0tDzbhCg7+tbMK0HbRjQ3JK0ViSu1cOziT0X//1XyxbtgwhBPPnz29rV1WVyy+/vE+Ck07P1XqEmoe+DXZqz5HY/k3kXvc5KJLbpXfE1XqUmsd+CI4NQGTbMhzbxH3BRzCdM5fbsRyHx9/cy8urTmwRcd15o7h+4QjOsON3h6JJm9fXHWlLQABLN1Qwa0KhTEJSmoBbIeRV2Xk0waSSVG9ILRxNcvPLOPFWhKvjWyf93Skv4f7whz+wfft2brjhBrZt29b23+bNm/npT3/alzFKHRBCkDjwTlsCOq5l9VPowjzFs4Y2q7GyLQEdF92xCi3RuR0za5vj7RIQwNNL99IQ7vxGeSezbdh5oCGtvao+0lZySJJONiJX553DsbavheZCzRuOuX9dBqM6O2ccR/j+97/fF3FI3aGk//qEokG3rsv7H8NuxdWwG6N+Jy7rRKLQSOIKH8Ko2YorXoMQp7+BIgS0xEwSHWxEp/pDJIXG/upW9lWFiSbtDo6QkkhaaW2OA4loBM2J42o5gFG7FXeiDnEsi5zqNQD4XAqTR6UPBZYV+DF7uCK3EOAyGzFqt+Nq2ofuRNO+3xhJsrOimcqGWI+cX3MSuMKHO/17ks5sRK7O1sp3bXRXNI7knrczFNHZk1W0ByjHcXANm4LQHsMxT7wpA3NvIN6JoaX+LlFfSfNTPyFZvR8ALVRA9vVfxDQCmBuepmH1MwAIVSf3pv8mkTPulLXiqprifOePb3Pp1BAXlU7GObLl2HcEWRd/gO89sovtx3okRblevnj3TEKe9PtqhUGNvJCb2sYTV6IleR7yRCOJ1c/RuuHl1FF1F3k3fwXHk0XTsz9Lew0xV2pPI8eGmy4Yw54jTRytiwBwwYwy3t58lIraVi6cVozSQ10iV2sldY99DzuaKkLsHj0D3wUfJKH6EQL2VbXy/b+uJmmmkvC1i0ZyzYIR6Er3zq85CcxNz9Lw9pOpBlUj74YvkMib0Gs1/YaCgqBKXatFa9zG50pdhA70ITk5I3QAi3lLyLv9XgKzr8E35QLybvs6Zt7YTId11oSAyK41bR/eAGZjNbHtb6FFamk5loAAHCtJwwu/QbdaOzgS2MDfX9xOa8zk36tqec1zOeGFn8R/2cfJf893Wd1S2JaAAI7WRVi2sRKlgw9fnwFfvCKXOeNC+Dw68ydm85nzXHjjNbRueOVETMk4zW/+Hevghg5fw8nHzvbqfOW9s7j7igncdfl46ppivLnhCH9/YTu1Le2veLtLExYtKx5pS0AAsT3roHZf6t+mza8e39iWgACeXrqPqoZo2rE6S22ppOV4AgKwTBpe/A2GKSvxnw1VERQFVfbXndhxdaAPyckkNMDFfKUoM29FX/RB4qExWIOgc6soCrEju9LaE4e3gZV+/8VqqUckO/7ATJg2e440tX39xKpavvZ0CyuiIzGzylmzoz7tOVv213V4TyaJi9IgvNf7Bt9bUM979Jfxb3oY1Rfi3XOqHTNBoqKD13BoK8q7hqWicZO/vbCdf7y4g3f2nKjL2NTNe03vplgxEpW709rNpiqEgGjcate7O67xLM5vR5vS2qxwIyQj3T6mlJIf0DhU/65tv4vGkdy9MkMRnR2ZhAYB23awrFPfyxhoLMvGN2ZGWrtn/Hww0guTGgUjsV2BDo/l1lVmTyxMay/N82GaNjMmFK
R9b8GUEuxT/DiTuWPJOe9WcvLzyJ11BaEbvoStpd9rUvy5uEed2+FrsOz2GS7oNcgPta87p6mCvFDP1KKzNC/esXPS2rW8chwH/B6N0aXp9ccKsrtfBFYJ5PPue5N6XjmOO9jtY0opOT6Fisb2k4/UwjFY1Xuwo80Ziqr7ZBKS+iXPyKn4z70stZgG8E5ciDZiJgl3PjlX/yfCSH1A67klhC7/CElcHR/Icbjx/NFMGJ6derym8N4rJ1Cam/qAnTgsm8vnDuf4CNn500uYMTav3eLsk9mOQsxXhjliIf7JC4nr2Zj+ErIv/w+EnopBzx9G8Lw7oGhi+msYOTPt2G5N4XN3ntuWiPwenf+6aybZ3p5Z72XZAs+5V+Eqn5RqUDWyzrsDOzQCAE0IPnbjFErzU/cTPC6NT982jfxg96eJJ70F5F77KYQr9XPWsosJXflxEqQnbKlrsr0qR5vaJyGhGagFo0nuWZWhqLpPOKf6a+vHamo6N6U2E/LzA/06Phg4MdbXNqHF6sBxsNw5mMeGGhVFoMcbIBnB8WSTEGfuMZgONLUmMDSFoEdPSwSNkSS245DtMzo9t/Dkn6MQYCQaIBk7FlPqw1YVNlq0Dmj/GjoSt2yaWhP43Dp+l3bKRNhZLieCYbaQwCBhZKM5CZRoPULVSbhysJ32rzRpOTS2JvC4VIIeHfssZ8gpikBPNEDi9L+ngfJ+PNsY8/M77q2fyso3VhGLpQ+JNkctHlrdwk9ua9/Dt6r3YO5dg++m+84iyt5xutc+8G8gSIOW5ShYrvy0dtt2iOsh0EOdOo5GElfDPpQdy1F82bjGziHuLW43Syt0lr0Ox4G4ns27i1VYjoLlTn8NHXGpCgXBVPJSFIED2N0sxeSOVNDwzM8wG6sQupvsSz+EVTaDpKfoWMDpz9FVQX4w1Zs72wR0/BhxLQRa6KyPJZ3gdymE4zaW7bQVMwVQ8kbibHoRq+EIanZpBiPsGjkcJw16ytGt1D56P63vvE7LyieofehbuKJVmQ6rQ5btsKuymQce3shvn9rK4fpIl5d9GU6Mxhd/g9mYeo1OMkb9c7/CiFT3QsRSX1MUgddQaI62v3EpFAW17ByS217PTGDdJJOQNKjpxGle/mi7NicRw6xMn7nWH+yubOH7f1nDhl01rHinkvt+t5LKhvSZa6cj4i0kaw6+q9XBapZJaLAIuAWN0fTF0+qwaSR3LcdJ9szMyr4gk5A0qAnAsdLLGDl2+h9wximCp97a267JdmDt9uoO1y2dimN4UQPplRgUX+hsI5T6Cb9LoTGSPoVT8Wah5pSR3Lk0A1F1j0xC0qCWFC6C825s36hq6MW9t6hXF0l04l2u/yYc0LX0J2lq1w6UVH1kX/FxhHZidltg3o1Y/uKuBST1Wz6XQkOk4wspdeQsEhuf658XWh2QExOkQc1xwCmdRu51n6V1w0so/hx80y8n7ivp8T17VEy02p00L3sEJxknMOd6nLJpmB3UrOs4VofrzxvNpt11bW2aqjBjfEGXJgo4DiRyxpJ79/dRI3VYug/LVyC3+BhEAm6FupaOCxWrOWWYbj/m7hXo4xb1cWRdJ5OQNOiZiguKpuO5ejoOgpjl9MqmcVrjAWof/1Hb1/Uv/Jqcq/8TSmd1+hjDC3zc9+G5LNtUgcelMf+cYgqzXF2ut+Y4EDdyyS8d0e+nP0tdF/K0L93zbtrYhcRXP442ei5C7d8XHzIJSUOGaUFvbVmqKIL4nrVp7eF1L+Avn0HS7tzItyIEw/J8jLx0PI7jYNuOLPgppcnxK6zce+otW9TccqxALol3XsI1/eo+jKzr5D0hacjQieMKH8LVegSdU19FdofjgOJNL0mj+rJwurG1hmXZPbJWRxqccrwqjVGL2Gm2HtEmXEBi47PYrel7VvUnMglJPS5pO1Q0RNlXFSbSwR483aU7MVzNBzDqd6XtzXMmXrMe9q0kvuU16h76FtE3/oBh9lydLcdxMEZMbytTA4
Ci4p91LabdM9sxSNJxiiIoDGrsrz31xZTiz0EbNp3Y0r/2YWRdJ4fjpB4VTVr84+VdLNtUAUAo4OJr75tNrv/stqs2rDCRpX8juiNVKVj1h8i96ctE3UWnfZ4Q4G45RN3zv8JsqEQLFZJz4d00vPUw7pHbYNjcs4rrZHFvEXm330eycgckE+il44n7Bs7KdWlgKQlpbD8aZ0LxKeomAtqY+cSX/pXE7pUYY+b1YXSdJ3tCUo86WN3aloAAGlviPLJkF51d9q8oAkXt4G1Zt68tAUFqW4CWlU+gidP3tAyzmdp//xizoRIAs7GKhrceJjD9YuKHtqB2dK7TEAISlkNrwiJhtj+340DMXYA18jzs8RcT85V1ayhOkjpjeI7O5iOnX5QqVA1j2pXEl/8NO1x32sdmiuwJST3qaF365nLbDzQQMy3c2uk/8JujJsveqWDT7jrmTylmxrh8/C4VIcBsOJr2+MSRHfisOKZy6i0HnHAddqT9sJsdCyNUDVfZRKxT7dnQ0bGAXRUt/P7JzdQ2RVk0rZRbLhxNwJX+ZyQnE0i9rTRbo2aTRX2rRY7v1LspK6FitJGziL7yK7zXfQWh9K+PfdkTknpUeUF6tdyZEwpw66ffcjxu2vz0X+t4ZMludhxs4M/PbuXPz2/DIvWBruWWpz3HPXoGpnr6CtrC7QflXecWCmogB7V0UpeSRW1znB/+dTU1jVEcB97acIRHX9vd5dpuktQTVEUwpsBg1b4z74CrjZoDQhBf+VAfRNY1MglJPaos38ctF45pKzMzpiyL6xaNBMdBCHCZzRgNu3FFK1E5McW0qiHKoar2Wz+v215NfXOMysYoB5Ry3Fd+rm1vHqN4NN4ZV2M5p88ASXcuoQvubtcWWnQbYthM4lr6Rm6nU1HbyrsnrC3bWEFrfGCsTJcGn0klBkt3Rc647YcQAmPaVZj71pLYuayPouuc/tUvkwY8QxVcOXcYC6YUE09a5AbdHK9E4wofpu7xH2JHWwBBYN4NaFOuwBSuU9dGq9qJO1rHy5XZHGqCT7zn54ScJhxfHrFTbWR3EttRUEcvIq9oDHZLHUogB9NfTLwb1QN8nvQ/l9wsD/oZhhklqbeUhjQcB7YfTTDxNBMUAIThwZh1A/EV/0ANFqAW9V7pqq7IaBL64Q9/SENDAz/4wQ8yGcbgJ1JDSUfrIvg8Orr77FZQt8RMDteEURRBWZ4fn6v9cJcAClwJlOgB7MMtqIFs8GbRvOwR7GgLgXMvRcsqQM8uwj66GUVzMzJUxuSROWzZVw/A4skhrp+skWtWIdwO141o5puvaazb18qic0rbhtGEACNeh11/GKHqiJwy4mr79TqW0LH85di+MqqaYtTuD5MdcFGU7UFTBIqw8UQqSdYdQagqSt4Ionpu2usuy/MzZUwu7xwrq6MI+PD1k3Gpos/vAbnMJpyGwzi2hZpdRtyVI+9DDUFCCKYPc/HC5vAZkxCktl03pl1J9KX/xXv911CyCs/4nN6WsSS0YsUKnnjiCS644IJMhTAkCHFse4C/rmlb/Dh3chHvv3ICrm5cwde3Jvj2H1fR3JoAIC/LzdfeP5ssz4nEZlhhwq/8hviBzakGRSP34nvwjZuFkV9G7NBW9Oxial/4LXY0NQSnZuXzxeu+yNL9JVjRMHNjS4m9soTGY8fMXnwH188ezTt7alk8rRjTTE0ocLVWUPvwd3ASqXFxPbeMrOv+K7XBXLufg+DtbdX87snNbW03XTCGq+YNw9u4h+qn/7dtAoMayCH3pi8RdbX/A3XrCh+/fgqHa8O0Rk2GFQcJebQ+//B3J+po+PePMRtTkzUUt4/cW79BzHP66erS4DSpxMXbe2Psr00wIu/MSyHUgtE4Y+cTee7HeG/4JoonfZF1X8rIOEJjYyMPPPAAH/vYxzJx+iElbjr83783t1t9//aWoxypi3T5WKoqeG3dkbYEBFDbFGPN9hrEySWjGw6eSEAAtknzupcwm2txFY
/CMU3iR/e2JSAAq6kG59BGFk4u5NIRcWKbl7Q7d+PKpxiTrzF9hA/l0FpcZiOq4hBe83RbAgJI1h3GqtieFntjJMmfn93aru3x13fT0NxK67Zl7WbQWS31xHev7nCI0K0rjCkOMm1UDuOGZff5nAQhIHnonbYEBGDHWolseJEuzjaXBglNEcwa4eaJ9Z1fwK0Nn45aPJ7ocz9u9/eTCRnpCX3zm9/kc5/7HJWVld16flf3au9r/Sm+ytpWahrT32TRuNnlOC3LZu+RprT2g1XN5OWNb/u6pSJ9mnaysQoUDYSKFsxt9yHadvy6QxTk+glXpa8Cd+IRHDPB+MRu6p5+BKNwJAU3/zetaZu3gd10NO211R+oJ2mmT8dujZkEG9J3WU3WHKQo15/W/m6Z+F3XrDqS1pas2U9+0EDR04dk+tP78VRkjOm8PhdaB1t7dGTBBBe/fqWaQy0wpfzUSxZO5sy+lObVz2G++nOK7vxGh++dvtDnSeiRRx6huLiY+fPn8/jjj3frGP25KnB+fqBfxacAsycWsnrbiQ9aISA/y93lOIWAC2aUsm1/fbv2OROL2h3LFUwfFvKOngGqhuLLJV6xi8D0S4hX7G73GNfImdTUtOAOliBUHcc6kYz0gmEUmocJr3sEgETVPmK1lXgnn0/Tm/9sdxy9dGLaa/O5VPKzPdQ0nEjIHpdGTpYH94ipxA5ta/d4z9jZZ/z5ZOp3bQyfButfbtfmnbSY+qYkjpNo197f3o8dGSoxdjWJRVrjxGKd3yF1wWgPv19SwzevzUPt5CaIztjFJDc8y6F//gDP5Z/utTVEp3vtfd6Bf+6551i2bBnXX389P//5z1myZAn3339/X4cxZCjAey4bz4zxBUCqjM6X3zubgqzO7XFzMseBKaNyuXHxaDRVwaWr3HXZeMaWth9TNv0l5F776WMFPQWeUdPxjp2FMWYecX8R2Zd8EC1URODcy0DREJpB1vl34hSkZuvE3XkU3vYVtGA+AK7S8QSmXkT4XcnGjrWij5mPf/plIBSE4SZ08fuxckamxe41NL58z0xGl6amZZfk+/jq+2YTcLtwjZqRikU9FsuCm6B4cpd/Pn3FzhtD6IL3IHQXKBqBWVejjph5xmm60uA2tkDHpQmWbE8fiTgVIRT0aVdBMkrs1d/idGHxdk8RTgbfuY8//jirVq3q8uy4/nzV1F+v6hxSs9oMTaG8OOusYhQCWmIWQoDfreF0UO1Z0wR6sgWRjIDqIqH5MR0VjTjKoXU0LX8cNbuYrFlXoPjziLoLOPn9n58foLm6CpGMgctHZNk/ad38+okYDDe5d32XuJGHKhy0RCMIlaSR1e7+V8y02XmoiTXbjzKyJItzx+WjAB5DQz9px1JDmGixOlA04u5crE4s/cnk71oIMJJN4DiYriysUxRJ7a/vx5MNlRi72hNa+caqLvWEAOpbLR5a3cw3rskn13/6BeIncyyTxOrHULJLcV/wIYTo2f7J6V67XCc0RAgg6O6ZX7fjgP/YtOx3JyAFG6NpP+F1zxNNxvHPuBIzbwyWk3q8Vr2Dmud/C4DZVEP1/k3kXvYhGFmQdp648ILhBQc8c25ADeQQ2fIWWv4wAvNvJO7KAwcsR2Adnw13UjxCgVfWHOLx1/cAsHRjJS+vOsTX3z+rXQICSDgaieOz4fpw7akiQE80gG1hubMxnc59cDgOJxbb9v3Fq9RP5fhUzh3m5q8rGvnsJTntJwydhlA1jFk3klj1KPFlf8O18J5OP/dsZTQJ3XTTTdx0002ZDEHqYUbzQWoe+g7HN4+L7d9E3k1fwsqbiKoqRLavSHtO6463CY6aTdQ59RBhXAshpl5H1jmXYasGMVs54/50zRGTp97a267taF0rFbURxhRn/ka4bkcxty2hduUTYFl4Jy3CO/cW4nrXKjlI0slmj3Dzz1XNLN0V4bxxvk4/T2gGxuybib/9EKx6FPfcW3sxyhPkpE6pxyiKILrrbd6dHc
Jrn0VTU3vuqIH0RaCpjd/O/FZ0HEgIN+YZdil1WS24mvZCMr3MDoDdT+6diNo9NC97BCwTcIhsfYvk7hWnrh4hSZ2gKoIrJvt4fF0LtS2n3n21I0J34ZpzC+beVcQ3PNtLEbYnk5DUozraz16oqQV0tu3gGT8XYZwoOip0F/5zLiDunN1+Q8d5YlU0PHwftQ99G/W1/+WyGe1n6uUEXJTmdf7qsLcoiiB+aEtae2TbMjSnZ3d9lYaevIDGrBFufv9WI1YXd+gVhhfXnFtJbn6ZxLbXeyfAk8h7QlKPsW0H99jZtKx5DuzUFZiaXQoL7iFqQjxhkXSXkXv3d2luimJYYXxeD/FAOSoWWrQGx0ri+POB1HCZEBBPJNGw8LoUSMZJaj4sR0EngWrFMTVvatKDsGhZ8ShWS6qsjnV0N5dN3UbJVbNZvrWWEUVBLp0zDJ/R+Ru2vcW2HfTc9A3vjKJRWEI741BjpigCdDOMo6gkFY8sFdSPzRzu5kBdmOc2hbl2eteGn4UngDHnVuIr/4XiyUIbcW4vRSmTkNQJ0aTFoepWmlrjlOT5Kc52o5zipmXcX0b+nfcR27mS1pxxrG7MpbBBYfOavby+7jB3XDqeHQfqeXtLFeUFfj5w7TBG2nESa56gYf3LgINRNArP9Z8hZvp5c/1hnlx6ALehcvf5RYw9/DQevw/3rOtYt7eFRNJkWLZKeUkutuoifmy9j1E0Ct+E+TjJGBcEjhCcUsDOWsgPutpK/mSanpWPnl9OsuYQAIo3iG/iAmL99IPdsJpJbH6FunUvongChC58L1bhZCwyn9SldEIILp/s4+9vNzGpxMXogq6NNij+HFyzbiT6xu/xej+PWjC6d+LM5BTt7urP0zkH23TTuGnz639vZtPu2ra2T906jZlj89pNhX43TVN4cfUhsoNuDlWFefLNPcycUEA8YbF574kdHnVN4UcfPAfl0S+0e35w7vUs1+fxu6faLyL92nWFFCz9MVr5Ofw9eQnLt9ajqQr33jqMUSNLaX3zL8QPbCZr9lU0vPGvtueJ4glYi/6DrOz296R0EgjHwlS9p309HTmb37WmKcRe/BlaVi6aPxvHscEyiR/dh+fK/yKZ7Jkpej31fhQC2Pxs6h7WSfLu+Bbx4PCzOvZg+5s53TG6ojtTtDuyuzrBmzsj3HtdPl6j63dgrKO7SG55JVVnLpDXrRj61WJVqf8RAlyJWvSjGzCq3sFlNrZ9r6Iu0i4BAfzxma2E46e/4elEm5hp7CHL5bDmWLWGMWWhdgkIIGnaVNSm17GLxhK8uOpwWvs7R23UYB7moc3MLk915E3L5t9rm7DjEQJzbyQ48wqaVrW/qepUbifPrmn7WhE2rrrttPz7ezT+62uI7S9jWJ1f5He2LMvBKBlHy7qXaHjzIRrfeoTG5U+gF43BsvpHT+1khh2hdeMrae1m1Z4MRCN1xZgCgxF5Bn9d3tStBc1q0VjUETOJvvA/OMmzT4rvJpOQhKu1grp/fpP6p/6Huid/SsOj38WVSCWeaAfJpjWaJGmephckLJJrH0dd9kc0M0JOMDX1Op608HWwVsnrS5+a7Q5mU5STvmtqnl/BjkcQLi8tiRNDgtXNJpZiEDXycY2Z0644apuTCjUazYeofeyHJKsPYLXU0/j637D2vt1nayMcx8EYPQstp6StTQsV4Bo7r8s9sr5gCx01mH4VrHjldPKB4PyxHg41JFm2u+uFiwG0UbMRvhCxt/7c45U5ZBIa4lQFIhtexImfeHNazbWYBzchBBTletM2bZszqZBABxu8tR0zUkvrptcACNa+w3nTS9A1hTfXH+Ha89uPK8+YUEBZfhDP+HltbVqoEP/wCVw7UcE46dy5QRcTvA048QjWjFt5ct2JYqqXzSjAJSy0/ctIHNmOd/yc9kEpGmp26gNfCEhU7iRtKvm659GdvqsoHDNyCd34VXJv+Sq5N3+F0M3fIObq3nBHb0uik3Xene22StdySl
AKRmUwKqmzNFVw5Tk+HlvbQk0Xp21D6v6Sfs6lWNV7SO54s2dj69GjSQOOgk2yNr0StVlXgTZekOs3+MYH5vCX57ZxuCbMwqklXL9oZNvVS2r9j2g/DdSxEboLvXQC2uG1jDXcfOWu6VQ1W+QEdO77wAyONibwew2GFwZQDRVt8Qfxzbwax0ygBgtoXPIHsqv2862r7qIiEUB3exhZ4CLUehDljvvYFQmiKDvwe3SuWTiCKWPzaXz+pyQqdgEQWnQLiuGhdftK9Oxisi68h7inEJzUeiPVlV4hW/GFcPp4Zlpc9UPOuL474VmIh0aSf9d3sOqPIDQXIm8YcS2U6bCkTso/Nm37D0sb+eIVuaecXHQqQjMwpl9D/O2H0EomogTTq5x0h0xCQ5zpKHjPuZDE0X3t2l2jziVhpT6Ny3K9fOnumSQtG6+u4jgOmpNAqd5O6/oXEJ4A/hlXEg8Mx0EQceWza/aXeGVzEwUBlUvdCsXrfsvI0jE0NCg8Wj2a5dsaABheFOCzt59LlsfA9JcD4FFieEZNx1U8mqBSR3DHo5hN1fgX3Iw55Tocx2F4CD5581TW7ahhydrDPPHmXj5zxS0Ma/o1VmsjjUsfxTX8HArf+30Sqje1FfhJyUUrHofqD2GFG1MNQiFr0W3EnJ75k1AU0S+H1c6G4whi3hLwlpz5wVK/NHO4m701Lby6rZVLJ515q5J3U4L5aKPmEHvjD3iu+XKPDF/LJDTEOQ6ow2cQmFdPePWzoOlkLboNO7f9sJkmUjO6jo8Hi6NbqHv6Z23fj+5cTeE99+MoGq/uTPD7Z/YDsA1YuV3l+x/8GIknvsyBGZ9h+UnbShw42sLLqw9y6+LR6MlmRONBrNYGrOY6EnWHie5/h5zz76Bp1TMYZZOJHzt/fTjJd/+0CtM68UH/i5eq+PYF16Cv+hsAiSM7sVBJkr5PSszIJeeWb2BV78GJR9CLxhD3l5x1L0i3o4i6PcQPbEHPK0MtnZS2y6skZYoQgksm+XhodTMzhrnJ9Xc9BWgjZxFf/jeSu5ZjjFt41jHJJCSRUP0o024gd/KFgEJSD552lbUuTMKrnmrX5p98Hq3rnqdFzeLRNe23xY4nLfYdjTDel8We2vSZXxt31XLLecOILPs7wSmLad65itjhHRiFI8m96B4aVz5J3rWfbjcVuLEl3i4BAURiJmER4PhHvn/WlSS04CkTS8zIpT7kZ822SvZuaeT8aR7GlOVgqN27ulMEWDtep+mth9ra9PxhZN34JTCTgCChZ8kFnlJGHS9y+s9VzfznRTldfr5QFPTJF5NY9TD6yFmpLUXOgkxCEpAqPh1Xs058cQqqsNHsOHpeOYmqY0N4ioaWXUDjW48gplyF3tE+07obxZfNuGEhWFPT7lvnjs/HFasDt5f61/5Gsi61c2j88HbMhkr855yPpbnaLYrMDrjQVNEuEXndGtkBN3puKb6pF6GOmkPiNB/4LdEE3/rjKloiqTI5q7ZW88GrxnP+9LLTDqWZtsOh2lZe21hBbtDD6NIgPkNFTzRSt7z9Ro3ukjEk1j9H87oXEYpG1sKbUcacR1Lp+n5OktRTZg1389cVTWyvjDOhuOtJRM0uxQqVkNjyCq7pV59VLHJ2nNRp7lgViTf/QN2/volQVXIuugemX4+68B7MxmpAoOx6i9vntb+68nt0mqNJ3HNvobx+NedPPvH9USVBLp5ZhpOMogZy2xLQcVZrM4rbj+MOtWsP+TQ+ddv0ttlzHpfGZ28/l5yxUwncfC/2uEtIqKdfHHjoaFNbAjruoSV7iMdPvRZCCHh7ezXf+eMqHnx+O//z0Hr+99GNxEwb4Vg41omZR4rbh+IJ0Lz6WbBMnGSMxtf/jqjZddq4JKm3aapgwRgPj6xt7vaUa23sAhKbnscxE2d+8OmOc1bPloYMl9VCy9J/4hk2GSOnCKHq2Ik49aULSSgeppSUYhSNAqEyL6Qy7EPncLARKutjjBuWjd8l2H
AkjO2ayNXnCK4cF8L2F9Jguth9uJF4bjYloVKyz7sN5/jNTsdGMTzouWWYhzbg9gYhq4S4KxfHEcwY7uUn/zGVxpYo2QE3oaBB0hKAQWfGvBw7faqq7YDhRDEaDmA1VaMG81B9WSSrDyA0g+bQeP7+wvZ2z9l5sJGK2lb8xdn4zlnctvmeq2QssQOb084R27MWvWRatxelChxckUqs2oMo3iwUlwez4SjC5UXkDj+xzxCQbKjCOLoLJxFFzS0j4S/BduS1pwTjCw1W74vxzpE4U8u63jNXAnmooWKSO5diTLqo23HIJCR1TmsdnvIJNLz+97YmV/FoChedSyJSRe3jP25bTa14/BRdcA+rjgbJysnhSE2Yx1/bTTia6nW4DZX73jOB/3tuHweqUuuThIAvXldK6cqnyD7vVhpX/Bs7miqTIjSDnAvuouaJnxJadAvusfNJGlkk1z2Ns/oZskjt65acfQ3quTdgoaGqAsfhtMNqwwt8+Dw6rdETvaGPXl4OO16ndsUTbW2+SQuxWpuIHdhM8ryPE++gpE4iaWM6Cp45N6LlFBPZuhS9YCR2pIl4ZfuqAlre6Yf7zsTVuIeaR+5HaAbZ591G7ZIHOX7jS88fRta1/0Vcy8JlNnH08R+TrD1WeUIo5N38JeI547t9bmnwEEIwa4SbFzaHu5WEIDWpKbnlVfSJF3Z7ppy8JJI6RdWNtFI48co9uJ0Inr2vtyvnYUfDmFV7mVpgkzRtqhuibQkIIJaw2FtPWwKCVMflwaUNKFMux2yqaUtAAI6ZIHpoK0bRSJpWP4fTcATDihA7uLVdPC2rn0VEGzlY28rfX9nNU8v3U9McP+UfR1bQx7fuGsc1swuYNiaHz1xVyuxhGs0rn2z3uNaty/AMnwyA+8AyFkxuP/HC49IozvOmfiZaFky6guDN96JMvx7PtMtQ3CemwqrBPIwR53Z7CEQnSdNb/wLHxjdhLs3rX+LkmRfJmoM4dal1X07t/hMJCMCxaXrjbxj0fOkVaWAaV2hwtMmisrF724coecNxElHs2gPdjkH2hKROcTRXx6VwLAu7uTqt2W5twpVtY5o2TeH0D71IIr03UdcSx/blYdVsTfue1dKA6gmS0INsihbywhP7Cbhu4NILVLLX/gm7pRYUlV0NCt/569ttz3tm6T6++9H55AXSb75ajkpeQQF3LXah2Aks1QORBnDSh8kcOxWvdXATd1x7A/k5o3hzQwUjioPcdtFYQl69bQTQth0SpBa92t4icu/4NlbDYYRQETllxE4aLusqYSfbtqpQPUGscEN6rIlUcrfj6bXwrJZ6hJ0E5exmNEmDg6oIJhUbLN8T5eaZ6XuBnYkQArV0EsmdS1HzR3QrBtkTkjol6QrhnTCvfaOi0eJ4cMaen/Z4o3g0BxoVDF1hTFko7fvD8728ewPRS6bmoOxYgj5qRtrjPSOnEq/ez8GRN/CDf25hw65a3tpcy7f+XU3T9LsB0Ceex6Nvtq/+kDBtNu2pO2VvyHYEMS1ExCggrgbAl4cWar8RnuoPYcdPlPPJ8Thcv2gkP/v8BXzypnMoyHKd8haU40DMyCFZOJVEweR292u6I6l68U+/FIDI3g343v07EUpqDydAzS0H2r9u37RLSGpdX6QoDV7jigzWHoh1+/lqyQTMvatTleC7QSYh6YwURWCj4VtwO/5plyB0N0bBSHJu+SoHIj7qguMInP8eFG8QNZBDaPGdtGSNIa98OGPKsqioDXPnZePJCbrJ8ht84JIyCo8u5au3j6eswI/HpXHtgjIuya/BHLWQh7Z7sRZ8EDWQg+IN4ll0F1YsgmvmdTyxqn0Vbst22FKn4520iMCcG0h0cL8m2YX9g+KKl+zrP49nzEyEZuAeMZXcyz9C67YVaFn55F73WcysYTi2QyjgQvTxmh/HAW3cIoLzb8JqqUMLFRGceSXC8KDnlZJ385dIBFIVDZL+Uopu+wp6TinC5SUw9zpcky/G7n9Fuq
UMKgioxJMO1c1drykHoPhzwXBjdbOiutxPqIcNpr1RhIDq5jgHD1WTo0bIzc3Cm5OPjwi24iKJgRCCmGmjCPA6YXRhERU+Eo6GrgiEACNShV13gKg7H+HPw+d1oyQjoAgijoeopeJ3a2hmC1srk3zvr+sIeHUunZqNrgre2hHmG3efg9+j852/bWbnocZ2cb73ivFcOqOIpK2y5WAjP/3HurbvKYrgux+ZT1GoazdeNWGhmhFszYOJhmGFcUT73UQz+btWlOM7nGpYihstGcZRdRLvqg6Rnx+gqboWYSdJ6v5+mYAG09/MmY7RFT21n1BnPP9OmFkjPCwa6+3W85M7liJcXtwL3tPh90/32uU9IemUGlqTNBw+wMS9j2FV7EAYblj4Huxx80g6qfFjx3FwHaswkCTA8dub+rFRIHdrBTUPfxsnkeruC83Af9vXifiHpR4gwKOAZdlYwkfcagSgJZLk8ZWpe02KgDguXI7GzReO4ft/XdMWo6EpTB6VR9JOLWQdX5bFF++eyXPL9+P36lw1fwTF2e4uVykwHRVTDbTd848rx4aw+sklm22fFJMNlnrqIbaEcIHqSk0hlKQOFGZp7K1JdDsJqcXjSKx9Emf+XV2eJSeTkHRK4XCE0sMvYVXsAMBJxIi+9gc8hcOgE7tpKoogtmNZWwKC1Ey3yKZXMRZ/uMPdQ0N+F163RiR2Ymhg/pSStjsbo4sD3Puhuby54QhBr8H8KcUUZLnbZptpimBCWRaT7pyOQGBZtiyTI0lnkO9XWbWv+/eFRCAfALvuAGreiC49VyahIcbBYc/hJiprW8gOuMkLuNImCByXYySx9q9La3caj3Y6CcWba9LazZY6/NEqTD0vLUHYtsNdl01gw64aKmtbmTYuH1UIzGPrahQhGJ7v4/1XTMBxHGzb6XC6s2059JtuiyT1c9k+lZpw9+4JwbFZcsXjSO5Z3eUkJCcmDCFCwLrd9Xzuf17nhw+u5cu/WsbyLUdxTvFh7fb50PLL09qTRudmV5mmjXfsnLR2z+gZtKx9Dr2D7bTzs9wsWXuIlkiCkSVB3t5ciWlZhLztp49alt3hgk9FOLjitbia9+OymulupXlFpKoS6BXrMOp3YNjd25FSkgYCnyGIJR0Sp9kx+UzU4vGYe1Z2eQ2c7AkNIY2RJP/373fa9T7+/OxWpo7JBQSqIvC7VOKmTVVjhCCt5C++m/onftxWH0qMnMVr+1QuzmnGpavEFX/6m05AU9TEshxyCyaTfeF7aNmwBBwb/znno/lDNLz+D3znXIDhU0lwYtKAS1P4zK3T2Hqgge3767nnigmMK8uiM7lExULsX0ntK38Cy0TxBsm94QvEjt9/6iQhQK/ZTu0TP25bM+QePQPfBR8iofq6dKyBzrQdwnETr0vDOFWXWRrwhBD4XQotMatb2zsAiGAhCIFdvQe1cEynnyeT0BASjibTpitfe95oHn9jL0s3HMHQVT5+01Sam8Kco+1DW/sw1Qs/TNN5/40ercXWPCT8RaxZXsls/XUCTgvu0TNJ5ozBOvZWipk2z688wPMr9gOCS+cO44JpCyk7Lw+ScRwzTuOyx9AC2US3vEns8HZCl3+MuL+sLTkG3BrXLBrFwsmFxBIWjpP6IznTFZYWqaL2xd+1fW1Hmml49peEbruXuOj8DVfDitDw8u/bLVqN7VmH79zLYQiVvKluivOrxzdxsKqF4lwvH795GuW5HnmPbZBy64JIwiG3m89PDclNJLlzmUxCUsey/S6CPoPm1lSvpiDbQzxh8ub6VOXqWMKiuiHKWFcdytI/IkbM4O/rbdbuPnTsCFGEqOe/7piGuuIftDTXoPpCaIYPy1+OELBxTx1PLz2+S6vD88v3kx1w4R1Zhnj1fqzWhlQP5bIPUvfKX7DDDdT/+ydk3/Gd1GLRYyKxJOt31/Gvl3di2TY3XziG6aNzO94m4hi7pTatzWyqglgLeDqfhISdwGqpT2t3Yh1UjBikok
mbH/19LfXNqZvVlXURfvDX1Xz/4wsIuOTHxmCkq4L4WQzHAahlk4kvexDX/DsRmtGp58h7QkOIz6Xy3++ZSX7IA8CsCYWs39l+4oBl2wTN1AdwvHgqa3c3tvu+40C8NYx1bMJBvGIXIp76cFZUhbc2tN+KAWDHgQYakjqB6RcRWngz/snnYdYfxY6k1l1YrY3Q2n4R6ta9dTzwr/VU1rVS3RDl14+/w47DTad9fYo/fYMuNZCL4+pahQBTD+AZO6t9o1BQQ8VdOs5A1tASa0tAx0ViJrVN3Z9BJfVvqhBpG0V2leLNQgkWYB7Y0PnnnNUZpQHFcaAs18NPPnM+P/rkQq5dNILhRSd6H7lZbjRVwTo28UBrrSE/25N2nIB1IhlooUIcNXVl7NgOozso0VOY48WxHRqXPUbjsseI7FqDnYyBY2MUDEfLKQHXiXstqqrw1sb0ZPbSqoOop9n11PQWErrwHhCpt7UwPORc9QmSXbyPYzoq/kV34BkzMxWPP5u86z9Pwld0hmcOHl63jvaun7UQEPB0vb6YNECInplPqpZOJrn9jU4/XvarhxjHgZygGyueWlZ60wVj2HmwkY9ekE1x63ZcTVvQxlxPvHQy9vZX+Y/Fn+OHT1W0bfc9b0I2BQ2bAFADORiFI3ACqR6CbTucP72Ut9YfofFY0dK8kJvJo3Lxe1SyL7wbIQSKN4uWDa+Sc8FdxI7sRPVno1iJtvs+juOQm5We/PI6aDuZJXTUsReQX3YOdqwF4c8lbuR0q2J1TM/FffHH8Z/Xgq0aJFTfkLoXEvLpvP/qSfz+qS1tbbdfMo4cvyx8Olg5jnPK5RpdoRaPI7nlVezWBhRf9hkfL5PQEFeY5eJ/PjiOlse/gx1pJgEkdiwn77avYwM+xcOPPjaHisYkPrfGsJDA0yBgzATUrAJMTx4JJXW/RQjI82t87s5zqaqPgBAEvTq7DzUyNuSl4bW/AeAZMY3A1MXUPvebtjjCm14j785vEfMUY9sOC6eVsHRjBVec4yfLZbOrDhbNHoZ1huECCxXLUwieY9stnEXmMB0NUz/2RzSEEhAADsybWMCo0ixqG6PkBNwUZrt75ENK6p+SFhja2f+ChaqjFo0luXslrmlXnvHxMgkNcY4DWtNh7EgzkNqozjNmBslomB2xfP748kFGlgS5afEY8oNG6p5Q4XQgNROuoSmOx50k4NKor6tFj9bxg8eqicbbL3ybVj6Z4LF/C5eL5jXPt4/DTJA8sg0xthjHgVGFPn5wlU7zK7/CjrUyrmAE2e6PEyW9BpymKdi2c1YbxUnpFCEoynJTlNW9Dc+kgSWWtPEaPXOHRi0ej7l3lUxCUicd6y14x81B9fhpXPoYODalecN438K7+NHTFWzeU8f9H5uP/9jMqJrmOD/+x1pqG2NoqsLdV05giqeKRGtLWgICaDWVE0lIKB2WfXdOqq6ZqDlI4zP/y/EuSLJ6P40v/x7/Nf9NktSsG92OIqp2ENn6JlpeOe5x84l5hs59G0nqSZGEg9/VM0lIyRuOve4pnFgY4T79xCA5MUFCyR2G4vLiKhlDy8YlbetjrNqDlFe/xfjyIOFoksq6VNUA04ZfP/EOtY2pmVKmZfPnZ7YSVXz46ncwoTzY7vguXaUoP4ucaz6F4vYT2buBwJQL2gehahilk9pGz5INR3n3GFiiYhdqPDWjTlEE9p4V1D39P0T3rKPl7Sepe/S7uBPtZ9lJknRmsWSqxqLX6JnxVqGoKLnlmEd3nPGxsickkXDnkXv7vSR3r0r/5pF3mDpsPjsONaNrqWuW1rjJ/srmtIfWWz6CB9byobnTeMQTYvWuRoYV+vnwdeeQ5XVhemeRc9dYsBJgeMkNFtK68WUUXwjf1EuI+0va8o7qTd/8TQ3kYOupoSE92UL98kfbfd+OhrHrDkFxd5fbpVNVBSEEpplebFWSBovGiE1eQO1yBezTUQIF2LUHYcTM0z5OJqFBpKOqAqeqNKAoJ4
bEHAfinkKMwlHpBy0Yy86jCSaOyKYkz4eigNetMmFYiEjCpKE5TjiaZO74bCZmRQje/EWSjVV88jwfrfMCePOLsXVvWwxxLYhiiNT9m4Jz8Fw5BccRxCy7LQEJIdDyyvBPv5TwhpePBayRfflHU2VzHNqmkwrDjVB17OjxvV4chBC4zQaspmoUt5+kNx+LM08tVhTRdl9JCIErfJjo5iVYLXV4p1yMXTAOU7SfHXb8b3YozZyTBp/qFpOy7J5NB8ITwG5NX/T9bhlJQr/4xS94/vnUjenFixfzxS9+MRNhDBrueC3Jim04kRZc5ZNIBMvRky1YR3diNR7FVToBK3s4cQwO14R5/u195PsVxuarOAhiejZ7K5ooz8qheOJ5RLe9BaTWxzjTrmNWnYeSPA/Jyp34YkcI5Jbz1Ys04kcP4QQL0PNGElvxMJEnNhHz+PEvfi+rqnwI4TBBayAv2ErcU4hmRRF1e0kc3Y2eU4ZSOJY4QY5nH1eyAatyB3bjUSJZeWihfPJv+HyqbE9WEXF3/onhOi1A/jWfJH5kJ04yjpaVT3jbStS8csTR9UQPbEb1h0jGWkHR8Ey+gKjecQ8pbtrsrwqz53Aj5YUBRpcEybGqqX3oW20186J7N5Bz9X8iymZhJBqxju6k7p0KjNxSHARKdilxb0m3poNLUqYdbbKYUtrD0+9VDY79/ZxOnyeh5cuXs3TpUp544gmEEHz4wx/m5Zdf5tJLL+3rUAYFV6KWuke+3Ta7DSD/pi/Qsv4VhGOTqDtC84rHyb7io6yLjeXXj29qe1xJrocvzEuwP15Awgjx19cquHTGZYy6bCGKlaBOy+GXjx+kKRzHtBwmjwjxH+VVGBtfxTNiKpE1z+Mun0hyv5fEgdRx7WiY5hd+Rd7Cz/Odp2tT55gToXiSRmzbUppXPnEi9uHn4L/skySEB5fZROOTP8asr2j7fmjRLdS//EeCC2/BKprW7gPeiFZR++yvsdtK6QgKbvkS8Z1v07TskRPnKBmLnluCuX8D7uHTiRntE5EDPLfywEmlhmDmhAI+Ot9oS0DHNS9/jJybxtLw/M9IHj3x+Kw519K6/Amyr/wEMV9pF357kpR5juNwsD7JLTO7tvPrGSWiCNeZF4r3+cSE/Px8vvzlL2MYBrquM3r0aCoqKs78RKlDVtWedgkIwKyvxFU4HDsZwztmBqHzbqW+oZkHn9/W7nEVdVEqnVxGuRupbYyyaFoJ//fsbtbVB1jVkMP9D++irinWVspjy/5G6oITSNYeQfWmJh+4yycQ3bM+La4su77tHBWiEJqraH77yXaPiR/YjGiqBMCpP9QuAQE0r3kB36SFtKx6Gt2JtrULAckj209KQAAOTSufIFG1r90x4hW70LOLsBMRrOq9aXE2tCZ4dln756zdXk2VmT6jRwiB01jZLgEBNK9/Cd+4WcR2rkCRC2mkAaa6xUJToCirZ/skdksNSk76VjDv1uc9obFjx7b9e//+/Tz//PP885//7NIxurpXe1/ry/ia9rXfg947bg7RveuJHdwKQPzITvS8ctQ57yGWqEx7ftIWqIpFNG6iH1tvI0htqd1RHamEnfqQPT6d2myuR88uIvmuBBJXfEBL2zkcK9muKvVxmmIRzA/QUp1+499OxFB0A8Xw4A94Ud0nrqoatkfTHx8NowXSh9wcx0boLoSVSPvdNMUa6Gh5ka15ELobJ3miVlro/NsQooOp5WYSoek44UZyc7tWp64v9Pe/F5AxdsTrc6H1wOLRM1m+t4nzJgTJzu65bUocy6S6dj8F134MPfv0P7eMTUzYtWsXH/3oR/niF7/IiBEjuvTcmpqWMz8oQ/LzA30an7tgVKpW2rEPeKNwOI1vPdLuMcnaQ+ToCS6fN5xnl+1va3cZKsVGmGorm+HFQTbvrWfSyBw27a7l3HH5TB2Tx6bdJypTZ/kNCp1ahO5um0XTun0FORfeTd2rfwU7tT5IlE9n1VEX0IJLVynWW1
CCw3GPmEps/4nhQNWXhektoKamBXewGKG1HwLzT15IZPc6gotup77FhpYTP1d3yURAcPI07sCsq4gePFFmBkDxBjFyy2lc+W+yLv5Q2u/GZ6hMHJHDtv0nbqDmZbnJy8ki+/ZvEtu+DCtcj2fS+Zi5o9GSLQiXFyd+YpM779hZRPdvJjDvpn733uzr92N3DJUYu5rEIq1xYrH4mR94FpKWw8aDEb56dR6NjT23caN56B1EqIRG0wc1Lad97RlJQmvXruXTn/40X/3qV7n66qszEcKgkfCXkn/b12he8Th2uAE9f0SHj9PcPkaVhrjjkrG8sb6C0lwXN8wM4dJMNHLZtbGS6voI77t8NKt31tMUjnPD4lGUFfhZu72asWVBrpkgCBxYgv+6TxGrOoAWKkTNH4GZPYzsi+5B0XQcd5BdkSCrXq5k1thsbpyZxfBsh5i7kMCFH8BzeDNWUxWOZeOevJiYlpqKHfcUknf7N2hZ+QRmfQX+iQtQc0pwT1qMmZW+KV0iUEb+rV+lecVj2NEW/LOuhvJz8ZVMQgvmE92xEqNoFP5zzie8822yFt9N3FeS/nNRBB+94RxeXXOIVVurmDwqh6vnj8SjK8T0EpRZt6EpgsSxfZgsVy55t36d8Kp/k6w+gGf0uWiBXJRQMWb2yJ77xUpSH9h8JM7ofIOCQM+lAicZx9yxFM8Vn+nU44XTx9N5KisrufHGG3nggQeYP39+t47Rn6+aMnVVpwsLHBOESvTNPxLdtrzte67yibgv/hh7d+4jqCdxsssxhUFlU5KSXC/FNW9jBQpwkkka/CN54IndjCnLoqo+QlGuh4tnlTOywINqx1EAE5WYbeByooh4C+Gl/0CoOv4ZV2Bmj8IgTtTR0LFBqCRs9diU50O0rHgMs/Yw3nPOw5iwmLjWfj2QptgodpKs/Dzq6sJnLMWTet0WpjDaZs4pCmhWDFt1IewECI2ko572OEIIEpaNoSqdmuGmCZugVyEcNXEcSDr9c7XDUOll9LZM9IRWvrGqV3tCScvhz8ua+M+LshmR17m9f87EcRyS655EyS7Bveh9be39qif0hz/8gXg8zg9+8IO2tjvuuIM777yzr0MZVFIfsio44J1/B+7hU4nv34irbCJa+TlEt71O7kkz0/S5d/HY6iB3z/HgWfm3tvaDc7/MkZowR2pSN/23H2jgSE2ED1wziWjcIj/oJuDRETgkhAfcHjyXfzY11dtywAbzWH23BLSNlrliNdQ+/B2cZOqPqnn54/haGtAX3IPlnJgfY9oKkJoq2placCe/7uNsGxLCDTapY3XiMstxHHTlzLu3tsXpKKjeAInW/v3hKUmnsvZAjNEFeo8mIHP7GzhmAtf8uzr9vD5PQl//+tf5+te/3tenHVISWhAxfB7GyAVYto3aWkH4pAQEoKx9lA9e9FXyWk/MGFPKziEnP5c7LvWzcVct2/bXc87oXIYVBvjab5anynq4Nb783tmU5ZzYVkGYMdTmStR4BBEqJOHKT/swtxqOtCWg41o3v0H+rOuw9DOXe5ckqec0RizWH4zx9WvyeuR4juNg7ngLu/4Q3mu/ilA7v+9U/xxDkM6a44BlHauI8O5tqYfPpKL0YtbujVPgKWTqoo9htFbzZM1wXnvwHQAWTi3hsrnDKcj28PArO7ni3HzKczRW7I7wq0c3ct+H5mCoCrodIbbs70S2LQNAaAZ5t3yVWHBEu1N2tNWvMNw4yumHySRJ6lmO4/DqtgiXT/aR5z/7FOBYJsl3XsKJteC55stnLFj6bjIJDQEikI8wPDiJKIo3yOb8y/jtMyd2Ln3W7+KTNyxgyasnZq4t3VjBh6+bTH7IxXevz8a78V9YByqZMmo2+yZfSCRuYXgVRMOhtgQEqS0ZGl/9I8Ebv0aCEyuwRXYZev5wkjUH2tpC599JUs+iwznSkiT1ivWH4jjAZZPPfjmBHWkiuf4pRFYR3uu+gtC6XnVBJqFBKGbaNLUm8Ht0Am
6NhJFDwY3/Rd1Lv8ccMZ9/La9p9/im1jiKcPjCFXkkHZVn32lld0WY9Ttq+MxluTQ8979Yx6Zf23veZrQZw3vOp3AAJ9IIgNDd+KcsRvX4cSwT3YmTOKnOWlwNkHXN57CrdmM1V6MXj8UKDcdxwGU2QTwM7iAJLdAnddiEELTETFpjSbJ8Bh5dkfXfpEGvusVk1d4oX74qD/UsF1abFdtIblmCMf1qjKlXdLv4qUxCg4SiCDQVGsIJvvfXNRyti+J1a3zi+vFMGZFFvL4Sd/lEEuWTSC6rZN64LK4Yr+CzmsnKy0WpWELruudBUfnUlCv5d85oirMFHN3etv7nOPPARlzJJmJGLpo/G6G7yLngLhpXPonVUodwedHzh6GUnIt90qSDuB6CslkIIYg7DkI4GDVbqX/ul9ixMKovRM41nyYe6qCQag/bdqiRXz66kdaYSU7QzefvPJfSHI9MRNKgFU/aPLspzB1zghQGu//R78QjJLcuwQ7X4r3y86gFZ/f3KvcTGgTciTqMyvVEX/pfjNd/xrcu05k9JkgkZvL/HtlKbVUtdrie8KbXsJc/yPsvH80dpfvJXvYzjJV/IvrMT1CwMIpGgW3Bxme4aqzNucF6SKZPEVU8fhw1dY/HcfvJu/JjNL79FFZLai8fJx6h9plfYkRr054LtE1acMXrqXvqgbbyO1ZrI3VPPYBhpm8T0ZMaIkn+3z/X0RpLJdf65hj/75/riSbTqyFI0mDgOA4vbmllcqmLuaO83T6GeegdYm/+CSWnFN/N3znrBAQyCQ14LqsZ68gWap7+XyK71hA7sJnYi7/gvdMsXIaKbTscrarDXTIGANXlZlwogdj4TLvjtKx/Bd/YWW1f58YO4t7wCMm6I7jLJ7V7bOiiD5DQUrXjLE8udiKK1fyuhOPY2C3th/3ezYpHsKZdhzJ2IRyboGBHW6C1oVs/i86qbYqmlSSqb47R1Hrmir+SNBCt2BvFtOHO2en7dHWG3VxNYuW/sA6/g/eqL+Cef1eHk426Qw7HDXTN1STrjqR6MCdRt77I7LHXs3RLHSHdxIq0knv5f9Cy6TWURBjn3YtnHLvdltua4cKOhQlveYvAjMsJzLoKKxZBCRVhBk5sWZDEhbdoDIrHjx1tPwtPeEOnDLslZvL0qijLNgUpzy/k7sXnkbPq19iJGLh7t0ZXyOdCiPZ7APncGn5356eVStJAseNonO2VCb52dR6a2rX7Nk4iSnLXMqyKHbhm3Yg+8UKE0rN9F9kTGugU0eGbQqgapuVw13mFFKhNWJEwDUsfxSgYTjAvH9UXavd41Z+NHU8VBXWPnIrZUp+qR6douIZPIV4whWT5HOKBYVjvunaJugrJufLjoJxozzr/TkxvQYchO8CDL+7gldWHicZNdh5u4XtPVdE67TayL/8ICVfO2f1MziA34OK9V05s+1pVBJ+4eRoBj7wmkwaXyiaT13ZE+M+Lcgh6Or8cwrEtzL1riL3xBxRPEN/t38eYfHGPJyCQPaGBz1+InlOSVvzTM+s6LrFLyPGAT+jUPHw/en45enYhomIzOVf8B7GDW2lZ/Sx6yViyFt+DlYhTMHJ6av2OmcQ1agYE8km48k5bScBxHBJ5k8i/5wfYLTUIb6ow6buT1XHNkSRrtlW1a4snLeoCkykszen1yQGKgPOmFDNpRA6NrXHysjxke7VOVWiQpIGiOWrx9IYw71sQojync718x3Gwj+4kueNNlKxivNd9FTW7d/fIkklogIurfjzlU8i/7tNE923CTsZxRs7hB6+E2X54A6oi+NptYygMFeKftIiG1//R9lzfxAXkXvlRFE+QRNYwbBvM05zrdBwEMVceuM68AlvTFLxujUis/dncbne72XS9SRGQH3SRH+zh3SRP4VTbrEtSb4gnbf69Pszl5/iYXu7u1HOs+sOY298Ex8Z9/gfRys7p5ShT5HDcIBBVQ8Tyz0FfcA9VE+/gP/5SxfbDrQBYtsPvXzmCb/
4tNL39VLvntW5bjh1povapn6HHzrwXfE8JuDXed1X7yQ5Tx+RRnNu9WTv9mebEcdXvRGx+Bv3ImtSaKEnqRZbt8MymViaWuLh00pn3CLLD9STWPklyw7PoUy7De/N3+iwBgewJDRqOA8mkRVV9+p4gR+siCFcRdqw1/XmWmRrGS0TAaH8vRgiIWw6m5eA1lE4VAu0M23aYMTaXb//HPI7UtBIKGAwr8OPSBtc1kaKAs2MZta/9ta3NKB5D4OrPklD63+Z30sDnOA5LtkfwuRTunBM87QJSJx4huXs5VsU2jKlX4rny8z02460rZBIaZIrz0q98Fk4twcgrxSgcRaLqpC2uFQ2hqGihQvC2T0C247DzcDN/fGYLjS0JLpszjCvnDcfn6plab6oQlOV6KRuEvZ/j9HgjdUsfateWqNwNDRWQOy5DUUmD2ZoDMerCFl+6MveUFREcy8Tcvw5z7yr00XNx3/YDFE+wjyM9YXBdeg5AqipQOzlt8lSPVRSBqqZ+lQVZLv7rrhlk+VNXNHMmFXLzhWOIKgGyrvgYrvLUrDA1mEfORXcTPbyTnGs/S1xpnwyONsb4+4s70DUV07J5bsV+Xll7CNHFUh+pfXocTNuhm1U9BixhWzjJ9LVHJ08gkaSesqsqwcZDcT59cQ5uPf2j3XEcrModxN/8I05rPb7rv4F70XszmoBA9oQyqrIxyrJNldi2w8KpJZTkeOjoc9oBKuqjLNtUgSIEC6cWU5yd2kqhujnOis2VtESSnDethPI8H+cMz+b+j84naTn43VrblUbMVYDvys+TL6IkLQfbsvCOmk+U9jfnFWwKkkf48rjtOEKhKX86v3yjmVdXH+LyOcNwd3LYLG7arNlRwxNv7MGlq9x1+XgmlGehDpFsZLpDeCfMJ7L9xAaDwvCgZheTzGBc0uBT1WzyyrZWPntJDtm+9NEKu7mG5NZXwUrivuA/0EondXCUzJBJKEMqGqJ883cr26YFv/j2Ae778DzKOxieOlIX4d7fr2ybuvzC2wf49n/MQ9cUvvl/K9q2nl6y5hBffd9sxhQH8Ogqng5mZSbRMfJyaKppOWU/2GjaT9O/v8fxm0BBsYRPLv4cf1qVQFc7l4CEgM376vnD01va2n76j3Xc+8G5DC84883SwcB0VLzzb0MNFRDdugy9YDj+eTcRd+X22P01SWqN2zy1Mczd87LSNqhzkvHUYtMj244tNr0A0c+2T5HDcRmgqgpvbqhoty7FcVKJSHtXL0PTFF54+0C7tTO27bBqaxXv7KlrS0DHPf76bpyz6GioqiC84SXafUo6NqHq9Xz4usl0dsG1jeCFlQfS2tfurEY5y+q9A0lcD6FMv4Gs27+N+5KPE/OWyCKpUo9JzYQLc94YD7NGeNp/r3IH8Tf+iNBcJy027V8JCGRPKGNMK71YpmnaaR9QjpNqfzfLdrA6WFxpWvZZXmULsNIHi9yaQzDH27ZR3pkoAvKyPew50n5Kck7APeQ+hFPbjXuObTcuST3n9R0Rsn0q104/UerKiYVJbnkFu7UR92X/iVbUvyfByJ5QBliWzeJzS9Nu1F8+b3jah7xl2Vwxb3jaMeZMLGTq6PQZMDcsHt3hfaWuxOabfvm7WgW+yed3OgEBOLbDdYtGoZ00fBf0GUwbc/rqC5Ikdc7WijhHGk0+tCiEcuzDxKrcQXzpX1AKxuC75Tv9PgEBCGcAfiLU1LRkOoRTys8PdCo+BzhU28pzK/ZjWQ5XLRjB8Hw/HY1U2Q4cqAnz/Ir9KEJw1YIRlOf5EKTuLb2wcj8tkSRXzhvBqOIA2hmGu84Uo4qJ1rD32P5CGv4ZV5AIjexyNQMhoKY5zt6KZnRNYWRxkJC3c+VDOvtzzCQZY88YKjHm53etMO/KN1YRi6VvpQJQF7Z4eE0z/315LqXZOo6VJLnlVeyGCjwXfRS1cMxZxdrTTvfaZRLqYV19sx6fWt2ZXsapHpuati063VPpbIyaKnAAy+r7t8
hQ+WDqbTLGntGfklDScvjnqmaumOzjvHG+VMWDdU+i5o/Cff77EXrnyvT0pdO9dnlPKMO6MsR1qsemkkTPJ4p377kjSVLmLd0VpTxbZ9FYL1b1XhIbn8M1+5bUzLcBuPxBJiFJkqQB4lB9kj01Ce67Lh/r0DuYO5fiuezTaMXjMx1at8kkJEmSNAAkLYeXt7Zy97wsjCNrMQ9uwHvd11BCRZkO7azIJCRJkjQArN4XY0SeweTkZsxDm/Be93UUf+9uANkX5BRtSZKkfq4pYrHxcIxby49i7l+H99qvDIoEBDIJSZIk9XvL9kS5aKSDb++reK76PIo/N9Mh9RiZhCRJkvqx2haTQ/VJFrc8i/v8D6DmlGc6pB4lk5AkSVI/tvpAjAtzjuAdcQ76yFmZDqfHySQkSZLUT7XEbPZVx1kkNuCed0emw+kVMglJkiT1U5uPxDnXc4TQ/JsQhufMTxiAZBKSJEnqhxwcth2JMD9wGG3MvEyH02tkEpIkSeqHasM2ihVn7OyFCDF4P6oH7yuTJEkawA7WRJmiH0AfxL0gkElIkiSpX6qoj3FOmQ+huzIdSq+SSUiSJKkfqo0qjDtn4BYm7ayMJKGnn36aq666issuu4y///3vmQhBkiSpX8tWwviGTc50GL2uzwuYVlVV8cADD/D4449jGAZ33HEHc+fOZcyY/rUToCRJUiYVuaIItz/TYfS6Pu8JLV++nHnz5hEKhfB6vVx++eW88MILfR2GJElSv1YUHBqbHPR5EqquriY/P7/t64KCAqqqqvo6DEmSpH4tNzT4e0GQgeE427bbbUHrOE6Xt6Tt6l7tfa2/xwcyxp4iY+wZMsZ0paV5A+Lncrb6PAkVFRWxZs2atq9ramooKCjo0jFqalp6Oqwek58f6NfxgYyxp8gYe8ZQibGrCUVzefr9z6WzTvfa+3w4bsGCBaxYsYL6+nqi0SgvvfQS559/fl+HIUmS1K/5s0KZDqFP9HlPqLCwkM997nO8973vJZlMcssttzB16tS+DkOSJKlfM/wBnEwH0QcyMv3i2muv5dprr83EqSVJkgYG1WAoZCFZMUGSJKkf6uqErYFKJiFJkiQpY2QSkiRJkjJGJiFJkiQpY2QSkiRJkjJGJiFJkiQpY2QSkiRJkjJGJiFJkiQpY2QSkiRJkjJGJiFJkiQpY2QSkiRJkjJGJiFJkiQpY2QSkiRJkjJGJiFJkiQpY2QSkiRJkjJGJiFJkiQpY2QSkiRJkjJGJiFJkiQpY2QSkiRJkjJGJiFJkiQpY2QSkiRJkjJGJiFJkiQpY7RMB9AdiiIyHcJp9ff4QMbYU2SMPUPGOHQJx3GcTAchSZIkDU1yOE6SJEnKGJmEJEmSpIz5/+3dfUxT1x/H8XdHwYc4o2YiCxqIj9ElMjIzrZph0VTKpRSND+BSVOYDxojTPxAXo5nBiMYI4rbMROOzMZqABiLowiRTMRAWlcQsmcEBoozh3FZB0pb2/P4w9if57UEz/d1b/L7+u6cHzufeE/j2XMq5UoSEEELoRoqQEEII3UgREkIIoRspQkIIIXQjRUgIIYRupAgJIYTQjRQhIYQQugmpIlRUVMT+/fuDx263m1WrVmG32/n444/p6OjQMR2UlZWRnJyMzWbj5MmTumZ5XmdnJykpKbS2tgJQU1ODw+HAZrNRWFioczr44osv0DQNTdPYvXs3YLyM+/btIzk5GU3TOHz4MGC8jM/s2rWLvLw8wHgZXS4XmqbhdDpxOp3cunXLcBm//fZb5s+fj91uJz8/HzDedexTVAhwu91q8+bNavLkyaq4uDjY/vnnn6sDBw4opZQqLS1V69ev1ymhUj///LOyWq3qt99+U11dXcrhcKg7d+7olueZmzdvqpSUFPXee++pe/fuqe7ubpWQkKBaWlqUz+dTWVlZqrq6Wrd8165dU4sXL1Yej0d5vV6VmZmpysrKDJWxtrZWpaenK5/Pp7q7u5XValU//PCDoTI+U1NTo6
ZOnao2bdpkuLkOBAJq5syZyufzBduMlrGlpUXNnDlTtbW1Ka/XqzIyMlR1dbWhMvY1IbESqqqqIjY2luXLl/dqr66uxuFwAJCSksJ3332Hz+fTIyI1NTVMmzaNIUOGMHDgQObOnUtlZaUuWZ535swZtm3bRmRkJAANDQ3ExMQwatQozGYzDodD15zDhw8nLy+PiIgIwsPDGTNmDE1NTYbK+OGHH3Ls2DHMZjO//vorfr8ft9ttqIwAv//+O4WFhWRnZwPGm+u7d+8CkJWVRWpqKidOnDBcxm+++Ybk5GSioqIIDw+nsLCQAQMGGCpjXxMSRSgtLY1Vq1YRFhbWq/2XX35h+PDhAJjNZgYNGsSjR4/0iNgrC0BkZCTt7e26ZHnejh07mDJlSvDYaDnHjRvH+++/D0BTUxMVFRWYTCZDZQQIDw+nuLgYTdOwWCyGu44AW7duZcOGDQwePBgw3ly73W4sFgtffvklR44c4fTp0zx48MBQGZubm/H7/WRnZ+N0Ojl16pThrmNfY6hHOVRUVLBz585ebaNHj+bIkSMv9PVKKd56S5+6GggEMJn+u9W7UqrXsVEYNeedO3dYvXo1ubm5hIWF0dTUFHzNKBlzcnJYuXIl2dnZNDU1Geo6nj17lnfffReLxUJJSQlgvLmOj48nPj4+eLxgwQKKi4v54IMPgm16Z/T7/dTX13P8+HEGDhzImjVr6N+/v6GuY19jqCJkt9ux2+0v3D8yMpKHDx8SFRVFT08PXV1dDBky5PUF/BtRUVHU19cHjzs6OoK3wIwkKiqq1wc4jJDz+++/Jycnh88++wxN06irqzNUxsbGRrxeLxMnTmTAgAHYbDYqKyt7rcz1znjhwgU6OjpwOp388ccfPHnyhPv37xsqY319PT6fD4vFAjz9ZR4dHW2ouX7nnXewWCwMGzYMgDlz5hhurvuakLgd91cSEhI4d+4c8PSHcMqUKYSHh+uSZfr06Vy/fp1Hjx7R3d3NpUuX+Oijj3TJ8nfi4uL46aefgrcdysvLdc3Z1tbG2rVr2bNnD5qmGTJja2srW7Zswev14vV6qaqqIj093VAZDx8+THl5OefPnycnJ4fExEQOHjxoqIyPHz9m9+7deDweOjs7KS0tZePGjYbKaLVauXr1Km63G7/fz5UrV0hKSjJUxr7GUCuhl7V+/Xry8vLQNI23336bPXv26JZlxIgRbNiwgczMTHw+HwsWLGDy5Mm65fkr/fr1o6CggHXr1uHxeEhISCApKUm3PIcOHcLj8VBQUBBsS09PN1TGhIQEGhoaSEtLIywsDJvNhqZpDBs2zDAZ/4zR5tpqtXLr1i3S0tIIBAIsWbKE+Ph4Q2WMi4tjxYoVLFmyBJ/Px4wZM8jIyGD06NGGydjXyJNVhRBC6Cakb8cJIYQIbVKEhBBC6EaKkBBCCN1IERJCCKEbKUJCCCF0I0VICCGEbqQIiZCUlZX1j/sEvkif2tpaUlJS/nG8CRMm/On3qqqqCm7373K5qKyspLW1tdf2NEKIvxbS/6wq3lzXrl17JX3+rdmzZzN79uzXPo4QfZWshETI2bx5MwBLly6lrq4Ol8uFw+EgNTU1uI3T833a2tq4fPky6enpzJ8/n1mzZlFUVPTS4xYVFTFv3jycTieXL18GoKSkhNWrV7+S8xLiTSQrIRFydu7cSUlJCUePHmXRokXk5uZis9lob29n4cKFxMTE9OozdOhQcnNzKSgoIDY2lvb2dqxWK5mZmS817siRI9m+fTs//vgjLpeLioqK13SGQrw5pAiJkNXY2IjH48FmswFP9++z2WxcuXKl199kTCYTX3/9NdXV1ZSXl9PY2IhSiu7u7pcaLyMjA4Dx48czZswYbty48epORog3lNyOEyHLZDL9z3NdlFL09PT0anvy5Anz5s3j9u3bTJo0idzcXMxmMy+7beLzz6oKBAKYzfIeToh/S4qQCElhYWFER0djNpu5dOkSAO3t7Vy8eJHp06cH+/
T09NDc3ExnZyeffvopiYmJ1NbW4vV6CQQCLzVmaWkpALdv36alpYW4uLhXe1JCvIHkrZwISUlJSSxbtoyvvvqK/Px89u/fj9/vZ+3atUybNi3Yx+VysW/fPmbNmoXdbiciIoLx48czduxYmpubiYiIeOEx7927R1paGiaTib179+r2AEUh+hJ5lIMQQgjdyEpICODgwYOUlZX96WuffPIJqamp/+dEQrwZZCUkhBBCN/LBBCGEELqRIiSEEEI3UoSEEELoRoqQEEII3UgREkIIoZv/AP/kVwligiBHAAAAAElFTkSuQmCC\",\n      \"text/plain\": [\n       \"<Figure size 432x432 with 3 Axes>\"\n      ]\n     },\n     \"metadata\": {},\n     \"output_type\": \"display_data\"\n    }\n   ],\n   \"source\": [\n    \"# Create a visualization with Modin df \\n\",\n    \"sns.jointplot(data=modin_tips, x=\\\"total_bill\\\", y=\\\"tip\\\", hue=\\\"sex\\\", hue_order=[\\\"Female\\\", \\\"Male\\\"])\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 42,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/plain\": [\n       \"<seaborn.axisgrid.JointGrid at 0x7fc3bda21520>\"\n      ]\n     },\n     \"execution_count\": 42,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    },\n    {\n     \"data\": {\n      \"image/png\": 
\"iVBORw0KGgoAAAANSUhEUgAAAaEAAAGkCAYAAACYZZpxAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAACHGElEQVR4nOzddXxcx7nw8d8cWl6tmM3sGGKmxGFmTpqUb/EWb2+5TUopv7ntLd5yUwo3zA6ZYrZjZpQsptXigfePtWUrK9uSLGkF8/180lqj3XOelVb7nJkz84xwHMdBkiRJkjJAyXQAkiRJ0tAlk5AkSZKUMTIJSZIkSRkjk5AkSZKUMTIJSZIkSRkjk5AkSZKUMVqmA+iKmpoWsrO9NDREMh3KKfX3+EDG2FNkjD1jqMSYnx/ooWgGlwHXE9I0NdMhnFZ/jw9kjD1FxtgzZIxD24DqCUn9n5OIYh3diVWzH7vpKE60GWwTVB3hyULJLkHNH4laOBqhuTIdriRJGSaTkHTWHNvE3LeW5PY3sap2o2SXoGQVIrzZKNmloKhgmzixMHbtAczdK7Gbq1FLJqCPPx9t+DSEIt+KkjQUyb98qdscK0li6+skNz6D8Gajlp2DPuUyhGac+bmJGFbVThLr/k182YPoU6/EmHRhp54rSdLgIZOQ1C3mgQ3Elv0N4QthzLgBJVTUpecLw41WPhWtfCp2YyXm7pW0bnoe15xb0cYu6KWoJUnqb2QSkrrEiYWJLf0LVvUe9MmXoOaPPOtjKqFijFk3YjUcIbHhWRLbXiNxw6eArLMPWJKkfm3AzY6TMses3EHro9/AAVznvb9HEtDJ1OxSjIXvQc0bScVfvkZ843M4jt2j55AkqX+RPSHpjBzHIbH5JRLrnsaYdgVqweheO5cQCtrIGYTGTKJu6RNYBzfhvvhjKN5Qr51TkqTMkT0h6bQc2yL21p9JbnkV18L39GoCOpkWyMGYezsikEfksW9iVmzrk/NKktS3ZE9IOiXHTBB95Zc4sTCu+Xch9L5d1yMUBX3cQpTsUmKv/Ap92pUYU69ECNGncUiS1HtkT0jqkJOIEnnuJ2BZGLNu7PMEdDI1fwTGwrtJ7niL2JLf4JiJjMUiSVLPkklISuMkokSe/THC5UOffhVCyXzJEsUTxDX/TpxYmMhT92NHGjMdkiRJPUAmIakdJxkn8txPEN4Q+jmX9quhL6Hq6NOvRsktJ/LEt7DqDmY6JEmSzpJMQlIbx0oSffF/Uj2gcy7pVwnoOCEE+tgFaOPPI/LMjzAPbsp0SJIknQWZhCQAHNsm+upvwHHQp17eLxPQybSSiRgzryf2+u9IbF2S6XAkSeommYQkHMchvvzvOOFa9OlXI8TAeFuoOWUY8+8kseFZYisfkgtbJWkAGhifNlKvSmx6AfPwOxgzb0SoA2vWvuLLxrXgPViHNxN75Vdy5pwkDTAyCQ1xyf1rSW56HmP2zRmdhn02hOHBmHsbTiJC5JkfYEebMx2SJEmdNLAue6UeZdUeIPbGH3HNvgXFE+zUc0zbobLR5GiTSXPMJmk5GKog6FEoDGqUhDRUpe/vJwlVQ59+DeaOt4g88W08V30eNVTS53FIktQ1MgkNUXakieiL/4Mx+ZIzbsOQtBzWHYjx9r4Iu6qSBNwKuX4Vn6GgKqnE1Bp3qG+1aI7ZjC3QmTncw4zhbrxG33W2hRDoE85H+LKJPHk/nos/jlY2uc/OL0lS18kkNAQ5ZpLoSz9DLZ2EWjLhlI+LJmxe2dbKku2tFAQ0JhQZnD/Wi+c0iSWatDlQm2Tl3igPr2lm1gg3V0z2UxDsu7eaVj4F4c0ituTXGDNvxJh8cZ+dW5KkrpFJaAiqfeF3CEVHG7uww+/bjsOyXRGeWB9meK7GbbOC5Pg6VzXBoytMKHYxodhFa9xm4+EY9z9Xy7QyNzecGyC7k8c5W2ruMMT8u0iseQKr7gDuhe8dcJMuJGkokBMThpjE1iVED2xGn95xId
DasMlPXqzj1e0RbjjXzxXn+DudgN7N51JYMNrLBxZm4QD3PV3DMxtbSFrOWb6KzlF82bgWvgen8SiRp76H3drQJ+eVJKnzZBIaQqyq3cRXP0b2+bcjtPSZcGv2R/neM7WUhjRunx2gsIeG0Ny6wsIxHt4zN8jWyjjfeqqG3dV9M5VaaC70mTeg5JQTefxezCNb++S8kiR1jhyfGCLsSCPRl3+BMfUKtGAuNEbavmfZDo+uaWbdwRg3zui55PNuWR6V66cH2FmV4NevNTBnpJsbZwQxtN6dTZcq9TMfJbuY2Ku/RpuwGNesG/tFYVZJGupkT2gIcGyT6Mu/QC2bjFo4pt33ogmbn79az57aJHfNDfZaAjrZuEKDe+YHOdJo8p1najhYl+z1cwKoeSNwLXov1pGtRJ78HnZzdZ+cV5KkU5NJaAiIr/gXQNpEhMaIxQ9fqMOlCW6c7set993bwWMoXDXFx8zhbh54uY4XNoexnd6/VyTcfow5t6Dmj6T18fuIb35ZlvuRpAySw3GDXHL3Csz963AtvKfdRISqZpMHXq5jcomL2SPcGSlYKoRgYrGLkpDGC5tb2VIR50OLQoS8vTtMJoRAGzULpWAkyU0vYu5eifv8D6LmlPbqeSVJSid7QoOYVXeQ2LK/Ycy8HmG429oP1Mb58Qt1zBzuZs5IT8YrZmd5VG6dGSDXp/Ltp2vZcCjWJ+dV/LkY8+9ELRhN5On7iS3/B068tU/OLUlSiuwJDVJOLEz0xZ+jT7oIJVjQ1r6/NsH/Lmng/HEeJhT1n1pxiiKYP9rDsByNf7zdxI5qkxumenH18hChEAJtxLmoxeNI7lhK60NfRj/3WoxJFyJUvVfPLUmS7AkNSo5tEX3ll6gFo9BKJ7W1761J8PNX67lqela/SkAnK83WuXtukLqwybefrmVPX03ldvkwpl6OMecWzL2raP3Xl0hsex3H6ptJE5I0VMkkNAjF334IJxlDm7C4rW1PdYL/XVLPpZN8TCjxZDC6M3PpCtfPzGb+aA+/fK2Bh1Y3ETf7aIFrsADX7JvRp11JcsdbtP7zC8Q3PCeH6SSpl8gkNMgkdizF3Lsa49xrEUrq17u7OsEvXqvn8sk+RuUbGY6w88Yem8pd0Why35M1bDkS77NzqzlluObcgjHzRqyKLYT/+QVib/0Fq/5wn8UgSUOBvCc0iFhVu4mv/CeuebcjjFRvZ1dVgl+9Xs8Vk/2MyBt49zi8hsJVU/zsq0nw1xWNDMvVuXVWkIJA37x1laxCjOnX4MRaMA9uIvrMDxHBfIyJF2JnXdgnMUjSYCaT0CBht9QSfennGFOvRAnkA7DjaJzfvN7AlVP8DM8deAnoZCPzDcpzdNYejHH/s7XMGenh6ql+sjx9U/VAuAPo4xaijZmPXb2H5I63OLDyn6jDpqOPPw+1ePyA2RZdkvoTmYQGASfeSvS5n6CNmoNaOBqArRVxfvdmA1dN9TMsZ2AnoOM0VTB3pIcpJS7e3hfjm/+uYd4oD5dN9pHr75u3slAU1KKxqEVjCbhs6retJfbWn8FMoI1ZgD5uAWq2XG8kSZ0lk9AA55gJoi/+DJFThjZyJgAbDsb4y/JGrpnmpyx7cCSgk3ldChdO8DJ7hJt1B2N8++laxhUaXDDex8QSA6WP1j2pHj/6qNnoo2ZjN1djHdmaGq7zZKGNW4g+Zj6KN6tPYpGkgUomoQHMsW2iS34Lioo+KXV/YvnuCI+ubeaGcwMUZQ3uX6/frXD+OC/zRnnYVhnnoTVNRBMOc0a4mT3Sw/Bcvc8W4irBApRgAdqE87FrD2Id2UJi7b9RC8agj1+ENuLcDiuXS9JQN7g/pQYxx3GIvflHnNZ6jFk3AYIXNod5ZVsrt8wMkusfOhWiDU0wrdzNtHI3tS0mO6oS/PaNRkzbYUqpiyllbsYXGX2y1bgQCmr+CNT8ETiTE1hHd5HY/DKxt/6CNuJc9HGLUIsntM1clKShTiahAchxHOLLHs
Su2Y8x91ZsofLQqma2VMS5fXaAoHvoJKB3ywto5AU0Fo6BulaLfTUJXtwc5g9LTYqDGhOKDcYXGYwpMHq9YKvQDLSyyWhlk3FiYcyKbcSWPQiJCNro+antJXKHZbxskiRlkkxCA4zj2MSXPoh1dCfGnFuJ2hr/91o9kaTDbbMCfVoJu7/L9ank+jzMGgGm5VDRZHKoPsm/17dwtMmkIKgxvtBgfJGLsYUGPlfv/eyE+6T7Ry01WEe2E33xf0DV0UbPQR81ByWnXCYkaciRSWgAcWyT2Bt/xK4/jDHnVo6EFX79Wi3lOTqXT/agKPID7FQ0VTAsR2+bKWjaDkebTA43mDx/rKeU61eZUGQwqcTFuMLe6ykpgXyUCflo4xfhNB3FqtxB9IUHQChow89FG56qZSdr10lDgUxCA4STiBJ95Zc4yRj6rJt4c6/JE+tbWDzOy6QSecO7qzRFUJatt80etGyHqmaTg/Umz2wMU9FkUhrSmFziYnKJixF5OmoPJ3khBCJUjBIqRpuwGKelpm3Bsd1cg1o4BrV0ElrxeJS8EQhV/rlKg498Vw8AdksN0Rf+BxHIJzLxWh58M0xti8lts4bWBITepCqCkpBOSSiVlJKWw5FGk4N1SVbvj9IctRlbaDCl1MXEEhf5PfxzF0Igjs2wY+wCnEQUu+4gVs1ekjvewgnXo+SUohaMQs0fiZI7DCVULHtL0oAnk1A/Zx7cSOz136OMmsvS+HiefqaeqeVuLpkY7PErc+kEXRWMyNUZcazSRGvc5kBdkg2HYjy1IYymwrThPkblKowtcPX4xYAwPKjF41GLxwPgmHHsxqPYTUdJ7l6BvfZJnEgDwp+Lkl2Kkl2Gml2CEipGCRXJ6eDSgCGTUD/lJOPE336Y5N417Cq9jsfecaGpUW6ZFSCvj6oDSCf4XAqTSlxMKnHhOA51rRbVrYJlu6I8tKoZXRWMyjcYXaAzPDc1zNeTU8KF5kLNG46aN7ytzbFMnNZ67JZanHAtyapd2OE6nNZ6hDuIklVEbekIEu4ClOwS1FAJwu3vsZgkqSfIT7N+yDy4kchbf2WbOoGX4jfRsg0WjHYzpqDvFl9KpyaEIM+vMaLIzaRCFcdxaIjYVDaa7K5OsHx3lJoWE59LoSSkUZylUZilke/XyA+oZHtVNPXsf49C1U4M4Z3EcWycSBNOuA7HbMY8sB5nyyvYLTUI1Uj1lLJLUbPLUEJFqWE9X7asfSdlhExC/YhVu5+apU/wdqXBssSVaLrOzBFuxhX2XSkaqeuEEOT4VHJ8KpNJDYPZjkNTxKau1aKu1WLT4TjNkQhNUZuWmI3PpZDtVcn2KW3PTX2d+neWR+n2cKsQCsKXDb5s/CEvZmMESK0vI9aS6i211GEe2YKzc2kqWSWjqaG9QD4ikIfiz0XxhhCeLITbn+pBGZ7UMJ8qL4akniOTUIY5jk319k2sX72BdbUeDltTGFPo5pJxLkpCmvxjH6AUIcj2pZLKmHd9z7YdwnGblngqIYVjNvtqkryTiBOO2TRHbSIJm6AnlaDy/Cr5AZW8QKo3ledXyfIqXb4wEUKAJ4jqCUL+yHbfc8xEqvcUacSJNmPXH8Gq3IGTiOAkYpCM4SRjYMbBtkFRQdVAKCCU1LEVBRCpNkUBRUMoGmg6QjNAdyMM77GkFkB4gijeLOJmKXbSnWqT7/chRyahPmbbDpV1Yfbs3M/O3YfYUW0RdXRGZBUycUI2V+W70HtgqEbqvxRFEPSoBE+zDYVlO7QcS0hNMZvasMW+2iRNUZvGiE0saZPlSfWacv0qOb5UzyroUQm6FfxuBa8h8Fud25FWaAYimA/B/DM+1nFssK3Uf46dSko44Dip3pbjpNodCywLxzbBSoKZxDFjOIkYTmMlTvUenHgr1asjJMONYCUQvhyUQB5KsBAlq/DYcGMhSjA/lcikQUcmoV5g2w5NrQnqm2PUNsWorm2i4mgdFbURqsIOXmIUGq0UBTWuOCebgv
wQiqwlJp1EVQQhr0rI23GiSlrHklTMpjlq0dBqcaTBJJKwiSQdogmbaMIhblajiNRsP00VaErq2KoCiqCt5yEAIU60KYK2x+pq6j9DE7h1gUcXeAwFjyHw6gpeQ8fnEnhdCr5j7V3ppYVCXhobI6neWLQp1SNrbcSs2o2zby1OpAGntRHh8qWGCoMFiGABaiAvNYToy0nd09LljMCBaEAloeMVAbpaGSAcTZJMWhy/JnRIjY87zvF/g2M72I6DZaeu5izbwbJsTBvMSDPxRAIzaZMwLZJJi1jSIhY3U3/scYvWhE0k4dCaFCQsUIVDQE3gI4JfRMlz2YwpVMkZ58UI5CGMYT36s+kK23b6/U1oGePpGRrk+lVyzzDZze3RCUcSWFaqSoR1rKNiO6m/AfvYH4XT9j/HvwcWDo4Nlk3q78BOlT9KWA6tUUi2OCQsk4TpEDchZjrEkw4OqWTm1gQe46TEpQtcxxKZoQlcaqr4bKDWxkya6IpAVUJoWjZKNqg5AlUAAhRAmBGIhxHxME5zK6JuJ068FScRwW82ogsLXH4Utw9cPjB8CMOTGgLU3QjdANVI/b9iINTjQ4oqQlFTQ4xCtA0xpv6d+jrZFINI5FgwSupemRw67BHCcZzO9dclSZIkqYf170tNSZIkaVCTSUiSJEnKGJmEJEmSpIyRSUiSJEnKGJmEJEmSpIyRSUiSJEnKGJmEJEmSpIyRSUiSJEnKGJmEJEmSpIyRSUiSJEnKmAFVOw7ANC0aGiKZDuOUsrO9/To+kDH2FBljzxgqMebnB7r0+Lq6MLY9OKqqne61D7iekKaduvx9f9Df4wMZY0+RMfYMGePQ1qtJKBwOc80113D48GEAHnroIa655hquvfZavvKVr5BIJHrz9JIkSVI/12tJaOPGjdx5553s378fgH379vGHP/yBf/3rXzz11FPYts0//vGP3jq9JEmSNAD0WhJ6+OGHuffeeykoKADAMAzuvfde/H4/QgjGjRtHRUVFb51ekiRJGgB6fT+hiy66iL/+9a+UlZW1tdXX13PLLbfw/e9/n7lz557V8W3bpra2lvr6BizLOttwBz2Px015eTm6rmc6FEmSpL6fHVdVVcWHP/xhbr755m4noJqalrZ/19dXI4QgKysfVdUyvtuhpimYpp3RGE7FcRxaW5s5dOgQgUB+psM5rfz8QLvfc38kY+wZQyVGOTuuY306O27Pnj3ccccd3HjjjXzyk5/skWMmEjFCoVw0Tc94AurvhBD4fEGi0VimQ5EkSQL6sCcUDof50Ic+xGc/+1luuOGGHjyygxADbqZ5xshELUlSf9Jnn96PPvootbW1/OlPf+L666/n+uuv52c/+1lfnV6SJEnqh3q9J7RkyRIA3v/+9/P+97+/t08nSZIkDSByHEuSJEnKmAFXO66/iEQi3H//tzh8+BCKIhg/fiL//d9f5a233uKPf/w9ppnE7XbzyU9+lnPOmcr993+LaDTKd77zA/bu3cOnP/0xfvGL/2PEiJGZfimSJEkZI5NQN7355mtEIhH+/Od/YFkWP/nJ9zly5DC/+c0v+fnPf0NWVoi9e/fwuc99gn/969987nNf5IMffA/PP/8M//jHX/n0pz8vE5AkSUOeTELdNHXqdP7v/37Ff/7nR5g9ey633nonq1e/TW1tLZ/5zCfaHieEwuHDhxg7dhzf+tb9fOQj7+fyy6/issuuzGD0kiRJ/YNMQt1UUlLKv/71BOvXr2Xt2tV87nOf4J57PsCsWbP51re+3/a4qqqj5OWlFoYePHiArKwsdu3aQTKZlFULJEka8uTEhG564olHuf/+bzFnzjw+8YlPM2fOfJqbm1m1aiUHDuwHYMWKpbzvfXcSj8eprKzgZz/7KQ888EuGDRvBr3/988y+AEmSpH5A9oS66Yorrmb9+rXcffetuFxuCguLuOWWOxg9ejT33vtVHMdBVVV++MP/h2EY3Hff17jzznsYNWoMn//8l3jf++
5g1qy5LFiwKNMvRZIkKWN6vYBpbzi5htPRowcoKhqewWja68+1446rrj5EQUF5psM4raFST6y3yRh7hqwdd3b6Te04SZIkSTqZTEKSJElSxsgkJEmSJGWMTEKSJElSxsgkJEmSJGWMTEKSJElSxsh1Qr2gsrKCO++8iREjRrVr/+EP/x+FhUU9fq5PfeqjPPro0z16XEmSpL4gk1AvycvL589//kemw5AkSerXhmwSWrHlKI+/sYe65ji5QRc3LR7N/Mk920t5t/r6On784/upqqpCURQ++tFPMnv2XP7wh99SVXWUQ4cO0tjYwHvf+0HWrl3N1q2bGTMmVfjUsix++tPUNhD19fWMGTOG++77XqeOL0mS1F8NySS0YstR/vL8dhLHKhvUNcf5y/PbAXosEdXW1vD+99/V9vVll13Bjh3buPrq61i0aDG1tbV84hMfaust7d27h9/85o+8885GPvOZj/OXv/yL8vJh3H33rezevYvW1jCapvPb3/4J27b59Kc/xooVyxg/fmLbOX72s590eHyv19cjr0mSJKmnDckk9Pgbe9oS0HEJ0+bxN/b0WBLqaDju6qsv5sCBA/z+978FwDRNjhw5DMDs2XPRNI2iomJyc/MYOXJU23FaWpqZMWMWwWAWjz32MAcP7ufw4UNEo9F2x1+zZlWHxx87dnyPvCZJkqSeNiSTUF1zvEvtPcWybH7+818TDGYBUFtbS3Z2Nm+++TqaduJXoapq2nOXLn2D3//+t9x66x1cddV1NDY28u6yf6c6viRJUn81JKdo5wZdXWrvKTNnzuLxxx8BYN++vbz3vbcTj8c69dw1a1Zx0UWXcPXV1+H3+1m/fi22bfXY8SVJkjJhSPaEblo8ut09IQBDU7hp8ehePe/nPvdFfvSj7/G+992B4zh84xvf7vT9mmuvvZFvfetrvPLKi2iazpQpU6moqGDmzJ45viRJUiYM2a0cemt2nNzKoWcMlfL+vU3G2DPkVg5n53SvfUj2hCA1C663p2RLkiR1l2PbgMh0GL1uSN4TkiRJ6vfsZKYj6BMyCUmSJPVDjimTkCRJkpQplkxCkiRJUqaYiUxH0CdkEpIkSeqPLJmEJEmSpAxxkjIJSd1UWVnBokWz+NGP2le53rVrB4sWzeK55069988tt1xLZWVFb4coSVJ/J2fHSWcjKyuLt99egWWdKK3z6qsvEwrJWm6SJJ3ZUOkJDdnFqoldy0msfgwnXIfw52LMvhlj7IIeO77H42Xs2HFs3LieGTNmAbBq1UpmzZoDwGOPPcQLLzxHLBZF13Xuu+97DBs2ou35lmXxq1/9jPXr12JZNldddQ233/6eHotPkqR+Ts6OG7wSu5YTf+vPOOE6AJxwHfG3/kxi1/IePc+FF17Ka6+9CsC2bVsYM2Ysuq7T2trKm2++wS9+8VsefPBhFiw4j8cee7jdc59++gkA/vjHv/O73/2Ft956g40b1/dofJIk9V/OEElCQ7InlFj9WPr0RzNBYvVjPdobWrTofH73u19j2zavvvoyF110Ka+++hI+n4/77vsur7zyEocOHeTtt5en7fmzZs0qdu3aydq1awCIRiPs2bObadPO7bH4JEnqx4bI7LghmYSO94A6295dXq+XMWPGsmnTBtatW83HPvafvPrqS1RXV/HRj36Am2++jXnzFpCTk8uuXTvaPdeybD7xiU+zePFFADQ2NuLxeHo0PkmS+i/HNDMdQp/o1eG4cDjMNddcw+HDqd1Dly9fzrXXXstll13GAw880JunPi3hz+1S+9m46KJL+M1vfsH48ZPaNq5zu92UlZVz++3vYeLESbz55msd7g301FP/xjRNIpEIn/jEh9iy5Z0ej0+S+gudJIYTR4jBX7SzMxx7aPSEei0Jbdy4kTvvvJP9+/cDEIvF+OpXv8qvfvUrnnvuOTZv3swbb7zRW6c/LWP2zaAZ7Rs1I9XewxYuPJ9du3Zw8cWXtrXpuo5t29x996188I
N3M3z4CCoq2k/LvuGGWygvL+cDH7iLD3/4Hq666tq2CQ6SNJg4VhJX/U7CT/+Qpke+ibLnDXSrNdNhZd4Q6Qn12nDcww8/zL333ssXv/hFADZt2sTw4cMpL0/tY3PttdfywgsvsHjx4t4K4ZSO3/fprdlxxcUlPPpoai2Q1+vl1VeXtX3va1+7D4Cbb769w+cefx7AZz/73z0SjyT1Z/GKPdQ++n0gtXdO4yt/JHQpiFGL07awH1JsmYTOyve+136hZnV1Nfn5+W1fFxQUUFVV1VunPyNj7IIenYQgSVLXCQHRQ1s5noCOC695jqyRc0ngykxg/YCcHdfDbNtuN9brOE63x35P3qWvulpB0/rXTPP+Fk9HurrLYybIGHtGf4+x+UD6FvSqN4g/y49quDMQUcf6+ufo0hXy+vnvrif0WRIqKiqipqam7euamhoKCgq6dayTt9m1bbtfbac9ELb3BobEdsq9TcbYM7KGTULxBrEjzcdaBMEFt1LflAT6R28gE9t7x6Pxfv+766x+sb33tGnT2LdvHwcOHKCsrIxnnnmGm2/uiYkAAsexEaL/9z76gyE9xi71S0Z+Obm3fhPz6A7seASjZDyJQNm7R+iGHsc682MGgT5LQi6Xix/84Ad86lOfIh6Ps3jxYq644oqzPq5huGlsrCUQyEZVNTm98zQcx6G1tRmPp/8McUgSQMyVB8PzEAJiDjIBAQyRC8ZeT0JLlixp+/f8+fN56qmnevT42dn5hMNN1NdXpa21yQRFUbDt/jscp2kGY8aMpLExlulQJCnNEPnc7RTH6b+fIz1pwFdMEEIQCIQIBEKZDgUYGGPwuq4DMglJUr/Wjy9me5K8kSJJktQvDY1uoUxCkiRJ/dEQGZuUSUiSJEnKGJmEJEmS+qOh0RGSSUiSJEnKHJmEJEmSpIyRSUiSJEnKGJmEJEmSpIyRSUiSJKk/GiIVyGQSkiRJkjJGJiFJkiQpY2QSkiRJ6o+GyI4AMglJkiRJGSOTkCRJvUZRBC47jMsOoyhD48q+xwyRjToH/FYOkiT1T5odw9n7NvXLHgUcggtuQR85l6TiyXRoA8JQ2S16aLxKSZL6nFK9g8ZX/oQdbcGOhml89c+Iqu2ZDmvgkElIkiSpezRNIbL1rbT2yOY30DT5sdMpQ2T0Ur4bJEnqcbbtoOWWpLVruaXY9hApD322hJrpCPqETEKSJPU423Zwj1uA4va1tSkuL+6Ji2QS6ixlaCQhOTFBkqReEfcWkXP7t7DrDgKg5g4j7s4bMvvknDVlaPQRZBKSJKlXOA7EXXlQkgdAEmQC6gIxRHpCQyPVSpIkDTTynpAkSZKUMapMQpIkSVKmKEPjbolMQpIkSf2QkElIkiRJyhg5MUGSJEnKFDFEpmgPjVcpSZI00MiekCRJkpQxsoCpJEmSlDEyCUmSJEkZI7f3liRJkjJFDJG9HGQSkiRJ6o9kT0iSJEnKGGdoVHuVSUiSJKkfcoZIyXGZhCRJkvoj2850BH0iI0noySef5Oqrr+bqq6/mhz/8YSZCkCRJ6udkEuoV0WiU733vezz44IM8+eSTrFmzhuXLl/d1GJIkSf2bZWU6gj7R50nIsixs2yYajWKaJqZp4nK5+joMSZKkfs2xh0YS6vNa4X6/n8985jNceeWVeDweZs+ezYwZM/o6DEmSpP7NTmY6gj4hHKdv5wFu376dL3/5y/zhD38gEAjwhS98galTp/LhD3+4L8OQJEnq1xqWP0b2gpszHUav6/Oe0NKlS5k/fz65ubkA3HTTTfzjH//oUhKqqWnprfDOWn5+oF/HBzLGniJj7BlDJcb8/ECXHh9tacXs5z+Xzjrda+/ze0ITJkxg+fLlRCIRHMdhyZIlTJkypa/DkCRJ6tccM5HpEPpEn/eEFi1axNatW7npppvQdZ0pU6bwkY98pK/DkCRJ6t+S8UxH0Ccyson5Rz7yEZl4JEmSTsMxY5kOoU/IigmSJEn9UUL2hCRJGk
BUTLRwJXZTNYovhJVVCnTtZrjUfwyVnpBMQpI0CAjhIPa/Te2Lv2tr8597OdZFd2cwKumsJKOZjqBPyOE4SRoEjEQDja/+pV1beP2LJGsPZSgi6Ww5iaHRE5JJSJIGg0Sswym9ViycgWCkHuHYONbgr5ogk5AkDQKONxs9r7Rdm9Bd6KGiDEUknS2he3CGwEWETEKSNAgkhIfsqz6Nq3wiAHpuCXk3fxnjXYlJGkB015BIQnJigiQNElF3Ib4rP08wGcbW3MSER86NG8BSPaHBUbbndGQSkqRBJIlOUs/OdBhSTzA8ONHmTEfR6+RwnCRJUn9kuGUSkiRJkjJD6B7sSGOmw+h1MglJkiT1Q8Lw4sgkJEmSJGWCcPlwWhsyHUavk0lIkiSpH3IMD3ZrY6bD6HUyCUmSJPVDlu7HiciekCRJkpQBCXSwLZzE4C5kKpOQJElSP5SwQHizsMN1mQ6lV8kkJEmS1A8lLAfhycKRSUiSJEnqa4mkg+IJyp6QJEmS1PfipgOeAHZTdaZD6VUyCUmS1GsUxcFltWAwNDZo60lx004Nx7XUZDqUXiULmEpSBgghMBJ1OC21CHcA01uAhZrpsHqUy2wktv55mt95DTWQS+ii95HMG4ftyGvfzkiYoHizSB7cmOlQepVMQpKUAa6mPdQ+/qNjWzgLshbeijbpEkxhZDq0HqEqDrH1zxNe/yIAZkMltY//iPy7vkPMJ/c46oyE6SC8IeyW2kyH0qvkJYkk9TGXE6Hh+d8cS0AADk3LHkYNV2Y0rp6kJcO0vvNa+0bHxqo/kpmABqCE5YDhBdvEibdmOpxeI5OQJPW1RASzg5vN9iCqE+YoOqo/J61dGJ4MRDMwJUwbIQTCl409iO8LySQkDQqKIlDVnn07q6qCEKJHjwnguPzoHWy7rQTyevxcmZIQbkIXvx848fPTi0YickdkKqQBJ2Gl/l/xhrCbB28SkveEpAFNCHBFKontWoXVUodn/ALs3FFndW8llrTYfrCJNTuqGFceYvrYfILunvtTSeAmdPknqH/qp1gt9QhVJ3Tx+zB9RT12jv4gmTeO/Lu+g9VwBGF4EbnDiWtyw/HOSloOAMKThd1cleFoeo9MQtKA5opWUfvQt9vqa0W2vEnOtZ9BlJyL43TjgAKeXr6f51ccAGD5pkqGFR7my/fMxK31XE8r5isl+/ZvQ2s9wuUj4crF7k68/ZjtKMT8ZeAvy3QoA5J17A0hfCHsxsGbhORwnDSgmVV70go8tix7GN2Jd+t4ja1JXlx5oF3bwaoWKusj3Y7xVOKKn3hgGDFj8CUg6eyZx3tC3mzspqMZjqb3yCQkDWy2ndbk2DbQvU915xTPdGSWkPqYeeytLXzZOM2Dt2qCTELSgKYVjUFo7e//BOfdSFK4u3W8kFdn8bnth48KcjwU53q7HaMkdcfx6x7hCeIkWnGSg7PqhLwnJA1ocW8Rebd/k8jGl7Fa6vBNuwS7cEL37geRmst1ywWjGV2axbJ3Kpg0IodF00rw6IOrmoHU/1nHe0JCIHy52E1VqHnDMxtUL5BJSBrQHAdivjL08z6IwbEFfmfJa6gsOqeI86cW4Thgy6E4KQPsk66kFH8OdmOlTEKS1F9ZPZB8TuY4DpbVo4eUpC45+f0nfNlYDRXomQun18h7QpIkSf2QdVJPSPhzsesPZzCa3iOTkCRJUj9kntQTUgJ52I2Ds+6eTEKSJEn9UNI6sfxA+HJwwnU4ZiKDEfWOjCShJUuWcNNNN3HllVfy3e9+NxMhSFKvM+wIRvMBXOHDaN1cPCsNXXHzpOE4VUvNkGuoyGBEvaPPJyYcOnSIe++9l0ceeYTc3Fze97738cYbb7B48eK+DkWSeo07UUvjsz8nWXMQAM/4eXgXvYeEKmunSZ1zchICULIKsGr3o+aPyExAvaTPe0Ivv/wyV111FUVFRei6zgMPPMC0adP6OgxJ6jSXFcao3Y5etQl3oo4zFdZWFIi9s6QtAQ
FEd6zEqdrZy5FKg0nCdNpK9wAowQKsmn0ZjKh39HlP6MCBA+i6zsc+9jEqKyu54IIL+OxnP9ulY+Tn9++ryf4eHwydGB3HwUnGUYzuVVBINhzl6L9/TLI6VU9OGB6K33Mv7pKxp4zRirVSsT99S2azai/50y7oVhxnY6j8rntbX8foc6tgGIQCqYnZibKRtKx/eUD8rLqiz5OQZVmsWbOGBx98EK/Xy8c//nGeeOIJbrrppk4fo6ampRcjPDv5+YF+HR8MnRjdiTpiW98gvn8j7jGzcI1fSExP32jtdLSDm9sSEICTiNKw9HHcF32M7LysDmNUFHCPOpdkXfvZTFrh6D7/uQ+V33Vv64kYu5o8fIZgb0UYrdAFgKNkkairoLqiFqG7ziqWvna6197nw3F5eXnMnz+fnJwc3G43l1xyCZs2berrMKRBzmVHaHzmZ7SseopE9QGalz9G88v/16Xq2ooiMBvTqxcnaw6gOMlTPs+2wT35QozCkW1t3knnIQrGde1FSENa0KNS1XxinrZQNZSsIqyavRmMquf1eU/owgsv5Etf+hLNzc34fD7eeustLr744r4OQxrknJYqkrUH27XFD28n0FrT6f1tbNvBVTqed1//+iafj6mcfngvZuQSuO6LiHANKBq2L5/EoFzvLvWWkEdwuKH9lGwluwSrYjtaycQMRdXz+rwnNG3aND784Q9z1113cdVVV1FSUsLNN9/c12FIg5xQTlFw9FTtp2DljCR08fsRhhuEgm/KhejjF3WqnlxCeIgHhhH3lZCUCUjqoly/xoHa9j1uJaccq2JrhiLqHRmpHXfLLbdwyy23ZOLU0iDlsiMQrgbNheXNx/YX4B45ndi+DW2P8U5chOXN69JxTeFCjLmA3OHnIhyLpBEi7sg13lLvyw+oHG5IYloOmpqakqnklpFY/xSOGUdoA+u+0KnIAqbSgOeJV1P/9AOY9ZUA+M+9DGPGDfgv/ACe8dtJVOzAVT4JUTS+W0NijgNxLevYFyfarXgEV7wGNBdJPUtW25Z6lEsV5PpV9tUmGVuY2jNLaK7UfaHKnWjlUzIcYc+QSUga0FRhE179VFsCAgivf4nckdOJ502CYXPRR84j2cNVtt2JWqoe/hOxg1tQ3H5CF38AUTodC7nvkNRzynN0Nh+JtSUhACV3OOahTYMmCXV6XKGpqYlwONybsUhSl2lWjNj+9NmVZt2RtkWlPb3NgyZMwkv/SezgFgDsWJj6Z/8XPVx5hmdKUteMzNPZcKj9jE61YBTmwfR1aAPVGZPQ3r17ufnmm5k/fz5z587l7rvvpqJi8NUvkgYmU3XjHp5+RajllHR7d9Uz0ZJhorvXprVbjTIJST2rJKTRErOobjbb2kRWIU4iit2UvnxgIDpjEvrKV77CrbfeysaNG1m/fj2XX345X/va1/oiNkk6I8tR8M25Hi2rsK3NN+1inNyRp3nW2bFVF1p2cVq78Gb12jmloUkRgjEFBqv3R9vahBCohaNJ7l+fwch6zhmTUDQa5Y477kDXdQzD4J577qG2trYvYpOkTom5Csi69V5yb7+XvPfcjzH3DpKKt9fOlxAesi/9ECgnbql6x8+DUOfWH0lSV4wvMnh7bxTnpK69WjAGc396b3wgOuPEhFGjRrFu3TpmzJgBwM6dOykrk39sUv+SULyQ1Xu9n3eLZ4+m7EM/IlJ9GMXlww6WkFA8fXZ+aegoDWlEkw6H6k2G5aZmdyp5w7E3PIMdaUIZ4D3wMyahiooK7rnnHsaPH4+maWzdupX8/HyuvfZaAJ5++uleD1KS+hvHERgFw2kSXatFJ0ldJYRgYrHBij0RhuWmEo5QtdQEhQPrMSZekNkAz9IZk9AXvvCFvohDkiRJOoWJxS4eWdPCLbOCqEpq2qdaOBZzz9uDNwnt2bOH0aNH4/P5Ovz+5MmTey0oSRpKFAWiSRtdVTKz1bHU7+X4VIIehe2VCSaXpiolKAWjSLzzIk4sjHD7Mx
xh950yCf3oRz/it7/9LbfeeivFxcXtbopFo1FWrFjRJwFKZ6aqAgHtNsCSOiYE6HYUYSWw9ABmhkvwtCYs9lY0U9MYxePSGFUapDDL3a4ygyQBjCs0eHtftC0JCc1AzRuBeWA9+vjzMhxd950yCf3whz+ksbGR0aNH8+CDD+I4DkIIkskkd999d1/GKJ2CwMHVcpDW9S9ix8L4zr0MO388piyW2SEhHIy6nTS+/EfM5hq8E+bjm3dzl/YYips2h2rCHK2LUF4UoTjkxtC6l8iEAodqwvzr5Z1U1rUCsGh6CTecN4ocn3GGZ0tDzbhCg7+tbMK0HbRjQ3JK0ViSu1cOziT0X//1XyxbtgwhBPPnz29rV1WVyy+/vE+Ck07P1XqEmoe+DXZqz5HY/k3kXvc5KJLbpXfE1XqUmsd+CI4NQGTbMhzbxH3BRzCdM5fbsRyHx9/cy8urTmwRcd15o7h+4QjOsON3h6JJm9fXHWlLQABLN1Qwa0KhTEJSmoBbIeRV2Xk0waSSVG9ILRxNcvPLOPFWhKvjWyf93Skv4f7whz+wfft2brjhBrZt29b23+bNm/npT3/alzFKHRBCkDjwTlsCOq5l9VPowjzFs4Y2q7GyLQEdF92xCi3RuR0za5vj7RIQwNNL99IQ7vxGeSezbdh5oCGtvao+0lZySJJONiJX553DsbavheZCzRuOuX9dBqM6O2ccR/j+97/fF3FI3aGk//qEokG3rsv7H8NuxdWwG6N+Jy7rRKLQSOIKH8Ko2YorXoMQp7+BIgS0xEwSHWxEp/pDJIXG/upW9lWFiSbtDo6QkkhaaW2OA4loBM2J42o5gFG7FXeiDnEsi5zqNQD4XAqTR6UPBZYV+DF7uCK3EOAyGzFqt+Nq2ofuRNO+3xhJsrOimcqGWI+cX3MSuMKHO/17ks5sRK7O1sp3bXRXNI7knrczFNHZk1W0ByjHcXANm4LQHsMxT7wpA3NvIN6JoaX+LlFfSfNTPyFZvR8ALVRA9vVfxDQCmBuepmH1MwAIVSf3pv8mkTPulLXiqprifOePb3Pp1BAXlU7GObLl2HcEWRd/gO89sovtx3okRblevnj3TEKe9PtqhUGNvJCb2sYTV6IleR7yRCOJ1c/RuuHl1FF1F3k3fwXHk0XTsz9Lew0xV2pPI8eGmy4Yw54jTRytiwBwwYwy3t58lIraVi6cVozSQ10iV2sldY99DzuaKkLsHj0D3wUfJKH6EQL2VbXy/b+uJmmmkvC1i0ZyzYIR6Er3zq85CcxNz9Lw9pOpBlUj74YvkMib0Gs1/YaCgqBKXatFa9zG50pdhA70ITk5I3QAi3lLyLv9XgKzr8E35QLybvs6Zt7YTId11oSAyK41bR/eAGZjNbHtb6FFamk5loAAHCtJwwu/QbdaOzgS2MDfX9xOa8zk36tqec1zOeGFn8R/2cfJf893Wd1S2JaAAI7WRVi2sRKlgw9fnwFfvCKXOeNC+Dw68ydm85nzXHjjNbRueOVETMk4zW/+Hevghg5fw8nHzvbqfOW9s7j7igncdfl46ppivLnhCH9/YTu1Le2veLtLExYtKx5pS0AAsT3roHZf6t+mza8e39iWgACeXrqPqoZo2rE6S22ppOV4AgKwTBpe/A2GKSvxnw1VERQFVfbXndhxdaAPyckkNMDFfKUoM29FX/RB4qExWIOgc6soCrEju9LaE4e3gZV+/8VqqUckO/7ATJg2e440tX39xKpavvZ0CyuiIzGzylmzoz7tOVv213V4TyaJi9IgvNf7Bt9bUM979Jfxb3oY1Rfi3XOqHTNBoqKD13BoK8q7hqWicZO/vbCdf7y4g3f2nKjL2NTNe03vplgxEpW709rNpiqEgGjcate7O67xLM5vR5vS2qxwIyQj3T6mlJIf0DhU/65tv4vGkdy9MkMRnR2ZhAYB23awrFPfyxhoLMvGN2ZGWrtn/Hww0guTGgUjsV2BDo/l1lVmTyxMay/N82GaNjMmFK
R9b8GUEuxT/DiTuWPJOe9WcvLzyJ11BaEbvoStpd9rUvy5uEed2+FrsOz2GS7oNcgPta87p6mCvFDP1KKzNC/esXPS2rW8chwH/B6N0aXp9ccKsrtfBFYJ5PPue5N6XjmOO9jtY0opOT6Fisb2k4/UwjFY1Xuwo80Ziqr7ZBKS+iXPyKn4z70stZgG8E5ciDZiJgl3PjlX/yfCSH1A67klhC7/CElcHR/Icbjx/NFMGJ6derym8N4rJ1Cam/qAnTgsm8vnDuf4CNn500uYMTav3eLsk9mOQsxXhjliIf7JC4nr2Zj+ErIv/w+EnopBzx9G8Lw7oGhi+msYOTPt2G5N4XN3ntuWiPwenf+6aybZ3p5Z72XZAs+5V+Eqn5RqUDWyzrsDOzQCAE0IPnbjFErzU/cTPC6NT982jfxg96eJJ70F5F77KYQr9XPWsosJXflxEqQnbKlrsr0qR5vaJyGhGagFo0nuWZWhqLpPOKf6a+vHamo6N6U2E/LzA/06Phg4MdbXNqHF6sBxsNw5mMeGGhVFoMcbIBnB8WSTEGfuMZgONLUmMDSFoEdPSwSNkSS245DtMzo9t/Dkn6MQYCQaIBk7FlPqw1YVNlq0Dmj/GjoSt2yaWhP43Dp+l3bKRNhZLieCYbaQwCBhZKM5CZRoPULVSbhysJ32rzRpOTS2JvC4VIIeHfssZ8gpikBPNEDi9L+ngfJ+PNsY8/M77q2fyso3VhGLpQ+JNkctHlrdwk9ua9/Dt6r3YO5dg++m+84iyt5xutc+8G8gSIOW5ShYrvy0dtt2iOsh0EOdOo5GElfDPpQdy1F82bjGziHuLW43Syt0lr0Ox4G4ns27i1VYjoLlTn8NHXGpCgXBVPJSFIED2N0sxeSOVNDwzM8wG6sQupvsSz+EVTaDpKfoWMDpz9FVQX4w1Zs72wR0/BhxLQRa6KyPJZ3gdymE4zaW7bQVMwVQ8kbibHoRq+EIanZpBiPsGjkcJw16ytGt1D56P63vvE7LyieofehbuKJVmQ6rQ5btsKuymQce3shvn9rK4fpIl5d9GU6Mxhd/g9mYeo1OMkb9c7/CiFT3QsRSX1MUgddQaI62v3EpFAW17ByS217PTGDdJJOQNKjpxGle/mi7NicRw6xMn7nWH+yubOH7f1nDhl01rHinkvt+t5LKhvSZa6cj4i0kaw6+q9XBapZJaLAIuAWN0fTF0+qwaSR3LcdJ9szMyr4gk5A0qAnAsdLLGDl2+h9wximCp97a267JdmDt9uoO1y2dimN4UQPplRgUX+hsI5T6Cb9LoTGSPoVT8Wah5pSR3Lk0A1F1j0xC0qCWFC6C825s36hq6MW9t6hXF0l04l2u/yYc0LX0J2lq1w6UVH1kX/FxhHZidltg3o1Y/uKuBST1Wz6XQkOk4wspdeQsEhuf658XWh2QExOkQc1xwCmdRu51n6V1w0so/hx80y8n7ivp8T17VEy02p00L3sEJxknMOd6nLJpmB3UrOs4VofrzxvNpt11bW2aqjBjfEGXJgo4DiRyxpJ79/dRI3VYug/LVyC3+BhEAm6FupaOCxWrOWWYbj/m7hXo4xb1cWRdJ5OQNOiZiguKpuO5ejoOgpjl9MqmcVrjAWof/1Hb1/Uv/Jqcq/8TSmd1+hjDC3zc9+G5LNtUgcelMf+cYgqzXF2ut+Y4EDdyyS8d0e+nP0tdF/K0L93zbtrYhcRXP442ei5C7d8XHzIJSUOGaUFvbVmqKIL4nrVp7eF1L+Avn0HS7tzItyIEw/J8jLx0PI7jYNuOLPgppcnxK6zce+otW9TccqxALol3XsI1/eo+jKzr5D0hacjQieMKH8LVegSdU19FdofjgOJNL0mj+rJwurG1hmXZPbJWRxqccrwqjVGL2Gm2HtEmXEBi47PYrel7VvUnMglJPS5pO1Q0RNlXFSbSwR483aU7MVzNBzDqd6XtzXMmXrMe9q0kvuU16h76FtE3/oBh9lydLcdxMEZMbytTA4
Ci4p91LabdM9sxSNJxiiIoDGrsrz31xZTiz0EbNp3Y0r/2YWRdJ4fjpB4VTVr84+VdLNtUAUAo4OJr75tNrv/stqs2rDCRpX8juiNVKVj1h8i96ctE3UWnfZ4Q4G45RN3zv8JsqEQLFZJz4d00vPUw7pHbYNjcs4rrZHFvEXm330eycgckE+il44n7Bs7KdWlgKQlpbD8aZ0LxKeomAtqY+cSX/pXE7pUYY+b1YXSdJ3tCUo86WN3aloAAGlviPLJkF51d9q8oAkXt4G1Zt68tAUFqW4CWlU+gidP3tAyzmdp//xizoRIAs7GKhrceJjD9YuKHtqB2dK7TEAISlkNrwiJhtj+340DMXYA18jzs8RcT85V1ayhOkjpjeI7O5iOnX5QqVA1j2pXEl/8NO1x32sdmiuwJST3qaF365nLbDzQQMy3c2uk/8JujJsveqWDT7jrmTylmxrh8/C4VIcBsOJr2+MSRHfisOKZy6i0HnHAddqT9sJsdCyNUDVfZRKxT7dnQ0bGAXRUt/P7JzdQ2RVk0rZRbLhxNwJX+ZyQnE0i9rTRbo2aTRX2rRY7v1LspK6FitJGziL7yK7zXfQWh9K+PfdkTknpUeUF6tdyZEwpw66ffcjxu2vz0X+t4ZMludhxs4M/PbuXPz2/DIvWBruWWpz3HPXoGpnr6CtrC7QflXecWCmogB7V0UpeSRW1znB/+dTU1jVEcB97acIRHX9vd5dpuktQTVEUwpsBg1b4z74CrjZoDQhBf+VAfRNY1MglJPaos38ctF45pKzMzpiyL6xaNBMdBCHCZzRgNu3FFK1E5McW0qiHKoar2Wz+v215NfXOMysYoB5Ry3Fd+rm1vHqN4NN4ZV2M5p88ASXcuoQvubtcWWnQbYthM4lr6Rm6nU1HbyrsnrC3bWEFrfGCsTJcGn0klBkt3Rc647YcQAmPaVZj71pLYuayPouuc/tUvkwY8QxVcOXcYC6YUE09a5AbdHK9E4wofpu7xH2JHWwBBYN4NaFOuwBSuU9dGq9qJO1rHy5XZHGqCT7zn54ScJhxfHrFTbWR3EttRUEcvIq9oDHZLHUogB9NfTLwb1QN8nvQ/l9wsD/oZhhklqbeUhjQcB7YfTTDxNBMUAIThwZh1A/EV/0ANFqAW9V7pqq7IaBL64Q9/SENDAz/4wQ8yGcbgJ1JDSUfrIvg8Orr77FZQt8RMDteEURRBWZ4fn6v9cJcAClwJlOgB7MMtqIFs8GbRvOwR7GgLgXMvRcsqQM8uwj66GUVzMzJUxuSROWzZVw/A4skhrp+skWtWIdwO141o5puvaazb18qic0rbhtGEACNeh11/GKHqiJwy4mr79TqW0LH85di+MqqaYtTuD5MdcFGU7UFTBIqw8UQqSdYdQagqSt4Ionpu2usuy/MzZUwu7xwrq6MI+PD1k3Gpos/vAbnMJpyGwzi2hZpdRtyVI+9DDUFCCKYPc/HC5vAZkxCktl03pl1J9KX/xXv911CyCs/4nN6WsSS0YsUKnnjiCS644IJMhTAkCHFse4C/rmlb/Dh3chHvv3ICrm5cwde3Jvj2H1fR3JoAIC/LzdfeP5ssz4nEZlhhwq/8hviBzakGRSP34nvwjZuFkV9G7NBW9Oxial/4LXY0NQSnZuXzxeu+yNL9JVjRMHNjS4m9soTGY8fMXnwH188ezTt7alk8rRjTTE0ocLVWUPvwd3ASqXFxPbeMrOv+K7XBXLufg+DtbdX87snNbW03XTCGq+YNw9u4h+qn/7dtAoMayCH3pi8RdbX/A3XrCh+/fgqHa8O0Rk2GFQcJebQ+//B3J+po+PePMRtTkzUUt4/cW79BzHP66erS4DSpxMXbe2Psr00wIu/MSyHUgtE4Y+cTee7HeG/4JoonfZF1X8rIOEJjYyMPPPAAH/vYxzJx+iElbjr83783t1t9//aWoxypi3T5WKoqeG3dkbYEBFDbFGPN9hrEySWjGw6eSEAAtknzupcwm2txFY
/CMU3iR/e2JSAAq6kG59BGFk4u5NIRcWKbl7Q7d+PKpxiTrzF9hA/l0FpcZiOq4hBe83RbAgJI1h3GqtieFntjJMmfn93aru3x13fT0NxK67Zl7WbQWS31xHev7nCI0K0rjCkOMm1UDuOGZff5nAQhIHnonbYEBGDHWolseJEuzjaXBglNEcwa4eaJ9Z1fwK0Nn45aPJ7ocz9u9/eTCRnpCX3zm9/kc5/7HJWVld16flf3au9r/Sm+ytpWahrT32TRuNnlOC3LZu+RprT2g1XN5OWNb/u6pSJ9mnaysQoUDYSKFsxt9yHadvy6QxTk+glXpa8Cd+IRHDPB+MRu6p5+BKNwJAU3/zetaZu3gd10NO211R+oJ2mmT8dujZkEG9J3WU3WHKQo15/W/m6Z+F3XrDqS1pas2U9+0EDR04dk+tP78VRkjOm8PhdaB1t7dGTBBBe/fqWaQy0wpfzUSxZO5sy+lObVz2G++nOK7vxGh++dvtDnSeiRRx6huLiY+fPn8/jjj3frGP25KnB+fqBfxacAsycWsnrbiQ9aISA/y93lOIWAC2aUsm1/fbv2OROL2h3LFUwfFvKOngGqhuLLJV6xi8D0S4hX7G73GNfImdTUtOAOliBUHcc6kYz0gmEUmocJr3sEgETVPmK1lXgnn0/Tm/9sdxy9dGLaa/O5VPKzPdQ0nEjIHpdGTpYH94ipxA5ta/d4z9jZZ/z5ZOp3bQyfButfbtfmnbSY+qYkjpNo197f3o8dGSoxdjWJRVrjxGKd3yF1wWgPv19SwzevzUPt5CaIztjFJDc8y6F//gDP5Z/utTVEp3vtfd6Bf+6551i2bBnXX389P//5z1myZAn3339/X4cxZCjAey4bz4zxBUCqjM6X3zubgqzO7XFzMseBKaNyuXHxaDRVwaWr3HXZeMaWth9TNv0l5F776WMFPQWeUdPxjp2FMWYecX8R2Zd8EC1URODcy0DREJpB1vl34hSkZuvE3XkU3vYVtGA+AK7S8QSmXkT4XcnGjrWij5mPf/plIBSE4SZ08fuxckamxe41NL58z0xGl6amZZfk+/jq+2YTcLtwjZqRikU9FsuCm6B4cpd/Pn3FzhtD6IL3IHQXKBqBWVejjph5xmm60uA2tkDHpQmWbE8fiTgVIRT0aVdBMkrs1d/idGHxdk8RTgbfuY8//jirVq3q8uy4/nzV1F+v6hxSs9oMTaG8OOusYhQCWmIWQoDfreF0UO1Z0wR6sgWRjIDqIqH5MR0VjTjKoXU0LX8cNbuYrFlXoPjziLoLOPn9n58foLm6CpGMgctHZNk/ad38+okYDDe5d32XuJGHKhy0RCMIlaSR1e7+V8y02XmoiTXbjzKyJItzx+WjAB5DQz9px1JDmGixOlA04u5crE4s/cnk71oIMJJN4DiYriysUxRJ7a/vx5MNlRi72hNa+caqLvWEAOpbLR5a3cw3rskn13/6BeIncyyTxOrHULJLcV/wIYTo2f7J6V67XCc0RAgg6O6ZX7fjgP/YtOx3JyAFG6NpP+F1zxNNxvHPuBIzbwyWk3q8Vr2Dmud/C4DZVEP1/k3kXvYhGFmQdp648ILhBQc8c25ADeQQ2fIWWv4wAvNvJO7KAwcsR2Adnw13UjxCgVfWHOLx1/cAsHRjJS+vOsTX3z+rXQICSDgaieOz4fpw7akiQE80gG1hubMxnc59cDgOJxbb9v3Fq9RP5fhUzh3m5q8rGvnsJTntJwydhlA1jFk3klj1KPFlf8O18J5OP/dsZTQJ3XTTTdx0002ZDEHqYUbzQWoe+g7HN4+L7d9E3k1fwsqbiKoqRLavSHtO6463CY6aTdQ59RBhXAshpl5H1jmXYasGMVs54/50zRGTp97a267taF0rFbURxhRn/ka4bkcxty2hduUTYFl4Jy3CO/cW4nrXKjlI0slmj3Dzz1XNLN0V4bxxvk4/T2gGxuybib/9EKx6FPfcW3sxyhPkpE6pxyiKILrrbd6dHc
Jrn0VTU3vuqIH0RaCpjd/O/FZ0HEgIN+YZdil1WS24mvZCMr3MDoDdT+6diNo9NC97BCwTcIhsfYvk7hWnrh4hSZ2gKoIrJvt4fF0LtS2n3n21I0J34ZpzC+beVcQ3PNtLEbYnk5DUozraz16oqQV0tu3gGT8XYZwoOip0F/5zLiDunN1+Q8d5YlU0PHwftQ99G/W1/+WyGe1n6uUEXJTmdf7qsLcoiiB+aEtae2TbMjSnZ3d9lYaevIDGrBFufv9WI1YXd+gVhhfXnFtJbn6ZxLbXeyfAk8h7QlKPsW0H99jZtKx5DuzUFZiaXQoL7iFqQjxhkXSXkXv3d2luimJYYXxeD/FAOSoWWrQGx0ri+POB1HCZEBBPJNGw8LoUSMZJaj4sR0EngWrFMTVvatKDsGhZ8ShWS6qsjnV0N5dN3UbJVbNZvrWWEUVBLp0zDJ/R+Ru2vcW2HfTc9A3vjKJRWEI741BjpigCdDOMo6gkFY8sFdSPzRzu5kBdmOc2hbl2eteGn4UngDHnVuIr/4XiyUIbcW4vRSmTkNQJ0aTFoepWmlrjlOT5Kc52o5zipmXcX0b+nfcR27mS1pxxrG7MpbBBYfOavby+7jB3XDqeHQfqeXtLFeUFfj5w7TBG2nESa56gYf3LgINRNArP9Z8hZvp5c/1hnlx6ALehcvf5RYw9/DQevw/3rOtYt7eFRNJkWLZKeUkutuoifmy9j1E0Ct+E+TjJGBcEjhCcUsDOWsgPutpK/mSanpWPnl9OsuYQAIo3iG/iAmL99IPdsJpJbH6FunUvongChC58L1bhZCwyn9SldEIILp/s4+9vNzGpxMXogq6NNij+HFyzbiT6xu/xej+PWjC6d+LM5BTt7urP0zkH23TTuGnz639vZtPu2ra2T906jZlj89pNhX43TVN4cfUhsoNuDlWFefLNPcycUEA8YbF574kdHnVN4UcfPAfl0S+0e35w7vUs1+fxu6faLyL92nWFFCz9MVr5Ofw9eQnLt9ajqQr33jqMUSNLaX3zL8QPbCZr9lU0vPGvtueJ4glYi/6DrOz296R0EgjHwlS9p309HTmb37WmKcRe/BlaVi6aPxvHscEyiR/dh+fK/yKZ7Jkpej31fhQC2Pxs6h7WSfLu+Bbx4PCzOvZg+5s53TG6ojtTtDuyuzrBmzsj3HtdPl6j63dgrKO7SG55JVVnLpDXrRj61WJVqf8RAlyJWvSjGzCq3sFlNrZ9r6Iu0i4BAfzxma2E46e/4elEm5hp7CHL5bDmWLWGMWWhdgkIIGnaVNSm17GLxhK8uOpwWvs7R23UYB7moc3MLk915E3L5t9rm7DjEQJzbyQ48wqaVrW/qepUbifPrmn7WhE2rrrttPz7ezT+62uI7S9jWJ1f5He2LMvBKBlHy7qXaHjzIRrfeoTG5U+gF43BsvpHT+1khh2hdeMrae1m1Z4MRCN1xZgCgxF5Bn9d3tStBc1q0VjUETOJvvA/OMmzT4rvJpOQhKu1grp/fpP6p/6Huid/SsOj38WVSCWeaAfJpjWaJGmephckLJJrH0dd9kc0M0JOMDX1Op608HWwVsnrS5+a7Q5mU5STvmtqnl/BjkcQLi8tiRNDgtXNJpZiEDXycY2Z0644apuTCjUazYeofeyHJKsPYLXU0/j637D2vt1nayMcx8EYPQstp6StTQsV4Bo7r8s9sr5gCx01mH4VrHjldPKB4PyxHg41JFm2u+uFiwG0UbMRvhCxt/7c45U5ZBIa4lQFIhtexImfeHNazbWYBzchBBTletM2bZszqZBABxu8tR0zUkvrptcACNa+w3nTS9A1hTfXH+Ha89uPK8+YUEBZfhDP+HltbVqoEP/wCVw7UcE46dy5QRcTvA048QjWjFt5ct2JYqqXzSjAJSy0/ctIHNmOd/yc9kEpGmp26gNfCEhU7iRtKvm659GdvqsoHDNyCd34VXJv+Sq5N3+F0M3fIObq3nBHb0uik3Xene22StdySl
AKRmUwKqmzNFVw5Tk+HlvbQk0Xp21D6v6Sfs6lWNV7SO54s2dj69GjSQOOgk2yNr0StVlXgTZekOs3+MYH5vCX57ZxuCbMwqklXL9oZNvVS2r9j2g/DdSxEboLvXQC2uG1jDXcfOWu6VQ1W+QEdO77wAyONibwew2GFwZQDRVt8Qfxzbwax0ygBgtoXPIHsqv2862r7qIiEUB3exhZ4CLUehDljvvYFQmiKDvwe3SuWTiCKWPzaXz+pyQqdgEQWnQLiuGhdftK9Oxisi68h7inEJzUeiPVlV4hW/GFcPp4Zlpc9UPOuL474VmIh0aSf9d3sOqPIDQXIm8YcS2U6bCkTso/Nm37D0sb+eIVuaecXHQqQjMwpl9D/O2H0EomogTTq5x0h0xCQ5zpKHjPuZDE0X3t2l2jziVhpT6Ny3K9fOnumSQtG6+u4jgOmpNAqd5O6/oXEJ4A/hlXEg8Mx0EQceWza/aXeGVzEwUBlUvdCsXrfsvI0jE0NCg8Wj2a5dsaABheFOCzt59LlsfA9JcD4FFieEZNx1U8mqBSR3DHo5hN1fgX3Iw55Tocx2F4CD5581TW7ahhydrDPPHmXj5zxS0Ma/o1VmsjjUsfxTX8HArf+30Sqje1FfhJyUUrHofqD2GFG1MNQiFr0W3EnJ75k1AU0S+H1c6G4whi3hLwlpz5wVK/NHO4m701Lby6rZVLJ515q5J3U4L5aKPmEHvjD3iu+XKPDF/LJDTEOQ6ow2cQmFdPePWzoOlkLboNO7f9sJkmUjO6jo8Hi6NbqHv6Z23fj+5cTeE99+MoGq/uTPD7Z/YDsA1YuV3l+x/8GIknvsyBGZ9h+UnbShw42sLLqw9y6+LR6MlmRONBrNYGrOY6EnWHie5/h5zz76Bp1TMYZZOJHzt/fTjJd/+0CtM68UH/i5eq+PYF16Cv+hsAiSM7sVBJkr5PSszIJeeWb2BV78GJR9CLxhD3l5x1L0i3o4i6PcQPbEHPK0MtnZS2y6skZYoQgksm+XhodTMzhrnJ9Xc9BWgjZxFf/jeSu5ZjjFt41jHJJCSRUP0o024gd/KFgEJSD552lbUuTMKrnmrX5p98Hq3rnqdFzeLRNe23xY4nLfYdjTDel8We2vSZXxt31XLLecOILPs7wSmLad65itjhHRiFI8m96B4aVz5J3rWfbjcVuLEl3i4BAURiJmER4PhHvn/WlSS04CkTS8zIpT7kZ822SvZuaeT8aR7GlOVgqN27ulMEWDtep+mth9ra9PxhZN34JTCTgCChZ8kFnlJGHS9y+s9VzfznRTldfr5QFPTJF5NY9TD6yFmpLUXOgkxCEpAqPh1Xs058cQqqsNHsOHpeOYmqY0N4ioaWXUDjW48gplyF3tE+07obxZfNuGEhWFPT7lvnjs/HFasDt5f61/5Gsi61c2j88HbMhkr855yPpbnaLYrMDrjQVNEuEXndGtkBN3puKb6pF6GOmkPiNB/4LdEE3/rjKloiqTI5q7ZW88GrxnP+9LLTDqWZtsOh2lZe21hBbtDD6NIgPkNFTzRSt7z9Ro3ukjEk1j9H87oXEYpG1sKbUcacR1Lp+n5OktRTZg1389cVTWyvjDOhuOtJRM0uxQqVkNjyCq7pV59VLHJ2nNRp7lgViTf/QN2/volQVXIuugemX4+68B7MxmpAoOx6i9vntb+68nt0mqNJ3HNvobx+NedPPvH9USVBLp5ZhpOMogZy2xLQcVZrM4rbj+MOtWsP+TQ+ddv0ttlzHpfGZ28/l5yxUwncfC/2uEtIqKdfHHjoaFNbAjruoSV7iMdPvRZCCHh7ezXf+eMqHnx+O//z0Hr+99GNxEwb4Vg41omZR4rbh+IJ0Lz6WbBMnGSMxtf/jqjZddq4JKm3aapgwRgPj6xt7vaUa23sAhKbnscxE2d+8OmOc1bPloYMl9VCy9J/4hk2GSOnCKHq2Ik49aULSSgeppSUYhSNAqEyL6Qy7EPncLARKutjjBuWjd8l2H
AkjO2ayNXnCK4cF8L2F9Jguth9uJF4bjYloVKyz7sN5/jNTsdGMTzouWWYhzbg9gYhq4S4KxfHEcwY7uUn/zGVxpYo2QE3oaBB0hKAQWfGvBw7faqq7YDhRDEaDmA1VaMG81B9WSSrDyA0g+bQeP7+wvZ2z9l5sJGK2lb8xdn4zlnctvmeq2QssQOb084R27MWvWRatxelChxckUqs2oMo3iwUlwez4SjC5UXkDj+xzxCQbKjCOLoLJxFFzS0j4S/BduS1pwTjCw1W74vxzpE4U8u63jNXAnmooWKSO5diTLqo23HIJCR1TmsdnvIJNLz+97YmV/FoChedSyJSRe3jP25bTa14/BRdcA+rjgbJysnhSE2Yx1/bTTia6nW4DZX73jOB/3tuHweqUuuThIAvXldK6cqnyD7vVhpX/Bs7miqTIjSDnAvuouaJnxJadAvusfNJGlkk1z2Ns/oZskjt65acfQ3quTdgoaGqAsfhtMNqwwt8+Dw6rdETvaGPXl4OO16ndsUTbW2+SQuxWpuIHdhM8ryPE++gpE4iaWM6Cp45N6LlFBPZuhS9YCR2pIl4ZfuqAlre6Yf7zsTVuIeaR+5HaAbZ591G7ZIHOX7jS88fRta1/0Vcy8JlNnH08R+TrD1WeUIo5N38JeI547t9bmnwEEIwa4SbFzaHu5WEIDWpKbnlVfSJF3Z7ppy8JJI6RdWNtFI48co9uJ0Inr2vtyvnYUfDmFV7mVpgkzRtqhuibQkIIJaw2FtPWwKCVMflwaUNKFMux2yqaUtAAI6ZIHpoK0bRSJpWP4fTcATDihA7uLVdPC2rn0VEGzlY28rfX9nNU8v3U9McP+UfR1bQx7fuGsc1swuYNiaHz1xVyuxhGs0rn2z3uNaty/AMnwyA+8AyFkxuP/HC49IozvOmfiZaFky6guDN96JMvx7PtMtQ3CemwqrBPIwR53Z7CEQnSdNb/wLHxjdhLs3rX+LkmRfJmoM4dal1X07t/hMJCMCxaXrjbxj0fOkVaWAaV2hwtMmisrF724coecNxElHs2gPdjkH2hKROcTRXx6VwLAu7uTqt2W5twpVtY5o2TeH0D71IIr03UdcSx/blYdVsTfue1dKA6gmS0INsihbywhP7Cbhu4NILVLLX/gm7pRYUlV0NCt/569ttz3tm6T6++9H55AXSb75ajkpeQQF3LXah2Aks1QORBnDSh8kcOxWvdXATd1x7A/k5o3hzQwUjioPcdtFYQl69bQTQth0SpBa92t4icu/4NlbDYYRQETllxE4aLusqYSfbtqpQPUGscEN6rIlUcrfj6bXwrJZ6hJ0E5exmNEmDg6oIJhUbLN8T5eaZ6XuBnYkQArV0EsmdS1HzR3QrBtkTkjol6QrhnTCvfaOi0eJ4cMaen/Z4o3g0BxoVDF1hTFko7fvD8728ewPRS6bmoOxYgj5qRtrjPSOnEq/ez8GRN/CDf25hw65a3tpcy7f+XU3T9LsB0Ceex6Nvtq/+kDBtNu2pO2VvyHYEMS1ExCggrgbAl4cWar8RnuoPYcdPlPPJ8Thcv2gkP/v8BXzypnMoyHKd8haU40DMyCFZOJVEweR292u6I6l68U+/FIDI3g343v07EUpqDydAzS0H2r9u37RLSGpdX6QoDV7jigzWHoh1+/lqyQTMvatTleC7QSYh6YwURWCj4VtwO/5plyB0N0bBSHJu+SoHIj7qguMInP8eFG8QNZBDaPGdtGSNIa98OGPKsqioDXPnZePJCbrJ8ht84JIyCo8u5au3j6eswI/HpXHtgjIuya/BHLWQh7Z7sRZ8EDWQg+IN4ll0F1YsgmvmdTyxqn0Vbst22FKn4520iMCcG0h0cL8m2YX9g+KKl+zrP49nzEyEZuAeMZXcyz9C67YVaFn55F73WcysYTi2QyjgQvTxmh/HAW3cIoLzb8JqqUMLFRGceSXC8KDnlZJ385dIBFIVDZL+Uopu+wp6TinC5SUw9zpcky/G7n9Fuq
UMKgioxJMO1c1drykHoPhzwXBjdbOiutxPqIcNpr1RhIDq5jgHD1WTo0bIzc3Cm5OPjwi24iKJgRCCmGmjCPA6YXRhERU+Eo6GrgiEACNShV13gKg7H+HPw+d1oyQjoAgijoeopeJ3a2hmC1srk3zvr+sIeHUunZqNrgre2hHmG3efg9+j852/bWbnocZ2cb73ivFcOqOIpK2y5WAjP/3HurbvKYrgux+ZT1GoazdeNWGhmhFszYOJhmGFcUT73UQz+btWlOM7nGpYihstGcZRdRLvqg6Rnx+gqboWYSdJ6v5+mYAG09/MmY7RFT21n1BnPP9OmFkjPCwa6+3W85M7liJcXtwL3tPh90/32uU9IemUGlqTNBw+wMS9j2FV7EAYblj4Huxx80g6qfFjx3FwHaswkCTA8dub+rFRIHdrBTUPfxsnkeruC83Af9vXifiHpR4gwKOAZdlYwkfcagSgJZLk8ZWpe02KgDguXI7GzReO4ft/XdMWo6EpTB6VR9JOLWQdX5bFF++eyXPL9+P36lw1fwTF2e4uVykwHRVTDbTd848rx4aw+sklm22fFJMNlnrqIbaEcIHqSk0hlKQOFGZp7K1JdDsJqcXjSKx9Emf+XV2eJSeTkHRK4XCE0sMvYVXsAMBJxIi+9gc8hcOgE7tpKoogtmNZWwKC1Ey3yKZXMRZ/uMPdQ0N+F163RiR2Ymhg/pSStjsbo4sD3Puhuby54QhBr8H8KcUUZLnbZptpimBCWRaT7pyOQGBZtiyTI0lnkO9XWbWv+/eFRCAfALvuAGreiC49VyahIcbBYc/hJiprW8gOuMkLuNImCByXYySx9q9La3caj3Y6CcWba9LazZY6/NEqTD0vLUHYtsNdl01gw64aKmtbmTYuH1UIzGPrahQhGJ7v4/1XTMBxHGzb6XC6s2059JtuiyT1c9k+lZpw9+4JwbFZcsXjSO5Z3eUkJCcmDCFCwLrd9Xzuf17nhw+u5cu/WsbyLUdxTvFh7fb50PLL09qTRudmV5mmjXfsnLR2z+gZtKx9Dr2D7bTzs9wsWXuIlkiCkSVB3t5ciWlZhLztp49alt3hgk9FOLjitbia9+OymulupXlFpKoS6BXrMOp3YNjd25FSkgYCnyGIJR0Sp9kx+UzU4vGYe1Z2eQ2c7AkNIY2RJP/373fa9T7+/OxWpo7JBQSqIvC7VOKmTVVjhCCt5C++m/onftxWH0qMnMVr+1QuzmnGpavEFX/6m05AU9TEshxyCyaTfeF7aNmwBBwb/znno/lDNLz+D3znXIDhU0lwYtKAS1P4zK3T2Hqgge3767nnigmMK8uiM7lExULsX0ntK38Cy0TxBsm94QvEjt9/6iQhQK/ZTu0TP25bM+QePQPfBR8iofq6dKyBzrQdwnETr0vDOFWXWRrwhBD4XQotMatb2zsAiGAhCIFdvQe1cEynnyeT0BASjibTpitfe95oHn9jL0s3HMHQVT5+01Sam8Kco+1DW/sw1Qs/TNN5/40ercXWPCT8RaxZXsls/XUCTgvu0TNJ5ozBOvZWipk2z688wPMr9gOCS+cO44JpCyk7Lw+ScRwzTuOyx9AC2US3vEns8HZCl3+MuL+sLTkG3BrXLBrFwsmFxBIWjpP6IznTFZYWqaL2xd+1fW1Hmml49peEbruXuOj8DVfDitDw8u/bLVqN7VmH79zLYQiVvKluivOrxzdxsKqF4lwvH795GuW5HnmPbZBy64JIwiG3m89PDclNJLlzmUxCUsey/S6CPoPm1lSvpiDbQzxh8ub6VOXqWMKiuiHKWFcdytI/IkbM4O/rbdbuPnTsCFGEqOe/7piGuuIftDTXoPpCaIYPy1+OELBxTx1PLz2+S6vD88v3kx1w4R1Zhnj1fqzWhlQP5bIPUvfKX7DDDdT/+ydk3/Gd1GLRYyKxJOt31/Gvl3di2TY3XziG6aNzO94m4hi7pTatzWyqglgLeDqfhISdwGqpT2t3Yh1UjBikok
mbH/19LfXNqZvVlXURfvDX1Xz/4wsIuOTHxmCkq4L4WQzHAahlk4kvexDX/DsRmtGp58h7QkOIz6Xy3++ZSX7IA8CsCYWs39l+4oBl2wTN1AdwvHgqa3c3tvu+40C8NYx1bMJBvGIXIp76cFZUhbc2tN+KAWDHgQYakjqB6RcRWngz/snnYdYfxY6k1l1YrY3Q2n4R6ta9dTzwr/VU1rVS3RDl14+/w47DTad9fYo/fYMuNZCL4+pahQBTD+AZO6t9o1BQQ8VdOs5A1tASa0tAx0ViJrVN3Z9BJfVvqhBpG0V2leLNQgkWYB7Y0PnnnNUZpQHFcaAs18NPPnM+P/rkQq5dNILhRSd6H7lZbjRVwTo28UBrrSE/25N2nIB1IhlooUIcNXVl7NgOozso0VOY48WxHRqXPUbjsseI7FqDnYyBY2MUDEfLKQHXiXstqqrw1sb0ZPbSqoOop9n11PQWErrwHhCpt7UwPORc9QmSXbyPYzoq/kV34BkzMxWPP5u86z9Pwld0hmcOHl63jvaun7UQEPB0vb6YNECInplPqpZOJrn9jU4/XvarhxjHgZygGyueWlZ60wVj2HmwkY9ekE1x63ZcTVvQxlxPvHQy9vZX+Y/Fn+OHT1W0bfc9b0I2BQ2bAFADORiFI3ACqR6CbTucP72Ut9YfofFY0dK8kJvJo3Lxe1SyL7wbIQSKN4uWDa+Sc8FdxI7sRPVno1iJtvs+juOQm5We/PI6aDuZJXTUsReQX3YOdqwF4c8lbuR0q2J1TM/FffHH8Z/Xgq0aJFTfkLoXEvLpvP/qSfz+qS1tbbdfMo4cvyx8Olg5jnPK5RpdoRaPI7nlVezWBhRf9hkfL5PQEFeY5eJ/PjiOlse/gx1pJgEkdiwn77avYwM+xcOPPjaHisYkPrfGsJDA0yBgzATUrAJMTx4JJXW/RQjI82t87s5zqaqPgBAEvTq7DzUyNuSl4bW/AeAZMY3A1MXUPvebtjjCm14j785vEfMUY9sOC6eVsHRjBVec4yfLZbOrDhbNHoZ1huECCxXLUwieY9stnEXmMB0NUz/2RzSEEhAADsybWMCo0ixqG6PkBNwUZrt75ENK6p+SFhja2f+ChaqjFo0luXslrmlXnvHxMgkNcY4DWtNh7EgzkNqozjNmBslomB2xfP748kFGlgS5afEY8oNG6p5Q4XQgNROuoSmOx50k4NKor6tFj9bxg8eqicbbL3ybVj6Z4LF/C5eL5jXPt4/DTJA8sg0xthjHgVGFPn5wlU7zK7/CjrUyrmAE2e6PEyW9BpymKdi2c1YbxUnpFCEoynJTlNW9Dc+kgSWWtPEaPXOHRi0ej7l3lUxCUicd6y14x81B9fhpXPoYODalecN438K7+NHTFWzeU8f9H5uP/9jMqJrmOD/+x1pqG2NoqsLdV05giqeKRGtLWgICaDWVE0lIKB2WfXdOqq6ZqDlI4zP/y/EuSLJ6P40v/x7/Nf9NktSsG92OIqp2ENn6JlpeOe5x84l5hs59G0nqSZGEg9/VM0lIyRuOve4pnFgY4T79xCA5MUFCyR2G4vLiKhlDy8YlbetjrNqDlFe/xfjyIOFoksq6VNUA04ZfP/EOtY2pmVKmZfPnZ7YSVXz46ncwoTzY7vguXaUoP4ucaz6F4vYT2buBwJQL2gehahilk9pGz5INR3n3GFiiYhdqPDWjTlEE9p4V1D39P0T3rKPl7Sepe/S7uBPtZ9lJknRmsWSqxqLX6JnxVqGoKLnlmEd3nPGxsickkXDnkXv7vSR3r0r/5pF3mDpsPjsONaNrqWuW1rjJ/srmtIfWWz6CB9byobnTeMQTYvWuRoYV+vnwdeeQ5XVhemeRc9dYsBJgeMkNFtK68WUUXwjf1EuI+0va8o7qTd/8TQ3kYOupoSE92UL98kfbfd+OhrHrDkFxd5fbpVNVBSEEpplebFWSBovGiE1eQO1yBezTUQIF2LUHYcTM0z5OJqFBpKOqAqeqNKAoJ4
bEHAfinkKMwlHpBy0Yy86jCSaOyKYkz4eigNetMmFYiEjCpKE5TjiaZO74bCZmRQje/EWSjVV88jwfrfMCePOLsXVvWwxxLYhiiNT9m4Jz8Fw5BccRxCy7LQEJIdDyyvBPv5TwhpePBayRfflHU2VzHNqmkwrDjVB17OjxvV4chBC4zQaspmoUt5+kNx+LM08tVhTRdl9JCIErfJjo5iVYLXV4p1yMXTAOU7SfHXb8b3YozZyTBp/qFpOy7J5NB8ITwG5NX/T9bhlJQr/4xS94/vnUjenFixfzxS9+MRNhDBrueC3Jim04kRZc5ZNIBMvRky1YR3diNR7FVToBK3s4cQwO14R5/u195PsVxuarOAhiejZ7K5ooz8qheOJ5RLe9BaTWxzjTrmNWnYeSPA/Jyp34YkcI5Jbz1Ys04kcP4QQL0PNGElvxMJEnNhHz+PEvfi+rqnwI4TBBayAv2ErcU4hmRRF1e0kc3Y2eU4ZSOJY4QY5nH1eyAatyB3bjUSJZeWihfPJv+HyqbE9WEXF3/onhOi1A/jWfJH5kJ04yjpaVT3jbStS8csTR9UQPbEb1h0jGWkHR8Ey+gKjecQ8pbtrsrwqz53Aj5YUBRpcEybGqqX3oW20186J7N5Bz9X8iymZhJBqxju6k7p0KjNxSHARKdilxb0m3poNLUqYdbbKYUtrD0+9VDY79/ZxOnyeh5cuXs3TpUp544gmEEHz4wx/m5Zdf5tJLL+3rUAYFV6KWuke+3Ta7DSD/pi/Qsv4VhGOTqDtC84rHyb7io6yLjeXXj29qe1xJrocvzEuwP15Awgjx19cquHTGZYy6bCGKlaBOy+GXjx+kKRzHtBwmjwjxH+VVGBtfxTNiKpE1z+Mun0hyv5fEgdRx7WiY5hd+Rd7Cz/Odp2tT55gToXiSRmzbUppXPnEi9uHn4L/skySEB5fZROOTP8asr2j7fmjRLdS//EeCC2/BKprW7gPeiFZR++yvsdtK6QgKbvkS8Z1v07TskRPnKBmLnluCuX8D7uHTiRntE5EDPLfywEmlhmDmhAI+Ot9oS0DHNS9/jJybxtLw/M9IHj3x+Kw519K6/Amyr/wEMV9pF357kpR5juNwsD7JLTO7tvPrGSWiCNeZF4r3+cSE/Px8vvzlL2MYBrquM3r0aCoqKs78RKlDVtWedgkIwKyvxFU4HDsZwztmBqHzbqW+oZkHn9/W7nEVdVEqnVxGuRupbYyyaFoJ//fsbtbVB1jVkMP9D++irinWVspjy/5G6oITSNYeQfWmJh+4yycQ3bM+La4su77tHBWiEJqraH77yXaPiR/YjGiqBMCpP9QuAQE0r3kB36SFtKx6Gt2JtrULAckj209KQAAOTSufIFG1r90x4hW70LOLsBMRrOq9aXE2tCZ4dln756zdXk2VmT6jRwiB01jZLgEBNK9/Cd+4WcR2rkCRC2mkAaa6xUJToCirZ/skdksNSk76VjDv1uc9obFjx7b9e//+/Tz//PP885//7NIxurpXe1/ry/ia9rXfg947bg7RveuJHdwKQPzITvS8ctQ57yGWqEx7ftIWqIpFNG6iH1tvI0htqd1RHamEnfqQPT6d2myuR88uIvmuBBJXfEBL2zkcK9muKvVxmmIRzA/QUp1+499OxFB0A8Xw4A94Ud0nrqoatkfTHx8NowXSh9wcx0boLoSVSPvdNMUa6Gh5ka15ELobJ3miVlro/NsQooOp5WYSoek44UZyc7tWp64v9Pe/F5AxdsTrc6H1wOLRM1m+t4nzJgTJzu65bUocy6S6dj8F134MPfv0P7eMTUzYtWsXH/3oR/niF7/IiBEjuvTcmpqWMz8oQ/LzA30an7tgVKpW2rEPeKNwOI1vPdLuMcnaQ+ToCS6fN5xnl+1va3cZKsVGmGorm+HFQTbvrWfSyBw27a7l3HH5TB2Tx6bdJypTZ/kNCp1ahO5um0XTun0FORfeTd2rfwU7tT5IlE9n1VEX0IJLVynWW1
CCw3GPmEps/4nhQNWXhektoKamBXewGKG1HwLzT15IZPc6gotup77FhpYTP1d3yURAcPI07sCsq4gePFFmBkDxBjFyy2lc+W+yLv5Q2u/GZ6hMHJHDtv0nbqDmZbnJy8ki+/ZvEtu+DCtcj2fS+Zi5o9GSLQiXFyd+YpM779hZRPdvJjDvpn733uzr92N3DJUYu5rEIq1xYrH4mR94FpKWw8aDEb56dR6NjT23caN56B1EqIRG0wc1Lad97RlJQmvXruXTn/40X/3qV7n66qszEcKgkfCXkn/b12he8Th2uAE9f0SHj9PcPkaVhrjjkrG8sb6C0lwXN8wM4dJMNHLZtbGS6voI77t8NKt31tMUjnPD4lGUFfhZu72asWVBrpkgCBxYgv+6TxGrOoAWKkTNH4GZPYzsi+5B0XQcd5BdkSCrXq5k1thsbpyZxfBsh5i7kMCFH8BzeDNWUxWOZeOevJiYlpqKHfcUknf7N2hZ+QRmfQX+iQtQc0pwT1qMmZW+KV0iUEb+rV+lecVj2NEW/LOuhvJz8ZVMQgvmE92xEqNoFP5zzie8822yFt9N3FeS/nNRBB+94RxeXXOIVVurmDwqh6vnj8SjK8T0EpRZt6EpgsSxfZgsVy55t36d8Kp/k6w+gGf0uWiBXJRQMWb2yJ77xUpSH9h8JM7ofIOCQM+lAicZx9yxFM8Vn+nU44XTx9N5KisrufHGG3nggQeYP39+t47Rn6+aMnVVpwsLHBOESvTNPxLdtrzte67yibgv/hh7d+4jqCdxsssxhUFlU5KSXC/FNW9jBQpwkkka/CN54IndjCnLoqo+QlGuh4tnlTOywINqx1EAE5WYbeByooh4C+Gl/0CoOv4ZV2Bmj8IgTtTR0LFBqCRs9diU50O0rHgMs/Yw3nPOw5iwmLjWfj2QptgodpKs/Dzq6sJnLMWTet0WpjDaZs4pCmhWDFt1IewECI2ko572OEIIEpaNoSqdmuGmCZugVyEcNXEcSDr9c7XDUOll9LZM9IRWvrGqV3tCScvhz8ua+M+LshmR17m9f87EcRyS655EyS7Bveh9be39qif0hz/8gXg8zg9+8IO2tjvuuIM777yzr0MZVFIfsio44J1/B+7hU4nv34irbCJa+TlEt71O7kkz0/S5d/HY6iB3z/HgWfm3tvaDc7/MkZowR2pSN/23H2jgSE2ED1wziWjcIj/oJuDRETgkhAfcHjyXfzY11dtywAbzWH23BLSNlrliNdQ+/B2cZOqPqnn54/haGtAX3IPlnJgfY9oKkJoq2placCe/7uNsGxLCDTapY3XiMstxHHTlzLu3tsXpKKjeAInW/v3hKUmnsvZAjNEFeo8mIHP7GzhmAtf8uzr9vD5PQl//+tf5+te/3tenHVISWhAxfB7GyAVYto3aWkH4pAQEoKx9lA9e9FXyWk/MGFPKziEnP5c7LvWzcVct2/bXc87oXIYVBvjab5anynq4Nb783tmU5ZzYVkGYMdTmStR4BBEqJOHKT/swtxqOtCWg41o3v0H+rOuw9DOXe5ckqec0RizWH4zx9WvyeuR4juNg7ngLu/4Q3mu/ilA7v+9U/xxDkM6a44BlHauI8O5tqYfPpKL0YtbujVPgKWTqoo9htFbzZM1wXnvwHQAWTi3hsrnDKcj28PArO7ni3HzKczRW7I7wq0c3ct+H5mCoCrodIbbs70S2LQNAaAZ5t3yVWHBEu1N2tNWvMNw4yumHySRJ6lmO4/DqtgiXT/aR5z/7FOBYJsl3XsKJteC55stnLFj6bjIJDQEikI8wPDiJKIo3yOb8y/jtMyd2Ln3W7+KTNyxgyasnZq4t3VjBh6+bTH7IxXevz8a78V9YByqZMmo2+yZfSCRuYXgVRMOhtgQEqS0ZGl/9I8Ebv0aCEyuwRXYZev5wkjUH2tpC599JUs+iwznSkiT1ivWH4jjAZZPPfjmBHWkiuf4pRFYR3uu+gtC6XnVBJqFBKGbaNLUm8Ht0Am
6NhJFDwY3/Rd1Lv8ccMZ9/La9p9/im1jiKcPjCFXkkHZVn32lld0WY9Ttq+MxluTQ8979Yx6Zf23veZrQZw3vOp3AAJ9IIgNDd+KcsRvX4cSwT3YmTOKnOWlwNkHXN57CrdmM1V6MXj8UKDcdxwGU2QTwM7iAJLdAnddiEELTETFpjSbJ8Bh5dkfXfpEGvusVk1d4oX74qD/UsF1abFdtIblmCMf1qjKlXdLv4qUxCg4SiCDQVGsIJvvfXNRyti+J1a3zi+vFMGZFFvL4Sd/lEEuWTSC6rZN64LK4Yr+CzmsnKy0WpWELruudBUfnUlCv5d85oirMFHN3etv7nOPPARlzJJmJGLpo/G6G7yLngLhpXPonVUodwedHzh6GUnIt90qSDuB6CslkIIYg7DkI4GDVbqX/ul9ixMKovRM41nyYe6qCQag/bdqiRXz66kdaYSU7QzefvPJfSHI9MRNKgFU/aPLspzB1zghQGu//R78QjJLcuwQ7X4r3y86gFZ/f3KvcTGgTciTqMyvVEX/pfjNd/xrcu05k9JkgkZvL/HtlKbVUtdrie8KbXsJc/yPsvH80dpfvJXvYzjJV/IvrMT1CwMIpGgW3Bxme4aqzNucF6SKZPEVU8fhw1dY/HcfvJu/JjNL79FFZLai8fJx6h9plfYkRr054LtE1acMXrqXvqgbbyO1ZrI3VPPYBhpm8T0ZMaIkn+3z/X0RpLJdf65hj/75/riSbTqyFI0mDgOA4vbmllcqmLuaO83T6GeegdYm/+CSWnFN/N3znrBAQyCQ14LqsZ68gWap7+XyK71hA7sJnYi7/gvdMsXIaKbTscrarDXTIGANXlZlwogdj4TLvjtKx/Bd/YWW1f58YO4t7wCMm6I7jLJ7V7bOiiD5DQUrXjLE8udiKK1fyuhOPY2C3th/3ezYpHsKZdhzJ2IRyboGBHW6C1oVs/i86qbYqmlSSqb47R1Hrmir+SNBCt2BvFtOHO2en7dHWG3VxNYuW/sA6/g/eqL+Cef1eHk426Qw7HDXTN1STrjqR6MCdRt77I7LHXs3RLHSHdxIq0knv5f9Cy6TWURBjn3YtnHLvdltua4cKOhQlveYvAjMsJzLoKKxZBCRVhBk5sWZDEhbdoDIrHjx1tPwtPeEOnDLslZvL0qijLNgUpzy/k7sXnkbPq19iJGLh7t0ZXyOdCiPZ7APncGn5356eVStJAseNonO2VCb52dR6a2rX7Nk4iSnLXMqyKHbhm3Yg+8UKE0rN9F9kTGugU0eGbQqgapuVw13mFFKhNWJEwDUsfxSgYTjAvH9UXavd41Z+NHU8VBXWPnIrZUp+qR6douIZPIV4whWT5HOKBYVjvunaJugrJufLjoJxozzr/TkxvQYchO8CDL+7gldWHicZNdh5u4XtPVdE67TayL/8ICVfO2f1MziA34OK9V05s+1pVBJ+4eRoBj7wmkwaXyiaT13ZE+M+Lcgh6Or8cwrEtzL1riL3xBxRPEN/t38eYfHGPJyCQPaGBz1+InlOSVvzTM+s6LrFLyPGAT+jUPHw/en45enYhomIzOVf8B7GDW2lZ/Sx6yViyFt+DlYhTMHJ6av2OmcQ1agYE8km48k5bScBxHBJ5k8i/5wfYLTUIb6ow6buT1XHNkSRrtlW1a4snLeoCkykszen1yQGKgPOmFDNpRA6NrXHysjxke7VOVWiQpIGiOWrx9IYw71sQojync718x3Gwj+4kueNNlKxivNd9FTW7d/fIkklogIurfjzlU8i/7tNE923CTsZxRs7hB6+E2X54A6oi+NptYygMFeKftIiG1//R9lzfxAXkXvlRFE+QRNYwbBvM05zrdBwEMVceuM68AlvTFLxujUis/dncbne72XS9SRGQH3SRH+zh3SRP4VTbrEtSb4gnbf69Pszl5/iYXu7u1HOs+sOY298Ex8Z9/gfRys7p5ShT5HDcIBBVQ8Tyz0FfcA9VE+/gP/5SxfbDrQBYtsPvXzmCb/
4tNL39VLvntW5bjh1povapn6HHzrwXfE8JuDXed1X7yQ5Tx+RRnNu9WTv9mebEcdXvRGx+Bv3ImtSaKEnqRZbt8MymViaWuLh00pn3CLLD9STWPklyw7PoUy7De/N3+iwBgewJDRqOA8mkRVV9+p4gR+siCFcRdqw1/XmWmRrGS0TAaH8vRgiIWw6m5eA1lE4VAu0M23aYMTaXb//HPI7UtBIKGAwr8OPSBtc1kaKAs2MZta/9ta3NKB5D4OrPklD63+Z30sDnOA5LtkfwuRTunBM87QJSJx4huXs5VsU2jKlX4rny8z02460rZBIaZIrz0q98Fk4twcgrxSgcRaLqpC2uFQ2hqGihQvC2T0C247DzcDN/fGYLjS0JLpszjCvnDcfn6plab6oQlOV6KRuEvZ/j9HgjdUsfateWqNwNDRWQOy5DUUmD2ZoDMerCFl+6MveUFREcy8Tcvw5z7yr00XNx3/YDFE+wjyM9YXBdeg5AqipQOzlt8lSPVRSBqqZ+lQVZLv7rrhlk+VNXNHMmFXLzhWOIKgGyrvgYrvLUrDA1mEfORXcTPbyTnGs/S1xpnwyONsb4+4s70DUV07J5bsV+Xll7CNHFUh+pfXocTNuhm1U9BixhWzjJ9LVHJ08gkaSesqsqwcZDcT59cQ5uPf2j3XEcrModxN/8I05rPb7rv4F70XszmoBA9oQyqrIxyrJNldi2w8KpJZTkeOjoc9oBKuqjLNtUgSIEC6cWU5yd2kqhujnOis2VtESSnDethPI8H+cMz+b+j84naTn43VrblUbMVYDvys+TL6IkLQfbsvCOmk+U9jfnFWwKkkf48rjtOEKhKX86v3yjmVdXH+LyOcNwd3LYLG7arNlRwxNv7MGlq9x1+XgmlGehDpFsZLpDeCfMJ7L9xAaDwvCgZheTzGBc0uBT1WzyyrZWPntJDtm+9NEKu7mG5NZXwUrivuA/0EondXCUzJBJKEMqGqJ883cr26YFv/j2Ae778DzKOxieOlIX4d7fr2ybuvzC2wf49n/MQ9cUvvl/K9q2nl6y5hBffd9sxhQH8Ogqng5mZSbRMfJyaKppOWU/2GjaT9O/v8fxm0BBsYRPLv4cf1qVQFc7l4CEgM376vnD01va2n76j3Xc+8G5DC84883SwcB0VLzzb0MNFRDdugy9YDj+eTcRd+X22P01SWqN2zy1Mczd87LSNqhzkvHUYtMj244tNr0A0c+2T5HDcRmgqgpvbqhoty7FcVKJSHtXL0PTFF54+0C7tTO27bBqaxXv7KlrS0DHPf76bpyz6GioqiC84SXafUo6NqHq9Xz4usl0dsG1jeCFlQfS2tfurEY5y+q9A0lcD6FMv4Gs27+N+5KPE/OWyCKpUo9JzYQLc94YD7NGeNp/r3IH8Tf+iNBcJy027V8JCGRPKGNMK71YpmnaaR9QjpNqfzfLdrA6WFxpWvZZXmULsNIHi9yaQzDH27ZR3pkoAvKyPew50n5Kck7APeQ+hFPbjXuObTcuST3n9R0Rsn0q104/UerKiYVJbnkFu7UR92X/iVbUvyfByJ5QBliWzeJzS9Nu1F8+b3jah7xl2Vwxb3jaMeZMLGTq6PQZMDcsHt3hfaWuxOabfvm7WgW+yed3OgEBOLbDdYtGoZ00fBf0GUwbc/rqC5Ikdc7WijhHGk0+tCiEcuzDxKrcQXzpX1AKxuC75Tv9PgEBCGcAfiLU1LRkOoRTys8PdCo+BzhU28pzK/ZjWQ5XLRjB8Hw/HY1U2Q4cqAnz/Ir9KEJw1YIRlOf5EKTuLb2wcj8tkSRXzhvBqOIA2hmGu84Uo4qJ1rD32P5CGv4ZV5AIjexyNQMhoKY5zt6KZnRNYWRxkJC3c+VDOvtzzCQZY88YKjHm53etMO/KN1YRi6VvpQJQF7Z4eE0z/315LqXZOo6VJLnlVeyGCjwXfRS1cMxZxdrTTvfaZRLqYV19sx6fWt2ZXsapHpuati063VPpbIyaKnAAy+r7t8
hQ+WDqbTLGntGfklDScvjnqmaumOzjvHG+VMWDdU+i5o/Cff77EXrnyvT0pdO9dnlPKMO6MsR1qsemkkTPJ4p377kjSVLmLd0VpTxbZ9FYL1b1XhIbn8M1+5bUzLcBuPxBJiFJkqQB4lB9kj01Ce67Lh/r0DuYO5fiuezTaMXjMx1at8kkJEmSNAAkLYeXt7Zy97wsjCNrMQ9uwHvd11BCRZkO7azIJCRJkjQArN4XY0SeweTkZsxDm/Be93UUf+9uANkX5BRtSZKkfq4pYrHxcIxby49i7l+H99qvDIoEBDIJSZIk9XvL9kS5aKSDb++reK76PIo/N9Mh9RiZhCRJkvqx2haTQ/VJFrc8i/v8D6DmlGc6pB4lk5AkSVI/tvpAjAtzjuAdcQ76yFmZDqfHySQkSZLUT7XEbPZVx1kkNuCed0emw+kVMglJkiT1U5uPxDnXc4TQ/JsQhufMTxiAZBKSJEnqhxwcth2JMD9wGG3MvEyH02tkEpIkSeqHasM2ihVn7OyFCDF4P6oH7yuTJEkawA7WRJmiH0AfxL0gkElIkiSpX6qoj3FOmQ+huzIdSq+SSUiSJKkfqo0qjDtn4BYm7ayMJKGnn36aq666issuu4y///3vmQhBkiSpX8tWwviGTc50GL2uzwuYVlVV8cADD/D4449jGAZ33HEHc+fOZcyY/rUToCRJUiYVuaIItz/TYfS6Pu8JLV++nHnz5hEKhfB6vVx++eW88MILfR2GJElSv1YUHBqbHPR5EqquriY/P7/t64KCAqqqqvo6DEmSpH4tNzT4e0GQgeE427bbbUHrOE6Xt6Tt6l7tfa2/xwcyxp4iY+wZMsZ0paV5A+Lncrb6PAkVFRWxZs2atq9ramooKCjo0jFqalp6Oqwek58f6NfxgYyxp8gYe8ZQibGrCUVzefr9z6WzTvfa+3w4bsGCBaxYsYL6+nqi0SgvvfQS559/fl+HIUmS1K/5s0KZDqFP9HlPqLCwkM997nO8973vJZlMcssttzB16tS+DkOSJKlfM/wBnEwH0QcyMv3i2muv5dprr83EqSVJkgYG1WAoZCFZMUGSJKkf6uqErYFKJiFJkiQpY2QSkiRJkjJGJiFJkiQpY2QSkiRJkjJGJiFJkiQpY2QSkiRJkjJGJiFJkiQpY2QSkiRJkjJGJiFJkiQpY2QSkiRJkjJGJiFJkiQpY2QSkiRJkjJGJiFJkiQpY2QSkiRJkjJGJiFJkiQpY2QSkiRJkjJGJiFJkiQpY2QSkiRJkjJGJiFJkiQpY2QSkiRJkjJGJiFJkiQpY7RMB9AdiiIyHcJp9ff4QMbYU2SMPUPGOHQJx3GcTAchSZIkDU1yOE6SJEnKGJmEJEmSpIz5/+3dfUxT1x/H8XdHwYc4o2YiCxqIj9ElMjIzrZph0VTKpRSND+BSVOYDxojTPxAXo5nBiMYI4rbMROOzMZqABiLowiRTMRAWlcQsmcEBoozh3FZB0pb2/P4w9if57UEz/d1b/L7+u6cHzufeE/j2XMq5UoSEEELoRoqQEEII3UgREkIIoRspQkIIIXQjRUgIIYRupAgJIYTQjRQhIYQQugmpIlRUVMT+/fuDx263m1WrVmG32/n444/p6OjQMR2UlZWRnJyMzWbj5MmTumZ5XmdnJykpKbS2tgJQU1ODw+HAZrNRWFioczr44osv0DQNTdPYvXs3YLyM+/btIzk5GU3TOHz4MGC8jM/s2rWLvLw8wHgZXS4XmqbhdDpxOp3cunXLcBm//fZb5s+fj91uJz8/HzDedexTVAhwu91q8+bNavLkyaq4uDjY/vnnn6sDBw4opZQqLS1V69ev1ymhUj///LOyWq3qt99+U11dXcrhcKg7d+7olueZmzdvqpSUFPXee++pe/fuqe7ubpWQkKBaWlqUz+dTWVlZqrq6Wrd8165dU4sXL1Yej0d5vV6VmZmpysrKDJWxtrZWpaenK5/Pp7q7u5XValU//PCDoTI+U1NTo6
ZOnao2bdpkuLkOBAJq5syZyufzBduMlrGlpUXNnDlTtbW1Ka/XqzIyMlR1dbWhMvY1IbESqqqqIjY2luXLl/dqr66uxuFwAJCSksJ3332Hz+fTIyI1NTVMmzaNIUOGMHDgQObOnUtlZaUuWZ535swZtm3bRmRkJAANDQ3ExMQwatQozGYzDodD15zDhw8nLy+PiIgIwsPDGTNmDE1NTYbK+OGHH3Ls2DHMZjO//vorfr8ft9ttqIwAv//+O4WFhWRnZwPGm+u7d+8CkJWVRWpqKidOnDBcxm+++Ybk5GSioqIIDw+nsLCQAQMGGCpjXxMSRSgtLY1Vq1YRFhbWq/2XX35h+PDhAJjNZgYNGsSjR4/0iNgrC0BkZCTt7e26ZHnejh07mDJlSvDYaDnHjRvH+++/D0BTUxMVFRWYTCZDZQQIDw+nuLgYTdOwWCyGu44AW7duZcOGDQwePBgw3ly73W4sFgtffvklR44c4fTp0zx48MBQGZubm/H7/WRnZ+N0Ojl16pThrmNfY6hHOVRUVLBz585ebaNHj+bIkSMv9PVKKd56S5+6GggEMJn+u9W7UqrXsVEYNeedO3dYvXo1ubm5hIWF0dTUFHzNKBlzcnJYuXIl2dnZNDU1Geo6nj17lnfffReLxUJJSQlgvLmOj48nPj4+eLxgwQKKi4v54IMPgm16Z/T7/dTX13P8+HEGDhzImjVr6N+/v6GuY19jqCJkt9ux2+0v3D8yMpKHDx8SFRVFT08PXV1dDBky5PUF/BtRUVHU19cHjzs6OoK3wIwkKiqq1wc4jJDz+++/Jycnh88++wxN06irqzNUxsbGRrxeLxMnTmTAgAHYbDYqKyt7rcz1znjhwgU6OjpwOp388ccfPHnyhPv37xsqY319PT6fD4vFAjz9ZR4dHW2ouX7nnXewWCwMGzYMgDlz5hhurvuakLgd91cSEhI4d+4c8PSHcMqUKYSHh+uSZfr06Vy/fp1Hjx7R3d3NpUuX+Oijj3TJ8nfi4uL46aefgrcdysvLdc3Z1tbG2rVr2bNnD5qmGTJja2srW7Zswev14vV6qaqqIj093VAZDx8+THl5OefPnycnJ4fExEQOHjxoqIyPHz9m9+7deDweOjs7KS0tZePGjYbKaLVauXr1Km63G7/fz5UrV0hKSjJUxr7GUCuhl7V+/Xry8vLQNI23336bPXv26JZlxIgRbNiwgczMTHw+HwsWLGDy5Mm65fkr/fr1o6CggHXr1uHxeEhISCApKUm3PIcOHcLj8VBQUBBsS09PN1TGhIQEGhoaSEtLIywsDJvNhqZpDBs2zDAZ/4zR5tpqtXLr1i3S0tIIBAIsWbKE+Ph4Q2WMi4tjxYoVLFmyBJ/Px4wZM8jIyGD06NGGydjXyJNVhRBC6Cakb8cJIYQIbVKEhBBC6EaKkBBCCN1IERJCCKEbKUJCCCF0I0VICCGEbqQIiZCUlZX1j/sEvkif2tpaUlJS/nG8CRMm/On3qqqqCm7373K5qKyspLW1tdf2NEKIvxbS/6wq3lzXrl17JX3+rdmzZzN79uzXPo4QfZWshETI2bx5MwBLly6lrq4Ol8uFw+EgNTU1uI3T833a2tq4fPky6enpzJ8/n1mzZlFUVPTS4xYVFTFv3jycTieXL18GoKSkhNWrV7+S8xLiTSQrIRFydu7cSUlJCUePHmXRokXk5uZis9lob29n4cKFxMTE9OozdOhQcnNzKSgoIDY2lvb2dqxWK5mZmS817siRI9m+fTs//vgjLpeLioqK13SGQrw5pAiJkNXY2IjH48FmswFP9++z2WxcuXKl199kTCYTX3/9NdXV1ZSXl9PY2IhSiu7u7pcaLyMjA4Dx48czZswYbty48epORog3lNyOEyHLZDL9z3NdlFL09PT0anvy5Anz5s3j9u3bTJo0idzcXMxmMy+7beLzz6oKBAKYzfIeToh/S4qQCElhYWFER0djNpu5dOkSAO3t7Vy8eJHp06cH+/
T09NDc3ExnZyeffvopiYmJ1NbW4vV6CQQCLzVmaWkpALdv36alpYW4uLhXe1JCvIHkrZwISUlJSSxbtoyvvvqK/Px89u/fj9/vZ+3atUybNi3Yx+VysW/fPmbNmoXdbiciIoLx48czduxYmpubiYiIeOEx7927R1paGiaTib179+r2AEUh+hJ5lIMQQgjdyEpICODgwYOUlZX96WuffPIJqamp/+dEQrwZZCUkhBBCN/LBBCGEELqRIiSEEEI3UoSEEELoRoqQEEII3UgREkIIoZv/AP/kVwligiBHAAAAAElFTkSuQmCC\",\n      \"text/plain\": [\n       \"<Figure size 432x432 with 3 Axes>\"\n      ]\n     },\n     \"metadata\": {},\n     \"output_type\": \"display_data\"\n    }\n   ],\n   \"source\": [\n    \"# Create a visualization with pandas df\\n\",\n    \"sns.jointplot(data=pandas_tips, x=\\\"total_bill\\\", y=\\\"tip\\\", hue=\\\"sex\\\", hue_order=[\\\"Female\\\", \\\"Male\\\"])\"\n   ]\n  }\n ],\n \"metadata\": {\n  \"interpreter\": {\n   \"hash\": \"aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49\"\n  },\n  \"kernelspec\": {\n   \"display_name\": \"Python 3.9.10 64-bit\",\n   \"language\": \"python\",\n   \"name\": \"python3\"\n  },\n  \"language_info\": {\n   \"codemirror_mode\": {\n    \"name\": \"ipython\",\n    \"version\": 3\n   },\n   \"file_extension\": \".py\",\n   \"mimetype\": \"text/x-python\",\n   \"name\": \"python\",\n   \"nbconvert_exporter\": \"python\",\n   \"pygments_lexer\": \"ipython3\",\n   \"version\": \"3.9.7\"\n  },\n  \"orig_nbformat\": 4\n },\n \"nbformat\": 4,\n \"nbformat_minor\": 2\n}\n"
  },
  {
    "path": "examples/jupyter/integrations/sklearn.ipynb",
    "content": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Demonstrating sklearn Modin Interoperability\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### Logistic Regression example taken / adapted from https://www.ritchieng.com/pandas-scikit-learn/\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"import modin.pandas as pd\\n\",\n    \"import numpy as np\\n\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# From https://www.ritchieng.com/pandas-scikit-learn/\\n\",\n    \"\\n\",\n    \"url = 'http://bit.ly/kaggletrain'\\n\",\n    \"train = pd.read_csv(url)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"train.head()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Pclass: passenger class\\n\",\n    \"# Parch: parents and children\\n\",\n    \"feature_cols = ['Pclass', 'Parch']\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# you want all rows, and the feature_cols' columns\\n\",\n    \"X = train.loc[:, feature_cols]\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# now we want to create our response vector\\n\",\n    \"y = train.Survived\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# 1. 
import\\n\",\n    \"from sklearn.linear_model import LogisticRegression\\n\",\n    \"\\n\",\n    \"# 2. instantiate model\\n\",\n    \"logreg = LogisticRegression()\\n\",\n    \"\\n\",\n    \"# 3. fit \\n\",\n    \"logreg.fit(X, y)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"url_test = 'http://bit.ly/kaggletest'\\n\",\n    \"test = pd.read_csv(url_test)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# missing Survived column because we are predicting\\n\",\n    \"test.head()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"X_new = test.loc[:, feature_cols]\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# 4. 
predict\\n\",\n    \"new_pred_class = logreg.predict(X_new)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# kaggle wants 2 columns\\n\",\n    \"# new_pred_class\\n\",\n    \"# PassengerId\\n\",\n    \"\\n\",\n    \"# pandas would align them next to each other\\n\",\n    \"# to ensure the first column is PassengerId, use .set_index\\n\",\n    \"kaggle_data = pd.DataFrame({'PassengerId':test.PassengerId, 'Survived':new_pred_class}).set_index('PassengerId')\\n\",\n    \"kaggle_data.to_csv('sub.csv')\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# save train data to disk using pickle\\n\",\n    \"train.to_pickle('train.pkl')\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# read data\\n\",\n    \"pd.read_pickle('train.pkl')\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# From https://scikit-learn.org/stable/modules/generated/sklearn.compose.ColumnTransformer.html\\n\",\n    \"\\n\",\n    \"import numpy as np\\n\",\n    \"from sklearn.compose import ColumnTransformer\\n\",\n    \"from sklearn.preprocessing import Normalizer\\n\",\n    \"ct = ColumnTransformer(\\n\",\n    \"    [(\\\"norm1\\\", Normalizer(norm='l1'), [0, 1]),\\n\",\n    \"     (\\\"norm2\\\", Normalizer(norm='l1'), slice(2, 4))])\\n\",\n    \"X = pd.DataFrame(np.array([[0., 1., 2., 2.],\\n\",\n    \"              [1., 1., 0., 1.]]))\\n\",\n    \"# Normalizer scales each row of X to unit norm. 
A separate scaling\\n\",\n    \"# is applied for the two first and two last elements of each\\n\",\n    \"# row independently.\\n\",\n    \"ct.fit_transform(X)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"from sklearn.feature_extraction import FeatureHasher\\n\",\n    \"from sklearn.preprocessing import MinMaxScaler\\n\",\n    \"X = pd.DataFrame({\\n\",\n    \"    \\\"documents\\\": [\\\"First item\\\", \\\"second one here\\\", \\\"Is this the last?\\\"],\\n\",\n    \"    \\\"width\\\": [3, 4, 5],\\n\",\n    \"})  \\n\",\n    \"ct = ColumnTransformer(\\n\",\n    \"    [(\\\"text_preprocess\\\", FeatureHasher(input_type=\\\"string\\\"), \\\"documents\\\"),\\n\",\n    \"     (\\\"num_preprocess\\\", MinMaxScaler(), [\\\"width\\\"])])\\n\",\n    \"X_trans = ct.fit_transform(X)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# From https://scikit-learn.org/stable/modules/generated/sklearn.impute.SimpleImputer.html\\n\",\n    \"\\n\",\n    \"import numpy as np\\n\",\n    \"from sklearn.impute import SimpleImputer\\n\",\n    \"imp_mean = SimpleImputer(missing_values=np.nan, strategy='mean')\\n\",\n    \"imp_mean.fit(pd.DataFrame([[7, 2, 3], [4, np.nan, 6], [10, 5, 9]]))\\n\",\n    \"\\n\",\n    \"X = pd.DataFrame([[np.nan, 2, 3], [4, np.nan, 6], [10, np.nan, 9]])\\n\",\n    \"print(imp_mean.transform(X))\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# From https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.train_test_split.html\\n\",\n    \"\\n\",\n    \"import numpy as np\\n\",\n    \"from sklearn.model_selection import train_test_split\\n\",\n    \"X, y = pd.DataFrame(np.arange(10).reshape((5, 2))), pd.Series(range(5))\\n\",\n    \"X\\n\",\n    
\"list(y)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"X_train, X_test, y_train, y_test = train_test_split(\\n\",\n    \"    X, y, test_size=0.33, random_state=42)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"type(X_train)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"train_test_split(y, shuffle=False)\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### Linear Regression example taken / adapted from https://github.com/chendaniely/2021-07-13-scipy-pandas/blob/main/05-models.ipynb\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"import seaborn as sns\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"tips = sns.load_dataset(\\\"tips\\\")\\n\",\n    \"tips = pd.DataFrame(tips)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"pd.get_dummies(tips, drop_first=True)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"from sklearn import linear_model\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# 1. create the model object\\n\",\n    \"lr = linear_model.LinearRegression()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# 2. 
fit the model object\\n\",\n    \"lr.fit(X=tips[[\\\"total_bill\\\", \\\"size\\\"]], y=tips[\\\"tip\\\"])\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# look at the coefficients\\n\",\n    \"lr.coef_\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# look at the intercept\\n\",\n    \"lr.intercept_\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"tips_dummy = pd.get_dummies(tips, drop_first=True)[[\\\"tip\\\", \\\"total_bill\\\", \\\"smoker_No\\\"]]\\n\",\n    \"tips_dummy\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"lr2 = linear_model.LinearRegression()\\n\",\n    \"lr2.fit(X=tips_dummy.iloc[:, 1:], y=tips_dummy[\\\"tip\\\"])\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"lr2.coef_, lr2.intercept_\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"new_data = tips_dummy[[\\\"total_bill\\\", \\\"smoker_No\\\"]].tail() # not really new data\\n\",\n    \"new_data\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# use the model to give predicted tip values\\n\",\n    \"new_data[\\\"predicted_tips\\\"] = lr2.predict(new_data)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"new_data\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   
\"outputs\": [],\n   \"source\": [\n    \"type(new_data)\"\n   ]\n  }\n ],\n \"metadata\": {\n  \"interpreter\": {\n   \"hash\": \"9752fa87da8bf164654ccc33a595e9110c8fc9bb15d763374a7037fd32519b1f\"\n  },\n  \"kernelspec\": {\n   \"display_name\": \"Python 3.9.7 ('base')\",\n   \"language\": \"python\",\n   \"name\": \"python3\"\n  },\n  \"language_info\": {\n   \"codemirror_mode\": {\n    \"name\": \"ipython\",\n    \"version\": 3\n   },\n   \"file_extension\": \".py\",\n   \"mimetype\": \"text/x-python\",\n   \"name\": \"python\",\n   \"nbconvert_exporter\": \"python\",\n   \"pygments_lexer\": \"ipython3\",\n   \"version\": \"3.9.7\"\n  },\n  \"orig_nbformat\": 4\n },\n \"nbformat\": 4,\n \"nbformat_minor\": 2\n}\n"
  },
  {
    "path": "examples/jupyter/integrations/statsmodels.ipynb",
    "content": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Demonstrating statsmodels Modin Interoperability\\n\",\n    \"###  Currently statsmodels is not completely interoperable with Modin. All the examples in this section are taken/ adapted from https://www.statsmodels.org/devel/gettingstarted.html or https://www.statsmodels.org/stable/index.html\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"import statsmodels.api as sm\\n\",\n    \"import pandas\\n\",\n    \"import modin.pandas as pd\\n\",\n    \"from patsy import dmatrices\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"#### Example with sm.OLS()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"df = sm.datasets.get_rdataset(\\\"Guerry\\\", \\\"HistData\\\").data\\n\",\n    \"modin_df = pd.DataFrame(df)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"vars = ['Department', 'Lottery', 'Literacy', 'Wealth', 'Region']\\n\",\n    \"\\n\",\n    \"modin_df = modin_df[vars]\\n\",\n    \"\\n\",\n    \"modin_df[-5:]\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"modin_df = modin_df.dropna()\\n\",\n    \"\\n\",\n    \"modin_df[-5:]\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"y, X = dmatrices('Lottery ~ Literacy + Wealth + Region', data=modin_df, return_type='dataframe')\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"y = 
pd.DataFrame(y)\\n\",\n    \"X = pd.DataFrame(X)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"mod = sm.OLS(y, X)    # Describe model\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"res = mod.fit()       # Fit model\\n\",\n    \"\\n\",\n    \"print(res.summary())\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"sm.ols() is not interoperable with Modin currently.\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"#### Example with sm.ols(formula=)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"modin_df = pd.DataFrame({\\\"A\\\": [10,20,30,40,50], \\\"B\\\": [20, 30, 10, 40, 50], \\\"C\\\": [32, 234, 23, 23, 42523]})\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"import statsmodels.formula.api as sm\\n\",\n    \"result = sm.ols(formula=\\\"A ~ B + C\\\", data=modin_df).fit()\\n\",\n    \"print(result.params)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"print(result.summary())\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Replicating statsmodels workflow with pandas\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"import statsmodels.api as sm\\n\",\n    \"\\n\",\n    \"df = sm.datasets.get_rdataset(\\\"Guerry\\\", \\\"HistData\\\").data\\n\",\n    \"pandas_df = pandas.DataFrame(df)\"\n   ]\n  },\n  {\n   \"cell_type\": 
\"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"vars = ['Department', 'Lottery', 'Literacy', 'Wealth', 'Region']\\n\",\n    \"\\n\",\n    \"pandas_df = pandas_df[vars]\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"pandas_df = pandas_df.dropna()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"y, X = dmatrices('Lottery ~ Literacy + Wealth + Region', data=df, return_type='dataframe')\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"y = pandas.DataFrame(y)\\n\",\n    \"X = pandas.DataFrame(X)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"mod = sm.OLS(y, X)    # Describe model\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"res = mod.fit()       # Fit model\\n\",\n    \"\\n\",\n    \"print(res.summary())\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"#### Example with sm.ols(formula=)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"pandas_df = pd.DataFrame({\\\"A\\\": [10,20,30,40,50], \\\"B\\\": [20, 30, 10, 40, 50], \\\"C\\\": [32, 234, 23, 23, 42523]})\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"import statsmodels.formula.api as sm\\n\",\n    \"result = sm.ols(formula=\\\"A ~ B + C\\\", data=pandas_df).fit()\\n\",\n    \"print(result.params)\"\n   ]\n  },\n  {\n   
\"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"print(result.summary())\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": []\n  }\n ],\n \"metadata\": {\n  \"interpreter\": {\n   \"hash\": \"9752fa87da8bf164654ccc33a595e9110c8fc9bb15d763374a7037fd32519b1f\"\n  },\n  \"kernelspec\": {\n   \"display_name\": \"Python 3.9.7 ('base')\",\n   \"language\": \"python\",\n   \"name\": \"python3\"\n  },\n  \"language_info\": {\n   \"codemirror_mode\": {\n    \"name\": \"ipython\",\n    \"version\": 3\n   },\n   \"file_extension\": \".py\",\n   \"mimetype\": \"text/x-python\",\n   \"name\": \"python\",\n   \"nbconvert_exporter\": \"python\",\n   \"pygments_lexer\": \"ipython3\",\n   \"version\": \"3.9.7\"\n  },\n  \"orig_nbformat\": 4\n },\n \"nbformat\": 4,\n \"nbformat_minor\": 2\n}\n"
  },
  {
    "path": "examples/jupyter/integrations/tensorflow.ipynb",
    "content": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Demonstrating Tensorflow Modin Interoperability\\n\",\n    \"## All the examples in this section are taken/ adapted from https://www.tensorflow.org/tutorials/load_data/pandas_dataframe\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"import tensorflow as tf\\n\",\n    \"import modin.pandas as pd\\n\",\n    \"import pandas\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"SHUFFLE_BUFFER = 500\\n\",\n    \"BATCH_SIZE = 2\\n\",\n    \"\\n\",\n    \"csv_file = tf.keras.utils.get_file('heart.csv', 'https://storage.googleapis.com/download.tensorflow.org/data/heart.csv')\\n\",\n    \"\\n\",\n    \"modin_df = pd.read_csv(csv_file)\\n\",\n    \"modin_df.head()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"target = modin_df.pop('target')\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"numeric_feature_names = ['age', 'thalach', 'trestbps',  'chol', 'oldpeak']\\n\",\n    \"numeric_features = modin_df[numeric_feature_names]\\n\",\n    \"numeric_features.head()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"tf.convert_to_tensor(numeric_features)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"normalizer = tf.keras.layers.Normalization(axis=-1)\\n\",\n    \"normalizer.adapt(numeric_features)\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": 
[\n    \"## Replicating TensorFlow workflow with pandas\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"SHUFFLE_BUFFER = 500\\n\",\n    \"BATCH_SIZE = 2\\n\",\n    \"\\n\",\n    \"csv_file = tf.keras.utils.get_file('heart.csv', 'https://storage.googleapis.com/download.tensorflow.org/data/heart.csv')\\n\",\n    \"\\n\",\n    \"pandas_df = pandas.read_csv(csv_file)\\n\",\n    \"pandas_df.head()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"target = pandas_df.pop('target')\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"numeric_feature_names = ['age', 'thalach', 'trestbps',  'chol', 'oldpeak']\\n\",\n    \"numeric_features = pandas_df[numeric_feature_names]\\n\",\n    \"numeric_features.head()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"tf.convert_to_tensor(numeric_features)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"normalizer = tf.keras.layers.Normalization(axis=-1)\\n\",\n    \"normalizer.adapt(numeric_features)\"\n   ]\n  }\n ],\n \"metadata\": {\n  \"interpreter\": {\n   \"hash\": \"9752fa87da8bf164654ccc33a595e9110c8fc9bb15d763374a7037fd32519b1f\"\n  },\n  \"kernelspec\": {\n   \"display_name\": \"Python 3.9.7 ('base')\",\n   \"language\": \"python\",\n   \"name\": \"python3\"\n  },\n  \"language_info\": {\n   \"codemirror_mode\": {\n    \"name\": \"ipython\",\n    \"version\": 3\n   },\n   \"file_extension\": \".py\",\n   \"mimetype\": \"text/x-python\",\n   \"name\": \"python\",\n   \"nbconvert_exporter\": \"python\",\n   \"pygments_lexer\": \"ipython3\",\n   
\"version\": \"3.9.7\"\n  },\n  \"orig_nbformat\": 4\n },\n \"nbformat\": 4,\n \"nbformat_minor\": 2\n}\n"
  },
  {
    "path": "examples/jupyter/integrations/xgboost.ipynb",
    "content": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Demonstrating XGBoost Modin Interoperability\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## All the examples in this section are taken / adapted from https://xgboost.readthedocs.io/en/stable/python/python_intro.html\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"import xgboost as xgb\\n\",\n    \"import modin.pandas as pd\\n\",\n    \"import numpy as np\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"data_train = pd.DataFrame(np.arange(36).reshape((12,3)), columns=['a', 'b', 'c'])\\n\",\n    \"label_train = pd.DataFrame(np.random.randint(2, size=12))\\n\",\n    \"dtrain = xgb.DMatrix(data_train, label=label_train)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"data_test = pd.DataFrame(np.arange(12).reshape((4,3)), columns=['a', 'b', 'c'])\\n\",\n    \"label_test = pd.DataFrame(np.random.randint(2, size=4))\\n\",\n    \"dtest = xgb.DMatrix(data_test, label=label_test)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"param = {'max_depth': 2, 'eta': 1, 'objective': 'binary:logistic'}\\n\",\n    \"param['nthread'] = 4\\n\",\n    \"param['eval_metric'] = 'auc'\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"evallist = [(dtrain, 'train'), (dtest, 'eval')]\\n\",\n    \"num_round = 10\\n\",\n    \"bst = xgb.train(param, dtrain, num_round, evallist)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   
\"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"bst.save_model('0001.model')\"\n   ]\n  }\n ],\n \"metadata\": {\n  \"interpreter\": {\n   \"hash\": \"9752fa87da8bf164654ccc33a595e9110c8fc9bb15d763374a7037fd32519b1f\"\n  },\n  \"kernelspec\": {\n   \"display_name\": \"Python 3.9.7 ('base')\",\n   \"language\": \"python\",\n   \"name\": \"python3\"\n  },\n  \"language_info\": {\n   \"codemirror_mode\": {\n    \"name\": \"ipython\",\n    \"version\": 3\n   },\n   \"file_extension\": \".py\",\n   \"mimetype\": \"text/x-python\",\n   \"name\": \"python\",\n   \"nbconvert_exporter\": \"python\",\n   \"pygments_lexer\": \"ipython3\",\n   \"version\": \"3.9.7\"\n  },\n  \"orig_nbformat\": 4\n },\n \"nbformat\": 4,\n \"nbformat_minor\": 2\n}\n"
  },
  {
    "path": "examples/modin-scikit-learn-example.ipynb",
    "content": "{\n \"cells\": [\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 1,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"name\": \"stdout\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"Process STDOUT and STDERR is being redirected to /tmp/raylogs/.\\n\",\n      \"Waiting for redis server at 127.0.0.1:35043 to respond...\\n\",\n      \"Waiting for redis server at 127.0.0.1:49923 to respond...\\n\",\n      \"Starting local scheduler with the following resources: {'CPU': 4, 'GPU': 0}.\\n\",\n      \"\\n\",\n      \"======================================================================\\n\",\n      \"View the web UI at http://localhost:8889/notebooks/ray_ui93764.ipynb?token=23507892afd3d95e7604e7cd889b30382368ed888e79fc8c\\n\",\n      \"======================================================================\\n\",\n      \"\\n\"\n     ]\n    }\n   ],\n   \"source\": [\n    \"%matplotlib inline\\n\",\n    \"\\n\",\n    \"import numpy as np\\n\",\n    \"import modin.pandas as pd\\n\",\n    \"import matplotlib.pyplot as plt\\n\",\n    \"import sklearn\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 2,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>Unnamed: 0</th>\\n\",\n       \"      <th>CRIM</th>\\n\",\n       
\"      <th>ZN</th>\\n\",\n       \"      <th>INDUS</th>\\n\",\n       \"      <th>CHAS</th>\\n\",\n       \"      <th>NOX</th>\\n\",\n       \"      <th>RM</th>\\n\",\n       \"      <th>AGE</th>\\n\",\n       \"      <th>DIS</th>\\n\",\n       \"      <th>RAD</th>\\n\",\n       \"      <th>TAX</th>\\n\",\n       \"      <th>PTRATIO</th>\\n\",\n       \"      <th>B</th>\\n\",\n       \"      <th>LSTAT</th>\\n\",\n       \"      <th>PRICE</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>0</td>\\n\",\n       \"      <td>0.00632</td>\\n\",\n       \"      <td>18.0</td>\\n\",\n       \"      <td>2.31</td>\\n\",\n       \"      <td>0.0</td>\\n\",\n       \"      <td>0.538</td>\\n\",\n       \"      <td>6.575</td>\\n\",\n       \"      <td>65.2</td>\\n\",\n       \"      <td>4.0900</td>\\n\",\n       \"      <td>1.0</td>\\n\",\n       \"      <td>296.0</td>\\n\",\n       \"      <td>15.3</td>\\n\",\n       \"      <td>396.90</td>\\n\",\n       \"      <td>4.98</td>\\n\",\n       \"      <td>24.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>1</td>\\n\",\n       \"      <td>0.02731</td>\\n\",\n       \"      <td>0.0</td>\\n\",\n       \"      <td>7.07</td>\\n\",\n       \"      <td>0.0</td>\\n\",\n       \"      <td>0.469</td>\\n\",\n       \"      <td>6.421</td>\\n\",\n       \"      <td>78.9</td>\\n\",\n       \"      <td>4.9671</td>\\n\",\n       \"      <td>2.0</td>\\n\",\n       \"      <td>242.0</td>\\n\",\n       \"      <td>17.8</td>\\n\",\n       \"      <td>396.90</td>\\n\",\n       \"      <td>9.14</td>\\n\",\n       \"      <td>21.6</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>2</td>\\n\",\n       \"      <td>0.02729</td>\\n\",\n       \"      <td>0.0</td>\\n\",\n       \"      <td>7.07</td>\\n\",\n       
\"      <td>0.0</td>\\n\",\n       \"      <td>0.469</td>\\n\",\n       \"      <td>7.185</td>\\n\",\n       \"      <td>61.1</td>\\n\",\n       \"      <td>4.9671</td>\\n\",\n       \"      <td>2.0</td>\\n\",\n       \"      <td>242.0</td>\\n\",\n       \"      <td>17.8</td>\\n\",\n       \"      <td>392.83</td>\\n\",\n       \"      <td>4.03</td>\\n\",\n       \"      <td>34.7</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      <td>3</td>\\n\",\n       \"      <td>0.03237</td>\\n\",\n       \"      <td>0.0</td>\\n\",\n       \"      <td>2.18</td>\\n\",\n       \"      <td>0.0</td>\\n\",\n       \"      <td>0.458</td>\\n\",\n       \"      <td>6.998</td>\\n\",\n       \"      <td>45.8</td>\\n\",\n       \"      <td>6.0622</td>\\n\",\n       \"      <td>3.0</td>\\n\",\n       \"      <td>222.0</td>\\n\",\n       \"      <td>18.7</td>\\n\",\n       \"      <td>394.63</td>\\n\",\n       \"      <td>2.94</td>\\n\",\n       \"      <td>33.4</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>4</th>\\n\",\n       \"      <td>4</td>\\n\",\n       \"      <td>0.06905</td>\\n\",\n       \"      <td>0.0</td>\\n\",\n       \"      <td>2.18</td>\\n\",\n       \"      <td>0.0</td>\\n\",\n       \"      <td>0.458</td>\\n\",\n       \"      <td>7.147</td>\\n\",\n       \"      <td>54.2</td>\\n\",\n       \"      <td>6.0622</td>\\n\",\n       \"      <td>3.0</td>\\n\",\n       \"      <td>222.0</td>\\n\",\n       \"      <td>18.7</td>\\n\",\n       \"      <td>396.90</td>\\n\",\n       \"      <td>5.33</td>\\n\",\n       \"      <td>36.2</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"   Unnamed: 0     CRIM    ZN  INDUS  CHAS    NOX     RM   AGE     DIS  RAD  \\\\\\n\",\n       \"0           0  0.00632  18.0   2.31   0.0  0.538  6.575  65.2  4.0900  1.0   \\n\",\n       \"1           1  
0.02731   0.0   7.07   0.0  0.469  6.421  78.9  4.9671  2.0   \\n\",\n       \"2           2  0.02729   0.0   7.07   0.0  0.469  7.185  61.1  4.9671  2.0   \\n\",\n       \"3           3  0.03237   0.0   2.18   0.0  0.458  6.998  45.8  6.0622  3.0   \\n\",\n       \"4           4  0.06905   0.0   2.18   0.0  0.458  7.147  54.2  6.0622  3.0   \\n\",\n       \"\\n\",\n       \"     TAX  PTRATIO       B  LSTAT  PRICE  \\n\",\n       \"0  296.0     15.3  396.90   4.98   24.0  \\n\",\n       \"1  242.0     17.8  396.90   9.14   21.6  \\n\",\n       \"2  242.0     17.8  392.83   4.03   34.7  \\n\",\n       \"3  222.0     18.7  394.63   2.94   33.4  \\n\",\n       \"4  222.0     18.7  396.90   5.33   36.2  \"\n      ]\n     },\n     \"execution_count\": 2,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"data = pd.read_csv(\\\"data/boston_housing.csv\\\")\\n\",\n    \"\\n\",\n    \"data.head()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 3,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/plain\": [\n       \"modin.pandas.dataframe.DataFrame\"\n      ]\n     },\n     \"execution_count\": 3,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"features = data.drop(\\\"PRICE\\\", axis=1)\\n\",\n    \"labels = data[\\\"PRICE\\\"]\\n\",\n    \"\\n\",\n    \"type(features)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 4,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/plain\": [\n       \"LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)\"\n      ]\n     },\n     \"execution_count\": 4,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"from sklearn.linear_model import LinearRegression\\n\",\n    \"\\n\",\n    \"lm = LinearRegression()\\n\",\n    \"lm.fit(features, 
labels)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 5,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"image/png\": \"iVBORw0KGgoAAAANSUhEUgAAAYIAAAEWCAYAAABrDZDcAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJztnXmcXVWV77+/qlxIhakC5iEUhOAEiiGkSQs2tAIOqAimIYqIttq0tO/52vGlDU9awpNu0Girrwd9KAqKIpMWUyvYQDtgMySEgBHSrULAYgqQYkoBlar1/jjnVE7dOufec4dz77n3ru/nU5+69wx7rzPcvfZea+21ZWY4juM4vUtfuwVwHMdx2osrAsdxnB7HFYHjOE6P44rAcRynx3FF4DiO0+O4InAcx+lxXBF0EJKOkPSHBs7/uqS/baZMCXWYpJel7DtZ0nV1lrtS0oWNSecUlWY+X0nzJT0jqb8Z5fUCrghajKT7JI2FL+rDks6XtGMO9XxA0i/j28zsw2b2uWbXlRUz+56ZvbnV9XaCEgnfgxfC9+IJST+VtH+75SoCYQdoMrw3T0vaIOmDaceb2f1mtqOZTbRSzk7GFUF7ONbMdgQOAhYDp7VZHqcYfCF8L4aAEeC8NstTJB4M783OwKeBb0h6VflBkma1XLIuwBVBGzGzh4FrCRQCAJK2l/RFSfdLeiQ05wwknS9phaTfhb2k30j6s3D7K4GvA68Ne1Gj4fbzJZ0VO/9Dkn4b9kCvlLRnbJ9J+rCk/5I0KumfJSnc9zJJP5P0pKTHJF1cJtobU86bNkoJ6/iopN+H5aySVOmdnC3p4vB6b5e0KFbWnpIul7RJ0r2SPhpufwvwv4ETw3uxTtKRku6KnftTSbfFvv9C0tJK5Yb7+mLP4HFJl0jaNdy3ILy+94fP8jFJn6lwbVOY2RhwCdPfiz5Jp0vaKOlRSd+RtEts/3GS1of3/N/DdyDad5+k5ZLulPSspPMk7S7px+G9/DdJc8NjZ0u6MLyeUUm3Sdo9Sc609y/c9wFJvwzf5c3hvXtrbP++4Tv0tKSfAi/KeG/MzIaBzcCrYvf5FEn3AzfEts0K69pV0rclPRjKMhyT4+2S7giv9VeSDswiR9dhZv7Xwj/gPuCN4ee9gLuAr8b2fxm4EtgV2Am4Cjg73HcE8IfYse8E9iRQ6CcCzwJ7hPs+APyyrO7zgbPCz0cBjwF/BGwP/CPw89ixBlwNDALzgU3AW8J9FwGfCeudDRye8bxpMoXH3hhe63zgP4G/TLlvK4FxYBlQAv4XcG/4uQ9YA3wW2A54CfB74OjYuRfGyhoAniNofErAIwQ98J3CfWPAbhnK/Rhwc/gctwf+H3BRuG9BeH3fCMtcBDwPvDLl+uLPZgfgu8C62P6/AH4byrAj8EPgu+G+V4TP/k3h9fxNeOx2sXfuZmB3gtHGo8DtBKPR2cANwBnhsX9F8M7NAfqBg4GdU2Su9v6NAx8Ky/nvwIOAwv3/AfxDeN9eBzwdf0Zl9RxB+N6Hdf1ZWPZ+sfv8nfC+DcS2zQrPuQa4GJgb3p/Xh9sXh/fikFDG94f3avt2txMtb5faLUCv/YUv2jPhi2/A9cBguE/hj+mlseNfC9wbfp76QaSUfQfwjvDzB6isCM4jMEVE+3YMf1wLwu/G9Ab+EmBF+Pk7wLnAXgkyVDpvmkzhsW+Jff8fwPUp17YSuDn2vQ94CPjT8Id8f9nxpwHfjp17Ydn+XwDHA4cC14VyvgU4ErgzPKZauXcDb4jt2yO8h7NijdFesf23Au9Oub7zCZTTKDBJoO
QOjO2/Hvgfse/7xer6W+CSsnszAhwRe+dOju2/HPha7PtfA8Ph578AfhWvu4Z3u/z9+21s35zwfryYQOlvBXaI7f9++TOK7TsivCejwBNhPe8O90X3+SWx46Nts8JnMgnMTSj3a8DnyrZtIFQUvfTn9rT2sNTM/k3S6wl+AC8ieMnnEfxg1oTWFAiUQ2L0g6Q/Bz5J8OJD0JhnGmIT9ORuj76Y2TOSHifoMd4Xbn44dvyWsHwIepyfA26VtBn4kpl9K3Zs2nlJPBD7vDGUq+qxZjapIIJqT4If/Z4KTWAh/QSNfRo/I1Ss4efNwOsJeu0/C4/Zp0q5+wA/kjQZ2z9B0POOqOVefNHMTpc0H/gJQWN/Z7hvT4L7E7GRoKHbvXxfeG8eIHiWEY/EPo8lfI/k+i6wN/ADSYPAhcBnzGy8XNgM79/UtZvZlvCdjo7ZbGbPll3P3uV1xHjQzPaqsP+BlO17A0+Y2eaEffsA75f017Ft21H5HexK3EfQRszsZwQ9wS+Gmx4j+FEeYGaD4d8uFjjJpiFpHwKzw/8EdjOzQeDXBIoDgsaxEg8S/BCi8nYgMIeMZJD7YTP7kJntSWBK+BelhIxmIP7jnx/KVfXY0JewV3j8AwSjpsHY305m9rZI5ISyIkXwuvDzzwgUwevZpgiqlfsA8Nay/bPNrOo9rISZ3U9gdvqqtvmHpj0vtvWqHynfF/pk9ibDs0yoe9zMzjSzVwF/Arwd+PPy4zK8f5V4CJgbvnPx62mEtPf9AWDXUKkl7fu7suc3x8wualCWjsMVQfv5CvAmSYvMbJLgx/VlSf8NQNKQpKMTztuB4OXfFB73QeDVsf2PAHtJ2i6l3ouAD0o6SNL2wN8Dt5jZfdUElvROSVHvbHMox2SFUyqxXNJcSXsTNH7ljuc4B0s6PnQCfpyg934zgcnlaUmfljQgqV/SqyX9cXjeI8ACTXdE/4qgx/0a4FYzW0/QmB4C/Dw8plq5Xwf+LmwUkTRP0jvqvA/TMLOfEjTwp4abLgI+ETpZdyR4Xheb2VYCs9Yxkt4gqQR8Krw3v6q1XgWO9IUKYvCfIjA/JT3bau9fpWvbCKwGzpS0naTDgWNrlTVjXQ8BPyborMyVVJL0unD3N4APSzpEATtIOkbSTnnIUmRcEbQZM9tEYHP/bLjp0wSOvpslPQX8G0GDVX7eb4AvETjdHgEWAjfFDrkBWA88LOmxhPP/jcC2fDlBD+2lwLsziv3HwC2SniFwbH/MzH6f8dxyriBwyN5B4NSrFDJ5BYFTcjPwPuD4sAc7QdBzPYjAtv4Y8E0giqq5NPz/uKTbAUKzxO3AejN7Idz/H8BGM3s0PKZauV8Nr/86SU8TKKVD6rsNiawC/iZU1N8iMNv8PJTlOQLbPma2AXgvgcP/MYJG9djYddXCi4HLCJTA3QSjo++WH5Th/avGewju1RPAGQS/gbx4H4FCu4fAOfxxADNbTeDM/ieCd+q3BL6NniPy4DtOy5FkwMvN7LftlsVxehkfETiO4/Q4rggcx3F6HDcNOY7j9Dg+InAcx+lxOmJC2Yte9CJbsGBBu8VwHMfpKNasWfOYmc2rdlxHKIIFCxawevXqdovhOI7TUUjaWP0oNw05juP0PK4IHMdxehxXBI7jOD2OKwLHcZwexxWB4zhOj5Nr1JCk+wgWYJkAtprZEgVL+V1MkMP8PuBdKbnCnQIwvHaEVddu4MHRMfYcHGD50fuxdPFQ9RMLTLOvqZX3qJl1tfrZNqu+qJyR0TH6JSbMpv7PKfUxtnUSM+iXOOmQvTlr6cKG6y8/98j953HjPZua+hzi1zPU4t9arjOLQ0WwxMwei237AsFCEedIWkGwctCnK5WzZMkS8/DR1jO8doTTfngXY+MTU9sGSv2cffzCjlUGzb6mVt6jZtbV6mfbrPqSyqnGew+dz5J9dq27/ix1NvM5NFpmHElrzGxJtePaYRp6B3BB+PkCYGkbZHAysOraDTNe0LHxCV
Zdu6FNEjVOs6+plfeomXW1+tk2q76kcqpx0S0PNFR/ljqb+RwaLbMe8lYERpCrfY2kaIGN3cPFIiBYym73pBMlnSpptaTVmzZtyllMJ4kHR8dq2t4JNPuaWnmPmllXq59ts+qrR74Js4bqz1pnM59DI2XWQ96K4HAz+yPgrcBHYisDAWCBXSrRNmVm55rZEjNbMm9e1RnSTg7sOThQ0/ZOoNnX1Mp71My6Wv1sm1VfPfL1Sw3Vn7XOZj6HRsqsh1wVQbR2a7ji048IlgV8RNIeAOH/R/OUwamf5Ufvx0Cpf9q2gVI/y4+esWBax9Dsa2rlPWpmXa1+ts2qL6mcapx0yN4N1Z+lzmY+h0bLrIfcoobChan7zOzp8PObgf9DsLTf+4Fzwv9X5CWD0xiRk6qbooaafU2tvEfNrKvVz7ZZ9cXLqTVqqN76k2SPooYiGeL2/FquKe16uiZqSNJLCEYBECic75vZ30najWCx7fnARoLw0ScqleVRQ47jJNHO8OZOiKrLGjWU24ggXMx8UcL2x4E35FWv4zi9QXlDPDI6xmk/vAuorVdeqfxKSqZSJFJRFEFWfGax4zgdSZ4hsJGSGRkdw9imZIbXjkwd001Rda4IHMfpSPJsiLMomW6KqnNF4DhOR5JnQ5xFyXRTVJ0rAsdxOpI8G+IsSmbp4iHOPn4hQ4MDCBgaHCiUo7gWOmKpSsdxnHLyDIFdfvR+iRFB5Upm6eKhjmz4y3FF4DhOx5JXQ9yNc2gq4YrAcZzCUYT0593S28+CKwLHcQpF3vMDnJm4s9hxnELRjenPi44rAsdxCsPw2hFGumiiVqfgisBxnEIQmYTS6MSJWp2C+wgcxykElVbrKg/dLIIzuZtwReA4TiGoZPqJT9RyZ3LzcdOQ4ziFIM30MzQ4kDnrp1MfrggcxykEWVNGtDLr5/DaEQ475wb2XXENh51zw7Tso92Em4YcxykEWWfz7jk4kBhZ1Gxnci+ZoFwROI5TGLLM5s2aB6hRumnhmWq4InAcp6NoVR6gblp4phquCBzHyZU8Qj1bkQeoVSaoIuDOYsdxciPLko/1lpu3E7ebFp6phisCx3FyI83OvvLK9XWXmZdyKaebFp6phpuGHMfJjTR7+ujYOMNrR+pqVFvpxO2VVNQ+InAcJzcq2dM/fvEddZl1esmJ2ypcETiOkxvV7On1mHXyXLS+nF6ZUOaKwHGc3Fi6eIi5c0oVj6k1PUSrnLit8kUUAVcEjuPkyhnHHjCj4S5nZHQsc8+7VU7cXspp5M5ix3FyJT4BLG3RGcHUviypHFrhxO0lX4SPCBzHyZ2li4e4acVRfOXEg2aMDgRY2fFF6Hm30hfRblwROI7TMpLMOuVKIKLdPe8kX0SpXzz7/Naucx67achxeoSirOpVbtY57JwbCpnKoTyn0eCcEs88t5XRsXGgu7KR+ojAcXqAIkfAFDmVQ2TSuvecY5iz3SzGJ6ePX4pgwmoGrggcpwcocgRMp6Ry6GbnsZuGHKcHKHoj1gmpHLo5G6mPCBynB+ilCJi8KLIJq1FcEThOD9DNjVir6BQTVj3kbhqS1A+sBkbM7O2S9gV+AOwGrAHeZ2Yv5C2H4/QyrVrVq9vpBBNWPbTCR/Ax4G5g5/D754Evm9kPJH0dOAX4WgvkcJyeptMbsaKEv3YjuZqGJO0FHAN8M/wu4CjgsvCQC4ClecrgOE7nU+Tw124gbx/BV4C/ASbD77sBo2a2Nfz+ByBRpUs6VdJqSas3bdqUs5iO4xSZIoe/dgO5KQJJbwceNbM19ZxvZuea2RIzWzJv3rwmS+c4TidR9PDXTidPH8FhwHGS3gbMJvARfBUYlDQrHBXsBfjYznGcinRzDH8RyG1EYGanmdleZrYAeDdwg5mdDNwILAsPez9wRV4yOI7THTQz/LVXVh2rhXbMI/g08ElJvyXwGZzXBhkcx+kgmhXD707nZGSWlgS2OCxZssRWr17dbjEcx+lw0jKdDg0OcN
OKo9ogUb5IWmNmS6od57mGHMdJpBvj9t3pnIynmHAcZwbdakLxnEvJuCJwHGcG3Ri3P7x2hC0vbJ2x3XMuuWnIcXqOLCafbjOhRCOccuU2OFBi5XEHdLzJq1FcEThOD3H68F187+b7p9YJTltucZeB0tSSjHF2GSi1QsymkzTCAdhh+1k9rwTATUOO0zMMrx2ZpgQikkw+UnIZaduLTreNcJqNKwLH6RFWXbthhhKIKG8QR7fMHA1U2l503ElcGVcEjtMjVOr9ljeIaQ2kAYv/z3UdFz3kC/NUxhWB49RIp6YoSGvcBTMaxOVH70epP9kOtHnLOB+/+I6OUgjdvLpYM3BnsePUQHn0SZqztYgsP3q/GZEzAk4+dH6y7FWSDmzeMj517VD81c86fWGePHFF4Dg1UCm+vuiNTC3LVa66dgPjk9XTz4yNT3DmVet5bnyyI5WjE+CmIcepgU6PPlm6eIjlR+/HnoMDPDg6xqprNySad2q5ns1bxrtu8lmv4SMCx6mBTs+Ln9W0lXadtdCocuzGXEdFxUcEjlMDRY8+qebIzpo6Iuk6a6UR5dituY6KiisCx6mBIkefZGk803rpI6Nj0xRH/DrroVHl2I25joqMm4Ycp0aKGn2SxZE9OKfE5pRJYeVmoqWLh1i98QkuvPn+qnUPlPrYdYftm2bG6XRfTKfhisBxuoQsjWe1dajKFcdFtzyQqe6tk9ZUG36n+2I6jaqmIUmvkHS9pF+H3w+UdHr+ojmOUwtZ0ig8mZBIrpy44pjIuILh+IQ11WxTdF9Mt5HFR/AN4DRgHMDM7iRYjN5xnAKRpfHM0qOOH9NfQ5a5ZpptiuyL6UaymIbmmNmtmv5CzFzdwXGctpJlwljS7OI45YrjpEP2zuQjgOabbYrqi+lGsiiCxyS9lHDCuaRlwEO5SuU4Tl1UazyjfWdetX7KaSyCH/dQguI4a+lCfnT7CM++kKw4IpLMNj4PoHPIogg+ApwL7C9pBLgXeG+uUjmOUxdZGt/VG5+Ylk7a2NaQJzXUW6oogW2lTJdj+WXrGJ8Ito+MjrH8snWAp50oIlUVgZn9HnijpB2APjN7On+xHMeplSyzhrMsTrPq2g2MjI7RLzFhNvW/EmPjkyy/dFtDf+ZV66eUQMT4hHHmVetdERSQLFFDfy9p0MyeNbOnJc2VdFYrhHMcJztZJmFVWpwmUhxR2GbU+GeOHJrcFjmUNlchbbvTXrJEDb3VzEajL2a2GXhbfiI5Tm9T73oHWeYRVIrs6ZdSncgQ+BLqlcEpNll8BP2Stjez5wEkDQDb5yuW4/Qmta53EPcJ9KWYcOLRPGkTtUT1nn+WcUFU1+BAidGEOQuDA6UMpTitJsuI4HvA9ZJOkXQK8FPggnzFcpzepJYcO+W5hdIa8mef3zo1qkiaaxAtTtNoI13q01Tk0MrjDqDUpxn7Vx53QEN1OPmQxVn8eUl3Am8IN33OzK7NVyzH6U1qybGTpDSSGB0bnzGqSIosuubO9KjwgVI/s0t9qTb+wYESK487YKr8WhbBcdpPplxDZvZj4Mc5y+I4PU8tOXZqscfHcwilzTUYreDIHRufYPtZfZT6NS0aaKDUnzjj1+cQdBapikDSL83scElPM908KMDMbOfcpXOcHuPI/efNCO9My7FT6+Ix1RRHtfJGx8Yp9Ym5c0qMbhmf0cBHjf/I6NjUJDXwpSs7gVQfgZkdHv7fycx2jv3t5ErAcZrP8NoRLl8zMqPXdcLByT34WheP2aWKDyBLeeOTxpztZnHvOcdw04qjpimBeOhppXkKTvGo6CyW1C/pnlYJ4zi9TJLN34Ab79mUeHyUmG3unGxO3mdf2FoxFLU80Vsa5SOL4bUjfOqSdVX9FR5aWlwqKgIzmwA2SJrfInkcp2fJunpYnKWLh1j72TdnUgblqaKT5issXTzETSuO4t5zjkldnSzur4hGAlkmnflaAsUli7N4LrBe0q3As9FGMzsuN6kcpwepZK
NPSxcROWSzzf3dpmyyzFdIylRa7q/IGrnkawkUmyyK4G9zl8JxnKopouORP+UNeVaiXnm1+QqRgtlloMTsUl+icxgqm3sqZTV1ikVFRSBpKfAy4K5a5w5Img38nGAW8izgMjM7Q9K+wA+A3YA1wPvM7IV6hHecbiIee19pZBAdU6sSiPfKK5mh4gpmdGycgVI/Xz7xoMSGPG0U0y/xpXct8sa/Q0j1EUj6F+ATBA325yTVOjJ4HjjKzBYBBwFvkXQo8Hngy2b2MmAzcEpdkjtOF7J08RDLj94vdWUwEZh1agkbBdhhu23x/sNrR+hLKT8p31CliJ+0VdFcCXQWlUYErwMWmdmEpDnAL4DPZS3YzAx4JvxaCv8MOAp4T7j9AmAl8LXaxHac9pHnZKlqzlcjGA1kSQ0dJ1pT4PThuxLTUEPQgKeNMtJGED6DuDuopAheCKOGMLMtUg2Ll4ZI6icw/7wM+Gfgd8ComUVLXf4BSHxjJJ0KnAowf74HLTnFoNakcLVy5lXrM4VhZlcBAQasvHI9T46NJ57bL3H28QtTzVKVIn58ScnOp5Ii2D/MMQTBiPSl4fdoZvGB1QoPFclBkgaBHwH7ZxXMzM4lWBmNJUuW1PreO04uVHKy1tMYxkcXg3NKmfL1R41ymm0+baSQlA00YsKMT1x8B4NzSpT6xPjk9DQSHvHT3VRSBK9sViVmNirpRuC1wKCkWeGoYC8gW7J1xykAtSSFq0b56CKLEij1Bxk+V298InFR+UNfMpdf/e6JmkcMEIwaNm8Zp9QvBgdKPDmWHCnkdB+pisDMNjZSsKR5wHioBAaANxE4im8ElhFEDr0fuKKRehynldSSFC6NeE6eWhmfMD51ybrUXv99j49x8qHzE/MVVcoeWl7H089tTY0UcrqPLOsR1MsewI2hOek24KdmdjXwaeCTkn5LEJF0Xo4yOE5TSYuSyWo6Kc/JUw+VnMQPjo5x1tKFnHzo/KnIo36JEw4e4oxjD8icm2jCjNN+eFfm1dGcziZTGup6MLM7gcUJ238PvCaveh0nT+qNkmlkFFALew4OMLx2hItvfWDamsMX3/oAS/bZdcohHMm+5YWtqaOERnwfTmchqyEErV0sWbLEVq9e3W4xnALRSfnua5kFXOoXO2w3q6Jjt9K5q5YtYuWV61OXibzjjDfXJJuAe885pmZZnGIgaY2ZLal2XNURgaS7mJlV9klgNXCWmT1en4iOUx95h3A2m6yzgMtTMSxYcU1tFYW/0jQlkrQ9qivN7+CJ4nqDLKahHwMTwPfD7+8G5gAPA+cDx+YiWRPopF6jk51mh3DmTbWIorRVvoZqXHhmfNLqyvkf1VstwZzTvWRxFr/RzE4zs7vCv88ArzezzwML8hWvfsoX9o56je786nyaGcLZCir1qocGBxKVAFBXIzwyOsacUvrPOu39L1+LoJJcTveRZUTQL+k1ZnYrgKQ/BqLQg63pp7WXTus1OtlpRghnoySNNiHZiZyWzjnLWr8DpT7Gxiczy9UvsX2pny0p51R6/32GcO+SRRH8JfAtSTsS+I6eAv5S0g7A2XkK1wid1mt0spMlT36eJPkoll+6DsTUwu5JfotqZsqkckv9Spzpm+ZzmDCruAi9v/9OElUVgZndBiyUtEv4/cnY7kvyEqxRitBrdPKh3YnOkkab8YY6Ij4CzdLbTix3wthhu34mxyeZMJuaE3DjPZtS/QcSpAUDStuc0IMDJVYed4CPApzqPgJJ20t6D/AR4GOSPivps/mL1hiNTvxxiku7gwBq6VVnOTZaMjKtYX/2hYlpcwIuXzPCkfvPS50clqCTEveNjo2z/NJ17jdzMjmLrwDeQeAPeDb2V2jc+dWdFCEIoJZRZbVj65lpPDY+wdXrHmJ2BacwBL3/qc8px9QbaeR0F1l8BHuZ2VtylyQH3PnVfRQhCCDJR1Hq0zQfAWQbgdaz0hhUziQ6hcF94WSwfSvMSXC/gZNlRPArSQtzl8
RxMlCEIICk0eaqdy5i1bJFNY9AK8k9NDjA4ECpbjnjo5FKIxP3mzlZRgSHAx+QdC/B8pOZ1yNwnGZTlCCAtNFmraOStOsZGhzgphVH1b1IfZSuOmL50fux/LJ100YsEIxk3G/mZFEEb81dCsfJSLtDRyPKHdZH7j+PG+/ZVLMDu9r1JEVIVUoUBzB3Tokzjp0eDRR9PvOq9VPnetSQE5GqCCTtbGZPAU+3UB7HqUi7QkfLVxJ75rmtUyGjI6Nj0xaJSct9NLx2JLEhPvv4hdO2bz+rssX2mAP34PI1I9OUhwhSDZXnK4pTyWfW7kgsp72kZh+VdLWZvT00CRnTAw/MzF7SCgHBs486+VOpIazXPNMvMWk2NWK46NYHmEiI7ZxT6mN8wqbNRYga9rllSgeCEUN8LkF0bHz/2ccHbr0sjXvS9aXNfHY6i6zZRz0NtdPTlPfSI+INYaUY/3YR+RDSZBscKPH81slMjXtaGVEdTueSVRFkmVB2WJhOAknvlfQPkuY3Q0jHaSdRTzjJ3j42PsHKK9dz+nBjq4nlRSRTWtTR6Nh4aphtOUWIxHLaS5bw0a8BWyQtAj4F/A74bq5SOU4LqBbDPzo2nrhAfBGIlqGsNVoqqXFPK8PDSnuHLFFDW83MJL0D+CczO0/SKXkL5jh50aplI/Nkwox9V1zD4JxSYlK6tIXqo6UsyyOeyp3Pno6lt8iiCJ6WdBrwXuB1kvqA+me5OE4CrYpaqdfxW0QMZjT2USQSJC80c+T+82ZkOL3w5vsZKPUxd06J0S3jHjXUg2RRBCcC7wFOMbOHQ//AqnzFcnqJVi49WW9Kh07h+a3BOgRpYbZp1x+seSC+fOJBrgB6EI8aqoLHV+dP1qiVtGdRyzPad8U1Mxbg7jYqRftUu36PFOoumrl4/dNsC1PejsAs9IyZ7dKYiMWn0xZJ71SyRK2kPYvVG5+YZt+u9ozSUjp0E5Wifapdv0cK9SZVo4bMbCcz29nMdgYGgBMIIom6nkqZLp3mkSVqJe1ZXHTLAzU9oyP3n9egtMWnUrRP0jodWc91upcsPoIpLLAjDUs6A1iRj0jFweOrW0OW/EFpvdiJFNNm9IyG146w8sr12dI2dwHVon2Scg5lPdfpXrKYho6Pfe0DlgDP5SZRgShKpstuJ0v+oH4ptdFPIgqTXH7pusRlJLuRfilTWogo55D7v5yIqs5iSd+Ofd0K3Ad8w8wezVGuabTLWew5WIrDggoLq5Qv5h49o06fK1APgrod6U730TRnsZl9sDkidR7tXiS9CBSo2fxuAAAcaklEQVSlIRmqkLc/CoscGR2jX2JsfCLR9NELxJfvrNWR7hSHVv/usowI9gL+ETgs3PQL4GNm9ofcpCrDk861hyKNiKrJUoSJYpH5qlYzVjlfOfEgPnXJutQySv0Co6rJK00ODxEtNs383TUt6RzwbeBKYM/w76pwm9PlFClqKml5yPgPowgTxaJGtxElAMG1Vipj1bJFrHrntmUxq8lTjgc7FJt2/O6yRA3NM7N4w3++pI/nJZBTHIoWNVVpYZVuadzmzgmyt1QyhUX3IPqfNiEvbUTgwQ7Fph2/uywjgsfD9NP94d97gcdzk8gpDHlnpRxeO8Jh59zAviuu4bBzbmB47UjdZWTpg8+dU6KvUhe6zZT6xRnHBnmCkuL9S/3i2ee3zrhfSccOlPo56ZC9E7d7iGixaUc22CyK4C+AdwEPAw8By4CedSD3EmkNTDMaksgOOjI6Ns3BWYsyiJdRjYFSP8+PT1DUSNJ+iVXLFk3r7cdNYXPnlMCC1Njl9yvNbHbW0oUVzWlOMcnzd5eG5xpyKpJX9EIzVsWqtHLY3DklzODJsW3ZND9+8R0NyZwXWRyBvopYb9Gs313D4aOS/hHSR9xm9tGapXI6jkp2+UaoZAfN+iNIK0PA2s++Gdj2g/pEAZSABHvuMjAV5jphNhX+CkFjn3bNRfPXOPmS1+8ujUrO4ngX/EzgjF
oKlrQ38B1gdwKFcq6ZfVXSrsDFwAKCyWnvMrPNtZTtdD6Dc0qJcf6Dc0qZE/2lzfzuk1iw4hr6RKFMQWYk9t6zJDf0We5OnqT6CMzsgugP2Bz/Hm6rxlbgU2b2KuBQ4COSXkWQo+h6M3s5cD09kLPImc7w2hGeeW5r4r5a1tpNS6AWRcoUSQlAYMYpZ3jtCJ+6ZF3Va26H3djpHbImnav5J2VmDxE4lzGzpyXdDQwB7wCOCA+7APh34NO1lu90Lquu3ZA6GSrNZZVkAimf+d3X4ESuvFmw23RFEI0EssT7+yx3J09qyj5aL5IWAIuBW4DdQyUBQSTS7innnAqcCjB//vz8hXRaRj127bgJJMmHABTWGRxx0++e4PThuzhr6UKg+iS4crNPq+3GTu+QahqS9LSkpyQ9BRwYfY62Z61A0o7A5cDHzWzaeWFa68TukJmda2ZLzGzJvHndn0O+l6jVrh03gSSFnS6/bB2fLLgSiLjolgemPlcKe3Wzj9NKKvkIphakMbNZsc87hYvUVEVSiUAJfM/MfhhufkTSHuH+PYCWZTF1ikG1xVHiZEklMT5hTDZdynyIzEDDa0dS00NkTSftOM0iN9OQJAHnAXeb2T/Edl0JvB84J/x/RV4yOMUk3qhXmwxWHmXT6eGSfao8/0HAl961yJWA01Ly9BEcBrwPuEtSNG7/3wQK4BJJpwAbCWYtOz1G3N79qr/9MVvGZ/bpo7w7cTp+zWGrbBIqrqvb6WaypJioCzP7pZnJzA40s4PCv381s8fN7A1m9nIze6OZPZGXDE5n8PfHHxikVo4Rz7sTp4hrDvdXSGBUvieLCavWVBuO0ygtiRpyWktRFpPJSi2hkTfes6nV4lVkTqkvcTTTCNEcgiI/M6e7cEXQZWSZpdoKGWpVRFlDI4vmI6imBOo19RTtOmuh0zoiTo6mIac9tHsxmWZkFa1Udp8KnEe6iXRq6og8n7+TH64Iuox2JyfLSxFVm4VbROpVWUlzCJqxdkMraHdHxKkPVwRdRjsWtYiTlyKqdynKUp+moo8aWZSmnlNPPnR+5vkSEUlrBnRSL7vdHRGnPlwRdBntTk6WlyKqpyERcOJr9uaMYw9gaHCgoSR0BgwOzAxnTWNocGBqYZj+jOYsEURFldvTO6mX3e6OiFMf7izuMtqdnGz50ftNc1ZDcxRRPfMHDLh63UNcfNsDjE+0zqQUv97ovpffkyQMuPDm+7l63UPTFtTppF52Xs/fyRdfocxpGlG0SNLCK40qovJoqKIxNDhQUfEOrx2pKyneQKmf2aW+xLUbonqLFpXjUUPFoeEVyhwnIssPu7yhnjCb6gk2oxGoJS1FPYigRz43ZcGcSiQtF5mWIbVWxsYn2H5WHwOl/kQl2I7w4Gp4ltTOw30ETkWyOipbYcdeuniIm1YcxX3nHNOU8iLb/dDgACcfOp+hwQFGqyiBLP6XtHs2UKrv5/bk2PjUIvRJFNVf4HQOrgicimRt4Jtpx84SKlmL4zaJwYESvzv7bXzlxIN49vmtXHjz/VMNd6Vzzj5+4bS6Zyc07mn3bHaNEUQRew4OTCnBNLdzEf0FTufgisCpSNYGvlnRIllGIKcP38XoWG3mmzilPrHyuAOm6spSVnQOwPNbt80m3rxlfIZ8afes2mgjifIRh0flOHngisCpSFoD0ydNa/yaFbZabQQyvHaE7918f01lxhkaHGDVO4M0z1nmJijDOeUjpEqNdZp5p7zOSNZoTkE0ShoZHZsxKvCoHKdR3FnsVCQpHBACZ3DcSdmssNU0R3C0fdW1G+rO3yOmr29QzZyS5ASuJh8E92z5petmrMv8YDjKiRzTaVhZ3eWO+HgZ/dI0RdQOJ61HCXU+rgicikQ/6E9dsm5GeofyLJnNiBbpT1mAPnLsNmIL36XMr1BpbsJAqZ8j95/HYefcMK2BqybfFAnGfIv9r6YM4teZNAqJyohkaVf0UBGSHDqN46YhJ5G4w3bVtRtSc/
w020mZVk+0vRFb+OjY+DTnc9qSmXPnlDjh4CEuXzMyzVfx8YvvqChfVO6qazdUncAW9frTTEXx60y7x+U1tCN6qJNmPTvp+IjAmUFSLy+tB9sMJ2XctJDW444azDRTVVaSeqxJ8f5JI6BqROVmVY7RaKT83iY5iLPOnWh19FAnzXp20nFF4MwgzRSRRKMrhiVNRCsnKWXDmVetnzHxq5q5JSJu0io3ZzWS5TQqt9Z0GNVs/knKr5JibqXNPu1aPYqps3DTUM50SvrgOLU0Yo2uGFYtcicpGyfAU2NbZxwbNahZSOux1pvlNGJkdCzV5FSJNJv/8NoRli4e4oSDh6b8EP0Sf/LSXROjtI7cf15LM5W2O8mh0xxcEeRIJ6UPjhheO1JTyuVGTQCVzi9PUTG8doSDzryuoq0+sr1DggM3RlKPdXjtSNPSV1SaCZxGms1/eO0Il68ZmbrmCTNuv/9JTjh4iKHBgakQ17OPX8iN92xqqc1+6eKhqWuNy+GO4s7Ck87lSBT3XU5SWGJRSJM5jUavpVp9UflZk86Vy5N0XmRWGYr5BFZeub6hSWppMlSqPwsiiHZKki2SvzzRX1o59zYpNYfTOXjSuQLQiY60aj30etMLp9mt02Luy+XJYrIp9Ytnn9/KviuumWEbjxrLeCM8MjrG8kvXMQlMVFisoJaGOy4zJPs0Zpf62DppmVJjD1ZIgheNMCv5VyLcZu9Uwk1DOdKJ6QDSZIuG/PWYACqZyJYuHmLH2en9kUieaspTYWs9OjaeWMdNK45iaHBgRoM+PmkVlUBQeNVLTJQ5znOxRe7HxiczKYGBUj+VBuyRYzlLOW6zdyrhiiBHOtGRVknmqEG995xjuGnFUZntwNVizSvl4InuVTXlKZgxqhgbn+DMq9ZPfa9nJDY0OFCxMU6SY2R0bFpgQD0O6EjRPlnBXJUlsslt9k4WXBHkSCc60vKQuZqJLK2RHxwoTdVbLRInrVO/ecv4VINcz0jsyP3nZV5qEqabnaIRSa0KKEqFsXTxUKrMc+eUqjqjI19Fkd83pxi4s9jJnWpO8ySH6kCpn7OPXwhsm/A1OKeEWZCfv6+CY7SWerKce+T+87iwSqK7ND9C1FjX64Cvdm/Sric6xpVAb5PVWewjAid30nrzW17YOmXDTxqFANN8C5u3jPP81km+fOJBTNbQgYl65FE9tfDg6BhnLV3Iew+dPy2O/7CX7jpN3jRpHqxxXkG56bDSCC2+L5ILOmPk6RQLHxHkTCdmZsxD5uG1I4khmpV6rpVCSyuFSpZTHlJaT4hs2j2Ir9Ncqe4saxYPDpRYedwBhX8/nM7BRwQFoFMnlOUh89LFQ+yw/czooMihmzT7upJtPUkJlPpFqW+6PT/JOZ/UQy/1iVJ/si8g7R7E71US5akxqvka4gveOE4rcUWQI52YmTFPmdMa9s1bxhMVTxbnbr+0bfGYZYtY9c5FVR3dSeaWVe9cxKpli2paF7hSNFBS3dVGMEV/N5zuxSeU5Ug3TShrhsxZk7FFDeLyo/dj+WXrKsbcT5rNmDGbxbSStnbC0sVD7LvimkSbf/k9SLsn5QvgRAxluP4ivxtO9+IjghzppgllzZC5Fqfpg6NjgTlpu8p9lXrWRK6WBDDrPaj1XmW5/sE5pY5LUuh0Pq4IcqTbJpQ1SpJJZrBs1bCIqDGtNKGqVrmy+j+y3oMFuyU3+Gnby6N8yj0GpX7xzHNbO8qn5HQHbhrKkWat41sP9Ub+tFrmty/ag8vXjKTmMEozJ/VLNYVIDq8dybTcJmS/Bzf/fnNiXWnbo7Lj2VTjdTz7/NYZUVVJ8jVKJ0ayOfni4aNdSKVJSJV+8Hk3EGlynXDwEDfesymx3nqvpVq9cerNzLlgxTWp++6ro7w030QzM4c24346nUPbs49K+hbwduBRM3t1uG1X4GJgAXAf8C4zS+8+OXVRKfIn7cfeikXI0+S68Z5NqamsmzFCqZbrp17/R6W5DNFEuV
poxWpf9bwbTveTp4/gfOAtZdtWANeb2cuB68PvTpOpJ/Ina9hoIyuu1RuRVG+yuyzlC+r2f5x0yN6p++oJA22FT6kTI9mc/MlNEZjZz4Enyja/A7gg/HwBsDSv+nuZeiJ/sjQQjU42a1cUVaXyjfpHPGctTU9XUX4/kxRo+TYg9ySFnRjJ5uRPq6OGdjezh8LPDwO7px0o6VRJqyWt3rSpsXVxe416epZZGohGJ5u1K4pq+dH7pS4pUOtyklnPj9+3JAW6/NJ1LL9s3QylCjQ0+qlGJ0ayOfnTtvBRC7zUqZ5qMzvXzJaY2ZJ58+a1ULLOp55U0lkaiEbNCu1Ky7108RAnHzp/hjJoRgOY5b4lKdDxhBXKWjGzuBNTozv50+rw0Uck7WFmD0naA3i0xfV3BM2I3kmbOVvpeKjslG2GM7NWuZrFWUsXsmSfXVOvL89w21rs762w1bfrGTjFpdWK4Erg/cA54f8rWlx/4WlF9E4a1RqI5Ufvlxh6mLdZoZ5GOu2ctAyitdzzpLLTop4ge2qN6FjHaTV5ho9eBBwBvEjSH4AzCBTAJZJOATYC78qr/k6lyOF9zZxslrVxr0cxZjknXj/MtFGm3fN65ElSoKU+gZhmHmqGUvXJYk495KYIzOyklF1vyKvObqDo4X3lveoo8qXW3nrWxrQexVjtnKwrlSXd83rkSVOgSdsaabTbOZp0OhtPMVEwWjGpqFnU2/DU0pjWoxirnZN1Mfmke97IXIi0bKfNosijSafYeNK5gtFJ4X31hpPW0pjWE/de7Zyso6uke17kOPyijyad4uKKoGAUNbwvaUJUvQ1PLY1pPYqx2jlZGu25c0qJ97yoinp47Qh9KSugFUFJOcXGTUMFpGjhfWkmoME5JTZvmZkmOqnhiTsxdxkoUepXJkdpPQ7qauckOW/jDJT6OePYA+oqu9J1x49tplM3ej5JeY+KoKSc4uPZRzuUVkaHpC32PjhQ4vmtk1UzWSY5Z0t9YsfZsxjdMs6egwMcuf+81AykeVCumCSmZGlW3ZWyrSal3q535Jf2fPolvvSuRYXqVDitpe3ZR538aHV0SJqp58mxcb584kFVFVLazNo5281i7Wff3JZol1aMutJ8KBfd8kCmdRGykvZ8Js1cCTiZcEXQgbQ6OqRSJFOWBrWeKJ5uiHZJu+601NX1OnU7KdLMKSbuLO5AWh0d0qiDtN4onjyup5E02rWSdt39TXbqFtWB7XQOrgg6kFaHMDYayVRvFE+zr6fRNNq1knbdJx2yd1Mb7qJGmjmdg5uGOpB25PxpxKZeTxRPHtfTahNUpeuulACv3rq84XfqxaOGOpRuyynTiuvJsiZwt91Xp7fxqKEup9t6gK24nmpOVc/V4/Qq7iNweoZqvopGV2BznE7FRwRO4Wjm+gNxqvkqPFeP06u4InAKRV7rD0RUMkF5PL7Tq7hpyCkU9ZhnmmXS8Xh8p1fxEYFTKPJYfyArzVyBzXE6CVcETqGoxzzTTJNOt0VjOU4W3DTkFIo81h9wHKcyPiJwCkUe6w84jlMZn1nsOI7TpWSdWeymIcdxnB7HFYHjOE6P44rAcRynx3FF4DiO0+O4InAcx+lxOiJqSNImYGO75ajCi4DH2i1EC/Dr7C565Tqhd641fp37mNm8aid0hCLoBCStzhKm1en4dXYXvXKd0DvXWs91umnIcRynx3FF4DiO0+O4Imge57ZbgBbh19ld9Mp1Qu9ca83X6T4Cx3GcHsdHBI7jOD2OKwLHcZwexxVBE5DUL2mtpKvbLUueSLpP0l2S7pDUtelgJQ1KukzSPZLulvTadsvUbCTtFz7H6O8pSR9vt1x5IOkTktZL+rWkiyTNbrdMeSDpY+E1rq/1Wfp6BM3hY8DdwM7tFqQFHGlm3T4p56vAT8xsmaTtgDntFqjZmNkG4CAIOjLACPCjtgqVA5KGgI8CrzKzMUmXAO8Gzm+rYE1G0quBDwGvAV4AfiLpajP7bZbzfU
TQIJL2Ao4BvtluWZzGkbQL8DrgPAAze8HMRtsrVe68AfidmRV99n69zAIGJM0iUOoPtlmePHglcIuZbTGzrcDPgOOznuyKoHG+AvwNMNluQVqAAddJWiPp1HYLkxP7ApuAb4fmvm9K2qHdQuXMu4GL2i1EHpjZCPBF4H7gIeBJM7uuvVLlwq+BP5W0m6Q5wNuAvbOe7IqgASS9HXjUzNa0W5YWcbiZ/RHwVuAjkl7XboFyYBbwR8DXzGwx8Cywor0i5Udo+joOuLTdsuSBpLnAOwgU/J7ADpLe216pmo+Z3Q18HrgO+AlwBzCR9XxXBI1xGHCcpPuAHwBHSbqwvSLlR9i7wsweJbAnv6a9EuXCH4A/mNkt4ffLCBRDt/JW4HYze6TdguTEG4F7zWyTmY0DPwT+pM0y5YKZnWdmB5vZ64DNwH9mPdcVQQOY2WlmtpeZLSAYXt9gZl3X2wCQtIOknaLPwJsJhqNdhZk9DDwgab9w0xuA37RRpLw5iS41C4XcDxwqaY4kETzPu9ssUy5I+m/h//kE/oHvZz3Xo4acrOwO/Cj4LTEL+L6Z/aS9IuXGXwPfC80mvwc+2GZ5ciFU6G8C/qrdsuSFmd0i6TLgdmArsJbuTTVxuaTdgHHgI7UEOXiKCcdxnB7HTUOO4zg9jisCx3GcHscVgeM4To/jisBxHKfHcUXgOI7T47gi6AIkLZVkkvZvtyztRNIzLarnIkl3SvpEK+orEpKOaDTLrqQFkn5dXp6k4yR17SzuIuPzCLqDk4Bfhv/PaLQwSbPCxFU9Q9ZrlvRi4I/N7GXNKK9ohJOuZGYtz51lZlcCV7a6XsdHBB2PpB2Bw4FTCGY3R9t/IOmY2PfzJS0L105YJem2sFf7V+H+IyT9QtKVhDNpJQ2HCebWx5PMSTpF0n9KulXSNyT9U7h9nqTLw7Jvk3RYgrwfkPRDST+R9F+SvhDb90zs8zJJ58dk/5qkmyX9PpT1W+FaAeeXlf/lUN7rJc0Lt700rG9NeI37x8r9uqRbgC+UlTNb0rcVrL+wVtKR4a7rgCEFOfz/tOycaeVJ2jW8h3eGsh8YHpe2faWkC0IZN0o6XtIXQhl+IqkUHneOpN+E538x4R6vlPRdSf8R3uMPxfYtjz37M8NtCyRtkPQdgtnie5eV9xYFazPcTiyjZSjXoAIel/Tn4fbvSHpT2ruWRvhuRO/S+ZL+r6Rfhc98Wbi9T9K/hPL8VNK/RvucBjAz/+vgP+Bk4Lzw86+Ag8PPfwZcEH7eDngAGABOBU4Pt28PrCZIyHUEQYK1fWNl7xr+HyBoIHYjSNx1H7ArUAJ+AfxTeNz3CRLTAcwH7k6Q9wMEs3V3AWYDG4G9w33PxI5bBpwffj6fIJeTCBKIPQUsJOjIrAEOCo8z4OTw82djcl0PvDz8fAhBKpCo3KuB/gQ5PwV8K/y8P0GqgtnAAuDXKc9iWnnAPwJnhJ+PAu6osn0lwciuBCwCtgBvDff9CFgaPoMNbJsMOpggx0pgXfjcXhQ++z0J0oKcG97HvlDW14XXNAkcmlDW7PD8l4fnXQJcHe77OkEK9lcDtwHfCLf/F7AD6e/a1D0keO+i8j4Qe2bnEyTC6wNeBfw29l78a7j9xQQ5dZa1+3fY6X9uGup8TiJYSAWCxvIkgsbxx8BXJW0PvAX4uQULc7wZODDWi9qF4Ef+AnCrmd0bK/ujkv4s/Lx3eNyLgZ+Z2RMAki4FXhEe80bgVQrSUADsLGlHMyu33V9vZk+G5/8G2IegsanEVWZmku4CHjGzu8Lz1xM0LHcQNGYXh8dfCPxQwYjpT4BLY3JtHyv3UjNLytJ4OEGDjZndI2ljeJ1PVZEzXt7hwAlhGTcoSBG8c4XtAD82s/HwOvsJMkkC3BVe59XAc8B5Cmzrafb6K8xsDBiTdCNBgsDDCZTB2vCYHQme6f3ARjO7OaGc/QmStv0XgIKkitHo8BcEimQj8DXgVAULwWw2s2crvGtZk6ENW2
Ci+o2k3cNthxPc40ng4fDanAZxRdDBSNqVoEe5UJIRNBwmabmZPSfp34GjgRMJlAQEvbq/NrNry8o6gmBEEP/+RuC1ZrYlLKvaEn99BL3K56oc93zs8wTb3sN4vpPyuqJzJsvOnyT9PbZQplEzOyjlmGdTttdLo+U9D2Bmk5LGLewGE16nmW2V9BqC5GnLgP9J8A6UU547xgie/dlm9v/iOyQtqFPunwMfIRj9fYZgFLqMQEFA+ru2IGP58ees1KOchnEfQWezDPiume1jZgvMbG/gXiCyXV9MkDDtT9nWs7wW+O8xe/MrlLzwyi4EPbstoU390HD7bcDrJc1VsOLTCbFzriNI2EZYdlrjm8Yjkl4pqY+gUamVPoJ7AvAe4Jdm9hRwr6R3hjJJ0qIMZf2CwOyGpFcQNHYbapQnXsYRwGOhPGnbqxKOcHYxs38FPkFgQkriHQr8HLsRmF9uI3j2fxGWgaQhhRkrK3APsEDSS8PvJ0U7zOwBAtPTy83s9wRmrf9FoCAg+7tWCzcBJ4S+gt3Da3MaxEcEnc1JBItRxLk83P5zgob5uwRmghfC/d8kMDHcrsBWsonA9lzOT4APS7qboAG8GYI1CST9PXAr8ARBQ/FkeM5HgX+WdCfBu/Vz4MM1XM8KAlPHJgJ78o41nAtBr/Y1kk4HHiUYCUHQ6H4t3F4iGB2tq1LWv4Tn3EWQtfIDZvZ8zLyUhZXAt8L7sQV4f5XtWdgJuELBAuwCPply3J3AjQQN9efM7EHgQUmvBP4jvI5ngPdSYQGTcGR5KnCNpC0ESmyn2CG3EIxECfedTaAQIPu7VguXsy01+AMEWUWfrHiGUxXPPurUTGT3D0cEPyJwqnbdwuediqSVBI73GRFF3UDs/duNoENymAXrSDh14iMCpx5WSnojgR3/OmC4zfI4vcXVkgYJouE+50qgcXxE4DiO0+O4s9hxHKfHcUXgOI7T47gicBzH6XFcETiO4/Q4rggcx3F6nP8PE+7o+52RjEYAAAAASUVORK5CYII=\\n\",\n      \"text/plain\": [\n       \"<Figure size 432x288 with 1 Axes>\"\n      ]\n     },\n     \"metadata\": {},\n     \"output_type\": \"display_data\"\n    }\n   ],\n   \"source\": [\n    \"plt.scatter(data[\\\"RM\\\"], labels)\\n\",\n    \"plt.xlabel(\\\"Average number of rooms per dwelling\\\")\\n\",\n    \"plt.ylabel(\\\"Housing Price\\\")\\n\",\n    \"plt.title(\\\"Relationship between Rooms and Price\\\")\\n\",\n    \"plt.show()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 6,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"predicted_prices = lm.predict(features)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 7,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"image/png\": 
\"iVBORw0KGgoAAAANSUhEUgAAAYIAAAEWCAYAAABrDZDcAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJztnXucXHV999/f3QxkF5ANECgshKDwhIpIIlFi01ZJK1G5uAXlUrDYaqm9IYjR4ItHYh8o0VRR+/RGRYuFQgLBCKJGXhK0xodLQhIwEl5V7itKkKxCdiGb3e/zx5yzOTt7zpkzs3NmZmc+79drXzvnzLl85+zs7/v7fa/m7gghhGhfOhotgBBCiMYiRSCEEG2OFIEQQrQ5UgRCCNHmSBEIIUSbI0UghBBtjhSBGMPMvm1mFzZajnbEzO41sw8Fr883s+/W4Z6zzczNbFqNrqfvzxRFiqCFMbMnzWzIzF42s1+a2X+Y2b5Jx7v7u9z9hnrKOFUws2VmNhw8ywEz+5GZvTWPe7n7Te5+SkaZbsxDhuD6+v60CVIErc/p7r4v8CZgPnBF6QFWZEp+F+os+8rgWc4EfgjcbmYWI1NNZthNQkt/f0QR/fHaBHfvB74NvAHGTBFXm9l6YBB4bdQ8ERzz52b2qJm9ZGY/MbM3BfsPM7PVZrbdzJ4ws4sj57zFzDaY2W+CWeTn4+QJrntaZHtacL3wHguCWfeAmW0xs7dHjo2T/QNm9ngg6xNmdn5w7LhZc6k5JOm8Ms9yGLgB+C3gwOAa683sWjP7FbAsuPafBZ9zh5mtNbMjI3K8w8y2mdmvzez/AhZ57wNm9sPI9nFmdreZvRg800+a2TuBTwLnBDP2LcGx+5vZ9Wb2nJn1m9lVZtYZvNdpZv9gZi+Y2ePAqeU+a+QzN9X3R9QYd9dPi/4ATwJ/GLw+AtgK/J9g+17gaeA4YBpQCPZ9KHj/fUA/8GaKg9TRwJEUJw8bgU8BewGvBR4HFgfn/T/g/cHrfYEFCbJ9Crgpsn0q8Gjwuhf4FfDu4H7vCLZnJsi+P/AbYE7w/qHAccHrZcCNkfvMBjw4b5+k82LkHbsOsDewAng62P4AsBv42+C6XcB7gJ8Cvx3suwL4UXD8QcBLwHuD535pcP6HItf7YfB6P+A54DJgerB9UtxnC/Z9Hfi34LMdDDwA/EXw3oeBbcF34QBgXfgsptr3Rz+1/dGKoPVZY2YDFE0Z3wf+PvLef7j7Vnff7cVZbpQPAZ919we9yE/d/SmK/9gz3f3v3H2Xuz8O/DtwbnDeMHC0mR3k7i+7+30Jcv0XcIaZdQfbfwzcHLy+APiWu3/L3Ufd/W5gA0XFMEF2ioPoKPAGM+ty9+fcfWvG51PJeWcHz/IZ4ETgjyLv/dzd/zF4lkMUB91r3P3RQMa/B+YGq4J3A1vd/bbguX8B+EXCPU8DfuHun3P3V9z9JXe/P+5AMzskuPYl7r7T3Z8HrmXP3+Zs4Avu/oy7vwhcU/bpNO/3R9QQKYLWp8/de9z9SHf/q2CQCnkm5bwjgJ/F7D8SOCww2QwEg8QngUOC9z8I/C9gm5k9GDX/RHH3nwKPAqcHyuAMisohvMf7Su7xuxRn7BNkd/edwDkUB9/nzOwuMzs25bNVe96q4Fke7O6L3H1jnDyRz/DFiPwvUpwZ9wKHlcjvMeeHJP0d4jiS4sz8uch9/43iyoDS+wJPZbhmU35/RG1pJaeWqJy00rPPAK9L2P+Eux8Te0H3/wHOs6Lz8EzgNjM7MBh0S7kZOI/ihOQngXII7/Gf7v7nWWV397XAWjPrAq6iOMv8PWAn0B059Lcynlcppc/yGeBqd7+p9EAzO4biQBluW3Q75jrnJrwXd89XgYOCVUgpz5XcZ1bCdbPS6O+PqBFaEYgkvgx8zMxOtCJHB2aNB4CXzOwTZtYVOCDfYGZvBjCzC8xspruPAgPBtUYT7nELcArwl+xZDQDcSH
GlsDi4/nQze7uZHR53ETM7xMzeY2b7UBwIX47cczPw+2Y2y8z2By7PeN5k+VfgcjM7LrjX/mb2vuC9u4DjzOxMKzqtL6ZEQUX4JnComV1iZnub2X5mdlLw3i+B2cGgibs/B3wX+JyZvcbMOszsdWb2tuD4VcDFZna4mc0Altbos8ZRj++PqBFSBCIWd78VuJriAP0SsAY4wN1HKNqt5wJPAC9Q/KffPzj1ncBWM3sZ+CJwbok5IXqP5yg6B38HWBnZ/wxFZ+snge0UZ5FLSP6+dgAfBX5O0QTzNorKhcC/sBJ4mKKT8ptZzpss7v514DPALWb2G+DHwLuC916g6ExdTtEJfgywPuE6L1F0lp9O0Y/wP8DJwdu3Br9/ZWYPBa//hKIT9ifADuA29pjU/h1YC2wBHgJur8FHjaUe3x9RO6xonhRCCNGuaEUghBBtjhSBEEK0OVIEQgjR5kgRCCFEmzMl8ggOOuggnz17dqPFEEKIKcXGjRtfcPeZ5Y6bEopg9uzZbNiwodFiCCHElMLMsmSPyzQkhBDtjhSBEEK0OVIEQgjR5kgRCCFEmyNFIIQQbc6UiBoSQoh2Ys2mflasfYyfDwxxWE8XSxbPoW9eb273kyIQQogmYs2mfi6//RGGhkcA6B8Y4vLbHwHITRnINCSEEE3EirWPjSmBkKHhEVasfSy3e0oRCCFEE/Hzgfj2C0n7a4EUgRBCNBGH9XRVtL8WSBEIIUQTsWTxHAqdNm5fodNYsnhObveUIhBCiCZjZMRTt2uNFIEQQjQRy+7YymjJvtFgf15IEQghRBMxMDRc0f5aIEUghBBtjhLKhBCiTmTJGJ7RXWDH4MTZ/4zuQm5yaUUghBB1IMwY7h8YwtmTMbxmU/+4405946Gx5yftrwVSBEIIUQeyZgyv27Y99vyk/bVAikAIIepA1oxhZRYLIUSLkjVjWJnFQgjRoixZPIeuQueE/YO7do/zEyxZPIdCR0lmcYcyi4UQYsrTN6+Xa848np6u8dE/OwaHJziNd4+OzyQu3a41UgRCCFEn+ub1ss/eE6P2o07jT97+MKXDvgf78yL3PAIz6wQ2AP3ufpqZHQXcAhwIbATe7+678pZDCCEmQ626hpVzBg8OlxaYIHV/LajHiuAjwKOR7c8A17r70cAO4IN1kEEIIaomaw5AFhrhDC5HrorAzA4HTgW+HGwbsAi4LTjkBqAvTxmEEGKy1LJrWJzTuKvQOeYMtriTUvbXgrxNQ18APg7sF2wfCAy4++5g+1kgv47MQghRA2oZ2x+ak5LMTF2FjlgzUFchv3l7borAzE4Dnnf3jWb29irOvwi4CGDWrFk1lk4I0U5M1r5/WE8X/TGDfrXmnL55vYn3H0rwBSTtrwV5moYWAmeY2ZMUncOLgC8CPWYWKqDDgVgjm7tf5+7z3X3+zJkzcxRTCNHK1MK+X86cU0taKqHM3S9398PdfTZwLnCPu58PrAPeGxx2IfCNvGQQQohq7ftrNvWzcPk9HLX0LlasfYyzTuylt6cLA3p7urjmzOOrihoqRz2VTkgjylB/ArjFzK4CNgHXN0AGIUSbkNW+HzUf7d9VYOeu3QwHLSL7B4ZYvbE/t8E/SjkfQh7URRG4+73AvcHrx4G31OO+QgiRxb4fmo/ClUNcN7BwFZG3IoB0H0IeKLNYCDHliJptFi6/J9Xen8XUEmc+iiPPCqCNRB3KRFNTq2xOkT/1+luVzt5D5y8Qe78sppasA3wjk77yRIpANC2V/sOLxlHPv1Wa8zfpXqWmlnBFESqG/bsKZZvD5+2wjVLvCZBMQ6JpqWU2p8iXev6tkmbv/QNDmUxFceGkO3ftji39PKO7ULMooazmrFqWs8iKVgSiaWlEpyZRHfX8WyU5f4FxA2dI6cw6TmkNjzgzugt07zUtl1l4JSumalY8k0WKQDQttc7mFPlRz7/VksVzxg2qcQwNj7Dsjq28unt0wuCbdN7A4DCbPnVKzeWFygb3JC
WXtL8WyDQkmpZGJNaI6qjn3yps8BImdyUxMDQcO/h2WvxZeU4wKlkxJcmXtL8WaEUgmpZGJNa0ArV0NGa9Vr3/VlHn78Ll91Q0Wx5xp6vQOU5J5D3BqGTFNOLx3ciS9tcCKQLR1NQ7sWYqETdIAzWL3qkmTDPpHnlGwcSZiroKnUwvdLBjcGIkUG/EV1CvCUaSjHHKpzdBafTmuGIxz1HL1Ir58+f7hg0bGi2GEE1D6SANxYFl72kdsWGQvT1drF+6qKJ7pM20eysYPJNkrWW5hixKsV73nawyvGLNI9x439MT9l+wYBZX9R1fkXxmttHd55c7TisCIaYgSc7HJEdoNY7GtIifSlYa9YiCSVuNRAffk4+dyYq1j3Hpys01MZvVasUUZd227RXtrwVSBEJMQaoJy1yzqT9xILpizSPcfP8zjLjTacZ5Jx2RGqYJ2QfzRoYBRwffWie95aXgGvG8FDUkxBQkKcKlIyWwJCm5KzRFhM7IEXduvO9pZh/YNSESqJQsg1Oz9OitddJbXgN2S/UjEEKUp5LiaVGSwjVHU1x+/QNDY/cI7zt76V2x9miA+x7fMRammUSWwalcaGm1z6BSaj1w5zVgL1k8JzbLOc+oJikCIRrEZEoJlMbS9/Z0cdaJvWUbnPcPDLHkti0suXVLWb/BiDt983pZv3QRXzhnbtV5AnGyhg7bepZTqPXAnWvuROkfMs/O9chHIETDKGeqKBdhUup8XLj8HrLEAIbNVrIQ+hWS8gTC+2bJM4jbX89yCpWEcGYhr9yJFWsfm/A3Gh5xlZgQohVJK55WjVMzD2di9L5xFTwn63ytp2M0j4E7jzyXRjiLpQiEaBBJUTmdZqmz5LgIn6v6ji8b5VMNabPzpNn8p+/cmnmwrXc9qamQoNiIGlvyEQjRIJJszEmlBPoHhsacu6URPleseST2eoVOm+h4jNmXRqUz1B2Dw5lt/pXa2evlWG4k7dK8XogpRaXlESZbn2fF2scqntnffP8zY1mncXb8pH1Z7nNYT1fsZ8q6AklbVVRirmmXRkWNqLGlEhNCpFBpeYRalFOIu0YWnlx+akXHZ7lvocPYa1oHO3eNl6Wr0MlZJ/ay8sFnMjmfDXhikvIllbyopnxGu6ASE0LUgCQ7+GWrtgDVNxWJzrD37ypgVqyHH87+rjnz+LH3s0zVwhLFkynuVjoT3b+rwM5duycogfAz3fXwc2QSjtrYt9upUZFaVQrRRCQNMiPuE2zfazb1J5pKotcpjZ0fGBpmx+DwhO5a65cu4tpz5mYKIT/vpCNiY/IvXbmZK9Y8Uvb8kDBv4Inlp7LP3tNSZ/s7BocZjslgK5W3VvbtZslQzptGtKqUIhAihbRBZmh4hEtWbmbh8nu4Ys0j49ojpl0nbtVQet0Vax9jzaZ+Llu1peykO6xKGXddB2667+mqBpFqZ9oOscljk6VdGhU1ole3TENCpJClLWL/wBA33fd04oBdOlhlGWD7B4a4ZOXm1GMMuPacuWODbNJ1HapKRkpzBte65HUW2qVRkfIIhGgywkHmslVbUjtEpc3aX9ldXDlctmpLpqqeWXHG+yjSrpt1ECn1XRQ6bYJ5qKerwLIzjgPi6/3nOUOfCnkAk6UReQRSBEKUIRx4qonkAQj1Rxjzf8zB+8QOsJXSacZRS+8amxkvWTyHS1dujlVKWQaR0qihgaFhCh3GjO7COEd2nIO8lWfo9abWpTCyoPBRITISzpZrMZvvAEYnL9IYYYjqhqdenGCmyhq+qvDM5qFWUUNZw0elCISokGrj/MvR29PFzld3x9rdszCju0D3XtPoHxii04wR99SWkqWDTZKCi8sBqHd4o6gO5RGItmPNpn6W3bF1bCCd0V3gytOPq/kAFXVapg2elU6x+geGKHRWX294x+DwWLP2Efcxc0LWLN0kmUvNSu2S4dtOaEUgWoI1m/pZcuuW2Lh22NNsHWpr067l6sBsjz+hViSZdZLMQKXKIM6sJB
PS1EErAtFWrFj7WKISgKAhy61bwPbU46/FTLZ0dVDNSgCKpRzS5C8l630qDUUMcwDSFGU7Zfi2C7kpAjObDvwA2Du4z23ufqWZHQXcAhwIbATe7+678pJDtAdZBqG4gTatXERWwpDGpJlyOXp7uhjctXvMrFNK3Cz9rBN7E1tMRknLxk1aEZRbJTUivLFS5MOojDwzi18FFrn7CcBc4J1mtgD4DHCtux8N7AA+mKMMok2YzCAUVy6iGqqZEfd0FQASlQDA+QtmTcjUvaovvZcwpIccLlk8J7Z0RZh8lkazZ/g2okTDVCc3ReBFXg42C8GPA4uA24L9NwB9eckgWp+wPv1kQzprkcJfqTIqdBg7d+1Olb2nq8BVfceP1f9Zv3TR2Mw2aTCHYo5BWsho37zeRNNSOYWW1oO4GWhEiYapTq4+AjPrpGj+ORr4J+BnwIC77w4OeRaI/faY2UXARQCzZs3KU0wxRblizSOppR0qZbI27rhEoCRbfqcZ+06flroS6Cp0jmXwxtE3rzexDMVo0Hg+jd5JmHiaOcNXPozKybXonLuPuPtc4HDgLcCxFZx7nbvPd/f5M2fOzE1GMbUIVwBhp66kQbYaJmvjjpspn79gVqwZ5XNnn8BAihLIOstOMg/1dBfKdvJqdhNPtbRLldJaUnZFYGYfAb4KvAR8GZgHLHX372a9ibsPmNk64K1Aj5lNC1YFhwMy3IlUohm9WaJl0moCpVE6AFbicIzK2Gk2JuP8Iw9g/pEHxF4nKQ+hkjDMuFVIodN4+ZU9zuek6KhWLeLWiBINU52yeQRmtsXdTzCzxcBfAP8b+E93f1OZ82YCw4ES6AK+S9FRfCGw2t1vMbN/BR52939Ou5byCNqXvLJ4S+npKrD5ylNS72sUHbdhS8gsMubdzSy8TnQwT8pObqc4f0UNFallHkG4zn43RQWw1SzT2vtQ4IbAT9ABrHL3b5rZT4BbzOwqYBNwfYZriTalXO3+WnHaCYeWvW9Y23/+kQeMG1TSZAx7Fmx46sUJCqTcjLyS3sfR/UctvStWlmpt5FNxUG1mH0YzkkURbDSz7wJHAZeb2X5kqJfl7g9TNCOV7n+cor9AiLLUy8G3btv2TPeNq+2fRcYw5j9OGdS6UXst4/xVTqI9yOIs/iCwFHizuw8CewF/mqtUQgTUy8FXOnCm3beSY6PcfP8zmeWZTAhkLZ3ACsVsD7IoAgdeD1wcbO8DTM9NIiEiLFk8Z1KF2LJSGmmUNmjGHVs68MZRiRN7MiGQtYzzT8pxqEUpbtE8ZDEN/TNFU9Ai4O8oRg+tBt6co1yiTchifx6ZZAOXLJQO0mkx+nHHQno1UqgsrHWy5p1a2cjDctZx+0XrkGVFcJK7/zXwCoC776BoHhJiUmQpBbDsjq01beCSRKfZhFj7pBj9sDNYND6/b14v65cu4snlp3LBgvgEyPNOOiKzPM0S45+0iqk2RFc0J1kUwXAQ+eMwFhZaj/9N0eJksT9X26SlUuLqDSWZfEbcU2vYXNV3PBcsmDU2a+4044KYsNM0mqWMQ5IyLFfnSEwtspiGvgR8HTjYzK4G3gtckatUoi1otlIAoRIKB9vS8M6OGDNJ6TkhV/UdX9HAH0czhEAqOas9KKsI3P0mM9sI/AHFnII+d380d8lEy9OM5YxLlVB0MK51fP5UoFWzj5udeuduZCkxsQDY6u7/FGy/xsxOcvf7c5NKtATlvsxZZpszuguphdlqTZoSakbFVQ+aYWXSTjQidyOLj+BfgJcj2y8H+4RIJIsjOIsd/MrTk6tv1ppSJRQWuAsdwycfO7MpHLiitWlE7kamEhMeKUjk7qNmphaXU4w8l5px1077Mldy3755vVy6anPNe/mW0mnGWSfumfnGzcpWb+znrBN7Wbdte9nnWO55T8WyDaI+NMJ3lmVAf9zMLmbPKuCvgMdzk0jUnDyXmknXTqq9E/0yp8m14akXufn+Z+oWpjjizuqN/WN1hJIU2b
pt22MLt0UH9p7uAi+/snusNWbp81bZBpFGI0yQWUxDHwZ+h2K56GeBkwgaxoipQZ5LzaRrJyUcRb/MSed+8vaHufG+p+seqx59JpXMykrNYDsGhyf0Rw6vvWZTP5et2hL7uS9btUXtFEVsNn2h03I1QWaJGnoeODc3CUTu5LnUTLrGiDtdhc5UR3DSuYPDjUtTCWUqNyuLrgDiwkrjCGf+aUlaWhkIYGLTjZznRIkrAjP7ePD7H83sS6U/+YolakmeHZuSrhE6ftMcwc0YbRPKlJbZW7oCyLpy6TQrW1JbBd3EirWPTVhRDo96rt+LNNNQmCuwgWLf4dIfMUXIs1xBUvZt/8AQl6zczOCu3Vx7ztxxTddDTj525oTm642sYBN9JmkRTdX0SOgqdGZWGK2clyDK01TOYne/Mygtcby7fyw3CUTu5JkUVK7g2o7B4djGLGs29bN6Y/+4FW+WNpR5Eg70pRE9154zt+L+A4VOY5+9pvHroeGx533Zqi2ZlEEzrpRE/WiEszjVR+DuI2a2MLe7i7qRZ1JQeO2Fy+9JrL5Z2tkrqQNYI8ka0ZP0j9ppxqh7oqJNqmYaRXkJYsniOSy5bQvDkaq7DXcWA5vN7A7gVmBnuNPdb89NKjElSZsph529oHy55kbQ01UA0iOswoE9KSO6XFG43ioVSFaUm9BC1NlZnEURTAd+RbEfQYgDUgRiHEkz5ZD+gaEJM51moNBhLDujmMGcxT5bramtWgWSBeUmtA5pzuK8/pZZFMESd38hl7uLlmLJ4jksuXXLhC9xlGZTAh0G57zliLJmn1L7bDWmtjx9NbXK5BaNp6mcxWZ2OvAViv0IRoGz3f1HuUkipjzhgHP57Q8z1MBcgEoYdVj5wDNj/ou8yy7n5atptpLeonqazVl8NfB77r7NzE4CPgu8LTdJRNMRtTnv31XADAYGh1NnsuFAd9ynvsPOXZWFWDaK4VFn2R1bxw3SoR8jjP0P/RvNOrtu18qorUgjekCk5RHsdvdtAEHJ6f1yk0I0HaVJUwNDw+wYHB6rJHrpys3MLmnXGGWqKIGQaCe0cGUQjf1P6kbWLDRLa0sxeRrRnS5tRXCwmX00advdP5+bVKLhlEuaCi39reqUnGo2dzWQaS3q3QMiTRH8O+NXAaXbooWpxLYcHSBDc1IzUegw9p0+LbXBzYzuwrjtpM/fbGGvUdRARlRLWmbxp+spiGgcpSWU3SsPW/75wNCEEMZmoLdkZrxmU39ssk5pA5wkm7sF19CAK1oJNZhpQuqZGFQ6eFfbFvKwnq6qavDkSW9P14TeAVlNKEsWz+HSlZtj83qa1TwkRLVIETQZ9U4Myjp49wRRQzsGhyfUBAqdkllKKNSLQsf4lPxKlWvfvN7Ez6OQTNFqSBE0GfV2UmYZ1AzYfOUpY9txg+qGp16suWzV0l3o4O/PfGNq28ksyjWpJIRCMkWrkZZQ9tGk90BRQ3lR78SgcmUhwmOiRJ2SoVKotxPVgP27CuPCPkNm7LP3uAG+WuXaiHhuIRpB2oogjBCaA7wZuCPYPh14IE+h2pksiUFrNvXz6Tu3jtnze7oKnHbCoRU1VQ+TpUbcy5Z/3rHz1VgHaSOdw+cvmMVN9z0d+16oNMspqXLKVSGZol0oGzVkZj8A3uTuLwXby4C7yl3YzI4AvgYcQnGcuc7dv2hmBwArgdnAkxRLV+yY1KdoIcrNQuOiXgaGhrkxMigmmT5KB+4wWapchNDg8ChLbt0ytl1pi8ZaM6O7wFV9x7N647OxpSz27ypkUlJZTDxZQjJV9VNMdbL4CA4BdkW2dwX7yrEbuMzdHzKz/YCNZnY38AHge+6+3MyWAkuBT1QmdutSbha6Yu1jmQq3xZk+JhPVE5ZheHX36ARFUm9OfeOhrNnUn1jPyCzbZz352JkV37vciqpVE+xEa5NFEXwNeMDMvh5s9wE3lDvJ3Z8Dngtev2RmjwK9wHuAtweH3QDcixTBONJmoZX4Ck
qPnayfIc4e3wjWbdvOum3bE98fGBxmIEMYbNo14si6omrmDGQh4iirCNz9ajP7NvB7wa4/dfdNldzEzGYD84D7gUMCJQHwCxJWF2Z2EXARwKxZsyq5XcsRNT1UYo4p9Su0Clmd2+WOq1QxVrKiUoipmEpkDR/tBn7j7l81s5lmdpS7P5HlRDPbF1gNXOLuvzHb057c3d3MYkc1d78OuA5g/vz5zVXEvo4kzULL0VXo5ORjZ6a2j2xFDMb8KbXwEUSpZHBXiKmYSpRVBGZ2JTCfYvTQV4ECcCNQtpexmRUoKoGbIq0tf2lmh7r7c2Z2KPB8tcK3A0mz0A4r1tKH+Kihk4+dyeqN/ZlmsI1uGh9H1oimKEYxmihqkklqGB9VGFnJEmoLCjEVU48sK4I/omjWeQjA3X8eOH9TseLU/3rg0ZKcgzuAC4Hlwe9vVCp0K5A10iRpFuoOTy4/NfG6NyaEVsZeK7vYdeNn17wbKH6epMG8lGvPmTvuGe5plDN+ZRCnMLIQF9EVvaYzsbaREFOBLIpgV9SEY2b7ZLz2QuD9wCNmFubqf5KiAlhlZh8EngLOrlDmKU+WTNdwQE8a/uJMD81Y9K1awryFvnm9XJqhdEVvT1dioxyoTS5AXNOaEXcN/mLKk0URrDKzfwN6zOzPgT8DvlzuJHf/IcWJUhx/kF3E1qNcpmuWAT0u9DHvom+dZpx30hGs27Y9d7/Dp+/cWraPcEg5U0wtyzOr1LNoRdI6lAHg7v8A3EbR1j8H+JS7fylvwRrJmk39LFx+D0eldOCaDOXKSGQZ0Fc+8Mw4udZs6s9tcO4qdHLBglnsN30aN973dF2cz9EqqHHdt8IZRj26NwnR6pRVBGb2GXe/292XuPvH3P1uM/tMPYRrBKUtGvNoUZgUURLuzxKdMjzqYw1gQpnzYmh4hBvve7pheQTR1n1QXJnIHi9E7SirCIB3xOx7V60FaRbSzDa1olx/2ayhh5WsIKYaPV3jO4al9RG+Ys0jua7ghGh1EhWBmf2lmT0CHGtmD0d+ngDym342mHpU/yzXnDpOUcTRYcaaTf1TPnmp9EtY6DCWnXHchOOSlPRNgbkqrxWcEK1OmrP4v4BvA9dQrAchNtkvAAAUEklEQVQU8pK7N0/x+RqTpfpnpSSFiiaZNEqjU5IYcefy2x9JLMecha5CJ2ed2DuWg9DTXeDlV3YzPFqfoNJ99urk6j86fnKhtCXbKvEgRGWkVR/9NfBrM/si8GKk+uhrzOwkd7+/XkLWk1rXoK+2KUppzf9o2ekoQ8MjTC900FXorMo8FOdorWePgZ27Rtjw1IsTWkrGkTWhC1TiQYhKyOIj+Bfg5cj2y8G+lqSc2SYka2RRLXwOffN62fSpUxJjcXcMDjO90JH4fhJpsffrly4ac87mzY33Pc3sDPb9rCYzUIkHISohSx6Bue9J63T3UTNr6RaX5WLFK5nlJ81gq5ltp82Iq2k6X64Mc737EJdbLZUmh+3fVeClV3czUmLGKu1XLIRIJ8uK4HEzu9jMCsHPR4DH8xasmalklt9p8fP0uP3lVhmVzIizEM7EX3f5t7hizR7/fyhHlozeWlNutRSuVp5Yfir77D1tghIA2Hf6NPkHhKiALDP7DwNfAq6g6Jf7HkF56HalksiipBo5pfuzrDL65vWy4akXK6ojBMUZ8m53ksr1jLiPXXP+kQfkVqai0GmZmupEn2NaTaakv0OWXgRCiD1kySx+3t3PdfeD3f0Qd/9jd2/riqHlEsKiJNnZS/dnXWVU2kzFgHPeckSmynI33vc0l6zcnIsS6O3pYsV7T2Dh6w4oe2z4HMsl91XydxBCJJOWR/Dx4Pc/mtmXSn/qJ2LzUS4hrJpjs64yKo2GceCuh59LrvpUBwzGooIeevrXqcdGn0055VjJ30EIkUyaaejR4PeGeggylaikomXWY7PmL1QSQhlSjSM5K12FjsTewSHhZ0jKgO40Y9Q9s+kn3F/LyqJCtD
NpeQR3Br/L9iduN7L2EgjJUrFyyeI5LLltyzgbeqFzYvTLksVz+OiqzdQp3wvYk3S2euOzY4N+h8EfnzSrrE8hOkNPGthH3XkiprdCFuWoaqBCTJ5ERWBmd5JiWXb3M3KRqMmppJdAqaIoq0BKn3bC0w9n0KUUOgyMTA7ZrPQG3c7WbdvOK8OjiYXews/V013AHX49NDzhM1aatV3r5L5aUulkQIhmxjwhlMTM3ha8PBP4LYrtKQHOA37p7pfmL16R+fPn+4YNzWGhSuoB3NvTxfqli2J7CeyZUSe3jkxqyRhet9z9Q3q6CphN3hzUVejkmjOPByZ2+ArfSxr40hRhra7VSKr5HEI0AjPb6O7zyx2XZhr6fnChz5Vc6E4za45RuQFU00tgaHiEm+9/JrXdYtI7lTqLB4aGKXRW7xk2GDfgLlx+T6rDtnSQBsqumGptVqs35RoLCTHVyJJHsI+ZvdbdHwcws6OArO0qW45y5o2kgTpLz92k+2W5f5RqTUOlqw9I/jzhAF864E8vdKQOks04sFdKPSrUClFPsmQWXwrca2b3mtn3gXXAJfmK1RzEZfpW20sgKcO4HKX28HJlIcpxwYJZzOguTNifVJZh/66Jx0Jx5RA34CeZpPoHhlqmX4DyF0SrkSWh7DvAMcBHgIuBOe6+Nm/BGk1SMhNQcS+BrkIn5510RMXlIboLHaxY+9i4AbTShLIovT1dXNV3PFeeftxE81GCnkrSX9WsOVqlX4DyF0SrkaVVZTewBPgbd98CzDKz03KXrMEk2YE/fefWVBt337xezjqxd2wF0GnGWSf2clXf8ePaLWZhcHh0nCJacuuWqktDR2f8K9Y+NsF8NDzisTV+8ijXUOuOb/Uma4VaIaYKWXwEXwU2Am8NtvuBW4Fv5iVUM5Bk790xODxm/kgKHV29sX/MJzDizuqN/cw/ck9pBQN6ugu8MjxSNhkrSrXNYnq6Ciw747jMiVpRknwSM7oLvDI8mhgFFYa4ZnWCTzVawdchREgWH8Hr3P2zwDCAuw/S0IIF9SGrvbd0dpu0klh2x9ZxpqaiMrEJvXlrSaHT+MI5c9l85SnjBq1KbNxJZpArTz9uLLw0jjBJLGkFJHu6EM1DFkWwy8y6CMzCZvY64NVcpWoCKin5HJ3dJlbEHBqOVRBm1LS0dEh3oYMV7z0hdta6ZPGcYvJZhCRncZoZpG9eb9mBXvZ0IZqfLKahK4HvAEeY2U3AQuADeQqVB9WUhYDxMe87X90d2xs4OruttBbQwOAw154zt+atIb3coq307WC70v7K5bJ/VQ9IiOYnMbMYwMwMOBwYBBZQHC7uc/cX6iNekclmFtcqEzTLdZKOmV7oiA2tjMbux52blHGchbi8AEjOTu4OCshF75flOTVj9q8QogaZxQDu7mb2LXc/HrirZtLVmVplgmaZ3SYdAxNLNQAM7trNmk3942bdy+7YOrby6Oku8PpD92P9z16cIM8xB+/Dk78aTEwgKzVTlWtKPxjjuM7ynOQ4FWJqk8U09JCZvdndH8xdmpyoZSZolkEv7ZjoIA9Fp3Fp5NGru0fHvf+jGCUAMLhrlBXvPYHLVm2JzVyOmqziVhtZmWoRPlqhCFEZWRTBScAFZvYksJPAWuHub8xTsFqSZLfv6S6wcPk9dRsw+ub1smLtYxP8DNFZd9zqJS0EM5S3XJXOpF4AWWjmCJ/SQf/kY2eOK+4XF+IrhBhPFkWwOHcpcibOoVnoNF5+ZXdqTkAelFudVDL7DgfoLCartOv29nQxuGt3rA/DmFjmolmIKwl+031PT1CcKggnRDpp/QimU2xcfzTwCHC9u++ul2C1JGsEUD0GjHJF65LeL3Ual874y5mskq6bVj7bgPMXzGraAbTS1ZMQIp60PIIbgPkUlcC7gM/VRaKc6JvXy/qli3hi+amsX7qIX8eEgUL+A0a5uPqk989fMGtSJQ3K3TcuX+Dac+ZyVV9y0lijqWb1JISYSJpp6P
VBtBBmdj3wQCUXNrOvAKcBz7v7G4J9BwArgdnAk8DZ7r6jcrEnT6XdskKyOiLLxeMnXSOvuPusEU9Z7tMszthqV09CiPGkdSh7yN3flLRd9sJmvw+8DHwtogg+C7zo7svNbCkww90/Ue5aeXQoq7ZbVpZzWrmDVTN9trRucOu2bW+4ohKi0dQij+AEM/tNeD2gK9gOo4Zek3Zhd/+Bmc0u2f0e4O3B6xuAe4GyiiAP0mbISTPerPkIrdzBqpk+m7KWhagNaa0qa18ABw5x9+eC178ADkk60MwuAi4CmDVrVg6ixJtC0prTZ81HqEcHq3LmmbzMN83WnUvJbEJMnixF53LBizapxOoJ7n6du8939/kzZ06uK1clpM14s1btnEwHq7iuaHHHxDXNCY8t9/5kUHcuIVqPeiuCX5rZoQDB7+frfP+ypM144yJvjOJAGx20q624mXUAT1NWae8vu2Nr6v2zoGqiQrQe9VYEdwAXBq8vBL5R5/uXJW3GGw2xhPHRKdFBu9oOVuUG+JBqk9IGhoYnvSpQdy4hWo8smcVVYWY3U3QMH2Rmz1IsZ70cWGVmHwSeAs7O6/7V2siXLJ7Dktu2jCvkVui0cfH2ffN6Yyt4Rp2m1dius9rfq01KA2ri1JVdXojWIrcVgbuf5+6HunvB3Q939+vd/Vfu/gfufoy7/6G7x1dTmySTtpGXei5iPBl5OE2TViP7l3Qxy5KUloQybIUQpTTMWZwnWU0scSy7Y+uE3sDDoxMbu+fhNI3rHAawMyhVHVLOPNM3r5cZ3fEtMOXUFUKU0pKKoNrZ+ppN/bEdyOLOPfnYmROafE3Wado3r5d9p0+01g2PTFREpSUzSk01V55+nJy6QohMtKQiqHa2nrZiKK3tv3pj/ziLkQFnnTh52/lATAVQqNykI6euECIruTmLG0m5PrpJpA225Wr7O7Bu2/bU62dxYFdbAykOOXWFEFloyRVBtbPhpMF2RnchU23/NEWS1YGtOH0hRL1pSUVQLUmD8JWnHzduX6WmpzWb+rls1ZZMDmyZdIQQ9aYlTUNp9YLKNWGH8kXMKjE9hbLE9RSG+FVENSadZikNLYSYerSkIphMhcyszenD+/QPDNFpNm52X64SaZSehDDPSqhW8QkhBLSoaagWyV7lir/1zesdMyWFs/04u3+5eyYsFCpiMnkTQgjRkopgssletSr+luWeSS0zK6HZSkMLIaYWLakIJht5U6vib0myRKlFpq9KQwshJkNLKoLJRt5UUvwtjuj+UJa4kg+1CgtVyKkQYjK0pLMYJpdMVS6pK4zQ6R8YytQoPZQlr8getWwUQkyGxOb1zUQezevTSGvQDkx4L1QGvRqAhRBNRC2a17ctaTPshcvviS0v0dvTxfqlixogrRBCTA4pggSSTEtTJUJHCWZCiKy0pLM4T6ZChE6ezeuFEK2HFEGFTIUIHSWYCSEqQYqgQkpDU2d0F9h7WgeXrtwcm4HcCKaK+UoI0RxIEVRAWHbi0pWbATh/wSxeGR5lYGi4qUwwU8F8JYRoHqQIMhJnd7/pvqeb0gQzFcxXQojmQVFDGUnqShZHo00wSjATQlSCFEFGKhncm8EEozaVQoisyDSUkaTB3Uq2ZYIRQkw1pAgykmR3P3/BLLWVFEJMaWQayojs7ntQ1rIQrYUUQQXI7q62mEK0IjINiYpQ1rIQrYdWBC1Cvcw1yloWovXQiqAFqGeROWUtC9F6SBG0APU01yhrWYjWoyGKwMzeaWaPmdlPzWxpI2RoJepprplsP2ghRPNRdx+BmXUC/wS8A3gWeNDM7nD3n9RbllahXI/lWqPoKSFai0asCN4C/NTdH3f3XcAtwHsaIEfLIHONEGIyNEIR9ALPRLafDfaNw8wuMrMNZrZh+/btdRNuKiJzjRBiMjRt+Ki7XwdcBzB//vykQp8iQOYaIUS1NGJF0A8cEdk+PNgnhBCiATRCETwIHGNmR5nZXsC5wB0NkEMIIQQNMA
25+24z+xtgLdAJfMXdt9ZbDiGEEEUa4iNw928B32rEvYUQQoxHmcVCCNHmSBEIIUSbI0UghBBtjhSBEEK0OVIEQgjR5kgRCCFEmyNFIIQQbY4UgRBCtDlSBEII0eY0bfXRRlGvJvBCCNEsSBFECJvAh/1/wybwgJSBEKJlkWkoQj2bwAshRLMgRRChnk3ghRCiWZAiiJDU7D2vJvBCCNEMSBFEUBN4IUQ7ImdxhNAhrKghIUQ7IUVQgprACyHaDZmGhBCizZEiEEKINkeKQAgh2hwpAiGEaHOkCIQQos0xd2+0DGUxs+3AU42WY5IcBLzQaCGaCD2PPehZjEfPYw+TfRZHuvvMcgdNCUXQCpjZBnef32g5mgU9jz3oWYxHz2MP9XoWMg0JIUSbI0UghBBtjhRB/biu0QI0GXoee9CzGI+exx7q8izkIxBCiDZHKwIhhGhzpAiEEKLNkSLIATP7ipk9b2Y/juw7wMzuNrP/CX7PaKSM9cLMjjCzdWb2EzPbamYfCfa36/OYbmYPmNmW4Hl8Oth/lJndb2Y/NbOVZrZXo2WtF2bWaWabzOybwXY7P4snzewRM9tsZhuCfbn/r0gR5MN/AO8s2bcU+J67HwN8L9huB3YDl7n764EFwF+b2etp3+fxKrDI3U8A5gLvNLMFwGeAa939aGAH8MEGylhvPgI8Gtlu52cBcLK7z43kD+T+vyJFkAPu/gPgxZLd7wFuCF7fAPTVVagG4e7PuftDweuXKP7D99K+z8Pd/eVgsxD8OLAIuC3Y3zbPw8wOB04FvhxsG236LFLI/X9FiqB+HOLuzwWvfwEc0khhGoGZzQbmAffTxs8jMIVsBp4H7gZ+Bgy4++7gkGcpKst24AvAx4HRYPtA2vdZQHFS8F0z22hmFwX7cv9fUYeyBuDubmZtFbdrZvsCq4FL3P03xYlfkXZ7Hu4+Asw1sx7g68CxDRapIZjZacDz7r7RzN7eaHmahN91934zOxi428y2Rd/M639FK4L68UszOxQg+P18g+WpG2ZWoKgEbnL324Pdbfs8Qtx9AFgHvBXoMbNwYnY40N8wwerHQuAMM3sSuIWiSeiLtOezAMDd+4Pfz1OcJLyFOvyvSBHUjzuAC4PXFwLfaKAsdSOw+V4PPOrun4+81a7PY2awEsDMuoB3UPSbrAPeGxzWFs/D3S9398PdfTZwLnCPu59PGz4LADPbx8z2C18DpwA/pg7/K8oszgEzuxl4O8USsr8ErgTWAKuAWRRLap/t7qUO5ZbDzH4X+G/gEfbYgT9J0U/Qjs/jjRQdfp0UJ2Kr3P3vzOy1FGfFBwCbgAvc/dXGSVpfAtPQx9z9tHZ9FsHn/nqwOQ34L3e/2swOJOf/FSkCIYRoc2QaEkKINkeKQAgh2hwpAiGEaHOkCIQQos2RIhBCiDZHikAIwMxGgoqPPzazW82sO+G4b4V5AEK0CgofFQIws5fdfd/g9U3AxmgCXJAYZ+4+mnQNIaYqWhEIMZH/Bo42s9lm9piZfY1ihucRQb34gwDM7E/M7OGgt8B/BvtmmtlqM3sw+FkY7H9bsOLYHNTe369hn06IElR0TogIQY2bdwHfCXYdA1zo7vcF74fHHQdcAfyOu79gZgcEx3+RYi39H5rZLGAt8NvAx4C/dvf1QQG+V+r1mYQohxSBEEW6gtLQUFwRXA8cBjwVKoESFgG3uvsLAJGU/z8EXh+prvqaYOBfD3w+MDvd7u7P5vQ5hKgYKQIhigy5+9zojmAw31nhdTqABe5eOuNfbmZ3Ae8G1pvZYnffNvF0IeqPfARCVMc9wPuCgmBETEPfBf42PMjM5ga/X+fuj7j7Z4AHadMeBKI5kSIQogrcfStwNfB9M9sChBFGFwPzAyfyT4APB/svCUJTHwaGgW/XXWghElD4qBBCtDlaEQghRJsjRSCEEG2OFIEQQrQ5UgRCCNHmSBEIIUSbI0UghBBtjhSBEEK0Of8ftB+tAZaR+TgAAAAASU
VORK5CYII=\\n\",\n      \"text/plain\": [\n       \"<Figure size 432x288 with 1 Axes>\"\n      ]\n     },\n     \"metadata\": {},\n     \"output_type\": \"display_data\"\n    }\n   ],\n   \"source\": [\n    \"plt.scatter(labels, predicted_prices)\\n\",\n    \"plt.xlabel(\\\"Prices\\\")\\n\",\n    \"plt.ylabel(\\\"Predicted Prices\\\")\\n\",\n    \"plt.title(\\\"Prices versus Predicted Prices\\\")\\n\",\n    \"plt.show()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 8,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/plain\": [\n       \"21.831934375295628\"\n      ]\n     },\n     \"execution_count\": 8,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"training_error = \\\\\\n\",\n    \"    (labels - predicted_prices).apply(lambda x: x ** 2).mean()\\n\",\n    \"\\n\",\n    \"training_error\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 9,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Citation: http://bigdata-madesimple.com/how-to-run-linear-regression-in-python-scikit-learn/\"\n   ]\n  }\n ],\n \"metadata\": {\n  \"kernelspec\": {\n   \"display_name\": \"Python [conda env:modin-dev]\",\n   \"language\": \"python\",\n   \"name\": \"conda-env-modin-dev-py\"\n  },\n  \"language_info\": {\n   \"codemirror_mode\": {\n    \"name\": \"ipython\",\n    \"version\": 3\n   },\n   \"file_extension\": \".py\",\n   \"mimetype\": \"text/x-python\",\n   \"name\": \"python\",\n   \"nbconvert_exporter\": \"python\",\n   \"pygments_lexer\": \"ipython3\",\n   \"version\": \"3.6.6\"\n  }\n },\n \"nbformat\": 4,\n \"nbformat_minor\": 2\n}\n"
  },
  {
    "path": "examples/quickstart.ipynb",
    "content": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"![LOGO](https://github.com/modin-project/modin/blob/main/examples/tutorial/jupyter/img/MODIN_ver2_hrz.png?raw=True)\\n\",\n    \"\\n\",\n    \"<center><h2>Scale your pandas workflows by changing one line of code</h2>\\n\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Getting Started\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"To install the most recent stable release for Modin run the following code on your command line:\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"!pip install \\\"modin[all]\\\" \"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"For further instructions on how to install Modin with conda or for specific platforms or engines, see our detailed [installation guide](https://modin.readthedocs.io/en/latest/getting_started/installation.html).\\n\",\n    \"\\n\",\n    \"Modin acts as a drop-in replacement for pandas so you can simply change a single line of import to speed up your pandas workflows. 
To use Modin, you simply have to replace the import of pandas with the import of Modin, as follows.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 1,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"import modin.pandas as pd\\n\",\n    \"import pandas\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 2,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"name\": \"stderr\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"2022-01-07 07:29:30,173\\tINFO services.py:1250 -- View the Ray dashboard at \\u001b[1m\\u001b[32mhttp://127.0.0.1:8265\\u001b[39m\\u001b[22m\\n\"\n     ]\n    }\n   ],\n   \"source\": [\n    \"#############################################\\n\",\n    \"### For the purpose of timing comparisons ###\\n\",\n    \"#############################################\\n\",\n    \"import time\\n\",\n    \"import ray\\n\",\n    \"# Look at the Ray documentation with respect to the Ray configuration suited to you most.\\n\",\n    \"ray.init()\\n\",\n    \"from IPython.display import Markdown, display\\n\",\n    \"def printmd(string):\\n\",\n    \"    display(Markdown(string))\"\n   ]\n  },\n  {\n   \"attachments\": {},\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### Dataset: NYC taxi trip data\\n\",\n    \"\\n\",\n    \"Link to raw dataset: https://modin-datasets.intel.com/testing/yellow_tripdata_2015-01.csv (**Size: ~200MB**)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 3,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/plain\": [\n       \"('taxi.csv', <http.client.HTTPMessage at 0x1307faf70>)\"\n      ]\n     },\n     \"execution_count\": 3,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"# This may take a few minutes to download\\n\",\n    \"import urllib.request\\n\",\n    \"dataset_url = 
\\\"https://modin-datasets.intel.com/testing/yellow_tripdata_2015-01.csv\\\"\\n\",\n    \"urllib.request.urlretrieve(dataset_url, \\\"taxi.csv\\\")  \"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Faster Data Loading with Modin's ``read_csv``\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 4,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"name\": \"stderr\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"DtypeWarning: Columns (6) have mixed types.Specify dtype option on import or set low_memory=False.\\n\"\n     ]\n    },\n    {\n     \"name\": \"stdout\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"Time to read with pandas: 2.744 seconds\\n\"\n     ]\n    }\n   ],\n   \"source\": [\n    \"start = time.time()\\n\",\n    \"\\n\",\n    \"pandas_df = pandas.read_csv(\\\"taxi.csv\\\", parse_dates=[\\\"tpep_pickup_datetime\\\", \\\"tpep_dropoff_datetime\\\"], quoting=3)\\n\",\n    \"\\n\",\n    \"end = time.time()\\n\",\n    \"pandas_duration = end - start\\n\",\n    \"print(\\\"Time to read with pandas: {} seconds\\\".format(round(pandas_duration, 3)))\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 5,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"name\": \"stdout\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"Time to read with Modin: 1.35 seconds\\n\"\n     ]\n    },\n    {\n     \"name\": \"stderr\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"UserWarning: `read_*` implementation has mismatches with pandas:\\n\",\n      \"Data types of partitions are different! 
Please refer to the troubleshooting section of the Modin documentation to fix this issue.\\n\"\n     ]\n    },\n    {\n     \"data\": {\n      \"text/markdown\": [\n       \"## Modin is 2.03x faster than pandas at `read_csv`!\"\n      ],\n      \"text/plain\": [\n       \"<IPython.core.display.Markdown object>\"\n      ]\n     },\n     \"metadata\": {},\n     \"output_type\": \"display_data\"\n    }\n   ],\n   \"source\": [\n    \"start = time.time()\\n\",\n    \"\\n\",\n    \"modin_df = pd.read_csv(\\\"taxi.csv\\\", parse_dates=[\\\"tpep_pickup_datetime\\\", \\\"tpep_dropoff_datetime\\\"], quoting=3)\\n\",\n    \"\\n\",\n    \"end = time.time()\\n\",\n    \"modin_duration = end - start\\n\",\n    \"print(\\\"Time to read with Modin: {} seconds\\\".format(round(modin_duration, 3)))\\n\",\n    \"\\n\",\n    \"printmd(\\\"## Modin is {}x faster than pandas at `read_csv`!\\\".format(round(pandas_duration / modin_duration, 2)))\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"You can quickly check that the result from pandas and Modin is exactly the same.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 6,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>VendorID</th>\\n\",\n       \"      
<th>tpep_pickup_datetime</th>\\n\",\n       \"      <th>tpep_dropoff_datetime</th>\\n\",\n       \"      <th>passenger_count</th>\\n\",\n       \"      <th>trip_distance</th>\\n\",\n       \"      <th>RatecodeID</th>\\n\",\n       \"      <th>store_and_fwd_flag</th>\\n\",\n       \"      <th>PULocationID</th>\\n\",\n       \"      <th>DOLocationID</th>\\n\",\n       \"      <th>payment_type</th>\\n\",\n       \"      <th>fare_amount</th>\\n\",\n       \"      <th>extra</th>\\n\",\n       \"      <th>mta_tax</th>\\n\",\n       \"      <th>tip_amount</th>\\n\",\n       \"      <th>tolls_amount</th>\\n\",\n       \"      <th>improvement_surcharge</th>\\n\",\n       \"      <th>total_amount</th>\\n\",\n       \"      <th>congestion_surcharge</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>1.0</td>\\n\",\n       \"      <td>2021-01-01 00:30:10</td>\\n\",\n       \"      <td>2021-01-01 00:36:12</td>\\n\",\n       \"      <td>1.0</td>\\n\",\n       \"      <td>2.10</td>\\n\",\n       \"      <td>1.0</td>\\n\",\n       \"      <td>N</td>\\n\",\n       \"      <td>142</td>\\n\",\n       \"      <td>43</td>\\n\",\n       \"      <td>2.0</td>\\n\",\n       \"      <td>8.00</td>\\n\",\n       \"      <td>3.00</td>\\n\",\n       \"      <td>0.5</td>\\n\",\n       \"      <td>0.00</td>\\n\",\n       \"      <td>0.0</td>\\n\",\n       \"      <td>0.3</td>\\n\",\n       \"      <td>11.80</td>\\n\",\n       \"      <td>2.5</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>1.0</td>\\n\",\n       \"      <td>2021-01-01 00:51:20</td>\\n\",\n       \"      <td>2021-01-01 00:52:19</td>\\n\",\n       \"      <td>1.0</td>\\n\",\n       \"      <td>0.20</td>\\n\",\n       \"      <td>1.0</td>\\n\",\n       \"      <td>N</td>\\n\",\n       \"      <td>238</td>\\n\",\n       \"      <td>151</td>\\n\",\n       
\"      <td>2.0</td>\\n\",\n       \"      <td>3.00</td>\\n\",\n       \"      <td>0.50</td>\\n\",\n       \"      <td>0.5</td>\\n\",\n       \"      <td>0.00</td>\\n\",\n       \"      <td>0.0</td>\\n\",\n       \"      <td>0.3</td>\\n\",\n       \"      <td>4.30</td>\\n\",\n       \"      <td>0.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>1.0</td>\\n\",\n       \"      <td>2021-01-01 00:43:30</td>\\n\",\n       \"      <td>2021-01-01 01:11:06</td>\\n\",\n       \"      <td>1.0</td>\\n\",\n       \"      <td>14.70</td>\\n\",\n       \"      <td>1.0</td>\\n\",\n       \"      <td>N</td>\\n\",\n       \"      <td>132</td>\\n\",\n       \"      <td>165</td>\\n\",\n       \"      <td>1.0</td>\\n\",\n       \"      <td>42.00</td>\\n\",\n       \"      <td>0.50</td>\\n\",\n       \"      <td>0.5</td>\\n\",\n       \"      <td>8.65</td>\\n\",\n       \"      <td>0.0</td>\\n\",\n       \"      <td>0.3</td>\\n\",\n       \"      <td>51.95</td>\\n\",\n       \"      <td>0.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      <td>1.0</td>\\n\",\n       \"      <td>2021-01-01 00:15:48</td>\\n\",\n       \"      <td>2021-01-01 00:31:01</td>\\n\",\n       \"      <td>0.0</td>\\n\",\n       \"      <td>10.60</td>\\n\",\n       \"      <td>1.0</td>\\n\",\n       \"      <td>N</td>\\n\",\n       \"      <td>138</td>\\n\",\n       \"      <td>132</td>\\n\",\n       \"      <td>1.0</td>\\n\",\n       \"      <td>29.00</td>\\n\",\n       \"      <td>0.50</td>\\n\",\n       \"      <td>0.5</td>\\n\",\n       \"      <td>6.05</td>\\n\",\n       \"      <td>0.0</td>\\n\",\n       \"      <td>0.3</td>\\n\",\n       \"      <td>36.35</td>\\n\",\n       \"      <td>0.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>4</th>\\n\",\n       \"      <td>2.0</td>\\n\",\n       \"      <td>2021-01-01 00:31:49</td>\\n\",\n       \"     
 <td>2021-01-01 00:48:21</td>\\n\",\n       \"      <td>1.0</td>\\n\",\n       \"      <td>4.94</td>\\n\",\n       \"      <td>1.0</td>\\n\",\n       \"      <td>N</td>\\n\",\n       \"      <td>68</td>\\n\",\n       \"      <td>33</td>\\n\",\n       \"      <td>1.0</td>\\n\",\n       \"      <td>16.50</td>\\n\",\n       \"      <td>0.50</td>\\n\",\n       \"      <td>0.5</td>\\n\",\n       \"      <td>4.06</td>\\n\",\n       \"      <td>0.0</td>\\n\",\n       \"      <td>0.3</td>\\n\",\n       \"      <td>24.36</td>\\n\",\n       \"      <td>2.5</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>...</th>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1369760</th>\\n\",\n       \"      <td>NaN</td>\\n\",\n       \"      <td>2021-01-25 08:32:04</td>\\n\",\n       \"      <td>2021-01-25 08:49:32</td>\\n\",\n       \"      <td>NaN</td>\\n\",\n       \"      <td>8.80</td>\\n\",\n       \"      <td>NaN</td>\\n\",\n       \"      <td>NaN</td>\\n\",\n       \"      <td>135</td>\\n\",\n       \"      <td>82</td>\\n\",\n       \"      <td>NaN</td>\\n\",\n       \"      <td>21.84</td>\\n\",\n       \"      <td>2.75</td>\\n\",\n       \"      <td>0.5</td>\\n\",\n       \"      <td>0.00</td>\\n\",\n       \"      <td>0.0</td>\\n\",\n       \"      <td>0.3</td>\\n\",\n       \"      
<td>25.39</td>\\n\",\n       \"      <td>0.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1369761</th>\\n\",\n       \"      <td>NaN</td>\\n\",\n       \"      <td>2021-01-25 08:34:00</td>\\n\",\n       \"      <td>2021-01-25 09:04:00</td>\\n\",\n       \"      <td>NaN</td>\\n\",\n       \"      <td>5.86</td>\\n\",\n       \"      <td>NaN</td>\\n\",\n       \"      <td>NaN</td>\\n\",\n       \"      <td>42</td>\\n\",\n       \"      <td>161</td>\\n\",\n       \"      <td>NaN</td>\\n\",\n       \"      <td>26.67</td>\\n\",\n       \"      <td>2.75</td>\\n\",\n       \"      <td>0.5</td>\\n\",\n       \"      <td>0.00</td>\\n\",\n       \"      <td>0.0</td>\\n\",\n       \"      <td>0.3</td>\\n\",\n       \"      <td>30.22</td>\\n\",\n       \"      <td>0.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1369762</th>\\n\",\n       \"      <td>NaN</td>\\n\",\n       \"      <td>2021-01-25 08:37:00</td>\\n\",\n       \"      <td>2021-01-25 08:53:00</td>\\n\",\n       \"      <td>NaN</td>\\n\",\n       \"      <td>4.45</td>\\n\",\n       \"      <td>NaN</td>\\n\",\n       \"      <td>NaN</td>\\n\",\n       \"      <td>14</td>\\n\",\n       \"      <td>106</td>\\n\",\n       \"      <td>NaN</td>\\n\",\n       \"      <td>25.29</td>\\n\",\n       \"      <td>2.75</td>\\n\",\n       \"      <td>0.5</td>\\n\",\n       \"      <td>0.00</td>\\n\",\n       \"      <td>0.0</td>\\n\",\n       \"      <td>0.3</td>\\n\",\n       \"      <td>28.84</td>\\n\",\n       \"      <td>0.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1369763</th>\\n\",\n       \"      <td>NaN</td>\\n\",\n       \"      <td>2021-01-25 08:28:00</td>\\n\",\n       \"      <td>2021-01-25 08:50:00</td>\\n\",\n       \"      <td>NaN</td>\\n\",\n       \"      <td>10.04</td>\\n\",\n       \"      <td>NaN</td>\\n\",\n       \"      <td>NaN</td>\\n\",\n       \"      <td>175</td>\\n\",\n       \"      
<td>216</td>\\n\",\n       \"      <td>NaN</td>\\n\",\n       \"      <td>28.24</td>\\n\",\n       \"      <td>2.75</td>\\n\",\n       \"      <td>0.5</td>\\n\",\n       \"      <td>0.00</td>\\n\",\n       \"      <td>0.0</td>\\n\",\n       \"      <td>0.3</td>\\n\",\n       \"      <td>31.79</td>\\n\",\n       \"      <td>0.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1369764</th>\\n\",\n       \"      <td>NaN</td>\\n\",\n       \"      <td>2021-01-25 08:38:00</td>\\n\",\n       \"      <td>2021-01-25 08:50:00</td>\\n\",\n       \"      <td>NaN</td>\\n\",\n       \"      <td>4.93</td>\\n\",\n       \"      <td>NaN</td>\\n\",\n       \"      <td>NaN</td>\\n\",\n       \"      <td>248</td>\\n\",\n       \"      <td>168</td>\\n\",\n       \"      <td>NaN</td>\\n\",\n       \"      <td>20.76</td>\\n\",\n       \"      <td>2.75</td>\\n\",\n       \"      <td>0.5</td>\\n\",\n       \"      <td>0.00</td>\\n\",\n       \"      <td>0.0</td>\\n\",\n       \"      <td>0.3</td>\\n\",\n       \"      <td>24.31</td>\\n\",\n       \"      <td>0.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"<p>1369765 rows × 18 columns</p>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"         VendorID tpep_pickup_datetime tpep_dropoff_datetime  passenger_count  \\\\\\n\",\n       \"0             1.0  2021-01-01 00:30:10   2021-01-01 00:36:12              1.0   \\n\",\n       \"1             1.0  2021-01-01 00:51:20   2021-01-01 00:52:19              1.0   \\n\",\n       \"2             1.0  2021-01-01 00:43:30   2021-01-01 01:11:06              1.0   \\n\",\n       \"3             1.0  2021-01-01 00:15:48   2021-01-01 00:31:01              0.0   \\n\",\n       \"4             2.0  2021-01-01 00:31:49   2021-01-01 00:48:21              1.0   \\n\",\n       \"...           ...                  ...                   ...              ...   
\\n\",\n       \"1369760       NaN  2021-01-25 08:32:04   2021-01-25 08:49:32              NaN   \\n\",\n       \"1369761       NaN  2021-01-25 08:34:00   2021-01-25 09:04:00              NaN   \\n\",\n       \"1369762       NaN  2021-01-25 08:37:00   2021-01-25 08:53:00              NaN   \\n\",\n       \"1369763       NaN  2021-01-25 08:28:00   2021-01-25 08:50:00              NaN   \\n\",\n       \"1369764       NaN  2021-01-25 08:38:00   2021-01-25 08:50:00              NaN   \\n\",\n       \"\\n\",\n       \"         trip_distance  RatecodeID store_and_fwd_flag  PULocationID  \\\\\\n\",\n       \"0                 2.10         1.0                  N           142   \\n\",\n       \"1                 0.20         1.0                  N           238   \\n\",\n       \"2                14.70         1.0                  N           132   \\n\",\n       \"3                10.60         1.0                  N           138   \\n\",\n       \"4                 4.94         1.0                  N            68   \\n\",\n       \"...                ...         ...                ...           ...   
\\n\",\n       \"1369760           8.80         NaN                NaN           135   \\n\",\n       \"1369761           5.86         NaN                NaN            42   \\n\",\n       \"1369762           4.45         NaN                NaN            14   \\n\",\n       \"1369763          10.04         NaN                NaN           175   \\n\",\n       \"1369764           4.93         NaN                NaN           248   \\n\",\n       \"\\n\",\n       \"         DOLocationID  payment_type  fare_amount  extra  mta_tax  tip_amount  \\\\\\n\",\n       \"0                  43           2.0         8.00   3.00      0.5        0.00   \\n\",\n       \"1                 151           2.0         3.00   0.50      0.5        0.00   \\n\",\n       \"2                 165           1.0        42.00   0.50      0.5        8.65   \\n\",\n       \"3                 132           1.0        29.00   0.50      0.5        6.05   \\n\",\n       \"4                  33           1.0        16.50   0.50      0.5        4.06   \\n\",\n       \"...               ...           ...          ...    ...      ...         ...   
\\n\",\n       \"1369760            82           NaN        21.84   2.75      0.5        0.00   \\n\",\n       \"1369761           161           NaN        26.67   2.75      0.5        0.00   \\n\",\n       \"1369762           106           NaN        25.29   2.75      0.5        0.00   \\n\",\n       \"1369763           216           NaN        28.24   2.75      0.5        0.00   \\n\",\n       \"1369764           168           NaN        20.76   2.75      0.5        0.00   \\n\",\n       \"\\n\",\n       \"         tolls_amount  improvement_surcharge  total_amount  \\\\\\n\",\n       \"0                 0.0                    0.3         11.80   \\n\",\n       \"1                 0.0                    0.3          4.30   \\n\",\n       \"2                 0.0                    0.3         51.95   \\n\",\n       \"3                 0.0                    0.3         36.35   \\n\",\n       \"4                 0.0                    0.3         24.36   \\n\",\n       \"...               ...                    ...           ...   \\n\",\n       \"1369760           0.0                    0.3         25.39   \\n\",\n       \"1369761           0.0                    0.3         30.22   \\n\",\n       \"1369762           0.0                    0.3         28.84   \\n\",\n       \"1369763           0.0                    0.3         31.79   \\n\",\n       \"1369764           0.0                    0.3         24.31   \\n\",\n       \"\\n\",\n       \"         congestion_surcharge  \\n\",\n       \"0                         2.5  \\n\",\n       \"1                         0.0  \\n\",\n       \"2                         0.0  \\n\",\n       \"3                         0.0  \\n\",\n       \"4                         2.5  \\n\",\n       \"...                       ...  
\\n\",\n       \"1369760                   0.0  \\n\",\n       \"1369761                   0.0  \\n\",\n       \"1369762                   0.0  \\n\",\n       \"1369763                   0.0  \\n\",\n       \"1369764                   0.0  \\n\",\n       \"\\n\",\n       \"[1369765 rows x 18 columns]\"\n      ]\n     },\n     \"execution_count\": 6,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"pandas_df\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 7,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>VendorID</th>\\n\",\n       \"      <th>tpep_pickup_datetime</th>\\n\",\n       \"      <th>tpep_dropoff_datetime</th>\\n\",\n       \"      <th>passenger_count</th>\\n\",\n       \"      <th>trip_distance</th>\\n\",\n       \"      <th>RatecodeID</th>\\n\",\n       \"      <th>store_and_fwd_flag</th>\\n\",\n       \"      <th>PULocationID</th>\\n\",\n       \"      <th>DOLocationID</th>\\n\",\n       \"      <th>payment_type</th>\\n\",\n       \"      <th>fare_amount</th>\\n\",\n       \"      <th>extra</th>\\n\",\n       \"      <th>mta_tax</th>\\n\",\n       \"      <th>tip_amount</th>\\n\",\n       \"      <th>tolls_amount</th>\\n\",\n       \"      <th>improvement_surcharge</th>\\n\",\n       
\"      <th>total_amount</th>\\n\",\n       \"      <th>congestion_surcharge</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>1.0</td>\\n\",\n       \"      <td>2021-01-01 00:30:10</td>\\n\",\n       \"      <td>2021-01-01 00:36:12</td>\\n\",\n       \"      <td>1.0</td>\\n\",\n       \"      <td>2.10</td>\\n\",\n       \"      <td>1.0</td>\\n\",\n       \"      <td>N</td>\\n\",\n       \"      <td>142</td>\\n\",\n       \"      <td>43</td>\\n\",\n       \"      <td>2.0</td>\\n\",\n       \"      <td>8.00</td>\\n\",\n       \"      <td>3.00</td>\\n\",\n       \"      <td>0.5</td>\\n\",\n       \"      <td>0.00</td>\\n\",\n       \"      <td>0.0</td>\\n\",\n       \"      <td>0.3</td>\\n\",\n       \"      <td>11.80</td>\\n\",\n       \"      <td>2.5</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>1.0</td>\\n\",\n       \"      <td>2021-01-01 00:51:20</td>\\n\",\n       \"      <td>2021-01-01 00:52:19</td>\\n\",\n       \"      <td>1.0</td>\\n\",\n       \"      <td>0.20</td>\\n\",\n       \"      <td>1.0</td>\\n\",\n       \"      <td>N</td>\\n\",\n       \"      <td>238</td>\\n\",\n       \"      <td>151</td>\\n\",\n       \"      <td>2.0</td>\\n\",\n       \"      <td>3.00</td>\\n\",\n       \"      <td>0.50</td>\\n\",\n       \"      <td>0.5</td>\\n\",\n       \"      <td>0.00</td>\\n\",\n       \"      <td>0.0</td>\\n\",\n       \"      <td>0.3</td>\\n\",\n       \"      <td>4.30</td>\\n\",\n       \"      <td>0.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>1.0</td>\\n\",\n       \"      <td>2021-01-01 00:43:30</td>\\n\",\n       \"      <td>2021-01-01 01:11:06</td>\\n\",\n       \"      <td>1.0</td>\\n\",\n       \"      <td>14.70</td>\\n\",\n       \"      <td>1.0</td>\\n\",\n       \"      
<td>N</td>\\n\",\n       \"      <td>132</td>\\n\",\n       \"      <td>165</td>\\n\",\n       \"      <td>1.0</td>\\n\",\n       \"      <td>42.00</td>\\n\",\n       \"      <td>0.50</td>\\n\",\n       \"      <td>0.5</td>\\n\",\n       \"      <td>8.65</td>\\n\",\n       \"      <td>0.0</td>\\n\",\n       \"      <td>0.3</td>\\n\",\n       \"      <td>51.95</td>\\n\",\n       \"      <td>0.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      <td>1.0</td>\\n\",\n       \"      <td>2021-01-01 00:15:48</td>\\n\",\n       \"      <td>2021-01-01 00:31:01</td>\\n\",\n       \"      <td>0.0</td>\\n\",\n       \"      <td>10.60</td>\\n\",\n       \"      <td>1.0</td>\\n\",\n       \"      <td>N</td>\\n\",\n       \"      <td>138</td>\\n\",\n       \"      <td>132</td>\\n\",\n       \"      <td>1.0</td>\\n\",\n       \"      <td>29.00</td>\\n\",\n       \"      <td>0.50</td>\\n\",\n       \"      <td>0.5</td>\\n\",\n       \"      <td>6.05</td>\\n\",\n       \"      <td>0.0</td>\\n\",\n       \"      <td>0.3</td>\\n\",\n       \"      <td>36.35</td>\\n\",\n       \"      <td>0.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>4</th>\\n\",\n       \"      <td>2.0</td>\\n\",\n       \"      <td>2021-01-01 00:31:49</td>\\n\",\n       \"      <td>2021-01-01 00:48:21</td>\\n\",\n       \"      <td>1.0</td>\\n\",\n       \"      <td>4.94</td>\\n\",\n       \"      <td>1.0</td>\\n\",\n       \"      <td>N</td>\\n\",\n       \"      <td>68</td>\\n\",\n       \"      <td>33</td>\\n\",\n       \"      <td>1.0</td>\\n\",\n       \"      <td>16.50</td>\\n\",\n       \"      <td>0.50</td>\\n\",\n       \"      <td>0.5</td>\\n\",\n       \"      <td>4.06</td>\\n\",\n       \"      <td>0.0</td>\\n\",\n       \"      <td>0.3</td>\\n\",\n       \"      <td>24.36</td>\\n\",\n       \"      <td>2.5</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>...</th>\\n\",\n    
   \"      <td>...</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"      <td>...</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1369760</th>\\n\",\n       \"      <td>NaN</td>\\n\",\n       \"      <td>2021-01-25 08:32:04</td>\\n\",\n       \"      <td>2021-01-25 08:49:32</td>\\n\",\n       \"      <td>NaN</td>\\n\",\n       \"      <td>8.80</td>\\n\",\n       \"      <td>NaN</td>\\n\",\n       \"      <td>NaN</td>\\n\",\n       \"      <td>135</td>\\n\",\n       \"      <td>82</td>\\n\",\n       \"      <td>NaN</td>\\n\",\n       \"      <td>21.84</td>\\n\",\n       \"      <td>2.75</td>\\n\",\n       \"      <td>0.5</td>\\n\",\n       \"      <td>0.00</td>\\n\",\n       \"      <td>0.0</td>\\n\",\n       \"      <td>0.3</td>\\n\",\n       \"      <td>25.39</td>\\n\",\n       \"      <td>0.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1369761</th>\\n\",\n       \"      <td>NaN</td>\\n\",\n       \"      <td>2021-01-25 08:34:00</td>\\n\",\n       \"      <td>2021-01-25 09:04:00</td>\\n\",\n       \"      <td>NaN</td>\\n\",\n       \"      <td>5.86</td>\\n\",\n       \"      <td>NaN</td>\\n\",\n       \"      <td>NaN</td>\\n\",\n       \"      <td>42</td>\\n\",\n       \"      <td>161</td>\\n\",\n       \"      <td>NaN</td>\\n\",\n       \"      <td>26.67</td>\\n\",\n       \"      <td>2.75</td>\\n\",\n       \"      <td>0.5</td>\\n\",\n       \"      
<td>0.00</td>\\n\",\n       \"      <td>0.0</td>\\n\",\n       \"      <td>0.3</td>\\n\",\n       \"      <td>30.22</td>\\n\",\n       \"      <td>0.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1369762</th>\\n\",\n       \"      <td>NaN</td>\\n\",\n       \"      <td>2021-01-25 08:37:00</td>\\n\",\n       \"      <td>2021-01-25 08:53:00</td>\\n\",\n       \"      <td>NaN</td>\\n\",\n       \"      <td>4.45</td>\\n\",\n       \"      <td>NaN</td>\\n\",\n       \"      <td>NaN</td>\\n\",\n       \"      <td>14</td>\\n\",\n       \"      <td>106</td>\\n\",\n       \"      <td>NaN</td>\\n\",\n       \"      <td>25.29</td>\\n\",\n       \"      <td>2.75</td>\\n\",\n       \"      <td>0.5</td>\\n\",\n       \"      <td>0.00</td>\\n\",\n       \"      <td>0.0</td>\\n\",\n       \"      <td>0.3</td>\\n\",\n       \"      <td>28.84</td>\\n\",\n       \"      <td>0.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1369763</th>\\n\",\n       \"      <td>NaN</td>\\n\",\n       \"      <td>2021-01-25 08:28:00</td>\\n\",\n       \"      <td>2021-01-25 08:50:00</td>\\n\",\n       \"      <td>NaN</td>\\n\",\n       \"      <td>10.04</td>\\n\",\n       \"      <td>NaN</td>\\n\",\n       \"      <td>NaN</td>\\n\",\n       \"      <td>175</td>\\n\",\n       \"      <td>216</td>\\n\",\n       \"      <td>NaN</td>\\n\",\n       \"      <td>28.24</td>\\n\",\n       \"      <td>2.75</td>\\n\",\n       \"      <td>0.5</td>\\n\",\n       \"      <td>0.00</td>\\n\",\n       \"      <td>0.0</td>\\n\",\n       \"      <td>0.3</td>\\n\",\n       \"      <td>31.79</td>\\n\",\n       \"      <td>0.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1369764</th>\\n\",\n       \"      <td>NaN</td>\\n\",\n       \"      <td>2021-01-25 08:38:00</td>\\n\",\n       \"      <td>2021-01-25 08:50:00</td>\\n\",\n       \"      <td>NaN</td>\\n\",\n       \"      <td>4.93</td>\\n\",\n       \"      
<td>NaN</td>\\n\",\n       \"      <td>NaN</td>\\n\",\n       \"      <td>248</td>\\n\",\n       \"      <td>168</td>\\n\",\n       \"      <td>NaN</td>\\n\",\n       \"      <td>20.76</td>\\n\",\n       \"      <td>2.75</td>\\n\",\n       \"      <td>0.5</td>\\n\",\n       \"      <td>0.00</td>\\n\",\n       \"      <td>0.0</td>\\n\",\n       \"      <td>0.3</td>\\n\",\n       \"      <td>24.31</td>\\n\",\n       \"      <td>0.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"<p>1369765 rows x 18 columns</p>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"         VendorID tpep_pickup_datetime tpep_dropoff_datetime  passenger_count  \\\\\\n\",\n       \"0             1.0  2021-01-01 00:30:10   2021-01-01 00:36:12              1.0   \\n\",\n       \"1             1.0  2021-01-01 00:51:20   2021-01-01 00:52:19              1.0   \\n\",\n       \"2             1.0  2021-01-01 00:43:30   2021-01-01 01:11:06              1.0   \\n\",\n       \"3             1.0  2021-01-01 00:15:48   2021-01-01 00:31:01              0.0   \\n\",\n       \"4             2.0  2021-01-01 00:31:49   2021-01-01 00:48:21              1.0   \\n\",\n       \"...           ...                  ...                   ...              ...   
\\n\",\n       \"1369760       NaN  2021-01-25 08:32:04   2021-01-25 08:49:32              NaN   \\n\",\n       \"1369761       NaN  2021-01-25 08:34:00   2021-01-25 09:04:00              NaN   \\n\",\n       \"1369762       NaN  2021-01-25 08:37:00   2021-01-25 08:53:00              NaN   \\n\",\n       \"1369763       NaN  2021-01-25 08:28:00   2021-01-25 08:50:00              NaN   \\n\",\n       \"1369764       NaN  2021-01-25 08:38:00   2021-01-25 08:50:00              NaN   \\n\",\n       \"\\n\",\n       \"         trip_distance  RatecodeID store_and_fwd_flag  PULocationID  \\\\\\n\",\n       \"0                 2.10         1.0                  N           142   \\n\",\n       \"1                 0.20         1.0                  N           238   \\n\",\n       \"2                14.70         1.0                  N           132   \\n\",\n       \"3                10.60         1.0                  N           138   \\n\",\n       \"4                 4.94         1.0                  N            68   \\n\",\n       \"...                ...         ...                ...           ...   
\\n\",\n       \"1369760           8.80         NaN                NaN           135   \\n\",\n       \"1369761           5.86         NaN                NaN            42   \\n\",\n       \"1369762           4.45         NaN                NaN            14   \\n\",\n       \"1369763          10.04         NaN                NaN           175   \\n\",\n       \"1369764           4.93         NaN                NaN           248   \\n\",\n       \"\\n\",\n       \"         DOLocationID  payment_type  fare_amount  extra  mta_tax  tip_amount  \\\\\\n\",\n       \"0                  43           2.0         8.00   3.00      0.5        0.00   \\n\",\n       \"1                 151           2.0         3.00   0.50      0.5        0.00   \\n\",\n       \"2                 165           1.0        42.00   0.50      0.5        8.65   \\n\",\n       \"3                 132           1.0        29.00   0.50      0.5        6.05   \\n\",\n       \"4                  33           1.0        16.50   0.50      0.5        4.06   \\n\",\n       \"...               ...           ...          ...    ...      ...         ...   
\\n\",\n       \"1369760            82           NaN        21.84   2.75      0.5        0.00   \\n\",\n       \"1369761           161           NaN        26.67   2.75      0.5        0.00   \\n\",\n       \"1369762           106           NaN        25.29   2.75      0.5        0.00   \\n\",\n       \"1369763           216           NaN        28.24   2.75      0.5        0.00   \\n\",\n       \"1369764           168           NaN        20.76   2.75      0.5        0.00   \\n\",\n       \"\\n\",\n       \"         tolls_amount  improvement_surcharge  total_amount  \\\\\\n\",\n       \"0                 0.0                    0.3         11.80   \\n\",\n       \"1                 0.0                    0.3          4.30   \\n\",\n       \"2                 0.0                    0.3         51.95   \\n\",\n       \"3                 0.0                    0.3         36.35   \\n\",\n       \"4                 0.0                    0.3         24.36   \\n\",\n       \"...               ...                    ...           ...   \\n\",\n       \"1369760           0.0                    0.3         25.39   \\n\",\n       \"1369761           0.0                    0.3         30.22   \\n\",\n       \"1369762           0.0                    0.3         28.84   \\n\",\n       \"1369763           0.0                    0.3         31.79   \\n\",\n       \"1369764           0.0                    0.3         24.31   \\n\",\n       \"\\n\",\n       \"         congestion_surcharge  \\n\",\n       \"0                         2.5  \\n\",\n       \"1                         0.0  \\n\",\n       \"2                         0.0  \\n\",\n       \"3                         0.0  \\n\",\n       \"4                         2.5  \\n\",\n       \"...                       ...  
\\n\",\n       \"1369760                   0.0  \\n\",\n       \"1369761                   0.0  \\n\",\n       \"1369762                   0.0  \\n\",\n       \"1369763                   0.0  \\n\",\n       \"1369764                   0.0  \\n\",\n       \"\\n\",\n       \"[1369765 rows x 18 columns]\"\n      ]\n     },\n     \"execution_count\": 7,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"modin_df\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Faster Append with Modin's ``concat``\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"Our previous ``read_csv`` example operated on a relatively small dataframe. In the following example, we duplicate the same taxi dataset 100 times and then concatenate them together.\\n\",\n    \"\\n\",\n    \"Please note that this quickstart notebook is assumed to be run on a machine that has enough memory in order to be able to perform the operations both with pandas and Modin in a single pipeline (which at least doubles the amount of required memory). 
If your machine doesn't have enough resources to execute every cell of the notebook and you see an OOM issue, you most likely need to reduce ``N_copies`` in the cell below.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 8,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"name\": \"stdout\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"Time to concat with pandas: 34.144 seconds\\n\"\n     ]\n    }\n   ],\n   \"source\": [\n    \"N_copies= 100\\n\",\n    \"start = time.time()\\n\",\n    \"\\n\",\n    \"big_pandas_df = pandas.concat([pandas_df for _ in range(N_copies)])\\n\",\n    \"\\n\",\n    \"end = time.time()\\n\",\n    \"pandas_duration = end - start\\n\",\n    \"print(\\\"Time to concat with pandas: {} seconds\\\".format(round(pandas_duration, 3)))\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 9,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"name\": \"stdout\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"Time to concat with Modin: 0.564 seconds\\n\"\n     ]\n    },\n    {\n     \"data\": {\n      \"text/markdown\": [\n       \"### Modin is 60.57x faster than pandas at `concat`!\"\n      ],\n      \"text/plain\": [\n       \"<IPython.core.display.Markdown object>\"\n      ]\n     },\n     \"metadata\": {},\n     \"output_type\": \"display_data\"\n    }\n   ],\n   \"source\": [\n    \"start = time.time()\\n\",\n    \"\\n\",\n    \"big_modin_df = pd.concat([modin_df for _ in range(N_copies)])\\n\",\n    \"\\n\",\n    \"end = time.time()\\n\",\n    \"modin_duration = end - start\\n\",\n    \"print(\\\"Time to concat with Modin: {} seconds\\\".format(round(modin_duration, 3)))\\n\",\n    \"\\n\",\n    \"printmd(\\\"### Modin is {}x faster than pandas at `concat`!\\\".format(round(pandas_duration / modin_duration, 2)))\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"The result dataset is around 19GB in size.\"\n   
]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 10,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"name\": \"stdout\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"\\u001b[2m\\u001b[36m(apply_list_of_funcs pid=73415)\\u001b[0m \\n\",\n      \"\\u001b[2m\\u001b[36m(apply_list_of_funcs pid=73416)\\u001b[0m \\n\",\n      \"<class 'modin.pandas.dataframe.DataFrame'>\\n\",\n      \"Int64Index: 136976500 entries, 0 to 1369764\\n\",\n      \"Data columns (total 18 columns):\\n\",\n      \" #   Column                 Non-Null Count      Dtype         \\n\",\n      \"---  ---------------------  ------------------  -----         \\n\",\n      \" 0   VendorID               127141300 non-null  float64\\n\",\n      \" 1   tpep_pickup_datetime   136976500 non-null  datetime64[ns]\\n\",\n      \" 2   tpep_dropoff_datetime  136976500 non-null  datetime64[ns]\\n\",\n      \" 3   passenger_count        127141300 non-null  float64\\n\",\n      \" 4   trip_distance          136976500 non-null  float64\\n\",\n      \" 5   RatecodeID             127141300 non-null  float64\\n\",\n      \" 6   store_and_fwd_flag     127141300 non-null  object\\n\",\n      \" 7   PULocationID           136976500 non-null  int64\\n\",\n      \" 8   DOLocationID           136976500 non-null  int64\\n\",\n      \" 9   payment_type           127141300 non-null  float64\\n\",\n      \" 10  fare_amount            136976500 non-null  float64\\n\",\n      \" 11  extra                  136976500 non-null  float64\\n\",\n      \" 12  mta_tax                136976500 non-null  float64\\n\",\n      \" 13  tip_amount             136976500 non-null  float64\\n\",\n      \" 14  tolls_amount           136976500 non-null  float64\\n\",\n      \" 15  improvement_surcharge  136976500 non-null  float64\\n\",\n      \" 16  total_amount           136976500 non-null  float64\\n\",\n      \" 17  congestion_surcharge   136976500 non-null  float64\\n\",\n      \"dtypes: float64(13), 
datetime64[ns](2), int64(2), object(1)\\n\",\n      \"memory usage: 19.4 GB\\n\"\n     ]\n    },\n    {\n     \"name\": \"stderr\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"UserWarning: Distributing <class 'int'> object. This may take some time.\\n\"\n     ]\n    }\n   ],\n   \"source\": [\n    \"big_modin_df.info()\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Faster ``apply`` over a single column\\n\",\n    \"\\n\",\n    \"The performance benefits of Modin become apparent when we operate on large gigabyte-scale datasets. For example, let's say that we want to round the numbers in a single column via the ``apply`` operation. \"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 11,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"name\": \"stdout\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"Time to apply with pandas: 43.969 seconds\\n\"\n     ]\n    }\n   ],\n   \"source\": [\n    \"start = time.time()\\n\",\n    \"rounded_trip_distance_pandas = big_pandas_df[\\\"trip_distance\\\"].apply(round)\\n\",\n    \"\\n\",\n    \"end = time.time()\\n\",\n    \"pandas_duration = end - start\\n\",\n    \"print(\\\"Time to apply with pandas: {} seconds\\\".format(round(pandas_duration, 3)))\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 12,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"name\": \"stdout\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"Time to apply with Modin: 1.225 seconds\\n\"\n     ]\n    },\n    {\n     \"data\": {\n      \"text/markdown\": [\n       \"### Modin is 35.88x faster than pandas at `apply` on one column!\"\n      ],\n      \"text/plain\": [\n       \"<IPython.core.display.Markdown object>\"\n      ]\n     },\n     \"metadata\": {},\n     \"output_type\": \"display_data\"\n    }\n   ],\n   \"source\": [\n    \"start = time.time()\\n\",\n    \"\\n\",\n    
\"rounded_trip_distance_modin = big_modin_df[\\\"trip_distance\\\"].apply(round)\\n\",\n    \"\\n\",\n    \"end = time.time()\\n\",\n    \"modin_duration = end - start\\n\",\n    \"print(\\\"Time to apply with Modin: {} seconds\\\".format(round(modin_duration, 3)))\\n\",\n    \"\\n\",\n    \"printmd(\\\"### Modin is {}x faster than pandas at `apply` on one column!\\\".format(round(pandas_duration / modin_duration, 2)))\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Summary\\n\",\n    \"\\n\",\n    \"Hopefully, this tutorial demonstrated how Modin delivers significant speedup on pandas operations without the need for any extra effort. Throughout this example, we moved from working with 100MBs of data to 20GBs of data, all without having to change anything or manually optimize our code to achieve the level of scalable performance that Modin provides.\\n\",\n    \"\\n\",\n    \"Note that in this quickstart example, we've only shown ``read_csv``, ``concat``, and ``apply``, but these are not the only pandas operations that Modin optimizes for. In fact, Modin covers [more than 90% of the pandas API](https://github.com/modin-project/modin/blob/main/README.md#pandas-api-coverage), yielding considerable speedups for many common operations.\"\n   ]\n  }\n ],\n \"metadata\": {\n  \"kernelspec\": {\n   \"display_name\": \"Python 3 (ipykernel)\",\n   \"language\": \"python\",\n   \"name\": \"python3\"\n  },\n  \"language_info\": {\n   \"codemirror_mode\": {\n    \"name\": \"ipython\",\n    \"version\": 3\n   },\n   \"file_extension\": \".py\",\n   \"mimetype\": \"text/x-python\",\n   \"name\": \"python\",\n   \"nbconvert_exporter\": \"python\",\n   \"pygments_lexer\": \"ipython3\",\n   \"version\": \"3.9.9\"\n  }\n },\n \"nbformat\": 4,\n \"nbformat_minor\": 2\n}\n"
  },
  {
    "path": "examples/spreadsheet/requirements.txt",
    "content": "ray==1.1.0\ngit+https://github.com/modin-project/modin\ngit+https://github.com/modin-project/modin-spreadsheet.git@49ffd89f683f54c311867d602c55443fb11bf2a5"
  },
  {
    "path": "examples/spreadsheet/tutorial.ipynb",
    "content": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"![LOGO](../tutorial/tutorial_notebooks/img/MODIN_ver2_hrz.png)\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## modin.spreadsheet\\n\",\n    \"`modin.spreadsheet` is a Jupyter notebook widget that allows users to interact with Modin DataFrames in a spreadsheet-like fashion while taking advantage of the underlying capabilities of Modin. The widget makes it quick and easy to explore, sort, filter, edit data and export reproducible code. \\n\",\n    \"\\n\",\n    \"This tutorial will showcase how to use `modin.spreadsheet`. Before starting, please install the required packages using `pip install -r requirements.txt` in the current directory. Then just run the cells; no editing required!\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Please install the required packages using `pip install -r requirements.txt` in the current directory\\n\",\n    \"# For all ways to install Modin see official documentation at:\\n\",\n    \"# https://modin.readthedocs.io/en/latest/installation.html\\n\",\n    \"import modin.pandas as pd\\n\",\n    \"import modin.spreadsheet as mss\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### Create a Modin DataFrame\\n\",\n    \"The following cells create a DataFrame using a NYC taxi dataset.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"columns_names = [\\n\",\n    \"        \\\"trip_id\\\", \\\"vendor_id\\\", \\\"pickup_datetime\\\", \\\"dropoff_datetime\\\", \\\"store_and_fwd_flag\\\",\\n\",\n    \"        \\\"rate_code_id\\\", \\\"pickup_longitude\\\", \\\"pickup_latitude\\\", \\\"dropoff_longitude\\\", 
\\\"dropoff_latitude\\\",\\n\",\n    \"        \\\"passenger_count\\\", \\\"trip_distance\\\", \\\"fare_amount\\\", \\\"extra\\\", \\\"mta_tax\\\", \\\"tip_amount\\\",\\n\",\n    \"        \\\"tolls_amount\\\", \\\"ehail_fee\\\", \\\"improvement_surcharge\\\", \\\"total_amount\\\", \\\"payment_type\\\",\\n\",\n    \"        \\\"trip_type\\\", \\\"pickup\\\", \\\"dropoff\\\", \\\"cab_type\\\", \\\"precipitation\\\", \\\"snow_depth\\\", \\\"snowfall\\\",\\n\",\n    \"        \\\"max_temperature\\\", \\\"min_temperature\\\", \\\"average_wind_speed\\\", \\\"pickup_nyct2010_gid\\\",\\n\",\n    \"        \\\"pickup_ctlabel\\\", \\\"pickup_borocode\\\", \\\"pickup_boroname\\\", \\\"pickup_ct2010\\\",\\n\",\n    \"        \\\"pickup_boroct2010\\\", \\\"pickup_cdeligibil\\\", \\\"pickup_ntacode\\\", \\\"pickup_ntaname\\\", \\\"pickup_puma\\\",\\n\",\n    \"        \\\"dropoff_nyct2010_gid\\\", \\\"dropoff_ctlabel\\\", \\\"dropoff_borocode\\\", \\\"dropoff_boroname\\\",\\n\",\n    \"        \\\"dropoff_ct2010\\\", \\\"dropoff_boroct2010\\\", \\\"dropoff_cdeligibil\\\", \\\"dropoff_ntacode\\\",\\n\",\n    \"        \\\"dropoff_ntaname\\\", \\\"dropoff_puma\\\",\\n\",\n    \"    ]\\n\",\n    \"parse_dates=[\\\"pickup_datetime\\\", \\\"dropoff_datetime\\\"]\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"df = pd.read_csv('s3://modin-datasets/trips_data.csv', names=columns_names,\\n\",\n    \"                header=None, parse_dates=parse_dates)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"scrolled\": true\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### Generate a spreadsheet widget with the DataFrame\\n\",\n    \"`mss.from_dataframe` takes in a DataFrame, optional configuration options, and returns a 
`SpreadsheetWidget`, which contains all the logic for displaying the spreadsheet view of the DataFrame. The object returned will not be rendered unless displayed.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"spreadsheet = mss.from_dataframe(df)\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### Displaying the Spreadsheet\\n\",\n    \"The widget is displayed when the widget is returned by an input cell or passed to the `display` function e.g. `display(spreadsheet)`. When displayed, the SpreadsheetWidget will generate a transformation history cell that contains a record of the transformations applied to the DataFrame unless the cell already exists or the feature is disabled.\\n\",\n    \"\\n\",\n    \"### Basic Usage\\n\",\n    \"`from_dataframe` creates a copy of the input DataFrame, so changes do not alter the original DataFrame.\\n\",\n    \"\\n\",\n    \"**Filter** - Each column can be filtered according to its datatype using the filter button to the right of the column header. Any number of columns can be filtered simultaneously.\\\\\\n\",\n    \"**Sort** - Each column can be sorted by clicking on the column header. Assumptions on the order of the data should only be made according to the latest sort i.e. 
the 2nd last sort may not be in order even if grouped by the duplicates in the last sorted column.\\\\\\n\",\n    \"**Cell Edit** - Double click on a cell to edit its value.\\\\\\n\",\n    \"**Add Row**(toolbar) - Click on the `Add Row` button in the toolbar to duplicate the last row in the DataFrame.\\\\\\n\",\n    \"**Remove Row**(toolbar) - Select row(s) on the spreadsheet and click the `Remove Row` button in the toolbar to remove them.\\\\\\n\",\n    \"**Reset Filters**(toolbar) - Click on the `Reset Filters` button in the toolbar to remove all filters on the data.\\\\\\n\",\n    \"**Reset Sort**(toolbar) - Click on the `Reset Sort` button in the toolbar to remove any sorting on the data.\\n\",\n    \"\\n\",\n    \"### Transformation History and Reproducible Code\\n\",\n    \"The widget records the history of transformations, such as filtering, that occur on the spreadsheet. These transformations are updated in the `spreadsheet transformation history` cell as they happen and can be easily copied for reproducibility. The history can be cleared using the `Clear History` button in the toolbar.\\n\",\n    \"\\n\",\n    \"**Try making some changes to the spreadsheet!**\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"scrolled\": true\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"spreadsheet\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### Exporting Changes\\n\",\n    \"`to_dataframe` takes in a `SpreadsheetWidget` and returns a copy of the DataFrame reflecting the current state of the UI on the widget. 
Specifically, any filters, edits, or sorts will be applied on the returned Dataframe.\\n\",\n    \"\\n\",\n    \"**Export a DataFrame after making some changes on the spreadsheet UI**\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"changed_df = mss.to_dataframe(spreadsheet)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"changed_df\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## SpreadsheetWidget API\\n\",\n    \"The API on `SpreadsheetWidget` allows users to replicate some of the functionality on the GUI, but also provides other functionality such as applying the transformation history on another DataFrame or getting the DataFrame that matches the spreadsheet state like `to_dataframe`.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Duplicates the `Reset Filters` button\\n\",\n    \"spreadsheet.reset_filters()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Duplicates the `Reset Sort` button\\n\",\n    \"spreadsheet.reset_sort()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"qgrid6f69f373-ae0e-423e-8e26-429f52e1669d\": true\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"# Duplicates the `Clear History` button\\n\",\n    \"spreadsheet.clear_history()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Gets the modified DataFrame that matches the changes to the spreadsheet\\n\",\n    \"# This is the same functionality as `mss.to_dataframe`\\n\",\n    
\"spreadsheet.get_changed_df()\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### Retrieving and Applying Transformation History \\n\",\n    \"The transformation history can be retrieved as a list of code snippets using the `get_history` API. The `apply_history` API will apply the transformations on the input DataFrame and return the resultant DataFrame.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"scrolled\": false\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"spreadsheet.get_history()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"another_df = df.copy()\\n\",\n    \"spreadsheet.apply_history(another_df)\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Additional Example\\n\",\n    \"Here is another example of how to use `from_dataframe` with configuration options.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"mss.from_dataframe(df, show_toolbar=False, grid_options={'forceFitColumns': False, 'editable': False, 'highlightSelectedCell': True})\"\n   ]\n  }\n ],\n \"metadata\": {\n  \"kernelspec\": {\n   \"display_name\": \"Python 3\",\n   \"language\": \"python\",\n   \"name\": \"python3\"\n  },\n  \"language_info\": {\n   \"codemirror_mode\": {\n    \"name\": \"ipython\",\n    \"version\": 3\n   },\n   \"file_extension\": \".py\",\n   \"mimetype\": \"text/x-python\",\n   \"name\": \"python\",\n   \"nbconvert_exporter\": \"python\",\n   \"pygments_lexer\": \"ipython3\",\n   \"version\": \"3.8.5\"\n  }\n },\n \"nbformat\": 4,\n \"nbformat_minor\": 4\n}\n"
  },
  {
    "path": "examples/tutorial/README.md",
    "content": "# Modin tutorial Jupyter Notebooks\n\nTutorial for how to use different features of Modin.\n"
  },
  {
    "path": "examples/tutorial/jupyter/README.md",
    "content": "# Jupyter notebook examples to run with Modin\n\nCurrently we provide tutorial notebooks for the following execution backends:\n\n- [PandasOnRay](https://modin.readthedocs.io/en/latest/development/using_pandas_on_ray.html)\n- [PandasOnDask](https://modin.readthedocs.io/en/latest/development/using_pandas_on_dask.html)\n- [PandasOnMPI through unidist](https://modin.readthedocs.io/en/latest/development/using_pandas_on_mpi.html)\n\n## Creating a development environment\n\nTo get required dependencies for `PandasOnRay`, `PandasOnDask` and `PandasOnUnidist` Jupyter Notebooks\nyou should create a development environment with `pip`\nusing `requirements.txt` file located in the respective directory:\n\n```bash\npip install -r execution/pandas_on_ray/requirements.txt\n```\n\nto install dependencies needed to run notebooks with Modin on `PandasOnRay` execution or\n\n```bash\npip install -r execution/pandas_on_dask/requirements.txt\n```\n\nto install dependencies needed to run notebooks with Modin on `PandasOnDask` execution or\n\n```bash\npip install -r execution/pandas_on_unidist/requirements.txt\n```\n\nto install dependencies needed to run notebooks with Modin on `PandasOnUnidist` execution.\n\n**Note:** Sometimes pip is installing every version of a package. If you encounter that issue,\nplease install every package listed in `requirements.txt` file individually with `pip install <package>`.\n\n## Run Jupyter Notebooks\n\nA Jupyter Notebook server can be run from the current directory as follows:\n\n```bash\njupyter notebook\n```\n\nNavigate to a concrete notebook (for example, to the `execution/pandas_on_ray/local/exercise_1.ipynb`).\n\n**Note:** Since there are some specifics regarding the run of jupyter notebooks with the `Unidist` engine,\nrefer to [PandasOnUnidist](https://github.com/modin-project/modin/blob/main/examples/tutorial/jupyter/execution/pandas_on_unidist/README.md) document\nto get more information on the matter."
  },
  {
    "path": "examples/tutorial/jupyter/execution/pandas_on_dask/Dockerfile",
    "content": "FROM continuumio/miniconda3\n\nRUN conda install -c conda-forge psutil setproctitle\nRUN pip install -r requirements-dev.txt\n\n"
  },
  {
    "path": "examples/tutorial/jupyter/execution/pandas_on_dask/cluster/exercise_5.ipynb",
    "content": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"![LOGO](../../../img/MODIN_ver2_hrz.png)\\n\",\n    \"\\n\",\n    \"<h1>Scale your pandas workflows by changing one line of code</h1>\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Exercise 5: Setting up cluster environment\\n\",\n    \"\\n\",\n    \"**GOAL**: Learn how to set up a Dask cluster for Modin, connect Modin to a Dask cluster and run pandas queries on a cluster.\\n\",\n    \"\\n\",\n    \"**NOTE**: This exercise has extra requirements. Read instructions carefully before attempting. \\n\",\n    \"\\n\",\n    \"**This exercise instructs users on how to start a 500+ core Dask cluster, and it is not shut down until the end of exercise.**\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"Often in practice we have a need to exceed the capabilities of a single machine. Modin works and performs well \\n\",\n    \"in both local mode and in a cluster environment. The key advantage of Modin is that your python code does not \\n\",\n    \"change between local development and cluster execution. 
Users are not required to think about how many workers \\n\",\n    \"exist or how to distribute and partition their data; Modin handles all of this seamlessly and transparently.\\n\",\n    \"\\n\",\n    \"![Cluster](../../../img/modin_cluster.png)\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Extra requirements for AWS authentication\\n\",\n    \"\\n\",\n    \"First of all, install the necessary dependencies in your environment:\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"!pip install dask_cloudprovider[aws]\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"The next step is to set up your AWS credentials, namely, set ``AWS_ACCESS_KEY_ID``, ``AWS_SECRET_ACCESS_KEY``\\n\",\n    \"and ``AWS_SESSION_TOKEN`` (Optional) (refer to [AWS CLI environment variables](https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-envvars.html) to get more insight on this):\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"import os\\n\",\n    \"\\n\",\n    \"os.environ[\\\"AWS_ACCESS_KEY_ID\\\"] = \\\"<aws_access_key_id>\\\"\\n\",\n    \"os.environ[\\\"AWS_SECRET_ACCESS_KEY\\\"] = \\\"<aws_secret_access_key>\\\"\\n\",\n    \"os.environ[\\\"AWS_SESSION_TOKEN\\\"] = \\\"<aws_session_token>\\\"\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Starting and connecting to the cluster\\n\",\n    \"\\n\",\n    \"This example starts 1 scheduler node (m5.24xlarge) and 6 worker nodes (m5.24xlarge), 576 total CPUs. 
Keep in mind the scheduler node manages cluster operation but doesn't perform any execution.\\n\",\n    \"\\n\",\n    \"You can check the [Amazon EC2 pricing](https://aws.amazon.com/ec2/pricing/on-demand/) page.\\n\",\n    \"\\n\",\n    \"Dask cluster can be deployed in different ways (refer to [Dask documentation](https://docs.dask.org/en/latest/deploying.html) to get more information about it), but in this tutorial we will use the ``EC2Cluster`` from [dask_cloudprovider](https://cloudprovider.dask.org/en/latest/) to create and initialize a Dask cluster on Amazon Web Services (AWS).\\n\",\n    \"\\n\",\n    \"**Note**: EC2Cluster uses a docker container to run the scheduler and each of the workers. Probably you need to use another docker image depending on your python version and requirements. You can find more docker-images on [daskdev](https://hub.docker.com/u/daskdev) page.\\n\",\n    \"\\n\",\n    \"In the next cell you can see how the EC2Cluster is being created. <b>Set your ``key_name`` and modify AWS settings as required before running it.</b>\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"from dask_cloudprovider.aws import EC2Cluster\\n\",\n    \"\\n\",\n    \"n_workers = 6\\n\",\n    \"cluster = EC2Cluster(\\n\",\n    \"    # AWS parameters\\n\",\n    \"    key_name = \\\"\\\", # set your keyname\\n\",\n    \"    region = \\\"us-west-2\\\",\\n\",\n    \"    availability_zone = [\\\"us-west-2a\\\"],\\n\",\n    \"    ami = \\\"ami-0387d929287ab193e\\\",\\n\",\n    \"    instance_type = \\\"m5.24xlarge\\\",\\n\",\n    \"    vpc = \\\"vpc-002bd14c63f227832\\\",\\n\",\n    \"    subnet_id = \\\"subnet-09860dafd79720938\\\",\\n\",\n    \"    filesystem_size = 200, # in GB\\n\",\n    \"\\n\",\n    \"    # DASK parameters\\n\",\n    \"    n_workers = n_workers,\\n\",\n    \"    docker_image = \\\"daskdev/dask:latest\\\",\\n\",\n    \"    debug = True,\\n\",\n    \"   
 security=False,\\n\",\n    \")\\n\",\n    \"\\n\",\n    \"scheduler_adress = cluster.scheduler_address\\n\",\n    \"print(f\\\"Scheduler IP address of Dask cluster: {scheduler_adress}\\\")\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"After creating the cluster you need to connect to it. To do this you should put the ``EC2Cluster`` instance or the scheduler IP address in ``distributed.Client``.\\n\",\n    \"\\n\",\n    \"When you connect to the cluster, the workers may not be initialized yet, so you need to wait for them using ``client.wait_for_workers``.\\n\",\n    \"\\n\",\n    \"Then you can call ``client.ncores()`` and check which workers are available and how many threads are used for each of them.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"from distributed import Client\\n\",\n    \"\\n\",\n    \"client = Client(cluster)\\n\",\n    \"# Or use an IP address connection if the cluster instance is unavailable:\\n\",\n    \"# client = Client(f\\\"{scheduler_adress}:8687\\\")\\n\",\n    \"\\n\",\n    \"client.wait_for_workers(n_workers)\\n\",\n    \"client.ncores()\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"After successful initialization of the cluster, you need to configure it.\\n\",\n    \"\\n\",\n    \"You can use plugins to install any requirements into workers:\\n\",\n    \"* [InstallPlugin](https://distributed.dask.org/en/stable/plugins.html#distributed.diagnostics.plugin.InstallPlugin)\\n\",\n    \"* [PipInstall](https://distributed.dask.org/en/stable/plugins.html#distributed.diagnostics.plugin.PipInstall)\\n\",\n    \"* [CondaInstall](https://distributed.dask.org/en/stable/plugins.html#distributed.diagnostics.plugin.CondaInstall).\\n\",\n    \"\\n\",\n    \"You have to install Modin package on each worker using ``PipInstall`` plugin.\"\n   ]\n  
},\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"from dask.distributed import PipInstall\\n\",\n    \"\\n\",\n    \"client.register_plugin(PipInstall(packages=[\\\"modin\\\"]))\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"If you need additional worker configuration, you can create your own [WorkerPlugin](https://distributed.dask.org/en/stable/plugins.html#worker-plugins) or function that will be executed on each worker upon calling ``client.run()``.\\n\",\n    \"\\n\",\n    \"**NOTE**: Dask cluster does not check if this plugin or function has been called before. Therefore, you need to take this into account when using them.\\n\",\n    \"\\n\",\n    \"In this tutorial a CSV file will be read, so you need to download it to each of the workers and local machine with the same global path.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"from dask.distributed import Worker\\n\",\n    \"import os\\n\",\n    \"import urllib\\n\",\n    \"\\n\",\n    \"def dataset_upload(file_url, file_path):\\n\",\n    \"    try:\\n\",\n    \"        dir_name = os.path.dirname(file_path)\\n\",\n    \"        if not os.path.exists(dir_name):\\n\",\n    \"            os.makedirs(dir_name)\\n\",\n    \"        if os.path.exists(file_path):\\n\",\n    \"            return \\\"File has already existed.\\\"\\n\",\n    \"        else:\\n\",\n    \"            urllib.request.urlretrieve(file_url, file_path)\\n\",\n    \"        return \\\"OK\\\"\\n\",\n    \"    except Exception as ex:\\n\",\n    \"        return str(ex)\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"Set the directory where it should be downloaded (the local directory will be used by default):\"\n   ]\n  },\n  {\n   \"cell_type\": 
\"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"directory_path = \\\"./\\\"\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"Then you need to run `dataset_upload` function on all workers. As a result, you will get a dictionary containing the result of the function execution for each worker:\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"file_path = os.path.join(os.path.abspath(directory_path), \\\"taxi.csv\\\")\\n\",\n    \"client.run(dataset_upload, \\\"https://modin-datasets.intel.com/testing/yellow_tripdata_2015-01.csv\\\", file_path)\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"You have to also execute this function on the local machine:\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"dataset_upload(\\\"https://modin-datasets.intel.com/testing/yellow_tripdata_2015-01.csv\\\", file_path)\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"<b>Congratulations! The cluster is now fully configured and we can start running Pandas queries.</b>\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Executing in a cluster environment\\n\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"Same as in local mode, Modin on a cluster uses Ray as an execution engine by default, so no additional action is required to start using it. Alternatively, if you need to use another engine, it should be specified either by setting the Modin config or by setting Modin environment variable before the first operation with Modin as it is shown below. 
Also, note that the full list of Modin configs and corresponding environment variables can be found in the [Modin Configuration Settings](https://modin.readthedocs.io/en/stable/flow/modin/config.html#modin-configs-list) section of the Modin documentation.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Modin engine can be specified either by config\\n\",\n    \"import modin.config as cfg\\n\",\n    \"cfg.Engine.put(\\\"dask\\\")\\n\",\n    \"\\n\",\n    \"# or by setting the environment variable\\n\",\n    \"# import os\\n\",\n    \"# os.environ[\\\"MODIN_ENGINE\\\"] = \\\"dask\\\"\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"Now you can use Modin on the Dask cluster.\\n\",\n    \"\\n\",\n    \"Let's read the downloaded CSV file and execute such pandas operations as count, groupby and map:\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"import modin.pandas as pd\\n\",\n    \"import time\\n\",\n    \"\\n\",\n    \"t0 = time.perf_counter()\\n\",\n    \"\\n\",\n    \"df = pd.read_csv(file_path, quoting=3)\\n\",\n    \"df_count = df.count()\\n\",\n    \"df_groupby_count = df.groupby(\\\"passenger_count\\\").count()\\n\",\n    \"df_map = df.map(str)\\n\",\n    \"\\n\",\n    \"t1 = time.perf_counter()\\n\",\n    \"print(f\\\"Full script time is {(t1 - t0):.3f}\\\")\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Shutting down the cluster\\n\",\n    \"\\n\",\n    \"Now that we have finished computation, we can shut down the cluster:\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"cluster.close()\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   
\"source\": [\n    \"### This ends the cluster exercise\"\n   ]\n  }\n ],\n \"metadata\": {\n  \"kernelspec\": {\n   \"display_name\": \"Python 3 (ipykernel)\",\n   \"language\": \"python\",\n   \"name\": \"python3\"\n  },\n  \"language_info\": {\n   \"codemirror_mode\": {\n    \"name\": \"ipython\",\n    \"version\": 3\n   },\n   \"file_extension\": \".py\",\n   \"mimetype\": \"text/x-python\",\n   \"name\": \"python\",\n   \"nbconvert_exporter\": \"python\",\n   \"pygments_lexer\": \"ipython3\",\n   \"version\": \"3.10.12\"\n  }\n },\n \"nbformat\": 4,\n \"nbformat_minor\": 2\n}\n"
  },
  {
    "path": "examples/tutorial/jupyter/execution/pandas_on_dask/local/exercise_1.ipynb",
    "content": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"![LOGO](../../../img/MODIN_ver2_hrz.png)\\n\",\n    \"\\n\",\n    \"<center><h2>Scale your pandas workflows by changing one line of code</h2>\\n\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Exercise 1: How to use Modin\\n\",\n    \"\\n\",\n    \"**GOAL**: Learn how to import Modin to accelerate and scale pandas workflows.\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"Modin is a drop-in replacement for pandas that distributes the computation \\n\",\n    \"across all of the cores in your machine or in a cluster.\\n\",\n    \"In practical terms, this means that you can continue using the same pandas scripts\\n\",\n    \"as before and expect the behavior and results to be the same. The only thing that needs\\n\",\n    \"to change is the import statement. Normally, you would change:\\n\",\n    \"\\n\",\n    \"```python\\n\",\n    \"import pandas as pd\\n\",\n    \"```\\n\",\n    \"\\n\",\n    \"to:\\n\",\n    \"\\n\",\n    \"```python\\n\",\n    \"import modin.pandas as pd\\n\",\n    \"```\\n\",\n    \"\\n\",\n    \"Changing this line of code will allow you to use all of the cores in your machine to do computation on your data. One of the major performance bottlenecks of pandas is that it only uses a single core for any given computation. Modin exposes an API that is identical to pandas, allowing you to continue interacting with your data as you would with pandas. There are no additional commands required to use Modin locally. 
Partitioning, scheduling, data transfer, and other related concerns are all handled by Modin under the hood.\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"<p style=\\\"text-align:left;\\\">\\n\",\n    \"        <h1>pandas on a multicore laptop\\n\",\n    \"    <span style=\\\"float:right;\\\">\\n\",\n    \"        Modin on a multicore laptop\\n\",\n    \"    </span>\\n\",\n    \"\\n\",\n    \"<div>\\n\",\n    \"<img align=\\\"left\\\" src=\\\"../../../img/pandas_multicore.png\\\"><img src=\\\"../../../img/modin_multicore.png\\\">\\n\",\n    \"</div>\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### Concept for exercise: setting Modin engine\\n\",\n    \"\\n\",\n    \"Modin uses Ray as an execution engine by default so no additional action is required to start to use it. Alternatively, if you need to use another engine, it should be specified either by setting the Modin config or by setting Modin environment variable before the first operation with Modin as it is shown below. 
Also, note that the full list of Modin configs and corresponding environment variables can be found in the [Modin Configuration Settings](https://modin.readthedocs.io/en/stable/flow/modin/config.html#modin-configs-list) section of the Modin documentation.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Modin engine can be specified either by config\\n\",\n    \"import modin.config as cfg\\n\",\n    \"cfg.Engine.put(\\\"dask\\\")\\n\",\n    \"\\n\",\n    \"# or by setting the environment variable\\n\",\n    \"# import os\\n\",\n    \"# os.environ[\\\"MODIN_ENGINE\\\"] = \\\"dask\\\"\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### Concept for exercise: Dataframe constructor\\n\",\n    \"\\n\",\n    \"Often when playing around in pandas, it is useful to create a DataFrame with the constructor. That is where we will start.\\n\",\n    \"\\n\",\n    \"```python\\n\",\n    \"import numpy as np\\n\",\n    \"import pandas as pd\\n\",\n    \"\\n\",\n    \"frame_data = np.random.randint(0, 100, size=(2**10, 2**5))\\n\",\n    \"df = pd.DataFrame(frame_data)\\n\",\n    \"```\\n\",\n    \"\\n\",\n    \"When creating a dataframe from a non-distributed object, it will take extra time to partition the data. When this is happening, you will see this message:\\n\",\n    \"\\n\",\n    \"```\\n\",\n    \"UserWarning: Distributing <class 'numpy.ndarray'> object. 
This may take some time.\\n\",\n    \"```\\n\",\n    \"\\n\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Note: Do not change this code!\\n\",\n    \"import numpy as np\\n\",\n    \"import pandas\\n\",\n    \"import sys\\n\",\n    \"import modin\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"pandas.__version__\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"modin.__version__\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Implement your answer here. You are also free to play with the size\\n\",\n    \"# and shape of the DataFrame, but beware of exceeding your memory!\\n\",\n    \"\\n\",\n    \"import pandas as pd\\n\",\n    \"\\n\",\n    \"frame_data = np.random.randint(0, 100, size=(2**10, 2**5))\\n\",\n    \"df = pd.DataFrame(frame_data)\\n\",\n    \"\\n\",\n    \"# ***** Do not change the code below! It verifies that \\n\",\n    \"# ***** the exercise has been done correctly. *****\\n\",\n    \"\\n\",\n    \"try:\\n\",\n    \"    assert df is not None\\n\",\n    \"    assert frame_data is not None\\n\",\n    \"    assert isinstance(frame_data, np.ndarray)\\n\",\n    \"except:\\n\",\n    \"    raise AssertionError(\\\"Don't change too much of the original code!\\\")\\n\",\n    \"assert \\\"modin.pandas\\\" in sys.modules, \\\"Not quite correct. 
Remember the single line of code change (See above)\"\n\",\n    \"\\n\",\n    \"import modin.pandas\\n\",\n    \"assert pd == modin.pandas, \\\"Remember the single line of code change (See above)\\\"\\n\",\n    \"assert hasattr(df, \\\"_query_compiler\\\"), \\\"Make sure that `df` is a modin.pandas DataFrame.\\\"\\n\",\n    \"\\n\",\n    \"print(\\\"Success! You only need to change one line of code!\\\")\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"Now that we have created a toy example for playing around with the DataFrame, let's print it out in different ways.\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### Concept for Exercise: Data Interaction and Printing\\n\",\n    \"\\n\",\n    \"When interacting with data, it is very important to look at different parts of the data (e.g. `df.head()`). Here we will show that you can print the modin.pandas DataFrame in the same ways you would pandas.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Print the first 10 lines.\\n\",\n    \"df.head(10)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Print the DataFrame.\\n\",\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Free cell for custom interaction (Play around here!)\\n\",\n    \"df.add_prefix(\\\"col\\\")\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"df.count()\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"**Please move on to [Exercise 2](./exercise_2.ipynb) when you are ready**\"\n   ]\n  }\n ],\n 
\"metadata\": {\n  \"kernelspec\": {\n   \"display_name\": \"Python 3 (ipykernel)\",\n   \"language\": \"python\",\n   \"name\": \"python3\"\n  },\n  \"language_info\": {\n   \"codemirror_mode\": {\n    \"name\": \"ipython\",\n    \"version\": 3\n   },\n   \"file_extension\": \".py\",\n   \"mimetype\": \"text/x-python\",\n   \"name\": \"python\",\n   \"nbconvert_exporter\": \"python\",\n   \"pygments_lexer\": \"ipython3\",\n   \"version\": \"3.8.12\"\n  }\n },\n \"nbformat\": 4,\n \"nbformat_minor\": 2\n}\n"
  },
  {
    "path": "examples/tutorial/jupyter/execution/pandas_on_dask/local/exercise_2.ipynb",
    "content": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"![LOGO](../../../img/MODIN_ver2_hrz.png)\\n\",\n    \"\\n\",\n    \"<center><h2>Scale your pandas workflows by changing one line of code</h2>\\n\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Exercise 2: Speed improvements\\n\",\n    \"\\n\",\n    \"**GOAL**: Learn about common functionality that Modin speeds up by using all of your machine's cores.\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Concept for Exercise: `read_csv` speedups\\n\",\n    \"\\n\",\n    \"The most commonly used data ingestion method used in pandas is CSV files (link to pandas survey). This concept is designed to give an idea of the kinds of speedups possible, even on a non-distributed filesystem. Modin also supports other file formats for parallel and distributed reads, which can be found in the documentation. 
We will import both Modin and pandas so that the speedups are evident.\\n\",\n    \"\\n\",\n    \"**Note: Rerunning the `read_csv` cells many times may result in degraded performance, depending on the memory of the machine**\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"import modin.pandas as pd\\n\",\n    \"import pandas\\n\",\n    \"import time\\n\",\n    \"from IPython.display import Markdown, display\\n\",\n    \"\\n\",\n    \"def printmd(string):\\n\",\n    \"    display(Markdown(string))\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### Dataset: 2015 NYC taxi trip data\\n\",\n    \"\\n\",\n    \"\\n\",\n    \"We will be using a version of this data already in S3, originally posted in this blog post: https://matthewrocklin.com/blog/work/2017/01/12/dask-dataframes\\n\",\n    \"\\n\",\n    \"**Size: ~1.8GB**\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"path = \\\"s3://dask-data/nyc-taxi/2015/yellow_tripdata_2015-01.csv\\\"\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"Modin execution engine setting:\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"import modin.config as cfg\\n\",\n    \"cfg.Engine.put(\\\"dask\\\")\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## `pandas.read_csv`\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"start = time.time()\\n\",\n    \"\\n\",\n    \"pandas_df = pandas.read_csv(path, parse_dates=[\\\"tpep_pickup_datetime\\\", \\\"tpep_dropoff_datetime\\\"], quoting=3)\\n\",\n    \"\\n\",\n  
  \"end = time.time()\\n\",\n    \"pandas_duration = end - start\\n\",\n    \"print(\\\"Time to read with pandas: {} seconds\\\".format(round(pandas_duration, 3)))\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### Expect pandas to take >3 minutes on EC2, longer locally\\n\",\n    \"\\n\",\n    \"This is a good time to chat with your neighbor\\n\",\n    \"Dicussion topics\\n\",\n    \"- Do you work with a large amount of data daily?\\n\",\n    \"- How big is your data?\\n\",\n    \"- What’s the common use case of your data?\\n\",\n    \"- Do you use any big data analytics tools?\\n\",\n    \"- Do you use any interactive analytics tool?\\n\",\n    \"- What’s are some drawbacks of your current interative analytic tools today?\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## `modin.pandas.read_csv`\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"start = time.time()\\n\",\n    \"\\n\",\n    \"modin_df = pd.read_csv(path, parse_dates=[\\\"tpep_pickup_datetime\\\", \\\"tpep_dropoff_datetime\\\"], quoting=3)\\n\",\n    \"\\n\",\n    \"end = time.time()\\n\",\n    \"modin_duration = end - start\\n\",\n    \"print(\\\"Time to read with Modin: {} seconds\\\".format(round(modin_duration, 3)))\\n\",\n    \"\\n\",\n    \"printmd(\\\"### Modin is {}x faster than pandas at `read_csv`!\\\".format(round(pandas_duration / modin_duration, 2)))\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Are they equal?\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"pandas_df\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"modin_df\"\n   ]\n  },\n  
{\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Concept for exercise: Reduces\\n\",\n    \"\\n\",\n    \"In pandas, a reduce would be something along the lines of a `sum` or `count`. It computes some summary statistics about the rows or columns. We will be using `count`.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"start = time.time()\\n\",\n    \"\\n\",\n    \"pandas_count = pandas_df.count()\\n\",\n    \"\\n\",\n    \"end = time.time()\\n\",\n    \"pandas_duration = end - start\\n\",\n    \"\\n\",\n    \"print(\\\"Time to count with pandas: {} seconds\\\".format(round(pandas_duration, 3)))\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"start = time.time()\\n\",\n    \"\\n\",\n    \"modin_count = modin_df.count()\\n\",\n    \"\\n\",\n    \"end = time.time()\\n\",\n    \"modin_duration = end - start\\n\",\n    \"print(\\\"Time to count with Modin: {} seconds\\\".format(round(modin_duration, 3)))\\n\",\n    \"\\n\",\n    \"printmd(\\\"### Modin is {}x faster than pandas at `count`!\\\".format(round(pandas_duration / modin_duration, 2)))\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Are they equal?\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"pandas_count\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"modin_count\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Concept for exercise: Map operations\\n\",\n    \"\\n\",\n    \"In pandas, map operations are operations that do a single pass over the data and do not change its shape. 
Operations like `isnull` and `applymap` are included in this. We will be using `isnull`.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"start = time.time()\\n\",\n    \"\\n\",\n    \"pandas_isnull = pandas_df.isnull()\\n\",\n    \"\\n\",\n    \"end = time.time()\\n\",\n    \"pandas_duration = end - start\\n\",\n    \"\\n\",\n    \"print(\\\"Time to isnull with pandas: {} seconds\\\".format(round(pandas_duration, 3)))\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"start = time.time()\\n\",\n    \"\\n\",\n    \"modin_isnull = modin_df.isnull()\\n\",\n    \"\\n\",\n    \"end = time.time()\\n\",\n    \"modin_duration = end - start\\n\",\n    \"print(\\\"Time to isnull with Modin: {} seconds\\\".format(round(modin_duration, 3)))\\n\",\n    \"\\n\",\n    \"printmd(\\\"### Modin is {}x faster than pandas at `isnull`!\\\".format(round(pandas_duration / modin_duration, 2)))\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Are they equal?\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"pandas_isnull\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"modin_isnull\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Concept for exercise: Apply over a single column\\n\",\n    \"\\n\",\n    \"Sometimes we want to compute some summary statistics on a single column from our dataset.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"start = time.time()\\n\",\n    \"rounded_trip_distance_pandas = 
pandas_df[\\\"trip_distance\\\"].apply(round)\\n\",\n    \"\\n\",\n    \"end = time.time()\\n\",\n    \"pandas_duration = end - start\\n\",\n    \"print(\\\"Time to groupby with pandas: {} seconds\\\".format(round(pandas_duration, 3)))\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"start = time.time()\\n\",\n    \"\\n\",\n    \"rounded_trip_distance_modin = modin_df[\\\"trip_distance\\\"].apply(round)\\n\",\n    \"\\n\",\n    \"end = time.time()\\n\",\n    \"modin_duration = end - start\\n\",\n    \"print(\\\"Time to add a column with Modin: {} seconds\\\".format(round(modin_duration, 3)))\\n\",\n    \"\\n\",\n    \"printmd(\\\"### Modin is {}x faster than pandas at `apply` on one column!\\\".format(round(pandas_duration / modin_duration, 2)))\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Are they equal?\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"rounded_trip_distance_pandas\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"rounded_trip_distance_modin\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Concept for exercise: Add a column\\n\",\n    \"\\n\",\n    \"It is common to need to add a new column to an existing dataframe, here we show that this is significantly faster in Modin due to metadata management and an efficient zero copy implementation.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"start = time.time()\\n\",\n    \"pandas_df[\\\"rounded_trip_distance\\\"] = rounded_trip_distance_pandas\\n\",\n    \"\\n\",\n    \"end = time.time()\\n\",\n    
\"pandas_duration = end - start\\n\",\n    \"print(\\\"Time to groupby with pandas: {} seconds\\\".format(round(pandas_duration, 3)))\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"start = time.time()\\n\",\n    \"\\n\",\n    \"modin_df[\\\"rounded_trip_distance\\\"] = rounded_trip_distance_modin\\n\",\n    \"\\n\",\n    \"end = time.time()\\n\",\n    \"modin_duration = end - start\\n\",\n    \"print(\\\"Time to add a column with Modin: {} seconds\\\".format(round(modin_duration, 3)))\\n\",\n    \"\\n\",\n    \"printmd(\\\"### Modin is {}x faster than pandas add a column!\\\".format(round(pandas_duration / modin_duration, 2)))\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Are they equal?\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"pandas_df\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"modin_df\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"**Please move on to [Exercise 3](./exercise_3.ipynb) when you are ready**\"\n   ]\n  }\n ],\n \"metadata\": {\n  \"kernelspec\": {\n   \"display_name\": \"Python 3 (ipykernel)\",\n   \"language\": \"python\",\n   \"name\": \"python3\"\n  },\n  \"language_info\": {\n   \"codemirror_mode\": {\n    \"name\": \"ipython\",\n    \"version\": 3\n   },\n   \"file_extension\": \".py\",\n   \"mimetype\": \"text/x-python\",\n   \"name\": \"python\",\n   \"nbconvert_exporter\": \"python\",\n   \"pygments_lexer\": \"ipython3\",\n   \"version\": \"3.8.12\"\n  }\n },\n \"nbformat\": 4,\n \"nbformat_minor\": 2\n}\n"
  },
  {
    "path": "examples/tutorial/jupyter/execution/pandas_on_dask/local/exercise_3.ipynb",
    "content": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"![LOGO](../../../img/MODIN_ver2_hrz.png)\\n\",\n    \"\\n\",\n    \"<center><h2>Scale your pandas workflows by changing one line of code</h2>\\n\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Exercise 3: Not Implemented\\n\",\n    \"\\n\",\n    \"**GOAL**: Learn what happens when a function is not yet supported in Modin as well as how to extend Modin's functionality using the DataFrame Algebra.\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"When functionality has not yet been implemented, we default to pandas\\n\",\n    \"\\n\",\n    \"![](../../../img/convert_to_pandas.png)\\n\",\n    \"\\n\",\n    \"We convert a Modin dataframe to pandas to do the operation, then convert it back once it is finished. These operations will have a high overhead due to the communication involved and will take longer than pandas.\\n\",\n    \"\\n\",\n    \"When this is happening, a warning will be given to the user to inform them that this operation will take longer than usual. For example, `DataFrame.mask` is not yet implemented. 
In this case, when a user tries to use it, they will see this warning:\\n\",\n    \"\\n\",\n    \"```\\n\",\n    \"UserWarning: `DataFrame.mask` defaulting to pandas implementation.\\n\",\n    \"```\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Concept for exercise: Default to pandas\\n\",\n    \"\\n\",\n    \"In this section of the exercise we will see first-hand how the runtime is affected by operations that are not implemented.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"import modin.pandas as pd\\n\",\n    \"import pandas\\n\",\n    \"import numpy as np\\n\",\n    \"import time\\n\",\n    \"import modin.config as cfg\\n\",\n    \"cfg.Engine.put(\\\"dask\\\")\\n\",\n    \"\\n\",\n    \"frame_data = np.random.randint(0, 100, size=(2**18, 2**8))\\n\",\n    \"df = pd.DataFrame(frame_data).add_prefix(\\\"col\\\")\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"pandas_df = pandas.DataFrame(frame_data).add_prefix(\\\"col\\\")\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"modin_start = time.time()\\n\",\n    \"\\n\",\n    \"print(df.mask(df < 50))\\n\",\n    \"\\n\",\n    \"modin_end = time.time()\\n\",\n    \"print(\\\"Modin mask took {} seconds.\\\".format(round(modin_end - modin_start, 4)))\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"pandas_start = time.time()\\n\",\n    \"\\n\",\n    \"print(pandas_df.mask(pandas_df < 50))\\n\",\n    \"\\n\",\n    \"pandas_end = time.time()\\n\",\n    \"print(\\\"pandas mask took {} seconds.\\\".format(round(pandas_end - pandas_start, 4)))\"\n   ]\n  },\n  {\n   \"cell_type\": 
\"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Concept for exercise: Register custom functions\\n\",\n    \"\\n\",\n    \"Modin's user-facing API is pandas, but it is possible that we do not yet support your favorite or most-needed functionalities. Your user-defined function may also be able to be executed more efficiently if you pre-define the type of function it is (e.g. map, reduce, etc.) using the DataFrame Algebra. To solve either case, it is possible to register a custom function to be applied to your data.\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### Registering a custom function for all query compilers\\n\",\n    \"\\n\",\n    \"To register a custom function for a query compiler, we first need to import it:\\n\",\n    \"\\n\",\n    \"```python\\n\",\n    \"from modin.core.storage_formats.pandas.query_compiler import PandasQueryCompiler\\n\",\n    \"```\\n\",\n    \"\\n\",\n    \"The `PandasQueryCompiler` is responsible for defining and compiling the queries that can be operated on by Modin, and is specific to the pandas storage format. Any queries defined here must also both be compatible with and result in a `pandas.DataFrame`. Many functionalities are very simply implemented, as you can see in the current code: [Link](https://github.com/modin-project/modin/blob/7a8158873e77cb5f1a5a3b89be4ddac89f576269/modin/core/storage_formats/pandas/query_compiler.py#L216).\\n\",\n    \"\\n\",\n    \"If we want to register a new function, we need to understand what kind of function it is. In our example, we will try to implement a `kurtosis` on the unary negation of the values in the dataframe, which is a map (unargy negation of each cell) followed by a reduce. 
So we next want to import the function type so we can use it in our definition:\\n\",\n    \"\\n\",\n    \"```python\\n\",\n    \"from modin.core.dataframe.algebra import TreeReduce\\n\",\n    \"```\\n\",\n    \"\\n\",\n    \"Then we can just use the `TreeReduce.register` `classmethod` and assign it to the `PandasQueryCompiler`:\\n\",\n    \"\\n\",\n    \"```python\\n\",\n    \"PandasQueryCompiler.neg_kurtosis = TreeReduce.register(lambda cell_value, **kwargs: ~cell_value, pandas.DataFrame.kurtosis)\\n\",\n    \"```\\n\",\n    \"\\n\",\n    \"We include `**kwargs` to the `lambda` function since the query compiler will pass all keyword arguments to both the map and reduce functions.\\n\",\n    \"\\n\",\n    \"Finally, we want a handle to it from the `DataFrame`, so we need to create a way to do that:\\n\",\n    \"\\n\",\n    \"```python\\n\",\n    \"def neg_kurtosis_func(self, **kwargs):\\n\",\n    \"    # The constructor allows you to pass in a query compiler as a keyword argument\\n\",\n    \"    return self.__constructor__(query_compiler=self._query_compiler.neg_kurtosis(**kwargs))\\n\",\n    \"\\n\",\n    \"pd.DataFrame.neg_kurtosis_custom = neg_kurtosis_func\\n\",\n    \"```\\n\",\n    \"\\n\",\n    \"And then you can use it like you usually would:\\n\",\n    \"\\n\",\n    \"```python\\n\",\n    \"df.neg_kurtosis_custom()\\n\",\n    \"```\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"from modin.core.storage_formats.pandas.query_compiler import PandasQueryCompiler\\n\",\n    \"from modin.core.dataframe.algebra import TreeReduce\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"PandasQueryCompiler.neg_kurtosis_custom = TreeReduce.register(lambda cell_value, **kwargs: ~cell_value,\\n\",\n    \"                                                             
pandas.DataFrame.kurtosis)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"from pandas._libs import lib\\n\",\n    \"# The function signature came from the pandas documentation:\\n\",\n    \"# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.kurtosis.html\\n\",\n    \"def neg_kurtosis_func(self, axis=lib.no_default, skipna=True, level=None, numeric_only=None, **kwargs):\\n\",\n    \"    # We need to specify the axis for the query compiler\\n\",\n    \"    if axis in [None, lib.no_default]:\\n\",\n    \"        axis = 0\\n\",\n    \"    # The constructor allows you to pass in a query compiler as a keyword argument\\n\",\n    \"    # Reduce dimension is used for reduces\\n\",\n    \"    # We also pass all keyword arguments here to ensure correctness\\n\",\n    \"    return self._reduce_dimension(\\n\",\n    \"        self._query_compiler.neg_kurtosis_custom(\\n\",\n    \"            axis=axis, skipna=skipna, level=level, numeric_only=numeric_only, **kwargs\\n\",\n    \"        )\\n\",\n    \"    )\\n\",\n    \"\\n\",\n    \"pd.DataFrame.neg_kurtosis_custom = neg_kurtosis_func\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### Speed improvements\\n\",\n    \"If we were to try and replicate this functionality using the pandas API, we would need to call `df.applymap` with our unary negation function, and subsequently `df.kurtosis` on the result of the first call. 
Let's see how this compares with our new, custom function!\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"start = time.time()\\n\",\n    \"\\n\",\n    \"print(pandas_df.applymap(lambda cell_value: ~cell_value).kurtosis())\\n\",\n    \"\\n\",\n    \"end = time.time()\\n\",\n    \"pandas_duration = end - start\\n\",\n    \"print(\\\"pandas unary negation kurtosis took {} seconds.\\\".format(pandas_duration))\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"start = time.time()\\n\",\n    \"\\n\",\n    \"print(df.applymap(lambda x: ~x).kurtosis())\\n\",\n    \"\\n\",\n    \"end = time.time()\\n\",\n    \"modin_duration = end - start\\n\",\n    \"print(\\\"Modin unary negation kurtosis took {} seconds.\\\".format(modin_duration))\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"custom_start = time.time()\\n\",\n    \"\\n\",\n    \"print(df.neg_kurtosis_custom())\\n\",\n    \"\\n\",\n    \"custom_end = time.time()\\n\",\n    \"modin_custom_duration = custom_end - custom_start\\n\",\n    \"print(\\\"Modin neg_kurtosis_custom took {} seconds.\\\".format(modin_custom_duration))\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"from IPython.display import Markdown, display\\n\",\n    \"\\n\",\n    \"display(Markdown(\\\"### As expected, Modin is {}x faster than pandas when chaining the functions; however we see that our custom function is even faster than that - beating pandas by {}x, and Modin (when chaining the functions) by {}x!\\\".format(round(pandas_duration / modin_duration, 2), round(pandas_duration / modin_custom_duration, 2), round(modin_duration / modin_custom_duration, 
2))))\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Congratulations! You have just implemented new DataFrame functionality!\\n\",\n    \"\\n\",\n    \"## Consider opening a pull request: https://github.com/modin-project/modin/pulls\\n\",\n    \"\\n\",\n    \"For a complete list of what is implemented, see the [Supported APIs](https://modin.readthedocs.io/en/latest/supported_apis/index.html) section.\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Test your knowledge: Add a custom function for another tree reduce: finding `DataFrame.mad` after squaring all of the values\\n\",\n    \"\\n\",\n    \"See the pandas documentation for the correct signature: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.mad.html\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"modin_mad_custom_start = time.time()\\n\",\n    \"\\n\",\n    \"# Implement your function here! Put the result of your custom squared `mad` in the variable `modin_mad_custom`\\n\",\n    \"# Hint: Look at the kurtosis walkthrough above\\n\",\n    \"\\n\",\n    \"modin_mad_custom = ...\\n\",\n    \"print(modin_mad_custom)\\n\",\n    \"\\n\",\n    \"modin_mad_custom_end = time.time()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Evaluation code, do not change!\\n\",\n    \"modin_mad_start = time.time()\\n\",\n    \"modin_mad = df.applymap(lambda x: x**2).mad()\\n\",\n    \"print(modin_mad)\\n\",\n    \"modin_mad_end = time.time()\\n\",\n    \"\\n\",\n    \"assert modin_mad_end - modin_mad_start > modin_mad_custom_end - modin_mad_custom_start, \\\\\\n\",\n    \"    \\\"Your implementation was too slow, or you used the chaining functions approach. 
Try again\\\"\\n\",\n    \"assert modin_mad._to_pandas().equals(modin_mad_custom._to_pandas()), \\\"Your result did not match the result of chaining the functions, try again\\\"\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### Now that you are able to create custom functions, you know enough to contribute to Modin!\\n\",\n    \"\\n\",\n    \"**Please move on to [Exercise 4](./exercise_4.ipynb) when you are ready**\"\n   ]\n  }\n ],\n \"metadata\": {\n  \"kernelspec\": {\n   \"display_name\": \"Python 3 (ipykernel)\",\n   \"language\": \"python\",\n   \"name\": \"python3\"\n  },\n  \"language_info\": {\n   \"codemirror_mode\": {\n    \"name\": \"ipython\",\n    \"version\": 3\n   },\n   \"file_extension\": \".py\",\n   \"mimetype\": \"text/x-python\",\n   \"name\": \"python\",\n   \"nbconvert_exporter\": \"python\",\n   \"pygments_lexer\": \"ipython3\",\n   \"version\": \"3.8.12\"\n  }\n },\n \"nbformat\": 4,\n \"nbformat_minor\": 2\n}\n"
  },
  {
    "path": "examples/tutorial/jupyter/execution/pandas_on_dask/local/exercise_4.ipynb",
    "content": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"id\": \"99f41d2d\",\n   \"metadata\": {},\n   \"source\": [\n    \"![LOGO](../../../img/MODIN_ver2_hrz.png)\\n\",\n    \"\\n\",\n    \"<center><h2>Scale your pandas workflows by changing one line of code</h2>\\n\",\n    \"\\n\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"id\": \"fdda1c9c\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Exercise 4: Experimental Features\\n\",\n    \"\\n\",\n    \"**GOAL**: Explore some of the experimental features being added to Modin.\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"id\": \"e7bf87a5\",\n   \"metadata\": {},\n   \"source\": [\n    \"### Concept for exercise: Spreadsheet\\n\",\n    \"\\n\",\n    \"For those who have worked with Excel, the Spreadsheet API will definitely feel familiar! The Spreadsheet API is a Jupyter notebook widget that allows us to interact with Modin DataFrames in a spreadsheet-like fashion while taking advantage of the underlying capabilities of Modin. 
The widget makes it quick and easy to explore, sort, filter, and edit data as well as export the changes as reproducible code.\\n\",\n    \"\\n\",\n    \"Let's look back at a subset of the 2015 NYC Taxi Data from Exercise 2, and see how the Spreadsheet API can make it easy to play with the data!\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"id\": \"5d5c4a3e\",\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"!jupyter nbextension enable --py --sys-prefix modin_spreadsheet\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"id\": \"dc8d5903\",\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"import modin.pandas as pd\\n\",\n    \"import modin.experimental.spreadsheet as mss\\n\",\n    \"from modin.config import Engine\\n\",\n    \"Engine.put(\\\"dask\\\")\\n\",\n    \"\\n\",\n    \"s3_path = \\\"s3://dask-data/nyc-taxi/2015/yellow_tripdata_2015-01.csv\\\"\\n\",\n    \"modin_df = pd.read_csv(s3_path, parse_dates=[\\\"tpep_pickup_datetime\\\", \\\"tpep_dropoff_datetime\\\"], quoting=3, nrows=1000)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"id\": \"145e7bbe\",\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"spreadsheet = mss.from_dataframe(modin_df)\\n\",\n    \"spreadsheet\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"id\": \"3c18b7f2\",\n   \"metadata\": {},\n   \"source\": [\n    \"### Thank you for participating!\"\n   ]\n  }\n ],\n \"metadata\": {\n  \"kernelspec\": {\n   \"display_name\": \"Python 3 (ipykernel)\",\n   \"language\": \"python\",\n   \"name\": \"python3\"\n  },\n  \"language_info\": {\n   \"codemirror_mode\": {\n    \"name\": \"ipython\",\n    \"version\": 3\n   },\n   \"file_extension\": \".py\",\n   \"mimetype\": \"text/x-python\",\n   \"name\": \"python\",\n   \"nbconvert_exporter\": \"python\",\n   \"pygments_lexer\": \"ipython3\",\n   \"version\": \"3.8.12\"\n  }\n 
},\n \"nbformat\": 4,\n \"nbformat_minor\": 5\n}\n"
  },
  {
    "path": "examples/tutorial/jupyter/execution/pandas_on_dask/requirements.txt",
    "content": "fsspec>=2022.11.0\njupyterlab\nipywidgets\nmodin[dask]\nmodin[spreadsheet]\n"
  },
  {
    "path": "examples/tutorial/jupyter/execution/pandas_on_dask/test/test_notebooks.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport os\nimport sys\n\nimport nbformat\n\nMODIN_DIR = os.path.abspath(\n    os.path.join(os.path.dirname(__file__), *[\"..\" for _ in range(6)])\n)\nsys.path.insert(0, MODIN_DIR)\nfrom examples.tutorial.jupyter.execution.test.utils import (  # noqa: E402\n    _execute_notebook,\n    _replace_str,\n    download_taxi_dataset,\n    test_dataset_path,\n)\n\nlocal_notebooks_dir = \"examples/tutorial/jupyter/execution/pandas_on_dask/local\"\n\n\n# in this notebook user should replace 'import pandas as pd' with\n# 'import modin.pandas as pd' to make notebook work\ndef test_exercise_1():\n    modified_notebook_path = os.path.join(local_notebooks_dir, \"exercise_1_test.ipynb\")\n    nb = nbformat.read(\n        os.path.join(local_notebooks_dir, \"exercise_1.ipynb\"),\n        as_version=nbformat.NO_CONVERT,\n    )\n\n    _replace_str(nb, \"import pandas as pd\", \"import modin.pandas as pd\")\n\n    nbformat.write(nb, modified_notebook_path)\n    _execute_notebook(modified_notebook_path)\n\n\n# this notebook works \"as is\" but for testing purposes we can use smaller dataset\ndef test_exercise_2():\n    modified_notebook_path = os.path.join(local_notebooks_dir, 
\"exercise_2_test.ipynb\")\n    nb = nbformat.read(\n        os.path.join(local_notebooks_dir, \"exercise_2.ipynb\"),\n        as_version=nbformat.NO_CONVERT,\n    )\n\n    new_cell = f'path = \"{test_dataset_path}\"\\n' + download_taxi_dataset\n\n    _replace_str(\n        nb,\n        'path = \"s3://dask-data/nyc-taxi/2015/yellow_tripdata_2015-01.csv\"',\n        new_cell,\n    )\n\n    nbformat.write(nb, modified_notebook_path)\n    _execute_notebook(modified_notebook_path)\n\n\n# in this notebook user should add custom mad implementation\n# to make notebook work\ndef test_exercise_3():\n    modified_notebook_path = os.path.join(local_notebooks_dir, \"exercise_3_test.ipynb\")\n    nb = nbformat.read(\n        os.path.join(local_notebooks_dir, \"exercise_3.ipynb\"),\n        as_version=nbformat.NO_CONVERT,\n    )\n\n    user_mad_implementation = \"\"\"PandasQueryCompiler.sq_mad_custom = TreeReduce.register(lambda cell_value, **kwargs: cell_value ** 2,\n                                                             pandas.DataFrame.mad)\n\ndef sq_mad_func(self, axis=None, skipna=True, level=None, **kwargs):\n    if axis is None:\n        axis = 0\n\n    return self._reduce_dimension(\n        self._query_compiler.sq_mad_custom(\n            axis=axis, skipna=skipna, level=level, **kwargs\n        )\n    )\n\npd.DataFrame.sq_mad_custom = sq_mad_func\n\nmodin_mad_custom = df.sq_mad_custom()\n    \"\"\"\n\n    _replace_str(nb, \"modin_mad_custom = ...\", user_mad_implementation)\n\n    nbformat.write(nb, modified_notebook_path)\n    # need to update example, `.mad` doesn't exist\n    # _execute_notebook(modified_notebook_path)\n\n\n# this notebook works \"as is\" but for testing purposes we can use smaller dataset\ndef test_exercise_4():\n    modified_notebook_path = os.path.join(local_notebooks_dir, \"exercise_4_test.ipynb\")\n    nb = nbformat.read(\n        os.path.join(local_notebooks_dir, \"exercise_4.ipynb\"),\n        as_version=nbformat.NO_CONVERT,\n    )\n\n   
 s3_path_cell = f's3_path = \"{test_dataset_path}\"\\n' + download_taxi_dataset\n    _replace_str(\n        nb,\n        's3_path = \"s3://dask-data/nyc-taxi/2015/yellow_tripdata_2015-01.csv\"',\n        s3_path_cell,\n    )\n\n    nbformat.write(nb, modified_notebook_path)\n    _execute_notebook(modified_notebook_path)\n"
  },
  {
    "path": "examples/tutorial/jupyter/execution/pandas_on_ray/Dockerfile",
    "content": "FROM continuumio/miniconda3\n\nRUN conda install -c conda-forge psutil setproctitle\nRUN pip install -r requirements-dev.txt\n\n"
  },
  {
    "path": "examples/tutorial/jupyter/execution/pandas_on_ray/cluster/README.md",
    "content": "![LOGO](../../../img/MODIN_ver2_hrz.png)\n\n<center>\n<h1>Scale your pandas workflows on a Ray cluster</h2>\n</center>\n\n**NOTE**: Before starting the exercise, please read the full instructions in the \n[Modin documenation](https://modin.readthedocs.io/en/latest/getting_started/using_modin/using_modin_cluster.html).\n\nThe basic steps to run the script on a remote Ray cluster are:\n\nStep 1. Install the necessary dependencies\n\n```bash\npip install boto3\n```\n\nStep 2. Setup your AWS credentials.\n\n```bash\naws configure\n```\n\nStep 3. Modify configuration file and start up the Ray cluster.\n\n```bash\nray up modin-cluster.yaml\n```\n\nStep 4. Submit your script to the remote cluster.\n\n```bash\nray submit modin-cluster.yaml exercise_5.py\n```\n\nStep 5. Shut down the Ray remote cluster.\n\n```bash\nray down \n"
  },
  {
    "path": "examples/tutorial/jupyter/execution/pandas_on_ray/cluster/exercise_5.py",
    "content": "import time\n\nimport ray\n\nimport modin.pandas as pd\n\nray.init(address=\"auto\")\ncpu_count = ray.cluster_resources()[\"CPU\"]\nassert cpu_count == 576, f\"Expected 576 CPUs, but found {cpu_count}\"\n\nfile_path = \"big_yellow.csv\"\n\nt0 = time.perf_counter()\n\ndf = pd.read_csv(file_path, quoting=3)\ndf_count = df.count()\ndf_groupby_count = df.groupby(\"passenger_count\").count()\ndf_map = df.map(str)\n\nt1 = time.perf_counter()\nprint(f\"Full script time is {(t1 - t0):.3f}\")  # noqa: T201\n"
  },
  {
    "path": "examples/tutorial/jupyter/execution/pandas_on_ray/cluster/modin-cluster.yaml",
    "content": "# An unique identifier for the head node and workers of this cluster.\ncluster_name: modin_init\n\n# The maximum number of workers nodes to launch in addition to the head\n# node.\nmax_workers: 5\n\n# The autoscaler will scale up the cluster faster with higher upscaling speed.\n# E.g., if the task requires adding more nodes then autoscaler will gradually\n# scale up the cluster in chunks of upscaling_speed*currently_running_nodes.\n# This number should be > 0.\nupscaling_speed: 1.0\n\n# This executes all commands on all nodes in the docker container,\n# and opens all the necessary ports to support the Ray cluster.\n# Empty string means disabled.\ndocker:\n    # image: \"rayproject/ray-ml:latest-gpu\" # You can change this to latest-cpu if you don't need GPU support and want a faster startup\n    image: rayproject/ray:latest-cpu   # use this one if you don't need ML dependencies, it's faster to pull\n    container_name: \"ray_container\"\n    # If true, pulls latest version of image. Otherwise, `docker run` will only pull the image\n    # if no cached version is present.\n    pull_before_run: True\n    run_options:   # Extra options to pass into \"docker run\"\n        - --ulimit nofile=65536:65536\n\n    # Example of running a GPU head with CPU workers\n    # head_image: \"rayproject/ray-ml:latest-gpu\"\n    # Allow Ray to automatically detect GPUs\n\n    # worker_image: \"rayproject/ray-ml:latest-cpu\"\n    # worker_run_options: []\n\n# If a node is idle for this many minutes, it will be removed.\nidle_timeout_minutes: 5\n\n# Cloud-provider specific configuration.\nprovider:\n    type: aws\n    region: us-west-2\n    # Availability zone(s), comma-separated, that nodes may be launched in.\n    # Nodes will be launched in the first listed availability zone and will\n    # be tried in the subsequent availability zones if launching fails.\n    availability_zone: us-west-2a,us-west-2b\n    # Whether to allow node reuse. 
If set to False, nodes will be terminated\n    # instead of stopped.\n    cache_stopped_nodes: False # If not present, the default is True.\n\n# How Ray will authenticate with newly launched nodes.\nauth:\n    ssh_user: ubuntu\n# By default Ray creates a new private keypair, but you can also use your own.\n# If you do so, make sure to also set \"KeyName\" in the head and worker node\n# configurations below.\n#    ssh_private_key: /path/to/your/key.pem\n\n# Tell the autoscaler the allowed node types and the resources they provide.\n# The key is the name of the node type, which is just for debugging purposes.\n# The node config specifies the launch config and physical instance type.\navailable_node_types:\n    ray.head.default:\n        # The node type's CPU and GPU resources are auto-detected based on AWS instance type.\n        # If desired, you can override the autodetected CPU and GPU resources advertised to the autoscaler.\n        # You can also set custom resources.\n        # For example, to mark a node type as having 1 CPU, 1 GPU, and 5 units of a resource called \"custom\", set\n        # resources: {\"CPU\": 1, \"GPU\": 1, \"custom\": 5}\n        resources: {}\n        # Provider-specific config for this node type, e.g. instance type. 
By default\n        # Ray will auto-configure unspecified fields such as SubnetId and KeyName.\n        # For more documentation on available fields, see:\n        # http://boto3.readthedocs.io/en/latest/reference/services/ec2.html#EC2.ServiceResource.create_instances\n        node_config:\n            InstanceType: m5.24xlarge\n            # Default AMI for us-west-2.\n            # Check https://github.com/ray-project/ray/blob/master/python/ray/autoscaler/_private/aws/config.py\n            # for default images for other zones.\n            ImageId: ami-0387d929287ab193e\n            # You can provision additional disk space with a conf as follows\n            BlockDeviceMappings:\n                - DeviceName: /dev/sda1\n                  Ebs:\n                      VolumeSize: 500\n                      VolumeType: gp3\n            # Additional options in the boto docs.\n    ray.worker.default:\n        # The minimum number of worker nodes of this type to launch.\n        # This number should be >= 0.\n        min_workers: 5\n        # The maximum number of worker nodes of this type to launch.\n        # This takes precedence over min_workers.\n        max_workers: 5\n        # The node type's CPU and GPU resources are auto-detected based on AWS instance type.\n        # If desired, you can override the autodetected CPU and GPU resources advertised to the autoscaler.\n        # You can also set custom resources.\n        # For example, to mark a node type as having 1 CPU, 1 GPU, and 5 units of a resource called \"custom\", set\n        # resources: {\"CPU\": 1, \"GPU\": 1, \"custom\": 5}\n        resources: {}\n        # Provider-specific config for this node type, e.g. instance type. 
By default\n        # Ray will auto-configure unspecified fields such as SubnetId and KeyName.\n        # For more documentation on available fields, see:\n        # http://boto3.readthedocs.io/en/latest/reference/services/ec2.html#EC2.ServiceResource.create_instances\n        node_config:\n            InstanceType: m5.24xlarge\n            # Default AMI for us-west-2.\n            # Check https://github.com/ray-project/ray/blob/master/python/ray/autoscaler/_private/aws/config.py\n            # for default images for other zones.\n            ImageId: ami-0387d929287ab193e\n            # You can provision additional disk space with a conf as follows\n            BlockDeviceMappings:\n                - DeviceName: /dev/sda1\n                  Ebs:\n                      VolumeSize: 500\n                      VolumeType: gp3\n            # Run workers on spot by default. Comment this out to use on-demand.\n            # NOTE: If relying on spot instances, it is best to specify multiple different instance\n            # types to avoid interruption when one instance type is experiencing heightened demand.\n            # Demand information can be found at https://aws.amazon.com/ec2/spot/instance-advisor/\n            # InstanceMarketOptions:\n                # MarketType: spot\n                # Additional options can be found in the boto docs, e.g.\n                #   SpotOptions:\n                #       MaxPrice: MAX_HOURLY_PRICE\n            # Additional options in the boto docs.\n\n# Specify the node type of the head node (as configured above).\nhead_node_type: ray.head.default\n\n# Files or directories to copy to the head and worker nodes. The format is a\n# dictionary from REMOTE_PATH: LOCAL_PATH, e.g.\nfile_mounts: {\n#    \"/path1/on/remote/machine\": \"/path1/on/local/machine\",\n#    \"/path2/on/remote/machine\": \"/path2/on/local/machine\",\n}\n\n# Files or directories to copy from the head node to the worker nodes. The format is a\n# list of paths. 
The same path on the head node will be copied to the worker node.\n# This behavior is a subset of the file_mounts behavior. In the vast majority of cases\n# you should just use file_mounts. Only use this if you know what you're doing!\ncluster_synced_files: []\n\n# Whether changes to directories in file_mounts or cluster_synced_files in the head node\n# should sync to the worker node continuously\nfile_mounts_sync_continuously: False\n\n# Patterns for files to exclude when running rsync up or rsync down\nrsync_exclude:\n    - \"**/.git\"\n    - \"**/.git/**\"\n\n# Pattern files to use for filtering out files when running rsync up or rsync down. The file is searched for\n# in the source directory and recursively through all subdirectories. For example, if .gitignore is provided\n# as a value, the behavior will match git's behavior for finding and using .gitignore files.\nrsync_filter:\n    - \".gitignore\"\n\n# List of commands that will be run before `setup_commands`. If docker is\n# enabled, these commands will run outside the container and before docker\n# is setup.\ninitialization_commands: []\n\n# List of shell commands to run to set up nodes.\nsetup_commands:\n    # Note: if you're developing Ray, you probably want to create a Docker image that\n    # has your Ray repo pre-cloned. Then, you can replace the pip installs\n    # below with a git checkout <your_sha> (and possibly a recompile).\n    # To run the nightly version of ray (as opposed to the latest), either use a rayproject docker image\n    # that has the \"nightly\" (e.g. 
\"rayproject/ray-ml:nightly-gpu\") or uncomment the following line:\n    # - pip install -U \"ray[default] @ https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-3.0.0.dev0-cp37-cp37m-manylinux2014_x86_64.whl\"\n    - conda create -n \"modin\" -c conda-forge modin \"ray-default\">=2.10.0,<3 -y\n    - conda activate modin && pip install -U fsspec>=2022.11.0 boto3\n    - echo \"conda activate modin\" >> ~/.bashrc\n    - wget https://modin-datasets.intel.com/testing/yellow_tripdata_2015-01.csv\n    - printf \"VendorID,tpep_pickup_datetime,tpep_dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,RateCodeID,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,extra,mta_tax,tip_amount,tolls_amount,improvement_surcharge,total_amount,congestion_surcharge,airport_fee\\n\" > big_yellow.csv\n    - tail -n +2 yellow_tripdata_2015-01.csv{,}{,}{,}{,}{,}{,} >> big_yellow.csv\n    - echo 'export MODIN_RAY_CLUSTER=True' >> ~/.bashrc\n    \n# Custom commands that will be run on the head node after common setup.\nhead_setup_commands: \n    - echo 'export MODIN_REDIS_ADDRESS=\"localhost:6379\"' >> ~/.bashrc\n\n# Custom commands that will be run on worker nodes after common setup.\nworker_setup_commands: []\n\n# Command to start ray on the head node. You don't need to change this.\nhead_start_ray_commands:\n    - ray stop\n    - echo 'export MEMORY_STORE_SIZE=$(awk \"/MemFree/ { printf \\\"%d \\\\n\\\", \\$2*1024}\" /proc/meminfo)' >> ~/.bashrc\n    - echo 'export TMPDIR=\"$(dirname $(mktemp tmp.XXXXXXXXXX -ut))\"' >> ~/.bashrc\n    - ulimit -n 65536; ray start --head --port=6379 --object-manager-port=8076 --autoscaling-config=~/ray_bootstrap_config.yaml --object-store-memory=$MEMORY_STORE_SIZE --plasma-directory=$TMPDIR --dashboard-host=0.0.0.0\n\n# Command to start ray on worker nodes. 
You don't need to change this.\nworker_start_ray_commands:\n    - ray stop\n    - echo 'export MEMORY_STORE_SIZE=$(awk \"/MemFree/ { printf \\\"%d \\\\n\\\", \\$2*1024}\" /proc/meminfo)' >> ~/.bashrc\n    - echo 'export TMPDIR=\"$(dirname $(mktemp tmp.XXXXXXXXXX -ut))\"' >> ~/.bashrc\n    - ulimit -n 65536; ray start --address=$RAY_HEAD_IP:6379 --object-manager-port=8076  --object-store-memory=$MEMORY_STORE_SIZE --plasma-directory=$TMPDIR\n"
  },
  {
    "path": "examples/tutorial/jupyter/execution/pandas_on_ray/local/exercise_1.ipynb",
    "content": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"![LOGO](../../../img/MODIN_ver2_hrz.png)\\n\",\n    \"\\n\",\n    \"<center><h2>Scale your pandas workflows by changing one line of code</h2>\\n\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Exercise 1: How to use Modin\\n\",\n    \"\\n\",\n    \"**GOAL**: Learn how to import Modin to accelerate and scale pandas workflows.\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"Modin is a drop-in replacement for pandas that distributes the computation \\n\",\n    \"across all of the cores in your machine or in a cluster.\\n\",\n    \"In practical terms, this means that you can continue using the same pandas scripts\\n\",\n    \"as before and expect the behavior and results to be the same. The only thing that needs\\n\",\n    \"to change is the import statement. Normally, you would change:\\n\",\n    \"\\n\",\n    \"```python\\n\",\n    \"import pandas as pd\\n\",\n    \"```\\n\",\n    \"\\n\",\n    \"to:\\n\",\n    \"\\n\",\n    \"```python\\n\",\n    \"import modin.pandas as pd\\n\",\n    \"```\\n\",\n    \"\\n\",\n    \"Changing this line of code will allow you to use all of the cores in your machine to do computation on your data. One of the major performance bottlenecks of pandas is that it only uses a single core for any given computation. Modin exposes an API that is identical to pandas, allowing you to continue interacting with your data as you would with pandas. There are no additional commands required to use Modin locally. 
Partitioning, scheduling, data transfer, and other related concerns are all handled by Modin under the hood.\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"<p style=\\\"text-align:left;\\\">\\n\",\n    \"        <h1>pandas on a multicore laptop\\n\",\n    \"    <span style=\\\"float:right;\\\">\\n\",\n    \"        Modin on a multicore laptop\\n\",\n    \"    </span>\\n\",\n    \"\\n\",\n    \"<div>\\n\",\n    \"<img align=\\\"left\\\" src=\\\"../../../img/pandas_multicore.png\\\"><img src=\\\"../../../img/modin_multicore.png\\\">\\n\",\n    \"</div>\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### Concept for exercise: Dataframe constructor\\n\",\n    \"\\n\",\n    \"Often when playing around in pandas, it is useful to create a DataFrame with the constructor. That is where we will start.\\n\",\n    \"\\n\",\n    \"```python\\n\",\n    \"import numpy as np\\n\",\n    \"import pandas as pd\\n\",\n    \"\\n\",\n    \"frame_data = np.random.randint(0, 100, size=(2**10, 2**5))\\n\",\n    \"df = pd.DataFrame(frame_data)\\n\",\n    \"```\\n\",\n    \"\\n\",\n    \"When creating a dataframe from a non-distributed object, it will take extra time to partition the data. When this is happening, you will see this message:\\n\",\n    \"\\n\",\n    \"```\\n\",\n    \"UserWarning: Distributing <class 'numpy.ndarray'> object. 
This may take some time.\\n\",\n    \"```\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Note: Do not change this code!\\n\",\n    \"import numpy as np\\n\",\n    \"import pandas\\n\",\n    \"import sys\\n\",\n    \"import modin\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"pandas.__version__\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"modin.__version__\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Implement your answer here. You are also free to play with the size\\n\",\n    \"# and shape of the DataFrame, but beware of exceeding your memory!\\n\",\n    \"\\n\",\n    \"import pandas as pd\\n\",\n    \"\\n\",\n    \"frame_data = np.random.randint(0, 100, size=(2**10, 2**5))\\n\",\n    \"df = pd.DataFrame(frame_data)\\n\",\n    \"\\n\",\n    \"# ***** Do not change the code below! It verifies that \\n\",\n    \"# ***** the exercise has been done correctly. *****\\n\",\n    \"\\n\",\n    \"try:\\n\",\n    \"    assert df is not None\\n\",\n    \"    assert frame_data is not None\\n\",\n    \"    assert isinstance(frame_data, np.ndarray)\\n\",\n    \"except:\\n\",\n    \"    raise AssertionError(\\\"Don't change too much of the original code!\\\")\\n\",\n    \"assert \\\"modin.pandas\\\" in sys.modules, \\\"Not quite correct. 
Remember the single line of code change (See above)\\\"\\n\",\n    \"\\n\",\n    \"import modin.pandas\\n\",\n    \"assert pd == modin.pandas, \\\"Remember the single line of code change (See above)\\\"\\n\",\n    \"assert hasattr(df, \\\"_query_compiler\\\"), \\\"Make sure that `df` is a modin.pandas DataFrame.\\\"\\n\",\n    \"\\n\",\n    \"print(\\\"Success! You only need to change one line of code!\\\")\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"Now that we have created a toy example for playing around with the DataFrame, let's print it out in different ways.\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### Concept for Exercise: Data Interaction and Printing\\n\",\n    \"\\n\",\n    \"When interacting with data, it is very important to look at different parts of the data (e.g. `df.head()`). Here we will show that you can print the modin.pandas DataFrame in the same ways you would pandas.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Print the first 10 lines.\\n\",\n    \"df.head(10)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Print the DataFrame.\\n\",\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Free cell for custom interaction (Play around here!)\\n\",\n    \"df.add_prefix(\\\"col\\\")\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"df.count()\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"**Please move on to [Exercise 2](./exercise_2.ipynb) when you are ready**\"\n   ]\n  }\n ],\n 
\"metadata\": {\n  \"kernelspec\": {\n   \"display_name\": \"Python 3\",\n   \"language\": \"python\",\n   \"name\": \"python3\"\n  },\n  \"language_info\": {\n   \"codemirror_mode\": {\n    \"name\": \"ipython\",\n    \"version\": 3\n   },\n   \"file_extension\": \".py\",\n   \"mimetype\": \"text/x-python\",\n   \"name\": \"python\",\n   \"nbconvert_exporter\": \"python\",\n   \"pygments_lexer\": \"ipython3\",\n   \"version\": \"3.6.8\"\n  }\n },\n \"nbformat\": 4,\n \"nbformat_minor\": 2\n}\n"
  },
  {
    "path": "examples/tutorial/jupyter/execution/pandas_on_ray/local/exercise_2.ipynb",
    "content": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"![LOGO](../../../img/MODIN_ver2_hrz.png)\\n\",\n    \"\\n\",\n    \"<center><h2>Scale your pandas workflows by changing one line of code</h2>\\n\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Exercise 2: Speed improvements\\n\",\n    \"\\n\",\n    \"**GOAL**: Learn about common functionality that Modin speeds up by using all of your machine's cores.\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Concept for Exercise: `read_csv` speedups\\n\",\n    \"\\n\",\n    \"The most commonly used data ingestion method used in pandas is CSV files (link to pandas survey). This concept is designed to give an idea of the kinds of speedups possible, even on a non-distributed filesystem. Modin also supports other file formats for parallel and distributed reads, which can be found in the documentation.\\n\",\n    \"\\n\",\n    \"![](../../../img/read_csv_perf.png)\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"We will import both Modin and pandas so that the speedups are evident.\\n\",\n    \"\\n\",\n    \"**Note: Rerunning the `read_csv` cells many times may result in degraded performance, depending on the memory of the machine**\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"import modin.pandas as pd\\n\",\n    \"import pandas\\n\",\n    \"import time\\n\",\n    \"from IPython.display import Markdown, display\\n\",\n    \"\\n\",\n    \"def printmd(string):\\n\",\n    \"    display(Markdown(string))\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### Dataset: 2015 NYC taxi trip data\\n\",\n    \"\\n\",\n    \"We will be using a version of this data already in 
S3, originally posted in this blog post: https://matthewrocklin.com/blog/work/2017/01/12/dask-dataframes\\n\",\n    \"\\n\",\n    \"**Size: ~1.8GB**\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"path = \\\"s3://dask-data/nyc-taxi/2015/yellow_tripdata_2015-01.csv\\\"\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"**Optional:** Note that the dataset takes a while to download. To speed things up a bit, if you prefer to download this file once locally, you can run the following code in the notebook:\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# [Optional] Download data locally. This may take a few minutes to download.\\n\",\n    \"# import urllib.request\\n\",\n    \"# url_path = \\\"https://dask-data.s3.amazonaws.com/nyc-taxi/2015/yellow_tripdata_2015-01.csv\\\"\\n\",\n    \"# urllib.request.urlretrieve(url_path, \\\"taxi.csv\\\")\\n\",\n    \"# path = \\\"taxi.csv\\\"\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## `pandas.read_csv`\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"start = time.time()\\n\",\n    \"\\n\",\n    \"pandas_df = pandas.read_csv(path, parse_dates=[\\\"tpep_pickup_datetime\\\", \\\"tpep_dropoff_datetime\\\"], quoting=3)\\n\",\n    \"\\n\",\n    \"end = time.time()\\n\",\n    \"pandas_duration = end - start\\n\",\n    \"print(\\\"Time to read with pandas: {} seconds\\\".format(round(pandas_duration, 3)))\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### Expect pandas to take >3 minutes on EC2, longer locally\\n\",\n    \"\\n\",\n    \"This is a good time to chat with your neighbor\\n\",\n    
\"Dicussion topics\\n\",\n    \"- Do you work with a large amount of data daily?\\n\",\n    \"- How big is your data?\\n\",\n    \"- What’s the common use case of your data?\\n\",\n    \"- Do you use any big data analytics tools?\\n\",\n    \"- Do you use any interactive analytics tool?\\n\",\n    \"- What’s are some drawbacks of your current interative analytic tools today?\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## `modin.pandas.read_csv`\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"start = time.time()\\n\",\n    \"\\n\",\n    \"modin_df = pd.read_csv(path, parse_dates=[\\\"tpep_pickup_datetime\\\", \\\"tpep_dropoff_datetime\\\"], quoting=3)\\n\",\n    \"\\n\",\n    \"end = time.time()\\n\",\n    \"modin_duration = end - start\\n\",\n    \"print(\\\"Time to read with Modin: {} seconds\\\".format(round(modin_duration, 3)))\\n\",\n    \"\\n\",\n    \"printmd(\\\"### Modin is {}x faster than pandas at `read_csv`!\\\".format(round(pandas_duration / modin_duration, 2)))\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Are they equal?\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"pandas_df\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"modin_df\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Concept for exercise: Reduces\\n\",\n    \"\\n\",\n    \"In pandas, a reduce would be something along the lines of a `sum` or `count`. It computes some summary statistics about the rows or columns. 
We will be using `count`.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"start = time.time()\\n\",\n    \"\\n\",\n    \"pandas_count = pandas_df.count()\\n\",\n    \"\\n\",\n    \"end = time.time()\\n\",\n    \"pandas_duration = end - start\\n\",\n    \"\\n\",\n    \"print(\\\"Time to count with pandas: {} seconds\\\".format(round(pandas_duration, 3)))\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"start = time.time()\\n\",\n    \"\\n\",\n    \"modin_count = modin_df.count()\\n\",\n    \"\\n\",\n    \"end = time.time()\\n\",\n    \"modin_duration = end - start\\n\",\n    \"print(\\\"Time to count with Modin: {} seconds\\\".format(round(modin_duration, 3)))\\n\",\n    \"\\n\",\n    \"printmd(\\\"### Modin is {}x faster than pandas at `count`!\\\".format(round(pandas_duration / modin_duration, 2)))\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Are they equal?\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"pandas_count\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"modin_count\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Concept for exercise: Map operations\\n\",\n    \"\\n\",\n    \"In pandas, map operations are operations that do a single pass over the data and do not change its shape. Operations like `isnull` and `applymap` are included in this. 
We will be using `isnull`.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"start = time.time()\\n\",\n    \"\\n\",\n    \"pandas_isnull = pandas_df.isnull()\\n\",\n    \"\\n\",\n    \"end = time.time()\\n\",\n    \"pandas_duration = end - start\\n\",\n    \"\\n\",\n    \"print(\\\"Time to isnull with pandas: {} seconds\\\".format(round(pandas_duration, 3)))\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"start = time.time()\\n\",\n    \"\\n\",\n    \"modin_isnull = modin_df.isnull()\\n\",\n    \"\\n\",\n    \"end = time.time()\\n\",\n    \"modin_duration = end - start\\n\",\n    \"print(\\\"Time to isnull with Modin: {} seconds\\\".format(round(modin_duration, 3)))\\n\",\n    \"\\n\",\n    \"printmd(\\\"### Modin is {}x faster than pandas at `isnull`!\\\".format(round(pandas_duration / modin_duration, 2)))\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Are they equal?\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"pandas_isnull\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"modin_isnull\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Concept for exercise: Apply over a single column\\n\",\n    \"\\n\",\n    \"Sometimes we want to compute some summary statistics on a single column from our dataset.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"start = time.time()\\n\",\n    \"rounded_trip_distance_pandas = pandas_df[\\\"trip_distance\\\"].apply(round)\\n\",\n    
\"\\n\",\n    \"end = time.time()\\n\",\n    \"pandas_duration = end - start\\n\",\n    \"print(\\\"Time to groupby with pandas: {} seconds\\\".format(round(pandas_duration, 3)))\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"start = time.time()\\n\",\n    \"\\n\",\n    \"rounded_trip_distance_modin = modin_df[\\\"trip_distance\\\"].apply(round)\\n\",\n    \"\\n\",\n    \"end = time.time()\\n\",\n    \"modin_duration = end - start\\n\",\n    \"print(\\\"Time to add a column with Modin: {} seconds\\\".format(round(modin_duration, 3)))\\n\",\n    \"\\n\",\n    \"printmd(\\\"### Modin is {}x faster than pandas at `apply` on one column!\\\".format(round(pandas_duration / modin_duration, 2)))\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Are they equal?\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"rounded_trip_distance_pandas\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"rounded_trip_distance_modin\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Concept for exercise: Add a column\\n\",\n    \"\\n\",\n    \"It is common to need to add a new column to an existing dataframe, here we show that this is significantly faster in Modin due to metadata management and an efficient zero copy implementation.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"start = time.time()\\n\",\n    \"pandas_df[\\\"rounded_trip_distance\\\"] = rounded_trip_distance_pandas\\n\",\n    \"\\n\",\n    \"end = time.time()\\n\",\n    \"pandas_duration = end - start\\n\",\n    \"print(\\\"Time to 
groupby with pandas: {} seconds\\\".format(round(pandas_duration, 3)))\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"start = time.time()\\n\",\n    \"\\n\",\n    \"modin_df[\\\"rounded_trip_distance\\\"] = rounded_trip_distance_modin\\n\",\n    \"\\n\",\n    \"end = time.time()\\n\",\n    \"modin_duration = end - start\\n\",\n    \"print(\\\"Time to add a column with Modin: {} seconds\\\".format(round(modin_duration, 3)))\\n\",\n    \"\\n\",\n    \"printmd(\\\"### Modin is {}x faster than pandas add a column!\\\".format(round(pandas_duration / modin_duration, 2)))\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Are they equal?\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"pandas_df\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"modin_df\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"**Please move on to [Exercise 3](./exercise_3.ipynb) when you are ready**\"\n   ]\n  }\n ],\n \"metadata\": {\n  \"kernelspec\": {\n   \"display_name\": \"Python 3 (ipykernel)\",\n   \"language\": \"python\",\n   \"name\": \"python3\"\n  },\n  \"language_info\": {\n   \"codemirror_mode\": {\n    \"name\": \"ipython\",\n    \"version\": 3\n   },\n   \"file_extension\": \".py\",\n   \"mimetype\": \"text/x-python\",\n   \"name\": \"python\",\n   \"nbconvert_exporter\": \"python\",\n   \"pygments_lexer\": \"ipython3\",\n   \"version\": \"3.8.12\"\n  }\n },\n \"nbformat\": 4,\n \"nbformat_minor\": 2\n}\n"
  },
  {
    "path": "examples/tutorial/jupyter/execution/pandas_on_ray/local/exercise_3.ipynb",
    "content": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"![LOGO](../../../img/MODIN_ver2_hrz.png)\\n\",\n    \"\\n\",\n    \"<center><h2>Scale your pandas workflows by changing one line of code</h2>\\n\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Exercise 3: Not Implemented\\n\",\n    \"\\n\",\n    \"**GOAL**: Learn what happens when a function is not yet supported in Modin as well as how to extend Modin's functionality using the DataFrame Algebra.\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"When functionality has not yet been implemented, we default to pandas\\n\",\n    \"\\n\",\n    \"![](../../../img/convert_to_pandas.png)\\n\",\n    \"\\n\",\n    \"We convert a Modin dataframe to pandas to do the operation, then convert it back once it is finished. These operations will have a high overhead due to the communication involved and will take longer than pandas.\\n\",\n    \"\\n\",\n    \"When this is happening, a warning will be given to the user to inform them that this operation will take longer than usual. For example, `DataFrame.mask` is not yet implemented. 
In this case, when a user tries to use it, they will see this warning:\\n\",\n    \"\\n\",\n    \"```\\n\",\n    \"UserWarning: `DataFrame.mask` defaulting to pandas implementation.\\n\",\n    \"```\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Concept for exercise: Default to pandas\\n\",\n    \"\\n\",\n    \"In this section of the exercise we will see first-hand how the runtime is affected by operations that are not implemented.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"import modin.pandas as pd\\n\",\n    \"import pandas\\n\",\n    \"import numpy as np\\n\",\n    \"import time\\n\",\n    \"\\n\",\n    \"frame_data = np.random.randint(0, 100, size=(2**18, 2**8))\\n\",\n    \"df = pd.DataFrame(frame_data).add_prefix(\\\"col\\\")\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"pandas_df = pandas.DataFrame(frame_data).add_prefix(\\\"col\\\")\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"modin_start = time.time()\\n\",\n    \"\\n\",\n    \"print(df.mask(df < 50))\\n\",\n    \"\\n\",\n    \"modin_end = time.time()\\n\",\n    \"print(\\\"Modin mask took {} seconds.\\\".format(round(modin_end - modin_start, 4)))\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"pandas_start = time.time()\\n\",\n    \"\\n\",\n    \"print(pandas_df.mask(pandas_df < 50))\\n\",\n    \"\\n\",\n    \"pandas_end = time.time()\\n\",\n    \"print(\\\"pandas mask took {} seconds.\\\".format(round(pandas_end - pandas_start, 4)))\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Concept for 
exercise: Register custom functions\\n\",\n    \"\\n\",\n    \"Modin's user-facing API is pandas, but it is possible that we do not yet support your favorite or most-needed functionalities. Your user-defined function may also be able to be executed more efficiently if you pre-define the type of function it is (e.g. map, reduce, etc.) using the DataFrame Algebra. To solve either case, it is possible to register a custom function to be applied to your data.\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### Registering a custom function for all query compilers\\n\",\n    \"\\n\",\n    \"To register a custom function for a query compiler, we first need to import it:\\n\",\n    \"\\n\",\n    \"```python\\n\",\n    \"from modin.core.storage_formats.pandas.query_compiler import PandasQueryCompiler\\n\",\n    \"```\\n\",\n    \"\\n\",\n    \"The `PandasQueryCompiler` is responsible for defining and compiling the queries that can be operated on by Modin, and is specific to the pandas storage format. Any queries defined here must also both be compatible with and result in a `pandas.DataFrame`. Many functionalities are very simply implemented, as you can see in the current code: [Link](https://github.com/modin-project/modin/blob/7a8158873e77cb5f1a5a3b89be4ddac89f576269/modin/core/storage_formats/pandas/query_compiler.py#L216).\\n\",\n    \"\\n\",\n    \"If we want to register a new function, we need to understand what kind of function it is. In our example, we will try to implement a `kurtosis` on the unary negation of the values in the dataframe, which is a map (unary negation of each cell) followed by a reduce. 
So we next want to import the function type so we can use it in our definition:\\n\",\n    \"\\n\",\n    \"```python\\n\",\n    \"from modin.core.dataframe.algebra import TreeReduce\\n\",\n    \"```\\n\",\n    \"\\n\",\n    \"Then we can just use the `TreeReduce.register` `classmethod` and assign it to the `PandasQueryCompiler`:\\n\",\n    \"\\n\",\n    \"```python\\n\",\n    \"PandasQueryCompiler.neg_kurtosis = TreeReduce.register(lambda cell_value, **kwargs: ~cell_value, pandas.DataFrame.kurtosis)\\n\",\n    \"```\\n\",\n    \"\\n\",\n    \"We include `**kwargs` to the `lambda` function since the query compiler will pass all keyword arguments to both the map and reduce functions.\\n\",\n    \"\\n\",\n    \"Finally, we want a handle to it from the `DataFrame`, so we need to create a way to do that:\\n\",\n    \"\\n\",\n    \"```python\\n\",\n    \"def neg_kurtosis_func(self, **kwargs):\\n\",\n    \"    # The constructor allows you to pass in a query compiler as a keyword argument\\n\",\n    \"    return self.__constructor__(query_compiler=self._query_compiler.neg_kurtosis(**kwargs))\\n\",\n    \"\\n\",\n    \"pd.DataFrame.neg_kurtosis_custom = neg_kurtosis_func\\n\",\n    \"```\\n\",\n    \"\\n\",\n    \"And then you can use it like you usually would:\\n\",\n    \"\\n\",\n    \"```python\\n\",\n    \"df.neg_kurtosis_custom()\\n\",\n    \"```\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"from modin.core.storage_formats.pandas.query_compiler import PandasQueryCompiler\\n\",\n    \"from modin.core.dataframe.algebra import TreeReduce\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"PandasQueryCompiler.neg_kurtosis_custom = TreeReduce.register(lambda cell_value, **kwargs: ~cell_value,\\n\",\n    \"                                                             
pandas.DataFrame.kurtosis)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"from pandas._libs import lib\\n\",\n    \"# The function signature came from the pandas documentation:\\n\",\n    \"# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.kurtosis.html\\n\",\n    \"def neg_kurtosis_func(self, axis=lib.no_default, skipna=True, level=None, numeric_only=None, **kwargs):\\n\",\n    \"    # We need to specify the axis for the query compiler\\n\",\n    \"    if axis in [None, lib.no_default]:\\n\",\n    \"        axis = 0\\n\",\n    \"    # The constructor allows you to pass in a query compiler as a keyword argument\\n\",\n    \"    # Reduce dimension is used for reduces\\n\",\n    \"    # We also pass all keyword arguments here to ensure correctness\\n\",\n    \"    return self._reduce_dimension(\\n\",\n    \"        self._query_compiler.neg_kurtosis_custom(\\n\",\n    \"            axis=axis, skipna=skipna, level=level, numeric_only=numeric_only, **kwargs\\n\",\n    \"        )\\n\",\n    \"    )\\n\",\n    \"\\n\",\n    \"pd.DataFrame.neg_kurtosis_custom = neg_kurtosis_func\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### Speed improvements\\n\",\n    \"If we were to try and replicate this functionality using the pandas API, we would need to call `df.applymap` with our unary negation function, and subsequently `df.kurtosis` on the result of the first call. 
Let's see how this compares with our new, custom function!\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"start = time.time()\\n\",\n    \"\\n\",\n    \"print(pandas_df.applymap(lambda cell_value: ~cell_value).kurtosis())\\n\",\n    \"\\n\",\n    \"end = time.time()\\n\",\n    \"pandas_duration = end - start\\n\",\n    \"print(\\\"pandas unary negation kurtosis took {} seconds.\\\".format(pandas_duration))\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"start = time.time()\\n\",\n    \"\\n\",\n    \"print(df.applymap(lambda x: ~x).kurtosis())\\n\",\n    \"\\n\",\n    \"end = time.time()\\n\",\n    \"modin_duration = end - start\\n\",\n    \"print(\\\"Modin unary negation kurtosis took {} seconds.\\\".format(modin_duration))\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"custom_start = time.time()\\n\",\n    \"\\n\",\n    \"print(df.neg_kurtosis_custom())\\n\",\n    \"\\n\",\n    \"custom_end = time.time()\\n\",\n    \"modin_custom_duration = custom_end - custom_start\\n\",\n    \"print(\\\"Modin neg_kurtosis_custom took {} seconds.\\\".format(modin_custom_duration))\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"from IPython.display import Markdown, display\\n\",\n    \"\\n\",\n    \"display(Markdown(\\\"### As expected, Modin is {}x faster than pandas when chaining the functions; however we see that our custom function is even faster than that - beating pandas by {}x, and Modin (when chaining the functions) by {}x!\\\".format(round(pandas_duration / modin_duration, 2), round(pandas_duration / modin_custom_duration, 2), round(modin_duration / modin_custom_duration, 
2))))\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Congratulations! You have just implemented new DataFrame functionality!\\n\",\n    \"\\n\",\n    \"## Consider opening a pull request: https://github.com/modin-project/modin/pulls\\n\",\n    \"\\n\",\n    \"For a complete list of what is implemented, see the [Supported APIs](https://modin.readthedocs.io/en/latest/supported_apis/index.html) section.\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Test your knowledge: Add a custom function for another tree reduce: finding `DataFrame.mad` after squaring all of the values\\n\",\n    \"\\n\",\n    \"See the pandas documentation for the correct signature: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.mad.html\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"modin_mad_custom_start = time.time()\\n\",\n    \"\\n\",\n    \"# Implement your function here! Put the result of your custom squared `mad` in the variable `modin_mad_custom`\\n\",\n    \"# Hint: Look at the kurtosis walkthrough above\\n\",\n    \"\\n\",\n    \"modin_mad_custom = ...\\n\",\n    \"print(modin_mad_custom)\\n\",\n    \"\\n\",\n    \"modin_mad_custom_end = time.time()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Evaluation code, do not change!\\n\",\n    \"modin_mad_start = time.time()\\n\",\n    \"modin_mad = df.applymap(lambda x: x**2).mad()\\n\",\n    \"print(modin_mad)\\n\",\n    \"modin_mad_end = time.time()\\n\",\n    \"\\n\",\n    \"assert modin_mad_end - modin_mad_start > modin_mad_custom_end - modin_mad_custom_start, \\\\\\n\",\n    \"    \\\"Your implementation was too slow, or you used the chaining functions approach. 
Try again\\\"\\n\",\n    \"assert modin_mad._to_pandas().equals(modin_mad_custom._to_pandas()), \\\"Your result did not match the result of chaining the functions, try again\\\"\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### Now that you are able to create custom functions, you know enough to contribute to Modin!\\n\",\n    \"\\n\",\n    \"**Please move on to [Exercise 4](./exercise_4.ipynb) when you are ready**\"\n   ]\n  }\n ],\n \"metadata\": {\n  \"kernelspec\": {\n   \"display_name\": \"Python 3 (ipykernel)\",\n   \"language\": \"python\",\n   \"name\": \"python3\"\n  },\n  \"language_info\": {\n   \"codemirror_mode\": {\n    \"name\": \"ipython\",\n    \"version\": 3\n   },\n   \"file_extension\": \".py\",\n   \"mimetype\": \"text/x-python\",\n   \"name\": \"python\",\n   \"nbconvert_exporter\": \"python\",\n   \"pygments_lexer\": \"ipython3\",\n   \"version\": \"3.8.12\"\n  }\n },\n \"nbformat\": 4,\n \"nbformat_minor\": 2\n}\n"
  },
  {
    "path": "examples/tutorial/jupyter/execution/pandas_on_ray/local/exercise_4.ipynb",
    "content": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"id\": \"99f41d2d\",\n   \"metadata\": {},\n   \"source\": [\n    \"![LOGO](../../../img/MODIN_ver2_hrz.png)\\n\",\n    \"\\n\",\n    \"<center><h2>Scale your pandas workflows by changing one line of code</h2>\\n\",\n    \"\\n\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"id\": \"fdda1c9c\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Exercise 4: Experimental Features\\n\",\n    \"\\n\",\n    \"**GOAL**: Explore some of the experimental features being added to Modin.\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"id\": \"9b487c51\",\n   \"metadata\": {},\n   \"source\": [\n    \"### Concept for exercise: Progress Bar\\n\",\n    \"\\n\",\n    \"\\n\",\n    \"Sometimes when running long functions on DataFrames, it can be hard to tell how much progress has been made, as well as how much longer the function will run. A progress bar allows users to see the estimated progress and completion time of each line they run, in environments such as a shell or Jupyter notebook.\\n\",\n    \"\\n\",\n    \"To enable Modin's Progress Bar, add the following lines of code after importing `modin.pandas`:\\n\",\n    \"```python\\n\",\n    \"from modin.config import ProgressBar\\n\",\n    \"ProgressBar.enable()\\n\",\n    \"```\\n\",\n    \"\\n\",\n    \"In this exercise, we'll see how the progress bar can improve our experience running dataframe queries!\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"id\": \"f95d4874\",\n   \"metadata\": {\n    \"scrolled\": false\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"import modin.pandas as pd\\n\",\n    \"import numpy as np\\n\",\n    \"from modin.config import ProgressBar\\n\",\n    \"ProgressBar.enable()\\n\",\n    \"\\n\",\n    \"frame_data = np.random.randint(0, 100, size=(2**18, 2**8))\\n\",\n    \"df = pd.DataFrame(frame_data).add_prefix(\\\"col\\\")\"\n   ]\n  },\n  {\n   \"cell_type\": 
\"markdown\",\n   \"id\": \"6905bc6b\",\n   \"metadata\": {},\n   \"source\": [\n    \"On longer functions, its nice to be able to see an estimation of how much longer things will take!\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"id\": \"236ec8e2\",\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"df = df.applymap(lambda x: ~x)\\n\",\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"id\": \"e7bf87a5\",\n   \"metadata\": {},\n   \"source\": [\n    \"### Concept for exercise: Spreadsheet\\n\",\n    \"\\n\",\n    \"For those who have worked with Excel, the Spreadsheet API will definitely feel familiar! The Spreadsheet API is a Jupyter notebook widget that allows us to interact with Modin DataFrames in a spreadsheet-like fashion while taking advantage of the underlying capabilities of Modin. The widget makes it quick and easy to explore, sort, filter, and edit data as well as export the changes as reproducible code.\\n\",\n    \"\\n\",\n    \"Let's look back at a subset of the 2015 NYC Taxi Data from Exercise 2, and see how the Spreadsheet API can make it easy to play with the data!\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"id\": \"5d5c4a3e\",\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"!jupyter nbextension enable --py --sys-prefix modin_spreadsheet\\n\",\n    \"ProgressBar.disable()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"id\": \"dc8d5903\",\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"import modin.experimental.spreadsheet as mss\\n\",\n    \"\\n\",\n    \"s3_path = \\\"s3://dask-data/nyc-taxi/2015/yellow_tripdata_2015-01.csv\\\"\\n\",\n    \"modin_df = pd.read_csv(s3_path, parse_dates=[\\\"tpep_pickup_datetime\\\", \\\"tpep_dropoff_datetime\\\"], quoting=3, nrows=1000)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"id\": 
\"145e7bbe\",\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"spreadsheet = mss.from_dataframe(modin_df)\\n\",\n    \"spreadsheet\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"id\": \"3c18b7f2\",\n   \"metadata\": {},\n   \"source\": [\n    \"### Thank you for participating!\"\n   ]\n  }\n ],\n \"metadata\": {\n  \"kernelspec\": {\n   \"display_name\": \"Python 3 (ipykernel)\",\n   \"language\": \"python\",\n   \"name\": \"python3\"\n  },\n  \"language_info\": {\n   \"codemirror_mode\": {\n    \"name\": \"ipython\",\n    \"version\": 3\n   },\n   \"file_extension\": \".py\",\n   \"mimetype\": \"text/x-python\",\n   \"name\": \"python\",\n   \"nbconvert_exporter\": \"python\",\n   \"pygments_lexer\": \"ipython3\",\n   \"version\": \"3.8.12\"\n  }\n },\n \"nbformat\": 4,\n \"nbformat_minor\": 5\n}\n"
  },
  {
    "path": "examples/tutorial/jupyter/execution/pandas_on_ray/requirements.txt",
    "content": "fsspec>=2022.11.0\njupyterlab\nipywidgets\ntqdm>=4.60.0\nmodin[ray]\nmodin[spreadsheet]\n"
  },
  {
    "path": "examples/tutorial/jupyter/execution/pandas_on_ray/test/test_notebooks.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport os\nimport sys\n\nimport nbformat\n\nMODIN_DIR = os.path.abspath(\n    os.path.join(os.path.dirname(__file__), *[\"..\" for _ in range(6)])\n)\nsys.path.insert(0, MODIN_DIR)\nfrom examples.tutorial.jupyter.execution.test.utils import (  # noqa: E402\n    _execute_notebook,\n    _find_code_cell_idx,\n    _replace_str,\n    download_taxi_dataset,\n    test_dataset_path,\n)\n\nlocal_notebooks_dir = \"examples/tutorial/jupyter/execution/pandas_on_ray/local\"\n\n\n# in this notebook user should replace 'import pandas as pd' with\n# 'import modin.pandas as pd' to make notebook work\ndef test_exercise_1():\n    modified_notebook_path = os.path.join(local_notebooks_dir, \"exercise_1_test.ipynb\")\n    nb = nbformat.read(\n        os.path.join(local_notebooks_dir, \"exercise_1.ipynb\"),\n        as_version=nbformat.NO_CONVERT,\n    )\n\n    _replace_str(nb, \"import pandas as pd\", \"import modin.pandas as pd\")\n\n    nbformat.write(nb, modified_notebook_path)\n    _execute_notebook(modified_notebook_path)\n\n\n# this notebook works \"as is\" but for testing purposes we can use smaller dataset\ndef test_exercise_2():\n    modified_notebook_path = 
os.path.join(local_notebooks_dir, \"exercise_2_test.ipynb\")\n    nb = nbformat.read(\n        os.path.join(local_notebooks_dir, \"exercise_2.ipynb\"),\n        as_version=nbformat.NO_CONVERT,\n    )\n\n    _replace_str(\n        nb,\n        'path = \"s3://dask-data/nyc-taxi/2015/yellow_tripdata_2015-01.csv\"',\n        '# path = \"s3://dask-data/nyc-taxi/2015/yellow_tripdata_2015-01.csv\"',\n    )\n\n    new_optional_cell = f'path = \"{test_dataset_path}\"\\n' + download_taxi_dataset\n\n    optional_cell_idx = _find_code_cell_idx(nb, \"[Optional] Download data locally.\")\n    nb[\"cells\"][optional_cell_idx][\"source\"] = new_optional_cell\n\n    nbformat.write(nb, modified_notebook_path)\n    _execute_notebook(modified_notebook_path)\n\n\n# in this notebook user should add custom mad implementation\n# to make notebook work\ndef test_exercise_3():\n    modified_notebook_path = os.path.join(local_notebooks_dir, \"exercise_3_test.ipynb\")\n    nb = nbformat.read(\n        os.path.join(local_notebooks_dir, \"exercise_3.ipynb\"),\n        as_version=nbformat.NO_CONVERT,\n    )\n\n    user_mad_implementation = \"\"\"PandasQueryCompiler.sq_mad_custom = TreeReduce.register(lambda cell_value, **kwargs: cell_value ** 2,\n                                                             pandas.DataFrame.mad)\n\ndef sq_mad_func(self, axis=None, skipna=True, level=None, **kwargs):\n    if axis is None:\n        axis = 0\n\n    return self._reduce_dimension(\n        self._query_compiler.sq_mad_custom(\n            axis=axis, skipna=skipna, level=level, **kwargs\n        )\n    )\n\npd.DataFrame.sq_mad_custom = sq_mad_func\n\nmodin_mad_custom = df.sq_mad_custom()\n    \"\"\"\n\n    _replace_str(nb, \"modin_mad_custom = ...\", user_mad_implementation)\n\n    nbformat.write(nb, modified_notebook_path)\n    # need to update example, `.mad` doesn't exist\n    # _execute_notebook(modified_notebook_path)\n\n\n# this notebook works \"as is\" but for testing purposes we can use smaller 
dataset\ndef test_exercise_4():\n    modified_notebook_path = os.path.join(local_notebooks_dir, \"exercise_4_test.ipynb\")\n    nb = nbformat.read(\n        os.path.join(local_notebooks_dir, \"exercise_4.ipynb\"),\n        as_version=nbformat.NO_CONVERT,\n    )\n\n    s3_path_cell = f's3_path = \"{test_dataset_path}\"\\n' + download_taxi_dataset\n    _replace_str(\n        nb,\n        's3_path = \"s3://dask-data/nyc-taxi/2015/yellow_tripdata_2015-01.csv\"',\n        s3_path_cell,\n    )\n\n    nbformat.write(nb, modified_notebook_path)\n    _execute_notebook(modified_notebook_path)\n"
  },
  {
    "path": "examples/tutorial/jupyter/execution/pandas_on_unidist/Dockerfile",
    "content": "FROM continuumio/miniconda3\n\nRUN conda env create -f jupyter_unidist_env.yml\nRUN conda install -c conda-forge psutil setproctitle\n"
  },
  {
    "path": "examples/tutorial/jupyter/execution/pandas_on_unidist/README.md",
    "content": "# Jupyter notebook examples to run with PandasOnUnidist\n\nCurrently, Modin supports `PandasOnUnidist` execution only with MPI backend of [unidist](https://github.com/modin-project/unidist).\nThere are some specifics on how to run a jupyter notebook with MPI, namely, you should use `mpiexec` command.\n\n```bash\nmpiexec -n 1 jupyter notebook\n```\n\n**Important**\n\nMPI is not reliable yet to work in interactive environment such as jupyter notebooks. Thus, some things may not work.\nFor example, if you are experiencing the error `The kernel appears to have died. It will restart automatically.`,\nyou may want to modify `kernel.json` file or create a new one in order to fix the problem.\n\nFor simplicity, you can just run `setup_kernel.py` script located in this directory. This will install a new MPI enabled kernel,\nwhich you can then select using the dropdown menu in your browser. Otherwise, you can follow the steps below:\n\n1. First, what you should do is locate `kernel.json` file with `jupyter kernelspec list` command. It should generally be like this.\n\n```bash\njupyter kernelspec list\n\nAvailable kernels:\n  python3    $PREFIX/share/jupyter/kernels/python3\n```\n\n`kernel.json` file should be located in `python3` folder.\n\n2. Second, you should make a copy of the `python3` folder, say to `python3mpi` folder.\n\n```bash\ncp -r $PREFIX/share/jupyter/kernels/python3 $PREFIX/share/jupyter/kernels/python3mpi\n```\n\n3. Third, modify `kernel.json` file in `python3mpi` folder to add `mpiexec -n 1` command\n(like \"mpiexec\", \"-n\", \"1\") to the beginning of the launched command (`argv`).\n\n4. 
Fourth, change `display_name` in the `kernel.json` file to something like `Python 3 (ipykernel) with MPI`.\nThat way you can specifically select the MPI-enabled Python kernel using the dropdown menu in your browser.\n\n## Run Jupyter Notebooks with PandasOnUnidist\n\nAfter the `setup_kernel.py` script has been run or the steps above are done, you can run a Jupyter notebook with `PandasOnUnidist` in the normal way.\n\n```bash\njupyter notebook\n```\n"
  },
  {
    "path": "examples/tutorial/jupyter/execution/pandas_on_unidist/jupyter_unidist_env.yml",
    "content": "name: jupyter_modin_on_unidist\nchannels:\n  - conda-forge\ndependencies:\n  - pip\n  - fsspec>=2022.11.0\n  - jupyterlab\n  - ipywidgets\n  - modin-mpi\n  - pip:\n      - modin[spreadsheet]\n"
  },
  {
    "path": "examples/tutorial/jupyter/execution/pandas_on_unidist/local/exercise_1.ipynb",
    "content": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"![LOGO](../../../img/MODIN_ver2_hrz.png)\\n\",\n    \"\\n\",\n    \"<center><h2>Scale your pandas workflows by changing one line of code</h2>\\n\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Exercise 1: How to use Modin\\n\",\n    \"\\n\",\n    \"**GOAL**: Learn how to import Modin to accelerate and scale pandas workflows.\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"Modin is a drop-in replacement for pandas that distributes the computation \\n\",\n    \"across all of the cores in your machine or in a cluster.\\n\",\n    \"In practical terms, this means that you can continue using the same pandas scripts\\n\",\n    \"as before and expect the behavior and results to be the same. The only thing that needs\\n\",\n    \"to change is the import statement. Normally, you would change:\\n\",\n    \"\\n\",\n    \"```python\\n\",\n    \"import pandas as pd\\n\",\n    \"```\\n\",\n    \"\\n\",\n    \"to:\\n\",\n    \"\\n\",\n    \"```python\\n\",\n    \"import modin.pandas as pd\\n\",\n    \"```\\n\",\n    \"\\n\",\n    \"Changing this line of code will allow you to use all of the cores in your machine to do computation on your data. One of the major performance bottlenecks of pandas is that it only uses a single core for any given computation. Modin exposes an API that is identical to pandas, allowing you to continue interacting with your data as you would with pandas. There are no additional commands required to use Modin locally. 
Partitioning, scheduling, data transfer, and other related concerns are all handled by Modin under the hood.\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"<p style=\\\"text-align:left;\\\">\\n\",\n    \"        <h1>pandas on a multicore laptop\\n\",\n    \"    <span style=\\\"float:right;\\\">\\n\",\n    \"        Modin on a multicore laptop\\n\",\n    \"    </span>\\n\",\n    \"\\n\",\n    \"<div>\\n\",\n    \"<img align=\\\"left\\\" src=\\\"../../../img/pandas_multicore.png\\\"><img src=\\\"../../../img/modin_multicore.png\\\">\\n\",\n    \"</div>\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### Concept for exercise: setting Modin engine\\n\",\n    \"\\n\",\n    \"Modin uses Ray as an execution engine by default so no additional action is required to start to use it. Alternatively, if you need to use another engine, it should be specified either by setting the Modin config or by setting Modin environment variable before the first operation with Modin as it is shown below. Also, note that the full list of Modin configs and corresponding environment variables can be found in the [Modin Configuration Settings](https://modin.readthedocs.io/en/stable/flow/modin/config.html#modin-configs-list) section of the Modin documentation.\\n\",\n    \"\\n\",\n    \"One of the execution engines that Modin uses is Unidist. Currently, Modin only supports MPI through unidist, so it should be specified either by setting the Unidist config or by setting Unidist environment variable. 
The full list of Unidist configs and corresponding environment variables can be found in the [Unidist Configuration Settings](https://unidist.readthedocs.io/en/latest/flow/unidist/config.html#unidist-configuration-settings-list) section of the Unidist documentation.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"vscode\": {\n     \"languageId\": \"python\"\n    }\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"# Modin engine and Unidist backend can be specified either by config\\n\",\n    \"import modin.config as modin_cfg\\n\",\n    \"import unidist.config as unidist_cfg\\n\",\n    \"modin_cfg.Engine.put(\\\"unidist\\\")\\n\",\n    \"unidist_cfg.Backend.put(\\\"mpi\\\")\\n\",\n    \"\\n\",\n    \"# or by setting the environment variable\\n\",\n    \"# import os\\n\",\n    \"# os.environ[\\\"MODIN_ENGINE\\\"] = \\\"unidist\\\"\\n\",\n    \"# os.environ[\\\"UNIDIST_BACKEND\\\"] = \\\"mpi\\\"\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### Concept for exercise: Dataframe constructor\\n\",\n    \"\\n\",\n    \"Often when playing around in pandas, it is useful to create a DataFrame with the constructor. That is where we will start.\\n\",\n    \"\\n\",\n    \"```python\\n\",\n    \"import numpy as np\\n\",\n    \"import pandas as pd\\n\",\n    \"\\n\",\n    \"frame_data = np.random.randint(0, 100, size=(2**10, 2**5))\\n\",\n    \"df = pd.DataFrame(frame_data)\\n\",\n    \"```\\n\",\n    \"\\n\",\n    \"When creating a dataframe from a non-distributed object, it will take extra time to partition the data. When this is happening, you will see this message:\\n\",\n    \"\\n\",\n    \"```\\n\",\n    \"UserWarning: Distributing <class 'numpy.ndarray'> object. 
This may take some time.\\n\",\n    \"```\\n\",\n    \"\\n\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"vscode\": {\n     \"languageId\": \"python\"\n    }\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"# Note: Do not change this code!\\n\",\n    \"import numpy as np\\n\",\n    \"import pandas\\n\",\n    \"import sys\\n\",\n    \"import modin\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"vscode\": {\n     \"languageId\": \"python\"\n    }\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"pandas.__version__\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"vscode\": {\n     \"languageId\": \"python\"\n    }\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"modin.__version__\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"vscode\": {\n     \"languageId\": \"python\"\n    }\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"# Implement your answer here. You are also free to play with the size\\n\",\n    \"# and shape of the DataFrame, but beware of exceeding your memory!\\n\",\n    \"\\n\",\n    \"# import pandas as pd\\n\",\n    \"import pandas as pd\\n\",\n    \"\\n\",\n    \"frame_data = np.random.randint(0, 100, size=(2**5, 2**5))\\n\",\n    \"df = pd.DataFrame(frame_data)\\n\",\n    \"\\n\",\n    \"# ***** Do not change the code below! It verifies that \\n\",\n    \"# ***** the exercise has been done correctly. *****\\n\",\n    \"\\n\",\n    \"try:\\n\",\n    \"    assert df is not None\\n\",\n    \"    assert frame_data is not None\\n\",\n    \"    assert isinstance(frame_data, np.ndarray)\\n\",\n    \"except:\\n\",\n    \"    raise AssertionError(\\\"Don't change too much of the original code!\\\")\\n\",\n    \"assert \\\"modin.pandas\\\" in sys.modules, \\\"Not quite correct. 
Remember the single line of code change (See above)\\\"\\n\",\n    \"\\n\",\n    \"import modin.pandas\\n\",\n    \"assert pd == modin.pandas, \\\"Remember the single line of code change (See above)\\\"\\n\",\n    \"assert hasattr(df, \\\"_query_compiler\\\"), \\\"Make sure that `df` is a modin.pandas DataFrame.\\\"\\n\",\n    \"\\n\",\n    \"print(\\\"Success! You only need to change one line of code!\\\")\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"Now that we have created a toy example for playing around with the DataFrame, let's print it out in different ways.\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### Concept for Exercise: Data Interaction and Printing\\n\",\n    \"\\n\",\n    \"When interacting with data, it is very important to look at different parts of the data (e.g. `df.head()`). Here we will show that you can print the modin.pandas DataFrame in the same ways you would pandas.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"vscode\": {\n     \"languageId\": \"python\"\n    }\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"# Print the first 10 lines.\\n\",\n    \"df.head(10)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"vscode\": {\n     \"languageId\": \"python\"\n    }\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"# Print the DataFrame.\\n\",\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"vscode\": {\n     \"languageId\": \"python\"\n    }\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"# Free cell for custom interaction (Play around here!)\\n\",\n    \"df.add_prefix(\\\"col\\\")\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"vscode\": {\n     \"languageId\": \"python\"\n    }\n   
},\n   \"outputs\": [],\n   \"source\": [\n    \"df.count()\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"**Please move on to [Exercise 2](./exercise_2.ipynb) when you are ready**\"\n   ]\n  }\n ],\n \"metadata\": {\n  \"kernelspec\": {\n   \"display_name\": \"Python 3 (ipykernel) with MPI\",\n   \"language\": \"python\",\n   \"name\": \"python3mpi\"\n  },\n  \"language_info\": {\n   \"codemirror_mode\": {\n    \"name\": \"ipython\",\n    \"version\": 3\n   },\n   \"file_extension\": \".py\",\n   \"mimetype\": \"text/x-python\",\n   \"name\": \"python\",\n   \"nbconvert_exporter\": \"python\",\n   \"pygments_lexer\": \"ipython3\",\n   \"version\": \"3.8.12\"\n  }\n },\n \"nbformat\": 4,\n \"nbformat_minor\": 2\n}\n"
  },
  {
    "path": "examples/tutorial/jupyter/execution/pandas_on_unidist/local/exercise_2.ipynb",
    "content": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"![LOGO](../../../img/MODIN_ver2_hrz.png)\\n\",\n    \"\\n\",\n    \"<center><h2>Scale your pandas workflows by changing one line of code</h2>\\n\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Exercise 2: Speed improvements\\n\",\n    \"\\n\",\n    \"**GOAL**: Learn about common functionality that Modin speeds up by using all of your machine's cores.\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Concept for Exercise: `read_csv` speedups\\n\",\n    \"\\n\",\n    \"The most commonly used data ingestion method used in pandas is CSV files (link to pandas survey). This concept is designed to give an idea of the kinds of speedups possible, even on a non-distributed filesystem. Modin also supports other file formats for parallel and distributed reads, which can be found in the documentation. 
We will import both Modin and pandas so that the speedups are evident.\\n\",\n    \"\\n\",\n    \"**Note: Rerunning the `read_csv` cells many times may result in degraded performance, depending on the memory of the machine**\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"import modin.pandas as pd\\n\",\n    \"import pandas\\n\",\n    \"import time\\n\",\n    \"from IPython.display import Markdown, display\\n\",\n    \"\\n\",\n    \"def printmd(string):\\n\",\n    \"    display(Markdown(string))\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### Dataset: 2015 NYC taxi trip data\\n\",\n    \"\\n\",\n    \"\\n\",\n    \"We will be using a version of this data already in S3, originally posted in this blog post: https://matthewrocklin.com/blog/work/2017/01/12/dask-dataframes\\n\",\n    \"\\n\",\n    \"**Size: ~1.8GB**\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"path = \\\"s3://dask-data/nyc-taxi/2015/yellow_tripdata_2015-01.csv\\\"\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"Modin execution engine setting:\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"import modin.config as modin_cfg\\n\",\n    \"import unidist.config as unidist_cfg\\n\",\n    \"modin_cfg.Engine.put(\\\"unidist\\\")\\n\",\n    \"unidist_cfg.Backend.put(\\\"mpi\\\")\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## `pandas.read_csv`\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"start = time.time()\\n\",\n    \"\\n\",\n    \"pandas_df = 
pandas.read_csv(path, parse_dates=[\\\"tpep_pickup_datetime\\\", \\\"tpep_dropoff_datetime\\\"], quoting=3)\\n\",\n    \"\\n\",\n    \"end = time.time()\\n\",\n    \"pandas_duration = end - start\\n\",\n    \"print(\\\"Time to read with pandas: {} seconds\\\".format(round(pandas_duration, 3)))\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### Expect pandas to take >3 minutes on EC2, longer locally\\n\",\n    \"\\n\",\n    \"This is a good time to chat with your neighbor\\n\",\n    \"Discussion topics\\n\",\n    \"- Do you work with a large amount of data daily?\\n\",\n    \"- How big is your data?\\n\",\n    \"- What’s the common use case of your data?\\n\",\n    \"- Do you use any big data analytics tools?\\n\",\n    \"- Do you use any interactive analytics tool?\\n\",\n    \"- What are some drawbacks of your current interactive analytics tools today?\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## `modin.pandas.read_csv`\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"start = time.time()\\n\",\n    \"\\n\",\n    \"modin_df = pd.read_csv(path, parse_dates=[\\\"tpep_pickup_datetime\\\", \\\"tpep_dropoff_datetime\\\"], quoting=3)\\n\",\n    \"\\n\",\n    \"end = time.time()\\n\",\n    \"modin_duration = end - start\\n\",\n    \"print(\\\"Time to read with Modin: {} seconds\\\".format(round(modin_duration, 3)))\\n\",\n    \"\\n\",\n    \"printmd(\\\"### Modin is {}x faster than pandas at `read_csv`!\\\".format(round(pandas_duration / modin_duration, 2)))\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Are they equal?\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"pandas_df\"\n   ]\n  },\n  {\n   \"cell_type\": 
\"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"modin_df\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Concept for exercise: Reduces\\n\",\n    \"\\n\",\n    \"In pandas, a reduce would be something along the lines of a `sum` or `count`. It computes some summary statistics about the rows or columns. We will be using `count`.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"start = time.time()\\n\",\n    \"\\n\",\n    \"pandas_count = pandas_df.count()\\n\",\n    \"\\n\",\n    \"end = time.time()\\n\",\n    \"pandas_duration = end - start\\n\",\n    \"\\n\",\n    \"print(\\\"Time to count with pandas: {} seconds\\\".format(round(pandas_duration, 3)))\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"start = time.time()\\n\",\n    \"\\n\",\n    \"modin_count = modin_df.count()\\n\",\n    \"\\n\",\n    \"end = time.time()\\n\",\n    \"modin_duration = end - start\\n\",\n    \"print(\\\"Time to count with Modin: {} seconds\\\".format(round(modin_duration, 3)))\\n\",\n    \"\\n\",\n    \"printmd(\\\"### Modin is {}x faster than pandas at `count`!\\\".format(round(pandas_duration / modin_duration, 2)))\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Are they equal?\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"pandas_count\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"modin_count\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Concept for exercise: Map 
operations\\n\",\n    \"\\n\",\n    \"In pandas, map operations are operations that do a single pass over the data and do not change its shape. Operations like `isnull` and `applymap` are included in this. We will be using `isnull`.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"start = time.time()\\n\",\n    \"\\n\",\n    \"pandas_isnull = pandas_df.isnull()\\n\",\n    \"\\n\",\n    \"end = time.time()\\n\",\n    \"pandas_duration = end - start\\n\",\n    \"\\n\",\n    \"print(\\\"Time to isnull with pandas: {} seconds\\\".format(round(pandas_duration, 3)))\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"start = time.time()\\n\",\n    \"\\n\",\n    \"modin_isnull = modin_df.isnull()\\n\",\n    \"\\n\",\n    \"end = time.time()\\n\",\n    \"modin_duration = end - start\\n\",\n    \"print(\\\"Time to isnull with Modin: {} seconds\\\".format(round(modin_duration, 3)))\\n\",\n    \"\\n\",\n    \"printmd(\\\"### Modin is {}x faster than pandas at `isnull`!\\\".format(round(pandas_duration / modin_duration, 2)))\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Are they equal?\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"pandas_isnull\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"modin_isnull\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Concept for exercise: Apply over a single column\\n\",\n    \"\\n\",\n    \"Sometimes we want to compute some summary statistics on a single column from our dataset.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   
\"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"start = time.time()\\n\",\n    \"rounded_trip_distance_pandas = pandas_df[\\\"trip_distance\\\"].apply(round)\\n\",\n    \"\\n\",\n    \"end = time.time()\\n\",\n    \"pandas_duration = end - start\\n\",\n    \"print(\\\"Time to groupby with pandas: {} seconds\\\".format(round(pandas_duration, 3)))\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"start = time.time()\\n\",\n    \"\\n\",\n    \"rounded_trip_distance_modin = modin_df[\\\"trip_distance\\\"].apply(round)\\n\",\n    \"\\n\",\n    \"end = time.time()\\n\",\n    \"modin_duration = end - start\\n\",\n    \"print(\\\"Time to add a column with Modin: {} seconds\\\".format(round(modin_duration, 3)))\\n\",\n    \"\\n\",\n    \"printmd(\\\"### Modin is {}x faster than pandas at `apply` on one column!\\\".format(round(pandas_duration / modin_duration, 2)))\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Are they equal?\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"rounded_trip_distance_pandas\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"rounded_trip_distance_modin\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Concept for exercise: Add a column\\n\",\n    \"\\n\",\n    \"It is common to need to add a new column to an existing dataframe, here we show that this is significantly faster in Modin due to metadata management and an efficient zero copy implementation.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"start = 
time.time()\\n\",\n    \"pandas_df[\\\"rounded_trip_distance\\\"] = rounded_trip_distance_pandas\\n\",\n    \"\\n\",\n    \"end = time.time()\\n\",\n    \"pandas_duration = end - start\\n\",\n    \"print(\\\"Time to groupby with pandas: {} seconds\\\".format(round(pandas_duration, 3)))\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"start = time.time()\\n\",\n    \"\\n\",\n    \"modin_df[\\\"rounded_trip_distance\\\"] = rounded_trip_distance_modin\\n\",\n    \"\\n\",\n    \"end = time.time()\\n\",\n    \"modin_duration = end - start\\n\",\n    \"print(\\\"Time to add a column with Modin: {} seconds\\\".format(round(modin_duration, 3)))\\n\",\n    \"\\n\",\n    \"printmd(\\\"### Modin is {}x faster than pandas add a column!\\\".format(round(pandas_duration / modin_duration, 2)))\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Are they equal?\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"pandas_df\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"modin_df\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"**Please move on to [Exercise 3](./exercise_3.ipynb) when you are ready**\"\n   ]\n  }\n ],\n \"metadata\": {\n    \"kernelspec\": {\n     \"display_name\": \"Python 3 (ipykernel) with MPI\",\n     \"language\": \"python\",\n     \"name\": \"python3mpi\"\n    },\n  \"language_info\": {\n   \"codemirror_mode\": {\n    \"name\": \"ipython\",\n    \"version\": 3\n   },\n   \"file_extension\": \".py\",\n   \"mimetype\": \"text/x-python\",\n   \"name\": \"python\",\n   \"nbconvert_exporter\": \"python\",\n   \"pygments_lexer\": \"ipython3\",\n   \"version\": \"3.8.12\"\n  }\n 
},\n \"nbformat\": 4,\n \"nbformat_minor\": 2\n}\n"
  },
  {
    "path": "examples/tutorial/jupyter/execution/pandas_on_unidist/local/exercise_3.ipynb",
    "content": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"![LOGO](../../../img/MODIN_ver2_hrz.png)\\n\",\n    \"\\n\",\n    \"<center><h2>Scale your pandas workflows by changing one line of code</h2>\\n\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Exercise 3: Not Implemented\\n\",\n    \"\\n\",\n    \"**GOAL**: Learn what happens when a function is not yet supported in Modin as well as how to extend Modin's functionality using the DataFrame Algebra.\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"When functionality has not yet been implemented, we default to pandas\\n\",\n    \"\\n\",\n    \"![](../../../img/convert_to_pandas.png)\\n\",\n    \"\\n\",\n    \"We convert a Modin dataframe to pandas to do the operation, then convert it back once it is finished. These operations will have a high overhead due to the communication involved and will take longer than pandas.\\n\",\n    \"\\n\",\n    \"When this is happening, a warning will be given to the user to inform them that this operation will take longer than usual. For example, `DataFrame.mask` is not yet implemented. 
In this case, when a user tries to use it, they will see this warning:\\n\",\n    \"\\n\",\n    \"```\\n\",\n    \"UserWarning: `DataFrame.mask` defaulting to pandas implementation.\\n\",\n    \"```\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Concept for exercise: Default to pandas\\n\",\n    \"\\n\",\n    \"In this section of the exercise we will see first-hand how the runtime is affected by operations that are not implemented.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"import modin.pandas as pd\\n\",\n    \"import pandas\\n\",\n    \"import numpy as np\\n\",\n    \"import time\\n\",\n    \"import modin.config as modin_cfg\\n\",\n    \"import unidist.config as unidist_cfg\\n\",\n    \"modin_cfg.Engine.put(\\\"unidist\\\")\\n\",\n    \"unidist_cfg.Backend.put(\\\"mpi\\\")\\n\",\n    \"\\n\",\n    \"frame_data = np.random.randint(0, 100, size=(2**18, 2**8))\\n\",\n    \"df = pd.DataFrame(frame_data).add_prefix(\\\"col\\\")\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"pandas_df = pandas.DataFrame(frame_data).add_prefix(\\\"col\\\")\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"modin_start = time.time()\\n\",\n    \"\\n\",\n    \"print(df.mask(df < 50))\\n\",\n    \"\\n\",\n    \"modin_end = time.time()\\n\",\n    \"print(\\\"Modin mask took {} seconds.\\\".format(round(modin_end - modin_start, 4)))\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"pandas_start = time.time()\\n\",\n    \"\\n\",\n    \"print(pandas_df.mask(pandas_df < 50))\\n\",\n    \"\\n\",\n    \"pandas_end = time.time()\\n\",\n    
\"print(\\\"pandas mask took {} seconds.\\\".format(round(pandas_end - pandas_start, 4)))\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Concept for exercise: Register custom functions\\n\",\n    \"\\n\",\n    \"Modin's user-facing API is pandas, but it is possible that we do not yet support your favorite or most-needed functionalities. Your user-defined function may also be able to be executed more efficiently if you pre-define the type of function it is (e.g. map, reduce, etc.) using the DataFrame Algebra. To solve either case, it is possible to register a custom function to be applied to your data.\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### Registering a custom function for all query compilers\\n\",\n    \"\\n\",\n    \"To register a custom function for a query compiler, we first need to import it:\\n\",\n    \"\\n\",\n    \"```python\\n\",\n    \"from modin.core.storage_formats.pandas.query_compiler import PandasQueryCompiler\\n\",\n    \"```\\n\",\n    \"\\n\",\n    \"The `PandasQueryCompiler` is responsible for defining and compiling the queries that can be operated on by Modin, and is specific to the pandas storage format. Any queries defined here must also both be compatible with and result in a `pandas.DataFrame`. Many functionalities are very simply implemented, as you can see in the current code: [Link](https://github.com/modin-project/modin/blob/7a8158873e77cb5f1a5a3b89be4ddac89f576269/modin/core/storage_formats/pandas/query_compiler.py#L216).\\n\",\n    \"\\n\",\n    \"If we want to register a new function, we need to understand what kind of function it is. In our example, we will try to implement a `kurtosis` on the unary negation of the values in the dataframe, which is a map (unary negation of each cell) followed by a reduce. 
So we next want to import the function type so we can use it in our definition:\\n\",\n    \"\\n\",\n    \"```python\\n\",\n    \"from modin.core.dataframe.algebra import TreeReduce\\n\",\n    \"```\\n\",\n    \"\\n\",\n    \"Then we can just use the `TreeReduce.register` `classmethod` and assign it to the `PandasQueryCompiler`:\\n\",\n    \"\\n\",\n    \"```python\\n\",\n    \"PandasQueryCompiler.neg_kurtosis = TreeReduce.register(lambda cell_value, **kwargs: ~cell_value, pandas.DataFrame.kurtosis)\\n\",\n    \"```\\n\",\n    \"\\n\",\n    \"We include `**kwargs` to the `lambda` function since the query compiler will pass all keyword arguments to both the map and reduce functions.\\n\",\n    \"\\n\",\n    \"Finally, we want a handle to it from the `DataFrame`, so we need to create a way to do that:\\n\",\n    \"\\n\",\n    \"```python\\n\",\n    \"def neg_kurtosis_func(self, **kwargs):\\n\",\n    \"    # The constructor allows you to pass in a query compiler as a keyword argument\\n\",\n    \"    return self.__constructor__(query_compiler=self._query_compiler.neg_kurtosis(**kwargs))\\n\",\n    \"\\n\",\n    \"pd.DataFrame.neg_kurtosis_custom = neg_kurtosis_func\\n\",\n    \"```\\n\",\n    \"\\n\",\n    \"And then you can use it like you usually would:\\n\",\n    \"\\n\",\n    \"```python\\n\",\n    \"df.neg_kurtosis_custom()\\n\",\n    \"```\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"from modin.core.storage_formats.pandas.query_compiler import PandasQueryCompiler\\n\",\n    \"from modin.core.dataframe.algebra import TreeReduce\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"PandasQueryCompiler.neg_kurtosis_custom = TreeReduce.register(lambda cell_value, **kwargs: ~cell_value,\\n\",\n    \"                                                             
pandas.DataFrame.kurtosis)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"from pandas._libs import lib\\n\",\n    \"# The function signature came from the pandas documentation:\\n\",\n    \"# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.kurtosis.html\\n\",\n    \"def neg_kurtosis_func(self, axis=lib.no_default, skipna=True, level=None, numeric_only=None, **kwargs):\\n\",\n    \"    # We need to specify the axis for the query compiler\\n\",\n    \"    if axis in [None, lib.no_default]:\\n\",\n    \"        axis = 0\\n\",\n    \"    # The constructor allows you to pass in a query compiler as a keyword argument\\n\",\n    \"    # Reduce dimension is used for reduces\\n\",\n    \"    # We also pass all keyword arguments here to ensure correctness\\n\",\n    \"    return self._reduce_dimension(\\n\",\n    \"        self._query_compiler.neg_kurtosis_custom(\\n\",\n    \"            axis=axis, skipna=skipna, level=level, numeric_only=numeric_only, **kwargs\\n\",\n    \"        )\\n\",\n    \"    )\\n\",\n    \"\\n\",\n    \"pd.DataFrame.neg_kurtosis_custom = neg_kurtosis_func\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### Speed improvements\\n\",\n    \"If we were to try and replicate this functionality using the pandas API, we would need to call `df.applymap` with our unary negation function, and subsequently `df.kurtosis` on the result of the first call. 
Let's see how this compares with our new, custom function!\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"start = time.time()\\n\",\n    \"\\n\",\n    \"print(pandas_df.applymap(lambda cell_value: ~cell_value).kurtosis())\\n\",\n    \"\\n\",\n    \"end = time.time()\\n\",\n    \"pandas_duration = end - start\\n\",\n    \"print(\\\"pandas unary negation kurtosis took {} seconds.\\\".format(pandas_duration))\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"start = time.time()\\n\",\n    \"\\n\",\n    \"print(df.applymap(lambda x: ~x).kurtosis())\\n\",\n    \"\\n\",\n    \"end = time.time()\\n\",\n    \"modin_duration = end - start\\n\",\n    \"print(\\\"Modin unary negation kurtosis took {} seconds.\\\".format(modin_duration))\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"custom_start = time.time()\\n\",\n    \"\\n\",\n    \"print(df.neg_kurtosis_custom())\\n\",\n    \"\\n\",\n    \"custom_end = time.time()\\n\",\n    \"modin_custom_duration = custom_end - custom_start\\n\",\n    \"print(\\\"Modin neg_kurtosis_custom took {} seconds.\\\".format(modin_custom_duration))\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"from IPython.display import Markdown, display\\n\",\n    \"\\n\",\n    \"display(Markdown(\\\"### As expected, Modin is {}x faster than pandas when chaining the functions; however we see that our custom function is even faster than that - beating pandas by {}x, and Modin (when chaining the functions) by {}x!\\\".format(round(pandas_duration / modin_duration, 2), round(pandas_duration / modin_custom_duration, 2), round(modin_duration / modin_custom_duration, 
2))))\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Congratulations! You have just implemented new DataFrame functionality!\\n\",\n    \"\\n\",\n    \"## Consider opening a pull request: https://github.com/modin-project/modin/pulls\\n\",\n    \"\\n\",\n    \"For a complete list of what is implemented, see the [Supported APIs](https://modin.readthedocs.io/en/latest/supported_apis/index.html) section.\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Test your knowledge: Add a custom function for another tree reduce: finding `DataFrame.mad` after squaring all of the values\\n\",\n    \"\\n\",\n    \"See the pandas documentation for the correct signature: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.mad.html\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"modin_mad_custom_start = time.time()\\n\",\n    \"\\n\",\n    \"# Implement your function here! Put the result of your custom squared `mad` in the variable `modin_mad_custom`\\n\",\n    \"# Hint: Look at the kurtosis walkthrough above\\n\",\n    \"\\n\",\n    \"modin_mad_custom = ...\\n\",\n    \"print(modin_mad_custom)\\n\",\n    \"\\n\",\n    \"modin_mad_custom_end = time.time()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Evaluation code, do not change!\\n\",\n    \"modin_mad_start = time.time()\\n\",\n    \"modin_mad = df.applymap(lambda x: x**2).mad()\\n\",\n    \"print(modin_mad)\\n\",\n    \"modin_mad_end = time.time()\\n\",\n    \"\\n\",\n    \"assert modin_mad_end - modin_mad_start > modin_mad_custom_end - modin_mad_custom_start, \\\\\\n\",\n    \"    \\\"Your implementation was too slow, or you used the chaining functions approach. 
Try again\\\"\\n\",\n    \"assert modin_mad._to_pandas().equals(modin_mad_custom._to_pandas()), \\\"Your result did not match the result of chaining the functions, try again\\\"\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### Now that you are able to create custom functions, you know enough to contribute to Modin!\\n\",\n    \"\\n\",\n    \"**Please move on to [Exercise 4](./exercise_4.ipynb) when you are ready**\"\n   ]\n  }\n ],\n \"metadata\": {\n  \"kernelspec\": {\n   \"display_name\": \"Python 3 (ipykernel) with MPI\",\n   \"language\": \"python\",\n   \"name\": \"python3mpi\"\n  },\n  \"language_info\": {\n   \"codemirror_mode\": {\n    \"name\": \"ipython\",\n    \"version\": 3\n   },\n   \"file_extension\": \".py\",\n   \"mimetype\": \"text/x-python\",\n   \"name\": \"python\",\n   \"nbconvert_exporter\": \"python\",\n   \"pygments_lexer\": \"ipython3\",\n   \"version\": \"3.8.12\"\n  }\n },\n \"nbformat\": 4,\n \"nbformat_minor\": 2\n}\n"
  },
  {
    "path": "examples/tutorial/jupyter/execution/pandas_on_unidist/local/exercise_4.ipynb",
    "content": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"id\": \"99f41d2d\",\n   \"metadata\": {},\n   \"source\": [\n    \"![LOGO](../../../img/MODIN_ver2_hrz.png)\\n\",\n    \"\\n\",\n    \"<center><h2>Scale your pandas workflows by changing one line of code</h2>\\n\",\n    \"\\n\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"id\": \"fdda1c9c\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Exercise 4: Experimental Features\\n\",\n    \"\\n\",\n    \"**GOAL**: Explore some of the experimental features being added to Modin.\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"id\": \"e7bf87a5\",\n   \"metadata\": {},\n   \"source\": [\n    \"### Concept for exercise: Spreadsheet\\n\",\n    \"\\n\",\n    \"For those who have worked with Excel, the Spreadsheet API will definitely feel familiar! The Spreadsheet API is a Jupyter notebook widget that allows us to interact with Modin DataFrames in a spreadsheet-like fashion while taking advantage of the underlying capabilities of Modin. 
The widget makes it quick and easy to explore, sort, filter, and edit data as well as export the changes as reproducible code.\\n\",\n    \"\\n\",\n    \"Let's look back at a subset of the 2015 NYC Taxi Data from Exercise 2, and see how the Spreadsheet API can make it easy to play with the data!\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"id\": \"5d5c4a3e\",\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"!jupyter nbextension enable --py --sys-prefix modin_spreadsheet\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"id\": \"dc8d5903\",\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"import modin.pandas as pd\\n\",\n    \"import modin.experimental.spreadsheet as mss\\n\",\n    \"import modin.config as modin_cfg\\n\",\n    \"import unidist.config as unidist_cfg\\n\",\n    \"modin_cfg.Engine.put(\\\"unidist\\\")\\n\",\n    \"unidist_cfg.Backend.put(\\\"mpi\\\")\\n\",\n    \"\\n\",\n    \"s3_path = \\\"s3://dask-data/nyc-taxi/2015/yellow_tripdata_2015-01.csv\\\"\\n\",\n    \"modin_df = pd.read_csv(s3_path, parse_dates=[\\\"tpep_pickup_datetime\\\", \\\"tpep_dropoff_datetime\\\"], quoting=3, nrows=1000)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"id\": \"145e7bbe\",\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"spreadsheet = mss.from_dataframe(modin_df)\\n\",\n    \"spreadsheet\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"id\": \"3c18b7f2\",\n   \"metadata\": {},\n   \"source\": [\n    \"### Thank you for participating!\"\n   ]\n  }\n ],\n \"metadata\": {\n  \"kernelspec\": {\n   \"display_name\": \"Python 3 (ipykernel) with MPI\",\n   \"language\": \"python\",\n   \"name\": \"python3mpi\"\n  },\n  \"language_info\": {\n   \"codemirror_mode\": {\n    \"name\": \"ipython\",\n    \"version\": 3\n   },\n   \"file_extension\": \".py\",\n   \"mimetype\": \"text/x-python\",\n   
\"name\": \"python\",\n   \"nbconvert_exporter\": \"python\",\n   \"pygments_lexer\": \"ipython3\",\n   \"version\": \"3.8.12\"\n  }\n },\n \"nbformat\": 4,\n \"nbformat_minor\": 5\n}\n"
  },
  {
    "path": "examples/tutorial/jupyter/execution/pandas_on_unidist/setup_kernel.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport sys\n\nfrom ipykernel import kernelspec\n\ndefault_make_ipkernel_cmd = kernelspec.make_ipkernel_cmd\n\n\ndef custom_make_ipkernel_cmd(*args, **kwargs):\n    \"\"\"\n    Build modified Popen command list for launching an IPython kernel with MPI.\n\n    Parameters\n    ----------\n    *args : iterable\n        Additional positional arguments to be passed in `default_make_ipkernel_cmd`.\n    **kwargs : dict\n        Additional keyword arguments to be passed in `default_make_ipkernel_cmd`.\n\n    Returns\n    -------\n    array\n        A Popen command list.\n\n    Notes\n    -----\n    The parameters of the function should be kept in sync with the ones of the original function.\n    \"\"\"\n    mpi_arguments = [\"mpiexec\", \"-n\", \"1\"]\n    arguments = default_make_ipkernel_cmd(*args, **kwargs)\n    return mpi_arguments + arguments\n\n\nkernelspec.make_ipkernel_cmd = custom_make_ipkernel_cmd\n\nif __name__ == \"__main__\":\n    kernel_name = \"python3mpi\"\n    display_name = \"Python 3 (ipykernel) with MPI\"\n    dest = kernelspec.install(\n        kernel_name=kernel_name, display_name=display_name, prefix=sys.prefix\n    )\n    print(f\"Installed 
kernelspec {kernel_name} in {dest}\")  # noqa: T201\n"
  },
  {
    "path": "examples/tutorial/jupyter/execution/pandas_on_unidist/test/test_notebooks.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport os\nimport sys\n\nimport nbformat\n\nMODIN_DIR = os.path.abspath(\n    os.path.join(os.path.dirname(__file__), *[\"..\" for _ in range(6)])\n)\nsys.path.insert(0, MODIN_DIR)\nfrom examples.tutorial.jupyter.execution.test.utils import (  # noqa: E402\n    _execute_notebook,\n    _replace_str,\n    download_taxi_dataset,\n    set_kernel,\n    test_dataset_path,\n)\n\n# the kernel name \"python3mpi\" must match the one\n# that is set up in `examples/tutorial/jupyter/execution/pandas_on_unidist/setup_kernel.py`\n# for `Unidist` engine\nset_kernel(kernel_name=\"python3mpi\")\n\nlocal_notebooks_dir = \"examples/tutorial/jupyter/execution/pandas_on_unidist/local\"\n\n\n# in this notebook user should replace 'import pandas as pd' with\n# 'import modin.pandas as pd' to make notebook work\ndef test_exercise_1():\n    modified_notebook_path = os.path.join(local_notebooks_dir, \"exercise_1_test.ipynb\")\n    nb = nbformat.read(\n        os.path.join(local_notebooks_dir, \"exercise_1.ipynb\"),\n        as_version=nbformat.NO_CONVERT,\n    )\n\n    _replace_str(nb, \"import pandas as pd\", \"import modin.pandas as pd\")\n\n    nbformat.write(nb, modified_notebook_path)\n  
  _execute_notebook(modified_notebook_path)\n\n\n# this notebook works \"as is\" but for testing purposes we can use smaller dataset\ndef test_exercise_2():\n    modified_notebook_path = os.path.join(local_notebooks_dir, \"exercise_2_test.ipynb\")\n    nb = nbformat.read(\n        os.path.join(local_notebooks_dir, \"exercise_2.ipynb\"),\n        as_version=nbformat.NO_CONVERT,\n    )\n\n    new_cell = f'path = \"{test_dataset_path}\"\\n' + download_taxi_dataset\n\n    _replace_str(\n        nb,\n        'path = \"s3://dask-data/nyc-taxi/2015/yellow_tripdata_2015-01.csv\"',\n        new_cell,\n    )\n\n    nbformat.write(nb, modified_notebook_path)\n    _execute_notebook(modified_notebook_path)\n\n\n# in this notebook user should add custom mad implementation\n# to make notebook work\ndef test_exercise_3():\n    modified_notebook_path = os.path.join(local_notebooks_dir, \"exercise_3_test.ipynb\")\n    nb = nbformat.read(\n        os.path.join(local_notebooks_dir, \"exercise_3.ipynb\"),\n        as_version=nbformat.NO_CONVERT,\n    )\n\n    user_mad_implementation = \"\"\"PandasQueryCompiler.sq_mad_custom = TreeReduce.register(lambda cell_value, **kwargs: cell_value ** 2,\n                                                             pandas.DataFrame.mad)\n\ndef sq_mad_func(self, axis=None, skipna=True, level=None, **kwargs):\n    if axis is None:\n        axis = 0\n\n    return self._reduce_dimension(\n        self._query_compiler.sq_mad_custom(\n            axis=axis, skipna=skipna, level=level, **kwargs\n        )\n    )\n\npd.DataFrame.sq_mad_custom = sq_mad_func\n\nmodin_mad_custom = df.sq_mad_custom()\n    \"\"\"\n\n    _replace_str(nb, \"modin_mad_custom = ...\", user_mad_implementation)\n\n    nbformat.write(nb, modified_notebook_path)\n    # need to update example, `.mad` doesn't exist\n    # _execute_notebook(modified_notebook_path)\n\n\n# this notebook works \"as is\" but for testing purposes we can use smaller dataset\ndef test_exercise_4():\n    
modified_notebook_path = os.path.join(local_notebooks_dir, \"exercise_4_test.ipynb\")\n    nb = nbformat.read(\n        os.path.join(local_notebooks_dir, \"exercise_4.ipynb\"),\n        as_version=nbformat.NO_CONVERT,\n    )\n\n    s3_path_cell = f's3_path = \"{test_dataset_path}\"\\n' + download_taxi_dataset\n    _replace_str(\n        nb,\n        's3_path = \"s3://dask-data/nyc-taxi/2015/yellow_tripdata_2015-01.csv\"',\n        s3_path_cell,\n    )\n\n    nbformat.write(nb, modified_notebook_path)\n    _execute_notebook(modified_notebook_path)\n"
  },
  {
    "path": "examples/tutorial/jupyter/execution/test/utils.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport nbformat\nfrom nbconvert.preprocessors import ExecutePreprocessor\n\ntest_dataset_path = \"taxi.csv\"\ndownload_taxi_dataset = f\"\"\"import os\nimport urllib.request\nif not os.path.exists(\"{test_dataset_path}\"):\n    url_path = \"https://modin-datasets.intel.com/testing/yellow_tripdata_2015-01.csv\"\n    urllib.request.urlretrieve(url_path, \"{test_dataset_path}\")\n    \"\"\"\n\n\n# Default kernel name for ``ExecutePreprocessor`` to be created\n_default_kernel_name = \"python3\"\n\n\ndef set_kernel(kernel_name):\n    \"\"\"\n    Set custom kernel for ``ExecutePreprocessor`` to be created.\n\n    Parameters\n    ----------\n    kernel_name : str\n        Kernel name.\n    \"\"\"\n    global _default_kernel_name\n    _default_kernel_name = kernel_name\n\n\ndef make_execute_preprocessor():\n    \"\"\"\n    Make ``ExecutePreprocessor`` with the `_default_kernel_name`.\n\n    Returns\n    -------\n    nbconvert.preprocessors.ExecutePreprocessor\n        Execute processor entity.\n\n    Notes\n    -----\n    Note that `_default_kernel_name` can be changed for the concrete executions\n    (e.g., ``PandasOnUnidist`` with MPI backend).\n    \"\"\"\n    return 
ExecutePreprocessor(timeout=600, kernel_name=_default_kernel_name)\n\n\ndef _execute_notebook(notebook):\n    \"\"\"\n    Execute a jupyter notebook.\n\n    Parameters\n    ----------\n    notebook : file-like or str\n        File-like object or path to the notebook to execute.\n    \"\"\"\n    nb = nbformat.read(notebook, as_version=nbformat.NO_CONVERT)\n    ep = make_execute_preprocessor()\n    ep.preprocess(nb)\n\n\ndef _find_code_cell_idx(nb, identifier):\n    \"\"\"\n    Find code cell index by provided ``identifier``.\n\n    Parameters\n    ----------\n    nb : dict\n        Dictionary representation of the notebook to look for.\n    identifier : str\n        Unique string which target code cell should contain.\n\n    Returns\n    -------\n    int\n        Code cell index by provided ``identifier``.\n\n    Notes\n    -----\n    Assertion will be raised if ``identifier`` is found in\n    several code cells or isn't found at all.\n    \"\"\"\n    import_cell_idx = [\n        idx\n        for idx, cell in enumerate(nb[\"cells\"])\n        if cell[\"cell_type\"] == \"code\" and identifier in cell[\"source\"]\n    ]\n    assert len(import_cell_idx) == 1\n    return import_cell_idx[0]\n\n\ndef _replace_str(nb, original_str, str_to_replace):\n    \"\"\"\n    Replace ``original_str`` with ``str_to_replace`` in the provided notebook.\n\n    Parameters\n    ----------\n    nb : dict\n        Dictionary representation of the notebook which requires replacement.\n    original_str : str\n        Original string which should be replaced.\n    str_to_replace : str\n        String to replace original string.\n\n    Notes\n    -----\n    Assertion will be raised if ``original_str`` is found in\n    several code cells or isn't found at all.\n    \"\"\"\n    import_cell_idx = _find_code_cell_idx(nb, original_str)\n    nb[\"cells\"][import_cell_idx][\"source\"] = nb[\"cells\"][import_cell_idx][\n        \"source\"\n    ].replace(original_str, str_to_replace)\n"
  },
  {
    "path": "modin/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport warnings\nfrom typing import Any, Optional, Tuple, Type, Union\n\nfrom . import _version\n\n\ndef custom_formatwarning(\n    message: Union[Warning, str],\n    category: Type[Warning],\n    filename: str,\n    lineno: int,\n    line: Optional[str] = None,\n) -> str:\n    # ignore everything except the message\n    return \"{}: {}\\n\".format(category.__name__, message)\n\n\nwarnings.formatwarning = custom_formatwarning\n# Filter numpy version warnings because they are not relevant\nwarnings.filterwarnings(\"ignore\", message=\"numpy.dtype size changed\")\nwarnings.filterwarnings(\"ignore\", message=\"Large object of size\")\n\n\ndef set_execution(engine: Any = None, storage_format: Any = None) -> Tuple[Any, Any]:\n    \"\"\"\n    Method to set the _pair_ of execution engine and storage format format simultaneously.\n    This is needed because there might be cases where switching one by one would be\n    impossible, as not all pairs of values are meaningful.\n\n    The method returns pair of old values, so it is easy to return back.\n    \"\"\"\n    from .config import Backend, Engine, Execution, StorageFormat\n\n    old_engine, old_storage_format = None, 
None\n    # defer callbacks until both entities are set\n    if engine is not None:\n        old_engine = Engine._put_nocallback(engine)\n    if storage_format is not None:\n        old_storage_format = StorageFormat._put_nocallback(storage_format)\n    # execute callbacks if something was changed\n    if old_engine is not None:\n        Engine._check_callbacks(old_engine)\n    if old_storage_format is not None:\n        StorageFormat._check_callbacks(old_storage_format)\n    old_backend = Backend.get()\n    Backend._put_nocallback(\n        Backend.get_backend_for_execution(\n            Execution(engine=Engine.get(), storage_format=StorageFormat.get())\n        )\n    )\n    Backend._check_callbacks(old_backend)\n\n    return old_engine, old_storage_format\n\n\n__version__ = _version.get_versions()[\"version\"]\n"
  },
  {
    "path": "modin/__main__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Command-line interface piece, called when user issues \"python -m modin --foo\".\"\"\"\n\nimport argparse\n\n\ndef main() -> None:\n    parser = argparse.ArgumentParser(\n        \"python -m modin\",\n        description=\"Drop-in pandas replacement; refer to https://modin.readthedocs.io/ for details.\",\n    )\n    parser.add_argument(\n        \"--versions\",\n        action=\"store_true\",\n        default=False,\n        help=\"Show versions of all known components\",\n    )\n\n    args = parser.parse_args()\n    if args.versions:\n        from modin.utils import show_versions\n\n        show_versions()\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "modin/_version.py",
    "content": "# This file helps to compute a version number in source trees obtained from\n# git-archive tarball (such as those provided by githubs download-from-tag\n# feature). Distribution tarballs (built by setup.py sdist) and build\n# directories (produced by setup.py build) will contain a much shorter file\n# that just contains the computed version number.\n\n# This file is released into the public domain.\n# Generated by versioneer-0.29\n# https://github.com/python-versioneer/python-versioneer\n\n\"\"\"Git implementation of _version.py.\"\"\"\n\nimport errno\nimport functools\nimport os\nimport re\nimport subprocess\nimport sys\nfrom typing import Any, Callable, Dict, List, Optional, Tuple\n\n\ndef get_keywords() -> Dict[str, str]:\n    \"\"\"Get the keywords needed to look up the version information.\"\"\"\n    # these strings will be replaced by git during git-archive.\n    # setup.py/versioneer.py will grep for the variable names, so they must\n    # each be defined on a line of their own. 
_version.py will just call\n    # get_keywords().\n    git_refnames = \"$Format:%d$\"\n    git_full = \"$Format:%H$\"\n    git_date = \"$Format:%ci$\"\n    keywords = {\"refnames\": git_refnames, \"full\": git_full, \"date\": git_date}\n    return keywords\n\n\nclass VersioneerConfig:\n    \"\"\"Container for Versioneer configuration parameters.\"\"\"\n\n    VCS: str\n    style: str\n    tag_prefix: str\n    parentdir_prefix: str\n    versionfile_source: str\n    verbose: bool\n\n\ndef get_config() -> VersioneerConfig:\n    \"\"\"Create, populate and return the VersioneerConfig() object.\"\"\"\n    # these strings are filled in when 'setup.py versioneer' creates\n    # _version.py\n    cfg = VersioneerConfig()\n    cfg.VCS = \"git\"\n    cfg.style = \"pep440\"\n    cfg.tag_prefix = \"\"\n    cfg.parentdir_prefix = \"modin-\"\n    cfg.versionfile_source = \"modin/_version.py\"\n    cfg.verbose = False\n    return cfg\n\n\nclass NotThisMethod(Exception):\n    \"\"\"Exception raised if a method is not valid for the current scenario.\"\"\"\n\n\nLONG_VERSION_PY: Dict[str, str] = {}\nHANDLERS: Dict[str, Dict[str, Callable]] = {}\n\n\ndef register_vcs_handler(vcs: str, method: str) -> Callable:  # decorator\n    \"\"\"Create decorator to mark a method as the handler of a VCS.\"\"\"\n\n    def decorate(f: Callable) -> Callable:\n        \"\"\"Store f in HANDLERS[vcs][method].\"\"\"\n        if vcs not in HANDLERS:\n            HANDLERS[vcs] = {}\n        HANDLERS[vcs][method] = f\n        return f\n\n    return decorate\n\n\ndef run_command(\n    commands: List[str],\n    args: List[str],\n    cwd: Optional[str] = None,\n    verbose: bool = False,\n    hide_stderr: bool = False,\n    env: Optional[Dict[str, str]] = None,\n) -> Tuple[Optional[str], Optional[int]]:\n    \"\"\"Call the given command(s).\"\"\"\n    assert isinstance(commands, list)\n    process = None\n\n    popen_kwargs: Dict[str, Any] = {}\n    if sys.platform == \"win32\":\n        # This hides the console 
window if pythonw.exe is used\n        startupinfo = subprocess.STARTUPINFO()\n        startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW\n        popen_kwargs[\"startupinfo\"] = startupinfo\n\n    for command in commands:\n        try:\n            dispcmd = str([command] + args)\n            # remember shell=False, so use git.cmd on windows, not just git\n            process = subprocess.Popen(\n                [command] + args,\n                cwd=cwd,\n                env=env,\n                stdout=subprocess.PIPE,\n                stderr=(subprocess.PIPE if hide_stderr else None),\n                **popen_kwargs,\n            )\n            break\n        except OSError as e:\n            if e.errno == errno.ENOENT:\n                continue\n            if verbose:\n                print(\"unable to run %s\" % dispcmd)\n                print(e)\n            return None, None\n    else:\n        if verbose:\n            print(\"unable to find command, tried %s\" % (commands,))\n        return None, None\n    stdout = process.communicate()[0].strip().decode()\n    if process.returncode != 0:\n        if verbose:\n            print(\"unable to run %s (error)\" % dispcmd)\n            print(\"stdout was %s\" % stdout)\n        return None, process.returncode\n    return stdout, process.returncode\n\n\ndef versions_from_parentdir(\n    parentdir_prefix: str,\n    root: str,\n    verbose: bool,\n) -> Dict[str, Any]:\n    \"\"\"Try to determine the version from the parent directory name.\n\n    Source tarballs conventionally unpack into a directory that includes both\n    the project name and a version string. 
We will also support searching up\n    two directory levels for an appropriately named parent directory\n    \"\"\"\n    rootdirs = []\n\n    for _ in range(3):\n        dirname = os.path.basename(root)\n        if dirname.startswith(parentdir_prefix):\n            return {\n                \"version\": dirname[len(parentdir_prefix) :],\n                \"full-revisionid\": None,\n                \"dirty\": False,\n                \"error\": None,\n                \"date\": None,\n            }\n        rootdirs.append(root)\n        root = os.path.dirname(root)  # up a level\n\n    if verbose:\n        print(\n            \"Tried directories %s but none started with prefix %s\"\n            % (str(rootdirs), parentdir_prefix)\n        )\n    raise NotThisMethod(\"rootdir doesn't start with parentdir_prefix\")\n\n\n@register_vcs_handler(\"git\", \"get_keywords\")\ndef git_get_keywords(versionfile_abs: str) -> Dict[str, str]:\n    \"\"\"Extract version information from the given file.\"\"\"\n    # the code embedded in _version.py can just fetch the value of these\n    # keywords. When used from setup.py, we don't want to import _version.py,\n    # so we do it with a regexp instead. 
This function is not used from\n    # _version.py.\n    keywords: Dict[str, str] = {}\n    try:\n        with open(versionfile_abs, \"r\") as fobj:\n            for line in fobj:\n                if line.strip().startswith(\"git_refnames =\"):\n                    mo = re.search(r'=\\s*\"(.*)\"', line)\n                    if mo:\n                        keywords[\"refnames\"] = mo.group(1)\n                if line.strip().startswith(\"git_full =\"):\n                    mo = re.search(r'=\\s*\"(.*)\"', line)\n                    if mo:\n                        keywords[\"full\"] = mo.group(1)\n                if line.strip().startswith(\"git_date =\"):\n                    mo = re.search(r'=\\s*\"(.*)\"', line)\n                    if mo:\n                        keywords[\"date\"] = mo.group(1)\n    except OSError:\n        pass\n    return keywords\n\n\n@register_vcs_handler(\"git\", \"keywords\")\ndef git_versions_from_keywords(\n    keywords: Dict[str, str],\n    tag_prefix: str,\n    verbose: bool,\n) -> Dict[str, Any]:\n    \"\"\"Get version information from git keywords.\"\"\"\n    if \"refnames\" not in keywords:\n        raise NotThisMethod(\"Short version file found\")\n    date = keywords.get(\"date\")\n    if date is not None:\n        # Use only the last line.  Previous lines may contain GPG signature\n        # information.\n        date = date.splitlines()[-1]\n\n        # git-2.2.0 added \"%cI\", which expands to an ISO-8601 -compliant\n        # datestamp. 
However we prefer \"%ci\" (which expands to an \"ISO-8601\n        # -like\" string, which we must then edit to make compliant), because\n        # it's been around since git-1.5.3, and it's too difficult to\n        # discover which version we're using, or to work around using an\n        # older one.\n        date = date.strip().replace(\" \", \"T\", 1).replace(\" \", \"\", 1)\n    refnames = keywords[\"refnames\"].strip()\n    if refnames.startswith(\"$Format\"):\n        if verbose:\n            print(\"keywords are unexpanded, not using\")\n        raise NotThisMethod(\"unexpanded keywords, not a git-archive tarball\")\n    refs = {r.strip() for r in refnames.strip(\"()\").split(\",\")}\n    # starting in git-1.8.3, tags are listed as \"tag: foo-1.0\" instead of\n    # just \"foo-1.0\". If we see a \"tag: \" prefix, prefer those.\n    TAG = \"tag: \"\n    tags = {r[len(TAG) :] for r in refs if r.startswith(TAG)}\n    if not tags:\n        # Either we're using git < 1.8.3, or there really are no tags. We use\n        # a heuristic: assume all version tags have a digit. The old git %d\n        # expansion behaves like git log --decorate=short and strips out the\n        # refs/heads/ and refs/tags/ prefixes that would let us distinguish\n        # between branches and tags. By ignoring refnames without digits, we\n        # filter out many common branch names like \"release\" and\n        # \"stabilization\", as well as \"HEAD\" and \"master\".\n        tags = {r for r in refs if re.search(r\"\\d\", r)}\n        if verbose:\n            print(\"discarding '%s', no digits\" % \",\".join(refs - tags))\n    if verbose:\n        print(\"likely tags: %s\" % \",\".join(sorted(tags)))\n    for ref in sorted(tags):\n        # sorting will prefer e.g. 
\"2.0\" over \"2.0rc1\"\n        if ref.startswith(tag_prefix):\n            r = ref[len(tag_prefix) :]\n            # Filter out refs that exactly match prefix or that don't start\n            # with a number once the prefix is stripped (mostly a concern\n            # when prefix is '')\n            if not re.match(r\"\\d\", r):\n                continue\n            if verbose:\n                print(\"picking %s\" % r)\n            return {\n                \"version\": r,\n                \"full-revisionid\": keywords[\"full\"].strip(),\n                \"dirty\": False,\n                \"error\": None,\n                \"date\": date,\n            }\n    # no suitable tags, so version is \"0+unknown\", but full hex is still there\n    if verbose:\n        print(\"no suitable tags, using unknown + full revision id\")\n    return {\n        \"version\": \"0+unknown\",\n        \"full-revisionid\": keywords[\"full\"].strip(),\n        \"dirty\": False,\n        \"error\": \"no suitable tags\",\n        \"date\": None,\n    }\n\n\n@register_vcs_handler(\"git\", \"pieces_from_vcs\")\ndef git_pieces_from_vcs(\n    tag_prefix: str, root: str, verbose: bool, runner: Callable = run_command\n) -> Dict[str, Any]:\n    \"\"\"Get version from 'git describe' in the root of the source tree.\n\n    This only gets called if the git-archive 'subst' keywords were *not*\n    expanded, and _version.py hasn't already been rewritten with a short\n    version string, meaning we're inside a checked out source tree.\n    \"\"\"\n    GITS = [\"git\"]\n    if sys.platform == \"win32\":\n        GITS = [\"git.cmd\", \"git.exe\"]\n\n    # GIT_DIR can interfere with correct operation of Versioneer.\n    # It may be intended to be passed to the Versioneer-versioned project,\n    # but that should not change where we get our version from.\n    env = os.environ.copy()\n    env.pop(\"GIT_DIR\", None)\n    runner = functools.partial(runner, env=env)\n\n    _, rc = runner(GITS, [\"rev-parse\", 
\"--git-dir\"], cwd=root, hide_stderr=not verbose)\n    if rc != 0:\n        if verbose:\n            print(\"Directory %s not under git control\" % root)\n        raise NotThisMethod(\"'git rev-parse --git-dir' returned error\")\n\n    # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty]\n    # if there isn't one, this yields HEX[-dirty] (no NUM)\n    describe_out, rc = runner(\n        GITS,\n        [\n            \"describe\",\n            \"--tags\",\n            \"--dirty\",\n            \"--always\",\n            \"--long\",\n            \"--match\",\n            f\"{tag_prefix}[[:digit:]]*\",\n        ],\n        cwd=root,\n    )\n    # --long was added in git-1.5.5\n    if describe_out is None:\n        raise NotThisMethod(\"'git describe' failed\")\n    describe_out = describe_out.strip()\n    full_out, rc = runner(GITS, [\"rev-parse\", \"HEAD\"], cwd=root)\n    if full_out is None:\n        raise NotThisMethod(\"'git rev-parse' failed\")\n    full_out = full_out.strip()\n\n    pieces: Dict[str, Any] = {}\n    pieces[\"long\"] = full_out\n    pieces[\"short\"] = full_out[:7]  # maybe improved later\n    pieces[\"error\"] = None\n\n    branch_name, rc = runner(GITS, [\"rev-parse\", \"--abbrev-ref\", \"HEAD\"], cwd=root)\n    # --abbrev-ref was added in git-1.6.3\n    if rc != 0 or branch_name is None:\n        raise NotThisMethod(\"'git rev-parse --abbrev-ref' returned error\")\n    branch_name = branch_name.strip()\n\n    if branch_name == \"HEAD\":\n        # If we aren't exactly on a branch, pick a branch which represents\n        # the current commit. 
If all else fails, we are on a branchless\n        # commit.\n        branches, rc = runner(GITS, [\"branch\", \"--contains\"], cwd=root)\n        # --contains was added in git-1.5.4\n        if rc != 0 or branches is None:\n            raise NotThisMethod(\"'git branch --contains' returned error\")\n        branches = branches.split(\"\\n\")\n\n        # Remove the first line if we're running detached\n        if \"(\" in branches[0]:\n            branches.pop(0)\n\n        # Strip off the leading \"* \" from the list of branches.\n        branches = [branch[2:] for branch in branches]\n        if \"master\" in branches:\n            branch_name = \"master\"\n        elif not branches:\n            branch_name = None\n        else:\n            # Pick the first branch that is returned. Good or bad.\n            branch_name = branches[0]\n\n    pieces[\"branch\"] = branch_name\n\n    # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty]\n    # TAG might have hyphens.\n    git_describe = describe_out\n\n    # look for -dirty suffix\n    dirty = git_describe.endswith(\"-dirty\")\n    pieces[\"dirty\"] = dirty\n    if dirty:\n        git_describe = git_describe[: git_describe.rindex(\"-dirty\")]\n\n    # now we have TAG-NUM-gHEX or HEX\n\n    if \"-\" in git_describe:\n        # TAG-NUM-gHEX\n        mo = re.search(r\"^(.+)-(\\d+)-g([0-9a-f]+)$\", git_describe)\n        if not mo:\n            # unparsable. 
Maybe git-describe is misbehaving?\n            pieces[\"error\"] = \"unable to parse git-describe output: '%s'\" % describe_out\n            return pieces\n\n        # tag\n        full_tag = mo.group(1)\n        if not full_tag.startswith(tag_prefix):\n            if verbose:\n                fmt = \"tag '%s' doesn't start with prefix '%s'\"\n                print(fmt % (full_tag, tag_prefix))\n            pieces[\"error\"] = \"tag '%s' doesn't start with prefix '%s'\" % (\n                full_tag,\n                tag_prefix,\n            )\n            return pieces\n        pieces[\"closest-tag\"] = full_tag[len(tag_prefix) :]\n\n        # distance: number of commits since tag\n        pieces[\"distance\"] = int(mo.group(2))\n\n        # commit: short hex revision ID\n        pieces[\"short\"] = mo.group(3)\n\n    else:\n        # HEX: no tags\n        pieces[\"closest-tag\"] = None\n        out, rc = runner(GITS, [\"rev-list\", \"HEAD\", \"--left-right\"], cwd=root)\n        pieces[\"distance\"] = len(out.split())  # total number of commits\n\n    # commit date: see ISO-8601 comment in git_versions_from_keywords()\n    date = runner(GITS, [\"show\", \"-s\", \"--format=%ci\", \"HEAD\"], cwd=root)[0].strip()\n    # Use only the last line.  Previous lines may contain GPG signature\n    # information.\n    date = date.splitlines()[-1]\n    pieces[\"date\"] = date.strip().replace(\" \", \"T\", 1).replace(\" \", \"\", 1)\n\n    return pieces\n\n\ndef plus_or_dot(pieces: Dict[str, Any]) -> str:\n    \"\"\"Return a + if we don't already have one, else return a .\"\"\"\n    if \"+\" in pieces.get(\"closest-tag\", \"\"):\n        return \".\"\n    return \"+\"\n\n\ndef render_pep440(pieces: Dict[str, Any]) -> str:\n    \"\"\"Build up version string, with post-release \"local version identifier\".\n\n    Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you\n    get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty\n\n    Exceptions:\n    1: no tags. 
git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty]\n    \"\"\"\n    if pieces[\"closest-tag\"]:\n        rendered = pieces[\"closest-tag\"]\n        if pieces[\"distance\"] or pieces[\"dirty\"]:\n            rendered += plus_or_dot(pieces)\n            rendered += \"%d.g%s\" % (pieces[\"distance\"], pieces[\"short\"])\n            if pieces[\"dirty\"]:\n                rendered += \".dirty\"\n    else:\n        # exception #1\n        rendered = \"0+untagged.%d.g%s\" % (pieces[\"distance\"], pieces[\"short\"])\n        if pieces[\"dirty\"]:\n            rendered += \".dirty\"\n    return rendered\n\n\ndef render_pep440_branch(pieces: Dict[str, Any]) -> str:\n    \"\"\"TAG[[.dev0]+DISTANCE.gHEX[.dirty]] .\n\n    The \".dev0\" means not master branch. Note that .dev0 sorts backwards\n    (a feature branch will appear \"older\" than the master branch).\n\n    Exceptions:\n    1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty]\n    \"\"\"\n    if pieces[\"closest-tag\"]:\n        rendered = pieces[\"closest-tag\"]\n        if pieces[\"distance\"] or pieces[\"dirty\"]:\n            if pieces[\"branch\"] != \"master\":\n                rendered += \".dev0\"\n            rendered += plus_or_dot(pieces)\n            rendered += \"%d.g%s\" % (pieces[\"distance\"], pieces[\"short\"])\n            if pieces[\"dirty\"]:\n                rendered += \".dirty\"\n    else:\n        # exception #1\n        rendered = \"0\"\n        if pieces[\"branch\"] != \"master\":\n            rendered += \".dev0\"\n        rendered += \"+untagged.%d.g%s\" % (pieces[\"distance\"], pieces[\"short\"])\n        if pieces[\"dirty\"]:\n            rendered += \".dirty\"\n    return rendered\n\n\ndef pep440_split_post(ver: str) -> Tuple[str, Optional[int]]:\n    \"\"\"Split pep440 version string at the post-release segment.\n\n    Returns the release segments before the post-release and the\n    post-release version number (or -1 if no post-release segment is present).\n    \"\"\"\n    vc 
= str.split(ver, \".post\")\n    return vc[0], int(vc[1] or 0) if len(vc) == 2 else None\n\n\ndef render_pep440_pre(pieces: Dict[str, Any]) -> str:\n    \"\"\"TAG[.postN.devDISTANCE] -- No -dirty.\n\n    Exceptions:\n    1: no tags. 0.post0.devDISTANCE\n    \"\"\"\n    if pieces[\"closest-tag\"]:\n        if pieces[\"distance\"]:\n            # update the post release segment\n            tag_version, post_version = pep440_split_post(pieces[\"closest-tag\"])\n            rendered = tag_version\n            if post_version is not None:\n                rendered += \".post%d.dev%d\" % (post_version + 1, pieces[\"distance\"])\n            else:\n                rendered += \".post0.dev%d\" % (pieces[\"distance\"])\n        else:\n            # no commits, use the tag as the version\n            rendered = pieces[\"closest-tag\"]\n    else:\n        # exception #1\n        rendered = \"0.post0.dev%d\" % pieces[\"distance\"]\n    return rendered\n\n\ndef render_pep440_post(pieces: Dict[str, Any]) -> str:\n    \"\"\"TAG[.postDISTANCE[.dev0]+gHEX] .\n\n    The \".dev0\" means dirty. Note that .dev0 sorts backwards\n    (a dirty tree will appear \"older\" than the corresponding clean one),\n    but you shouldn't be releasing software with -dirty anyways.\n\n    Exceptions:\n    1: no tags. 
0.postDISTANCE[.dev0]\n    \"\"\"\n    if pieces[\"closest-tag\"]:\n        rendered = pieces[\"closest-tag\"]\n        if pieces[\"distance\"] or pieces[\"dirty\"]:\n            rendered += \".post%d\" % pieces[\"distance\"]\n            if pieces[\"dirty\"]:\n                rendered += \".dev0\"\n            rendered += plus_or_dot(pieces)\n            rendered += \"g%s\" % pieces[\"short\"]\n    else:\n        # exception #1\n        rendered = \"0.post%d\" % pieces[\"distance\"]\n        if pieces[\"dirty\"]:\n            rendered += \".dev0\"\n        rendered += \"+g%s\" % pieces[\"short\"]\n    return rendered\n\n\ndef render_pep440_post_branch(pieces: Dict[str, Any]) -> str:\n    \"\"\"TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] .\n\n    The \".dev0\" means not master branch.\n\n    Exceptions:\n    1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty]\n    \"\"\"\n    if pieces[\"closest-tag\"]:\n        rendered = pieces[\"closest-tag\"]\n        if pieces[\"distance\"] or pieces[\"dirty\"]:\n            rendered += \".post%d\" % pieces[\"distance\"]\n            if pieces[\"branch\"] != \"master\":\n                rendered += \".dev0\"\n            rendered += plus_or_dot(pieces)\n            rendered += \"g%s\" % pieces[\"short\"]\n            if pieces[\"dirty\"]:\n                rendered += \".dirty\"\n    else:\n        # exception #1\n        rendered = \"0.post%d\" % pieces[\"distance\"]\n        if pieces[\"branch\"] != \"master\":\n            rendered += \".dev0\"\n        rendered += \"+g%s\" % pieces[\"short\"]\n        if pieces[\"dirty\"]:\n            rendered += \".dirty\"\n    return rendered\n\n\ndef render_pep440_old(pieces: Dict[str, Any]) -> str:\n    \"\"\"TAG[.postDISTANCE[.dev0]] .\n\n    The \".dev0\" means dirty.\n\n    Exceptions:\n    1: no tags. 
0.postDISTANCE[.dev0]\n    \"\"\"\n    if pieces[\"closest-tag\"]:\n        rendered = pieces[\"closest-tag\"]\n        if pieces[\"distance\"] or pieces[\"dirty\"]:\n            rendered += \".post%d\" % pieces[\"distance\"]\n            if pieces[\"dirty\"]:\n                rendered += \".dev0\"\n    else:\n        # exception #1\n        rendered = \"0.post%d\" % pieces[\"distance\"]\n        if pieces[\"dirty\"]:\n            rendered += \".dev0\"\n    return rendered\n\n\ndef render_git_describe(pieces: Dict[str, Any]) -> str:\n    \"\"\"TAG[-DISTANCE-gHEX][-dirty].\n\n    Like 'git describe --tags --dirty --always'.\n\n    Exceptions:\n    1: no tags. HEX[-dirty]  (note: no 'g' prefix)\n    \"\"\"\n    if pieces[\"closest-tag\"]:\n        rendered = pieces[\"closest-tag\"]\n        if pieces[\"distance\"]:\n            rendered += \"-%d-g%s\" % (pieces[\"distance\"], pieces[\"short\"])\n    else:\n        # exception #1\n        rendered = pieces[\"short\"]\n    if pieces[\"dirty\"]:\n        rendered += \"-dirty\"\n    return rendered\n\n\ndef render_git_describe_long(pieces: Dict[str, Any]) -> str:\n    \"\"\"TAG-DISTANCE-gHEX[-dirty].\n\n    Like 'git describe --tags --dirty --always -long'.\n    The distance/hash is unconditional.\n\n    Exceptions:\n    1: no tags. 
HEX[-dirty]  (note: no 'g' prefix)\n    \"\"\"\n    if pieces[\"closest-tag\"]:\n        rendered = pieces[\"closest-tag\"]\n        rendered += \"-%d-g%s\" % (pieces[\"distance\"], pieces[\"short\"])\n    else:\n        # exception #1\n        rendered = pieces[\"short\"]\n    if pieces[\"dirty\"]:\n        rendered += \"-dirty\"\n    return rendered\n\n\ndef render(pieces: Dict[str, Any], style: str) -> Dict[str, Any]:\n    \"\"\"Render the given version pieces into the requested style.\"\"\"\n    if pieces[\"error\"]:\n        return {\n            \"version\": \"unknown\",\n            \"full-revisionid\": pieces.get(\"long\"),\n            \"dirty\": None,\n            \"error\": pieces[\"error\"],\n            \"date\": None,\n        }\n\n    if not style or style == \"default\":\n        style = \"pep440\"  # the default\n\n    if style == \"pep440\":\n        rendered = render_pep440(pieces)\n    elif style == \"pep440-branch\":\n        rendered = render_pep440_branch(pieces)\n    elif style == \"pep440-pre\":\n        rendered = render_pep440_pre(pieces)\n    elif style == \"pep440-post\":\n        rendered = render_pep440_post(pieces)\n    elif style == \"pep440-post-branch\":\n        rendered = render_pep440_post_branch(pieces)\n    elif style == \"pep440-old\":\n        rendered = render_pep440_old(pieces)\n    elif style == \"git-describe\":\n        rendered = render_git_describe(pieces)\n    elif style == \"git-describe-long\":\n        rendered = render_git_describe_long(pieces)\n    else:\n        raise ValueError(\"unknown style '%s'\" % style)\n\n    return {\n        \"version\": rendered,\n        \"full-revisionid\": pieces[\"long\"],\n        \"dirty\": pieces[\"dirty\"],\n        \"error\": None,\n        \"date\": pieces.get(\"date\"),\n    }\n\n\ndef get_versions() -> Dict[str, Any]:\n    \"\"\"Get version information or return default if unable to do so.\"\"\"\n    # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. 
If we have\n    # __file__, we can work backwards from there to the root. Some\n    # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which\n    # case we can only use expanded keywords.\n\n    cfg = get_config()\n    verbose = cfg.verbose\n\n    try:\n        return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, verbose)\n    except NotThisMethod:\n        pass\n\n    try:\n        root = os.path.realpath(__file__)\n        # versionfile_source is the relative path from the top of the source\n        # tree (where the .git directory might live) to this file. Invert\n        # this to find the root from __file__.\n        for _ in cfg.versionfile_source.split(\"/\"):\n            root = os.path.dirname(root)\n    except NameError:\n        return {\n            \"version\": \"0+unknown\",\n            \"full-revisionid\": None,\n            \"dirty\": None,\n            \"error\": \"unable to find root of source tree\",\n            \"date\": None,\n        }\n\n    try:\n        pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose)\n        return render(pieces, cfg.style)\n    except NotThisMethod:\n        pass\n\n    try:\n        if cfg.parentdir_prefix:\n            return versions_from_parentdir(cfg.parentdir_prefix, root, verbose)\n    except NotThisMethod:\n        pass\n\n    return {\n        \"version\": \"0+unknown\",\n        \"full-revisionid\": None,\n        \"dirty\": None,\n        \"error\": \"unable to compute version\",\n        \"date\": None,\n    }\n"
  },
  {
    "path": "modin/config/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module houses config entities which can be used for Modin behavior tuning.\"\"\"\n\nfrom modin.config.envvars import (\n    AsvDataSizeConfig,\n    AsvImplementation,\n    AsyncReadMode,\n    AutoSwitchBackend,\n    Backend,\n    BackendJoinConsiderAllBackends,\n    BackendMergeCastInPlace,\n    BenchmarkMode,\n    CIAWSAccessKeyID,\n    CIAWSSecretAccessKey,\n    CpuCount,\n    DaskThreadsPerWorker,\n    DocModule,\n    DynamicPartitioning,\n    Engine,\n    EnvironmentVariable,\n    Execution,\n    GithubCI,\n    GpuCount,\n    IsDebug,\n    IsExperimental,\n    IsRayCluster,\n    LazyExecution,\n    LogFileSize,\n    LogMemoryInterval,\n    LogMode,\n    Memory,\n    MetricsMode,\n    MinColumnPartitionSize,\n    MinPartitionSize,\n    MinRowPartitionSize,\n    ModinNumpy,\n    NativePandasDeepCopy,\n    NativePandasMaxRows,\n    NativePandasTransferThreshold,\n    NPartitions,\n    PersistentPickle,\n    ProgressBar,\n    RangePartitioning,\n    RayInitCustomResources,\n    RayRedisAddress,\n    RayRedisPassword,\n    RayTaskCustomResources,\n    ReadSqlEngine,\n    ShowBackendSwitchProgress,\n    StorageFormat,\n    TestDatasetSize,\n    
TestReadFromPostgres,\n    TestReadFromSqlServer,\n    TrackFileLeaks,\n)\nfrom modin.config.pubsub import Parameter, ValueSource, context\n\n__all__ = [\n    \"EnvironmentVariable\",\n    \"Parameter\",\n    \"ValueSource\",\n    \"context\",\n    # General settings\n    \"IsDebug\",\n    \"Engine\",\n    \"StorageFormat\",\n    \"CpuCount\",\n    \"GpuCount\",\n    \"Memory\",\n    \"Backend\",\n    \"BackendJoinConsiderAllBackends\",\n    \"BackendMergeCastInPlace\",\n    \"Execution\",\n    \"AutoSwitchBackend\",\n    \"ShowBackendSwitchProgress\",\n    # Ray specific\n    \"IsRayCluster\",\n    \"RayRedisAddress\",\n    \"RayRedisPassword\",\n    \"RayInitCustomResources\",\n    \"RayTaskCustomResources\",\n    \"LazyExecution\",\n    # Dask specific\n    \"DaskThreadsPerWorker\",\n    # Native Pandas Specific\n    \"NativePandasMaxRows\",\n    \"NativePandasTransferThreshold\",\n    \"NativePandasDeepCopy\",\n    # Partitioning\n    \"NPartitions\",\n    \"MinPartitionSize\",\n    \"MinRowPartitionSize\",\n    \"MinColumnPartitionSize\",\n    # ASV specific\n    \"TestDatasetSize\",\n    \"AsvImplementation\",\n    \"AsvDataSizeConfig\",\n    # Specific features\n    \"ProgressBar\",\n    \"BenchmarkMode\",\n    \"PersistentPickle\",\n    \"ModinNumpy\",\n    \"RangePartitioning\",\n    \"AsyncReadMode\",\n    \"ReadSqlEngine\",\n    \"IsExperimental\",\n    \"DynamicPartitioning\",\n    # For tests\n    \"TrackFileLeaks\",\n    \"TestReadFromSqlServer\",\n    \"TestReadFromPostgres\",\n    \"GithubCI\",\n    \"CIAWSSecretAccessKey\",\n    \"CIAWSAccessKeyID\",\n    # Logging\n    \"LogMode\",\n    \"LogMemoryInterval\",\n    \"LogFileSize\",\n    \"MetricsMode\",\n    # Plugin settings\n    \"DocModule\",\n]\n"
  },
  {
    "path": "modin/config/__main__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"\nContent of this file should be executed if module `modin.config` is called.\n\nIf module is called (using `python -m modin.config`) configs help will be printed.\nUsing `-export_path` option configs description can be exported to the external CSV file\nprovided with this flag.\n\"\"\"\n\nimport argparse\nfrom textwrap import dedent\n\nimport pandas\n\nimport modin.config as cfg\n\n\ndef print_config_help() -> None:\n    \"\"\"Print configs help messages.\"\"\"\n    for objname in sorted(cfg.__all__):\n        obj = getattr(cfg, objname)\n        if (\n            isinstance(obj, type)\n            and issubclass(obj, cfg.Parameter)\n            and not obj.is_abstract\n        ):\n            print(f\"{obj.get_help()}\\n\\tCurrent value: {obj.get()}\")  # noqa: T201\n\n\ndef export_config_help(filename: str) -> None:\n    \"\"\"\n    Export all configs help messages to the CSV file.\n\n    Parameters\n    ----------\n    filename : str\n        Name of the file to export configs data.\n    \"\"\"\n    configs_data = []\n    default_values = dict(\n        RayRedisPassword=\"random string\",\n        CpuCount=\"multiprocessing.cpu_count()\",\n        
NPartitions=\"equals to MODIN_CPUS env\",\n    )\n    for objname in sorted(cfg.__all__):\n        obj = getattr(cfg, objname)\n        if (\n            isinstance(obj, type)\n            and issubclass(obj, cfg.Parameter)\n            and not obj.is_abstract\n        ):\n            data = {\n                \"Config Name\": obj.__name__,\n                \"Env. Variable Name\": getattr(\n                    obj, \"varname\", \"not backed by environment\"\n                ),\n                \"Default Value\": default_values.get(obj.__name__, obj._get_default()),\n                # `Notes` `-` underlining can't be correctly parsed inside csv table by sphinx\n                \"Description\": dedent(obj.__doc__ or \"\").replace(\n                    \"Notes\\n-----\", \"Notes:\\n\"\n                ),\n                \"Options\": obj.choices,\n            }\n            configs_data.append(data)\n\n    pandas.DataFrame(\n        configs_data,\n        columns=[\n            \"Config Name\",\n            \"Env. Variable Name\",\n            \"Default Value\",\n            \"Description\",\n            \"Options\",\n        ],\n    ).to_csv(filename, index=False)\n\n\nif __name__ == \"__main__\":\n    parser = argparse.ArgumentParser()\n    parser.add_argument(\n        \"--export-path\",\n        dest=\"export_path\",\n        type=str,\n        required=False,\n        default=None,\n        help=\"File path to export configs data.\",\n    )\n    export_path = parser.parse_args().export_path\n    if export_path:\n        export_config_help(export_path)\n    else:\n        print_config_help()\n"
  },
  {
    "path": "modin/config/envvars.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module houses Modin configs originated from environment variables.\"\"\"\n\nimport os\nimport secrets\nimport sys\nimport warnings\nfrom collections import namedtuple\nfrom textwrap import dedent\nfrom typing import Any, NoReturn, Optional\n\nfrom packaging import version\nfrom pandas.util._decorators import doc  # type: ignore[attr-defined]\n\nfrom modin import set_execution\nfrom modin.config.pubsub import (\n    _TYPE_PARAMS,\n    _UNSET,\n    DeprecationDescriptor,\n    ExactStr,\n    Parameter,\n    ValueSource,\n)\n\n\nclass EnvironmentVariable(Parameter, type=str, abstract=True):\n    \"\"\"Base class for environment variables-based configuration.\"\"\"\n\n    varname: Optional[str] = None\n\n    @classmethod\n    def _get_value_from_config(cls) -> Any:\n        \"\"\"\n        Read the value from environment variable.\n\n        Returns\n        -------\n        Any\n            Config raw value if it's set, otherwise `_UNSET`.\n        \"\"\"\n        if cls.varname is None:\n            raise TypeError(\"varname should not be None\")\n        if cls.varname not in os.environ:\n            return _UNSET\n        raw = os.environ[cls.varname]\n        
if not _TYPE_PARAMS[cls.type].verify(raw):\n            # TODO: use and test a better error message, like \"Invalid value\n            # for {cls.varname}: {raw}\"\n            raise ValueError(f\"Unsupported raw value: {raw}\")\n        return _TYPE_PARAMS[cls.type].decode(raw)\n\n    @classmethod\n    def get_help(cls) -> str:\n        \"\"\"\n        Generate user-presentable help for the config.\n\n        Returns\n        -------\n        str\n        \"\"\"\n        help = f\"{cls.varname}: {dedent(cls.__doc__ or 'Unknown').strip()}\\n\\tProvide {_TYPE_PARAMS[cls.type].help}\"\n        if cls.choices:\n            help += f\" (valid examples are: {', '.join(str(c) for c in cls.choices)})\"\n        return help\n\n\nclass EnvWithSibilings(\n    EnvironmentVariable,\n    # 'type' is a mandatory parameter for '__init_subclasses__', so we have to pass something here,\n    # this doesn't force child classes to have 'str' type though, they actually can be any type\n    type=str,\n):\n    \"\"\"Ensure values synchronization between sibling parameters.\"\"\"\n\n    _update_sibling = True\n\n    @classmethod\n    def _sibling(cls) -> type[\"EnvWithSibilings\"]:\n        \"\"\"Return a sibling parameter.\"\"\"\n        raise NotImplementedError()\n\n    @classmethod\n    def get(cls) -> Any:\n        \"\"\"\n        Get parameter's value and ensure that it's equal to the sibling's value.\n\n        Returns\n        -------\n        Any\n        \"\"\"\n        sibling = cls._sibling()\n\n        if sibling._value is _UNSET and cls._value is _UNSET:\n            super().get()\n            with warnings.catch_warnings():\n                # filter warnings that can potentially come from the potentially deprecated sibling\n                warnings.filterwarnings(\"ignore\", category=FutureWarning)\n                super(EnvWithSibilings, sibling).get()\n\n            if (\n                cls._value_source\n                == sibling._value_source\n                == 
ValueSource.GOT_FROM_CFG_SOURCE\n            ):\n                raise ValueError(\n                    f\"Configuration is ambiguous. You cannot set '{cls.varname}' and '{sibling.varname}' at the same time.\"\n                )\n\n            # further we assume that there are only two valid sources for the variables: 'GOT_FROM_CFG' and 'DEFAULT',\n            # as otherwise we wouldn't ended-up in this branch at all, because all other ways of setting a value\n            # changes the '._value' attribute from '_UNSET' to something meaningful\n            from modin.error_message import ErrorMessage\n\n            if cls._value_source == ValueSource.GOT_FROM_CFG_SOURCE:\n                ErrorMessage.catch_bugs_and_request_email(\n                    failure_condition=sibling._value_source != ValueSource.DEFAULT\n                )\n                sibling._value = cls._value\n                sibling._value_source = ValueSource.GOT_FROM_CFG_SOURCE\n            elif sibling._value_source == ValueSource.GOT_FROM_CFG_SOURCE:\n                ErrorMessage.catch_bugs_and_request_email(\n                    failure_condition=cls._value_source != ValueSource.DEFAULT\n                )\n                cls._value = sibling._value\n                cls._value_source = ValueSource.GOT_FROM_CFG_SOURCE\n            else:\n                ErrorMessage.catch_bugs_and_request_email(\n                    failure_condition=cls._value_source != ValueSource.DEFAULT\n                    or sibling._value_source != ValueSource.DEFAULT\n                )\n                # propagating 'cls' default value to the sibling\n                sibling._value = cls._value\n        return super().get()\n\n    @classmethod\n    def put(cls, value: Any) -> None:\n        \"\"\"\n        Set a new value to this parameter as well as to its sibling.\n\n        Parameters\n        ----------\n        value : Any\n        \"\"\"\n        super().put(value)\n        # avoid getting into an infinite 
recursion\n        if cls._update_sibling:\n            cls._update_sibling = False\n            try:\n                with warnings.catch_warnings():\n                    # filter potential future warnings of the sibling\n                    warnings.filterwarnings(\"ignore\", category=FutureWarning)\n                    cls._sibling().put(value)\n            finally:\n                cls._update_sibling = True\n\n\nclass EnvironmentVariableDisallowingExecutionAndBackendBothSet(\n    EnvironmentVariable,\n    type=EnvironmentVariable.type,\n    abstract=True,\n):\n    \"\"\"Subclass to disallow getting this variable from the environment when both execution and backend are set in the environment.\"\"\"\n\n    @classmethod\n    @doc(EnvironmentVariable._get_value_from_config.__doc__)\n    def _get_value_from_config(cls) -> str:\n        if Backend.varname in os.environ and (\n            Engine.varname in os.environ or StorageFormat.varname in os.environ\n        ):\n            # Handling this case is tricky, in part because the combination of\n            # Backend and Engine/StorageFormat may be invalid. 
For now just\n            # disallow it.\n            raise ValueError(\"Can't specify both execution and backend in environment\")\n        return super()._get_value_from_config()\n\n\nclass IsDebug(EnvironmentVariable, type=bool):\n    \"\"\"Force Modin engine to be \"Python\" unless specified by $MODIN_ENGINE.\"\"\"\n\n    varname = \"MODIN_DEBUG\"\n\n\nclass Engine(\n    EnvironmentVariableDisallowingExecutionAndBackendBothSet,\n    type=str,\n):\n    \"\"\"Distribution engine to run queries by.\"\"\"\n\n    varname = \"MODIN_ENGINE\"\n    choices = (\"Ray\", \"Dask\", \"Python\", \"Unidist\", \"Native\")\n\n    NOINIT_ENGINES = {\n        \"Python\",\n        \"Native\",\n    }  # engines that don't require initialization, useful for unit tests\n\n    has_custom_engine = False\n\n    @classmethod\n    def _get_default(cls) -> str:\n        \"\"\"\n        Get default value of the config.\n\n        Returns\n        -------\n        str\n        \"\"\"\n        from modin.utils import MIN_DASK_VERSION, MIN_RAY_VERSION, MIN_UNIDIST_VERSION\n\n        # If there's a custom engine, we don't need to check for any engine\n        # dependencies. 
Return the default \"Python\" engine.\n        if IsDebug.get() or cls.has_custom_engine:\n            return \"Python\"\n        try:\n            import ray\n\n        except ImportError:\n            pass\n        else:\n            if version.parse(ray.__version__) < MIN_RAY_VERSION:\n                raise ImportError(\n                    'Please `pip install \"modin[ray]\"` to install compatible Ray '\n                    + \"version \"\n                    + f\"(>={MIN_RAY_VERSION}).\"\n                )\n            return \"Ray\"\n        try:\n            import dask\n            import distributed\n\n        except ImportError:\n            pass\n        else:\n            if (\n                version.parse(dask.__version__) < MIN_DASK_VERSION\n                or version.parse(distributed.__version__) < MIN_DASK_VERSION\n            ):\n                raise ImportError(\n                    f'Please `pip install \"modin[dask]\"` to install compatible Dask version (>={MIN_DASK_VERSION}).'\n                )\n            return \"Dask\"\n        try:\n            import unidist\n\n        except ImportError:\n            pass\n        else:\n            if version.parse(unidist.__version__) < MIN_UNIDIST_VERSION:\n                raise ImportError(\n                    'Please `pip install \"unidist[mpi]\"` to install compatible unidist on MPI '\n                    + \"version \"\n                    + f\"(>={MIN_UNIDIST_VERSION}).\"\n                )\n            return \"Unidist\"\n        raise ImportError(\n            \"Please refer to installation documentation page to install an engine\"\n        )\n\n    @classmethod\n    @doc(Parameter.add_option.__doc__)\n    def add_option(cls, choice: Any) -> Any:\n        choice = super().add_option(choice)\n        cls.NOINIT_ENGINES.add(choice)\n        cls.has_custom_engine = True\n        return choice\n\n    @classmethod\n    def put(cls, value: str) -> None:\n        \"\"\"\n        Set the engine 
value.\n\n        Parameters\n        ----------\n        value : str\n            Engine value to set.\n        \"\"\"\n        value = cls.normalize(value)\n        # Backend.put() will set Engine.\n        Backend.put(\n            Backend.get_backend_for_execution(\n                Execution(engine=value, storage_format=StorageFormat.get())\n            )\n        )\n\n    @classmethod\n    def get(cls) -> str:\n        \"\"\"\n        Get the engine value.\n\n        Returns\n        -------\n        str\n            Engine value.\n        \"\"\"\n        # We have to override get() because Engine may need to get its value\n        # from the OS's environment variables for Backend or Engine.\n\n        cls._warn_if_deprecated()\n\n        # First, check if we've already set the engine value.\n        if cls._value is not _UNSET:\n            return cls._value\n\n        engine_config_value = cls._get_value_from_config()\n        backend_config_value = Backend._get_value_from_config()\n\n        # If Engine is in the OS's configuration, use the configured Engine value.\n        # Otherwise, use the Backend config value if that exists. 
If it doesn't,\n        # fall back to the default Engine value.\n        cls._value = (\n            engine_config_value\n            if engine_config_value is not _UNSET\n            else (\n                Backend.get_execution_for_backend(backend_config_value).engine\n                if backend_config_value is not _UNSET\n                else cls._get_default()\n            )\n        )\n\n        return cls._value\n\n\nclass StorageFormat(EnvironmentVariableDisallowingExecutionAndBackendBothSet, type=str):\n    \"\"\"Engine to run on a single node of distribution.\"\"\"\n\n    @classmethod\n    def put(cls, value: str) -> None:\n        \"\"\"\n        Set the storage format value.\n\n        Parameters\n        ----------\n        value : str\n            Storage format value to set.\n        \"\"\"\n        value = cls.normalize(value)\n        # Backend.put() will set StorageFormat.\n        Backend.put(\n            Backend.get_backend_for_execution(\n                Execution(engine=Engine.get(), storage_format=value)\n            )\n        )\n\n    @classmethod\n    def get(cls) -> str:\n        \"\"\"\n        Get the storage format value.\n\n        Returns\n        -------\n        str\n            Storage format value.\n        \"\"\"\n        # We have to override get() because StorageFormat may need to get its\n        # value from the OS's environment variables for Backend or StorageFormat.\n\n        cls._warn_if_deprecated()\n\n        # First, check if we've already set the storage format value.\n        if cls._value is not _UNSET:\n            return cls._value\n\n        storage_format_config_value = cls._get_value_from_config()\n        backend_config_value = Backend._get_value_from_config()\n\n        # If StorageFormat is in the OS's configuration, use the configured\n        # StorageFormat value. Otherwise, use the Backend config value if that\n        # exists. 
If it doesn't, fall back to the default StorageFormat value.\n        cls._value = (\n            storage_format_config_value\n            if storage_format_config_value is not _UNSET\n            else (\n                Backend.get_execution_for_backend(backend_config_value).storage_format\n                if backend_config_value is not _UNSET\n                else cls._get_default()\n            )\n        )\n\n        return cls._value\n\n    varname = \"MODIN_STORAGE_FORMAT\"\n    default = \"Pandas\"\n    choices = (\"Pandas\", \"Native\")\n\n\nExecution = namedtuple(\"Execution\", [\"storage_format\", \"engine\"])\n\n\nclass Backend(EnvironmentVariableDisallowingExecutionAndBackendBothSet, type=str):\n    \"\"\"\n    An alias for execution, i.e. the combination of StorageFormat and Engine.\n\n    Setting backend may change StorageFormat and/or Engine to the corresponding\n    respective values, and setting Engine or StorageFormat may change Backend.\n\n    Modin's built-in backends include:\n        - \"Ray\" <-> (StorageFormat=\"Pandas\", Engine=\"Ray\")\n        - \"Dask\" <-> (StorageFormat=\"Pandas\", Engine=\"Dask\")\n        - \"Python_Test\" <-> (StorageFormat=\"Pandas\", Engine=\"Python\")\n            - This execution mode is meant for testing only.\n        - \"Unidist\" <-> (StorageFormat=\"Pandas\", Engine=\"Unidist\")\n        - \"Pandas\" <-> (StorageFormat=\"Native\", Engine=\"Native\")\n    \"\"\"\n\n    _BACKEND_TO_EXECUTION: dict[str, Execution] = {}\n    _EXECUTION_TO_BACKEND: dict[Execution, str] = {}\n    varname: str = \"MODIN_BACKEND\"\n    choices: tuple[str, ...] 
= (\"Ray\", \"Dask\", \"Python_Test\", \"Unidist\", \"Pandas\")\n\n    @classmethod\n    def put(cls, value: str) -> None:\n        \"\"\"\n        Set the backend value.\n\n        Parameters\n        ----------\n        value : str\n            Backend value to set.\n        \"\"\"\n        execution = cls.get_execution_for_backend(value)\n        set_execution(execution.engine, execution.storage_format)\n\n    @classmethod\n    def _get_default(cls) -> str:\n        \"\"\"\n        Get the default backend value.\n\n        Returns\n        -------\n        str\n            Default backend value.\n        \"\"\"\n        return cls._EXECUTION_TO_BACKEND[\n            Execution(StorageFormat._get_default(), Engine._get_default())\n        ]\n\n    @classmethod\n    def register_backend(cls: type[\"Backend\"], name: str, execution: Execution) -> None:\n        \"\"\"\n        Register a new backend.\n\n        Parameters\n        ----------\n        name : str\n            Backend name.\n        execution : Execution\n            Execution that corresponds to the backend.\n        \"\"\"\n        name = cls.normalize(name)\n        super().add_option(name)\n        if name in cls._BACKEND_TO_EXECUTION:\n            raise ValueError(\n                f\"Backend '{name}' is already registered with the execution {cls._BACKEND_TO_EXECUTION[name]}.\"\n            )\n        if execution in cls._EXECUTION_TO_BACKEND:\n            raise ValueError(\n                f\"{execution} is already registered with the backend {cls._EXECUTION_TO_BACKEND[execution]}.\"\n            )\n        cls._BACKEND_TO_EXECUTION[name] = execution\n        cls._EXECUTION_TO_BACKEND[execution] = name\n\n    @classmethod\n    def add_option(cls, choice: str) -> NoReturn:\n        \"\"\"\n        Raise an exception for trying to add an option to Backend directly.\n\n        Parameters\n        ----------\n        choice : str\n            Choice to add. 
Unused.\n\n        Raises\n        ------\n        ValueError\n            Always.\n        \"\"\"\n        raise ValueError(\n            \"Cannot add an option to Backend directly. Use Backend.register_backend instead.\"\n        )\n\n    @classmethod\n    def set_active_backends(cls, new_choices: tuple) -> None:\n        \"\"\"\n        Set the active backends available for manual and automatic switching.\n\n        Other backends may have been registered, and those backends remain registered, but the\n        set of engines that can be used is dynamically modified.\n\n        Parameters\n        ----------\n        new_choices : tuple\n            Choices to add.\n\n        Raises\n        ------\n        ValueError\n            Raises a ValueError when the set of new_choices are not already registered\n        \"\"\"\n        registered_backends = cls._BACKEND_TO_EXECUTION\n        for i in new_choices:\n            if i not in registered_backends:\n                raise ValueError(\n                    f\"Active backend choices {new_choices} are not all registered.\"\n                )\n        cls.choices = new_choices\n\n    @classmethod\n    def activate(cls, backend: str) -> None:\n        \"\"\"\n        Activate a backend that was previously registered.\n\n        This is a no-op if the backend is already active.\n\n        Parameters\n        ----------\n        backend : str\n            Backend to activate.\n\n        Raises\n        ------\n        ValueError\n            Raises a ValueError if backend was not previously registered.\n        \"\"\"\n        if backend not in cls._BACKEND_TO_EXECUTION:\n            raise ValueError(f\"Unknown backend '{backend}' is not registered.\")\n        cls.choices = (*cls.choices, backend)\n\n    @classmethod\n    def get_active_backends(cls) -> tuple[str, ...]:\n        \"\"\"\n        Get the active backends available for manual and automatic switching.\n\n        Returns\n        -------\n        tuple[str, 
...]\n            returns the active set of backends for switching\n        \"\"\"\n        return cls.choices\n\n    @classmethod\n    def get_backend_for_execution(cls, execution: Execution) -> str:\n        \"\"\"\n        Get the backend for the execution.\n\n        Parameters\n        ----------\n        execution : Execution\n            Execution to get the backend for.\n\n        Returns\n        -------\n        str\n            Backend for the execution.\n        \"\"\"\n        if execution not in cls._EXECUTION_TO_BACKEND:\n            raise ValueError(\n                f\"{execution} has no known backend. Please register a \"\n                + \"backend for it with Backend.register_backend()\"\n            )\n        return cls._EXECUTION_TO_BACKEND[execution]\n\n    @classmethod\n    def get_execution_for_backend(cls, backend: str) -> Execution:\n        \"\"\"\n        Get the execution for the given backend.\n\n        Parameters\n        ----------\n        backend : str\n            Backend to get the execution for.\n\n        Returns\n        -------\n        execution : Execution\n            The execution for the given backend\n        \"\"\"\n        if not isinstance(backend, str):\n            raise TypeError(\n                \"Backend value should be a string, but instead it is \"\n                + f\"{repr(backend)} of type {type(backend)}.\"\n            )\n        normalized_value = cls.normalize(backend)\n        if normalized_value not in cls.choices:\n            if normalized_value in cls._BACKEND_TO_EXECUTION:\n                raise ValueError(\n                    f\"Backend '{backend}' is not currently active. Activate it first with Backend.activate('{backend})'.\"\n                )\n            backend_choice_string = \", \".join(f\"'{choice}'\" for choice in cls.choices)\n            raise ValueError(\n                f\"Unknown backend '{backend}'. 
Available backends are: \"\n                + backend_choice_string\n            )\n        if normalized_value not in cls._BACKEND_TO_EXECUTION:\n            raise ValueError(\n                f\"Backend '{backend}' has no known execution. Please \"\n                + \"register an execution for it with Backend.register_backend().\"\n            )\n        return cls._BACKEND_TO_EXECUTION[normalized_value]\n\n    @classmethod\n    def get(cls) -> str:\n        \"\"\"\n        Get the backend.\n\n        Returns\n        -------\n        str\n            Backend.\n        \"\"\"\n        # We have to override get() because Backend may need to get its value\n        # from the OS's environment variables for Backend or Engine.\n\n        cls._warn_if_deprecated()\n\n        # First, check if we've already set the Backend value.\n        if cls._value is not _UNSET:\n            return cls._value\n\n        backend_config_value = Backend._get_value_from_config()\n\n        # If Backend is in the OS's configuration, use the configured Backend\n        # value. 
Otherwise, we need to figure out the Backend value based on\n        # the Engine and StorageFormat values.\n        cls._value = (\n            backend_config_value\n            if backend_config_value is not _UNSET\n            else cls.get_backend_for_execution(\n                Execution(storage_format=StorageFormat.get(), engine=Engine.get())\n            )\n        )\n\n        return cls._value\n\n\nBackend.register_backend(\"Ray\", Execution(\"Pandas\", \"Ray\"))\nBackend.register_backend(\"Dask\", Execution(\"Pandas\", \"Dask\"))\nBackend.register_backend(\"Python_Test\", Execution(\"Pandas\", \"Python\"))\nBackend.register_backend(\"Unidist\", Execution(\"Pandas\", \"Unidist\"))\nBackend.register_backend(\"Pandas\", Execution(\"Native\", \"Native\"))\n\n\nclass AutoSwitchBackend(EnvironmentVariable, type=bool):\n    \"\"\"\n    Whether automatic backend switching is allowed.\n\n    When this flag is set, a Modin backend can attempt to automatically choose an appropriate backend\n    for different operations based on features of the input data. When disabled, backends should\n    avoid implicit backend switching outside of explicit operations like `to_pandas` and `to_ray`.\n    \"\"\"\n\n    varname = \"MODIN_AUTO_SWITCH_BACKENDS\"\n    default = False\n\n    @classmethod\n    def enable(cls) -> None:\n        \"\"\"Enable automatic backend switching.\"\"\"\n        cls.put(True)\n\n    @classmethod\n    def disable(cls) -> None:\n        \"\"\"Disable automatic backend switching.\"\"\"\n        cls.put(False)\n\n\nclass ShowBackendSwitchProgress(EnvironmentVariable, type=bool):\n    \"\"\"\n    Whether to show progress when switching between backends.\n\n    When enabled, progress messages are displayed during backend switches to inform users\n    about data transfer operations. 
When disabled, backend switches occur silently.\n    \"\"\"\n\n    varname = \"MODIN_BACKEND_SWITCH_PROGRESS\"\n    default = True\n\n    @classmethod\n    def enable(cls) -> None:\n        \"\"\"Enable backend switch progress display.\"\"\"\n        cls.put(True)\n\n    @classmethod\n    def disable(cls) -> None:\n        \"\"\"Disable backend switch progress display.\"\"\"\n        cls.put(False)\n\n\nclass IsExperimental(EnvironmentVariable, type=bool):\n    \"\"\"Whether to Turn on experimental features.\"\"\"\n\n    varname = \"MODIN_EXPERIMENTAL\"\n\n\nclass IsRayCluster(EnvironmentVariable, type=bool):\n    \"\"\"Whether Modin is running on pre-initialized Ray cluster.\"\"\"\n\n    varname = \"MODIN_RAY_CLUSTER\"\n\n\nclass RayRedisAddress(EnvironmentVariable, type=ExactStr):\n    \"\"\"Redis address to connect to when running in Ray cluster.\"\"\"\n\n    varname = \"MODIN_REDIS_ADDRESS\"\n\n\nclass RayRedisPassword(EnvironmentVariable, type=ExactStr):\n    \"\"\"What password to use for connecting to Redis.\"\"\"\n\n    varname = \"MODIN_REDIS_PASSWORD\"\n    default = secrets.token_hex(32)\n\n\nclass RayInitCustomResources(EnvironmentVariable, type=dict):\n    \"\"\"\n    Ray node's custom resources to initialize with.\n\n    Visit Ray documentation for more details:\n    https://docs.ray.io/en/latest/ray-core/scheduling/resources.html#custom-resources\n\n    Notes\n    -----\n    Relying on Modin to initialize Ray, you should set this config\n    for the proper initialization with custom resources.\n    \"\"\"\n\n    varname = \"MODIN_RAY_INIT_CUSTOM_RESOURCES\"\n    default = None\n\n\nclass RayTaskCustomResources(EnvironmentVariable, type=dict):\n    \"\"\"\n    Ray node's custom resources to request them in tasks or actors.\n\n    Visit Ray documentation for more details:\n    https://docs.ray.io/en/latest/ray-core/scheduling/resources.html#custom-resources\n\n    Notes\n    -----\n    You can use this config to limit the parallelism for the entire 
workflow\n    by setting the config at the very beginning.\n    >>> import modin.config as cfg\n    >>> cfg.RayTaskCustomResources.put({\"special_hardware\": 0.001})\n    This way each single remote task or actor will require 0.001 of \"special_hardware\" to run.\n    You can also use this config to limit the parallelism for a certain operation\n    by setting the config with context.\n    >>> with context(RayTaskCustomResources={\"special_hardware\": 0.001}):\n    ...     df.<op>\n    This way each single remote task or actor will require 0.001 of \"special_hardware\" to run\n    within the context only.\n    \"\"\"\n\n    varname = \"MODIN_RAY_TASK_CUSTOM_RESOURCES\"\n    default = None\n\n\nclass CpuCount(EnvironmentVariable, type=int):\n    \"\"\"How many CPU cores to use during initialization of the Modin engine.\"\"\"\n\n    varname = \"MODIN_CPUS\"\n\n    @classmethod\n    def _put(cls, value: int) -> None:\n        \"\"\"\n        Put specific value if CpuCount wasn't set by a user yet.\n\n        Parameters\n        ----------\n        value : int\n            Config value to set.\n\n        Notes\n        -----\n        This method is used to set CpuCount from cluster resources internally\n        and should not be called by a user.\n        \"\"\"\n        if cls.get_value_source() == ValueSource.DEFAULT:\n            cls.put(value)\n\n    @classmethod\n    def _get_default(cls) -> int:\n        \"\"\"\n        Get default value of the config.\n\n        Returns\n        -------\n        int\n        \"\"\"\n        import multiprocessing\n\n        return multiprocessing.cpu_count()\n\n    @classmethod\n    def get(cls) -> int:\n        \"\"\"\n        Get ``CpuCount`` with extra checks.\n\n        Returns\n        -------\n        int\n        \"\"\"\n        cpu_count = super().get()\n        if cpu_count <= 0:\n            raise ValueError(f\"`CpuCount` should be > 0; current value: {cpu_count}\")\n        return cpu_count\n\n\nclass 
GpuCount(EnvironmentVariable, type=int):\n    \"\"\"How many GPU devices to utilize across the whole distribution.\"\"\"\n\n    varname = \"MODIN_GPUS\"\n\n\nclass Memory(EnvironmentVariable, type=int):\n    \"\"\"\n    How much memory (in bytes) to give to an execution engine.\n\n    Notes\n    -----\n    * In Ray case: the amount of memory to start the Plasma object store with.\n    * In Dask case: the amount of memory that is given to each worker depending on CPUs used.\n    \"\"\"\n\n    varname = \"MODIN_MEMORY\"\n\n\nclass NPartitions(EnvironmentVariable, type=int):\n    \"\"\"How many partitions to use for a Modin DataFrame (along each axis).\"\"\"\n\n    varname = \"MODIN_NPARTITIONS\"\n\n    @classmethod\n    def _put(cls, value: int) -> None:\n        \"\"\"\n        Put specific value if NPartitions wasn't set by a user yet.\n\n        Parameters\n        ----------\n        value : int\n            Config value to set.\n\n        Notes\n        -----\n        This method is used to set NPartitions from cluster resources internally\n        and should not be called by a user.\n        \"\"\"\n        if cls.get_value_source() == ValueSource.DEFAULT:\n            cls.put(value)\n\n    @classmethod\n    def _get_default(cls) -> int:\n        \"\"\"\n        Get default value of the config.\n\n        Returns\n        -------\n        int\n        \"\"\"\n        return CpuCount.get()\n\n    @classmethod\n    def get(cls) -> int:\n        \"\"\"\n        Get ``NPartitions`` with extra checks.\n\n        Returns\n        -------\n        int\n        \"\"\"\n        nparts = super().get()\n        if nparts <= 0:\n            raise ValueError(f\"`NPartitions` should be > 0; current value: {nparts}\")\n        return nparts\n\n\nclass TestDatasetSize(EnvironmentVariable, type=str):\n    \"\"\"Dataset size for running some tests.\"\"\"\n\n    varname = \"MODIN_TEST_DATASET_SIZE\"\n    choices = (\"Small\", \"Normal\", \"Big\")\n\n\nclass 
TrackFileLeaks(EnvironmentVariable, type=bool):\n    \"\"\"Whether to track for open file handles leakage during testing.\"\"\"\n\n    varname = \"MODIN_TEST_TRACK_FILE_LEAKS\"\n    # Turn off tracking on Windows by default because\n    # psutil's open_files() can be extremely slow on Windows (up to adding a few hours).\n    # see https://github.com/giampaolo/psutil/pull/597\n    default = sys.platform != \"win32\"\n\n\nclass AsvImplementation(EnvironmentVariable, type=ExactStr):\n    \"\"\"Allows to select a library that we will use for testing performance.\"\"\"\n\n    varname = \"MODIN_ASV_USE_IMPL\"\n    choices = (\"modin\", \"pandas\")\n\n    default = \"modin\"\n\n\nclass AsvDataSizeConfig(EnvironmentVariable, type=ExactStr):\n    \"\"\"Allows to override default size of data (shapes).\"\"\"\n\n    varname = \"MODIN_ASV_DATASIZE_CONFIG\"\n    default = None\n\n\nclass ProgressBar(EnvironmentVariable, type=bool):\n    \"\"\"Whether or not to show the progress bar.\"\"\"\n\n    varname = \"MODIN_PROGRESS_BAR\"\n    default = False\n\n    @classmethod\n    def enable(cls) -> None:\n        \"\"\"Enable ``ProgressBar`` feature.\"\"\"\n        cls.put(True)\n\n    @classmethod\n    def disable(cls) -> None:\n        \"\"\"Disable ``ProgressBar`` feature.\"\"\"\n        cls.put(False)\n\n    @classmethod\n    def put(cls, value: bool) -> None:\n        \"\"\"\n        Set ``ProgressBar`` value only if synchronous benchmarking is disabled.\n\n        Parameters\n        ----------\n        value : bool\n            Config value to set.\n        \"\"\"\n        if value and BenchmarkMode.get():\n            raise ValueError(\"ProgressBar isn't compatible with BenchmarkMode\")\n        super().put(value)\n\n\nclass BenchmarkMode(EnvironmentVariable, type=bool):\n    \"\"\"Whether or not to perform computations synchronously.\"\"\"\n\n    varname = \"MODIN_BENCHMARK_MODE\"\n    default = False\n\n    @classmethod\n    def put(cls, value: bool) -> None:\n        
\"\"\"\n        Set ``BenchmarkMode`` value only if progress bar feature is disabled.\n\n        Parameters\n        ----------\n        value : bool\n            Config value to set.\n        \"\"\"\n        if value and ProgressBar.get():\n            raise ValueError(\"BenchmarkMode isn't compatible with ProgressBar\")\n        super().put(value)\n\n\nclass LogMode(EnvironmentVariable, type=ExactStr):\n    \"\"\"Set ``LogMode`` value if users want to opt-in.\"\"\"\n\n    varname = \"MODIN_LOG_MODE\"\n    choices = (\"enable\", \"disable\")\n    default = \"disable\"\n\n    @classmethod\n    def enable(cls) -> None:\n        \"\"\"Enable all logging levels.\"\"\"\n        cls.put(\"enable\")\n\n    @classmethod\n    def disable(cls) -> None:\n        \"\"\"Disable logging feature.\"\"\"\n        cls.put(\"disable\")\n\n\nclass LogMemoryInterval(EnvironmentVariable, type=int):\n    \"\"\"Interval (in seconds) to profile memory utilization for logging.\"\"\"\n\n    varname = \"MODIN_LOG_MEMORY_INTERVAL\"\n    default = 5\n\n    @classmethod\n    def put(cls, value: int) -> None:\n        \"\"\"\n        Set ``LogMemoryInterval`` with extra checks.\n\n        Parameters\n        ----------\n        value : int\n            Config value to set.\n        \"\"\"\n        if value <= 0:\n            raise ValueError(f\"Log memory Interval should be > 0, passed value {value}\")\n        super().put(value)\n\n    @classmethod\n    def get(cls) -> int:\n        \"\"\"\n        Get ``LogMemoryInterval`` with extra checks.\n\n        Returns\n        -------\n        int\n        \"\"\"\n        log_memory_interval = super().get()\n        if log_memory_interval <= 0:\n            raise ValueError(\n                f\"`LogMemoryInterval` should be > 0; current value: {log_memory_interval}\"\n            )\n        return log_memory_interval\n\n\nclass LogFileSize(EnvironmentVariable, type=int):\n    \"\"\"Max size of logs (in MBs) to store per Modin job.\"\"\"\n\n    varname 
= \"MODIN_LOG_FILE_SIZE\"\n    default = 10\n\n    @classmethod\n    def put(cls, value: int) -> None:\n        \"\"\"\n        Set ``LogFileSize`` with extra checks.\n\n        Parameters\n        ----------\n        value : int\n            Config value to set.\n        \"\"\"\n        if value <= 0:\n            raise ValueError(f\"Log file size should be > 0 MB, passed value {value}\")\n        super().put(value)\n\n    @classmethod\n    def get(cls) -> int:\n        \"\"\"\n        Get ``LogFileSize`` with extra checks.\n\n        Returns\n        -------\n        int\n        \"\"\"\n        log_file_size = super().get()\n        if log_file_size <= 0:\n            raise ValueError(\n                f\"`LogFileSize` should be > 0; current value: {log_file_size}\"\n            )\n        return log_file_size\n\n\nclass MetricsMode(EnvironmentVariable, type=ExactStr):\n    \"\"\"\n    Set ``MetricsMode`` value to disable/enable metrics collection.\n\n    Metric handlers are registered through `add_metric_handler` and can\n    be used to record graphite-style timings or values. It is the\n    responsibility of the handler to define how those emitted metrics\n    are handled.\n    \"\"\"\n\n    varname = \"MODIN_METRICS_MODE\"\n    choices = (\"enable\", \"disable\")\n    default = \"enable\"\n\n    @classmethod\n    def enable(cls) -> None:\n        \"\"\"Enable all metric collection.\"\"\"\n        cls.put(\"enable\")\n\n    @classmethod\n    def disable(cls) -> None:\n        \"\"\"Disable all metric collection.\"\"\"\n        cls.put(\"disable\")\n\n\nclass PersistentPickle(EnvironmentVariable, type=bool):\n    \"\"\"Whether serialization should be persistent.\"\"\"\n\n    varname = \"MODIN_PERSISTENT_PICKLE\"\n    # When set to off, it allows faster serialization which is only\n    # valid in current run (i.e. 
useless for saving to disk).\n    # When set to on, Modin objects could be saved to disk and loaded\n    # but serialization/deserialization could take more time.\n    default = False\n\n\nclass MinPartitionSize(EnvironmentVariable, type=int):\n    \"\"\"\n    Minimum number of rows/columns in a single pandas partition split.\n\n    Once a partition for a pandas dataframe has more than this many elements,\n    Modin adds another partition.\n    \"\"\"\n\n    varname = \"MODIN_MIN_PARTITION_SIZE\"\n    default = 32\n\n    @classmethod\n    def put(cls, value: int) -> None:\n        \"\"\"\n        Set ``MinPartitionSize`` with extra checks.\n\n        Parameters\n        ----------\n        value : int\n            Config value to set.\n        \"\"\"\n        if value <= 0:\n            raise ValueError(f\"Min partition size should be > 0, passed value {value}\")\n        super().put(value)\n\n    @classmethod\n    def get(cls) -> int:\n        \"\"\"\n        Get ``MinPartitionSize`` with extra checks.\n\n        Returns\n        -------\n        int\n        \"\"\"\n        from modin.error_message import ErrorMessage\n\n        ErrorMessage.single_warning(\n            \"`MinPartitionSize` is deprecated and will be removed in a future version. 
\"\n            + \"This config has no longer effect, \"\n            + \"use `MinRowPartitionSize` and `MinColumnPartitionSize` instead.\",\n            FutureWarning,\n        )\n        min_partition_size = super().get()\n        if min_partition_size <= 0:\n            raise ValueError(\n                f\"`MinPartitionSize` should be > 0; current value: {min_partition_size}\"\n            )\n        return min_partition_size\n\n\nclass MinRowPartitionSize(EnvironmentVariable, type=int):\n    \"\"\"\n    Minimum number of rows in a single pandas partition split.\n\n    Once a partition for a pandas dataframe has more than this many elements,\n    Modin adds another partition.\n    \"\"\"\n\n    varname = \"MODIN_MIN_ROW_PARTITION_SIZE\"\n    default = 32\n\n    @classmethod\n    def put(cls, value: int) -> None:\n        \"\"\"\n        Set ``MinRowPartitionSize`` with extra checks.\n\n        Parameters\n        ----------\n        value : int\n            Config value to set.\n        \"\"\"\n        if value <= 0:\n            raise ValueError(\n                f\"Min row partition size should be > 0, passed value {value}\"\n            )\n        super().put(value)\n\n    @classmethod\n    def get(cls) -> int:\n        \"\"\"\n        Get ``MinRowPartitionSize`` with extra checks.\n\n        Returns\n        -------\n        int\n        \"\"\"\n        min_row_partition_size = super().get()\n        if min_row_partition_size <= 0:\n            raise ValueError(\n                f\"`MinRowPartitionSize` should be > 0; current value: {min_row_partition_size}\"\n            )\n        return min_row_partition_size\n\n\nclass MinColumnPartitionSize(EnvironmentVariable, type=int):\n    \"\"\"\n    Minimum number of columns in a single pandas partition split.\n\n    Once a partition for a pandas dataframe has more than this many elements,\n    Modin adds another partition.\n    \"\"\"\n\n    varname = \"MODIN_MIN_COLUMN_PARTITION_SIZE\"\n    default = 32\n\n    
@classmethod\n    def put(cls, value: int) -> None:\n        \"\"\"\n        Set ``MinColumnPartitionSize`` with extra checks.\n\n        Parameters\n        ----------\n        value : int\n            Config value to set.\n        \"\"\"\n        if value <= 0:\n            raise ValueError(\n                f\"Min column partition size should be > 0, passed value {value}\"\n            )\n        super().put(value)\n\n    @classmethod\n    def get(cls) -> int:\n        \"\"\"\n        Get ``MinColumnPartitionSize`` with extra checks.\n\n        Returns\n        -------\n        int\n        \"\"\"\n        min_column_partition_size = super().get()\n        if min_column_partition_size <= 0:\n            raise ValueError(\n                f\"`MinColumnPartitionSize` should be > 0; current value: {min_column_partition_size}\"\n            )\n        return min_column_partition_size\n\n\nclass TestReadFromSqlServer(EnvironmentVariable, type=bool):\n    \"\"\"Set to true to test reading from SQL server.\"\"\"\n\n    varname = \"MODIN_TEST_READ_FROM_SQL_SERVER\"\n    default = False\n\n\nclass TestReadFromPostgres(EnvironmentVariable, type=bool):\n    \"\"\"Set to true to test reading from Postgres.\"\"\"\n\n    varname = \"MODIN_TEST_READ_FROM_POSTGRES\"\n    default = False\n\n\nclass GithubCI(EnvironmentVariable, type=bool):\n    \"\"\"Set to true when running Modin in GitHub CI.\"\"\"\n\n    varname = \"MODIN_GITHUB_CI\"\n    default = False\n\n\nclass ModinNumpy(EnvironmentVariable, type=bool):\n    \"\"\"Set to true to use Modin's implementation of NumPy API.\"\"\"\n\n    varname = \"MODIN_NUMPY\"\n    default = False\n\n\nclass RangePartitioning(EnvironmentVariable, type=bool):\n    \"\"\"\n    Set to true to use Modin's range-partitioning implementation where possible.\n\n    Please refer to documentation for cases where enabling this options would be beneficial:\n    
https://modin.readthedocs.io/en/stable/flow/modin/experimental/range_partitioning_groupby.html\n    \"\"\"\n\n    varname = \"MODIN_RANGE_PARTITIONING\"\n    default = False\n\n\nclass CIAWSSecretAccessKey(EnvironmentVariable, type=str):\n    \"\"\"Set to AWS_SECRET_ACCESS_KEY when running mock S3 tests for Modin in GitHub CI.\"\"\"\n\n    varname = \"AWS_SECRET_ACCESS_KEY\"\n    default = \"foobar_secret\"\n\n\nclass CIAWSAccessKeyID(EnvironmentVariable, type=str):\n    \"\"\"Set to AWS_ACCESS_KEY_ID when running mock S3 tests for Modin in GitHub CI.\"\"\"\n\n    varname = \"AWS_ACCESS_KEY_ID\"\n    default = \"foobar_key\"\n\n\nclass AsyncReadMode(EnvironmentVariable, type=bool):\n    \"\"\"\n    It does not wait for the end of reading information from the source.\n\n    It basically means that the reading function only launches tasks for the dataframe\n    to be read/created, but does not ensure that the construction is finalized by\n    the time the reading function returns a dataframe.\n\n    This option was brought to improve performance of reading/construction\n    of Modin DataFrames, however it may also:\n\n    1. Increase the peak memory consumption. Since the garbage collection of the\n    temporary objects created during the reading is now also lazy and will only\n    be performed when the reading/construction is actually finished.\n\n    2. 
Can break situations when the source is manually deleted after the reading\n    function returns a result, for example, when reading inside of a context-block\n    that deletes the file on ``__exit__()``.\n    \"\"\"\n\n    varname = \"MODIN_ASYNC_READ_MODE\"\n    default = False\n\n\nclass ReadSqlEngine(EnvironmentVariable, type=str):\n    \"\"\"Engine to run `read_sql`.\"\"\"\n\n    varname = \"MODIN_READ_SQL_ENGINE\"\n    default = \"Pandas\"\n    choices = (\"Pandas\", \"Connectorx\")\n\n\nclass LazyExecution(EnvironmentVariable, type=str):\n    \"\"\"\n    Lazy execution mode.\n\n    Supported values:\n        `Auto` - the execution mode is chosen by the engine for each operation (default value).\n        `On`   - the lazy execution is performed wherever it's possible.\n        `Off`  - the lazy execution is disabled.\n    \"\"\"\n\n    varname = \"MODIN_LAZY_EXECUTION\"\n    choices = (\"Auto\", \"On\", \"Off\")\n    default = \"Auto\"\n\n\nclass DocModule(EnvironmentVariable, type=ExactStr):\n    \"\"\"\n    The module to use that will be used for docstrings.\n\n    The value set here must be a valid, importable module. It should have\n    a `DataFrame`, `Series`, and/or several APIs directly (e.g. 
`read_csv`).\n    \"\"\"\n\n    varname = \"MODIN_DOC_MODULE\"\n    default = \"pandas\"\n\n\nclass DaskThreadsPerWorker(EnvironmentVariable, type=int):\n    \"\"\"Number of threads per Dask worker.\"\"\"\n\n    varname = \"MODIN_DASK_THREADS_PER_WORKER\"\n    default = 1\n\n\nclass NativePandasMaxRows(EnvironmentVariable, type=int):\n    \"\"\"Maximum number of rows which can be processed using local, native, pandas.\"\"\"\n\n    varname = \"MODIN_NATIVE_MAX_ROWS\"\n    default = 10_000_000\n\n\nclass NativePandasTransferThreshold(EnvironmentVariable, type=int):\n    \"\"\"\n    Targeted max number of dataframe rows which should be transferred between engines.\n\n    This is often the same value as MODIN_NATIVE_MAX_ROWS but it can be independently\n    set to change how transfer costs are considered.\n    \"\"\"\n\n    varname = \"MODIN_NATIVE_MAX_XFER_ROWS\"\n    default = 10_000_000\n\n\nclass NativePandasDeepCopy(EnvironmentVariable, type=bool):\n    \"\"\"\n    Whether to perform deep copies when transferring data with the native pandas backend.\n\n    Copies occur when constructing a Modin frame from a native pandas object with\n    `pd.DataFrame(pandas.DataFrame([]))`, or when creating a native pandas frame from a Modin one\n    via `df.modin.to_pandas()`.\n\n    Leaving this flag disabled produces significant performance improvements by reducing the number\n    of copy operations performed. 
However, it may create unexpected results if the user mutates\n    the Modin frame or native pandas frame in-place.\n\n    >>> import pandas  # doctest: +SKIP\n    >>> import modin.pandas as pd  # doctest: +SKIP\n    >>> from modin.config import Backend  # doctest: + SKIP\n    >>> Backend.put(\"Pandas\")  # doctest: +SKIP\n    >>> pandas.set_option(\"mode.copy_on_write\", False)  # doctest: +SKIP\n    >>> native_df = pandas.DataFrame([0])  # doctest: +SKIP\n    >>> modin_df = pd.DataFrame(native_df)  # doctest: +SKIP\n    >>> native_df.loc[0, 0] = -1  # doctest: +SKIP\n    >>> modin_df  # doctest: +SKIP\n       0\n    0 -1\n    \"\"\"\n\n    varname = \"MODIN_NATIVE_DEEP_COPY\"\n    default = False\n\n    @classmethod\n    def enable(cls) -> None:\n        \"\"\"Enable deep copy on frames with the native pandas backend.\"\"\"\n        cls.put(True)\n\n    @classmethod\n    def disable(cls) -> None:\n        \"\"\"Disable deep copy on frames with the native pandas backend.\"\"\"\n        cls.put(False)\n\n\nclass BackendMergeCastInPlace(EnvironmentVariable, type=bool):\n    \"\"\"\n    Whether to cast a DataFrame in-place when performing a merge when using hybrid mode.\n\n    This flag modifies the behavior of a cast performed on operations involving more\n    than one type of query compiler. If enabled the actual cast will be performed in-place\n    and the input DataFrame will have a new backend. 
If disabled the original DataFrame\n    will remain on the same underlying engine.\n    \"\"\"\n\n    varname = \"MODIN_BACKEND_MERGE_CAST_IN_PLACE\"\n    default = True\n\n    @classmethod\n    def enable(cls) -> None:\n        \"\"\"Enable casting in place when performing a merge operation between two different compilers.\"\"\"\n        cls.put(True)\n\n    @classmethod\n    def disable(cls) -> None:\n        \"\"\"Disable casting in place when performing a merge operation between two different compilers.\"\"\"\n        cls.put(False)\n\n\nclass BackendJoinConsiderAllBackends(EnvironmentVariable, type=bool):\n    \"\"\"\n    Whether to consider all active backends when performing a pre-operation switch for join operations.\n\n    Only used when AutoSwitchBackend is active.\n    By default, only backends already present in the arguments of a join operation are considered when\n    switching backends. Enabling this flag will allow join operations that are registered\n    as pre-op switches to consider backends other than those directly present in the arguments.\n    \"\"\"\n\n    varname = \"MODIN_BACKEND_JOIN_CONSIDER_ALL_BACKENDS\"\n    default = True\n\n    @classmethod\n    def enable(cls) -> None:\n        \"\"\"Enable considering all active backends when performing a pre-operation switch for join operations.\"\"\"\n        cls.put(True)\n\n    @classmethod\n    def disable(cls) -> None:\n        \"\"\"Disable considering all active backends when performing a pre-operation switch for join operations.\"\"\"\n        cls.put(False)\n\n\nclass DynamicPartitioning(EnvironmentVariable, type=bool):\n    \"\"\"\n    Set to true to use Modin's dynamic-partitioning implementation where possible.\n\n    Please refer to documentation for cases where enabling this option would be beneficial:\n    https://modin.readthedocs.io/en/stable/usage_guide/optimization_notes/index.html#dynamic-partitioning-in-modin\n    \"\"\"\n\n    varname = \"MODIN_DYNAMIC_PARTITIONING\"\n    default = 
False\n\n\ndef _check_vars() -> None:\n    \"\"\"\n    Check validity of environment variables.\n\n    Look out for any environment variables that start with \"MODIN_\" prefix\n    that are unknown - they might be a typo, so warn a user.\n    \"\"\"\n    valid_names = {\n        obj.varname\n        for obj in globals().values()\n        if isinstance(obj, type)\n        and issubclass(obj, EnvironmentVariable)\n        and not obj.is_abstract\n    }\n    found_names = {name for name in os.environ if name.startswith(\"MODIN_\")}\n    unknown = found_names - valid_names\n    deprecated: dict[str, DeprecationDescriptor] = {\n        obj.varname: obj._deprecation_descriptor\n        for obj in globals().values()\n        if isinstance(obj, type)\n        and issubclass(obj, EnvironmentVariable)\n        and not obj.is_abstract\n        and obj.varname is not None\n        and obj._deprecation_descriptor is not None\n    }\n    found_deprecated = found_names & deprecated.keys()\n    if unknown:\n        warnings.warn(\n            f\"Found unknown environment variable{'s' if len(unknown) > 1 else ''},\"\n            + f\" please check {'their' if len(unknown) > 1 else 'its'} spelling: \"\n            + \", \".join(sorted(unknown))\n        )\n    for depr_var in found_deprecated:\n        warnings.warn(\n            deprecated[depr_var].deprecation_message(use_envvar_names=True),\n            FutureWarning,\n        )\n\n\n_check_vars()\n"
  },
  {
    "path": "modin/config/pubsub.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module houses ``Parameter`` class - base class for all configs.\"\"\"\n\nimport contextlib\nimport warnings\nfrom collections import defaultdict\nfrom enum import IntEnum\nfrom typing import (\n    TYPE_CHECKING,\n    Any,\n    Callable,\n    DefaultDict,\n    Iterator,\n    NamedTuple,\n    Optional,\n    Tuple,\n    cast,\n)\n\nif TYPE_CHECKING:\n    from modin.config.envvars import EnvironmentVariable\n\n\nclass DeprecationDescriptor:\n    \"\"\"\n    Describe deprecated parameter.\n\n    Parameters\n    ----------\n    parameter : type[Parameter]\n        Deprecated parameter.\n    new_parameter : type[Parameter], optional\n        If there's a replacement parameter for the deprecated one, specify it here.\n    when_removed : str, optional\n        If known, the exact release when the deprecated parameter is planned to be removed.\n    \"\"\"\n\n    _parameter: type[\"Parameter\"]\n    _new_parameter: Optional[type[\"Parameter\"]]\n    _when_removed: str\n\n    def __init__(\n        self,\n        parameter: type[\"Parameter\"],\n        new_parameter: Optional[type[\"Parameter\"]] = None,\n        when_removed: Optional[str] = None,\n    ):\n        
self._parameter = parameter\n        self._new_parameter = new_parameter\n        self._when_removed = \"a future\" if when_removed is None else when_removed\n\n    def deprecation_message(self, use_envvar_names: bool = False) -> str:\n        \"\"\"\n        Generate a message to be used in a warning raised when using the deprecated parameter.\n\n        Parameters\n        ----------\n        use_envvar_names : bool, default: False\n            Whether to use environment variable names in the warning. If ``True``, both\n            ``self._parameter`` and ``self._new_parameter`` have to be a type of ``EnvironmentVariable``.\n\n        Returns\n        -------\n        str\n        \"\"\"\n        name = (\n            cast(\"EnvironmentVariable\", self._parameter).varname\n            if use_envvar_names\n            else self._parameter.__name__\n        )\n        msg = f\"'{name}' is deprecated and will be removed in {self._when_removed} version.\"\n        if self._new_parameter is not None:\n            new_name = (\n                cast(\"EnvironmentVariable\", self._new_parameter).varname\n                if use_envvar_names\n                else self._new_parameter.__name__\n            )\n            msg += f\" Use '{new_name}' instead.\"\n        return msg\n\n\nclass TypeDescriptor(NamedTuple):\n    \"\"\"\n    Class for config data manipulating of exact type.\n\n    Parameters\n    ----------\n    decode : callable\n        Callable to decode config value from the raw data.\n    normalize : callable\n        Callable to bring different config value variations to\n        the single form.\n    verify : callable\n        Callable to check that config value satisfies given config\n        type requirements.\n    help : str\n        Class description string.\n    \"\"\"\n\n    decode: Callable[[str], object]\n    normalize: Callable[[object], object]\n    verify: Callable[[object], bool]\n    help: str\n\n\nclass ExactStr(str):\n    \"\"\"Class to be used 
in type params where no transformations are needed.\"\"\"\n\n\n_TYPE_PARAMS = {\n    str: TypeDescriptor(\n        decode=lambda value: value.strip().title(),\n        normalize=lambda value: str(value).strip().title(),\n        verify=lambda value: True,\n        help=\"a case-insensitive string\",\n    ),\n    ExactStr: TypeDescriptor(\n        decode=lambda value: value,\n        normalize=lambda value: value,\n        verify=lambda value: True,\n        help=\"a string\",\n    ),\n    bool: TypeDescriptor(\n        decode=lambda value: value.strip().lower() in {\"true\", \"yes\", \"1\"},\n        normalize=bool,\n        verify=lambda value: isinstance(value, bool)\n        or (\n            isinstance(value, str)\n            and value.strip().lower() in {\"true\", \"yes\", \"1\", \"false\", \"no\", \"0\"}\n        ),\n        help=\"a boolean flag (any of 'true', 'yes' or '1' in case insensitive manner is considered positive)\",\n    ),\n    int: TypeDescriptor(\n        decode=lambda value: int(value.strip()),\n        normalize=int,  # type: ignore\n        verify=lambda value: isinstance(value, int)\n        or (isinstance(value, str) and value.strip().isdigit()),\n        help=\"an integer value\",\n    ),\n    dict: TypeDescriptor(\n        decode=lambda value: {\n            key: int(val) if val.isdigit() else val\n            for key_value in value.split(\",\")\n            for key, val in [[v.strip() for v in key_value.split(\"=\", maxsplit=1)]]\n        },\n        normalize=lambda value: (\n            value\n            if isinstance(value, dict)\n            else {\n                key: int(val) if val.isdigit() else val\n                for key_value in str(value).split(\",\")\n                for key, val in [[v.strip() for v in key_value.split(\"=\", maxsplit=1)]]\n            }\n        ),\n        verify=lambda value: isinstance(value, dict)\n        or (\n            isinstance(value, str)\n            and all(\n                
key_value.find(\"=\") not in (-1, len(key_value) - 1)\n                for key_value in value.split(\",\")\n            )\n        ),\n        help=\"a sequence of KEY=VALUE values separated by comma (Example: 'KEY1=VALUE1,KEY2=VALUE2,KEY3=VALUE3')\",\n    ),\n}\n\n# special marker to distinguish unset value from None value\n# as someone may want to use None as a real value for a parameter\n_UNSET = object()\n\n\nclass ValueSource(IntEnum):  # noqa: PR01\n    \"\"\"Class that describes the method of getting the value for a parameter.\"\"\"\n\n    # got from default, i.e. neither user nor configuration source had the value\n    DEFAULT = 0\n    # set by user\n    SET_BY_USER = 1\n    # got from parameter configuration source, like environment variable\n    GOT_FROM_CFG_SOURCE = 2\n\n\nclass Parameter(object):\n    \"\"\"\n    Base class describing interface for configuration entities.\n\n    Attributes\n    ----------\n    choices : Optional[Sequence[str]]\n        Array with possible options of ``Parameter`` values.\n    type : str\n        String that denotes ``Parameter`` type.\n    default : Optional[Any]\n        ``Parameter`` default value.\n    is_abstract : bool, default: True\n        Whether or not ``Parameter`` is abstract.\n    _value_source : Optional[ValueSource]\n        Source of the ``Parameter`` value, should be set by\n        ``ValueSource``.\n    _deprecation_descriptor : Optional[DeprecationDescriptor]\n        Indicate whether this parameter is deprecated.\n    \"\"\"\n\n    choices: Optional[Tuple[str, ...]] = None\n    type = str\n    default: Optional[Any] = None\n    is_abstract = True\n    _value_source: Optional[ValueSource] = None\n    _value: Any = _UNSET\n    _subs: list = []\n    _once: DefaultDict[Any, list] = defaultdict(list)\n    _deprecation_descriptor: Optional[DeprecationDescriptor] = None\n\n    @classmethod\n    def _warn_if_deprecated(cls) -> None:\n        \"\"\"Warn that the variable is deprecated if it has a deprecation 
descriptor.\"\"\"\n        if cls._deprecation_descriptor is not None:\n            warnings.warn(\n                cls._deprecation_descriptor.deprecation_message(), FutureWarning\n            )\n\n    @classmethod\n    def _get_value_from_config(cls) -> Any:\n        \"\"\"\n        Read the value from config storage.\n\n        Returns\n        -------\n        Any\n            Config raw value if it's set, otherwise `_UNSET`.\n\n        Notes\n        -----\n        Config storage can be config file or environment variable or whatever.\n        Method should be implemented in the child class.\n        \"\"\"\n        raise NotImplementedError()\n\n    @classmethod\n    def get_help(cls) -> str:\n        \"\"\"\n        Generate user-presentable help for the option.\n\n        Returns\n        -------\n        str\n\n        Notes\n        -----\n        Method should be implemented in the child class.\n        \"\"\"\n        raise NotImplementedError()\n\n    def __init_subclass__(cls, type: Any, abstract: bool = False, **kw: dict):\n        \"\"\"\n        Initialize subclass.\n\n        Parameters\n        ----------\n        type : Any\n            Type of the config.\n        abstract : bool, default: False\n            Whether config is abstract.\n        **kw : dict\n            Optional arguments for config initialization.\n        \"\"\"\n        assert type in _TYPE_PARAMS, f\"Unsupported variable type: {type}\"\n        cls.type = type\n        cls.is_abstract = abstract\n        cls._value = _UNSET\n        cls._subs = []\n        cls._once = defaultdict(list)\n        super().__init_subclass__(**kw)\n\n    @classmethod\n    def subscribe(cls, callback: Callable) -> None:\n        \"\"\"\n        Add `callback` to the `_subs` list and then execute it.\n\n        Parameters\n        ----------\n        callback : callable\n            Callable to execute.\n        \"\"\"\n        cls._subs.append(callback)\n        callback(cls)\n\n    @classmethod\n 
   def _get_default(cls) -> Any:\n        \"\"\"\n        Get default value of the config.\n\n        Returns\n        -------\n        Any\n        \"\"\"\n        return cls.default\n\n    @classmethod\n    def get_value_source(cls) -> ValueSource:\n        \"\"\"\n        Get value source of the config.\n\n        Returns\n        -------\n        ValueSource\n        \"\"\"\n        if cls._value_source is None:\n            # dummy call to .get() to initialize the value\n            cls.get()\n        assert (\n            cls._value_source is not None\n        ), \"_value_source must be initialized by now in get()\"\n        return cls._value_source\n\n    @classmethod\n    def get(cls) -> Any:\n        \"\"\"\n        Get config value.\n\n        Returns\n        -------\n        Any\n            Decoded and verified config value.\n        \"\"\"\n        cls._warn_if_deprecated()\n        if cls._value is _UNSET:\n            # get the value from env\n            config_value = cls._get_value_from_config()\n            if config_value is _UNSET:\n                cls._value = cls._get_default()\n                cls._value_source = ValueSource.DEFAULT\n            else:\n                cls._value = config_value\n                cls._value_source = ValueSource.GOT_FROM_CFG_SOURCE\n        return cls._value\n\n    @classmethod\n    def put(cls, value: Any) -> None:\n        \"\"\"\n        Set config value.\n\n        Parameters\n        ----------\n        value : Any\n            Config value to set.\n        \"\"\"\n        cls._warn_if_deprecated()\n        cls._check_callbacks(cls._put_nocallback(value))\n        cls._value_source = ValueSource.SET_BY_USER\n\n    @classmethod\n    def normalize(cls, value: Any) -> Any:\n        \"\"\"\n        Normalize config value.\n\n        Parameters\n        ----------\n        value : Any\n            Config value to normalize.\n\n        Returns\n        -------\n        Any\n            Normalized config value.\n 
       \"\"\"\n        return _TYPE_PARAMS[cls.type].normalize(value)\n\n    @classmethod\n    def once(cls, onvalue: Any, callback: Callable) -> None:\n        \"\"\"\n        Execute `callback` if config value matches `onvalue` value.\n\n        Otherwise accumulate callbacks associated with the given `onvalue`\n        in the `_once` container.\n\n        Parameters\n        ----------\n        onvalue : Any\n            Config value to set.\n        callback : callable\n            Callable that should be executed if config value matches `onvalue`.\n        \"\"\"\n        onvalue = cls.normalize(onvalue)\n        if onvalue == cls.get():\n            callback(cls)\n        else:\n            cls._once[onvalue].append(callback)\n\n    @classmethod\n    def _put_nocallback(cls, value: Any) -> Any:\n        \"\"\"\n        Set config value without executing callbacks.\n\n        Parameters\n        ----------\n        value : Any\n            Config value to set.\n\n        Returns\n        -------\n        Any\n            Replaced (old) config value.\n        \"\"\"\n        if not _TYPE_PARAMS[cls.type].verify(value):\n            raise ValueError(f\"Unsupported value: {value}\")\n        value = cls.normalize(value)\n        oldvalue, cls._value = cls.get(), value\n        return oldvalue\n\n    @classmethod\n    def _check_callbacks(cls, oldvalue: Any) -> None:\n        \"\"\"\n        Execute all needed callbacks if config value was changed.\n\n        Parameters\n        ----------\n        oldvalue : Any\n            Previous (old) config value.\n        \"\"\"\n        if oldvalue == cls.get():\n            return\n        for callback in cls._subs:\n            callback(cls)\n        for callback in cls._once.pop(cls.get(), ()):\n            callback(cls)\n\n    @classmethod\n    def add_option(cls, choice: Any) -> Any:\n        \"\"\"\n        Add a new choice for the parameter.\n\n        Parameters\n        ----------\n        choice : Any\n          
  New choice to add to the available choices.\n\n        Returns\n        -------\n        Any\n            Added choice normalized according to the parameter type.\n        \"\"\"\n        if cls.choices is not None:\n            if not _TYPE_PARAMS[cls.type].verify(choice):\n                raise ValueError(f\"Unsupported choice value: {choice}\")\n            choice = cls.normalize(choice)\n            if choice not in cls.choices:\n                cls.choices += (choice,)\n            return choice\n        raise TypeError(\"Cannot add a choice to a parameter where choices is None\")\n\n\n@contextlib.contextmanager\ndef context(**config: dict[str, Any]) -> Iterator[None]:\n    \"\"\"\n    Set a value(s) for the specified config(s) from ``modin.config`` in the scope of the context.\n\n    Parameters\n    ----------\n    **config : dict[str, Any]\n        Keyword describing a name of a config variable from ``modin.config`` as a key\n        and a new value as a value.\n\n    Examples\n    --------\n    >>> RangePartitioning.get()\n    False\n    >>> with context(RangePartitioning=True):\n    ...     print(RangePartitioning.get()) # True\n    True\n    False\n    >>> RangePartitioning.get()\n    False\n    >>> with context(RangePartitioning=True, AsyncReadMode=True):\n    ...     print(RangePartitioning.get()) # True\n    ...     print(AsyncReadMode.get()) # True\n    True\n    True\n    >>> RangePartitioning.get()\n    False\n    >>> AsyncReadMode.get()\n    False\n    \"\"\"\n    import modin.config as cfg\n\n    old_values = {}\n    for name, val in config.items():\n        var = getattr(cfg, name)\n        old_values[var] = var.get()\n        var.put(val)\n    try:\n        yield\n    finally:\n        for var, val in old_values.items():\n            var.put(val)\n\n\n__all__ = [\"Parameter\", \"context\"]\n"
  },
  {
    "path": "modin/conftest.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n# We turn off mypy type checks in this file because it's not imported anywhere\n# type: ignore\n\nimport copy\nimport logging\nimport os\nimport platform\nimport shutil\nimport subprocess\nimport sys\nimport time\nfrom collections import defaultdict\nfrom contextlib import contextmanager\nfrom typing import Iterable, Optional\n\nimport boto3\nimport numpy as np\nimport pandas\nimport pytest\nimport requests\nimport s3fs\nfrom pandas.util._decorators import doc\n\nfrom modin.config import Backend, Execution\n\nassert (\n    \"modin.utils\" not in sys.modules\n), \"Do not import modin.utils before patching, or tests could fail\"\n# every import under this assert has to be postfixed with 'noqa: E402'\n# as flake8 complains about that... 
but we _have_ to make sure we\n# monkey-patch at the right spot, otherwise testing doc URLs might\n# not catch all of them\nimport modin.utils  # noqa: E402\n\n_generated_doc_urls = set()\n\n\ndef _saving_make_api_url(token, _make_api_url=modin.utils._make_api_url):\n    url = _make_api_url(token)\n    _generated_doc_urls.add(url)\n    return url\n\n\nmodin.utils._make_api_url = _saving_make_api_url\n\nimport uuid  # noqa: E402\n\nimport modin  # noqa: E402\nimport modin.config  # noqa: E402\nimport modin.pandas as pd  # noqa: E402\nimport modin.tests.config  # noqa: E402\nfrom modin.config import (  # noqa: E402\n    AsyncReadMode,\n    BenchmarkMode,\n    GithubCI,\n    IsExperimental,\n    MinRowPartitionSize,\n    NPartitions,\n)\nfrom modin.core.execution.dispatching.factories import factories  # noqa: E402\nfrom modin.core.execution.python.implementations.pandas_on_python.io import (  # noqa: E402\n    PandasOnPythonIO,\n)\nfrom modin.core.storage_formats import (  # noqa: E402\n    BaseQueryCompiler,\n    PandasQueryCompiler,\n)\nfrom modin.core.storage_formats.pandas.query_compiler_caster import (  # noqa: E402\n    _CLASS_AND_BACKEND_TO_POST_OP_SWITCH_METHODS,\n    _CLASS_AND_BACKEND_TO_PRE_OP_SWITCH_METHODS,\n    _GENERAL_EXTENSIONS,\n)\nfrom modin.tests.pandas.utils import (  # noqa: E402\n    NROWS,\n    _make_csv_file,\n    get_unique_filename,\n    make_default_file,\n)\n\n\ndef pytest_addoption(parser):\n    parser.addoption(\n        \"--execution\",\n        action=\"store\",\n        default=None,\n        help=\"specifies execution to run tests on\",\n    )\n\n\ndef set_experimental_env(mode):\n    IsExperimental.put(mode == \"experimental\")\n\n\n@pytest.fixture(scope=\"session\", autouse=True)\ndef enforce_config():\n    \"\"\"\n    A fixture that ensures that all checks for MODIN_* variables\n    are done using modin.config to prevent leakage\n    \"\"\"\n    orig_env = os.environ\n    modin_start = os.path.dirname(modin.__file__)\n    
modin_exclude = [\n        os.path.dirname(modin.config.__file__),\n        os.path.dirname(modin.tests.config.__file__),\n    ]\n\n    class PatchedEnv:\n        @staticmethod\n        def __check_var(name):\n            if name.upper().startswith(\"MODIN_\"):\n                frame = sys._getframe()\n                try:\n                    # get the path to module where caller of caller is defined;\n                    # caller of this function is inside PatchedEnv, and we're\n                    # interested in whomever called a method on PatchedEnv\n                    caller_file = frame.f_back.f_back.f_code.co_filename\n                finally:\n                    del frame\n                pkg_name = os.path.dirname(caller_file)\n                if pkg_name.startswith(modin_start):\n                    assert any(\n                        pkg_name.startswith(excl) for excl in modin_exclude\n                    ), \"Do not access MODIN_ environment variable bypassing modin.config\"\n\n        def __getitem__(self, name):\n            self.__check_var(name)\n            return orig_env[name]\n\n        def __setitem__(self, name, value):\n            self.__check_var(name)\n            orig_env[name] = value\n\n        def __delitem__(self, name):\n            self.__check_var(name)\n            del orig_env[name]\n\n        def pop(self, name, default=object()):\n            self.__check_var(name)\n            return orig_env.pop(name, default)\n\n        def get(self, name, default=None):\n            self.__check_var(name)\n            return orig_env.get(name, default)\n\n        def __contains__(self, name):\n            self.__check_var(name)\n            return name in orig_env\n\n        def __getattr__(self, name):\n            return getattr(orig_env, name)\n\n        def __iter__(self):\n            return iter(orig_env)\n\n    os.environ = PatchedEnv()\n    yield\n    os.environ = orig_env\n\n\nBASE_EXECUTION_NAME = \"BaseOnPython\"\n\n\nclass 
TestQC(BaseQueryCompiler):\n    def __init__(self, modin_frame):\n        self._modin_frame = modin_frame\n\n    storage_format = property(\n        lambda self: \"Base\", doc=BaseQueryCompiler.storage_format.__doc__\n    )\n    engine = property(lambda self: \"Python\", doc=BaseQueryCompiler.engine.__doc__)\n\n    def finalize(self):\n        self._modin_frame.finalize()\n\n    def execute(self):\n        self.finalize()\n        self._modin_frame.wait_computations()\n\n    @classmethod\n    def from_pandas(cls, df, data_cls):\n        return cls(data_cls.from_pandas(df))\n\n    @classmethod\n    def from_arrow(cls, at, data_cls):\n        return cls(data_cls.from_arrow(at))\n\n    def free(self):\n        pass\n\n    def to_interchange_dataframe(\n        self, nan_as_null: bool = False, allow_copy: bool = True\n    ):\n        raise NotImplementedError(\n            \"The selected execution does not implement the DataFrame exchange protocol.\"\n        )\n\n    @classmethod\n    def from_interchange_dataframe(cls, df, data_cls):\n        raise NotImplementedError(\n            \"The selected execution does not implement the DataFrame exchange protocol.\"\n        )\n\n    to_pandas = PandasQueryCompiler.to_pandas\n    default_to_pandas = PandasQueryCompiler.default_to_pandas\n\n\nclass BaseOnPythonIO(PandasOnPythonIO):\n    query_compiler_cls = TestQC\n\n\nclass BaseOnPythonFactory(factories.BaseFactory):\n    @classmethod\n    def prepare(cls):\n        cls.io_cls = BaseOnPythonIO\n\n\ndef set_base_execution(name=BASE_EXECUTION_NAME):\n    setattr(factories, f\"{name}Factory\", BaseOnPythonFactory)\n    Backend.register_backend(\n        \"BaseOnPython\",\n        Execution(\n            engine=\"Python\",\n            storage_format=\"Base\",\n        ),\n    )\n    modin.set_execution(engine=\"python\", storage_format=name.split(\"On\")[0])\n\n\n@pytest.fixture(scope=\"function\")\ndef get_unique_base_execution():\n    \"\"\"Setup unique execution for a 
single function and yield its QueryCompiler that's suitable for inplace modifications.\"\"\"\n    # It's better to use decimal IDs rather than hex ones due to factory names formatting\n    execution_id = int(uuid.uuid4().hex, 16)\n    format_name = f\"Base{execution_id}\"\n    engine_name = \"Python\"\n    execution_name = f\"{format_name}On{engine_name}\"\n\n    # Dynamically building all the required classes to form a new execution\n    base_qc = type(\n        format_name, (TestQC,), {\"get_backend\": (lambda self: execution_name)}\n    )\n    base_io = type(\n        f\"{execution_name}IO\", (BaseOnPythonIO,), {\"query_compiler_cls\": base_qc}\n    )\n    base_factory = type(\n        f\"{execution_name}Factory\",\n        (BaseOnPythonFactory,),\n        {\"prepare\": classmethod(lambda cls: setattr(cls, \"io_cls\", base_io))},\n    )\n\n    # Setting up the new execution\n    setattr(factories, f\"{execution_name}Factory\", base_factory)\n    Backend.register_backend(\n        execution_name, Execution(engine=engine_name, storage_format=format_name)\n    )\n    old_engine, old_format = modin.set_execution(\n        engine=engine_name, storage_format=format_name\n    )\n    yield base_qc\n\n    # Teardown the new execution\n    modin.set_execution(engine=old_engine, storage_format=old_format)\n    try:\n        delattr(factories, f\"{execution_name}Factory\")\n    except AttributeError:\n        pass\n\n\ndef pytest_configure(config):\n    execution = config.option.execution\n\n    if execution is None:\n        return\n\n    if execution == BASE_EXECUTION_NAME:\n        set_base_execution(BASE_EXECUTION_NAME)\n        config.addinivalue_line(\n            \"filterwarnings\", \"default:.*defaulting to pandas.*:UserWarning\"\n        )\n    else:\n        partition, engine = execution.split(\"On\")\n        modin.set_execution(engine=engine, storage_format=partition)\n\n\ndef pytest_runtest_call(item):\n    custom_markers = [\"xfail\", \"skip\"]\n\n    # 
dynamicly adding custom markers to tests\n    for custom_marker in custom_markers:\n        for marker in item.iter_markers(name=f\"{custom_marker}_executions\"):\n            executions = marker.args[0]\n            if not isinstance(executions, list):\n                executions = [executions]\n\n            current_execution = modin.utils.get_current_execution()\n            reason = marker.kwargs.pop(\"reason\", \"\")\n\n            item.add_marker(\n                getattr(pytest.mark, custom_marker)(\n                    condition=current_execution in executions,\n                    reason=f\"Execution {current_execution} does not pass this test. {reason}\",\n                    **marker.kwargs,\n                )\n            )\n\n\n_doc_pytest_fixture = \"\"\"\nPytest fixture factory that makes temp {file_type} files for testing.\n\nYields:\n    Function that generates {file_type} files\n\"\"\"\n\n\n@pytest.fixture(scope=\"class\")\ndef TestReadCSVFixture(tmp_path_factory):\n    tmp_path = tmp_path_factory.mktemp(\"TestReadCSVFixture\")\n\n    creator = _make_csv_file(data_dir=tmp_path)\n    # each xdist worker spawned in separate process with separate namespace and dataset\n    pytest.csvs_names = {}\n    # test_read_csv_col_handling, test_read_csv_parsing\n    pytest.csvs_names[\"test_read_csv_regular\"] = creator()\n    # test_read_csv_parsing\n    pytest.csvs_names[\"test_read_csv_yes_no\"] = creator(\n        additional_col_values=[\"Yes\", \"true\", \"No\", \"false\"],\n    )\n    # test_read_csv_col_handling\n    pytest.csvs_names[\"test_read_csv_blank_lines\"] = creator(\n        add_blank_lines=True,\n    )\n    # test_read_csv_nans_handling\n    pytest.csvs_names[\"test_read_csv_nans\"] = creator(\n        add_blank_lines=True,\n        additional_col_values=[\"<NA>\", \"N/A\", \"NA\", \"NULL\", \"custom_nan\", \"73\"],\n    )\n    # test_read_csv_error_handling\n    pytest.csvs_names[\"test_read_csv_bad_lines\"] = creator(\n        
add_bad_lines=True,\n    )\n    yield\n\n\n@pytest.fixture\n@doc(_doc_pytest_fixture, file_type=\"csv\")\ndef make_csv_file(tmp_path):\n    yield _make_csv_file(data_dir=tmp_path)\n\n\ndef create_fixture(file_type):\n    @doc(_doc_pytest_fixture, file_type=file_type)\n    def fixture(tmp_path):\n        yield make_default_file(file_type=file_type, data_dir=tmp_path)\n\n    return fixture\n\n\nfor file_type in (\"json\", \"html\", \"excel\", \"feather\", \"stata\", \"hdf\", \"pickle\", \"fwf\"):\n    fixture = create_fixture(file_type)\n    fixture.__name__ = f\"make_{file_type}_file\"\n    globals()[fixture.__name__] = pytest.fixture(fixture)\n\n\n@pytest.fixture\ndef make_parquet_file():\n    \"\"\"Pytest fixture factory that makes a parquet file/dir for testing.\n\n    Yields:\n        Function that generates a parquet file/dir\n    \"\"\"\n    filenames = []\n\n    def _make_parquet_file(\n        filename,\n        nrows=NROWS,\n        ncols=2,\n        force=True,\n        range_index_start=0,\n        range_index_step=1,\n        range_index_name=None,\n        partitioned_columns=[],\n        row_group_size: Optional[int] = None,\n    ):\n        \"\"\"Helper function to generate parquet files/directories.\n\n        Args:\n            filename: The name of test file, that should be created.\n            nrows: Number of rows for the dataframe.\n            ncols: Number of cols for the dataframe.\n            force: Create a new file/directory even if one already exists.\n            partitioned_columns: Create a partitioned directory using pandas.\n            row_group_size: Maximum size of each row group.\n        \"\"\"\n        if force or not os.path.exists(filename):\n            df = pandas.DataFrame(\n                {f\"col{x + 1}\": np.arange(nrows) for x in range(ncols)}\n            )\n            index = pandas.RangeIndex(\n                start=range_index_start,\n                stop=range_index_start + (nrows * range_index_step),\n         
       step=range_index_step,\n                name=range_index_name,\n            )\n            if (\n                range_index_start == 0\n                and range_index_step == 1\n                and range_index_name is None\n            ):\n                assert df.index.equals(index)\n            else:\n                df.index = index\n            if len(partitioned_columns) > 0:\n                df.to_parquet(\n                    filename,\n                    partition_cols=partitioned_columns,\n                    row_group_size=row_group_size,\n                )\n            else:\n                df.to_parquet(filename, row_group_size=row_group_size)\n            filenames.append(filename)\n\n    # Return function that generates parquet files\n    yield _make_parquet_file\n\n    # Delete parquet file that was created\n    for path in filenames:\n        if os.path.exists(path):\n            if os.path.isdir(path):\n                shutil.rmtree(path)\n            else:\n                os.remove(path)\n\n\n@pytest.fixture\ndef make_sql_connection():\n    \"\"\"Sets up sql connections and takes them down after the caller is done.\n\n    Yields:\n        Factory that generates sql connection objects\n    \"\"\"\n\n    def _sql_connection(filename, table=\"\"):\n        # Remove file if exists\n        if os.path.exists(filename):\n            os.remove(filename)\n        # Create connection and, if needed, table\n        conn = \"sqlite:///{}\".format(filename)\n        if table:\n            df = pandas.DataFrame(\n                {\n                    \"col1\": [0, 1, 2, 3, 4, 5, 6],\n                    \"col2\": [7, 8, 9, 10, 11, 12, 13],\n                    \"col3\": [14, 15, 16, 17, 18, 19, 20],\n                    \"col4\": [21, 22, 23, 24, 25, 26, 27],\n                    \"col5\": [0, 0, 0, 0, 0, 0, 0],\n                }\n            )\n            df.to_sql(table, conn)\n        return conn\n\n    yield 
_sql_connection\n\n\n@pytest.fixture(scope=\"class\")\ndef TestReadGlobCSVFixture(tmp_path_factory):\n    tmp_path = tmp_path_factory.mktemp(\"TestReadGlobCSVFixture\")\n\n    base_name = get_unique_filename(extension=\"\")\n    pytest.glob_path = str(tmp_path / \"{}_*.csv\".format(base_name))\n    pytest.files = [str(tmp_path / \"{}_{}.csv\".format(base_name, i)) for i in range(11)]\n    for fname in pytest.files:\n        # Glob does not guarantee ordering so we have to remove the randomness in the generated csvs.\n        _make_csv_file(data_dir=tmp_path)(fname, row_size=11, remove_randomness=True)\n\n    yield\n\n\n@pytest.fixture\ndef get_generated_doc_urls():\n    return lambda: _generated_doc_urls\n\n\n@pytest.fixture\ndef set_num_partitions(request):\n    old_num_partitions = NPartitions.get()\n    NPartitions.put(request.param)\n    yield\n    NPartitions.put(old_num_partitions)\n\n\n@pytest.fixture()\ndef set_benchmark_mode(request):\n    old_benchmark_mode = BenchmarkMode.get()\n    BenchmarkMode.put(request.param)\n    yield\n    BenchmarkMode.put(old_benchmark_mode)\n\n\n@pytest.fixture\ndef set_async_read_mode(request):\n    old_async_read_mode = AsyncReadMode.get()\n    AsyncReadMode.put(request.param)\n    yield\n    AsyncReadMode.put(old_async_read_mode)\n\n\n@pytest.fixture\ndef set_min_row_partition_size(request):\n    old_min_row_partition_size = MinRowPartitionSize.get()\n    MinRowPartitionSize.put(request.param)\n    yield\n    MinRowPartitionSize.put(old_min_row_partition_size)\n\n\nray_client_server = None\n\n\n@pytest.fixture\ndef s3_storage_options(worker_id):\n    # # copied from pandas conftest.py:\n    # https://github.com/pandas-dev/pandas/blob/32f789fbc5d5a72d9d1ac14935635289eeac9009/pandas/tests/io/conftest.py#L45\n    # worker_id is a pytest fixture\n    if GithubCI.get():\n        url = \"http://localhost:5000/\"\n    else:\n        # If we hit this else-case, this test is being run locally. 
In that case, we want\n        # each worker to point to a different port for its mock S3 service. The easiest way\n        # to do that is to use the `worker_id`, which is unique, to determine what port to point\n        # to. We arbitrarily assign `5` as a worker id to the master worker, since we need a number\n        # for each worker, and we never run tests with more than `pytest -n 4`.\n        worker_id = \"5\" if worker_id == \"master\" else worker_id.lstrip(\"gw\")\n        url = f\"http://127.0.0.1:555{worker_id}/\"\n    return {\"client_kwargs\": {\"endpoint_url\": url}}\n\n\n@pytest.fixture(scope=\"session\")\ndef monkeysession():\n    with pytest.MonkeyPatch.context() as mp:\n        yield mp\n\n\n@pytest.fixture(scope=\"session\")\ndef s3_base(worker_id, monkeysession):\n    \"\"\"\n    Fixture for mocking S3 interaction.\n\n    Sets up moto server in separate process locally.\n\n    Yields\n    ------\n    str\n        URL for motoserver/moto CI service.\n    \"\"\"\n    # copied from pandas conftest.py\n    # still need access keys for https://github.com/getmoto/moto/issues/1924\n    monkeysession.setenv(\"AWS_ACCESS_KEY_ID\", \"foobar_key\")\n    monkeysession.setenv(\"AWS_SECRET_ACCESS_KEY\", \"foobar_secret\")\n    monkeysession.setenv(\"AWS_REGION\", \"us-west-2\")\n    if GithubCI.get():\n        if sys.platform in (\"darwin\", \"win32\", \"cygwin\") or (\n            platform.machine() in (\"arm64\", \"aarch64\")\n            or platform.machine().startswith(\"armv\")\n        ):\n            # pandas comments say:\n            # DO NOT RUN on Windows/macOS/ARM, only Ubuntu\n            # - subprocess in CI can cause timeouts\n            # - GitHub Actions do not support\n            #   container services for the above OSs\n            pytest.skip(\n                \"S3 tests do not have a corresponding service in Windows, macOS \"\n                + \"or ARM platforms\"\n            )\n        else:\n            # assume CI has started moto 
in docker container:\n            # https://docs.getmoto.org/en/latest/docs/server_mode.html#run-using-docker\n            # It would be nice to start moto on another thread as in the\n            # instructions here:\n            # https://docs.getmoto.org/en/latest/docs/server_mode.html#start-within-python\n            # but that gives 403 forbidden error when we try to create the bucket\n            yield \"http://localhost:5000\"\n    else:\n        # Launching moto in server mode, i.e., as a separate process\n        # with an S3 endpoint on localhost\n\n        # If we hit this else-case, this test is being run locally. In that case, we want\n        # each worker to point to a different port for its mock S3 service. The easiest way\n        # to do that is to use the `worker_id`, which is unique, to determine what port to point\n        # to.\n        endpoint_port = (\n            5500 if worker_id == \"master\" else (5550 + int(worker_id.lstrip(\"gw\")))\n        )\n        endpoint_uri = f\"http://127.0.0.1:{endpoint_port}/\"\n\n        # pipe to null to avoid logging in terminal\n        # TODO any way to throw the error from here? e.g. 
i had an annoying problem\n        # where I didn't have flask-cors and moto just failed .if there's an error\n        # in the popen command and we throw an error within the body of the context\n        # manager, the test just hangs forever.\n        with subprocess.Popen(\n            [\"moto_server\", \"s3\", \"-p\", str(endpoint_port)],\n            stdout=subprocess.DEVNULL,\n            stderr=subprocess.PIPE,\n        ) as proc:\n            for _ in range(50):\n                try:\n                    # OK to go once server is accepting connections\n                    if requests.get(endpoint_uri).ok:\n                        break\n                except Exception:\n                    # try again while we still have retries\n                    time.sleep(0.1)\n            else:\n                proc.terminate()\n                _, errs = proc.communicate()\n                raise RuntimeError(\n                    \"Could not connect to moto server after 50 tries. \"\n                    + f\"See stderr for extra info: {errs}\"\n                )\n            yield endpoint_uri\n\n            proc.terminate()\n\n\n@pytest.fixture\ndef s3_resource(s3_base):\n    \"\"\"\n    Set up S3 bucket with contents. The primary bucket name is \"modin-test\".\n\n    When running locally, this function should be safe even if there are multiple pytest\n    workers running in parallel because each worker gets its own endpoint. 
When running\n    in CI, we use a single endpoint for all workers, so we can't have multiple pytest\n    workers running in parallel.\n    \"\"\"\n    bucket = \"modin-test\"\n    conn = boto3.resource(\"s3\", endpoint_url=s3_base)\n    cli = boto3.client(\"s3\", endpoint_url=s3_base)\n\n    # https://github.com/getmoto/moto/issues/3292\n    # without location, I get\n    # botocore.exceptions.ClientError: An error occurred\n    # (IllegalLocationConstraintException) when calling the CreateBucket operation:\n    # The unspecified location constraint is incompatible for the region specific\n    # endpoint this request was sent to.\n    # even if I delete os.environ['AWS_REGION'] but somehow pandas can get away with\n    # this.\n    try:\n        cli.create_bucket(\n            Bucket=bucket, CreateBucketConfiguration={\"LocationConstraint\": \"us-west-2\"}\n        )\n    except Exception as e:\n        # OK if bucket already exists, but want to raise other exceptions.\n        # The exception raised by `create_bucket` is made using a factory,\n        # so we need to check using this method of reading the response rather\n        # than just checking the type of the exception.\n        response = getattr(e, \"response\", {})\n        error_code = response.get(\"Error\", {}).get(\"Code\", \"\")\n        if error_code not in (\"BucketAlreadyOwnedByYou\", \"BucketAlreadyExists\"):\n            raise\n    for _ in range(20):\n        # We want to wait until bucket creation is finished.\n        if cli.list_buckets()[\"Buckets\"]:\n            break\n        time.sleep(0.1)\n    if not cli.list_buckets()[\"Buckets\"]:\n        raise RuntimeError(\"Could not create bucket\")\n\n    s3fs.S3FileSystem.clear_instance_cache()\n\n    s3 = s3fs.S3FileSystem(client_kwargs={\"endpoint_url\": s3_base})\n\n    test_s3_files = [\n        (\"modin-bugs/multiple_csv/\", \"modin/tests/pandas/data/multiple_csv/\"),\n        (\n            \"modin-bugs/test_data_dir.parquet/\",\n       
     \"modin/tests/pandas/data/test_data_dir.parquet/\",\n        ),\n        (\"modin-bugs/test_data.parquet\", \"modin/tests/pandas/data/test_data.parquet\"),\n        (\"modin-bugs/test_data.json\", \"modin/tests/pandas/data/test_data.json\"),\n        (\"modin-bugs/test_data.fwf\", \"modin/tests/pandas/data/test_data.fwf\"),\n        (\"modin-bugs/test_data.feather\", \"modin/tests/pandas/data/test_data.feather\"),\n        (\"modin-bugs/issue5159.parquet/\", \"modin/tests/pandas/data/issue5159.parquet/\"),\n    ]\n    for s3_key, file_name in test_s3_files:\n        s3.put(file_name, f\"{bucket}/{s3_key}\", recursive=s3_key.endswith(\"/\"))\n\n    yield conn\n\n    s3.rm(bucket, recursive=True)\n    for _ in range(20):\n        # We want to wait until the deletion finishes.\n        if not cli.list_buckets()[\"Buckets\"]:\n            break\n        time.sleep(0.1)\n\n\n@pytest.fixture\ndef modify_config(request):\n    values = request.param\n    old_values = {}\n\n    for key, value in values.items():\n        old_values[key] = key.get()\n        key.put(value)\n\n    yield  # waiting for the test to be completed\n    # restoring old parameters\n    for key, value in old_values.items():\n        try:\n            key.put(value)\n        except ValueError as e:\n            # sometimes bool env variables have 'None' as a default value, which\n            # causes a ValueError when we try to set this value back, as technically,\n            # only bool values are allowed (and 'None' is not a bool), in this case\n            # we try to set 'False' instead\n            if key.type == bool and value is None:\n                key.put(False)\n            else:\n                raise e\n\n\n@contextmanager\ndef copy_and_restore(\n    dicts: Iterable[defaultdict],\n) -> None:\n    \"\"\"\n    Make deep copies of defaultdicts and restore them upon exiting this context.\n\n    Ideally this function would be a fixture, but we want to pass it parameters\n    and use it 
in other fixtures, and it does not seem to be possible to pass\n    parameters from one fixture to another.\n\n    Parameters\n    ----------\n    dicts : Iterable[defaultdict]\n        The dicts to copy and restore.\n    \"\"\"\n    try:\n        # Use a tuples of tuples instead of a dict mapping each original dict\n        # to its copy, because the original dict is not hashable.\n        original_dict_to_copy = tuple(\n            (original_dict, copy.deepcopy(original_dict)) for original_dict in dicts\n        )\n        yield\n    finally:\n        for original_dict, dict_copy in original_dict_to_copy:\n            original_dict.clear()\n            original_dict.update(dict_copy)\n\n\n@pytest.fixture(autouse=True)\ndef clean_up_extensions():\n\n    with copy_and_restore(\n        (\n            pd.dataframe.DataFrame._extensions,\n            pd.Series._extensions,\n            pd.base.BasePandasDataset._extensions,\n            _GENERAL_EXTENSIONS,\n            pd.groupby.DataFrameGroupBy._extensions,\n            pd.groupby.SeriesGroupBy._extensions,\n        )\n    ):\n        yield\n\n    from modin.pandas.api.extensions.extensions import _attrs_to_delete_on_test\n\n    for k, v in _attrs_to_delete_on_test.items():\n        for obj in v:\n            delattr(k, obj)\n    _attrs_to_delete_on_test.clear()\n\n\n@pytest.fixture(autouse=True)\ndef clean_up_auto_backend_switching():\n\n    with copy_and_restore(\n        (\n            _CLASS_AND_BACKEND_TO_POST_OP_SWITCH_METHODS,\n            _CLASS_AND_BACKEND_TO_PRE_OP_SWITCH_METHODS,\n        )\n    ):\n        yield\n\n\n@pytest.fixture(autouse=True)\ndef assert_no_root_logging(caplog):\n    try:\n        import xgboost\n    except ImportError:\n        xgboost_path = None\n    else:\n        xgboost_path = os.path.dirname(xgboost.__file__)\n    root_logger = logging.getLogger()\n    # Capture logs at any level, i.e. 
at level >= logging.NOTSET.\n    with caplog.at_level(logging.NOTSET):\n        yield\n    # Note that because this code is in a fixture, we have to use\n    # caplog.get_records(when=\"call\") instead of caplog.records:\n    # https://github.com/pytest-dev/pytest/issues/4033\n    assert not any(\n        record.name == root_logger.name\n        # Allow xgboost to log to root.\n        # TODO(https://github.com/modin-project/modin/issues/5194): Check\n        # whether we can remove this exception once we use a newer version of\n        # xgboost.\n        and not (xgboost_path is not None and record.pathname.startswith(xgboost_path))\n        for record in caplog.get_records(when=\"call\")\n    )\n"
  },
  {
    "path": "modin/core/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Modin's core functionality.\"\"\"\n"
  },
  {
    "path": "modin/core/computation/__init__.py",
    "content": ""
  },
  {
    "path": "modin/core/computation/align.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"\nCore eval alignment algorithms. Forked from pandas.core.computation.align\n\"\"\"\n\nfrom __future__ import annotations\n\nimport warnings\nfrom collections.abc import Sequence\nfrom functools import (\n    partial,\n    wraps,\n)\nfrom typing import (\n    Callable,\n)\n\nimport numpy as np\nimport pandas\nimport pandas.core.common as com\nfrom pandas._typing import F\nfrom pandas.core.base import PandasObject\nfrom pandas.errors import PerformanceWarning\n\nfrom modin.core.computation.common import result_type_many\nfrom modin.pandas import DataFrame, Series\nfrom modin.pandas.base import BasePandasDataset\n\n\ndef _align_core_single_unary_op(\n    term,\n) -> tuple[partial | type[BasePandasDataset], dict[str, pandas.Index] | None]:\n    typ: partial | type[BasePandasDataset]\n    axes: dict[str, pandas.Index] | None = None\n\n    if isinstance(term.value, np.ndarray):\n        typ = partial(np.asanyarray, dtype=term.value.dtype)\n    else:\n        typ = type(term.value)\n        if hasattr(term.value, \"axes\"):\n            axes = _zip_axes_from_type(typ, term.value.axes)\n\n    return typ, axes\n\n\ndef _zip_axes_from_type(\n    typ: 
type[BasePandasDataset], new_axes: Sequence[pandas.Index]\n) -> dict[str, pandas.Index]:\n    return {name: new_axes[i] for i, name in enumerate(typ._AXIS_ORDERS)}\n\n\ndef _any_pandas_objects(terms) -> bool:\n    \"\"\"\n    Check a sequence of terms for instances of PandasObject.\n    \"\"\"\n    return any(isinstance(term.value, PandasObject) for term in terms)\n\n\ndef _filter_special_cases(f) -> Callable[[F], F]:\n    @wraps(f)\n    def wrapper(terms):\n        # single unary operand\n        if len(terms) == 1:\n            return _align_core_single_unary_op(terms[0])\n\n        term_values = (term.value for term in terms)\n\n        # we don't have any pandas objects\n        if not _any_pandas_objects(terms):\n            return result_type_many(*term_values), None\n\n        return f(terms)\n\n    return wrapper\n\n\n@_filter_special_cases\ndef _align_core(terms):\n    term_index = [i for i, term in enumerate(terms) if hasattr(term.value, \"axes\")]\n    term_dims = [terms[i].value.ndim for i in term_index]\n\n    ndims = pandas.Series(dict(zip(term_index, term_dims)))\n\n    # initial axes are the axes of the largest-axis'd term\n    biggest = terms[ndims.idxmax()].value\n    typ = biggest._constructor\n    axes = biggest.axes\n    naxes = len(axes)\n    gt_than_one_axis = naxes > 1\n\n    for value in (terms[i].value for i in term_index):\n        is_series = isinstance(value, Series)\n        is_series_and_gt_one_axis = is_series and gt_than_one_axis\n\n        for axis, items in enumerate(value.axes):\n            if is_series_and_gt_one_axis:\n                ax, itm = naxes - 1, value.index\n            else:\n                ax, itm = axis, items\n\n            if not axes[ax].is_(itm):\n                axes[ax] = axes[ax].union(itm)\n\n    for i, ndim in ndims.items():\n        for axis, items in zip(range(ndim), axes):\n            ti = terms[i].value\n\n            if hasattr(ti, \"reindex\"):\n                transpose = isinstance(ti, Series) 
and naxes > 1\n                reindexer = axes[naxes - 1] if transpose else items\n\n                term_axis_size = len(ti.axes[axis])\n                reindexer_size = len(reindexer)\n\n                ordm = np.log10(max(1, abs(reindexer_size - term_axis_size)))\n                if ordm >= 1 and reindexer_size >= 10000:\n                    w = (\n                        f\"Alignment difference on axis {axis} is larger \"\n                        + f\"than an order of magnitude on term {repr(terms[i].name)}, \"\n                        + f\"by more than {ordm:.4g}; performance may suffer.\"\n                    )\n                    warnings.warn(w, category=PerformanceWarning)\n\n                obj = ti.reindex(reindexer, axis=axis, copy=False)\n                terms[i].update(obj)\n\n        terms[i].update(terms[i].value.values)\n\n    return typ, _zip_axes_from_type(typ, axes)\n\n\ndef align_terms(terms):\n    \"\"\"\n    Align a set of terms.\n    \"\"\"\n    try:\n        # flatten the parse tree (a nested list, really)\n        terms = list(com.flatten(terms))\n    except TypeError:\n        # can't iterate so it must just be a constant or single variable\n        if isinstance(terms.value, (Series, DataFrame)):\n            typ = type(terms.value)\n            return typ, _zip_axes_from_type(typ, terms.value.axes)\n        return np.result_type(terms.type), None\n\n    # if all resolved variables are numeric scalars\n    if all(term.is_scalar for term in terms):\n        return result_type_many(*(term.value for term in terms)).type, None\n\n    # perform the main alignment\n    typ, axes = _align_core(terms)\n    return typ, axes\n\n\ndef reconstruct_object(typ, obj, axes, dtype):\n    \"\"\"\n    Reconstruct an object given its type, raw value, and possibly empty\n    (None) axes.\n\n    Parameters\n    ----------\n    typ : object\n        A type\n    obj : object\n        The value to use in the type constructor\n    axes : dict\n        The axes 
to use to construct the resulting pandas object\n\n    Returns\n    -------\n    ret : typ\n        An object of type ``typ`` with the value `obj` and possible axes\n        `axes`.\n    \"\"\"\n    try:\n        typ = typ.type\n    except AttributeError:\n        pass\n\n    res_t = np.result_type(obj.dtype, dtype)\n\n    if not isinstance(typ, partial) and issubclass(typ, PandasObject):\n        return typ(obj, dtype=res_t, **axes)\n\n    # special case for pathological things like ~True/~False\n    if hasattr(res_t, \"type\") and typ == np.bool_ and res_t != np.bool_:\n        ret_value = res_t.type(obj)\n    else:\n        ret_value = typ(obj).astype(res_t)\n        # The condition is to distinguish 0-dim array (returned in case of\n        # scalar) and 1 element array\n        # e.g. np.array(0) and np.array([0])\n        if (\n            len(obj.shape) == 1\n            and len(obj) == 1\n            and not isinstance(ret_value, np.ndarray)\n        ):\n            ret_value = np.array([ret_value]).astype(res_t)\n\n    return ret_value\n"
  },
  {
    "path": "modin/core/computation/check.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"\nForked from pandas.core.computation.check\n\"\"\"\n\nfrom __future__ import annotations\n\nfrom pandas.compat._optional import import_optional_dependency\n\nne = import_optional_dependency(\"numexpr\", errors=\"warn\")\nNUMEXPR_INSTALLED = ne is not None\n\n__all__ = [\"NUMEXPR_INSTALLED\"]\n"
  },
  {
    "path": "modin/core/computation/common.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"\nForked from pandas.core.computation.common\n\"\"\"\n\nfrom __future__ import annotations\n\nfrom functools import reduce\n\nimport numpy as np\nfrom pandas._config import get_option\nfrom pandas.core.dtypes.cast import find_common_type\nfrom pandas.core.dtypes.common import is_extension_array_dtype\n\n\ndef ensure_decoded(s) -> str:\n    \"\"\"\n    If we have bytes, decode them to unicode.\n    \"\"\"\n    if isinstance(s, (np.bytes_, bytes)):\n        s = s.decode(get_option(\"display.encoding\"))\n    return s\n\n\ndef result_type_many(*arrays_and_dtypes):\n    \"\"\"\n    Wrapper around numpy.result_type which overcomes the NPY_MAXARGS (32)\n    argument limit.\n    \"\"\"\n    try:\n        return np.result_type(*arrays_and_dtypes)\n    except ValueError:\n        # we have > NPY_MAXARGS terms in our expression\n        return reduce(np.result_type, arrays_and_dtypes)\n    except TypeError:\n        arr_and_dtypes = list(arrays_and_dtypes)\n        ea_dtypes, non_ea_dtypes = [], []\n        for arr_or_dtype in arr_and_dtypes:\n            if is_extension_array_dtype(arr_or_dtype):\n                ea_dtypes.append(arr_or_dtype)\n            else:\n     
           non_ea_dtypes.append(arr_or_dtype)\n\n        if non_ea_dtypes:\n            try:\n                np_dtype = np.result_type(*non_ea_dtypes)\n            except ValueError:\n                np_dtype = reduce(np.result_type, arrays_and_dtypes)\n            return find_common_type(ea_dtypes + [np_dtype])\n\n        return find_common_type(ea_dtypes)\n"
  },
  {
    "path": "modin/core/computation/engines.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"\nEngine classes for :func:`~pandas.eval`. Forked from pandas.core.computation.engines\n\"\"\"\n\nfrom __future__ import annotations\n\nimport abc\nfrom typing import TYPE_CHECKING\n\nfrom pandas.errors import NumExprClobberingError\nfrom pandas.io.formats import printing\n\nfrom modin.core.computation.align import (\n    align_terms,\n    reconstruct_object,\n)\nfrom modin.core.computation.ops import (\n    MATHOPS,\n    REDUCTIONS,\n)\n\nif TYPE_CHECKING:\n    from modin.core.computation.expr import Expr\n\n_ne_builtins = frozenset(MATHOPS + REDUCTIONS)\n\n\ndef _check_ne_builtin_clash(expr: Expr) -> None:\n    \"\"\"\n    Attempt to prevent foot-shooting in a helpful way.\n\n    Parameters\n    ----------\n    expr : Expr\n        Terms can contain\n    \"\"\"\n    names = expr.names\n    overlap = names & _ne_builtins\n\n    if overlap:\n        s = \", \".join([repr(x) for x in overlap])\n        raise NumExprClobberingError(\n            f'Variables in expression \"{expr}\" overlap with builtins: ({s})'\n        )\n\n\nclass AbstractEngine(metaclass=abc.ABCMeta):\n    \"\"\"Object serving as a base class for all engines.\"\"\"\n\n    has_neg_frac = 
False\n\n    def __init__(self, expr) -> None:\n        self.expr = expr\n        self.aligned_axes = None\n        self.result_type = None\n\n    def convert(self) -> str:\n        \"\"\"\n        Convert an expression for evaluation.\n\n        Defaults to return the expression as a string.\n        \"\"\"\n        return printing.pprint_thing(self.expr)\n\n    def evaluate(self) -> object:\n        \"\"\"\n        Run the engine on the expression.\n\n        This method performs alignment which is necessary no matter what engine\n        is being used, thus its implementation is in the base class.\n\n        Returns\n        -------\n        object\n            The result of the passed expression.\n        \"\"\"\n        if not self._is_aligned:\n            self.result_type, self.aligned_axes = align_terms(self.expr.terms)\n\n        # make sure no names in resolvers and locals/globals clash\n        res = self._evaluate()\n        return reconstruct_object(\n            self.result_type, res, self.aligned_axes, self.expr.terms.return_type\n        )\n\n    @property\n    def _is_aligned(self) -> bool:\n        return self.aligned_axes is not None and self.result_type is not None\n\n    @abc.abstractmethod\n    def _evaluate(self):\n        \"\"\"\n        Return an evaluated expression.\n\n        Parameters\n        ----------\n        env : Scope\n            The local and global environment in which to evaluate an\n            expression.\n\n        Notes\n        -----\n        Must be implemented by subclasses.\n        \"\"\"\n\n\nclass NumExprEngine(AbstractEngine):\n    \"\"\"NumExpr engine class\"\"\"\n\n    has_neg_frac = True\n\n    def _evaluate(self):\n        import numexpr as ne\n\n        # convert the expression to a valid numexpr expression\n        s = self.convert()\n\n        env = self.expr.env\n        scope = env.full_scope\n        _check_ne_builtin_clash(self.expr)\n        return ne.evaluate(s, local_dict=scope)\n\n\nclass 
PythonEngine(AbstractEngine):\n    \"\"\"\n    Evaluate an expression in Python space.\n\n    Mostly for testing purposes.\n    \"\"\"\n\n    has_neg_frac = False\n\n    def evaluate(self):\n        return self.expr()\n\n    def _evaluate(self) -> None:\n        pass\n\n\nENGINES: dict[str, type[AbstractEngine]] = {\n    \"numexpr\": NumExprEngine,\n    \"python\": PythonEngine,\n}\n"
  },
  {
    "path": "modin/core/computation/eval.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"\nTop level ``eval`` module. Forked from pandas.core.computation.eval\n\"\"\"\n\nfrom __future__ import annotations\n\nimport tokenize\nimport warnings\n\nfrom pandas.core.dtypes.common import is_extension_array_dtype\nfrom pandas.io.formats.printing import pprint_thing\nfrom pandas.util._validators import validate_bool_kwarg\n\nfrom modin.core.computation.check import NUMEXPR_INSTALLED\nfrom modin.core.computation.engines import ENGINES\nfrom modin.core.computation.expr import (\n    PARSERS,\n    Expr,\n)\nfrom modin.core.computation.ops import BinOp\nfrom modin.core.computation.parsing import tokenize_string\nfrom modin.core.computation.scope import ensure_scope\nfrom modin.pandas.base import BasePandasDataset\n\n\ndef _check_engine(engine: str | None) -> str:\n    \"\"\"\n    Make sure a valid engine is passed.\n\n    Parameters\n    ----------\n    engine : str\n        String to validate.\n\n    Raises\n    ------\n    KeyError\n      * If an invalid engine is passed.\n    ImportError\n      * If numexpr was requested but doesn't exist.\n\n    Returns\n    -------\n    str\n        Engine name.\n    \"\"\"\n\n    if engine is None:\n        engine = 
\"numexpr\" if NUMEXPR_INSTALLED else \"python\"\n\n    if engine not in ENGINES:\n        valid_engines = list(ENGINES.keys())\n        raise KeyError(\n            f\"Invalid engine '{engine}' passed, valid engines are {valid_engines}\"\n        )\n\n    # TODO: validate this in a more general way (thinking of future engines\n    # that won't necessarily be import-able)\n    # Could potentially be done on engine instantiation\n    if engine == \"numexpr\" and not NUMEXPR_INSTALLED:\n        raise ImportError(\n            \"'numexpr' is not installed or an unsupported version. Cannot use \"\n            + \"engine='numexpr' for query/eval if 'numexpr' is not installed\"\n        )\n\n    return engine\n\n\ndef _check_parser(parser: str):\n    \"\"\"\n    Make sure a valid parser is passed.\n\n    Parameters\n    ----------\n    parser : str\n\n    Raises\n    ------\n    KeyError\n      * If an invalid parser is passed\n    \"\"\"\n    if parser not in PARSERS:\n        raise KeyError(\n            f\"Invalid parser '{parser}' passed, valid parsers are {PARSERS.keys()}\"\n        )\n\n\ndef _check_resolvers(resolvers):\n    if resolvers is not None:\n        for resolver in resolvers:\n            if not hasattr(resolver, \"__getitem__\"):\n                name = type(resolver).__name__\n                raise TypeError(\n                    f\"Resolver of type '{name}' does not \"\n                    + \"implement the __getitem__ method\"\n                )\n\n\ndef _check_expression(expr):\n    \"\"\"\n    Make sure an expression is not an empty string\n\n    Parameters\n    ----------\n    expr : object\n        An object that can be converted to a string\n\n    Raises\n    ------\n    ValueError\n      * If expr is an empty string\n    \"\"\"\n    if not expr:\n        raise ValueError(\"expr cannot be an empty string\")\n\n\ndef _convert_expression(expr) -> str:\n    \"\"\"\n    Convert an object to an expression.\n\n    This function converts an object to 
an expression (a unicode string) and\n    checks to make sure it isn't empty after conversion. This is used to\n    convert operators to their string representation for recursive calls to\n    :func:`~pandas.eval`.\n\n    Parameters\n    ----------\n    expr : object\n        The object to be converted to a string.\n\n    Returns\n    -------\n    str\n        The string representation of an object.\n\n    Raises\n    ------\n    ValueError\n      * If the expression is empty.\n    \"\"\"\n    s = pprint_thing(expr)\n    _check_expression(s)\n    return s\n\n\ndef _check_for_locals(expr: str, stack_level: int, parser: str):\n    at_top_of_stack = stack_level == 0\n    not_pandas_parser = parser != \"pandas\"\n\n    if not_pandas_parser:\n        msg = \"The '@' prefix is only supported by the pandas parser\"\n    elif at_top_of_stack:\n        msg = (\n            \"The '@' prefix is not allowed in top-level eval calls.\\n\"\n            + \"please refer to your variables by name without the '@' prefix.\"\n        )\n\n    if at_top_of_stack or not_pandas_parser:\n        for toknum, tokval in tokenize_string(expr):\n            if toknum == tokenize.OP and tokval == \"@\":\n                raise SyntaxError(msg)\n\n\ndef eval(\n    expr: str | BinOp,  # we leave BinOp out of the docstr bc it isn't for users\n    parser: str = \"pandas\",\n    engine: str | None = None,\n    local_dict=None,\n    global_dict=None,\n    resolvers=(),\n    level: int = 0,\n    target=None,\n    inplace: bool = False,\n):\n    \"\"\"\n    Evaluate a Python expression as a string using various backends.\n\n    The following arithmetic operations are supported: ``+``, ``-``, ``*``,\n    ``/``, ``**``, ``%``, ``//`` (python engine only) along with the following\n    boolean operations: ``|`` (or), ``&`` (and), and ``~`` (not).\n    Additionally, the ``'pandas'`` parser allows the use of :keyword:`and`,\n    :keyword:`or`, and :keyword:`not` with the same semantics as the\n    
corresponding bitwise operators.  :class:`~pandas.Series` and\n    :class:`~pandas.DataFrame` objects are supported and behave as they would\n    with plain ol' Python evaluation.\n\n    Parameters\n    ----------\n    expr : str\n        The expression to evaluate. This string cannot contain any Python\n        `statements\n        <https://docs.python.org/3/reference/simple_stmts.html#simple-statements>`__,\n        only Python `expressions\n        <https://docs.python.org/3/reference/simple_stmts.html#expression-statements>`__.\n    parser : {'pandas', 'python'}, default 'pandas'\n        The parser to use to construct the syntax tree from the expression. The\n        default of ``'pandas'`` parses code slightly differently than standard\n        Python. Alternatively, you can parse an expression using the\n        ``'python'`` parser to retain strict Python semantics.  See the\n        :ref:`enhancing performance <enhancingperf.eval>` documentation for\n        more details.\n    engine : {'python', 'numexpr'}, default 'numexpr'\n\n        The engine used to evaluate the expression. Supported engines are\n\n        - None : tries to use ``numexpr``, falls back to ``python``\n        - ``'numexpr'`` : This default engine evaluates pandas objects using\n          numexpr for large speed ups in complex expressions with large frames.\n        - ``'python'`` : Performs operations as if you had ``eval``'d in top\n          level python. 
This engine is generally not that useful.\n\n        More backends may be available in the future.\n    local_dict : dict or None, optional\n        A dictionary of local variables, taken from locals() by default.\n    global_dict : dict or None, optional\n        A dictionary of global variables, taken from globals() by default.\n    resolvers : list of dict-like or None, optional\n        A list of objects implementing the ``__getitem__`` special method that\n        you can use to inject an additional collection of namespaces to use for\n        variable lookup. For example, this is used in the\n        :meth:`~DataFrame.query` method to inject the\n        ``DataFrame.index`` and ``DataFrame.columns``\n        variables that refer to their respective :class:`~pandas.DataFrame`\n        instance attributes.\n    level : int, optional\n        The number of prior stack frames to traverse and add to the current\n        scope. Most users will **not** need to change this parameter.\n    target : object, optional, default None\n        This is the target object for assignment. It is used when there is\n        variable assignment in the expression. If so, then `target` must\n        support item assignment with string keys, and if a copy is being\n        returned, it must also support `.copy()`.\n    inplace : bool, default False\n        If `target` is provided, and the expression mutates `target`, whether\n        to modify `target` inplace. 
Otherwise, return a copy of `target` with\n        the mutation.\n\n    Returns\n    -------\n    ndarray, numeric scalar, DataFrame, Series, or None\n        The completion value of evaluating the given code or None if ``inplace=True``.\n\n    Raises\n    ------\n    ValueError\n        There are many instances where such an error can be raised:\n\n        - `target=None`, but the expression is multiline.\n        - The expression is multiline, but not all of them have item assignment.\n          An example of such an arrangement is this:\n\n          a = b + 1\n          a + 2\n\n          Here, there are expressions on different lines, making it multiline,\n          but the last line has no variable assigned to the output of `a + 2`.\n        - `inplace=True`, but the expression is missing item assignment.\n        - Item assignment is provided, but the `target` does not support\n          string item assignment.\n        - Item assignment is provided and `inplace=False`, but the `target`\n          does not support the `.copy()` method\n\n    See Also\n    --------\n    DataFrame.query : Evaluates a boolean expression to query the columns\n            of a frame.\n    DataFrame.eval : Evaluate a string describing operations on\n            DataFrame columns.\n\n    Notes\n    -----\n    The ``dtype`` of any objects involved in an arithmetic ``%`` operation are\n    recursively cast to ``float64``.\n\n    See the :ref:`enhancing performance <enhancingperf.eval>` documentation for\n    more details.\n\n    Examples\n    --------\n    >>> df = pd.DataFrame({\"animal\": [\"dog\", \"pig\"], \"age\": [10, 20]})\n    >>> df\n      animal  age\n    0    dog   10\n    1    pig   20\n\n    We can add a new column using ``pd.eval``:\n\n    >>> pd.eval(\"double_age = df.age * 2\", target=df)\n      animal  age  double_age\n    0    dog   10          20\n    1    pig   20          40\n    \"\"\"\n    inplace = validate_bool_kwarg(inplace, \"inplace\")\n\n    exprs: list[str | 
BinOp]\n    if isinstance(expr, str):\n        _check_expression(expr)\n        exprs = [e.strip() for e in expr.splitlines() if e.strip() != \"\"]\n    else:\n        # ops.BinOp; for internal compat, not intended to be passed by users\n        exprs = [expr]\n    multi_line = len(exprs) > 1\n\n    if multi_line and target is None:\n        raise ValueError(\n            \"multi-line expressions are only valid in the \"\n            + \"context of data, use DataFrame.eval\"\n        )\n    engine = _check_engine(engine)\n    _check_parser(parser)\n    _check_resolvers(resolvers)\n\n    ret = None\n    first_expr = True\n    target_modified = False\n\n    for expr in exprs:\n        expr = _convert_expression(expr)\n        _check_for_locals(expr, level, parser)\n\n        # get our (possibly passed-in) scope\n        env = ensure_scope(\n            level + 1,\n            global_dict=global_dict,\n            local_dict=local_dict,\n            resolvers=resolvers,\n            target=target,\n        )\n\n        parsed_expr = Expr(expr, engine=engine, parser=parser, env=env)\n\n        if engine == \"numexpr\" and (\n            is_extension_array_dtype(parsed_expr.terms.return_type)\n            or getattr(parsed_expr.terms, \"operand_types\", None) is not None\n            and any(\n                is_extension_array_dtype(elem)\n                for elem in parsed_expr.terms.operand_types\n            )\n        ):\n            warnings.warn(\n                \"Engine has switched to 'python' because numexpr does not support \"\n                + \"extension array dtypes. 
Please set your engine to python manually.\",\n                RuntimeWarning,\n            )\n            engine = \"python\"\n\n        # construct the engine and evaluate the parsed expression\n        eng = ENGINES[engine]\n        eng_inst = eng(parsed_expr)\n        ret = eng_inst.evaluate()\n\n        if parsed_expr.assigner is None:\n            if multi_line:\n                raise ValueError(\n                    \"Multi-line expressions are only valid \"\n                    + \"if all expressions contain an assignment\"\n                )\n            if inplace:\n                raise ValueError(\"Cannot operate inplace if there is no assignment\")\n\n        # assign if needed\n        assigner = parsed_expr.assigner\n        if env.target is not None and assigner is not None:\n            target_modified = True\n\n            # if returning a copy, copy only on the first assignment\n            if not inplace and first_expr:\n                try:\n                    target = env.target\n                    if isinstance(target, BasePandasDataset):\n                        target = target.copy(deep=True)\n                    else:\n                        target = target.copy()\n                except AttributeError as err:\n                    raise ValueError(\"Cannot return a copy of the target\") from err\n            else:\n                target = env.target\n\n            # TypeError is most commonly raised (e.g. int, list), but you\n            # get IndexError if you try to do this assignment on np.ndarray.\n            # we will ignore numpy warnings here; e.g. 
if trying\n            # to use a non-numeric indexer\n            try:\n                if inplace and isinstance(target, BasePandasDataset):\n                    target.loc[:, assigner] = ret\n                else:\n                    target[assigner] = ret  # pyright: ignore[reportGeneralTypeIssues]\n            except (TypeError, IndexError) as err:\n                raise ValueError(\"Cannot assign expression output to target\") from err\n\n            if not resolvers:\n                resolvers = ({assigner: ret},)\n            else:\n                # existing resolver needs to be updated to handle\n                # case of mutating existing column in copy\n                for resolver in resolvers:\n                    if assigner in resolver:\n                        resolver[assigner] = ret\n                        break\n                else:\n                    resolvers += ({assigner: ret},)\n\n            ret = None\n            first_expr = False\n\n    # We want to exclude `inplace=None` as being False.\n    return (target if target_modified else ret) if inplace is False else None\n"
  },
  {
    "path": "modin/core/computation/expr.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"\n:func:`~pandas.eval` parsers.\n\nForked from pandas.core.computation.expr\n\"\"\"\n\nfrom __future__ import annotations\n\nimport ast\nimport tokenize\nfrom functools import (\n    partial,\n    reduce,\n)\nfrom keyword import iskeyword\nfrom typing import (\n    Callable,\n    ClassVar,\n    TypeVar,\n)\n\nimport numpy as np\nimport pandas.core.common as com\nfrom pandas.errors import UndefinedVariableError\nfrom pandas.io.formats import printing\n\nfrom modin.core.computation.ops import (\n    ARITH_OPS_SYMS,\n    BOOL_OPS_SYMS,\n    CMP_OPS_SYMS,\n    LOCAL_TAG,\n    UNARY_OPS_SYMS,\n    BinOp,\n    Constant,\n    FuncNode,\n    Op,\n    Term,\n    UnaryOp,\n    is_term,\n)\nfrom modin.core.computation.parsing import (\n    clean_backtick_quoted_toks,\n    tokenize_string,\n)\nfrom modin.core.computation.scope import Scope\n\n\ndef _rewrite_assign(tok: tuple[int, str]) -> tuple[int, str]:\n    \"\"\"\n    Rewrite the assignment operator for PyTables expressions that use ``=``\n    as a substitute for ``==``.\n\n    Parameters\n    ----------\n    tok : tuple of int, str\n        ints correspond to the all caps constants in the tokenize module\n\n    
Returns\n    -------\n    tuple of int, str\n        Either the input or token or the replacement values\n    \"\"\"\n    toknum, tokval = tok\n    return toknum, \"==\" if tokval == \"=\" else tokval\n\n\ndef _replace_booleans(tok: tuple[int, str]) -> tuple[int, str]:\n    \"\"\"\n    Replace ``&`` with ``and`` and ``|`` with ``or`` so that bitwise\n    precedence is changed to boolean precedence.\n\n    Parameters\n    ----------\n    tok : tuple of int, str\n        ints correspond to the all caps constants in the tokenize module\n\n    Returns\n    -------\n    tuple of int, str\n        Either the input or token or the replacement values\n    \"\"\"\n    toknum, tokval = tok\n    if toknum == tokenize.OP:\n        if tokval == \"&\":\n            return tokenize.NAME, \"and\"\n        elif tokval == \"|\":\n            return tokenize.NAME, \"or\"\n        return toknum, tokval\n    return toknum, tokval\n\n\ndef _replace_locals(tok: tuple[int, str]) -> tuple[int, str]:\n    \"\"\"\n    Replace local variables with a syntactically valid name.\n\n    Parameters\n    ----------\n    tok : tuple of int, str\n        ints correspond to the all caps constants in the tokenize module\n\n    Returns\n    -------\n    tuple of int, str\n        Either the input or token or the replacement values\n\n    Notes\n    -----\n    This is somewhat of a hack in that we rewrite a string such as ``'@a'`` as\n    ``'__pd_eval_local_a'`` by telling the tokenizer that ``__pd_eval_local_``\n    is a ``tokenize.OP`` and to replace the ``'@'`` symbol with it.\n    \"\"\"\n    toknum, tokval = tok\n    if toknum == tokenize.OP and tokval == \"@\":\n        return tokenize.OP, LOCAL_TAG\n    return toknum, tokval\n\n\ndef _compose2(f, g):\n    \"\"\"\n    Compose 2 callables.\n    \"\"\"\n    return lambda *args, **kwargs: f(g(*args, **kwargs))\n\n\ndef _compose(*funcs):\n    \"\"\"\n    Compose 2 or more callables.\n    \"\"\"\n    assert len(funcs) > 1, \"At least 2 callables must be 
passed to compose\"\n    return reduce(_compose2, funcs)\n\n\ndef _preparse(\n    source: str,\n    f=_compose(\n        _replace_locals, _replace_booleans, _rewrite_assign, clean_backtick_quoted_toks\n    ),\n) -> str:\n    \"\"\"\n    Compose a collection of tokenization functions.\n\n    Parameters\n    ----------\n    source : str\n        A Python source code string\n    f : callable\n        This takes a tuple of (toknum, tokval) as its argument and returns a\n        tuple with the same structure but possibly different elements. Defaults\n        to the composition of ``clean_backtick_quoted_toks``,\n        ``_rewrite_assign``, ``_replace_booleans``, and ``_replace_locals``.\n\n    Returns\n    -------\n    str\n        Valid Python source code\n\n    Notes\n    -----\n    The `f` parameter can be any callable that takes *and* returns input of the\n    form ``(toknum, tokval)``, where ``toknum`` is one of the constants from\n    the ``tokenize`` module and ``tokval`` is a string.\n    \"\"\"\n    assert callable(f), \"f must be callable\"\n    return tokenize.untokenize(f(x) for x in tokenize_string(source))\n\n\ndef _is_type(t):\n    \"\"\"\n    Factory for a type checking function of type ``t`` or tuple of types.\n    \"\"\"\n    return lambda x: isinstance(x.value, t)\n\n\n_is_list = _is_type(list)\n_is_str = _is_type(str)\n\n\n# partition all AST nodes\n_all_nodes = frozenset(\n    node\n    for node in (getattr(ast, name) for name in dir(ast))\n    if isinstance(node, type) and issubclass(node, ast.AST)\n)\n\n\ndef _filter_nodes(superclass, all_nodes=_all_nodes):\n    \"\"\"\n    Filter out AST nodes that are subclasses of ``superclass``.\n    \"\"\"\n    node_names = (node.__name__ for node in all_nodes if issubclass(node, superclass))\n    return frozenset(node_names)\n\n\n_all_node_names = frozenset(x.__name__ for x in _all_nodes)\n_mod_nodes = _filter_nodes(ast.mod)\n_stmt_nodes = _filter_nodes(ast.stmt)\n_expr_context_nodes = _filter_nodes(ast.expr_context)\n_boolop_nodes = 
_filter_nodes(ast.boolop)\n_handler_nodes = _filter_nodes(ast.excepthandler)\n_arguments_nodes = _filter_nodes(ast.arguments)\n_keyword_nodes = _filter_nodes(ast.keyword)\n_alias_nodes = _filter_nodes(ast.alias)\n\n\n# nodes that we don't support directly but are needed for parsing\n_hacked_nodes = frozenset([\"Assign\", \"Module\", \"Expr\"])\n\n\n_unsupported_expr_nodes = frozenset(\n    [\n        \"Yield\",\n        \"GeneratorExp\",\n        \"IfExp\",\n        \"DictComp\",\n        \"SetComp\",\n        \"Repr\",\n        \"Lambda\",\n        \"Set\",\n        \"AST\",\n        \"Is\",\n        \"IsNot\",\n    ]\n)\n\n# these nodes are low priority or won't ever be supported (e.g., AST)\n_unsupported_nodes = (\n    _stmt_nodes\n    | _mod_nodes\n    | _handler_nodes\n    | _arguments_nodes\n    | _keyword_nodes\n    | _alias_nodes\n    | _expr_context_nodes\n    | _unsupported_expr_nodes\n) - _hacked_nodes\n\n# we're adding a different assignment in some cases to be equality comparison\n# and we don't want `stmt` and friends in there, so get only the classes whose\n# names are capitalized\n_base_supported_nodes = (_all_node_names - _unsupported_nodes) | _hacked_nodes\nintersection = _unsupported_nodes & _base_supported_nodes\n_msg = f\"cannot both support and not support {intersection}\"\nassert not intersection, _msg\n\n\ndef _node_not_implemented(node_name: str) -> Callable[..., None]:\n    \"\"\"\n    Return a function that raises a NotImplementedError with a passed node name.\n    \"\"\"\n\n    def f(self, *args, **kwargs):\n        raise NotImplementedError(f\"'{node_name}' nodes are not implemented\")\n\n    return f\n\n\n_T = TypeVar(\"_T\")\n\n\ndef disallow(nodes: set[str]) -> Callable[[type[_T]], type[_T]]:\n    \"\"\"\n    Decorator to disallow certain nodes from parsing. 
Raises a\n    NotImplementedError instead.\n\n    Returns\n    -------\n    callable\n    \"\"\"\n\n    def disallowed(cls: type[_T]) -> type[_T]:\n        # error: \"Type[_T]\" has no attribute \"unsupported_nodes\"\n        cls.unsupported_nodes = ()  # type: ignore[attr-defined]\n        for node in nodes:\n            new_method = _node_not_implemented(node)\n            name = f\"visit_{node}\"\n            # error: \"Type[_T]\" has no attribute \"unsupported_nodes\"\n            cls.unsupported_nodes += (name,)  # type: ignore[attr-defined]\n            setattr(cls, name, new_method)\n        return cls\n\n    return disallowed\n\n\ndef _op_maker(op_class, op_symbol):\n    \"\"\"\n    Return a function to create an op class with its symbol already passed.\n\n    Returns\n    -------\n    callable\n    \"\"\"\n\n    def f(self, node, *args, **kwargs):\n        \"\"\"\n        Return a partial function with an Op subclass with an operator already passed.\n\n        Returns\n        -------\n        callable\n        \"\"\"\n        return partial(op_class, op_symbol, *args, **kwargs)\n\n    return f\n\n\n_op_classes = {\"binary\": BinOp, \"unary\": UnaryOp}\n\n\ndef add_ops(op_classes):\n    \"\"\"\n    Decorator to add default implementation of ops.\n    \"\"\"\n\n    def f(cls):\n        for op_attr_name, op_class in op_classes.items():\n            ops = getattr(cls, f\"{op_attr_name}_ops\")\n            ops_map = getattr(cls, f\"{op_attr_name}_op_nodes_map\")\n            for op in ops:\n                op_node = ops_map[op]\n                if op_node is not None:\n                    made_op = _op_maker(op_class, op)\n                    setattr(cls, f\"visit_{op_node}\", made_op)\n        return cls\n\n    return f\n\n\n@disallow(_unsupported_nodes)\n@add_ops(_op_classes)\nclass BaseExprVisitor(ast.NodeVisitor):\n    \"\"\"\n    Custom ast walker. 
Parsers of other engines should subclass this class\n    if necessary.\n\n    Parameters\n    ----------\n    env : Scope\n    engine : str\n    parser : str\n    preparser : callable\n    \"\"\"\n\n    const_type: ClassVar[type[Term]] = Constant\n    term_type: ClassVar[type[Term]] = Term\n\n    binary_ops = CMP_OPS_SYMS + BOOL_OPS_SYMS + ARITH_OPS_SYMS\n    binary_op_nodes = (\n        \"Gt\",\n        \"Lt\",\n        \"GtE\",\n        \"LtE\",\n        \"Eq\",\n        \"NotEq\",\n        \"In\",\n        \"NotIn\",\n        \"BitAnd\",\n        \"BitOr\",\n        \"And\",\n        \"Or\",\n        \"Add\",\n        \"Sub\",\n        \"Mult\",\n        \"Div\",\n        \"Pow\",\n        \"FloorDiv\",\n        \"Mod\",\n    )\n    binary_op_nodes_map = dict(zip(binary_ops, binary_op_nodes))\n\n    unary_ops = UNARY_OPS_SYMS\n    unary_op_nodes = \"UAdd\", \"USub\", \"Invert\", \"Not\"\n    unary_op_nodes_map = dict(zip(unary_ops, unary_op_nodes))\n\n    rewrite_map = {\n        ast.Eq: ast.In,\n        ast.NotEq: ast.NotIn,\n        ast.In: ast.In,\n        ast.NotIn: ast.NotIn,\n    }\n\n    unsupported_nodes: tuple[str, ...]\n\n    def __init__(self, env, engine, parser, preparser=_preparse) -> None:\n        self.env = env\n        self.engine = engine\n        self.parser = parser\n        self.preparser = preparser\n        self.assigner = None\n\n    def visit(self, node, **kwargs):\n        if isinstance(node, str):\n            clean = self.preparser(node)\n            try:\n                node = ast.fix_missing_locations(ast.parse(clean))\n            except SyntaxError as e:\n                if any(iskeyword(x) for x in clean.split()):\n                    e.msg = \"Python keyword not valid identifier in numexpr query\"\n                raise e\n\n        method = f\"visit_{type(node).__name__}\"\n        visitor = getattr(self, method)\n        return visitor(node, **kwargs)\n\n    def visit_Module(self, node, **kwargs):\n        if len(node.body) 
!= 1:\n            raise SyntaxError(\"only a single expression is allowed\")\n        expr = node.body[0]\n        return self.visit(expr, **kwargs)\n\n    def visit_Expr(self, node, **kwargs):\n        return self.visit(node.value, **kwargs)\n\n    def _rewrite_membership_op(self, node, left, right):\n        # the kind of the operator (is actually an instance)\n        op_instance = node.op\n        op_type = type(op_instance)\n\n        # must be two terms and the comparison operator must be ==/!=/in/not in\n        if is_term(left) and is_term(right) and op_type in self.rewrite_map:\n            left_list, right_list = map(_is_list, (left, right))\n            left_str, right_str = map(_is_str, (left, right))\n\n            # if there are any strings or lists in the expression\n            if left_list or right_list or left_str or right_str:\n                op_instance = self.rewrite_map[op_type]()\n\n            # pop the string variable out of locals and replace it with a list\n            # of one string, kind of a hack\n            if right_str:\n                name = self.env.add_tmp([right.value])\n                right = self.term_type(name, self.env)\n\n            if left_str:\n                name = self.env.add_tmp([left.value])\n                left = self.term_type(name, self.env)\n\n        op = self.visit(op_instance)\n        return op, op_instance, left, right\n\n    def _maybe_transform_eq_ne(self, node, left=None, right=None):\n        if left is None:\n            left = self.visit(node.left, side=\"left\")\n        if right is None:\n            right = self.visit(node.right, side=\"right\")\n        op, op_class, left, right = self._rewrite_membership_op(node, left, right)\n        return op, op_class, left, right\n\n    def _maybe_downcast_constants(self, left, right):\n        f32 = np.dtype(np.float32)\n        if (\n            left.is_scalar\n            and hasattr(left, \"value\")\n            and not right.is_scalar\n            
and right.return_type == f32\n        ):\n            # right is a float32 array, left is a scalar\n            name = self.env.add_tmp(np.float32(left.value))\n            left = self.term_type(name, self.env)\n        if (\n            right.is_scalar\n            and hasattr(right, \"value\")\n            and not left.is_scalar\n            and left.return_type == f32\n        ):\n            # left is a float32 array, right is a scalar\n            name = self.env.add_tmp(np.float32(right.value))\n            right = self.term_type(name, self.env)\n\n        return left, right\n\n    def _maybe_eval(self, binop, eval_in_python):\n        # eval `in` and `not in` (for now) in \"partial\" python space\n        # things that can be evaluated in \"eval\" space will be turned into\n        # temporary variables. for example,\n        # [1,2] in a + 2 * b\n        # in that case a + 2 * b will be evaluated using numexpr, and the \"in\"\n        # call will be evaluated using isin (in python space)\n        return binop.evaluate(\n            self.env, self.engine, self.parser, self.term_type, eval_in_python\n        )\n\n    def _maybe_evaluate_binop(\n        self,\n        op,\n        op_class,\n        lhs,\n        rhs,\n        eval_in_python=(\"in\", \"not in\"),\n        maybe_eval_in_python=(\"==\", \"!=\", \"<\", \">\", \"<=\", \">=\"),\n    ):\n        res = op(lhs, rhs)\n\n        if res.has_invalid_return_type:\n            raise TypeError(\n                f\"unsupported operand type(s) for {res.op}: \"\n                + f\"'{lhs.type}' and '{rhs.type}'\"\n            )\n\n        if self.engine != \"pytables\" and (\n            res.op in CMP_OPS_SYMS\n            and getattr(lhs, \"is_datetime\", False)\n            or getattr(rhs, \"is_datetime\", False)\n        ):\n            # all date ops must be done in python bc numexpr doesn't work\n            # well with NaT\n            return self._maybe_eval(res, self.binary_ops)\n\n        if res.op in 
eval_in_python:\n            # \"in\"/\"not in\" ops are always evaluated in python\n            return self._maybe_eval(res, eval_in_python)\n        elif self.engine != \"pytables\":\n            if (\n                getattr(lhs, \"return_type\", None) == object\n                or getattr(rhs, \"return_type\", None) == object\n            ):\n                # evaluate \"==\" and \"!=\" in python if either of our operands\n                # has an object return type\n                return self._maybe_eval(res, eval_in_python + maybe_eval_in_python)\n        return res\n\n    def visit_BinOp(self, node, **kwargs):\n        op, op_class, left, right = self._maybe_transform_eq_ne(node)\n        left, right = self._maybe_downcast_constants(left, right)\n        return self._maybe_evaluate_binop(op, op_class, left, right)\n\n    def visit_UnaryOp(self, node, **kwargs):\n        op = self.visit(node.op)\n        operand = self.visit(node.operand)\n        return op(operand)\n\n    def visit_Name(self, node, **kwargs) -> Term:\n        return self.term_type(node.id, self.env, **kwargs)\n\n    # TODO(py314): deprecated since Python 3.8. Remove after Python 3.14 is min\n    def visit_NameConstant(self, node, **kwargs) -> Term:\n        return self.const_type(node.value, self.env)\n\n    # TODO(py314): deprecated since Python 3.8. Remove after Python 3.14 is min\n    def visit_Num(self, node, **kwargs) -> Term:\n        return self.const_type(node.value, self.env)\n\n    def visit_Constant(self, node, **kwargs) -> Term:\n        return self.const_type(node.value, self.env)\n\n    # TODO(py314): deprecated since Python 3.8. 
Remove after Python 3.14 is min\n    def visit_Str(self, node, **kwargs) -> Term:\n        name = self.env.add_tmp(node.s)\n        return self.term_type(name, self.env)\n\n    def visit_List(self, node, **kwargs) -> Term:\n        name = self.env.add_tmp([self.visit(e)(self.env) for e in node.elts])\n        return self.term_type(name, self.env)\n\n    visit_Tuple = visit_List\n\n    def visit_Index(self, node, **kwargs):\n        \"\"\"df.index[4]\"\"\"\n        return self.visit(node.value)\n\n    def visit_Subscript(self, node, **kwargs) -> Term:\n        from modin.core.computation.eval import eval as pd_eval\n\n        value = self.visit(node.value)\n        slobj = self.visit(node.slice)\n        result = pd_eval(\n            slobj, local_dict=self.env, engine=self.engine, parser=self.parser\n        )\n        try:\n            # a Term instance\n            v = value.value[result]\n        except AttributeError:\n            # an Op instance\n            lhs = pd_eval(\n                value, local_dict=self.env, engine=self.engine, parser=self.parser\n            )\n            v = lhs[result]\n        name = self.env.add_tmp(v)\n        return self.term_type(name, env=self.env)\n\n    def visit_Slice(self, node, **kwargs) -> slice:\n        \"\"\"df.index[slice(4,6)]\"\"\"\n        lower = node.lower\n        if lower is not None:\n            lower = self.visit(lower).value\n        upper = node.upper\n        if upper is not None:\n            upper = self.visit(upper).value\n        step = node.step\n        if step is not None:\n            step = self.visit(step).value\n\n        return slice(lower, upper, step)\n\n    def visit_Assign(self, node, **kwargs):\n        \"\"\"\n        support a single assignment node, like\n\n        c = a + b\n\n        set the assigner at the top level, must be a Name node which\n        might or might not exist in the resolvers\n\n        \"\"\"\n        if len(node.targets) != 1:\n            raise 
SyntaxError(\"can only assign a single expression\")\n        if not isinstance(node.targets[0], ast.Name):\n            raise SyntaxError(\"left hand side of an assignment must be a single name\")\n        if self.env.target is None:\n            raise ValueError(\"cannot assign without a target object\")\n\n        try:\n            assigner = self.visit(node.targets[0], **kwargs)\n        except UndefinedVariableError:\n            assigner = node.targets[0].id\n\n        self.assigner = getattr(assigner, \"name\", assigner)\n        if self.assigner is None:\n            raise SyntaxError(\n                \"left hand side of an assignment must be a single resolvable name\"\n            )\n\n        return self.visit(node.value, **kwargs)\n\n    def visit_Attribute(self, node, **kwargs):\n        attr = node.attr\n        value = node.value\n\n        ctx = node.ctx\n        if isinstance(ctx, ast.Load):\n            # resolve the value\n            resolved = self.visit(value).value\n            try:\n                v = getattr(resolved, attr)\n                name = self.env.add_tmp(v)\n                return self.term_type(name, self.env)\n            except AttributeError:\n                # something like datetime.datetime where scope is overridden\n                if isinstance(value, ast.Name) and value.id == attr:\n                    return resolved\n                raise\n\n        raise ValueError(f\"Invalid Attribute context {type(ctx).__name__}\")\n\n    def visit_Call(self, node, side=None, **kwargs):\n        if isinstance(node.func, ast.Attribute) and node.func.attr != \"__call__\":\n            res = self.visit_Attribute(node.func)\n        elif not isinstance(node.func, ast.Name):\n            raise TypeError(\"Only named functions are supported\")\n        else:\n            try:\n                res = self.visit(node.func)\n            except UndefinedVariableError:\n                # Check if this is a supported function name\n             
   try:\n                    res = FuncNode(node.func.id)\n                except ValueError:\n                    # Raise original error\n                    raise\n\n        if res is None:\n            # error: \"expr\" has no attribute \"id\"\n            raise ValueError(\n                f\"Invalid function call {node.func.id}\"  # type: ignore[attr-defined]\n            )\n        if hasattr(res, \"value\"):\n            res = res.value\n\n        if isinstance(res, FuncNode):\n            new_args = [self.visit(arg) for arg in node.args]\n\n            if node.keywords:\n                raise TypeError(\n                    f'Function \"{res.name}\" does not support keyword arguments'\n                )\n\n            return res(*new_args)\n\n        else:\n            new_args = [self.visit(arg)(self.env) for arg in node.args]\n\n            for key in node.keywords:\n                if not isinstance(key, ast.keyword):\n                    # error: \"expr\" has no attribute \"id\"\n                    raise ValueError(\n                        \"keyword error in function call \"\n                        + f\"'{node.func.id}'\"  # type: ignore[attr-defined]\n                    )\n\n                if key.arg:\n                    kwargs[key.arg] = self.visit(key.value)(self.env)\n\n            name = self.env.add_tmp(res(*new_args, **kwargs))\n            return self.term_type(name=name, env=self.env)\n\n    def translate_In(self, op):\n        return op\n\n    def visit_Compare(self, node, **kwargs):\n        ops = node.ops\n        comps = node.comparators\n\n        # base case: we have something like a CMP b\n        if len(comps) == 1:\n            op = self.translate_In(ops[0])\n            binop = ast.BinOp(op=op, left=node.left, right=comps[0])\n            return self.visit(binop)\n\n        # recursive case: we have a chained comparison, a CMP b CMP c, etc.\n        left = node.left\n        values = []\n        for op, comp in zip(ops, 
comps):\n            new_node = self.visit(\n                ast.Compare(comparators=[comp], left=left, ops=[self.translate_In(op)])\n            )\n            left = comp\n            values.append(new_node)\n        return self.visit(ast.BoolOp(op=ast.And(), values=values))\n\n    def _try_visit_binop(self, bop):\n        if isinstance(bop, (Op, Term)):\n            return bop\n        return self.visit(bop)\n\n    def visit_BoolOp(self, node, **kwargs):\n        def visitor(x, y):\n            lhs = self._try_visit_binop(x)\n            rhs = self._try_visit_binop(y)\n\n            op, op_class, lhs, rhs = self._maybe_transform_eq_ne(node, lhs, rhs)\n            return self._maybe_evaluate_binop(op, node.op, lhs, rhs)\n\n        operands = node.values\n        return reduce(visitor, operands)\n\n\n_python_not_supported = frozenset([\"Dict\", \"BoolOp\", \"In\", \"NotIn\"])\n\n\n@disallow(\n    (_unsupported_nodes | _python_not_supported)\n    - (_boolop_nodes | frozenset([\"BoolOp\", \"Attribute\", \"In\", \"NotIn\", \"Tuple\"]))\n)\nclass PandasExprVisitor(BaseExprVisitor):\n    def __init__(\n        self,\n        env,\n        engine,\n        parser,\n        preparser=partial(\n            _preparse,\n            f=_compose(_replace_locals, _replace_booleans, clean_backtick_quoted_toks),\n        ),\n    ) -> None:\n        super().__init__(env, engine, parser, preparser)\n\n\n@disallow(_unsupported_nodes | _python_not_supported | frozenset([\"Not\"]))\nclass PythonExprVisitor(BaseExprVisitor):\n    def __init__(\n        self, env, engine, parser, preparser=lambda source, f=None: source\n    ) -> None:\n        super().__init__(env, engine, parser, preparser=preparser)\n\n\nclass Expr:\n    \"\"\"\n    Object encapsulating an expression.\n\n    Parameters\n    ----------\n    expr : str\n    engine : str, optional, default 'numexpr'\n    parser : str, optional, default 'pandas'\n    env : Scope, optional, default None\n    level : int, optional, default 
2\n    \"\"\"\n\n    env: Scope\n    engine: str\n    parser: str\n\n    def __init__(\n        self,\n        expr,\n        engine: str = \"numexpr\",\n        parser: str = \"pandas\",\n        env: Scope | None = None,\n        level: int = 0,\n    ) -> None:\n        self.expr = expr\n        self.env = env or Scope(level=level + 1)\n        self.engine = engine\n        self.parser = parser\n        self._visitor = PARSERS[parser](self.env, self.engine, self.parser)\n        self.terms = self.parse()\n\n    @property\n    def assigner(self):\n        return getattr(self._visitor, \"assigner\", None)\n\n    def __call__(self):\n        return self.terms(self.env)\n\n    def __repr__(self) -> str:\n        return printing.pprint_thing(self.terms)\n\n    def __len__(self) -> int:\n        return len(self.expr)\n\n    def parse(self):\n        \"\"\"\n        Parse an expression.\n        \"\"\"\n        return self._visitor.visit(self.expr)\n\n    @property\n    def names(self):\n        \"\"\"\n        Get the names in an expression.\n        \"\"\"\n        if is_term(self.terms):\n            return frozenset([self.terms.name])\n        return frozenset(term.name for term in com.flatten(self.terms))\n\n\nPARSERS = {\"python\": PythonExprVisitor, \"pandas\": PandasExprVisitor}\n"
  },
  {
    "path": "modin/core/computation/ops.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"\nOperator classes for eval. Forked from pandas.core.computation.ops\n\"\"\"\n\nfrom __future__ import annotations\n\nimport operator\nfrom datetime import datetime\nfrom functools import partial\nfrom typing import (\n    TYPE_CHECKING,\n    Callable,\n    Literal,\n)\n\nimport numpy as np\nimport pandas\nimport pandas.core.common as com\nfrom pandas.core.dtypes.cast import find_common_type\nfrom pandas.core.dtypes.common import (\n    is_list_like,\n    is_scalar,\n)\nfrom pandas.io.formats.printing import (\n    pprint_thing,\n    pprint_thing_encoded,\n)\n\nfrom modin.core.computation.common import (\n    ensure_decoded,\n    result_type_many,\n)\nfrom modin.core.computation.scope import DEFAULT_GLOBALS\n\nif TYPE_CHECKING:\n    from collections.abc import (\n        Iterable,\n        Iterator,\n    )\n\nREDUCTIONS = (\"sum\", \"prod\", \"min\", \"max\")\n\n_unary_math_ops = (\n    \"sin\",\n    \"cos\",\n    \"exp\",\n    \"log\",\n    \"expm1\",\n    \"log1p\",\n    \"sqrt\",\n    \"sinh\",\n    \"cosh\",\n    \"tanh\",\n    \"arcsin\",\n    \"arccos\",\n    \"arctan\",\n    \"arccosh\",\n    \"arcsinh\",\n    \"arctanh\",\n    \"abs\",\n    
\"log10\",\n    \"floor\",\n    \"ceil\",\n)\n_binary_math_ops = (\"arctan2\",)\n\nMATHOPS = _unary_math_ops + _binary_math_ops\n\n\nLOCAL_TAG = \"__pd_eval_local_\"\n\n\nclass Term:\n    def __new__(cls, name, env, side=None, encoding=None):\n        klass = Constant if not isinstance(name, str) else cls\n        # error: Argument 2 for \"super\" not an instance of argument 1\n        supr_new = super(Term, klass).__new__  # type: ignore[misc]\n        return supr_new(klass)\n\n    is_local: bool\n\n    def __init__(self, name, env, side=None, encoding=None) -> None:\n        # name is a str for Term, but may be something else for subclasses\n        self._name = name\n        self.env = env\n        self.side = side\n        tname = str(name)\n        self.is_local = tname.startswith(LOCAL_TAG) or tname in DEFAULT_GLOBALS\n        self._value = self._resolve_name()\n        self.encoding = encoding\n\n    @property\n    def local_name(self) -> str:\n        return self.name.replace(LOCAL_TAG, \"\")\n\n    def __repr__(self) -> str:\n        return pprint_thing(self.name)\n\n    def __call__(self, *args, **kwargs):\n        return self.value\n\n    def evaluate(self, *args, **kwargs) -> Term:\n        return self\n\n    def _resolve_name(self):\n        local_name = str(self.local_name)\n        is_local = self.is_local\n        if local_name in self.env.scope and isinstance(\n            self.env.scope[local_name], type\n        ):\n            is_local = False\n\n        res = self.env.resolve(local_name, is_local=is_local)\n        self.update(res)\n\n        if hasattr(res, \"ndim\") and res.ndim > 2:\n            raise NotImplementedError(\n                \"N-dimensional objects, where N > 2, are not supported with eval\"\n            )\n        return res\n\n    def update(self, value) -> None:\n        \"\"\"\n        search order for local (i.e., @variable) variables:\n\n        scope, key_variable\n        [('locals', 'local_name'),\n         ('globals', 
'local_name'),\n         ('locals', 'key'),\n         ('globals', 'key')]\n        \"\"\"\n        key = self.name\n\n        # if it's a variable name (otherwise a constant)\n        if isinstance(key, str):\n            self.env.swapkey(self.local_name, key, new_value=value)\n\n        self.value = value\n\n    @property\n    def is_scalar(self) -> bool:\n        return is_scalar(self._value)\n\n    @property\n    def type(self):\n        try:\n            # potentially very slow for large, mixed dtype frames\n            return find_common_type(self._value.dtypes.values)\n        except AttributeError:\n            try:\n                # ndarray\n                return self._value.dtype\n            except AttributeError:\n                # scalar\n                return type(self._value)\n\n    return_type = type\n\n    @property\n    def raw(self) -> str:\n        return f\"{type(self).__name__}(name={repr(self.name)}, type={self.type})\"\n\n    @property\n    def is_datetime(self) -> bool:\n        try:\n            t = self.type.type\n        except AttributeError:\n            t = self.type\n\n        return issubclass(t, (datetime, np.datetime64))\n\n    @property\n    def value(self):\n        return self._value\n\n    @value.setter\n    def value(self, new_value) -> None:\n        self._value = new_value\n\n    @property\n    def name(self):\n        return self._name\n\n    @property\n    def ndim(self) -> int:\n        return self._value.ndim\n\n\nclass Constant(Term):\n    def _resolve_name(self):\n        return self._name\n\n    @property\n    def name(self):\n        return self.value\n\n    def __repr__(self) -> str:\n        # in python 2 str() of float\n        # can truncate shorter than repr()\n        return repr(self.name)\n\n\n_bool_op_map = {\"not\": \"~\", \"and\": \"&\", \"or\": \"|\"}\n\n\nclass Op:\n    \"\"\"\n    Hold an operator of arbitrary arity.\n    \"\"\"\n\n    op: str\n\n    def __init__(self, op: str, operands: 
Iterable[Term | Op], encoding=None) -> None:\n        self.op = _bool_op_map.get(op, op)\n        self.operands = operands\n        self.encoding = encoding\n\n    def __iter__(self) -> Iterator:\n        return iter(self.operands)\n\n    def __repr__(self) -> str:\n        \"\"\"\n        Print a generic n-ary operator and its operands using infix notation.\n        \"\"\"\n        # recurse over the operands\n        parened = (f\"({pprint_thing(opr)})\" for opr in self.operands)\n        return pprint_thing(f\" {self.op} \".join(parened))\n\n    @property\n    def return_type(self):\n        # clobber types to bool if the op is a boolean operator\n        if self.op in (CMP_OPS_SYMS + BOOL_OPS_SYMS):\n            return np.bool_\n        return result_type_many(*(term.type for term in com.flatten(self)))\n\n    @property\n    def has_invalid_return_type(self) -> bool:\n        types = self.operand_types\n        obj_dtype_set = frozenset([np.dtype(\"object\")])\n        return self.return_type == object and types - obj_dtype_set\n\n    @property\n    def operand_types(self):\n        return frozenset(term.type for term in com.flatten(self))\n\n    @property\n    def is_scalar(self) -> bool:\n        return all(operand.is_scalar for operand in self.operands)\n\n    @property\n    def is_datetime(self) -> bool:\n        try:\n            t = self.return_type.type\n        except AttributeError:\n            t = self.return_type\n\n        return issubclass(t, (datetime, np.datetime64))\n\n\ndef _in(x, y):\n    \"\"\"\n    Compute the vectorized membership of ``x in y`` if possible, otherwise\n    use Python.\n    \"\"\"\n    try:\n        return x.isin(y)\n    except AttributeError:\n        if is_list_like(x):\n            try:\n                return y.isin(x)\n            except AttributeError:\n                pass\n        return x in y\n\n\ndef _not_in(x, y):\n    \"\"\"\n    Compute the vectorized membership of ``x not in y`` if possible,\n    otherwise use 
Python.\n    \"\"\"\n    try:\n        return ~x.isin(y)\n    except AttributeError:\n        if is_list_like(x):\n            try:\n                return ~y.isin(x)\n            except AttributeError:\n                pass\n        return x not in y\n\n\nCMP_OPS_SYMS = (\">\", \"<\", \">=\", \"<=\", \"==\", \"!=\", \"in\", \"not in\")\n_cmp_ops_funcs = (\n    operator.gt,\n    operator.lt,\n    operator.ge,\n    operator.le,\n    operator.eq,\n    operator.ne,\n    _in,\n    _not_in,\n)\n_cmp_ops_dict = dict(zip(CMP_OPS_SYMS, _cmp_ops_funcs))\n\nBOOL_OPS_SYMS = (\"&\", \"|\", \"and\", \"or\")\n_bool_ops_funcs = (operator.and_, operator.or_, operator.and_, operator.or_)\n_bool_ops_dict = dict(zip(BOOL_OPS_SYMS, _bool_ops_funcs))\n\nARITH_OPS_SYMS = (\"+\", \"-\", \"*\", \"/\", \"**\", \"//\", \"%\")\n_arith_ops_funcs = (\n    operator.add,\n    operator.sub,\n    operator.mul,\n    operator.truediv,\n    operator.pow,\n    operator.floordiv,\n    operator.mod,\n)\n_arith_ops_dict = dict(zip(ARITH_OPS_SYMS, _arith_ops_funcs))\n\nSPECIAL_CASE_ARITH_OPS_SYMS = (\"**\", \"//\", \"%\")\n\n\n_binary_ops_dict = {}\n\nfor d in (_cmp_ops_dict, _bool_ops_dict, _arith_ops_dict):\n    _binary_ops_dict.update(d)\n\n\ndef is_term(obj) -> bool:\n    return isinstance(obj, Term)\n\n\nclass BinOp(Op):\n    \"\"\"\n    Hold a binary operator and its operands.\n\n    Parameters\n    ----------\n    op : str\n    lhs : Term or Op\n    rhs : Term or Op\n    \"\"\"\n\n    def __init__(self, op: str, lhs, rhs) -> None:\n        super().__init__(op, (lhs, rhs))\n        self.lhs = lhs\n        self.rhs = rhs\n\n        self._disallow_scalar_only_bool_ops()\n\n        self.convert_values()\n\n        try:\n            self.func = _binary_ops_dict[op]\n        except KeyError as err:\n            # has to be made a list for python3\n            keys = list(_binary_ops_dict.keys())\n            raise ValueError(\n                f\"Invalid binary operator {repr(op)}, valid operators are 
{keys}\"\n            ) from err\n\n    def __call__(self, env):\n        \"\"\"\n        Recursively evaluate an expression in Python space.\n\n        Parameters\n        ----------\n        env : Scope\n\n        Returns\n        -------\n        object\n            The result of an evaluated expression.\n        \"\"\"\n        # recurse over the left/right nodes\n        left = self.lhs(env)\n        right = self.rhs(env)\n\n        return self.func(left, right)\n\n    def evaluate(self, env, engine: str, parser, term_type, eval_in_python):\n        \"\"\"\n        Evaluate a binary operation *before* being passed to the engine.\n\n        Parameters\n        ----------\n        env : Scope\n        engine : str\n        parser : str\n        term_type : type\n        eval_in_python : list\n\n        Returns\n        -------\n        term_type\n            The \"pre-evaluated\" expression as an instance of ``term_type``\n        \"\"\"\n        if engine == \"python\":\n            res = self(env)\n        else:\n            # recurse over the left/right nodes\n\n            left = self.lhs.evaluate(\n                env,\n                engine=engine,\n                parser=parser,\n                term_type=term_type,\n                eval_in_python=eval_in_python,\n            )\n\n            right = self.rhs.evaluate(\n                env,\n                engine=engine,\n                parser=parser,\n                term_type=term_type,\n                eval_in_python=eval_in_python,\n            )\n\n            # base cases\n            if self.op in eval_in_python:\n                res = self.func(left.value, right.value)\n            else:\n                from modin.core.computation.eval import eval\n\n                res = eval(self, local_dict=env, engine=engine, parser=parser)\n\n        name = env.add_tmp(res)\n        return term_type(name, env=env)\n\n    def convert_values(self) -> None:\n        \"\"\"\n        Convert datetimes to a 
comparable value in an expression.\n        \"\"\"\n\n        def stringify(value):\n            encoder: Callable\n            if self.encoding is not None:\n                encoder = partial(pprint_thing_encoded, encoding=self.encoding)\n            else:\n                encoder = pprint_thing\n            return encoder(value)\n\n        lhs, rhs = self.lhs, self.rhs\n\n        if is_term(lhs) and lhs.is_datetime and is_term(rhs) and rhs.is_scalar:\n            v = rhs.value\n            if isinstance(v, (int, float)):\n                v = stringify(v)\n            v = pandas.Timestamp(ensure_decoded(v))\n            if v.tz is not None:\n                v = v.tz_convert(\"UTC\")\n            self.rhs.update(v)\n\n        if is_term(rhs) and rhs.is_datetime and is_term(lhs) and lhs.is_scalar:\n            v = lhs.value\n            if isinstance(v, (int, float)):\n                v = stringify(v)\n            v = pandas.Timestamp(ensure_decoded(v))\n            if v.tz is not None:\n                v = v.tz_convert(\"UTC\")\n            self.lhs.update(v)\n\n    def _disallow_scalar_only_bool_ops(self):\n        rhs = self.rhs\n        lhs = self.lhs\n\n        # GH#24883 unwrap dtype if necessary to ensure we have a type object\n        rhs_rt = rhs.return_type\n        rhs_rt = getattr(rhs_rt, \"type\", rhs_rt)\n        lhs_rt = lhs.return_type\n        lhs_rt = getattr(lhs_rt, \"type\", lhs_rt)\n        if (\n            (lhs.is_scalar or rhs.is_scalar)\n            and self.op in _bool_ops_dict\n            and (\n                not (\n                    issubclass(rhs_rt, (bool, np.bool_))\n                    and issubclass(lhs_rt, (bool, np.bool_))\n                )\n            )\n        ):\n            raise NotImplementedError(\"cannot evaluate scalar only bool ops\")\n\n\ndef isnumeric(dtype) -> bool:\n    return issubclass(np.dtype(dtype).type, np.number)\n\n\nUNARY_OPS_SYMS = (\"+\", \"-\", \"~\", \"not\")\n_unary_ops_funcs = (operator.pos, 
operator.neg, operator.invert, operator.invert)\n_unary_ops_dict = dict(zip(UNARY_OPS_SYMS, _unary_ops_funcs))\n\n\nclass UnaryOp(Op):\n    \"\"\"\n    Hold a unary operator and its operands.\n\n    Parameters\n    ----------\n    op : str\n        The token used to represent the operator.\n    operand : Term or Op\n        The Term or Op operand to the operator.\n\n    Raises\n    ------\n    ValueError\n        * If no function associated with the passed operator token is found.\n    \"\"\"\n\n    def __init__(self, op: Literal[\"+\", \"-\", \"~\", \"not\"], operand) -> None:\n        super().__init__(op, (operand,))\n        self.operand = operand\n\n        try:\n            self.func = _unary_ops_dict[op]\n        except KeyError as err:\n            raise ValueError(\n                f\"Invalid unary operator {repr(op)}, valid operators are {UNARY_OPS_SYMS}\"\n            ) from err\n\n    def __call__(self, env) -> MathCall:\n        operand = self.operand(env)\n        # error: Cannot call function of unknown type\n        return self.func(operand)  # type: ignore[operator]\n\n    def __repr__(self) -> str:\n        return pprint_thing(f\"{self.op}({self.operand})\")\n\n    @property\n    def return_type(self) -> np.dtype:\n        operand = self.operand\n        if operand.return_type == np.dtype(\"bool\"):\n            return np.dtype(\"bool\")\n        if isinstance(operand, Op) and (\n            operand.op in _cmp_ops_dict or operand.op in _bool_ops_dict\n        ):\n            return np.dtype(\"bool\")\n        return np.dtype(\"int\")\n\n\nclass MathCall(Op):\n    def __init__(self, func, args) -> None:\n        super().__init__(func.name, args)\n        self.func = func\n\n    def __call__(self, env):\n        # error: \"Op\" not callable\n        operands = [op(env) for op in self.operands]  # type: ignore[operator]\n        return self.func.func(*operands)\n\n    def __repr__(self) -> str:\n        operands = map(str, self.operands)\n        
return pprint_thing(f\"{self.op}({','.join(operands)})\")\n\n\nclass FuncNode:\n    def __init__(self, name: str) -> None:\n        if name not in MATHOPS:\n            raise ValueError(f'\"{name}\" is not a supported function')\n        self.name = name\n        self.func = getattr(np, name)\n\n    def __call__(self, *args) -> MathCall:\n        return MathCall(self, args)\n"
  },
  {
    "path": "modin/core/computation/parsing.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"\n:func:`~pandas.eval` source string parsing functions. Forked from pandas.core.computation.parsing\n\"\"\"\n\nfrom __future__ import annotations\n\nimport token\nimport tokenize\nfrom io import StringIO\nfrom keyword import iskeyword\nfrom typing import TYPE_CHECKING\n\nif TYPE_CHECKING:\n    from collections.abc import (\n        Hashable,\n        Iterator,\n    )\n\n# A token value Python's tokenizer probably will never use.\nBACKTICK_QUOTED_STRING = 100\n\n\ndef create_valid_python_identifier(name: str) -> str:\n    \"\"\"\n    Create valid Python identifiers from any string.\n\n    Check if name contains any special characters. 
If it contains any\n    special characters, the special characters will be replaced by\n    a special string and a prefix is added.\n\n    Raises\n    ------\n    SyntaxError\n        If the returned name is not a valid Python identifier, raise an exception.\n        This can happen if there is a hashtag in the name, as the tokenizer will\n        then terminate and not find the backtick.\n        But also for characters that fall out of the range of (U+0001..U+007F).\n    \"\"\"\n    if name.isidentifier() and not iskeyword(name):\n        return name\n\n    # Create a dict with the special characters and their replacement string.\n    # EXACT_TOKEN_TYPES contains these special characters\n    # token.tok_name contains a readable description of the replacement string.\n    special_characters_replacements = {\n        char: f\"_{token.tok_name[tokval]}_\"\n        for char, tokval in (tokenize.EXACT_TOKEN_TYPES.items())\n    }\n    special_characters_replacements.update(\n        {\n            \" \": \"_\",\n            \"?\": \"_QUESTIONMARK_\",\n            \"!\": \"_EXCLAMATIONMARK_\",\n            \"$\": \"_DOLLARSIGN_\",\n            \"€\": \"_EUROSIGN_\",\n            \"°\": \"_DEGREESIGN_\",\n            # Including quotes works, but there are exceptions.\n            \"'\": \"_SINGLEQUOTE_\",\n            '\"': \"_DOUBLEQUOTE_\",\n            # Currently not possible. Terminates parser and won't find backtick.\n            # \"#\": \"_HASH_\",\n        }\n    )\n\n    name = \"\".join([special_characters_replacements.get(char, char) for char in name])\n    name = f\"BACKTICK_QUOTED_STRING_{name}\"\n\n    if not name.isidentifier():\n        raise SyntaxError(f\"Could not convert '{name}' to a valid Python identifier.\")\n\n    return name\n\n\ndef clean_backtick_quoted_toks(tok: tuple[int, str]) -> tuple[int, str]:\n    \"\"\"\n    Clean up a column name if surrounded by backticks.\n\n    Backtick quoted strings are indicated by a certain tokval value. 
If a string\n    is a backtick quoted token it will be processed by\n    :func:`create_valid_python_identifier` so that the parser can find this\n    string when the query is executed.\n    In this case the tok will get the NAME tokval.\n\n    Parameters\n    ----------\n    tok : tuple of int, str\n        ints correspond to the all caps constants in the tokenize module\n\n    Returns\n    -------\n    tok : Tuple[int, str]\n        Either the input token or the replacement values\n    \"\"\"\n    toknum, tokval = tok\n    if toknum == BACKTICK_QUOTED_STRING:\n        return tokenize.NAME, create_valid_python_identifier(tokval)\n    return toknum, tokval\n\n\ndef clean_column_name(name: Hashable) -> Hashable:\n    \"\"\"\n    Function to emulate the cleaning of a backtick quoted name.\n\n    The purpose of this function is to see what happens to the name of an\n    identifier if it goes through the process of being parsed as Python code\n    inside a backtick quoted string and then being cleaned\n    (removed of any special characters).\n\n    Parameters\n    ----------\n    name : hashable\n        Name to be cleaned.\n\n    Returns\n    -------\n    name : hashable\n        Returns the name after tokenizing and cleaning.\n\n    Notes\n    -----\n        For some cases, a name cannot be converted to a valid Python identifier.\n        In that case :func:`tokenize_string` raises a SyntaxError.\n        In that case, we just return the name unmodified.\n\n        If this name was used in the query string (this makes the query call impossible)\n        an error will be raised by :func:`tokenize_backtick_quoted_string` instead,\n        which is not caught and propagates to the user level.\n    \"\"\"\n    try:\n        tokenized = tokenize_string(f\"`{name}`\")\n        tokval = next(tokenized)[1]\n        return create_valid_python_identifier(tokval)\n    except SyntaxError:\n        return name\n\n\ndef tokenize_backtick_quoted_string(\n    token_generator: 
Iterator[tokenize.TokenInfo], source: str, string_start: int\n) -> tuple[int, str]:\n    \"\"\"\n    Creates a token from a backtick quoted string.\n\n    Moves the token_generator forwards till right after the next backtick.\n\n    Parameters\n    ----------\n    token_generator : Iterator[tokenize.TokenInfo]\n        The generator that yields the tokens of the source string (Tuple[int, str]).\n        The generator is at the first token after the backtick (`)\n\n    source : str\n        The Python source code string.\n\n    string_start : int\n        This is the start of the backtick quoted string inside the source string.\n\n    Returns\n    -------\n    tok: Tuple[int, str]\n        The token that represents the backtick quoted string.\n        The integer is equal to BACKTICK_QUOTED_STRING (100).\n    \"\"\"\n    string_end = None\n    for _, tokval, start, _, _ in token_generator:\n        if tokval == \"`\":\n            string_end = start[1]\n            break\n\n    assert string_end is not None\n    return BACKTICK_QUOTED_STRING, source[string_start:string_end]\n\n\ndef tokenize_string(source: str) -> Iterator[tuple[int, str]]:\n    \"\"\"\n    Tokenize a Python source code string.\n\n    Parameters\n    ----------\n    source : str\n        The Python source code string.\n\n    Returns\n    -------\n    tok_generator : Iterator[Tuple[int, str]]\n        An iterator yielding all tokens with only toknum and tokval (Tuple[int, str]).\n    \"\"\"\n    line_reader = StringIO(source).readline\n    token_generator = tokenize.generate_tokens(line_reader)\n\n    # Loop over all tokens till a backtick (`) is found.\n    # Then, take all tokens till the next backtick to form a backtick quoted string\n    for toknum, tokval, start, _, _ in token_generator:\n        if tokval == \"`\":\n            try:\n                yield tokenize_backtick_quoted_string(\n                    token_generator, source, string_start=start[1] + 1\n                )\n            except 
Exception as err:\n                raise SyntaxError(f\"Failed to parse backticks in '{source}'.\") from err\n        else:\n            yield toknum, tokval\n"
  },
  {
    "path": "modin/core/computation/scope.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"\nModule for scope operations. Forked from pandas.core.computation.scope\n\"\"\"\n\nfrom __future__ import annotations\n\nimport datetime\nimport inspect\nimport itertools\nimport pprint\nimport struct\nimport sys\nfrom collections import ChainMap\nfrom io import StringIO\nfrom typing import TypeVar\n\nimport numpy as np\nimport pandas\nfrom pandas.errors import UndefinedVariableError\n\n_KT = TypeVar(\"_KT\")\n_VT = TypeVar(\"_VT\")\n\n\n# https://docs.python.org/3/library/collections.html#chainmap-examples-and-recipes\nclass DeepChainMap(ChainMap[_KT, _VT]):\n    \"\"\"\n    Variant of ChainMap that allows direct updates to inner scopes.\n\n    Only works when all passed mapping are mutable.\n    \"\"\"\n\n    def __setitem__(self, key: _KT, value: _VT) -> None:\n        for mapping in self.maps:\n            if key in mapping:\n                mapping[key] = value\n                return\n        self.maps[0][key] = value\n\n    def __delitem__(self, key: _KT) -> None:\n        \"\"\"\n        Raises\n        ------\n        KeyError\n            If `key` doesn't exist.\n        \"\"\"\n        for mapping in self.maps:\n            if key in mapping:\n    
            del mapping[key]\n                return\n        raise KeyError(key)\n\n\ndef ensure_scope(\n    level: int, global_dict=None, local_dict=None, resolvers=(), target=None\n) -> Scope:\n    \"\"\"Ensure that we are grabbing the correct scope.\"\"\"\n    return Scope(\n        level + 1,\n        global_dict=global_dict,\n        local_dict=local_dict,\n        resolvers=resolvers,\n        target=target,\n    )\n\n\ndef _replacer(x) -> str:\n    \"\"\"\n    Replace a number with its hexadecimal representation. Used to tag\n    temporary variables with their calling scope's id.\n    \"\"\"\n    # get the hex repr of the binary char and remove 0x and pad by pad_size\n    # zeros\n    try:\n        hexin = ord(x)\n    except TypeError:\n        # bytes literals masquerade as ints when iterating in py3\n        hexin = x\n\n    return hex(hexin)\n\n\ndef _raw_hex_id(obj) -> str:\n    \"\"\"Return the padded hexadecimal id of ``obj``.\"\"\"\n    # interpret as a pointer since that's what really what id returns\n    packed = struct.pack(\"@P\", id(obj))\n    return \"\".join([_replacer(x) for x in packed])\n\n\nDEFAULT_GLOBALS = {\n    \"Timestamp\": pandas.Timestamp,\n    \"datetime\": datetime.datetime,\n    \"True\": True,\n    \"False\": False,\n    \"list\": list,\n    \"tuple\": tuple,\n    \"inf\": np.inf,\n    \"Inf\": np.inf,\n}\n\n\ndef _get_pretty_string(obj) -> str:\n    \"\"\"\n    Return a prettier version of obj.\n\n    Parameters\n    ----------\n    obj : object\n        Object to pretty print\n\n    Returns\n    -------\n    str\n        Pretty print object repr\n    \"\"\"\n    sio = StringIO()\n    pprint.pprint(obj, stream=sio)  # noqa: T203\n    return sio.getvalue()\n\n\nclass Scope:\n    \"\"\"\n    Object to hold scope, with a few bells to deal with some custom syntax\n    and contexts added by pandas.\n\n    Parameters\n    ----------\n    level : int\n    global_dict : dict or None, optional, default None\n    local_dict : dict or 
Scope or None, optional, default None\n    resolvers : list-like or None, optional, default None\n    target : object\n\n    Attributes\n    ----------\n    level : int\n    scope : DeepChainMap\n    target : object\n    temps : dict\n    \"\"\"\n\n    __slots__ = [\"level\", \"scope\", \"target\", \"resolvers\", \"temps\"]\n    level: int\n    scope: DeepChainMap\n    resolvers: DeepChainMap\n    temps: dict\n\n    def __init__(\n        self, level: int, global_dict=None, local_dict=None, resolvers=(), target=None\n    ) -> None:\n        self.level = level + 1\n\n        # shallow copy because we don't want to keep filling this up with what\n        # was there before if there are multiple calls to Scope/_ensure_scope\n        self.scope = DeepChainMap(DEFAULT_GLOBALS.copy())\n        self.target = target\n\n        if isinstance(local_dict, Scope):\n            self.scope.update(local_dict.scope)\n            if local_dict.target is not None:\n                self.target = local_dict.target\n            self._update(local_dict.level)\n\n        frame = sys._getframe(self.level)\n\n        try:\n            # shallow copy here because we don't want to replace what's in\n            # scope when we align terms (alignment accesses the underlying\n            # numpy array of pandas objects)\n            scope_global = self.scope.new_child(\n                (global_dict if global_dict is not None else frame.f_globals).copy()\n            )\n            self.scope = DeepChainMap(scope_global)\n            if not isinstance(local_dict, Scope):\n                scope_local = self.scope.new_child(\n                    (local_dict if local_dict is not None else frame.f_locals).copy()\n                )\n                self.scope = DeepChainMap(scope_local)\n        finally:\n            del frame\n\n        # assumes that resolvers are going from outermost scope to inner\n        if isinstance(local_dict, Scope):\n            resolvers += 
tuple(local_dict.resolvers.maps)\n        self.resolvers = DeepChainMap(*resolvers)\n        self.temps = {}\n\n    def __repr__(self) -> str:\n        scope_keys = _get_pretty_string(list(self.scope.keys()))\n        res_keys = _get_pretty_string(list(self.resolvers.keys()))\n        return f\"{type(self).__name__}(scope={scope_keys}, resolvers={res_keys})\"\n\n    @property\n    def has_resolvers(self) -> bool:\n        \"\"\"\n        Return whether we have any extra scope.\n\n        For example, DataFrames pass Their columns as resolvers during calls to\n        ``DataFrame.eval()`` and ``DataFrame.query()``.\n\n        Returns\n        -------\n        hr : bool\n        \"\"\"\n        return bool(len(self.resolvers))\n\n    def resolve(self, key: str, is_local: bool):\n        \"\"\"\n        Resolve a variable name in a possibly local context.\n\n        Parameters\n        ----------\n        key : str\n            A variable name\n        is_local : bool\n            Flag indicating whether the variable is local or not (prefixed with\n            the '@' symbol)\n\n        Returns\n        -------\n        value : object\n            The value of a particular variable\n        \"\"\"\n        try:\n            # only look for locals in outer scope\n            if is_local:\n                return self.scope[key]\n\n            # not a local variable so check in resolvers if we have them\n            if self.has_resolvers:\n                return self.resolvers[key]\n\n            # if we're here that means that we have no locals and we also have\n            # no resolvers\n            assert not is_local and not self.has_resolvers\n            return self.scope[key]\n        except KeyError:\n            try:\n                # last ditch effort we look in temporaries\n                # these are created when parsing indexing expressions\n                # e.g., df[df > 0]\n                return self.temps[key]\n            except KeyError as err:\n    
            raise UndefinedVariableError(key, is_local) from err\n\n    def swapkey(self, old_key: str, new_key: str, new_value=None) -> None:\n        \"\"\"\n        Replace a variable name, with a potentially new value.\n\n        Parameters\n        ----------\n        old_key : str\n            Current variable name to replace\n        new_key : str\n            New variable name to replace `old_key` with\n        new_value : object\n            Value to be replaced along with the possible renaming\n        \"\"\"\n        if self.has_resolvers:\n            maps = self.resolvers.maps + self.scope.maps\n        else:\n            maps = self.scope.maps\n\n        maps.append(self.temps)\n\n        for mapping in maps:\n            if old_key in mapping:\n                mapping[new_key] = new_value\n                return\n\n    def _get_vars(self, stack, scopes: list[str]) -> None:\n        \"\"\"\n        Get specifically scoped variables from a list of stack frames.\n\n        Parameters\n        ----------\n        stack : list\n            A list of stack frames as returned by ``inspect.stack()``\n        scopes : sequence of strings\n            A sequence containing valid stack frame attribute names that\n            evaluate to a dictionary. 
For example, ('locals', 'globals')\n        \"\"\"\n        variables = itertools.product(scopes, stack)\n        for scope, (frame, _, _, _, _, _) in variables:\n            try:\n                d = getattr(frame, f\"f_{scope}\")\n                self.scope = DeepChainMap(self.scope.new_child(d))\n            finally:\n                # won't remove it, but DECREF it\n                # in Py3 this probably isn't necessary since frame won't be\n                # in scope after the loop\n                del frame\n\n    def _update(self, level: int) -> None:\n        \"\"\"\n        Update the current scope by going back `level` levels.\n\n        Parameters\n        ----------\n        level : int\n        \"\"\"\n        sl = level + 1\n\n        # add sl frames to the scope starting with the\n        # most distant and overwriting with more current\n        # makes sure that we can capture variable scope\n        stack = inspect.stack()\n\n        try:\n            self._get_vars(stack[:sl], scopes=[\"locals\"])\n        finally:\n            # explicitly delete the stack according to the advice here:\n            # https://docs.python.org/3/library/inspect.html#inspect.Traceback\n            del stack[:], stack\n\n    def add_tmp(self, value) -> str:\n        \"\"\"\n        Add a temporary variable to the scope.\n\n        Parameters\n        ----------\n        value : object\n            An arbitrary object to be assigned to a temporary variable.\n\n        Returns\n        -------\n        str\n            The name of the temporary variable created.\n        \"\"\"\n        name = f\"{type(value).__name__}_{self.ntemps}_{_raw_hex_id(self)}\"\n\n        # add to inner most scope\n        assert name not in self.temps\n        self.temps[name] = value\n        assert name in self.temps\n\n        # only increment if the variable gets put in the scope\n        return name\n\n    @property\n    def ntemps(self) -> int:\n        \"\"\"The number of temporary 
variables in this scope\"\"\"\n        return len(self.temps)\n\n    @property\n    def full_scope(self) -> DeepChainMap:\n        \"\"\"\n        Return the full scope for use with passing to engines transparently\n        as a mapping.\n\n        Returns\n        -------\n        vars : DeepChainMap\n            All variables in this scope.\n        \"\"\"\n        maps = [self.temps] + self.resolvers.maps + self.scope.maps\n        return DeepChainMap(*maps)\n"
  },
  {
    "path": "modin/core/dataframe/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Base Modin Dataframe functionality.\"\"\"\n"
  },
  {
    "path": "modin/core/dataframe/algebra/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Modin Dataframe algebra (core operators).\"\"\"\n\nfrom .binary import Binary\nfrom .fold import Fold\nfrom .groupby import GroupByReduce\nfrom .map import Map\nfrom .operator import Operator\nfrom .reduce import Reduce\nfrom .tree_reduce import TreeReduce\n\n__all__ = [\n    \"Operator\",\n    \"Map\",\n    \"TreeReduce\",\n    \"Reduce\",\n    \"Fold\",\n    \"Binary\",\n    \"GroupByReduce\",\n]\n"
  },
  {
    "path": "modin/core/dataframe/algebra/binary.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module houses builder class for Binary operator.\"\"\"\n\nfrom __future__ import annotations\n\nimport warnings\nfrom typing import TYPE_CHECKING, Any, Callable, Optional, Union\n\nimport numpy as np\nimport pandas\nfrom pandas.api.types import is_bool_dtype, is_scalar\n\nfrom modin.error_message import ErrorMessage\n\nfrom .operator import Operator\n\nif TYPE_CHECKING:\n    from pandas._typing import DtypeObj\n\n    from modin.core.storage_formats.pandas.query_compiler import PandasQueryCompiler\n\n\ndef maybe_compute_dtypes_common_cast(\n    first: PandasQueryCompiler,\n    second: Union[PandasQueryCompiler, dict, list, tuple, np.ndarray, str, DtypeObj],\n    trigger_computations: bool = False,\n    axis: int = 0,\n    func: Optional[\n        Callable[[pandas.DataFrame, pandas.DataFrame], pandas.DataFrame]\n    ] = None,\n) -> Optional[pandas.Series]:\n    \"\"\"\n    Precompute data types for binary operations by finding common type between operands.\n\n    Parameters\n    ----------\n    first : PandasQueryCompiler\n        First operand for which the binary operation would be performed later.\n    second : PandasQueryCompiler, dict, list, tuple, 
np.ndarray, str or DtypeObj\n        Second operand for which the binary operation would be performed later.\n    trigger_computations : bool, default: False\n        Whether to trigger computation of the lazy metadata for `first` and `second`.\n        If False is specified this method will return None if any of the operands doesn't\n        have materialized dtypes.\n    axis : int, default: 0\n        Axis to perform the binary operation along.\n    func : callable(pandas.DataFrame, pandas.DataFrame) -> pandas.DataFrame, optional\n        If specified, will use this function to perform the \"try_sample\" method\n        (see ``Binary.register()`` docs for more details).\n\n    Returns\n    -------\n    pandas.Series\n        The pandas series with precomputed dtypes or None if there's not enough metadata to compute it.\n\n    Notes\n    -----\n    The dtypes of the operands are supposed to be known.\n    \"\"\"\n    if not trigger_computations:\n        if not first.frame_has_materialized_dtypes:\n            return None\n\n        if isinstance(second, type(first)) and not second.frame_has_materialized_dtypes:\n            return None\n\n    dtypes_first = first.dtypes.to_dict()\n    if isinstance(second, type(first)):\n        dtypes_second = second.dtypes.to_dict()\n        columns_first = set(first.columns)\n        columns_second = set(second.columns)\n        common_columns = columns_first.intersection(columns_second)\n        # Here we want to XOR the sets in order to find the columns that do not\n        # belong to the intersection, these will be NaN columns in the result\n        mismatch_columns = columns_first ^ columns_second\n    elif isinstance(second, dict):\n        dtypes_second = {\n            key: pandas.api.types.pandas_dtype(type(value))\n            for key, value in second.items()\n        }\n        columns_first = set(first.columns)\n        columns_second = set(second.keys())\n        common_columns = 
columns_first.intersection(columns_second)\n        # Here we want to find the difference between the sets in order to find columns\n        # that are missing in the dictionary, this will be NaN columns in the result\n        mismatch_columns = columns_first.difference(columns_second)\n    else:\n        if isinstance(second, (list, tuple)):\n            second_dtypes_list = (\n                [pandas.api.types.pandas_dtype(type(value)) for value in second]\n                if axis == 1\n                # Here we've been given a column so it has only one dtype,\n                # Infering the dtype using `np.array`, TODO: maybe there's more efficient way?\n                else [np.array(second).dtype] * len(dtypes_first)\n            )\n        elif is_scalar(second) or isinstance(second, np.ndarray):\n            try:\n                dtype = getattr(second, \"dtype\", None) or pandas.api.types.pandas_dtype(\n                    type(second)\n                )\n            except TypeError:\n                # For example, dtype '<class 'datetime.datetime'>' not understood\n                dtype = pandas.Series(second).dtype\n            second_dtypes_list = [dtype] * len(dtypes_first)\n        else:\n            raise NotImplementedError(\n                f\"Can't compute common type for {type(first)} and {type(second)}.\"\n            )\n        # We verify operands shapes at the front-end, invalid operands shouldn't be\n        # propagated to the query compiler level\n        ErrorMessage.catch_bugs_and_request_email(\n            failure_condition=len(second_dtypes_list) != len(dtypes_first),\n            extra_log=\"Shapes of the operands of a binary operation don't match\",\n        )\n        dtypes_second = {\n            key: second_dtypes_list[idx] for idx, key in enumerate(dtypes_first.keys())\n        }\n        common_columns = first.columns\n        mismatch_columns = []\n\n    # If at least one column doesn't match, the result of the non matching 
column would be nan.\n    nan_dtype = pandas.api.types.pandas_dtype(type(np.nan))\n    dtypes = None\n    if func is not None:\n        try:\n            with warnings.catch_warnings():\n                warnings.filterwarnings(\"ignore\")\n                df1 = pandas.DataFrame([[1] * len(common_columns)]).astype(\n                    {i: dtypes_first[col] for i, col in enumerate(common_columns)}\n                )\n                df2 = pandas.DataFrame([[1] * len(common_columns)]).astype(\n                    {i: dtypes_second[col] for i, col in enumerate(common_columns)}\n                )\n                dtypes = func(df1, df2).dtypes.set_axis(common_columns)\n        # it sometimes doesn't work correctly with strings, so falling back to\n        # the \"common_cast\" method in this case\n        except TypeError:\n            pass\n    if dtypes is None:\n        dtypes = pandas.Series(\n            [\n                pandas.core.dtypes.cast.find_common_type(\n                    [\n                        dtypes_first[x],\n                        dtypes_second[x],\n                    ]\n                )\n                for x in common_columns\n            ],\n            index=common_columns,\n        )\n    dtypes: pandas.Series = pandas.concat(\n        [\n            dtypes,\n            pandas.Series(\n                [nan_dtype] * (len(mismatch_columns)),\n                index=mismatch_columns,\n            ),\n        ]\n    )\n    return dtypes\n\n\ndef maybe_build_dtypes_series(\n    first: PandasQueryCompiler,\n    second: Union[PandasQueryCompiler, Any],\n    dtype: DtypeObj,\n    trigger_computations: bool = False,\n) -> Optional[pandas.Series]:\n    \"\"\"\n    Build a ``pandas.Series`` describing dtypes of the result of a binary operation.\n\n    Parameters\n    ----------\n    first : PandasQueryCompiler\n        First operand for which the binary operation would be performed later.\n    second : PandasQueryCompiler, list-like or scalar\n   
     Second operand for which the binary operation would be performed later.\n    dtype : DtypeObj\n        Dtype of the result.\n    trigger_computations : bool, default: False\n        Whether to trigger computation of the lazy metadata for `first` and `second`.\n        If False is specified this method will return None if any of the operands doesn't\n        have materialized columns.\n\n    Returns\n    -------\n    pandas.Series or None\n        The pandas series with precomputed dtypes or None if there's not enough metadata to compute it.\n\n    Notes\n    -----\n    Finds a union of columns and finds dtypes for all these columns.\n    \"\"\"\n    if not trigger_computations:\n        if not first.frame_has_columns_cache:\n            return None\n\n        if isinstance(second, type(first)) and not second.frame_has_columns_cache:\n            return None\n\n    columns_first = set(first.columns)\n    if isinstance(second, type(first)):\n        columns_second = set(second.columns)\n        columns_union = columns_first.union(columns_second)\n    else:\n        columns_union = columns_first\n\n    dtypes = pandas.Series([dtype] * len(columns_union), index=columns_union)\n    return dtypes\n\n\ndef try_compute_new_dtypes(\n    first: PandasQueryCompiler,\n    second: Union[PandasQueryCompiler, Any],\n    infer_dtypes: Optional[str] = None,\n    result_dtype: Optional[Union[DtypeObj, str]] = None,\n    axis: int = 0,\n    func: Optional[\n        Callable[[pandas.DataFrame, pandas.DataFrame], pandas.DataFrame]\n    ] = None,\n) -> Optional[pandas.Series]:\n    \"\"\"\n    Precompute resulting dtypes of the binary operation if possible.\n\n    The dtypes won't be precomputed if any of the operands doesn't have their dtypes materialized\n    or if the second operand type is not supported. 
Supported types: PandasQueryCompiler, list,\n    dict, tuple, np.ndarray.\n\n    Parameters\n    ----------\n    first : PandasQueryCompiler\n        First operand of the binary operation.\n    second : PandasQueryCompiler, list-like or scalar\n        Second operand of the binary operation.\n    infer_dtypes : {\"common_cast\", \"try_sample\", \"bool\", None}, default: None\n        How dtypes should be inferred (see ``Binary.register`` doc for more info).\n    result_dtype : np.dtype, optional\n        NumPy dtype of the result. If not specified it will be inferred from the `infer_dtypes` parameter.\n    axis : int, default: 0\n        Axis to perform the binary operation along.\n    func : callable(pandas.DataFrame, pandas.DataFrame) -> pandas.DataFrame, optional\n        A callable to be used for the \"try_sample\" method.\n\n    Returns\n    -------\n    pandas.Series or None\n    \"\"\"\n    if infer_dtypes is None and result_dtype is None:\n        return None\n\n    try:\n        if infer_dtypes == \"bool\" or is_bool_dtype(result_dtype):\n            dtypes = maybe_build_dtypes_series(\n                first, second, dtype=pandas.api.types.pandas_dtype(bool)\n            )\n        elif infer_dtypes == \"common_cast\":\n            dtypes = maybe_compute_dtypes_common_cast(\n                first, second, axis=axis, func=None\n            )\n        elif infer_dtypes == \"try_sample\":\n            if func is None:\n                raise ValueError(\n                    \"'func' must be specified if dtypes infering method is 'try_sample'\"\n                )\n            dtypes = maybe_compute_dtypes_common_cast(\n                first, second, axis=axis, func=func\n            )\n        else:\n            # For now we only know how to handle `result_dtype == bool` as that's\n            # the only value that is being passed here right now, it's unclear\n            # how we should behave in case of an arbitrary dtype, so let's wait\n            # for at 
least one case to appear for this regard.\n            dtypes = None\n    except NotImplementedError:\n        dtypes = None\n\n    return dtypes\n\n\nclass Binary(Operator):\n    \"\"\"Builder class for Binary operator.\"\"\"\n\n    @classmethod\n    def register(\n        cls,\n        func: Callable[..., pandas.DataFrame],\n        join_type: str = \"outer\",\n        sort: bool = None,\n        labels: str = \"replace\",\n        infer_dtypes: Optional[str] = None,\n    ) -> Callable[..., PandasQueryCompiler]:\n        \"\"\"\n        Build template binary operator.\n\n        Parameters\n        ----------\n        func : callable(pandas.DataFrame, [pandas.DataFrame, list-like, scalar]) -> pandas.DataFrame\n            Binary function to execute. Have to be able to accept at least two arguments.\n        join_type : {'left', 'right', 'outer', 'inner', None}, default: 'outer'\n            Type of join that will be used if indices of operands are not aligned.\n        sort : bool, default: None\n            Whether to sort index and columns or not.\n        labels : {\"keep\", \"replace\", \"drop\"}, default: \"replace\"\n            Whether keep labels from left Modin DataFrame, replace them with labels\n            from joined DataFrame or drop altogether to make them be computed lazily later.\n        infer_dtypes : {\"common_cast\", \"try_sample\", \"bool\", None}, default: None\n            How dtypes should be inferred.\n                * If \"common_cast\", casts to common dtype of operand columns.\n                * If \"try_sample\", creates small pandas DataFrames with dtypes of operands and\n                  runs the `func` on them to determine output dtypes. 
If a ``TypeError`` is raised\n                  during this process, fallback to \"common_cast\" method.\n                * If \"bool\", dtypes would be a boolean series with same size as that of operands.\n                * If ``None``, do not infer new dtypes (they will be computed manually once accessed).\n\n        Returns\n        -------\n        callable\n            Function that takes query compiler and executes binary operation.\n        \"\"\"\n\n        def caller(\n            query_compiler: PandasQueryCompiler,\n            other: Union[PandasQueryCompiler, Any],\n            broadcast: bool = False,\n            *args: tuple,\n            dtypes: Optional[Union[DtypeObj, str]] = None,\n            **kwargs: dict,\n        ) -> PandasQueryCompiler:\n            \"\"\"\n            Apply binary `func` to passed operands.\n\n            Parameters\n            ----------\n            query_compiler : PandasQueryCompiler\n                Left operand of `func`.\n            other : PandasQueryCompiler, list-like object or scalar\n                Right operand of `func`.\n            broadcast : bool, default: False\n                If `other` is a one-column query compiler, indicates whether it is a Series or not.\n                Frames and Series have to be processed differently, however we can't distinguish them\n                at the query compiler level, so this parameter is a hint that passed from a high level API.\n            *args : tuple,\n                Arguments that will be passed to `func`.\n            dtypes : \"copy\", scalar dtype or None, default: None\n                Dtypes of the result. 
\"copy\" to keep old dtypes and None to compute them on demand.\n            **kwargs : dict,\n                Arguments that will be passed to `func`.\n\n            Returns\n            -------\n            PandasQueryCompiler\n                Result of binary function.\n            \"\"\"\n            axis: int = kwargs.get(\"axis\", 0)\n            if isinstance(other, type(query_compiler)) and broadcast:\n                assert (\n                    len(other.columns) == 1\n                ), \"Invalid broadcast argument for `broadcast_apply`, too many columns: {}\".format(\n                    len(other.columns)\n                )\n                # Transpose on `axis=1` because we always represent an individual\n                # column or row as a single-column Modin DataFrame\n                if axis == 1:\n                    other = other.transpose()\n            if dtypes != \"copy\":\n                dtypes = try_compute_new_dtypes(\n                    query_compiler, other, infer_dtypes, dtypes, axis, func\n                )\n\n            shape_hint = None\n            if isinstance(other, type(query_compiler)):\n                if broadcast:\n                    if (\n                        query_compiler.frame_has_materialized_columns\n                        and other.frame_has_materialized_columns\n                    ):\n                        if (\n                            len(query_compiler.columns) == 1\n                            and len(other.columns) == 1\n                            and query_compiler.columns.equals(other.columns)\n                        ):\n                            shape_hint = \"column\"\n                    return query_compiler.__constructor__(\n                        query_compiler._modin_frame.broadcast_apply(\n                            axis,\n                            lambda left, right: func(\n                                left, right.squeeze(), *args, **kwargs\n                            ),\n  
                          other._modin_frame,\n                            join_type=join_type,\n                            labels=labels,\n                            dtypes=dtypes,\n                        ),\n                        shape_hint=shape_hint,\n                    )\n                else:\n                    if (\n                        query_compiler.frame_has_materialized_columns\n                        and other.frame_has_materialized_columns\n                    ):\n                        if (\n                            len(query_compiler.columns) == 1\n                            and len(other.columns) == 1\n                            and query_compiler.columns.equals(other.columns)\n                        ):\n                            shape_hint = \"column\"\n                    return query_compiler.__constructor__(\n                        query_compiler._modin_frame.n_ary_op(\n                            lambda x, y: func(x, y, *args, **kwargs),\n                            [other._modin_frame],\n                            join_type=join_type,\n                            sort=sort,\n                            labels=labels,\n                            dtypes=dtypes,\n                        ),\n                        shape_hint=shape_hint,\n                    )\n            else:\n                # TODO: it's possible to chunk the `other` and broadcast them to partitions\n                # accordingly, in that way we will be able to use more efficient `._modin_frame.map()`\n                if isinstance(other, (dict, list, np.ndarray, pandas.Series)):\n                    new_modin_frame = query_compiler._modin_frame.apply_full_axis(\n                        axis,\n                        lambda df: func(df, other, *args, **kwargs),\n                        new_index=query_compiler.index,\n                        new_columns=query_compiler.columns,\n                        dtypes=dtypes,\n                    )\n              
  else:\n                    if (\n                        query_compiler.frame_has_materialized_columns\n                        and len(query_compiler._modin_frame.columns) == 1\n                        and is_scalar(other)\n                    ):\n                        shape_hint = \"column\"\n                    new_modin_frame = query_compiler._modin_frame.map(\n                        func,\n                        func_args=(other, *args),\n                        func_kwargs=kwargs,\n                        dtypes=dtypes,\n                        lazy=True,\n                    )\n                return query_compiler.__constructor__(\n                    new_modin_frame, shape_hint=shape_hint\n                )\n\n        return caller\n"
  },
  {
    "path": "modin/core/dataframe/algebra/default2pandas/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module default2pandas provides templates for a query compiler default-to-pandas methods.\"\"\"\n\nfrom .binary import BinaryDefault\nfrom .cat import CatDefault\nfrom .dataframe import DataFrameDefault\nfrom .datetime import DateTimeDefault\nfrom .default import DefaultMethod\nfrom .groupby import GroupByDefault, SeriesGroupByDefault\nfrom .list import ListDefault\nfrom .resample import ResampleDefault\nfrom .rolling import ExpandingDefault, RollingDefault\nfrom .series import SeriesDefault\nfrom .str import StrDefault\nfrom .struct import StructDefault\n\n__all__ = [\n    \"DataFrameDefault\",\n    \"DateTimeDefault\",\n    \"SeriesDefault\",\n    \"StrDefault\",\n    \"BinaryDefault\",\n    \"ResampleDefault\",\n    \"RollingDefault\",\n    \"ExpandingDefault\",\n    \"DefaultMethod\",\n    \"CatDefault\",\n    \"GroupByDefault\",\n    \"SeriesGroupByDefault\",\n    \"ListDefault\",\n    \"StructDefault\",\n]\n"
  },
  {
    "path": "modin/core/dataframe/algebra/default2pandas/binary.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module houses default binary functions builder class.\"\"\"\n\nimport pandas\nfrom pandas.core.dtypes.common import is_list_like\n\nfrom .default import DefaultMethod\n\n\nclass BinaryDefault(DefaultMethod):\n    \"\"\"Build default-to-pandas methods which executes binary functions.\"\"\"\n\n    @classmethod\n    def build_default_to_pandas(cls, fn, fn_name):\n        \"\"\"\n        Build function that do fallback to pandas for passed binary `fn`.\n\n        Parameters\n        ----------\n        fn : callable\n            Binary function to apply to the casted to pandas frame and other operand.\n        fn_name : str\n            Function name which will be shown in default-to-pandas warning message.\n\n        Returns\n        -------\n        callable\n            Function that takes query compiler, does fallback to pandas and applies binary `fn`\n            to the casted to pandas frame.\n        \"\"\"\n\n        def bin_ops_wrapper(df, other, *args, **kwargs):\n            \"\"\"Apply specified binary function to the passed operands.\"\"\"\n            squeeze_other = kwargs.pop(\"broadcast\", False) or kwargs.pop(\n                \"squeeze_other\", 
False\n            )\n            squeeze_self = kwargs.pop(\"squeeze_self\", False)\n\n            if squeeze_other:\n                other = other.squeeze(axis=1)\n\n            if squeeze_self:\n                df = df.squeeze(axis=1)\n\n            result = fn(df, other, *args, **kwargs)\n            if (\n                not isinstance(result, pandas.Series)\n                and not isinstance(result, pandas.DataFrame)\n                and is_list_like(result)\n            ):\n                result = pandas.DataFrame(result)\n            return result\n\n        return super().build_default_to_pandas(bin_ops_wrapper, fn_name)\n"
  },
  {
    "path": "modin/core/dataframe/algebra/default2pandas/cat.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module houses default applied-on-category functions builder class.\"\"\"\n\nfrom .series import SeriesDefault\n\n\nclass CatDefault(SeriesDefault):\n    \"\"\"Builder for default-to-pandas methods which is executed under category accessor.\"\"\"\n\n    @classmethod\n    def frame_wrapper(cls, df):\n        \"\"\"\n        Get category accessor of the passed frame.\n\n        Parameters\n        ----------\n        df : pandas.DataFrame\n\n        Returns\n        -------\n        pandas.core.arrays.categorical.CategoricalAccessor\n        \"\"\"\n        return df.squeeze(axis=1).cat\n"
  },
  {
    "path": "modin/core/dataframe/algebra/default2pandas/dataframe.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module houses default DataFrame functions builder class.\"\"\"\n\nimport pandas\n\nfrom modin.utils import _inherit_docstrings\n\nfrom .default import DefaultMethod\n\n\n@_inherit_docstrings(DefaultMethod)\nclass DataFrameDefault(DefaultMethod):\n    DEFAULT_OBJECT_TYPE = pandas.DataFrame\n"
  },
  {
    "path": "modin/core/dataframe/algebra/default2pandas/datetime.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module houses default applied-on-datetime functions builder class.\"\"\"\n\nfrom .series import SeriesDefault\n\n\nclass DateTimeDefault(SeriesDefault):\n    \"\"\"Builder for default-to-pandas methods which is executed under datetime accessor.\"\"\"\n\n    @classmethod\n    def frame_wrapper(cls, df):\n        \"\"\"\n        Get datetime accessor of the passed frame.\n\n        Parameters\n        ----------\n        df : pandas.DataFrame\n\n        Returns\n        -------\n        pandas.core.indexes.accessors.DatetimeProperties\n        \"\"\"\n        return df.squeeze(axis=1).dt\n"
  },
  {
    "path": "modin/core/dataframe/algebra/default2pandas/default.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module houses default functions builder class.\"\"\"\n\nimport pandas\nfrom pandas.core.dtypes.common import is_list_like\n\nfrom modin.core.dataframe.algebra.operator import Operator\nfrom modin.utils import MODIN_UNNAMED_SERIES_LABEL, try_cast_to_pandas\n\n\nclass ObjTypeDeterminer:\n    \"\"\"\n    Class that routes work to the frame.\n\n    Provides an instance which forwards all of the `__getattribute__` calls\n    to an object under which `key` function is applied.\n    \"\"\"\n\n    def __getattr__(self, key):\n        \"\"\"\n        Build function that executes `key` function over passed frame.\n\n        Parameters\n        ----------\n        key : str\n\n        Returns\n        -------\n        callable\n            Function that takes DataFrame and executes `key` function on it.\n        \"\"\"\n\n        def func(df, *args, **kwargs):\n            \"\"\"Access specified attribute of the passed object and call it if it's callable.\"\"\"\n            prop = getattr(df, key)\n            if callable(prop):\n                return prop(*args, **kwargs)\n            else:\n                return prop\n\n        return func\n\n\nclass 
DefaultMethod(Operator):\n    \"\"\"\n    Builder for default-to-pandas methods.\n\n    Attributes\n    ----------\n    OBJECT_TYPE : str\n        Object type name that will be shown in default-to-pandas warning message.\n    DEFAULT_OBJECT_TYPE : object\n        Default place to search for a function.\n    \"\"\"\n\n    OBJECT_TYPE = \"DataFrame\"\n    DEFAULT_OBJECT_TYPE = ObjTypeDeterminer\n\n    @classmethod\n    def register(cls, func, obj_type=None, inplace=None, fn_name=None):\n        \"\"\"\n        Build function that does fallback to default pandas implementation for passed `func`.\n\n        Parameters\n        ----------\n        func : callable or str,\n            Function to apply to the casted to pandas frame or its property accessed\n            by ``cls.frame_wrapper``.\n        obj_type : object, optional\n            If `func` is a string with a function name then `obj_type` provides an\n            object to search function in.\n        inplace : bool, optional\n            If True return an object to which `func` was applied, otherwise return\n            the result of `func`.\n        fn_name : str, optional\n            Function name which will be shown in default-to-pandas warning message.\n            If not specified, name will be deduced from `func`.\n\n        Returns\n        -------\n        callable\n            Function that takes query compiler, does fallback to pandas and applies `func`\n            to the casted to pandas frame or its property accessed by ``cls.frame_wrapper``.\n        \"\"\"\n        if isinstance(func, str):\n            if obj_type is None:\n                obj_type = cls.DEFAULT_OBJECT_TYPE\n            fn = getattr(obj_type, func)\n        else:\n            fn = func\n\n        if type(fn) is property:\n            if fn_name is None and hasattr(fn, \"fget\"):\n                # When `fn` is a property, `str(fn)` will be something like\n                # \"<property object at 0x7f8671e09d10>\". We instead check its `fget` method to get\n                # the name of the property.\n                # Note that this method is still imperfect because we cannot get the class name\n                # of the property. For example, we can only get \"hour\" from `Series.dt.hour`.\n                fn_name = f\"<property fget:{getattr(fn.fget, '__name__', 'noname')}>\"\n            fn = cls.build_property_wrapper(fn)\n        else:\n            fn_name = getattr(fn, \"__name__\", str(fn)) if fn_name is None else fn_name\n\n        def applyier(df, *args, **kwargs):\n            \"\"\"\n            Apply target function to the casted to pandas frame.\n\n            This function is directly applied to the casted to pandas frame, executes target\n            function under it and processes result so it is possible to create a valid\n            query compiler from it.\n            \"\"\"\n            # pandas default implementation doesn't know how to handle `dtypes` keyword argument\n            kwargs.pop(\"dtypes\", None)\n            df = cls.frame_wrapper(df)\n            result = fn(df, *args, **kwargs)\n\n            if (\n                not isinstance(result, pandas.Series)\n                and not isinstance(result, pandas.DataFrame)\n                and func not in (\"to_numpy\", pandas.DataFrame.to_numpy)\n                and func not in (\"align\", pandas.DataFrame.align)\n                and func not in (\"divmod\", pandas.Series.divmod)\n                and func not in (\"rdivmod\", pandas.Series.rdivmod)\n                and func not in (\"to_list\", pandas.Series.to_list)\n                and func not in (\"corr\", pandas.Series.corr)\n                and func not in (\"to_dict\", pandas.Series.to_dict)\n                and func not in (\"mean\", pandas.DataFrame.mean)\n                and func not in (\"median\", pandas.DataFrame.median)\n                and func not in (\"skew\", pandas.DataFrame.skew)\n                and func not in (\"kurt\", pandas.DataFrame.kurt)\n            ):\n                # When applying a DatetimeProperties or TimedeltaProperties function,\n                # if we don't specify the dtype for the DataFrame, the frame might\n                # get the wrong dtype, e.g. for to_pydatetime in\n                # https://github.com/modin-project/modin/issues/4436\n                astype_kwargs = {}\n                dtype = getattr(result, \"dtype\", None)\n                if dtype and isinstance(\n                    df,\n                    (\n                        pandas.core.indexes.accessors.DatetimeProperties,\n                        pandas.core.indexes.accessors.TimedeltaProperties,\n                    ),\n                ):\n                    astype_kwargs[\"dtype\"] = dtype\n                result = (\n                    pandas.DataFrame(result, **astype_kwargs)\n                    if is_list_like(result)\n                    else pandas.DataFrame([result], **astype_kwargs)\n                )\n            if isinstance(result, pandas.Series):\n                if result.name is None:\n                    result.name = MODIN_UNNAMED_SERIES_LABEL\n                result = result.to_frame()\n\n            inplace_method = kwargs.get(\"inplace\", False)\n            if inplace is not None:\n                inplace_method = inplace\n            return result if not inplace_method else df\n\n        return cls.build_wrapper(applyier, fn_name)\n\n    @classmethod\n    # FIXME: this method is almost a duplicate of `cls.build_default_to_pandas`.\n    # Those two methods should be merged into a single one.\n    def build_wrapper(cls, fn, fn_name):\n        \"\"\"\n        Build function that does fallback to pandas for passed `fn`.\n\n        In comparison with ``cls.build_default_to_pandas`` this method also\n        casts function arguments to pandas before doing fallback.\n\n        Parameters\n        ----------\n        fn : callable\n            Function to apply to the defaulted frame.\n        fn_name : str\n            Function name which will be shown in default-to-pandas warning message.\n\n        Returns\n        -------\n        callable\n            Method that does fallback to pandas and applies `fn` to the pandas frame.\n        \"\"\"\n        wrapper = cls.build_default_to_pandas(fn, fn_name)\n\n        def args_cast(self, *args, **kwargs):\n            \"\"\"\n            Preprocess `default_to_pandas` function arguments and apply default function.\n\n            Cast all Modin objects that function arguments contain to its pandas representation.\n            \"\"\"\n            args = try_cast_to_pandas(args)\n            kwargs = try_cast_to_pandas(kwargs)\n            return wrapper(self, *args, **kwargs)\n\n        return args_cast\n\n    @classmethod\n    def build_property_wrapper(cls, prop):\n        \"\"\"\n        Build function that accesses specified property of the frame.\n\n        Parameters\n        ----------\n        prop : property\n            Property object to access.\n\n        Returns\n        -------\n        callable\n            Function that takes DataFrame and returns its value of `prop` property.\n        \"\"\"\n\n        def property_wrapper(df):\n            \"\"\"Get specified property of the passed object.\"\"\"\n            return prop.fget(df)\n\n        return property_wrapper\n\n    @classmethod\n    def build_default_to_pandas(cls, fn, fn_name):\n        \"\"\"\n        Build function that does fallback to pandas for passed `fn`.\n\n        Parameters\n        ----------\n        fn : callable\n            Function to apply to the defaulted frame.\n        fn_name : str\n            Function name which will be shown in default-to-pandas warning message.\n\n        Returns\n        -------\n        callable\n            Method that does fallback to pandas and applies `fn` to the pandas frame.\n        \"\"\"\n        fn.__name__ = f\"<function {cls.OBJECT_TYPE}.{fn_name}>\"\n\n        def wrapper(self, *args, **kwargs):\n            \"\"\"Do fallback to pandas for the specified function.\"\"\"\n            return self.default_to_pandas(fn, *args, **kwargs)\n\n        return wrapper\n\n    @classmethod\n    def frame_wrapper(cls, df):\n        \"\"\"\n        Extract frame property to apply function on.\n\n        This method is executed under casted to pandas frame right before applying\n        a function passed to `register`, which gives an ability to transform frame somehow\n        or access its properties, by overriding this method in a child class.\n\n        Parameters\n        ----------\n        df : pandas.DataFrame\n\n        Returns\n        -------\n        pandas.DataFrame\n\n        Notes\n        -----\n        Being a base implementation, this particular method does nothing with passed frame.\n        \"\"\"\n        return df\n"
  },
  {
    "path": "modin/core/dataframe/algebra/default2pandas/groupby.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module houses default GroupBy functions builder class.\"\"\"\n\nimport warnings\nfrom typing import Any\n\nimport pandas\nfrom pandas.core.dtypes.common import is_list_like\n\n# Defines a set of string names of functions that are executed in a transform-way in groupby\nfrom pandas.core.groupby.base import transformation_kernels\n\nfrom modin.utils import MODIN_UNNAMED_SERIES_LABEL, hashable\n\nfrom .default import DefaultMethod\n\n\n# FIXME: there is no sence of keeping `GroupBy` and `GroupByDefault` logic in a different\n# classes. 
They should be combined.\nclass GroupBy:\n    \"\"\"Builder for GroupBy aggregation functions.\"\"\"\n\n    agg_aliases = [\n        \"agg\",\n        \"dict_agg\",\n        pandas.core.groupby.DataFrameGroupBy.agg,\n        pandas.core.groupby.DataFrameGroupBy.aggregate,\n    ]\n\n    @staticmethod\n    def is_transformation_kernel(agg_func: Any) -> bool:\n        \"\"\"\n        Check whether a passed aggregation function is a transformation.\n\n        Transformation means that the result of the function will be broadcasted\n        to the frame's original shape.\n\n        Parameters\n        ----------\n        agg_func : Any\n\n        Returns\n        -------\n        bool\n        \"\"\"\n        return hashable(agg_func) and agg_func in transformation_kernels.union(\n            # these methods are also producing transpose-like result in a sense we understand it\n            # (they're non-aggregative functions), however are missing in the pandas dictionary\n            {\"nth\", \"head\", \"tail\"}\n        )\n\n    @classmethod\n    def _call_groupby(cls, df, *args, **kwargs):  # noqa: PR01\n        \"\"\"Call .groupby() on passed `df`.\"\"\"\n        with warnings.catch_warnings():\n            warnings.filterwarnings(\"ignore\", category=FutureWarning)\n            return df.groupby(*args, **kwargs)\n\n    @classmethod\n    def validate_by(cls, by):\n        \"\"\"\n        Build valid `by` parameter for `pandas.DataFrame.groupby`.\n\n        Cast all DataFrames in `by` parameter to Series or list of Series in case\n        of multi-column frame.\n\n        Parameters\n        ----------\n        by : DateFrame, Series, index label or list of such\n            Object which indicates groups for GroupBy.\n\n        Returns\n        -------\n        Series, index label or list of such\n            By parameter with all DataFrames casted to Series.\n        \"\"\"\n\n        def try_cast_series(df):\n            \"\"\"Cast one-column frame to 
Series.\"\"\"\n            if isinstance(df, pandas.DataFrame):\n                df = df.squeeze(axis=1)\n            if not isinstance(df, pandas.Series):\n                return df\n            if df.name == MODIN_UNNAMED_SERIES_LABEL:\n                df.name = None\n            return df\n\n        if isinstance(by, pandas.DataFrame):\n            by = [try_cast_series(column) for _, column in by.items()]\n        elif isinstance(by, pandas.Series):\n            by = [try_cast_series(by)]\n        elif isinstance(by, list):\n            by = [try_cast_series(o) for o in by]\n        return by\n\n    @classmethod\n    def inplace_applyier_builder(cls, key, func=None):\n        \"\"\"\n        Bind actual aggregation function to the GroupBy aggregation method.\n\n        Parameters\n        ----------\n        key : callable\n            Function that takes GroupBy object and evaluates passed aggregation function.\n        func : callable or str, optional\n            Function that takes DataFrame and aggregate its data. Will be applied\n            to each group at the grouped frame.\n\n        Returns\n        -------\n        callable,\n            Function that executes aggregation under GroupBy object.\n        \"\"\"\n        inplace_args = [] if func is None else [func]\n\n        def inplace_applyier(grp, *func_args, **func_kwargs):\n            return key(grp, *inplace_args, *func_args, **func_kwargs)\n\n        return inplace_applyier\n\n    @classmethod\n    def get_func(cls, key, **kwargs):\n        \"\"\"\n        Extract aggregation function from groupby arguments.\n\n        Parameters\n        ----------\n        key : callable or str\n            Default aggregation function. 
If aggregation function is not specified\n            via groupby arguments, then `key` function is used.\n        **kwargs : dict\n            GroupBy arguments that may contain aggregation function.\n\n        Returns\n        -------\n        callable\n            Aggregation function.\n\n        Notes\n        -----\n        There are two ways of how groupby aggregation can be invoked:\n            1. Explicitly with query compiler method: `qc.groupby_sum()`.\n            2. By passing aggregation function as an argument: `qc.groupby_agg(\"sum\")`.\n        Both are going to produce the same result, however in the first case actual aggregation\n        function can be extracted from the method name, while for the second only from the method arguments.\n        \"\"\"\n        if \"agg_func\" in kwargs:\n            return cls.inplace_applyier_builder(key, kwargs[\"agg_func\"])\n        elif \"func_dict\" in kwargs:\n            return cls.inplace_applyier_builder(key, kwargs[\"func_dict\"])\n        else:\n            return cls.inplace_applyier_builder(key)\n\n    @classmethod\n    def build_aggregate_method(cls, key):\n        \"\"\"\n        Build function for `QueryCompiler.groupby_agg` that can be executed as default-to-pandas.\n\n        Parameters\n        ----------\n        key : callable or str\n            Default aggregation function. 
If aggregation function is not specified\n            via groupby arguments, then `key` function is used.\n\n        Returns\n        -------\n        callable\n            Function that executes groupby aggregation.\n        \"\"\"\n\n        def fn(\n            df,\n            by,\n            axis,\n            groupby_kwargs,\n            agg_args,\n            agg_kwargs,\n            drop=False,\n            **kwargs,\n        ):\n            \"\"\"Group DataFrame and apply aggregation function to each group.\"\"\"\n            by = cls.validate_by(by)\n\n            grp = cls._call_groupby(df, by, axis=axis, **groupby_kwargs)\n            agg_func = cls.get_func(key, **kwargs)\n            result = agg_func(grp, *agg_args, **agg_kwargs)\n\n            return result\n\n        return fn\n\n    @classmethod\n    def build_groupby_reduce_method(cls, agg_func):\n        \"\"\"\n        Build function for `QueryCompiler.groupby_*` that can be executed as default-to-pandas.\n\n        Parameters\n        ----------\n        agg_func : callable or str\n            Default aggregation function. 
If aggregation function is not specified\n            via groupby arguments, then `agg_func` function is used.\n\n        Returns\n        -------\n        callable\n            Function that executes groupby aggregation.\n        \"\"\"\n\n        def fn(\n            df, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop=False, **kwargs\n        ):\n            \"\"\"Group DataFrame and apply aggregation function to each group.\"\"\"\n            if not isinstance(by, (pandas.Series, pandas.DataFrame)):\n                by = cls.validate_by(by)\n                grp = cls._call_groupby(df, by, axis=axis, **groupby_kwargs)\n                grp_agg_func = cls.get_func(agg_func, **kwargs)\n                return grp_agg_func(\n                    grp,\n                    *agg_args,\n                    **agg_kwargs,\n                )\n\n            if isinstance(by, pandas.DataFrame):\n                by = by.squeeze(axis=1)\n            if (\n                drop\n                and isinstance(by, pandas.Series)\n                and by.name in df\n                and df[by.name].equals(by)\n            ):\n                by = [by.name]\n            if isinstance(by, pandas.DataFrame):\n                df = pandas.concat([df] + [by[[o for o in by if o not in df]]], axis=1)\n                by = list(by.columns)\n\n            groupby_kwargs = groupby_kwargs.copy()\n            as_index = groupby_kwargs.pop(\"as_index\", True)\n            groupby_kwargs[\"as_index\"] = True\n\n            grp = cls._call_groupby(df, by, axis=axis, **groupby_kwargs)\n            func = cls.get_func(agg_func, **kwargs)\n            result = func(grp, *agg_args, **agg_kwargs)\n            method = kwargs.get(\"method\")\n\n            if isinstance(result, pandas.Series):\n                result = result.to_frame(\n                    MODIN_UNNAMED_SERIES_LABEL if result.name is None else result.name\n                )\n\n            if not as_index:\n                if 
isinstance(by, pandas.Series):\n                    # 1. If `drop` is True then 'by' Series represents a column from the\n                    #    source frame and so the 'by' is internal.\n                    # 2. If method is 'size' then any 'by' is considered to be internal.\n                    #    This is a hacky legacy from the ``groupby_size`` implementation:\n                    #    https://github.com/modin-project/modin/issues/3739\n                    internal_by = (by.name,) if drop or method == \"size\" else tuple()\n                else:\n                    internal_by = by\n\n                cls.handle_as_index_for_dataframe(\n                    result,\n                    internal_by,\n                    by_cols_dtypes=(\n                        df.index.dtypes.values\n                        if isinstance(df.index, pandas.MultiIndex)\n                        else (df.index.dtype,)\n                    ),\n                    by_length=len(by),\n                    drop=drop,\n                    method=method,\n                    inplace=True,\n                )\n\n            if result.index.name == MODIN_UNNAMED_SERIES_LABEL:\n                result.index.name = None\n\n            return result\n\n        return fn\n\n    @classmethod\n    def is_aggregate(cls, key):  # noqa: PR01\n        \"\"\"Check whether `key` is an alias for pandas.GroupBy.aggregation method.\"\"\"\n        return key in cls.agg_aliases\n\n    @classmethod\n    def build_groupby(cls, func):\n        \"\"\"\n        Build function that groups DataFrame and applies aggregation function to the every group.\n\n        Parameters\n        ----------\n        func : callable or str\n            Default aggregation function. 
If aggregation function is not specified\n            via groupby arguments, then `func` function is used.\n\n        Returns\n        -------\n        callable\n            Function that takes pandas DataFrame and does GroupBy aggregation.\n        \"\"\"\n        if cls.is_aggregate(func):\n            return cls.build_aggregate_method(func)\n        return cls.build_groupby_reduce_method(func)\n\n    @classmethod\n    def handle_as_index_for_dataframe(\n        cls,\n        result,\n        internal_by_cols,\n        by_cols_dtypes=None,\n        by_length=None,\n        selection=None,\n        partition_idx=0,\n        drop=True,\n        method=None,\n        inplace=False,\n    ):\n        \"\"\"\n        Handle `as_index=False` parameter for the passed GroupBy aggregation result.\n\n        Parameters\n        ----------\n        result : DataFrame\n            Frame containing GroupBy aggregation result computed with `as_index=True`\n            parameter (group names are located at the frame's index).\n        internal_by_cols : list-like\n            Internal 'by' columns.\n        by_cols_dtypes : list-like, optional\n            Data types of the internal 'by' columns. Required to do special casing\n            in case of categorical 'by'. If not specified, assume that there is no\n            categorical data in 'by'.\n        by_length : int, optional\n            Amount of keys to group on (including frame columns and external objects like list, Series, etc.)\n            If not specified, consider `by_length` to be equal ``len(internal_by_cols)``.\n        selection : label or list of labels, optional\n            Set of columns that were explicitly selected for aggregation (for example\n            via dict-aggregation). 
If not specified assuming that aggregation was\n            applied to all of the available columns.\n        partition_idx : int, default: 0\n            Positional index of the current partition.\n        drop : bool, default: True\n            Indicates whether or not any of the `by` data came from the same frame.\n        method : str, optional\n            Name of the groupby function. This is a hint to be able to do special casing.\n            Note: this parameter is a legacy from the ``groupby_size`` implementation,\n            it's a hacky one and probably will be removed in the future: https://github.com/modin-project/modin/issues/3739.\n        inplace : bool, default: False\n            Modify the DataFrame in place (do not create a new object).\n\n        Returns\n        -------\n        DataFrame\n            GroupBy aggregation result with the considered `as_index=False` parameter.\n        \"\"\"\n        if not inplace:\n            result = result.copy()\n\n        reset_index, drop, lvls_to_drop, cols_to_drop = cls.handle_as_index(\n            result_cols=result.columns,\n            result_index_names=result.index.names,\n            internal_by_cols=internal_by_cols,\n            by_cols_dtypes=by_cols_dtypes,\n            by_length=by_length,\n            selection=selection,\n            partition_idx=partition_idx,\n            drop=drop,\n            method=method,\n        )\n\n        if len(lvls_to_drop) > 0:\n            result.index = result.index.droplevel(lvls_to_drop)\n        if len(cols_to_drop) > 0:\n            result.drop(columns=cols_to_drop, inplace=True)\n        if reset_index:\n            result.reset_index(drop=drop, inplace=True)\n        return result\n\n    @staticmethod\n    def handle_as_index(\n        result_cols,\n        result_index_names,\n        internal_by_cols,\n        by_cols_dtypes=None,\n        by_length=None,\n        selection=None,\n        partition_idx=0,\n        drop=True,\n        
method=None,\n    ):\n        \"\"\"\n        Compute hints to process ``as_index=False`` parameter for the GroupBy result.\n\n        This function resolves naming conflicts of the index levels to insert and the column labels\n        for the GroupBy result. The logic of this function assumes that the initial GroupBy result\n        was computed as ``as_index=True``.\n\n        Parameters\n        ----------\n        result_cols : pandas.Index\n            Columns of the GroupBy result.\n        result_index_names : list-like\n            Index names of the GroupBy result.\n        internal_by_cols : list-like\n            Internal 'by' columns.\n        by_cols_dtypes : list-like, optional\n            Data types of the internal 'by' columns. Required to do special casing\n            in case of categorical 'by'. If not specified, assume that there is no\n            categorical data in 'by'.\n        by_length : int, optional\n            Amount of keys to group on (including frame columns and external objects like list, Series, etc.)\n            If not specified, consider `by_length` to be equal ``len(internal_by_cols)``.\n        selection : label or list of labels, optional\n            Set of columns that were explicitly selected for aggregation (for example\n            via dict-aggregation). If not specified assuming that aggregation was\n            applied to all of the available columns.\n        partition_idx : int, default: 0\n            Positional index of the current partition.\n        drop : bool, default: True\n            Indicates whether or not any of the `by` data came from the same frame.\n        method : str, optional\n            Name of the groupby function. 
This is a hint to be able to do special casing.\n            Note: this parameter is a legacy from the ``groupby_size`` implementation,\n            it's a hacky one and probably will be removed in the future: https://github.com/modin-project/modin/issues/3739.\n\n        Returns\n        -------\n        reset_index : bool\n            Indicates whether to reset index to the default one (0, 1, 2 ... n) at this partition.\n        drop_index : bool\n            If `reset_index` is True, indicates whether to drop all index levels (True) or insert them into the\n            resulting columns (False).\n        lvls_to_drop : list of ints\n            Contains numeric indices of the levels of the result index to drop as intersected.\n        cols_to_drop : list of labels\n            Contains labels of the columns to drop from the result as intersected.\n\n        Examples\n        --------\n        >>> groupby_result = compute_groupby_without_processing_as_index_parameter()\n        >>> if not as_index:\n        >>>     reset_index, drop, lvls_to_drop, cols_to_drop = handle_as_index(**extract_required_params(groupby_result))\n        >>>     if len(lvls_to_drop) > 0:\n        >>>         groupby_result.index = groupby_result.index.droplevel(lvls_to_drop)\n        >>>     if len(cols_to_drop) > 0:\n        >>>         groupby_result = groupby_result.drop(columns=cols_to_drop)\n        >>>     if reset_index:\n        >>>         groupby_result_with_processed_as_index_parameter = groupby_result.reset_index(drop=drop)\n        >>> else:\n        >>>     groupby_result_with_processed_as_index_parameter = groupby_result\n        \"\"\"\n        if by_length is None:\n            by_length = len(internal_by_cols)\n\n        reset_index = method != \"transform\" and (by_length > 0 or selection is not None)\n\n        # If the method is \"size\" then the result contains only one unique named column\n        # and we don't have to worry about any naming conflicts, so inserting 
all of\n        # the \"by\" into the result (just a fast-path)\n        if method == \"size\":\n            return reset_index, False, [], []\n\n        # Pandas logic of resolving naming conflicts is the following:\n        #   1. If any categorical is in 'by' and 'by' is multi-column, then the categorical\n        #      index is prioritized: drop intersected columns and insert all of the 'by' index\n        #      levels to the frame as columns.\n        #   2. Otherwise, aggregation result is prioritized: drop intersected index levels and\n        #      insert the filtered ones to the frame as columns.\n        if by_cols_dtypes is not None:\n            keep_index_levels = (\n                by_length > 1\n                and selection is None\n                and any(isinstance(x, pandas.CategoricalDtype) for x in by_cols_dtypes)\n            )\n        else:\n            keep_index_levels = False\n\n        # 1. We insert 'by'-columns to the result at the beginning of the frame and so only to the\n        #    first partition, if partition_idx != 0 we just drop the index. If there are no columns\n        #    that are required to drop (keep_index_levels is True) then we can exit here.\n        # 2. 
We don't insert 'by'-columns to the result if 'by'-data came from a different\n        #    frame (drop is False), there's only one exception for this rule: if the `method` is \"size\",\n        #    so if (drop is False) and method is not \"size\" we just drop the index and so can exit here.\n        if (not keep_index_levels and partition_idx != 0) or (\n            not drop and method != \"size\"\n        ):\n            return reset_index, True, [], []\n\n        if not isinstance(internal_by_cols, pandas.Index):\n            if not is_list_like(internal_by_cols):\n                internal_by_cols = [internal_by_cols]\n            internal_by_cols = pandas.Index(internal_by_cols)\n\n        internal_by_cols = (\n            internal_by_cols[\n                ~internal_by_cols.str.startswith(MODIN_UNNAMED_SERIES_LABEL, na=False)\n            ]\n            if hasattr(internal_by_cols, \"str\")\n            else internal_by_cols\n        )\n\n        if selection is not None and not isinstance(selection, pandas.Index):\n            selection = pandas.Index(selection)\n\n        lvls_to_drop = []\n        cols_to_drop = []\n\n        if not keep_index_levels:\n            # We want to insert only these internal-by-cols that are not presented\n            # in the result in order to not create naming conflicts\n            if selection is None:\n                cols_to_insert = frozenset(internal_by_cols) - frozenset(result_cols)\n            else:\n                cols_to_insert = frozenset(\n                    # We have to use explicit 'not in' check and not just difference\n                    # of sets because of specific '__contains__' operator in case of\n                    # scalar 'col' and MultiIndex 'selection'.\n                    col\n                    for col in internal_by_cols\n                    if col not in selection\n                )\n        else:\n            cols_to_insert = internal_by_cols\n            # We want to drop such 
internal-by-cols that are presented\n            # in the result in order to not create naming conflicts\n            cols_to_drop = frozenset(internal_by_cols) & frozenset(result_cols)\n\n        if partition_idx == 0:\n            lvls_to_drop = [\n                i\n                for i, name in enumerate(result_index_names)\n                if name not in cols_to_insert\n            ]\n        else:\n            lvls_to_drop = result_index_names\n\n        drop = False\n        if len(lvls_to_drop) == len(result_index_names):\n            drop = True\n            lvls_to_drop = []\n\n        return reset_index, drop, lvls_to_drop, cols_to_drop\n\n\nclass SeriesGroupBy(GroupBy):\n    \"\"\"Builder for GroupBy aggregation functions for Series.\"\"\"\n\n    @classmethod\n    def _call_groupby(cls, df, *args, **kwargs):  # noqa: PR01\n        \"\"\"Call .groupby() on passed `df` squeezed to Series.\"\"\"\n        # We can end up here by two means - either by \"true\" call\n        # like Series().groupby() or by df.groupby()[item].\n\n        if len(df.columns) == 1:\n            # Series().groupby() case\n            return df.squeeze(axis=1).groupby(*args, **kwargs)\n        # In second case surrounding logic will supplement grouping columns,\n        # so we need to drop them after grouping is over; our originally\n        # selected column is always the first, so use it\n        with warnings.catch_warnings():\n            warnings.filterwarnings(\"ignore\", category=FutureWarning)\n            return df.groupby(*args, **kwargs)[df.columns[0]]\n\n\nclass GroupByDefault(DefaultMethod):\n    \"\"\"Builder for default-to-pandas GroupBy aggregation functions.\"\"\"\n\n    _groupby_cls = GroupBy\n\n    OBJECT_TYPE = \"GroupBy\"\n\n    @classmethod\n    def register(cls, func, **kwargs):\n        \"\"\"\n        Build default-to-pandas GroupBy aggregation function.\n\n        Parameters\n        ----------\n        func : callable or str\n            Default 
aggregation function. If aggregation function is not specified\n            via groupby arguments, then `func` function is used.\n        **kwargs : kwargs\n            Additional arguments that will be passed to function builder.\n\n        Returns\n        -------\n        callable\n            Function that takes query compiler and defaults to pandas to do GroupBy\n            aggregation.\n        \"\"\"\n        return super().register(\n            cls._groupby_cls.build_groupby(func), fn_name=func.__name__, **kwargs\n        )\n\n    # This specifies a `pandas.DataFrameGroupBy` method to pass the `agg_func` to,\n    # it's based on `how` to apply it. Going by pandas documentation:\n    #   1. `.aggregate(func)` applies func row/column wise.\n    #   2. `.apply(func)` applies func to a DataFrame, holding a whole group (group-wise).\n    #   3. `.transform(func)` is the same as `.apply()` but also broadcasts the `func`\n    #      result to the group's original shape.\n    #   4. 'direct' mode means that the passed `func` has to be applied directly\n    #      to the `pandas.DataFrameGroupBy` object.\n    _aggregation_methods_dict = {\n        \"axis_wise\": pandas.core.groupby.DataFrameGroupBy.aggregate,\n        \"group_wise\": pandas.core.groupby.DataFrameGroupBy.apply,\n        \"transform\": pandas.core.groupby.DataFrameGroupBy.transform,\n        \"direct\": lambda grp, func, *args, **kwargs: func(grp, *args, **kwargs),\n    }\n\n    @classmethod\n    def get_aggregation_method(cls, how):\n        \"\"\"\n        Return `pandas.DataFrameGroupBy` method that implements the passed `how` UDF applying strategy.\n\n        Parameters\n        ----------\n        how : {\"axis_wise\", \"group_wise\", \"transform\"}\n            `how` parameter of the ``BaseQueryCompiler.groupby_agg``.\n\n        Returns\n        -------\n        callable(pandas.DataFrameGroupBy, callable, *args, **kwargs) -> [pandas.DataFrame | pandas.Series]\n\n        Notes\n        -----\n  
      Visit ``BaseQueryCompiler.groupby_agg`` doc-string for more information about `how` parameter.\n        \"\"\"\n        return cls._aggregation_methods_dict[how]\n\n\nclass SeriesGroupByDefault(GroupByDefault):\n    \"\"\"Builder for default-to-pandas GroupBy aggregation functions for Series.\"\"\"\n\n    _groupby_cls = SeriesGroupBy\n\n    _aggregation_methods_dict = {\n        \"axis_wise\": pandas.core.groupby.SeriesGroupBy.aggregate,\n        \"group_wise\": pandas.core.groupby.SeriesGroupBy.apply,\n        \"transform\": pandas.core.groupby.SeriesGroupBy.transform,\n        \"direct\": lambda grp, func, *args, **kwargs: func(grp, *args, **kwargs),\n    }\n"
  },
  {
    "path": "modin/core/dataframe/algebra/default2pandas/list.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module houses default applied-on-list accessor functions builder class.\"\"\"\n\nfrom .series import SeriesDefault\n\n\nclass ListDefault(SeriesDefault):\n    \"\"\"Builder for default-to-pandas methods which are executed under list accessor.\"\"\"\n\n    @classmethod\n    def frame_wrapper(cls, df):\n        \"\"\"\n        Get list accessor of the passed frame.\n\n        Parameters\n        ----------\n        df : pandas.DataFrame\n\n        Returns\n        -------\n        pandas.core.arrays.arrow.ListAccessor\n        \"\"\"\n        return df.squeeze(axis=1).list\n"
  },
  {
    "path": "modin/core/dataframe/algebra/default2pandas/resample.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module houses default Resample functions builder class.\"\"\"\n\nfrom .default import DefaultMethod\n\n\n# FIXME: there is no sense in keeping `Resampler` and `ResampleDefault` logic in different\n# classes. 
They should be combined.\nclass Resampler:\n    \"\"\"Builder class for resampled aggregation functions.\"\"\"\n\n    @classmethod\n    def build_resample(cls, func, squeeze_self):\n        \"\"\"\n        Build function that resamples time-series data and does aggregation.\n\n        Parameters\n        ----------\n        func : callable\n            Aggregation function to execute under resampled frame.\n        squeeze_self : bool\n            Whether or not to squeeze frame before resampling.\n\n        Returns\n        -------\n        callable\n            Function that takes pandas DataFrame and applies aggregation\n            to resampled time-series data.\n        \"\"\"\n\n        def fn(df, resample_kwargs, *args, **kwargs):\n            \"\"\"Resample time-series data of the passed frame and apply specified aggregation.\"\"\"\n            if squeeze_self:\n                df = df.squeeze(axis=1)\n            resampler = df.resample(**resample_kwargs)\n\n            if type(func) is property:\n                return func.fget(resampler)\n\n            return func(resampler, *args, **kwargs)\n\n        return fn\n\n\nclass ResampleDefault(DefaultMethod):\n    \"\"\"Builder for default-to-pandas resampled aggregation functions.\"\"\"\n\n    OBJECT_TYPE = \"Resampler\"\n\n    @classmethod\n    def register(cls, func, squeeze_self=False, **kwargs):\n        \"\"\"\n        Build function that does fallback to pandas and aggregates resampled data.\n\n        Parameters\n        ----------\n        func : callable\n            Aggregation function to execute under resampled frame.\n        squeeze_self : bool, default: False\n            Whether or not to squeeze frame before resampling.\n        **kwargs : kwargs\n            Additional arguments that will be passed to function builder.\n\n        Returns\n        -------\n        callable\n            Function that takes query compiler and does fallback to pandas to resample\n            time-series data and 
apply aggregation on it.\n        \"\"\"\n        return super().register(\n            Resampler.build_resample(func, squeeze_self),\n            fn_name=func.__name__,\n            **kwargs\n        )\n"
  },
  {
    "path": "modin/core/dataframe/algebra/default2pandas/rolling.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module houses default Rolling functions builder class.\"\"\"\n\nfrom .default import DefaultMethod\n\n\nclass RollingDefault(DefaultMethod):\n    \"\"\"Builder for default-to-pandas aggregation on a rolling window functions.\"\"\"\n\n    OBJECT_TYPE = \"Rolling\"\n\n    @classmethod\n    def _build_rolling(cls, func):\n        \"\"\"\n        Build function that creates a rolling window and executes `func` on it.\n\n        Parameters\n        ----------\n        func : callable\n            Function to execute on a rolling window.\n\n        Returns\n        -------\n        callable\n            Function that takes pandas DataFrame and applies `func` on a rolling window.\n        \"\"\"\n\n        def fn(df, rolling_kwargs, *args, **kwargs):\n            \"\"\"Create rolling window for the passed frame and execute specified `func` on it.\"\"\"\n            roller = df.rolling(**rolling_kwargs)\n\n            if type(func) is property:\n                return func.fget(roller)\n\n            return func(roller, *args, **kwargs)\n\n        return fn\n\n    @classmethod\n    def register(cls, func, **kwargs):\n        \"\"\"\n        Build function that do 
fallback to pandas to apply `func` on a rolling window.\n\n        Parameters\n        ----------\n        func : callable\n            Function to execute on a rolling window.\n        **kwargs : kwargs\n            Additional arguments that will be passed to function builder.\n\n        Returns\n        -------\n        callable\n            Function that takes query compiler and defaults to pandas to apply aggregation\n            `func` on a rolling window.\n        \"\"\"\n        return super().register(\n            cls._build_rolling(func), fn_name=func.__name__, **kwargs\n        )\n\n\nclass ExpandingDefault(DefaultMethod):\n    \"\"\"Builder for default-to-pandas aggregation on an expanding window functions.\"\"\"\n\n    OBJECT_TYPE = \"Expanding\"\n\n    @classmethod\n    def _build_expanding(cls, func, squeeze_self):\n        \"\"\"\n        Build function that creates an expanding window and executes `func` on it.\n\n        Parameters\n        ----------\n        func : callable\n            Function to execute on a expanding window.\n        squeeze_self : bool\n            Whether or not to squeeze frame before executing the window function.\n\n        Returns\n        -------\n        callable\n            Function that takes pandas DataFrame and applies `func` on a expanding window.\n        \"\"\"\n\n        def fn(df, rolling_args, *args, **kwargs):\n            \"\"\"Create rolling window for the passed frame and execute specified `func` on it.\"\"\"\n            if squeeze_self:\n                df = df.squeeze(axis=1)\n            roller = df.expanding(*rolling_args)\n\n            if type(func) is property:\n                return func.fget(roller)\n\n            return func(roller, *args, **kwargs)\n\n        return fn\n\n    @classmethod\n    def register(cls, func, squeeze_self=False, **kwargs):\n        \"\"\"\n        Build function that do fallback to pandas to apply `func` on a expanding window.\n\n        Parameters\n        
----------\n        func : callable\n            Function to execute on an expanding window.\n        squeeze_self : bool, default: False\n            Whether or not to squeeze frame before executing the window function.\n        **kwargs : kwargs\n            Additional arguments that will be passed to function builder.\n\n        Returns\n        -------\n        callable\n            Function that takes query compiler and defaults to pandas to apply aggregation\n            `func` on an expanding window.\n        \"\"\"\n        return super().register(\n            cls._build_expanding(func, squeeze_self=squeeze_self),\n            fn_name=func.__name__,\n            **kwargs\n        )\n"
  },
  {
    "path": "modin/core/dataframe/algebra/default2pandas/series.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module houses default Series functions builder class.\"\"\"\n\nfrom .default import DefaultMethod\n\n\nclass SeriesDefault(DefaultMethod):\n    \"\"\"Builder for default-to-pandas methods which are executed under Series.\"\"\"\n\n    OBJECT_TYPE = \"Series\"\n\n    @classmethod\n    def frame_wrapper(cls, df):\n        \"\"\"\n        Squeeze passed DataFrame to be able to process Series-specific functions on it.\n\n        Parameters\n        ----------\n        df : pandas.DataFrame\n            One-column DataFrame to squeeze.\n\n        Returns\n        -------\n        pandas.Series\n        \"\"\"\n        return df.squeeze(axis=1)\n"
  },
  {
    "path": "modin/core/dataframe/algebra/default2pandas/str.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module houses default applied-on-str functions builder class.\"\"\"\n\nfrom .series import SeriesDefault\n\n\nclass StrDefault(SeriesDefault):\n    \"\"\"Builder for default-to-pandas methods which are executed under `str` accessor.\"\"\"\n\n    @classmethod\n    def frame_wrapper(cls, df):\n        \"\"\"\n        Get `str` accessor of the passed frame.\n\n        Parameters\n        ----------\n        df : pandas.DataFrame\n\n        Returns\n        -------\n        pandas.core.strings.accessor.StringMethods\n        \"\"\"\n        return df.squeeze(axis=1).str\n"
  },
  {
    "path": "modin/core/dataframe/algebra/default2pandas/struct.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module houses default applied-on-struct accessor functions builder class.\"\"\"\n\nfrom .series import SeriesDefault\n\n\nclass StructDefault(SeriesDefault):\n    \"\"\"Builder for default-to-pandas methods which is executed under struct accessor.\"\"\"\n\n    @classmethod\n    def frame_wrapper(cls, df):\n        \"\"\"\n        Get struct accessor of the passed frame.\n\n        Parameters\n        ----------\n        df : pandas.DataFrame\n\n        Returns\n        -------\n        pandas.core.arrays.arrow.StructAccessor\n        \"\"\"\n        return df.squeeze(axis=1).struct\n"
  },
  {
    "path": "modin/core/dataframe/algebra/fold.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module houses builder class for Fold operator.\"\"\"\n\nfrom __future__ import annotations\n\nfrom typing import TYPE_CHECKING, Callable, Optional\n\nfrom .operator import Operator\n\nif TYPE_CHECKING:\n    import pandas\n\n    from modin.core.storage_formats.pandas.query_compiler import PandasQueryCompiler\n\n\nclass Fold(Operator):\n    \"\"\"Builder class for Fold functions.\"\"\"\n\n    @classmethod\n    def register(\n        cls, fold_function: Callable[..., pandas.DataFrame], shape_preserved=False\n    ) -> Callable[..., PandasQueryCompiler]:\n        \"\"\"\n        Build Fold operator that will be performed across rows/columns.\n\n        Parameters\n        ----------\n        fold_function : callable(pandas.DataFrame, *args, **kwargs) -> pandas.DataFrame\n            Function to apply across rows/columns.\n        shape_preserved : bool, default: False\n            Whether the shape of the dataframe is preserved or not\n            after applying a function.\n\n        Returns\n        -------\n        callable\n            Function that takes query compiler and executes Fold function.\n        \"\"\"\n\n        def caller(\n            
query_compiler: PandasQueryCompiler,\n            fold_axis: Optional[int] = None,\n            *args: tuple,\n            new_index=None,\n            new_columns=None,\n            **kwargs: dict,\n        ) -> PandasQueryCompiler:\n            \"\"\"\n            Execute Fold function against passed query compiler.\n\n            Parameters\n            ----------\n            query_compiler : PandasQueryCompiler\n                The query compiler to execute the function on.\n            fold_axis : int, optional\n                0 or None means apply across full column partitions. 1 means\n                apply across full row partitions.\n            *args : tuple\n                Additional arguments passed to `fold_function`.\n            new_index : list-like, optional\n                The index of the result.\n            new_columns : list-like, optional\n                The columns of the result.\n            **kwargs: dict\n                Additional keyword arguments passed to `fold_function`.\n\n            Returns\n            -------\n            PandasQueryCompiler\n                A new query compiler representing the result of executing the\n                function.\n            \"\"\"\n            return query_compiler.__constructor__(\n                query_compiler._modin_frame.fold(\n                    cls.validate_axis(fold_axis),\n                    lambda x: fold_function(x, *args, **kwargs),\n                    new_index=new_index,\n                    new_columns=new_columns,\n                    shape_preserved=shape_preserved,\n                )\n            )\n\n        return caller\n"
  },
  {
    "path": "modin/core/dataframe/algebra/groupby.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module houses builder class for GroupByReduce operator.\"\"\"\n\nfrom __future__ import annotations\n\nfrom typing import TYPE_CHECKING, Callable, Optional, Union\n\nimport pandas\n\nfrom modin.core.dataframe.pandas.metadata import ModinIndex\nfrom modin.error_message import ErrorMessage\nfrom modin.utils import MODIN_UNNAMED_SERIES_LABEL, hashable\n\nfrom .default2pandas.groupby import GroupBy, GroupByDefault\nfrom .tree_reduce import TreeReduce\n\nif TYPE_CHECKING:\n    from modin.core.storage_formats.pandas.query_compiler import PandasQueryCompiler\n\n\nclass GroupByReduce(TreeReduce):\n    \"\"\"\n    Builder class for GroupBy aggregation functions.\n\n    Attributes\n    ----------\n    ID_LEVEL_NAME : str\n        It's supposed that implementations may produce multiple temporary\n        columns per one source column in an intermediate phase. In order\n        for these columns to be processed accordingly at the Reduce phase,\n        an implementation must store unique names for such temporary\n        columns in the ``ID_LEVEL_NAME`` level. 
Duplicated names are not allowed.\n    _GROUPBY_REDUCE_IMPL_FLAG : str\n        Attribute indicating that a callable should be treated as an\n        implementation for one of the TreeReduce phases rather than an\n        arbitrary aggregation. Note: this attribute should be considered private.\n    \"\"\"\n\n    ID_LEVEL_NAME: str = \"__ID_LEVEL_NAME__\"\n    _GROUPBY_REDUCE_IMPL_FLAG: str = \"__groupby_reduce_impl_func__\"\n\n    @classmethod\n    def register(\n        cls,\n        map_func: Union[str, dict, Callable[..., pandas.DataFrame]],\n        reduce_func: Optional[Union[str, dict, Callable[..., pandas.DataFrame]]] = None,\n        **call_kwds: dict,\n    ) -> Callable[..., PandasQueryCompiler]:\n        \"\"\"\n        Build template GroupBy aggregation function.\n\n        Resulted function is applied in parallel via TreeReduce algorithm.\n\n        Parameters\n        ----------\n        map_func : str, dict or callable(pandas.core.groupby.DataFrameGroupBy) -> pandas.DataFrame\n            Function to apply to the `GroupByObject` at the map phase. If ``str`` was passed it will\n            be treated as a DataFrameGroupBy's method name.\n        reduce_func : str, dict or callable(pandas.core.groupby.DataFrameGroupBy) -> pandas.DataFrame, optional\n            Function to apply to the ``DataFrameGroupBy`` at the reduce phase. 
If not specified\n            will be set the same as 'map_func'.\n        **call_kwds : dict\n            Kwargs that will be passed to the returned function.\n\n        Returns\n        -------\n        callable\n            Function that takes query compiler and executes GroupBy aggregation\n            with TreeReduce algorithm.\n        \"\"\"\n        if reduce_func is None:\n            reduce_func = map_func\n\n        def build_fn(name):\n            return lambda df, *args, **kwargs: getattr(df, name)(*args, **kwargs)\n\n        if isinstance(map_func, str):\n            map_func = build_fn(map_func)\n        if isinstance(reduce_func, str):\n            reduce_func = build_fn(reduce_func)\n\n        assert not (\n            isinstance(map_func, dict) ^ isinstance(reduce_func, dict)\n        ) and not (\n            callable(map_func) ^ callable(reduce_func)\n        ), \"Map and reduce functions must be either both dict or both callable.\"\n\n        return lambda *args, **kwargs: cls.caller(\n            *args, map_func=map_func, reduce_func=reduce_func, **kwargs, **call_kwds\n        )\n\n    @classmethod\n    def register_implementation(\n        cls,\n        map_func: Callable[..., pandas.DataFrame],\n        reduce_func: Callable[..., pandas.DataFrame],\n    ) -> None:\n        \"\"\"\n        Register callables to be recognized as implementations of tree-reduce phases.\n\n        Parameters\n        ----------\n        map_func : callable(pandas.core.groupby.DataFrameGroupBy) -> pandas.DataFrame\n            Callable to register.\n        reduce_func : callable(pandas.core.groupby.DataFrameGroupBy) -> pandas.DataFrame\n            Callable to register.\n        \"\"\"\n        setattr(map_func, cls._GROUPBY_REDUCE_IMPL_FLAG, True)\n        setattr(reduce_func, cls._GROUPBY_REDUCE_IMPL_FLAG, True)\n\n    @classmethod\n    def map(\n        cls,\n        df: pandas.DataFrame,\n        map_func: Callable[..., pandas.DataFrame],\n        axis: 
int,\n        groupby_kwargs: dict,\n        agg_args: list,\n        agg_kwargs: dict,\n        other: Optional[pandas.DataFrame] = None,\n        by=None,\n        drop: bool = False,\n    ) -> pandas.DataFrame:\n        \"\"\"\n        Execute Map phase of GroupByReduce.\n\n        Groups DataFrame and applies map function. Groups will be\n        preserved in the results index for the following reduce phase.\n\n        Parameters\n        ----------\n        df : pandas.DataFrame\n            Serialized frame to group.\n        map_func : dict or callable(pandas.DataFrameGroupBy) -> pandas.DataFrame\n            Function to apply to the `GroupByObject`.\n        axis : {0, 1}\n            Axis to group and apply aggregation function along. 0 means index axis\n            when 1 means column axis.\n        groupby_kwargs : dict\n            Dictionary which carries arguments for `pandas.DataFrame.groupby`.\n        agg_args : list-like\n            Positional arguments to pass to the aggregation functions.\n        agg_kwargs : dict\n            Keyword arguments to pass to the aggregation functions.\n        other : pandas.DataFrame, optional\n            Serialized frame, whose columns are used to determine the groups.\n            If not specified, `by` parameter is used.\n        by : level index name or list of such labels, optional\n            Index levels, that is used to determine groups.\n            If not specified, `other` parameter is used.\n        drop : bool, default: False\n            Indicates whether or not by-data came from the `self` frame.\n\n        Returns\n        -------\n        pandas.DataFrame\n            GroupBy aggregation result for one particular partition.\n        \"\"\"\n        # Set `as_index` to True to track the metadata of the grouping object\n        # It is used to make sure that between phases we are constructing the\n        # right index and placing columns in the correct order.\n        
groupby_kwargs[\"as_index\"] = True\n        groupby_kwargs[\"observed\"] = True\n        # We have to filter func-dict BEFORE inserting broadcasted 'by' columns\n        # to avoid multiple aggregation results for 'by' cols in case they're\n        # present in the func-dict:\n        apply_func = cls.get_callable(\n            map_func,\n            df,\n            # We won't be able to preserve the order as the Map phase would likely\n            # produce some temporary columns that won't fit into the original\n            # aggregation order. It doesn't matter much as we restore the original\n            # order at the Reduce phase.\n            preserve_aggregation_order=False,\n        )\n        if other is not None:\n            # Other is a broadcasted partition that represents 'by' data to group on.\n            # If 'drop' then the 'by' data came from the 'self' frame, thus\n            # inserting missed columns to the partition to group on them.\n            if drop or isinstance(\n                other := other.squeeze(axis=axis ^ 1), pandas.DataFrame\n            ):\n                df = pandas.concat(\n                    [df] + [other[[o for o in other if o not in df]]],\n                    axis=1,\n                )\n                other = list(other.columns)\n            by_part = other\n        else:\n            by_part = by\n\n        result = apply_func(\n            df.groupby(by=by_part, axis=axis, **groupby_kwargs), *agg_args, **agg_kwargs\n        )\n        # Result could not always be a frame, so wrapping it into DataFrame\n        return pandas.DataFrame(result)\n\n    @classmethod\n    def reduce(\n        cls,\n        df: pandas.DataFrame,\n        reduce_func: Union[dict, Callable[..., pandas.DataFrame]],\n        axis: int,\n        groupby_kwargs: dict,\n        agg_args: list,\n        agg_kwargs: dict,\n        partition_idx: int = 0,\n        drop: bool = False,\n        method: Optional[str] = None,\n        finalizer_fn: 
Optional[Callable[[pandas.DataFrame], pandas.DataFrame]] = None,\n    ) -> pandas.DataFrame:\n        \"\"\"\n        Execute Reduce phase of GroupByReduce.\n\n        Combines groups from the Map phase and applies reduce function.\n\n        Parameters\n        ----------\n        df : pandas.DataFrame\n            Serialized frame which contain groups to combine.\n        reduce_func : dict or callable(pandas.DataFrameGroupBy) -> pandas.DataFrame\n            Function to apply to the `GroupByObject`.\n        axis : {0, 1}\n            Axis to group and apply aggregation function along. 0 means index axis\n            when 1 means column axis.\n        groupby_kwargs : dict\n            Dictionary which carries arguments for `pandas.DataFrame.groupby`.\n        agg_args : list-like\n            Positional arguments to pass to the aggregation functions.\n        agg_kwargs : dict\n            Keyword arguments to pass to the aggregation functions.\n        partition_idx : int, default: 0\n            Internal index of column partition to which this function is applied.\n        drop : bool, default: False\n            Indicates whether or not by-data came from the `self` frame.\n        method : str, optional\n            Name of the groupby function. 
This is a hint to be able to do special casing.\n        finalizer_fn : callable(pandas.DataFrame) -> pandas.DataFrame, optional\n            A callable to execute at the end a groupby kernel against groupby result.\n\n        Returns\n        -------\n        pandas.DataFrame\n            GroupBy aggregation result.\n        \"\"\"\n        # Wrapping names into an Index should be unnecessary, however\n        # there is a bug in pandas with intersection that forces us to do so:\n        # https://github.com/pandas-dev/pandas/issues/39699\n        by_part = pandas.Index(df.index.names)\n\n        groupby_kwargs = groupby_kwargs.copy()\n        as_index = groupby_kwargs.get(\"as_index\", True)\n\n        # Set `as_index` to True to track the metadata of the grouping object\n        groupby_kwargs[\"as_index\"] = True\n\n        # since now index levels contain our 'by', in the reduce phase\n        # we want to group on these levels\n        groupby_kwargs[\"level\"] = list(range(len(df.index.names)))\n\n        apply_func = cls.get_callable(reduce_func, df)\n        result = apply_func(\n            df.groupby(axis=axis, **groupby_kwargs), *agg_args, **agg_kwargs\n        )\n\n        if not as_index:\n            idx = df.index\n            GroupBy.handle_as_index_for_dataframe(\n                result,\n                by_part,\n                by_cols_dtypes=(\n                    idx.dtypes.values\n                    if isinstance(idx, pandas.MultiIndex) and hasattr(idx, \"dtypes\")\n                    else (idx.dtype,)\n                ),\n                by_length=len(by_part),\n                selection=reduce_func.keys() if isinstance(reduce_func, dict) else None,\n                partition_idx=partition_idx,\n                drop=drop,\n                method=method,\n                inplace=True,\n            )\n        # Result could not always be a frame, so wrapping it into DataFrame\n        result = pandas.DataFrame(result)\n        if 
result.index.name == MODIN_UNNAMED_SERIES_LABEL:\n            result.index.name = None\n\n        return result if finalizer_fn is None else finalizer_fn(result)\n\n    @classmethod\n    def caller(\n        cls,\n        query_compiler: PandasQueryCompiler,\n        by,\n        map_func: Union[dict, Callable[..., pandas.DataFrame]],\n        reduce_func: Union[dict, Callable[..., pandas.DataFrame]],\n        axis: int,\n        groupby_kwargs: dict,\n        agg_args: list,\n        agg_kwargs: dict,\n        drop: bool = False,\n        method: Optional[str] = None,\n        default_to_pandas_func: Optional[Callable[..., pandas.DataFrame]] = None,\n        finalizer_fn: Optional[Callable[[pandas.DataFrame], pandas.DataFrame]] = None,\n    ) -> PandasQueryCompiler:\n        \"\"\"\n        Execute GroupBy aggregation with TreeReduce approach.\n\n        Parameters\n        ----------\n        query_compiler : PandasQueryCompiler\n            Frame to group.\n        by : PandasQueryCompiler, column or index label, Grouper or list of such\n            Object that determine groups.\n        map_func : dict or callable(pandas.DataFrameGroupBy) -> pandas.DataFrame\n            Function to apply to the `GroupByObject` at the Map phase.\n        reduce_func : dict or callable(pandas.DataFrameGroupBy) -> pandas.DataFrame\n            Function to apply to the `GroupByObject` at the Reduce phase.\n        axis : {0, 1}\n            Axis to group and apply aggregation function along. 
0 means index axis\n            when 1 means column axis.\n        groupby_kwargs : dict\n            Dictionary which carries arguments for pandas.DataFrame.groupby.\n        agg_args : list-like\n            Positional arguments to pass to the aggregation functions.\n        agg_kwargs : dict\n            Keyword arguments to pass to the aggregation functions.\n        drop : bool, default: False\n            Indicates whether or not by-data came from the `self` frame.\n        method : str, optional\n            Name of the GroupBy aggregation function. This is a hint to be able to do special casing.\n        default_to_pandas_func : callable(pandas.DataFrameGroupBy) -> pandas.DataFrame, optional\n            The pandas aggregation function equivalent to the `map_func + reduce_func`.\n            Used in case of defaulting to pandas. If not specified `map_func` is used.\n        finalizer_fn : callable(pandas.DataFrame) -> pandas.DataFrame, optional\n            A callable to execute at the end a groupby kernel against groupby result.\n\n        Returns\n        -------\n        PandasQueryCompiler\n            QueryCompiler which carries the result of GroupBy aggregation.\n        \"\"\"\n        is_unsupported_axis = axis != 0\n        # Defaulting to pandas in case of an empty frame as we can't process it properly.\n        # Higher API level won't pass empty data here unless the frame has delayed\n        # computations. 
So we apparently lose some laziness here (due to index access)\n        # because of the inability to process empty groupby natively.\n        is_empty_data = (\n            len(query_compiler.columns) == 0 or len(query_compiler.index) == 0\n        )\n        is_grouping_using_by_arg = (\n            groupby_kwargs.get(\"level\", None) is None and by is not None\n        )\n        is_unsupported_by_arg = isinstance(by, pandas.Grouper) or (\n            not hashable(by) and not isinstance(by, type(query_compiler))\n        )\n\n        if (\n            is_unsupported_axis\n            or is_empty_data\n            or (is_grouping_using_by_arg and is_unsupported_by_arg)\n        ):\n            if default_to_pandas_func is None:\n                default_to_pandas_func = (\n                    (lambda grp: grp.agg(map_func))\n                    if isinstance(map_func, dict)\n                    else map_func\n                )\n            default_to_pandas_func = GroupByDefault.register(default_to_pandas_func)\n            return default_to_pandas_func(\n                query_compiler,\n                by=by,\n                axis=axis,\n                groupby_kwargs=groupby_kwargs,\n                agg_args=agg_args,\n                agg_kwargs=agg_kwargs,\n                drop=drop,\n            )\n\n        # The bug only occurs in the case of Categorical 'by', so we might want to check whether any of\n        # the 'by' dtypes is Categorical before going into this branch, however triggering 'dtypes'\n        # computation if they're not computed may take time, so we don't do it\n        if not groupby_kwargs.get(\"sort\", True) and isinstance(\n            by, type(query_compiler)\n        ):\n            ErrorMessage.mismatch_with_pandas(\n                operation=\"df.groupby(categorical_by, sort=False)\",\n                message=(\n                    \"the groupby keys will be sorted anyway, although the 'sort=False' was passed. 
\"\n                    + \"See the following issue for more details: \"\n                    + \"https://github.com/modin-project/modin/issues/3571\"\n                ),\n            )\n            groupby_kwargs = groupby_kwargs.copy()\n            groupby_kwargs[\"sort\"] = True\n\n        map_fn, reduce_fn = cls.build_map_reduce_functions(\n            by=by,\n            axis=axis,\n            groupby_kwargs=groupby_kwargs,\n            map_func=map_func,\n            reduce_func=reduce_func,\n            agg_args=agg_args,\n            agg_kwargs=agg_kwargs,\n            drop=drop,\n            method=method,\n            finalizer_fn=finalizer_fn,\n        )\n\n        # If `by` is a ModinFrame, then its partitions will be broadcasted to every\n        # `self` partition in a way determined by engine (modin_frame.groupby_reduce)\n        # Otherwise `by` was already bound to the Map function in `build_map_reduce_functions`.\n        broadcastable_by = getattr(by, \"_modin_frame\", None)\n        apply_indices = list(map_func.keys()) if isinstance(map_func, dict) else None\n        if (\n            broadcastable_by is not None\n            and groupby_kwargs.get(\"as_index\", True)\n            and broadcastable_by.has_materialized_dtypes\n        ):\n            new_index = ModinIndex(\n                # actual value will be assigned on a parent update\n                value=None,\n                axis=0,\n                dtypes=broadcastable_by.dtypes,\n            )\n        else:\n            new_index = None\n        new_modin_frame = query_compiler._modin_frame.groupby_reduce(\n            axis,\n            broadcastable_by,\n            map_fn,\n            reduce_fn,\n            apply_indices=apply_indices,\n            new_index=new_index,\n        )\n\n        result = query_compiler.__constructor__(new_modin_frame)\n        return result\n\n    @classmethod\n    def get_callable(\n        cls,\n        agg_func: Union[dict, Callable[..., 
pandas.DataFrame]],\n        df: pandas.DataFrame,\n        preserve_aggregation_order: bool = True,\n    ) -> Callable[..., pandas.DataFrame]:\n        \"\"\"\n        Build aggregation function to apply to each group at this particular partition.\n\n        If it's dictionary aggregation — filters aggregation dictionary for keys which\n        this particular partition contains, otherwise do nothing with passed function.\n\n        Parameters\n        ----------\n        agg_func : dict or callable(pandas.DataFrameGroupBy) -> pandas.DataFrame\n            Aggregation function.\n        df : pandas.DataFrame\n            Serialized partition which contains available columns.\n        preserve_aggregation_order : bool, default: True\n            Whether to manually restore the order of columns for the result specified\n            by the `agg_func` keys (only makes sense when `agg_func` is a dictionary).\n\n        Returns\n        -------\n        callable(pandas.core.groupby.DataFrameGroupBy) -> pandas.DataFrame\n            Aggregation function that can be safely applied to this particular partition.\n        \"\"\"\n        if not isinstance(agg_func, dict):\n            return agg_func\n\n        grp_has_id_level = df.columns.names[0] == cls.ID_LEVEL_NAME\n        # The 'id' level prevents us from a lookup for the original\n        # partition's columns. 
So dropping the level.\n        partition_columns = frozenset(\n            df.columns.droplevel(0) if grp_has_id_level else df.columns\n        )\n\n        partition_dict = {k: v for k, v in agg_func.items() if k in partition_columns}\n        return cls._build_callable_for_dict(\n            partition_dict, preserve_aggregation_order, grp_has_id_level\n        )\n\n    @classmethod\n    def _build_callable_for_dict(\n        cls,\n        agg_dict: dict,\n        preserve_aggregation_order: bool = True,\n        grp_has_id_level: bool = False,\n    ) -> Callable[..., pandas.DataFrame]:\n        \"\"\"\n        Build callable for an aggregation dictionary.\n\n        Parameters\n        ----------\n        agg_dict : dict\n            Aggregation dictionary.\n        preserve_aggregation_order : bool, default: True\n            Whether to manually restore the order of columns for the result specified\n            by the `agg_func` keys (only makes sense when `agg_func` is a dictionary).\n        grp_has_id_level : bool, default: False\n            Whether the frame we're grouping on has intermediate columns\n            (see ``cls.ID_LEVEL_NAME``).\n\n        Returns\n        -------\n        callable(pandas.core.groupby.DataFrameGroupBy) -> pandas.DataFrame\n        \"\"\"\n        # We have to keep this import away from the module level to avoid circular import\n        from modin.pandas.utils import walk_aggregation_dict\n\n        # We now filter aggregation functions into those that could be applied natively\n        # using pandas (pandas_grp_obj.agg(**native_aggs)) and those that require\n        # special treatment (custom_aggs).\n        custom_aggs = {}\n        native_aggs = {}\n\n        result_columns = []\n        for col, func, func_name, col_renaming_required in walk_aggregation_dict(\n            agg_dict\n        ):\n            # Filter dictionary\n            dict_to_add = (\n                custom_aggs if 
cls.is_registered_implementation(func) else native_aggs\n            )\n\n            new_value = func if func_name is None else (func_name, func)\n            old_value = dict_to_add.get(col, None)\n\n            if old_value is not None:\n                ErrorMessage.catch_bugs_and_request_email(\n                    failure_condition=not isinstance(old_value, list),\n                    extra_log=\"Expected for all aggregation values to be a list when at least \"\n                    + f\"one column has multiple aggregations. Got: {old_value} {type(old_value)}\",\n                )\n                old_value.append(new_value)\n            else:\n                # Pandas knows that it has to modify the resulting columns if it meets\n                # a function wrapped into a list. Renaming is required if either a new\n                # column name was explicitly specified, or multiple functions were\n                # specified per one column, or if any other column in the aggregation\n                # is going to be renamed.\n                dict_to_add[col] = [new_value] if col_renaming_required else new_value\n\n            # Construct resulting columns\n            if col_renaming_required:\n                func_name = str(func) if func_name is None else func_name\n                result_columns.append(\n                    (*(col if isinstance(col, tuple) else (col,)), func_name)\n                )\n            else:\n                result_columns.append(col)\n\n        result_columns = pandas.Index(result_columns)\n\n        def aggregate_on_dict(grp_obj, *args, **kwargs):\n            \"\"\"Aggregate the passed groupby object.\"\"\"\n            if len(native_aggs) == 0:\n                native_agg_res = None\n            elif grp_has_id_level:\n                # Adding the 'id' level to the aggregation keys so they match `grp_obj` columns\n                native_aggs_modified = {\n                    (\n                        cls.ID_LEVEL_NAME,\n      
                  *(key if isinstance(key, tuple) else (key,)),\n                    ): value\n                    for key, value in native_aggs.items()\n                }\n                native_agg_res = grp_obj.agg(native_aggs_modified)\n                # Dropping the 'id' level from the resulted frame\n                native_agg_res.columns = native_agg_res.columns.droplevel(0)\n            else:\n                native_agg_res = grp_obj.agg(native_aggs)\n\n            custom_results = []\n            insert_id_levels = False\n\n            for col, func, func_name, col_renaming_required in walk_aggregation_dict(\n                custom_aggs\n            ):\n                if grp_has_id_level:\n                    cols_without_ids = grp_obj.obj.columns.droplevel(0)\n                    if isinstance(cols_without_ids, pandas.MultiIndex):\n                        # We may have multiple columns matching the `col` in\n                        # a MultiIndex case, that's why use `.get_locs` here\n                        col_pos = cols_without_ids.get_locs(col)\n                    else:\n                        # `pandas.Index` doesn't have `.get_locs` method\n                        col_pos = cols_without_ids.get_loc(col)\n                    agg_key = grp_obj.obj.columns[col_pos]\n                else:\n                    agg_key = [col]\n\n                result = func(grp_obj[agg_key])\n                # The `func` may have discarded an ID-level if there were any.\n                # So checking for this again.\n                result_has_id_level = result.columns.names[0] == cls.ID_LEVEL_NAME\n                insert_id_levels |= result_has_id_level\n\n                if col_renaming_required:\n                    func_name = str(func) if func_name is None else func_name\n                    if result_has_id_level:\n                        result.columns = pandas.MultiIndex.from_tuples(\n                            [\n                                # 
`old_col[0]` stores values from the 'id'\n                                # level, the ones we want to preserve here\n                                (old_col[0], col, func_name)\n                                for old_col in result.columns\n                            ],\n                            names=[\n                                result.columns.names[0],\n                                result.columns.names[1],\n                                None,\n                            ],\n                        )\n                    else:\n                        result.columns = pandas.MultiIndex.from_tuples(\n                            [(col, func_name)] * len(result.columns),\n                            names=[result.columns.names[0], None],\n                        )\n\n                custom_results.append(result)\n\n            if insert_id_levels:\n                # As long as any `result` has an id-level we have to insert the level\n                # into every `result` so the number of levels matches\n                for idx, ext_result in enumerate(custom_results):\n                    if ext_result.columns.names[0] != cls.ID_LEVEL_NAME:\n                        custom_results[idx] = pandas.concat(\n                            [ext_result],\n                            keys=[cls.ID_LEVEL_NAME],\n                            names=[cls.ID_LEVEL_NAME],\n                            axis=1,\n                            copy=False,\n                        )\n\n                if native_agg_res is not None:\n                    native_agg_res = pandas.concat(\n                        [native_agg_res],\n                        keys=[cls.ID_LEVEL_NAME],\n                        names=[cls.ID_LEVEL_NAME],\n                        axis=1,\n                        copy=False,\n                    )\n\n            native_res_part = [] if native_agg_res is None else [native_agg_res]\n            parts = [*native_res_part, *custom_results]\n            if 
parts:\n                result = pandas.concat(parts, axis=1, copy=False)\n            else:\n                result = pandas.DataFrame(columns=result_columns)\n\n            # The order is naturally preserved if there's no custom aggregations\n            if preserve_aggregation_order and len(custom_aggs):\n                result = result.reindex(result_columns, axis=1)\n            return result\n\n        return aggregate_on_dict\n\n    @classmethod\n    def is_registered_implementation(cls, func: Callable) -> bool:\n        \"\"\"\n        Check whether the passed `func` was registered as a TreeReduce implementation.\n\n        Parameters\n        ----------\n        func : callable\n\n        Returns\n        -------\n        bool\n        \"\"\"\n        return callable(func) and hasattr(func, cls._GROUPBY_REDUCE_IMPL_FLAG)\n\n    @classmethod\n    def build_map_reduce_functions(\n        cls,\n        by,\n        axis: int,\n        groupby_kwargs: dict,\n        map_func: Union[dict, Callable[..., pandas.DataFrame]],\n        reduce_func: Union[dict, Callable[..., pandas.DataFrame]],\n        agg_args: list,\n        agg_kwargs: dict,\n        drop: bool = False,\n        method: Optional[str] = None,\n        finalizer_fn: Callable[[pandas.DataFrame], pandas.DataFrame] = None,\n    ) -> tuple[Callable, Callable]:\n        \"\"\"\n        Bind appropriate arguments to map and reduce functions.\n\n        Parameters\n        ----------\n        by : BaseQueryCompiler, column or index label, Grouper or list of such\n            Object that determine groups.\n        axis : {0, 1}\n            Axis to group and apply aggregation function along. 
0 means index axis\n            while 1 means column axis.\n        groupby_kwargs : dict\n            Dictionary which carries arguments for pandas.DataFrame.groupby.\n        map_func : dict or callable(pandas.DataFrameGroupBy) -> pandas.DataFrame\n            Function to apply to the `GroupByObject` at the Map phase.\n        reduce_func : dict or callable(pandas.DataFrameGroupBy) -> pandas.DataFrame\n            Function to apply to the `GroupByObject` at the Reduce phase.\n        agg_args : list-like\n            Positional arguments to pass to the aggregation functions.\n        agg_kwargs : dict\n            Keyword arguments to pass to the aggregation functions.\n        drop : bool, default: False\n            Indicates whether or not by-data came from the `self` frame.\n        method : str, optional\n            Name of the GroupBy aggregation function. This is a hint to be able to do special casing.\n        finalizer_fn : callable(pandas.DataFrame) -> pandas.DataFrame, optional\n            A callable to execute at the end of a groupby kernel against the groupby result.\n\n        Returns\n        -------\n        Tuple of callable\n            Tuple of map and reduce functions with bound arguments.\n        \"\"\"\n        # if by is a query compiler, then it will be broadcast explicitly via\n        # groupby_reduce method of the modin frame and so we don't want secondary\n        # implicit broadcasting via passing it as a function argument.\n        if hasattr(by, \"_modin_frame\"):\n            by = None\n\n        def _map(\n            df: pandas.DataFrame,\n            other: Optional[pandas.DataFrame] = None,\n            **kwargs: dict,\n        ) -> pandas.DataFrame:\n            def wrapper(\n                df: pandas.DataFrame, other: Optional[pandas.DataFrame] = None\n            ) -> pandas.DataFrame:\n                return cls.map(\n                    df,\n                    other=other,\n                    axis=axis,\n                   
 by=by,\n                    groupby_kwargs=groupby_kwargs.copy(),\n                    map_func=map_func,\n                    agg_args=agg_args,\n                    agg_kwargs=agg_kwargs,\n                    drop=drop,\n                    **kwargs,\n                )\n\n            try:\n                result = wrapper(df, other)\n            # This will happen with Arrow buffer read-only errors. We don't want to copy\n            # all the time, so this will try to fast-path the code first.\n            except ValueError:\n                result = wrapper(df.copy(), other if other is None else other.copy())\n            return result\n\n        def _reduce(df: pandas.DataFrame, **call_kwargs: dict) -> pandas.DataFrame:\n            def wrapper(df: pandas.DataFrame):\n                return cls.reduce(\n                    df,\n                    axis=axis,\n                    groupby_kwargs=groupby_kwargs,\n                    reduce_func=reduce_func,\n                    agg_args=agg_args,\n                    agg_kwargs=agg_kwargs,\n                    drop=drop,\n                    method=method,\n                    finalizer_fn=finalizer_fn,\n                    **call_kwargs,\n                )\n\n            try:\n                result = wrapper(df)\n            # This will happen with Arrow buffer read-only errors. We don't want to copy\n            # all the time, so this will try to fast-path the code first.\n            except ValueError:\n                result = wrapper(df.copy())\n            return result\n\n        return _map, _reduce\n"
  },
  {
    "path": "modin/core/dataframe/algebra/map.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module houses builder class for Map operator.\"\"\"\n\nfrom __future__ import annotations\n\nfrom typing import TYPE_CHECKING, Callable\n\nfrom .operator import Operator\n\nif TYPE_CHECKING:\n    import pandas\n\n    from modin.core.storage_formats.pandas.query_compiler import PandasQueryCompiler\n\n\nclass Map(Operator):\n    \"\"\"Builder class for Map operator.\"\"\"\n\n    @classmethod\n    def register(\n        cls,\n        function: Callable[..., pandas.DataFrame],\n        *call_args: tuple,\n        **call_kwds: dict,\n    ) -> Callable[..., PandasQueryCompiler]:\n        \"\"\"\n        Build Map operator that will be performed across each partition.\n\n        Parameters\n        ----------\n        function : callable(pandas.DataFrame, *args, **kwargs) -> pandas.DataFrame\n            Function that will be applied to each partition.\n            Function takes `pandas.DataFrame` and returns `pandas.DataFrame`\n            of the same shape.\n        *call_args : tuple\n            Args that will be passed to the returned function.\n        **call_kwds : dict\n            Kwargs that will be passed to the returned function.\n\n        
Returns\n        -------\n        callable\n            Function that takes query compiler and executes map function.\n        \"\"\"\n\n        def caller(\n            query_compiler: PandasQueryCompiler, *args: tuple, **kwargs: dict\n        ) -> PandasQueryCompiler:\n            \"\"\"Execute Map function against passed query compiler.\"\"\"\n            shape_hint = call_kwds.pop(\"shape_hint\", None) or query_compiler._shape_hint\n            return query_compiler.__constructor__(\n                query_compiler._modin_frame.map(\n                    lambda x: function(x, *args, **kwargs), *call_args, **call_kwds\n                ),\n                shape_hint=shape_hint,\n            )\n\n        return caller\n"
  },
  {
    "path": "modin/core/dataframe/algebra/operator.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module contains an interface for operator builder classes.\"\"\"\n\nfrom __future__ import annotations\n\nfrom typing import Callable, Optional\n\n\nclass Operator(object):\n    \"\"\"Interface for building operators that can execute in parallel across partitions.\"\"\"\n\n    def __init__(self) -> None:\n        raise ValueError(\n            \"Please use {}.register instead of the constructor\".format(\n                type(self).__name__\n            )\n        )\n\n    @classmethod\n    def register(cls, func: Callable, **kwargs: dict):\n        \"\"\"\n        Build operator that applies source function across the entire dataset.\n\n        Parameters\n        ----------\n        func : callable\n            Source function.\n        **kwargs : dict\n            Kwargs that will be passed to the builder function.\n\n        Returns\n        -------\n        callable\n        \"\"\"\n        raise NotImplementedError(\"Please implement in child class\")\n\n    @classmethod\n    def validate_axis(cls, axis: Optional[int]) -> int:\n        \"\"\"\n        Ensure that axis to apply function on has valid value.\n\n        Parameters\n        ----------\n      
  axis : int, optional\n            0 or None means apply on index, 1 means apply on columns.\n\n        Returns\n        -------\n        int\n            Integer representation of given axis.\n        \"\"\"\n        return 0 if axis is None else axis\n"
  },
  {
    "path": "modin/core/dataframe/algebra/reduce.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module houses builder class for Reduce operator.\"\"\"\n\nfrom __future__ import annotations\n\nfrom typing import TYPE_CHECKING, Callable, Optional\n\nfrom .operator import Operator\n\nif TYPE_CHECKING:\n    import pandas\n\n    from modin.core.storage_formats.pandas.query_compiler import PandasQueryCompiler\n\n\nclass Reduce(Operator):\n    \"\"\"Builder class for Reduce operator.\"\"\"\n\n    @classmethod\n    def register(\n        cls,\n        reduce_function: Callable[..., pandas.Series],\n        axis: Optional[int] = None,\n        shape_hint: Optional[str] = None,\n    ) -> Callable[..., PandasQueryCompiler]:\n        \"\"\"\n        Build Reduce operator that will be performed across rows/columns.\n\n        It's used if `func` reduces the dimension of partitions in contrast to `Fold`.\n\n        Parameters\n        ----------\n        reduce_function : callable(pandas.DataFrame, *args, **kwargs) -> pandas.Series\n            Source function.\n        axis : int, optional\n            Axis to apply function along.\n        shape_hint : {\"row\", \"column\", None}, default: None\n            Shape hint for the results known to be a column or a row, 
otherwise None.\n\n        Returns\n        -------\n        callable\n            Function that takes query compiler and executes Reduce function.\n        \"\"\"\n\n        def caller(\n            query_compiler: PandasQueryCompiler, *args: tuple, **kwargs: dict\n        ) -> PandasQueryCompiler:\n            \"\"\"Execute Reduce function against passed query compiler.\"\"\"\n            _axis = kwargs.get(\"axis\") if axis is None else axis\n            return query_compiler.__constructor__(\n                query_compiler._modin_frame.reduce(\n                    cls.validate_axis(_axis),\n                    lambda x: reduce_function(x, *args, **kwargs),\n                ),\n                shape_hint=shape_hint,\n            )\n\n        return caller\n"
  },
  {
    "path": "modin/core/dataframe/algebra/tree_reduce.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module houses builder class for TreeReduce operator.\"\"\"\n\nfrom __future__ import annotations\n\nfrom typing import TYPE_CHECKING, Callable, Optional\n\nfrom .operator import Operator\n\nif TYPE_CHECKING:\n    import pandas\n    from pandas._typing import DtypeObj\n\n    from modin.core.storage_formats.pandas.query_compiler import PandasQueryCompiler\n\n\nclass TreeReduce(Operator):\n    \"\"\"Builder class for TreeReduce operator.\"\"\"\n\n    @classmethod\n    def register(\n        cls,\n        map_function: Optional[Callable[..., pandas.DataFrame]],\n        reduce_function: Optional[Callable[..., pandas.Series]] = None,\n        axis: Optional[int] = None,\n        compute_dtypes: Optional[Callable[..., DtypeObj]] = None,\n    ) -> Callable[..., PandasQueryCompiler]:\n        \"\"\"\n        Build TreeReduce operator.\n\n        Parameters\n        ----------\n        map_function : callable(pandas.DataFrame, *args, **kwargs) -> pandas.DataFrame\n            Source map function.\n        reduce_function : callable(pandas.DataFrame, *args, **kwargs) -> pandas.Series, optional\n            Source reduce function.\n        axis : int, optional\n         
   Specifies axis to apply function along.\n        compute_dtypes : callable(pandas.Series, *func_args, **func_kwargs) -> DtypeObj, optional\n            Callable for computing dtypes.\n\n        Returns\n        -------\n        callable\n            Function that takes query compiler and executes passed functions\n            with TreeReduce algorithm.\n        \"\"\"\n        if reduce_function is None:\n            reduce_function = map_function\n\n        def caller(\n            query_compiler: PandasQueryCompiler, *args: tuple, **kwargs: dict\n        ) -> PandasQueryCompiler:\n            \"\"\"Execute TreeReduce function against passed query compiler.\"\"\"\n            _axis = kwargs.get(\"axis\") if axis is None else axis\n\n            new_dtypes = None\n            if compute_dtypes and query_compiler.frame_has_materialized_dtypes:\n                new_dtypes = str(compute_dtypes(query_compiler.dtypes, *args, **kwargs))\n\n            return query_compiler.__constructor__(\n                query_compiler._modin_frame.tree_reduce(\n                    cls.validate_axis(_axis),\n                    lambda x: map_function(x, *args, **kwargs),\n                    lambda y: reduce_function(y, *args, **kwargs),\n                    dtypes=new_dtypes,\n                )\n            )\n\n        return caller\n"
  },
  {
    "path": "modin/core/dataframe/base/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Base Modin Dataframe classes.\"\"\"\n"
  },
  {
    "path": "modin/core/dataframe/base/dataframe/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Base Modin Dataframe class and Axis and JoinType Enums.\"\"\"\n"
  },
  {
    "path": "modin/core/dataframe/base/dataframe/dataframe.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"\nModule contains class ModinDataframe.\n\nModinDataframe is a parent abstract class for any dataframe class.\n\"\"\"\n\nfrom abc import ABC, abstractmethod\nfrom typing import Callable, Dict, Hashable, List, Optional, Union\n\nfrom modin.core.dataframe.base.dataframe.utils import Axis, JoinType\n\n\nclass ModinDataframe(ABC):\n    \"\"\"\n    An abstract class that represents the Parent class for any Dataframe class.\n\n    This class is intended to specify the behaviors that a Dataframe must implement.\n\n    For more details about how these methods were chosen, please refer to this\n    (https://people.eecs.berkeley.edu/~totemtang/paper/Modin.pdf) paper, which specifies\n    a Dataframe algebra that this class exposes.\n    \"\"\"\n\n    @abstractmethod\n    def take_2d_labels_or_positional(\n        self,\n        row_labels: Optional[List[Hashable]] = None,\n        row_positions: Optional[List[int]] = None,\n        col_labels: Optional[List[Hashable]] = None,\n        col_positions: Optional[List[int]] = None,\n    ) -> \"ModinDataframe\":\n        \"\"\"\n        Mask rows and columns in the dataframe.\n\n        Allow users to perform selection and 
projection on the row and column labels (named notation),\n        in addition to the row and column number (positional notation).\n\n        Parameters\n        ----------\n        row_labels : list of hashable, optional\n            The row labels to extract.\n        row_positions : list of int, optional\n            The row positions to extract.\n        col_labels : list of hashable, optional\n            The column labels to extract.\n        col_positions : list of int, optional\n            The column positions to extract.\n\n        Returns\n        -------\n        ModinDataframe\n             A new ModinDataframe from the mask provided.\n\n        Notes\n        -----\n        If both `row_labels` and `row_positions` are provided, a ValueError is raised.\n        The same rule applies for `col_labels` and `col_positions`.\n        \"\"\"\n        pass\n\n    @abstractmethod\n    def filter_by_types(self, types: List[Hashable]) -> \"ModinDataframe\":\n        \"\"\"\n        Allow the user to specify a type or set of types by which to filter the columns.\n\n        Parameters\n        ----------\n        types : list of hashables\n            The types to filter columns by.\n\n        Returns\n        -------\n        ModinDataframe\n             A new ModinDataframe with only the columns whose dtypes appear in `types`.\n        \"\"\"\n        pass\n\n    @abstractmethod\n    def map(\n        self,\n        function: Callable,\n        axis: Optional[Union[int, Axis]] = None,\n        dtypes: Optional[str] = None,\n        new_columns: Optional[List[Hashable]] = None,\n    ) -> \"ModinDataframe\":\n        \"\"\"\n        Apply a user-defined function row-wise if `axis`=0, column-wise if `axis`=1, and cell-wise if `axis` is None.\n\n        Parameters\n        ----------\n        function : callable(row|col|cell) -> row|col|cell\n            The function to map across the dataframe.\n        axis : int or modin.core.dataframe.base.utils.Axis, optional\n 
           The axis to map over.\n        dtypes : str, optional\n            The data types for the result. This is an optimization\n            because there are functions that always result in a particular data\n            type, and this allows us to avoid (re)computing it.\n        new_columns : List[Hashable], optional\n            New column labels of the result, its length has to be identical\n            to the older columns. If not specified, old column labels are preserved.\n\n        Returns\n        -------\n        ModinDataframe\n             A new ModinDataframe with the map applied.\n\n        Notes\n        -----\n        This does not change the shape of the dataframe.\n        \"\"\"\n        pass\n\n    @abstractmethod\n    def filter(self, axis: Union[int, Axis], condition: Callable) -> \"ModinDataframe\":\n        \"\"\"\n        Filter data based on the function provided along the specified axis.\n\n        Parameters\n        ----------\n        axis : int or modin.core.dataframe.base.utils.Axis\n            The axis to filter over.\n        condition : callable(row|col) -> bool\n            The function to use for the filter. This function should filter the\n            data itself. 
It accepts either a row or column (depending on the axis argument) and\n            returns True to keep the row/col, and False to drop it.\n\n        Returns\n        -------\n        ModinDataframe\n             A new ModinDataframe filtered by content according to the filter provided by condition.\n        \"\"\"\n        pass\n\n    @abstractmethod\n    def explode(\n        self,\n        axis: Union[int, Axis],\n        function: Callable,\n        result_schema: Optional[Dict[Hashable, type]] = None,\n    ) -> \"ModinDataframe\":\n        \"\"\"\n        Explode data based on the function provided along the specified axis.\n\n        Parameters\n        ----------\n        axis : int or modin.core.dataframe.base.utils.Axis\n            The axis to expand over.\n        function : callable\n            The function to use to expand the data. This function should accept one\n            row/column, and return multiple.\n        result_schema : dictionary, optional\n            Mapping from column labels to data types that represents the types of the output dataframe.\n\n        Returns\n        -------\n        ModinDataframe\n            A new ModinDataframe with the specified axis expanded.\n\n        Notes\n        -----\n        Only one axis can be expanded at a time.\n\n        The user-defined function may increase the number of rows (columns if axis=1),\n        but it should not remove or drop rows.\n        \"\"\"\n        pass\n\n    @abstractmethod\n    def window(\n        self,\n        axis: Union[int, Axis],\n        reduce_fn: Callable,\n        window_size: int,\n        result_schema: Optional[Dict[Hashable, type]] = None,\n    ) -> \"ModinDataframe\":\n        \"\"\"\n        Apply a sliding window operator that acts as a GROUPBY on each window, reducing each window to a single row (column).\n\n        Parameters\n        ----------\n        axis : int or modin.core.dataframe.base.utils.Axis\n            The axis to slide over.\n        
reduce_fn : callable(rowgroup|colgroup) -> row|col\n            The reduce function to apply over the data.\n        window_size : int\n            The number of row/columns to pass to the function.\n            (The size of the sliding window).\n        result_schema : dictionary, optional\n            Mapping from column labels to data types that represents the types of the output dataframe.\n\n        Returns\n        -------\n        ModinDataframe\n            A new ModinDataframe with the reduce function applied over windows of the specified\n            axis.\n\n        Notes\n        -----\n        The user-defined reduce function must reduce each window's column\n        (row if axis=1) down to a single value.\n        \"\"\"\n        pass\n\n    @abstractmethod\n    def groupby(\n        self,\n        axis: Union[int, Axis],\n        by: Union[str, List[str]],\n        operator: Callable,\n        result_schema: Optional[Dict[Hashable, type]] = None,\n    ) -> \"ModinDataframe\":\n        \"\"\"\n        Generate groups based on values in the input column(s) and perform the specified operation on each.\n\n        Parameters\n        ----------\n        axis : int or modin.core.dataframe.base.utils.Axis\n            The axis to apply the grouping over.\n        by : string or list of strings\n            One or more column labels to use for grouping.\n        operator : callable\n            The operation to carry out on each of the groups. 
The operator is another\n            algebraic operator with its own user-defined function parameter, depending\n            on the output desired by the user.\n        result_schema : dictionary, optional\n            Mapping from column labels to data types that represents the types of the output dataframe.\n\n        Returns\n        -------\n        ModinDataframe\n            A new ModinDataframe containing the groupings specified, with the operator\n            applied to each group.\n\n        Notes\n        -----\n        No communication between groups is allowed in this algebra implementation.\n\n        The number of rows (columns if axis=1) returned by the user-defined function\n        passed to the groupby may be at most the number of rows in the group, and\n        may be as small as a single row.\n\n        Unlike the pandas API, an intermediate \"GROUP BY\" object is not present in this\n        algebra implementation.\n        \"\"\"\n        pass\n\n    @abstractmethod\n    def reduce(\n        self,\n        axis: Union[int, Axis],\n        function: Callable,\n        dtypes: Optional[str] = None,\n    ) -> \"ModinDataframe\":\n        \"\"\"\n        Perform a user-defined aggregation on the specified axis, where the axis reduces down to a singleton.\n\n        Parameters\n        ----------\n        axis : int or modin.core.dataframe.base.utils.Axis\n            The axis to perform the reduce over.\n        function : callable(row|col) -> single value\n            The reduce function to apply to each column.\n        dtypes : str, optional\n            The data types for the result. 
This is an optimization\n            because there are functions that always result in a particular data\n            type, and this allows us to avoid (re)computing it.\n\n        Returns\n        -------\n        ModinDataframe\n            A new ModinDataframe with the same columns as the previous, with only a single row.\n\n        Notes\n        -----\n        The user-defined function must reduce to a single value.\n        \"\"\"\n        pass\n\n    @abstractmethod\n    def tree_reduce(\n        self,\n        axis: Union[int, Axis],\n        map_func: Callable,\n        reduce_func: Optional[Callable] = None,\n        dtypes: Optional[str] = None,\n    ) -> \"ModinDataframe\":\n        \"\"\"\n        Perform a user-defined aggregation on the specified axis, where the axis reduces down to a singleton using a tree-reduce computation pattern.\n\n        The map function is applied first over multiple partitions of a column, and then the reduce\n        function (if specified, otherwise the map function is applied again) is applied to the\n        results to produce a single value.\n\n        Parameters\n        ----------\n        axis : int or modin.core.dataframe.base.utils.Axis\n            The axis to perform the tree reduce over.\n        map_func : callable(row|col) -> row|col|single value\n            The map function to apply to each column.\n        reduce_func : callable(row|col) -> single value, optional\n            The reduce function to apply to the results of the map function.\n        dtypes : str, optional\n            The data types for the result. 
This is an optimization\n            because there are functions that always result in a particular data\n            type, and this allows us to avoid (re)computing it.\n\n        Returns\n        -------\n        ModinDataframe\n            A new ModinDataframe with the same columns as the previous, with only a single row.\n\n        Notes\n        -----\n        The user-defined function must reduce to a single value.\n\n        If the user-defined function requires access to the entire column, please use reduce instead.\n        \"\"\"\n        pass\n\n    @abstractmethod\n    def infer_types(self, columns_list: List[str]) -> \"ModinDataframe\":\n        \"\"\"\n        Determine the compatible type shared by all values in the specified columns, and coerce them to that type.\n\n        Parameters\n        ----------\n        columns_list : list of strings\n            List of column labels to infer and induce types over.\n\n        Returns\n        -------\n        ModinDataframe\n            A new ModinDataframe with the inferred schema.\n        \"\"\"\n        pass\n\n    @abstractmethod\n    def join(\n        self,\n        axis: Union[int, Axis],\n        condition: Callable,\n        other: \"ModinDataframe\",\n        join_type: Union[str, JoinType],\n    ) -> \"ModinDataframe\":\n        \"\"\"\n        Join this dataframe with the other.\n\n        Parameters\n        ----------\n        axis : int or modin.core.dataframe.base.utils.Axis\n            The axis to perform the join on.\n        condition : callable\n            Function that determines which rows should be joined. The condition can be a\n            simple equality, e.g. \"left.col1 == right.col1\" or can be arbitrarily complex.\n        other : ModinDataframe\n            The other data to join with, i.e. 
the right dataframe.\n        join_type : string  {\"inner\", \"left\", \"right\", \"outer\"} or modin.core.dataframe.base.utils.JoinType\n            The type of join to perform.\n\n        Returns\n        -------\n        ModinDataframe\n            A new ModinDataframe that is the result of applying the specified join over the two\n            dataframes.\n\n        Notes\n        -----\n        During the join, this dataframe is considered the left, while the other is\n        treated as the right.\n\n        Only inner joins, left outer, right outer, and full outer joins are currently supported.\n        Support for other join types (e.g. natural join) may be implemented in the future.\n        \"\"\"\n        pass\n\n    @abstractmethod\n    def concat(\n        self,\n        axis: Union[int, Axis],\n        others: Union[\"ModinDataframe\", List[\"ModinDataframe\"]],\n    ) -> \"ModinDataframe\":\n        \"\"\"\n        Append rows/columns along the specified axis from multiple dataframes.\n\n        Parameters\n        ----------\n        axis : int or modin.core.dataframe.base.utils.Axis\n            The axis on which to perform the concatenation.\n        others : ModinDataframe or list of ModinDataframes\n            The other ModinDataframe(s) to concatenate.\n\n        Returns\n        -------\n        ModinDataframe\n            A new ModinDataframe that is the result of concatenating the dataframes over the\n            specified axis.\n\n        Notes\n        -----\n        The concat operator incurs fixed overheads, and so this algebra places no\n        limit to the number of dataframes that may be concatenated in this way.\n        \"\"\"\n        pass\n\n    @abstractmethod\n    def transpose(self) -> \"ModinDataframe\":\n        \"\"\"\n        Swap the row and column axes.\n\n        Returns\n        -------\n        ModinDataframe\n            A new ModinDataframe with the row and column axes swapped.\n\n        Notes\n        -----\n     
   Transposing a dataframe is expensive, and so it is performed lazily. The axes are swapped\n        logically immediately, but the physical swap does not occur until absolutely necessary,\n        which helps motivate the axis argument to the other operators in this algebra.\n        \"\"\"\n        pass\n\n    @abstractmethod\n    def to_labels(self, column_labels: Union[str, List[str]]) -> \"ModinDataframe\":\n        \"\"\"\n        Replace the row labels with one or more columns of data.\n\n        Parameters\n        ----------\n        column_labels : string or list of strings\n            Column label(s) to use as the new row labels.\n\n        Returns\n        -------\n        ModinDataframe\n            A new ModinDataframe with the row labels replaced by the specified columns.\n\n        Notes\n        -----\n        When multiple column labels are specified, a hierarchical set of labels is created, ordered by the ordering\n        of labels in the input.\n        \"\"\"\n        pass\n\n    @abstractmethod\n    def from_labels(self) -> \"ModinDataframe\":\n        \"\"\"\n        Move the row labels into the data at position 0, and sets the row labels to the positional notation.\n\n        Returns\n        -------\n        ModinDataframe\n            A new ModinDataframe with the row labels moved into the data.\n\n        Notes\n        -----\n        In the case that the dataframe has hierarchical labels, all label \"levels\" are inserted into the dataframe\n        in the order they occur in the labels, with the outermost being in position 0.\n        \"\"\"\n        pass\n\n    @abstractmethod\n    def rename(\n        self,\n        new_row_labels: Optional[Union[Dict[Hashable, Hashable], Callable]] = None,\n        new_col_labels: Optional[Union[Dict[Hashable, Hashable], Callable]] = None,\n    ) -> \"ModinDataframe\":\n        \"\"\"\n        Replace the row and column labels with the specified new labels.\n\n        Parameters\n        
----------\n        new_row_labels : dictionary or callable, optional\n            Mapping or callable that relates old row labels to new labels.\n        new_col_labels : dictionary or callable, optional\n            Mapping or callable that relates old col labels to new labels.\n\n        Returns\n        -------\n        ModinDataframe\n            A new ModinDataframe with the new row and column labels.\n        \"\"\"\n        pass\n\n    @abstractmethod\n    def sort_by(\n        self,\n        axis: Union[int, Axis],\n        labels: Union[str, List[str]],\n        ascending: bool = True,\n    ) -> \"ModinDataframe\":\n        \"\"\"\n        Logically reorder rows (columns if axis=1) lexicographically by the data in a column or set of columns.\n\n        Parameters\n        ----------\n        axis : int or modin.core.dataframe.base.utils.Axis\n            The axis to perform the sort over.\n        labels : string or list of strings\n            Column (row if axis=1) label(s) to use to determine lexicographical ordering. If multiple\n            columns (rows if axis=1) are provided, the sort is performed on the first column (row if axis=1),\n            with ties broken by the other columns (rows if axis=1) provided.\n        ascending : boolean, default: True\n            Whether to sort in ascending or descending order.\n\n        Returns\n        -------\n        ModinDataframe\n            A new ModinDataframe sorted into lexicographical order by the specified column(s).\n        \"\"\"\n        pass\n"
  },
  {
    "path": "modin/core/dataframe/base/dataframe/utils.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"\nModule contains useful enums for Modin.\n\nAxis is an enum that represents the `axis` argument for dataframe operations.\nJoinType is an enum that represents the `join_type` or `how` argument for the join algebra operator.\n\"\"\"\n\nfrom enum import Enum\nfrom typing import Dict, List, Sequence, Tuple, cast\n\nimport pandas\nfrom pandas._typing import IndexLabel\nfrom pandas.api.types import is_scalar\nfrom pandas.core.dtypes.common import is_integer_dtype\n\n\nclass Axis(Enum):  # noqa: PR01\n    \"\"\"\n    An enum that represents the `axis` argument provided to the algebra operators.\n\n    The enum has 3 values - ROW_WISE to represent the row axis, COL_WISE to represent the\n    column axis, and CELL_WISE to represent no axis. 
ROW_WISE operations iterate over the rows,\n    COL_WISE operations over the columns, and CELL_WISE operations over any of the partitioning\n    schemes that are supported in Modin (row-wise, column-wise, or block-wise).\n    \"\"\"\n\n    ROW_WISE = 0\n    COL_WISE = 1\n    CELL_WISE = None\n\n\nclass JoinType(Enum):  # noqa: PR01\n    \"\"\"\n    An enum that represents the `join_type` argument provided to the algebra operators.\n\n    The enum has 4 values - INNER to represent inner joins, LEFT to represent left joins, RIGHT to\n    represent right joins, and OUTER to represent outer joins.\n    \"\"\"\n\n    INNER = \"inner\"\n    LEFT = \"left\"\n    RIGHT = \"right\"\n    OUTER = \"outer\"\n\n\ndef join_columns(\n    left: pandas.Index,\n    right: pandas.Index,\n    left_on: IndexLabel,\n    right_on: IndexLabel,\n    suffixes: Tuple[str, str],\n) -> Tuple[pandas.Index, Dict[IndexLabel, IndexLabel], Dict[IndexLabel, IndexLabel]]:\n    \"\"\"\n    Compute resulting columns for the two dataframes being merged.\n\n    Parameters\n    ----------\n    left : pandas.Index\n        Columns of the left frame to join.\n    right : pandas.Index\n        Columns of the right frame to join.\n    left_on : list-like or scalar\n        Column names on which the frames are joined in the left DataFrame.\n    right_on : list-like or scalar\n        Column names on which the frames are joined in the right DataFrame.\n    suffixes : tuple[str, str]\n        A 2-length sequence containing suffixes to append to the intersected columns.\n\n    Returns\n    -------\n    pandas.Index, dict[IndexLabel -> IndexLabel], dict[IndexLabel -> IndexLabel]\n        Returns columns for the resulting frame and mappings of old to new column\n        names for `left` and `right` accordingly.\n\n    Raises\n    ------\n    NotImplementedError\n        Raised when one of the keys to join is an index level, pandas behaviour is really\n        complicated in this case, so we're not supporting this 
case for now.\n    \"\"\"\n    # using `cast` to make `mypy` acknowledge that the variable is now ensured to be `Sequence[IndexLabel]`\n    left_on = cast(Sequence[IndexLabel], [left_on] if is_scalar(left_on) else left_on)\n    right_on = cast(\n        Sequence[IndexLabel], [right_on] if is_scalar(right_on) else right_on\n    )\n\n    # handling a simple case of merging on one column and when the column is located in an index\n    if len(left_on) == 1 and len(right_on) == 1 and left_on[0] == right_on[0]:\n        if left_on[0] not in left and right_on[0] not in right:\n            # in this case the 'on' column will stay in the index, so we can simply\n            # drop the 'left/right_on' values and proceed as normal\n            left_on = []\n            right_on = []\n        # in other cases, we can simply add the index name to columns and proceed as normal\n        # on python 3.9 with pandas-stubs 2.2, these lines will warn about insert being an untyped call,\n        # but this error is no longer present on higher versions\n        elif left_on[0] not in left:\n            left = left.insert(loc=0, item=left_on[0])  # type: ignore[no-untyped-call, unused-ignore]\n        elif right_on[0] not in right:\n            right = right.insert(loc=0, item=right_on[0])  # type: ignore[no-untyped-call, unused-ignore]\n\n    if any(col not in left for col in left_on) or any(\n        col not in right for col in right_on\n    ):\n        raise NotImplementedError(\n            \"Cases, where one of the keys to join is an index level, are not yet supported.\"\n        )\n\n    left_conflicts = set(left) & (set(right) - set(right_on))\n    right_conflicts = set(right) & (set(left) - set(left_on))\n    conflicting_cols = left_conflicts | right_conflicts\n\n    def _get_new_name(col: IndexLabel, suffix: str) -> IndexLabel:\n        if col in conflicting_cols:\n            return (\n                (f\"{col[0]}{suffix}\", *col[1:])\n                if isinstance(col, tuple)\n 
               else f\"{col}{suffix}\"\n            )\n        else:\n            return col\n\n    left_renamer: Dict[IndexLabel, IndexLabel] = {}\n    right_renamer: Dict[IndexLabel, IndexLabel] = {}\n    new_left: List = []\n    new_right: List = []\n\n    for col in left:\n        new_name = _get_new_name(col, suffixes[0])\n        new_left.append(new_name)\n        left_renamer[col] = new_name\n\n    for col in right:\n        # If we're joining on the column that exists in both frames then it was already\n        # taken from the 'left', don't want to take it again from the 'right'.\n        if not (col in left_on and col in right_on):\n            new_name = _get_new_name(col, suffixes[1])\n            new_right.append(new_name)\n            right_renamer[col] = new_name\n\n    new_columns = pandas.Index(new_left + new_right)\n    return new_columns, left_renamer, right_renamer\n\n\ndef is_trivial_index(index: pandas.Index) -> bool:\n    \"\"\"\n    Check if the index is a trivial index, i.e. a sequence [0..n].\n\n    Parameters\n    ----------\n    index : pandas.Index\n        An index to check.\n\n    Returns\n    -------\n    bool\n    \"\"\"\n    if len(index) == 0:\n        return True\n    if isinstance(index, pandas.RangeIndex):\n        return index.start == 0 and index.step == 1\n    if not (isinstance(index, pandas.Index) and is_integer_dtype(index)):\n        return False\n    return (\n        index.is_monotonic_increasing\n        and index.is_unique\n        and index.min() == 0\n        and index.max() == len(index) - 1\n    )\n"
  },
  {
    "path": "modin/core/dataframe/base/interchange/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Base Modin Dataframe functionality related to data exchange protocols.\"\"\"\n"
  },
  {
    "path": "modin/core/dataframe/base/interchange/dataframe_protocol/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"\nBase Modin Dataframe functionality related to the dataframe exchange protocol.\n\nSee more in https://data-apis.org/dataframe-protocol/latest/index.html.\n\"\"\"\n"
  },
  {
    "path": "modin/core/dataframe/base/interchange/dataframe_protocol/dataframe.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"\nDataframe exchange protocol implementation.\n\nSee more in https://data-apis.org/dataframe-protocol/latest/index.html.\n\"\"\"\n\nfrom abc import ABC, abstractmethod\nfrom typing import Any, Dict, Iterable, Optional, Sequence, Tuple, TypedDict\n\nfrom .utils import ColumnNullType, DlpackDeviceType, DTypeKind\n\n\nclass ColumnBuffers(TypedDict):  # noqa: GL08\n    # first element is a buffer containing the column data;\n    # second element is the data buffer's associated dtype\n    data: Tuple[\"ProtocolBuffer\", Any]\n\n    # first element is a buffer containing mask values indicating missing data;\n    # second element is the mask value buffer's associated dtype.\n    # None if the null representation is not a bit or byte mask\n    validity: Optional[Tuple[\"ProtocolBuffer\", Any]]\n\n    # first element is a buffer containing the offset values for\n    # variable-size binary data (e.g., variable-length strings);\n    # second element is the offsets buffer's associated dtype.\n    # None if the data buffer does not have an associated offsets buffer\n    offsets: Optional[Tuple[\"ProtocolBuffer\", Any]]\n\n\nclass CategoricalDescription(TypedDict):  # 
noqa: GL08\n    # whether the ordering of dictionary indices is semantically meaningful\n    is_ordered: bool\n    # whether a column-style mapping of categorical values to other objects exists\n    is_dictionary: bool\n    # None if not a column-style categorical.\n    categories: Optional[\"ProtocolColumn\"]\n\n\nclass ProtocolBuffer(ABC):\n    \"\"\"\n    Data in the buffer is guaranteed to be contiguous in memory.\n\n    Note that there is no dtype attribute present, a buffer can be thought of\n    as simply a block of memory. However, if the column that the buffer is\n    attached to has a dtype that's supported by DLPack and ``__dlpack__`` is\n    implemented, then that dtype information will be contained in the return\n    value from ``__dlpack__``.\n\n    This distinction is useful to support both (a) data exchange via DLPack on a\n    buffer and (b) dtypes like variable-length strings which do not have a\n    fixed number of bytes per element.\n    \"\"\"\n\n    @property\n    @abstractmethod\n    def bufsize(self) -> int:\n        \"\"\"\n        Buffer size in bytes.\n\n        Returns\n        -------\n        int\n        \"\"\"\n        pass\n\n    @property\n    @abstractmethod\n    def ptr(self) -> int:\n        \"\"\"\n        Pointer to start of the buffer as an integer.\n\n        Returns\n        -------\n        int\n        \"\"\"\n        pass\n\n    @abstractmethod\n    def __dlpack__(self) -> Any:\n        \"\"\"\n        Produce DLPack capsule (see array API standard).\n\n        DLPack not implemented in NumPy yet, so leave it out here.\n\n        Raises\n        ------\n        ``TypeError`` if the buffer contains unsupported dtypes.\n        ``NotImplementedError`` if DLPack support is not implemented.\n\n        Notes\n        -----\n        Useful to have to connect to array libraries. 
Support optional because\n        it's not completely trivial to implement for a Python-only library.\n        \"\"\"\n        pass\n\n    @abstractmethod\n    def __dlpack_device__(self) -> Tuple[DlpackDeviceType, Optional[int]]:\n        \"\"\"\n        Device type and device ID for where the data in the buffer resides.\n\n        Uses device type codes matching DLPack. Enum members are:\n            - CPU = 1\n            - CUDA = 2\n            - CPU_PINNED = 3\n            - OPENCL = 4\n            - VULKAN = 7\n            - METAL = 8\n            - VPI = 9\n            - ROCM = 10\n\n        Returns\n        -------\n        tuple\n            Device type and device ID.\n\n        Notes\n        -----\n        Must be implemented even if ``__dlpack__`` is not.\n        \"\"\"\n        pass\n\n\nclass ProtocolColumn(ABC):\n    \"\"\"\n    A column object, with only the methods and properties required by the interchange protocol defined.\n\n    A column can contain one or more chunks. Each chunk can contain up to three\n    buffers - a data buffer, a mask buffer (depending on null representation),\n    and an offsets buffer (if variable-size binary; e.g., variable-length strings).\n\n    TBD: Arrow has a separate \"null\" dtype, and has no separate mask concept.\n         Instead, it seems to use \"children\" for both columns with a bit mask,\n         and for nested dtypes. Unclear whether this is elegant or confusing.\n         This design requires checking the null representation explicitly.\n         The Arrow design requires checking:\n         1. the ARROW_FLAG_NULLABLE (for sentinel values)\n         2. if a column has two children, combined with one of those children\n            having a null dtype.\n         Making the mask concept explicit seems useful. 
One null dtype would\n         not be enough to cover both bit and byte masks, so that would mean\n         even more checking if we did it the Arrow way.\n    TBD: there's also the \"chunk\" concept here, which is implicit in Arrow as\n         multiple buffers per array (= column here). Semantically it may make\n         sense to have both: chunks were meant for example for lazy evaluation\n         of data which doesn't fit in memory, while multiple buffers per column\n         could also come from doing a selection operation on a single\n         contiguous buffer.\n         Given these concepts, one would expect chunks to be all of the same\n         size (say a 10,000 row dataframe could have 10 chunks of 1,000 rows),\n         while multiple buffers could have data-dependent lengths. Not an issue\n         in pandas if one column is backed by a single NumPy array, but in\n         Arrow it seems possible.\n         Are multiple chunks *and* multiple buffers per column necessary for\n         the purposes of this interchange protocol, or must producers either\n         reuse the chunk concept for this or copy the data?\n\n    Notes\n    -----\n    This ProtocolColumn object can only be produced by ``__dataframe__``,\n    so doesn't need its own version or ``__column__`` protocol.\n    \"\"\"\n\n    @abstractmethod\n    def size(self) -> int:\n        \"\"\"\n        Size of the column, in elements.\n\n        Corresponds to `DataFrame.num_rows()` if column is a single chunk;\n        equal to size of this current chunk otherwise.\n\n        Is a method rather than a property because it may cause a (potentially\n        expensive) computation for some dataframe implementations.\n\n        Returns\n        -------\n        int\n            Size of the column, in elements.\n        \"\"\"\n        pass\n\n    @property\n    @abstractmethod\n    def offset(self) -> int:\n        \"\"\"\n        Get the offset of first element.\n\n        May be > 0 if using 
chunks; for example for a column\n        with N chunks of equal size M (only the last chunk may be shorter),\n        ``offset = n * M``, ``n = 0 .. N-1``.\n\n        Returns\n        -------\n        int\n            The offset of first element.\n        \"\"\"\n        pass\n\n    @property\n    @abstractmethod\n    def dtype(self) -> Tuple[DTypeKind, int, str, str]:\n        \"\"\"\n        Dtype description as a tuple ``(kind, bit-width, format string, endianness)``.\n\n        * Kind : DTypeKind\n        * Bit-width : the number of bits as an integer\n        * Format string : data type description format string in Apache Arrow C\n                        Data Interface format.\n        * Endianness : currently only native endianness (``=``) is supported\n\n        Returns\n        -------\n        tuple\n            ``(kind, bit-width, format string, endianness)``.\n\n        Notes\n        -----\n        - Kind specifiers are aligned with DLPack where possible\n          (hence the jump to 20, leave enough room for future extension).\n        - Masks must be specified as boolean with either bit width 1 (for bit masks)\n          or 8 (for byte masks).\n        - Dtype width in bits was preferred over bytes\n        - Endianness isn't too useful, but included now in case in the future\n          we need to support non-native endianness\n        - Went with Apache Arrow format strings over NumPy format strings\n          because they're more complete from a dataframe perspective\n        - Format strings are mostly useful for datetime specification, and for categoricals.\n        - For categoricals, the format string describes the type of the categorical\n          in the data buffer. In case of a separate encoding of the categorical\n          (e.g. 
an integer to string mapping), this can be derived from ``self.describe_categorical``.\n        - Data types not included: complex, Arrow-style null, binary, decimal,\n          and nested (list, struct, map, union) dtypes.\n        \"\"\"\n        pass\n\n    @property\n    @abstractmethod\n    def describe_categorical(self) -> CategoricalDescription:\n        \"\"\"\n        If the dtype is categorical, there are two options.\n\n        - There are only values in the data buffer.\n        - There is a separate non-categorical Column encoding categorical values.\n\n        TBD: are there any other in-memory representations that are needed?\n\n        Returns\n        -------\n        dict\n            Content of returned dict:\n            - \"is_ordered\" : bool, whether the ordering of dictionary indices is\n                             semantically meaningful.\n            - \"is_dictionary\" : bool, whether a mapping of\n                                categorical values to other objects exists\n            - \"categories\" : Column representing the (implicit) mapping of indices to\n                             category values (e.g. an array of cat1, cat2, ...).\n                             None if not a dictionary-style categorical.\n\n        Raises\n        ------\n        ``TypeError`` if the dtype is not categorical.\n        \"\"\"\n        pass\n\n    @property\n    @abstractmethod\n    def describe_null(self) -> Tuple[ColumnNullType, Any]:\n        \"\"\"\n        Return the missing value (or \"null\") representation the column dtype uses.\n\n        Return as a tuple ``(kind, value)``.\n        * Kind: ColumnNullType\n        * Value : if kind is \"sentinel value\", the actual value. If kind is a bit\n          mask or a byte mask, the value (0 or 1) indicating a missing value. 
None\n          otherwise.\n\n        Returns\n        -------\n        tuple\n            ``(kind, value)``.\n        \"\"\"\n        pass\n\n    @property\n    @abstractmethod\n    def null_count(self) -> int:\n        \"\"\"\n        Get number of null elements, if known.\n\n        Returns\n        -------\n        int\n\n        Notes\n        -----\n        Arrow uses -1 to indicate \"unknown\", but None seems cleaner.\n        \"\"\"\n        pass\n\n    @property\n    @abstractmethod\n    def metadata(self) -> Dict[str, Any]:\n        \"\"\"\n        Get the metadata for the column.\n\n        See `DataFrame.metadata` for more details.\n\n        Returns\n        -------\n        dict\n        \"\"\"\n        pass\n\n    @abstractmethod\n    def num_chunks(self) -> int:\n        \"\"\"\n        Return the number of chunks the column consists of.\n\n        Returns\n        -------\n        int\n           The number of chunks the column consists of.\n        \"\"\"\n        pass\n\n    @abstractmethod\n    def get_chunks(self, n_chunks: Optional[int] = None) -> Iterable[\"ProtocolColumn\"]:\n        \"\"\"\n        Return an iterator yielding the chunks.\n\n        By default ``n_chunks=None``, yields the chunks that the data is stored as by the producer.\n        If given, ``n_chunks`` must be a multiple of ``self.num_chunks()``,\n        meaning the producer must subdivide each chunk before yielding it.\n\n        Parameters\n        ----------\n        n_chunks : int, optional\n            Number of chunks to yield.\n\n        Yields\n        ------\n        ProtocolColumn\n            One ``ProtocolColumn`` object per chunk.\n\n        Raises\n        ------\n        ``RuntimeError`` if ``n_chunks`` is not a multiple of ``self.num_chunks()``.\n        \"\"\"\n        pass\n\n    @abstractmethod\n    def get_buffers(self) -> ColumnBuffers:\n        \"\"\"\n        Return a dictionary containing the underlying buffers.\n\n        Returns\n        -------\n        dict\n     
       - \"data\": a two-element tuple whose first element is a buffer\n              containing the data and whose second element is the data buffer's associated dtype.\n            - \"validity\": a two-element tuple whose first element is a buffer\n              containing mask values indicating missing data and\n              whose second element is the mask value buffer's\n              associated dtype. None if the null representation is not a bit or byte mask.\n            - \"offsets\": a two-element tuple whose first element is a buffer\n              containing the offset values for variable-size binary data\n              (e.g., variable-length strings) and whose second element is the offsets\n              buffer's associated dtype. None if the data buffer does not have\n              an associated offsets buffer.\n        \"\"\"\n        pass\n\n\nclass ProtocolDataframe(ABC):\n    \"\"\"\n    A data frame class, with only the methods required by the interchange protocol defined.\n\n    Instances of this (private) class are returned from\n    ``modin.core.dataframe.base.dataframe.dataframe.ModinDataframe.__dataframe__``\n    as objects with the methods and attributes defined on this class.\n\n    A \"data frame\" represents an ordered collection of named columns.\n    A column's \"name\" must be a unique string. 
Columns may be accessed by name or by position.\n    This could be a public data frame class, or an object with the methods and\n    attributes defined on this ProtocolDataframe class could be returned from the\n    ``__dataframe__`` method of a public data frame class in a library adhering\n    to the dataframe interchange protocol specification.\n    \"\"\"\n\n    version = 0  # version of the protocol\n\n    @abstractmethod\n    def __dataframe__(\n        self, nan_as_null: bool = False, allow_copy: bool = True\n    ) -> \"ProtocolDataframe\":\n        \"\"\"\n        Construct a new dataframe interchange object, potentially changing the parameters.\n\n        See more about the protocol in https://data-apis.org/dataframe-protocol/latest/index.html.\n\n        Parameters\n        ----------\n        nan_as_null : bool, default: False\n            A keyword intended for the consumer to tell the producer\n            to overwrite null values in the data with ``NaN``.\n            This currently has no effect; once support for nullable extension\n            dtypes is added, this value should be propagated to columns.\n        allow_copy : bool, default: True\n            A keyword that defines whether or not the library is allowed\n            to make a copy of the data. For example, copying data would be necessary\n            if a library supports strided buffers, given that this protocol\n            specifies contiguous buffers. 
Currently, if the flag is set to ``False``\n            and a copy is needed, a ``RuntimeError`` will be raised.\n\n        Returns\n        -------\n        ProtocolDataframe\n        \"\"\"\n        pass\n\n    @property\n    @abstractmethod\n    def metadata(self) -> Dict[str, Any]:\n        \"\"\"\n        Get the metadata for the data frame, as a dictionary with string keys.\n\n        The contents of `metadata` may be anything, they are meant for a library\n        to store information that it needs to, e.g., roundtrip losslessly or\n        for two implementations to share data that is not (yet) part of the\n        interchange protocol specification. For avoiding collisions with other\n        entries, please name the keys with the name of the library\n        followed by a period and the desired name, e.g., ``pandas.indexcol``.\n\n        Returns\n        -------\n        dict\n        \"\"\"\n        pass\n\n    @abstractmethod\n    def num_columns(self) -> int:\n        \"\"\"\n        Return the number of columns in the ProtocolDataframe.\n\n        Returns\n        -------\n        int\n            The number of columns in the ProtocolDataframe.\n        \"\"\"\n        pass\n\n    @abstractmethod\n    def num_rows(self) -> Optional[int]:\n        \"\"\"\n        Return the number of rows in the ProtocolDataframe, if available.\n\n        Returns\n        -------\n        int or None\n            The number of rows in the ProtocolDataframe, or ``None`` if it is not available.\n        \"\"\"\n        pass\n\n    @abstractmethod\n    def num_chunks(self) -> int:\n        \"\"\"\n        Return the number of chunks the ProtocolDataframe consists of.\n\n        Returns\n        -------\n        int\n            The number of chunks the ProtocolDataframe consists of.\n        \"\"\"\n        pass\n\n    @abstractmethod\n    def column_names(self) -> Iterable[str]:\n        \"\"\"\n        Return an iterator yielding the column names.\n\n        Yields\n        ------\n        str\n            The 
name of the column(s).\n        \"\"\"\n        pass\n\n    @abstractmethod\n    def get_column(self, i: int) -> ProtocolColumn:\n        \"\"\"\n        Return the column at the indicated position.\n\n        Parameters\n        ----------\n        i : int\n            Positional index of the column to be returned.\n\n        Returns\n        -------\n        ProtocolColumn\n            The column at the indicated position.\n        \"\"\"\n        pass\n\n    @abstractmethod\n    def get_column_by_name(self, name: str) -> ProtocolColumn:\n        \"\"\"\n        Return the column whose name is the indicated name.\n\n        Parameters\n        ----------\n        name : str\n            String label of the column to be returned.\n\n        Returns\n        -------\n        ProtocolColumn\n            The column whose name is the indicated name.\n        \"\"\"\n        pass\n\n    @abstractmethod\n    def get_columns(self) -> Iterable[ProtocolColumn]:\n        \"\"\"\n        Return an iterator yielding the columns.\n\n        Yields\n        ------\n        ProtocolColumn\n            The ``ProtocolColumn`` object(s).\n        \"\"\"\n        pass\n\n    @abstractmethod\n    def select_columns(self, indices: Sequence[int]) -> \"ProtocolDataframe\":\n        \"\"\"\n        Create a new ProtocolDataframe by selecting a subset of columns by index.\n\n        Parameters\n        ----------\n        indices : Sequence[int]\n            Column indices to be selected out of the ProtocolDataframe.\n\n        Returns\n        -------\n        ProtocolDataframe\n            A new ProtocolDataframe with a subset of columns selected by index.\n        \"\"\"\n        pass\n\n    @abstractmethod\n    def select_columns_by_name(self, names: Sequence[str]) -> \"ProtocolDataframe\":\n        \"\"\"\n        Create a new ProtocolDataframe by selecting a subset of columns by name.\n\n        Parameters\n        ----------\n        names : Sequence[str]\n            Column names to be selected out of the 
ProtocolDataframe.\n\n        Returns\n        -------\n        ProtocolDataframe\n            A new ProtocolDataframe with a subset of columns selected by name.\n        \"\"\"\n        pass\n\n    @abstractmethod\n    def get_chunks(\n        self, n_chunks: Optional[int] = None\n    ) -> Iterable[\"ProtocolDataframe\"]:\n        \"\"\"\n        Return an iterator yielding the chunks.\n\n        By default `n_chunks=None`, yields the chunks that the data is stored as by the producer.\n        If given, `n_chunks` must be a multiple of `self.num_chunks()`,\n        meaning the producer must subdivide each chunk before yielding it.\n\n        Parameters\n        ----------\n        n_chunks : int, optional\n            Number of chunks to yield.\n\n        Yields\n        ------\n        ProtocolDataframe\n            The ``ProtocolDataframe`` object(s).\n\n        Raises\n        ------\n        RuntimeError\n            If ``n_chunks`` is not a multiple of ``self.num_chunks()``.\n        \"\"\"\n        pass\n"
  },
  {
    "path": "modin/core/dataframe/base/interchange/dataframe_protocol/utils.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"\nDataframe exchange protocol implementation.\n\nSee more in https://data-apis.org/dataframe-protocol/latest/index.html.\n\"\"\"\n\nimport enum\nimport re\nfrom typing import Optional, Union\n\nimport numpy as np\nimport pandas\nfrom pandas.api.types import is_datetime64_dtype\n\n\nclass DTypeKind(enum.IntEnum):  # noqa PR01\n    \"\"\"\n    Integer enum for data types.\n    Attributes\n    ----------\n    INT : int\n        Matches to signed integer data type.\n    UINT : int\n        Matches to unsigned integer data type.\n    FLOAT : int\n        Matches to floating point data type.\n    BOOL : int\n        Matches to boolean data type.\n    STRING : int\n        Matches to string data type (UTF-8 encoded).\n    DATETIME : int\n        Matches to datetime data type.\n    CATEGORICAL : int\n        Matches to categorical data type.\n    \"\"\"\n\n    INT = 0\n    UINT = 1\n    FLOAT = 2\n    BOOL = 20\n    STRING = 21  # UTF-8\n    DATETIME = 22\n    CATEGORICAL = 23\n\n\nclass ColumnNullType(enum.IntEnum):  # noqa PR01\n    \"\"\"\n    Integer enum for null type representation.\n    Attributes\n    ----------\n    NON_NULLABLE : int\n        Non-nullable 
column.\n    USE_NAN : int\n        Use explicit float NaN value.\n    USE_SENTINEL : int\n        Sentinel value besides NaN.\n    USE_BITMASK : int\n        The bit is set/unset representing a null on a certain position.\n    USE_BYTEMASK : int\n        The byte is set/unset representing a null on a certain position.\n    \"\"\"\n\n    NON_NULLABLE = 0\n    USE_NAN = 1\n    USE_SENTINEL = 2\n    USE_BITMASK = 3\n    USE_BYTEMASK = 4\n\n\nclass DlpackDeviceType(enum.IntEnum):  # noqa PR01\n    \"\"\"Integer enum for device type codes matching DLPack.\"\"\"\n\n    CPU = 1\n    CUDA = 2\n    CPU_PINNED = 3\n    OPENCL = 4\n    VULKAN = 7\n    METAL = 8\n    VPI = 9\n    ROCM = 10\n\n\nclass ArrowCTypes:\n    \"\"\"\n    Enum for Apache Arrow C type format strings.\n\n    The Arrow C data interface:\n    https://arrow.apache.org/docs/format/CDataInterface.html#data-type-description-format-strings\n    \"\"\"\n\n    NULL = \"n\"\n    BOOL = \"b\"\n    INT8 = \"c\"\n    UINT8 = \"C\"\n    INT16 = \"s\"\n    UINT16 = \"S\"\n    INT32 = \"i\"\n    UINT32 = \"I\"\n    INT64 = \"l\"\n    UINT64 = \"L\"\n    FLOAT16 = \"e\"\n    FLOAT32 = \"f\"\n    FLOAT64 = \"g\"\n    STRING = \"u\"  # utf-8\n    DATE32 = \"tdD\"\n    DATE64 = \"tdm\"\n    # Resolution:\n    #   - seconds -> 's'\n    #   - milliseconds -> 'm'\n    #   - microseconds -> 'u'\n    #   - nanoseconds -> 'n'\n    TIMESTAMP = \"ts{resolution}:{tz}\"\n    TIME = \"tt{resolution}\"\n\n\nclass Endianness:\n    \"\"\"Enum indicating the byte-order of a data-type.\"\"\"\n\n    LITTLE = \"<\"\n    BIG = \">\"\n    NATIVE = \"=\"\n    NA = \"|\"\n\n\ndef pandas_dtype_to_arrow_c(dtype: Union[np.dtype, pandas.CategoricalDtype]) -> str:\n    \"\"\"\n    Represent pandas `dtype` as a format string in Apache Arrow C notation.\n\n    Parameters\n    ----------\n    dtype : np.dtype or pandas.CategoricalDtype\n        Datatype of pandas DataFrame to represent.\n\n    Returns\n    -------\n    str\n        Format string in Apache Arrow C notation of the 
given `dtype`.\n    \"\"\"\n    if isinstance(dtype, pandas.CategoricalDtype):\n        return ArrowCTypes.INT64\n    elif dtype == pandas.api.types.pandas_dtype(\"O\"):\n        return ArrowCTypes.STRING\n\n    format_str = getattr(ArrowCTypes, dtype.name.upper(), None)\n    if format_str is not None:\n        return format_str\n\n    if is_datetime64_dtype(dtype):\n        # Selecting the first char of resolution string:\n        # dtype.str -> '<M8[ns]'\n        resolution = re.findall(r\"\\[(.*)\\]\", dtype.str)[0][:1]\n        return ArrowCTypes.TIMESTAMP.format(resolution=resolution, tz=\"\")\n\n    raise NotImplementedError(\n        f\"Convertion of {dtype} to Arrow C format string is not implemented.\"\n    )\n\n\ndef raise_copy_alert(copy_reason: Optional[str] = None) -> None:\n    \"\"\"\n    Raise a ``RuntimeError`` mentioning that there's a copy required.\n\n    Parameters\n    ----------\n    copy_reason : str, optional\n        The reason of making a copy. Should fit to the following format:\n        'The copy occurred due to {copy_reason}.'.\n    \"\"\"\n    msg = \"Copy required but 'allow_copy=False' is set.\"\n    if copy_reason:\n        msg += f\" The copy occurred due to {copy_reason}.\"\n    raise RuntimeError(msg)\n"
  },
  {
    "path": "modin/core/dataframe/base/partitioning/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Base Modin Dataframe classes related to its partitioning.\"\"\"\n"
  },
  {
    "path": "modin/core/dataframe/base/partitioning/axis_partition.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Base class of an axis partition for a Modin Dataframe.\"\"\"\n\nfrom abc import ABC, abstractmethod\nfrom typing import Any, Callable, Iterable, Optional, Tuple, Type, Union\n\nfrom modin.logging import ClassLogger\nfrom modin.logging.config import LogLevel\n\n\nclass BaseDataframeAxisPartition(\n    ABC, ClassLogger, modin_layer=\"VIRTUAL-PARTITION\", log_level=LogLevel.DEBUG\n):  # pragma: no cover\n    \"\"\"\n    An abstract class that represents the parent class for any axis partition class.\n\n    This class is intended to simplify the way that operations are performed.\n\n    Attributes\n    ----------\n    _PARTITIONS_METADATA_LEN : int\n        The number of metadata values that the object of `partition_type` consumes.\n    \"\"\"\n\n    @property\n    @abstractmethod\n    def list_of_blocks(self) -> list:\n        \"\"\"Get the list of physical partition objects that compose this partition.\"\"\"\n        pass\n\n    def apply(\n        self,\n        func: Callable,\n        *args: Iterable,\n        num_splits: Optional[int] = None,\n        other_axis_partition: Optional[\"BaseDataframeAxisPartition\"] = None,\n        maintain_partitioning: bool 
= True,\n        lengths: Optional[Iterable] = None,\n        manual_partition: bool = False,\n        **kwargs: dict,\n    ) -> Any:\n        \"\"\"\n        Apply a function to this axis partition along full axis.\n\n        Parameters\n        ----------\n        func : callable\n            The function to apply. This will be preprocessed according to\n            the corresponding `BaseDataframePartition` objects.\n        *args : iterable\n            Positional arguments to pass to `func`.\n        num_splits : int, default: None\n            The number of times to split the result object.\n        other_axis_partition : BaseDataframeAxisPartition, default: None\n            Another `BaseDataframeAxisPartition` object to be applied\n            to func. This is for operations that are between two data sets.\n        maintain_partitioning : bool, default: True\n            Whether to keep the partitioning in the same\n            orientation as it was previously or not. This is important because we may be\n            operating on an individual axis partition and not touching the rest.\n            In this case, we have to return the partitioning to its previous\n            orientation (the lengths will remain the same). This is ignored between\n            two axis partitions.\n        lengths : iterable, default: None\n            The list of lengths to shuffle the partition into.\n        manual_partition : bool, default: False\n            If True, partition the result with `lengths`.\n        **kwargs : dict\n            Additional keywords arguments to be passed in `func`.\n\n        Returns\n        -------\n        list\n            A list of `BaseDataframePartition` objects.\n\n        Notes\n        -----\n        The procedures that invoke this method assume full axis\n        knowledge. 
Implement this method accordingly.\n\n        You must return a list of `BaseDataframePartition` objects from this method.\n        \"\"\"\n        pass\n\n    # Child classes must have these in order to correctly subclass.\n    partition_type: Type\n    _PARTITIONS_METADATA_LEN = 0\n\n    def _wrap_partitions(\n        self, partitions: list, extract_metadata: Optional[bool] = None\n    ) -> list:\n        \"\"\"\n        Wrap remote partition objects with `BaseDataframePartition` class.\n\n        Parameters\n        ----------\n        partitions : list\n            List of remote partition objects to be wrapped with `BaseDataframePartition` class.\n        extract_metadata : bool, optional\n            Whether the partitions list contains information about partition's metadata.\n            If `None` was passed, the value is inferred from `cls._PARTITIONS_METADATA_LEN`.\n\n        Returns\n        -------\n        list\n            List of wrapped remote partition objects.\n        \"\"\"\n        assert self.partition_type is not None\n\n        if extract_metadata is None:\n            # If `_PARTITIONS_METADATA_LEN == 0` then the execution doesn't support metadata\n            # and thus we should never try extracting it, otherwise assuming that the common\n            # approach of always passing the metadata is used.\n            extract_metadata = bool(self._PARTITIONS_METADATA_LEN)\n\n        if extract_metadata:\n            # Here we receive a 1D array of futures describing partitions and their metadata as:\n            # [object_id{partition_idx}, metadata{partition_idx}_{metadata_idx}, ...]\n            # Here's an example of such an array:\n            # [\n            #  object_id1, metadata1_1, metadata1_2, ..., metadata1_PARTITIONS_METADATA_LEN,\n            #  object_id2, metadata2_1, ..., metadata2_PARTITIONS_METADATA_LEN,\n            #  ...\n            #  object_idN, metadataN_1, ..., metadataN_PARTITIONS_METADATA_LEN,\n   
         # ]\n            return [\n                self.partition_type(*init_args)\n                for init_args in zip(\n                    # `partition_type` consumes `(object_id, *metadata)`, thus adding `+1`\n                    *[iter(partitions)]\n                    * (1 + self._PARTITIONS_METADATA_LEN)\n                )\n            ]\n        else:\n            return [self.partition_type(object_id) for object_id in partitions]\n\n    def force_materialization(\n        self, get_ip: bool = False\n    ) -> \"BaseDataframeAxisPartition\":\n        \"\"\"\n        Materialize axis partitions into a single partition.\n\n        Parameters\n        ----------\n        get_ip : bool, default: False\n            Whether to get node ip address to a single partition or not.\n\n        Returns\n        -------\n        BaseDataframeAxisPartition\n            An axis partition containing only a single materialized partition.\n        \"\"\"\n        materialized = self.apply(\n            lambda x: x, num_splits=1, maintain_partitioning=False\n        )\n        return type(self)(materialized, get_ip=get_ip)  # type: ignore[call-arg]\n\n    def unwrap(\n        self, squeeze: bool = False, get_ip: bool = False\n    ) -> Union[list, Tuple[list, list]]:\n        \"\"\"\n        Unwrap partitions from this axis partition.\n\n        Parameters\n        ----------\n        squeeze : bool, default: False\n            Flag used to unwrap only one partition.\n        get_ip : bool, default: False\n            Whether to get node ip address to each partition or not.\n\n        Returns\n        -------\n        list\n            List of partitions from this axis partition.\n\n        Notes\n        -----\n        If `get_ip=True`, a tuple of lists of Ray.ObjectRef/Dask.Future to node ip addresses and\n        unwrapped partitions, respectively, is returned if Ray/Dask is used as an engine\n        (i.e. 
[(Ray.ObjectRef/Dask.Future, Ray.ObjectRef/Dask.Future), ...]).\n        \"\"\"\n        if squeeze and len(self.list_of_blocks) == 1:\n            if get_ip:\n                # TODO(https://github.com/modin-project/modin/issues/5176): Stop ignoring the list_of_ips\n                # check once we know that we're not calling list_of_ips on python axis partitions\n                return self.list_of_ips[0], self.list_of_blocks[0]  # type: ignore[attr-defined]\n            else:\n                return self.list_of_blocks[0]\n        else:\n            if get_ip:\n                return list(zip(self.list_of_ips, self.list_of_blocks))  # type: ignore[attr-defined]\n            else:\n                return self.list_of_blocks\n"
  },
  {
    "path": "modin/core/dataframe/pandas/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Base Modin Dataframe classes optimized for pandas storage format.\"\"\"\n"
  },
  {
    "path": "modin/core/dataframe/pandas/dataframe/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Base Modin Dataframe class optimized for pandas storage format.\"\"\"\n"
  },
  {
    "path": "modin/core/dataframe/pandas/dataframe/dataframe.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"\nModule contains class PandasDataframe.\n\nPandasDataframe is a parent abstract class for any dataframe class\nfor pandas storage format.\n\"\"\"\n\nfrom __future__ import annotations\n\nimport datetime\nimport re\nfrom abc import ABC, abstractmethod\nfrom functools import cached_property\nfrom typing import TYPE_CHECKING, Callable, Dict, Hashable, List, Optional, Union\n\nimport numpy as np\nimport pandas\nfrom pandas._libs.lib import no_default\nfrom pandas.api.types import is_object_dtype\nfrom pandas.core.dtypes.common import is_dtype_equal, is_list_like, is_numeric_dtype\nfrom pandas.core.indexes.api import Index, RangeIndex\n\nfrom modin.config import (\n    IsRayCluster,\n    MinColumnPartitionSize,\n    MinRowPartitionSize,\n    NPartitions,\n)\nfrom modin.core.dataframe.base.dataframe.dataframe import ModinDataframe\nfrom modin.core.dataframe.base.dataframe.utils import Axis, JoinType, is_trivial_index\nfrom modin.core.dataframe.pandas.dataframe.utils import (\n    ShuffleSortFunctions,\n    add_missing_categories_to_groupby,\n    lazy_metadata_decorator,\n)\nfrom modin.core.dataframe.pandas.metadata import (\n    DtypesDescriptor,\n    
LazyProxyCategoricalDtype,\n    ModinDtypes,\n    ModinIndex,\n)\nfrom modin.core.storage_formats.pandas.parsers import (\n    find_common_type_cat as find_common_type,\n)\nfrom modin.core.storage_formats.pandas.query_compiler import PandasQueryCompiler\nfrom modin.core.storage_formats.pandas.utils import get_length_list\nfrom modin.error_message import ErrorMessage\nfrom modin.logging import ClassLogger\nfrom modin.logging.config import LogLevel\nfrom modin.pandas.indexing import is_range_like\nfrom modin.pandas.utils import (\n    check_both_not_none,\n    get_pandas_backend,\n    is_full_grab_slice,\n)\nfrom modin.utils import MODIN_UNNAMED_SERIES_LABEL\n\nif TYPE_CHECKING:\n    from pandas._typing import npt\n\n    from modin.core.dataframe.base.interchange.dataframe_protocol.dataframe import (\n        ProtocolDataframe,\n    )\n    from modin.core.dataframe.pandas.partitioning.partition_manager import (\n        PandasDataframePartitionManager,\n    )\n\n\nclass PandasDataframe(\n    ABC, ClassLogger, modin_layer=\"CORE-DATAFRAME\", log_level=LogLevel.DEBUG\n):\n    \"\"\"\n    An abstract class that represents the parent class for any pandas storage format dataframe class.\n\n    This class provides interfaces to run operations on dataframe partitions.\n\n    Parameters\n    ----------\n    partitions : np.ndarray\n        A 2D NumPy array of partitions.\n    index : sequence or callable, optional\n        The index for the dataframe. Converted to a ``pandas.Index``.\n        Is computed from partitions on demand if not specified.\n        If ``callable() -> (pandas.Index, list of row lengths or None)`` type,\n        then the calculation will be delayed until `self.index` is called.\n    columns : sequence, optional\n        The columns object for the dataframe. Converted to a ``pandas.Index``.\n        Is computed from partitions on demand if not specified.\n    row_lengths : list, optional\n        The length of each partition in the rows. 
The \"height\" of\n        each of the block partitions. Is computed if not provided.\n    column_widths : list, optional\n        The width of each partition in the columns. The \"width\" of\n        each of the block partitions. Is computed if not provided.\n    dtypes : pandas.Series or callable, optional\n        The data types for the dataframe columns.\n    pandas_backend : {\"pyarrow\", None}, optional\n        Backend used by pandas.\n    \"\"\"\n\n    _partition_mgr_cls: PandasDataframePartitionManager\n    _query_compiler_cls = PandasQueryCompiler\n    # These properties flag whether or not we are deferring the metadata synchronization\n    _deferred_index: bool = False\n    _deferred_column: bool = False\n\n    _index_cache: ModinIndex = None\n    _columns_cache: ModinIndex = None\n    _dtypes: Optional[ModinDtypes] = None\n    _pandas_backend: Optional[str] = None\n\n    @property\n    def storage_format(self) -> str:\n        \"\"\"\n        The storage format for this frame's data.\n\n        Returns\n        -------\n        str\n            The storage format.\n        \"\"\"\n        return \"Pandas\"\n\n    @property\n    @abstractmethod\n    def engine(self) -> str:\n        \"\"\"\n        The engine for this frame.\n\n        Returns\n        -------\n        str\n            The engine.\n        \"\"\"\n        pass\n\n    @cached_property\n    def __constructor__(self) -> type[PandasDataframe]:\n        \"\"\"\n        Create a new instance of this object.\n\n        Returns\n        -------\n        callable\n        \"\"\"\n        return type(self)\n\n    def __init__(\n        self,\n        partitions,\n        index=None,\n        columns=None,\n        row_lengths=None,\n        column_widths=None,\n        dtypes: Optional[Union[pandas.Series, ModinDtypes, Callable]] = None,\n        pandas_backend: Optional[str] = None,\n    ):\n        self._partitions = partitions\n        self.set_index_cache(index)\n        
self.set_columns_cache(columns)\n        self._row_lengths_cache = row_lengths\n        self._column_widths_cache = column_widths\n        self._pandas_backend = pandas_backend\n        if pandas_backend != \"pyarrow\" or len(partitions) == 0:\n            # If the backend is pyarrow and there are no partitions, the computed dtype otherwise becomes NaN,\n            # which means we lost the dtype, so actually set it in that case\n            self.set_dtypes_cache(dtypes)\n        else:\n            # In this case, the type precomputation may be incorrect; we need\n            # to know the type algebra precisely. Considering the number of operations\n            # and different combinations of backends, the best solution would be to\n            # introduce optimizations gradually, with a large number of tests.\n            self.set_dtypes_cache(None)\n\n        self._validate_axes_lengths()\n        self._filter_empties(compute_metadata=False)\n\n    def _validate_axes_lengths(self):\n        \"\"\"Validate that labels are split correctly if split is known.\"\"\"\n        if (\n            self._row_lengths_cache is not None\n            and self.has_materialized_index\n            and len(self.index) > 0\n        ):\n            # An empty frame can have 0 rows but a nonempty index. 
If the frame\n            # does have rows, the number of rows must equal the size of the\n            # index.\n            num_rows = sum(self._row_lengths_cache)\n            if num_rows > 0:\n                ErrorMessage.catch_bugs_and_request_email(\n                    num_rows != len(self.index),\n                    f\"Row lengths: {num_rows} != {len(self.index)}\",\n                )\n            ErrorMessage.catch_bugs_and_request_email(\n                any(val < 0 for val in self._row_lengths_cache),\n                f\"Row lengths cannot be negative: {self._row_lengths_cache}\",\n            )\n        if (\n            self._column_widths_cache is not None\n            and self.has_materialized_columns\n            and len(self.columns) > 0\n        ):\n            # An empty frame can have 0 column but a nonempty column index. If\n            # the frame does have columns, the number of columns must equal the\n            # size of the columns.\n            num_columns = sum(self._column_widths_cache)\n            if num_columns > 0:\n                ErrorMessage.catch_bugs_and_request_email(\n                    num_columns != len(self.columns),\n                    f\"Column widths: {num_columns} != {len(self.columns)}\",\n                )\n            ErrorMessage.catch_bugs_and_request_email(\n                any(val < 0 for val in self._column_widths_cache),\n                f\"Column widths cannot be negative: {self._column_widths_cache}\",\n            )\n\n    @property\n    def num_parts(self) -> int:\n        \"\"\"\n        Get the total number of partitions for this frame.\n\n        Returns\n        -------\n        int\n        \"\"\"\n        return np.prod(self._partitions.shape)\n\n    @property\n    def row_lengths(self):\n        \"\"\"\n        Compute the row partitions lengths if they are not cached.\n\n        Returns\n        -------\n        list\n            A list of row partitions lengths.\n        \"\"\"\n        if 
self._row_lengths_cache is None:\n            if len(self._partitions.T) > 0:\n                row_parts = self._partitions.T[0]\n                self._row_lengths_cache = self._get_lengths(row_parts, Axis.ROW_WISE)\n            else:\n                self._row_lengths_cache = []\n        return self._row_lengths_cache\n\n    @classmethod\n    def _get_lengths(cls, parts, axis):\n        \"\"\"\n        Get list of dimensions for all the provided parts.\n\n        Parameters\n        ----------\n        parts : list\n            List of partitions.\n        axis : {0, 1}\n            The axis along which to get the lengths (0 - length across rows, or 1 - width across columns).\n\n        Returns\n        -------\n        list\n        \"\"\"\n        if axis == Axis.ROW_WISE:\n            return [part.length() for part in parts]\n        else:\n            return [part.width() for part in parts]\n\n    def __len__(self) -> int:\n        \"\"\"\n        Return length of index axis.\n\n        Returns\n        -------\n        int\n        \"\"\"\n        if self.has_materialized_index:\n            _len = len(self.index)\n        else:\n            _len = sum(self.row_lengths)\n        return _len\n\n    @property\n    def column_widths(self):\n        \"\"\"\n        Compute the column partitions widths if they are not cached.\n\n        Returns\n        -------\n        list\n            A list of column partitions widths.\n        \"\"\"\n        if self._column_widths_cache is None:\n            if len(self._partitions) > 0:\n                col_parts = self._partitions[0]\n                self._column_widths_cache = self._get_lengths(col_parts, Axis.COL_WISE)\n            else:\n                self._column_widths_cache = []\n        return self._column_widths_cache\n\n    def _set_axis_lengths_cache(self, value, axis=0):\n        \"\"\"\n        Set the row/column lengths cache for the specified axis.\n\n        Parameters\n        ----------\n        value : 
list of ints\n        axis : int, default: 0\n            0 for row lengths and 1 for column widths.\n        \"\"\"\n        if axis == 0:\n            self._row_lengths_cache = value\n        else:\n            self._column_widths_cache = value\n\n    def _get_axis_lengths_cache(self, axis=0):\n        \"\"\"\n        Get partition's shape caches along the specified axis if available.\n\n        Parameters\n        ----------\n        axis : int, default: 0\n            0 - get row lengths cache, 1 - get column widths cache.\n\n        Returns\n        -------\n        list of ints or None\n            If the cache is computed return a list of ints, ``None`` otherwise.\n        \"\"\"\n        return self._row_lengths_cache if axis == 0 else self._column_widths_cache\n\n    def _get_axis_lengths(self, axis: int = 0) -> List[int]:\n        \"\"\"\n        Get row lengths/column widths.\n\n        Parameters\n        ----------\n        axis : int, default: 0\n\n        Returns\n        -------\n        list of ints\n        \"\"\"\n        return self.row_lengths if axis == 0 else self.column_widths\n\n    @property\n    def has_dtypes_cache(self) -> bool:\n        \"\"\"\n        Check if the dtypes cache exists.\n\n        Returns\n        -------\n        bool\n        \"\"\"\n        return self._dtypes is not None\n\n    @property\n    def has_materialized_dtypes(self) -> bool:\n        \"\"\"\n        Check if dataframe has materialized dtypes cache.\n\n        Returns\n        -------\n        bool\n        \"\"\"\n        return self.has_dtypes_cache and self._dtypes.is_materialized\n\n    def copy_dtypes_cache(self):\n        \"\"\"\n        Copy the dtypes cache.\n\n        Returns\n        -------\n        pandas.Series, callable or None\n            If there is a pandas.Series in the cache, then copying occurs.\n        \"\"\"\n        dtypes_cache = None\n        if self.has_dtypes_cache:\n            dtypes_cache = self._dtypes.copy()\n        return 
dtypes_cache\n\n    def _maybe_update_proxies(self, dtypes, new_parent=None):\n        \"\"\"\n        Update lazy proxies stored inside of `dtypes` with a new parent inplace.\n\n        Parameters\n        ----------\n        dtypes : pandas.Series, ModinDtypes or callable\n        new_parent : object, optional\n            A new parent to link the proxies to. If not specified\n            will consider the `self` to be a new parent.\n\n        Returns\n        -------\n        pandas.Series, ModinDtypes or callable\n        \"\"\"\n        new_parent = new_parent or self\n        if isinstance(dtypes, ModinDtypes):\n            dtypes = dtypes.maybe_specify_new_frame_ref(new_parent)\n        if isinstance(dtypes, pandas.Series):\n            LazyProxyCategoricalDtype.update_dtypes(dtypes, new_parent)\n        return dtypes\n\n    def set_dtypes_cache(self, dtypes):\n        \"\"\"\n        Set dtypes cache.\n\n        Parameters\n        ----------\n        dtypes : pandas.Series, ModinDtypes, callable or None\n        \"\"\"\n        dtypes = self._maybe_update_proxies(dtypes)\n        if dtypes is None and self.has_materialized_columns:\n            # try to set a descriptor instead of 'None' to be more flexible in\n            # dtypes computing\n            try:\n                self._dtypes = ModinDtypes(\n                    DtypesDescriptor(\n                        cols_with_unknown_dtypes=self.columns.tolist(), parent_df=self\n                    )\n                )\n            except NotImplementedError:\n                self._dtypes = None\n        elif isinstance(dtypes, ModinDtypes) or dtypes is None:\n            self._dtypes = dtypes\n        else:\n            self._dtypes = ModinDtypes(dtypes)\n\n    @property\n    def dtypes(self):\n        \"\"\"\n        Compute the data types if they are not cached.\n\n        Returns\n        -------\n        pandas.Series\n            A pandas Series containing the data types for this dataframe.\n        
\"\"\"\n        if self.has_dtypes_cache:\n            dtypes = self._dtypes.get()\n        else:\n            dtypes = self._compute_dtypes()\n            self.set_dtypes_cache(dtypes)\n            # During materialization, we can find out the backend and, if it\n            # is suitable, use the ability to pre-calculate types.\n            self._pandas_backend = get_pandas_backend(dtypes)\n        return dtypes\n\n    def get_dtypes_set(self):\n        \"\"\"\n        Get a set of dtypes that are in this dataframe.\n\n        Returns\n        -------\n        set\n        \"\"\"\n        if isinstance(self._dtypes, ModinDtypes):\n            return self._dtypes.get_dtypes_set()\n        return set(self.dtypes.values)\n\n    def _compute_dtypes(self, columns=None) -> pandas.Series:\n        \"\"\"\n        Compute the data types via TreeReduce pattern for the specified columns.\n\n        Parameters\n        ----------\n        columns : list-like, optional\n            Columns to compute dtypes for. 
If not specified compute dtypes\n            for all the columns in the dataframe.\n\n        Returns\n        -------\n        pandas.Series\n            A pandas Series containing the data types for this dataframe.\n        \"\"\"\n\n        def dtype_builder(df):\n            return df.apply(lambda col: find_common_type(col.values), axis=0)\n\n        if columns is not None:\n            # Sorting positions to request columns in the order they're stored (it's more efficient)\n            numeric_indices = sorted(self.columns.get_indexer_for(columns))\n            if any(pos < 0 for pos in numeric_indices):\n                raise KeyError(\n                    f\"Some of the columns are not in index: subset={columns}; columns={self.columns}\"\n                )\n            obj = self.take_2d_labels_or_positional(\n                col_labels=self.columns[numeric_indices].tolist()\n            )\n        else:\n            obj = self\n\n        # For now we will use a pandas Series for the dtypes.\n        if len(obj.columns) > 0:\n            dtypes = (\n                obj.tree_reduce(0, lambda df: df.dtypes, dtype_builder)\n                .to_pandas()\n                .iloc[0]\n            )\n        else:\n            dtypes = pandas.Series([])\n        # reset name to None because we use MODIN_UNNAMED_SERIES_LABEL internally\n        dtypes.name = None\n        return dtypes\n\n    def set_index_cache(self, index):\n        \"\"\"\n        Set index cache.\n\n        Parameters\n        ----------\n        index : sequence, callable or None\n        \"\"\"\n        if index is None:\n            self._index_cache = ModinIndex(self, axis=0)\n        elif isinstance(index, ModinIndex):\n            # update reference with the new frame to not pollute memory\n            self._index_cache = index.maybe_specify_new_frame_ref(self, axis=0)\n        else:\n            self._index_cache = ModinIndex(index)\n\n    def set_columns_cache(self, columns):\n        
\"\"\"\n        Set columns cache.\n\n        Parameters\n        ----------\n        columns : sequence, callable or None\n        \"\"\"\n        if columns is None:\n            self._columns_cache = ModinIndex(self, axis=1)\n        elif isinstance(columns, ModinIndex):\n            # update reference with the new frame to not pollute memory\n            self._columns_cache = columns.maybe_specify_new_frame_ref(self, axis=1)\n        else:\n            self._columns_cache = ModinIndex(columns)\n\n    def set_axis_cache(self, value, axis=0):\n        \"\"\"\n        Set cache for the specified axis (index or columns).\n\n        Parameters\n        ----------\n        value : sequence, callable or None\n        axis : int, default: 0\n        \"\"\"\n        if axis == 0:\n            self.set_index_cache(value)\n        else:\n            self.set_columns_cache(value)\n\n    def has_axis_cache(self, axis=0) -> bool:\n        \"\"\"\n        Check if the cache for the specified axis exists.\n\n        Parameters\n        ----------\n        axis : int, default: 0\n\n        Returns\n        -------\n        bool\n        \"\"\"\n        return self.has_index_cache if axis == 0 else self.has_columns_cache\n\n    @property\n    def has_index_cache(self):\n        \"\"\"\n        Check if the index cache exists.\n\n        Returns\n        -------\n        bool\n        \"\"\"\n        return self._index_cache is not None\n\n    def copy_index_cache(self, copy_lengths=False):\n        \"\"\"\n        Copy the index cache.\n\n        Parameters\n        ----------\n        copy_lengths : bool, default: False\n            Whether to copy the stored partition lengths to the\n            new index object.\n\n        Returns\n        -------\n        pandas.Index, callable or ModinIndex\n            If there is an pandas.Index in the cache, then copying occurs.\n        \"\"\"\n        idx_cache = self._index_cache\n        if self.has_index_cache:\n            
idx_cache = self._index_cache.copy(copy_lengths)\n        return idx_cache\n\n    def _get_axis_cache(self, axis=0) -> ModinIndex:\n        \"\"\"\n        Get axis cache for the specified axis if available.\n\n        Parameters\n        ----------\n        axis : int, default: 0\n\n        Returns\n        -------\n        ModinIndex\n        \"\"\"\n        return self._index_cache if axis == 0 else self._columns_cache\n\n    @property\n    def has_columns_cache(self):\n        \"\"\"\n        Check if the columns cache exists.\n\n        Returns\n        -------\n        bool\n        \"\"\"\n        return self._columns_cache is not None\n\n    def copy_columns_cache(self, copy_lengths=False):\n        \"\"\"\n        Copy the columns cache.\n\n        Parameters\n        ----------\n        copy_lengths : bool, default: False\n            Whether to copy the stored partition lengths to the\n            new index object.\n\n        Returns\n        -------\n        pandas.Index or None\n            If there is an pandas.Index in the cache, then copying occurs.\n        \"\"\"\n        columns_cache = self._columns_cache\n        if columns_cache is not None:\n            columns_cache = columns_cache.copy(copy_lengths)\n        return columns_cache\n\n    def copy_axis_cache(self, axis=0, copy_lengths=False):\n        \"\"\"\n        Copy the axis cache (index or columns).\n\n        Parameters\n        ----------\n        axis : int, default: 0\n        copy_lengths : bool, default: False\n            Whether to copy the stored partition lengths to the\n            new index object.\n\n        Returns\n        -------\n        pandas.Index, callable or None\n            If there is an pandas.Index in the cache, then copying occurs.\n        \"\"\"\n        if axis == 0:\n            return self.copy_index_cache(copy_lengths)\n        else:\n            return self.copy_columns_cache(copy_lengths)\n\n    @property\n    def has_materialized_index(self):\n       
 \"\"\"\n        Check if dataframe has materialized index cache.\n\n        Returns\n        -------\n        bool\n        \"\"\"\n        return self.has_index_cache and self._index_cache.is_materialized\n\n    @property\n    def has_materialized_columns(self):\n        \"\"\"\n        Check if dataframe has materialized columns cache.\n\n        Returns\n        -------\n        bool\n        \"\"\"\n        return self.has_columns_cache and self._columns_cache.is_materialized\n\n    def _validate_set_axis(self, new_labels, old_labels):\n        \"\"\"\n        Validate the possibility of replacement of old labels with the new labels.\n\n        Parameters\n        ----------\n        new_labels : list-like\n            The labels to replace with.\n        old_labels : list-like\n            The labels to replace.\n\n        Returns\n        -------\n        list-like\n            The validated labels.\n        \"\"\"\n        new_labels = (\n            ModinIndex(new_labels)\n            if not isinstance(new_labels, ModinIndex)\n            else new_labels\n        )\n        old_len = len(old_labels)\n        new_len = len(new_labels)\n        if old_len != new_len:\n            raise ValueError(\n                f\"Length mismatch: Expected axis has {old_len} elements, \"\n                + f\"new values have {new_len} elements\"\n            )\n        return new_labels\n\n    def _get_index(self):\n        \"\"\"\n        Get the index from the cache object.\n\n        Returns\n        -------\n        pandas.Index\n            An index object containing the row labels.\n        \"\"\"\n        if self.has_index_cache:\n            index, row_lengths = self._index_cache.get(return_lengths=True)\n        else:\n            index, row_lengths = self._compute_axis_labels_and_lengths(0)\n            self.set_index_cache(index)\n        if self._row_lengths_cache is None:\n            self._row_lengths_cache = row_lengths\n        return index\n\n    def 
_get_columns(self):\n        \"\"\"\n        Get the columns from the cache object.\n\n        Returns\n        -------\n        pandas.Index\n            An index object containing the column labels.\n        \"\"\"\n        if self.has_columns_cache:\n            columns, column_widths = self._columns_cache.get(return_lengths=True)\n        else:\n            columns, column_widths = self._compute_axis_labels_and_lengths(1)\n            self.set_columns_cache(columns)\n        if self._column_widths_cache is None:\n            self._column_widths_cache = column_widths\n        return columns\n\n    def _set_index(self, new_index):\n        \"\"\"\n        Replace the current row labels with new labels.\n\n        Parameters\n        ----------\n        new_index : list-like\n            The new row labels.\n        \"\"\"\n        if self.has_materialized_index:\n            new_index = self._validate_set_axis(new_index, self._index_cache)\n        self.set_index_cache(new_index)\n        self.synchronize_labels(axis=0)\n\n    def _set_columns(self, new_columns):\n        \"\"\"\n        Replace the current column labels with new labels.\n\n        Parameters\n        ----------\n        new_columns : list-like\n           The new column labels.\n        \"\"\"\n        if self.has_materialized_columns:\n            # do not set new columns if they're identical to the previous ones\n            if (\n                isinstance(new_columns, pandas.Index)\n                and self.columns.identical(new_columns)\n            ) or (\n                not isinstance(new_columns, pandas.Index)\n                and np.array_equal(self.columns.values, new_columns)\n            ):\n                return\n            new_columns = self._validate_set_axis(new_columns, self._columns_cache)\n        if isinstance(self._dtypes, ModinDtypes):\n            try:\n                new_dtypes = self._dtypes.set_index(new_columns)\n            except NotImplementedError:\n            
    # can raise on duplicated labels\n                new_dtypes = None\n        elif isinstance(self._dtypes, pandas.Series):\n            new_dtypes = self.dtypes.set_axis(new_columns)\n        else:\n            new_dtypes = None\n        self.set_columns_cache(new_columns)\n        # we have to set new dtypes cache after columns,\n        # so the 'self.columns' and 'new_dtypes.index' indices would match\n        self.set_dtypes_cache(new_dtypes)\n        self.synchronize_labels(axis=1)\n\n    columns = property(_get_columns, _set_columns)\n    index = property(_get_index, _set_index)\n\n    @property\n    def axes(self):\n        \"\"\"\n        Get index and columns that can be accessed with an `axis` integer.\n\n        Returns\n        -------\n        list\n            List with two values: index and columns.\n        \"\"\"\n        return [self.index, self.columns]\n\n    def get_axis(self, axis: int = 0) -> pandas.Index:\n        \"\"\"\n        Get index object for the requested axis.\n\n        Parameters\n        ----------\n        axis : {0, 1}, default: 0\n\n        Returns\n        -------\n        pandas.Index\n        \"\"\"\n        return self.index if axis == 0 else self.columns\n\n    def _compute_axis_labels_and_lengths(self, axis: int, partitions=None):\n        \"\"\"\n        Compute the labels for specific `axis`.\n\n        Parameters\n        ----------\n        axis : int\n            Axis to compute labels along.\n        partitions : np.ndarray, optional\n            A 2D NumPy array of partitions from which labels will be grabbed.\n            If not specified, partitions will be taken from `self._partitions`.\n\n        Returns\n        -------\n        pandas.Index\n            Labels for the specified `axis`.\n        List of int\n            Size of partitions alongside specified `axis`.\n        \"\"\"\n        if partitions is None:\n            partitions = self._partitions\n        new_index, internal_idx = 
self._partition_mgr_cls.get_indices(axis, partitions)\n        return new_index, list(map(len, internal_idx))\n\n    def _filter_empties(self, compute_metadata=True):\n        \"\"\"\n        Remove empty partitions from `self._partitions` to avoid triggering excess computation.\n\n        Parameters\n        ----------\n        compute_metadata : bool, default: True\n            Trigger the computations for partition sizes and labels if they're not done already.\n        \"\"\"\n        if not compute_metadata and (\n            self._row_lengths_cache is None or self._column_widths_cache is None\n        ):\n            # do not trigger the computations\n            return\n\n        if (\n            self.has_materialized_index\n            and len(self.index) == 0\n            or self.has_materialized_columns\n            and len(self.columns) == 0\n            or sum(self.row_lengths) == 0\n            or sum(self.column_widths) == 0\n        ):\n            # This is the case for an empty frame. 
We don't want to completely remove\n            # all metadata and partitions so for the moment, we won't prune if the frame\n            # is empty.\n            # TODO: Handle empty dataframes better\n            return\n        self._partitions = np.array(\n            [\n                [\n                    self._partitions[i][j]\n                    for j in range(len(self._partitions[i]))\n                    if j < len(self.column_widths) and self.column_widths[j] != 0\n                ]\n                for i in range(len(self._partitions))\n                if i < len(self.row_lengths) and self.row_lengths[i] != 0\n            ]\n        )\n        new_col_widths = [w for w in self.column_widths if w != 0]\n        new_row_lengths = [r for r in self.row_lengths if r != 0]\n\n        # check whether an axis partitioning was modified and if we should reset the lengths id for 'ModinIndex'\n        if new_col_widths != self.column_widths:\n            self.set_columns_cache(self.copy_columns_cache(copy_lengths=False))\n        if new_row_lengths != self.row_lengths:\n            self.set_index_cache(self.copy_index_cache(copy_lengths=False))\n\n        self._column_widths_cache = new_col_widths\n        self._row_lengths_cache = new_row_lengths\n\n    def synchronize_labels(self, axis=None):\n        \"\"\"\n        Set the deferred axes variables for the ``PandasDataframe``.\n\n        Parameters\n        ----------\n        axis : int, optional\n            The deferred axis.\n            0 for the index, 1 for the columns.\n        \"\"\"\n        if axis is None:\n            self._deferred_index = True\n            self._deferred_column = True\n        elif axis == 0:\n            self._deferred_index = True\n        else:\n            self._deferred_column = True\n\n    def _propagate_index_objs(self, axis=None) -> None:\n        \"\"\"\n        Synchronize labels by applying the index object for specific `axis` to the `self._partitions` lazily.\n\n     
   Adds `set_axis` function to call-queue of each partition from `self._partitions`\n        to apply new axis.\n\n        Parameters\n        ----------\n        axis : int, optional\n            The axis to apply to. If it's None applies to both axes.\n        \"\"\"\n        self._filter_empties(compute_metadata=False)\n        if axis is None or axis == 0:\n            cum_row_lengths = np.cumsum([0] + self.row_lengths)\n        if axis is None or axis == 1:\n            cum_col_widths = np.cumsum([0] + self.column_widths)\n\n        if axis is None:\n\n            def apply_idx_objs(df, idx, cols):\n                # We should make at least one copy to avoid the data modification problem\n                # that may arise when sharing buffers from distributed storage\n                # (zero-copy pickling).\n                return df.set_axis(idx, axis=\"index\").set_axis(\n                    cols, axis=\"columns\", copy=False\n                )\n\n            self._partitions = np.array(\n                [\n                    [\n                        self._partitions[i][j].add_to_apply_calls(\n                            apply_idx_objs,\n                            idx=self.index[\n                                slice(cum_row_lengths[i], cum_row_lengths[i + 1])\n                            ],\n                            cols=self.columns[\n                                slice(cum_col_widths[j], cum_col_widths[j + 1])\n                            ],\n                            length=self.row_lengths[i],\n                            width=self.column_widths[j],\n                        )\n                        for j in range(len(self._partitions[i]))\n                    ]\n                    for i in range(len(self._partitions))\n                ]\n            )\n            self._deferred_index = False\n            self._deferred_column = False\n        elif axis == 0:\n\n            def apply_idx_objs(df, idx):\n                return 
df.set_axis(idx, axis=\"index\")\n\n            self._partitions = np.array(\n                [\n                    [\n                        self._partitions[i][j].add_to_apply_calls(\n                            apply_idx_objs,\n                            idx=self.index[\n                                slice(cum_row_lengths[i], cum_row_lengths[i + 1])\n                            ],\n                            length=self.row_lengths[i],\n                            width=(\n                                self.column_widths[j]\n                                if self._column_widths_cache is not None\n                                else None\n                            ),\n                        )\n                        for j in range(len(self._partitions[i]))\n                    ]\n                    for i in range(len(self._partitions))\n                ]\n            )\n            self._deferred_index = False\n        elif axis == 1:\n\n            def apply_idx_objs(df, cols):\n                return df.set_axis(cols, axis=\"columns\")\n\n            self._partitions = np.array(\n                [\n                    [\n                        self._partitions[i][j].add_to_apply_calls(\n                            apply_idx_objs,\n                            cols=self.columns[\n                                slice(cum_col_widths[j], cum_col_widths[j + 1])\n                            ],\n                            length=(\n                                self.row_lengths[i]\n                                if self._row_lengths_cache is not None\n                                else None\n                            ),\n                            width=self.column_widths[j],\n                        )\n                        for j in range(len(self._partitions[i]))\n                    ]\n                    for i in range(len(self._partitions))\n                ]\n            )\n            self._deferred_column = False\n        else:\n     
       ErrorMessage.catch_bugs_and_request_email(\n                axis is not None and axis not in [0, 1]\n            )\n\n    @lazy_metadata_decorator(apply_axis=None)\n    def take_2d_labels_or_positional(\n        self,\n        row_labels: Optional[List[Hashable]] = None,\n        row_positions: Optional[List[int]] = None,\n        col_labels: Optional[List[Hashable]] = None,\n        col_positions: Optional[List[int]] = None,\n    ) -> PandasDataframe:\n        \"\"\"\n        Lazily select columns or rows from given indices.\n\n        Parameters\n        ----------\n        row_labels : list of hashable, optional\n            The row labels to extract.\n        row_positions : list-like of ints, optional\n            The row positions to extract.\n        col_labels : list of hashable, optional\n            The column labels to extract.\n        col_positions : list-like of ints, optional\n            The column positions to extract.\n\n        Returns\n        -------\n        PandasDataframe\n             A new PandasDataframe from the mask provided.\n\n        Notes\n        -----\n        If both `row_labels` and `row_positions` are provided, a ValueError is raised.\n        The same rule applies for `col_labels` and `col_positions`.\n        \"\"\"\n        if check_both_not_none(row_labels, row_positions):\n            raise ValueError(\n                \"Both row_labels and row_positions were provided - \"\n                + \"please provide only one of row_labels and row_positions.\"\n            )\n        if check_both_not_none(col_labels, col_positions):\n            raise ValueError(\n                \"Both col_labels and col_positions were provided - \"\n                + \"please provide only one of col_labels and col_positions.\"\n            )\n\n        if row_labels is not None:\n            # Get numpy array of positions of values from `row_labels`\n            if isinstance(self.index, pandas.MultiIndex):\n                row_positions 
= np.zeros(len(row_labels), dtype=\"int64\")\n                # we can't use .get_locs(row_labels) because the function\n                # requires a different format for row_labels\n                for idx, label in enumerate(row_labels):\n                    if isinstance(label, str):\n                        label = [label]\n                    # get_loc can return slice that _take_2d_positional can't handle\n                    row_positions[idx] = self.index.get_locs(label)[0]\n            else:\n                row_positions = self.index.get_indexer_for(row_labels)\n\n        if col_labels is not None:\n            # Get numpy array of positions of values from `col_labels`\n            if isinstance(self.columns, pandas.MultiIndex):\n                col_positions = np.zeros(len(col_labels), dtype=\"int64\")\n                # we can't use .get_locs(col_labels) because the function\n                # requires a different format for col_labels\n                for idx, label in enumerate(col_labels):\n                    if isinstance(label, str):\n                        label = [label]\n                    # get_loc can return slice that _take_2d_positional can't handle\n                    col_positions[idx] = self.columns.get_locs(label)[0]\n            else:\n                col_positions = self.columns.get_indexer_for(col_labels)\n\n        return self._take_2d_positional(row_positions, col_positions)\n\n    def _get_sorted_positions(self, positions):\n        \"\"\"\n        Sort positions if necessary.\n\n        Parameters\n        ----------\n        positions : Sequence[int]\n\n        Returns\n        -------\n        Sequence[int]\n        \"\"\"\n        # Helper for take_2d_positional\n        if is_range_like(positions) and positions.step > 0:\n            sorted_positions = positions\n        else:\n            sorted_positions = np.sort(positions)\n        return sorted_positions\n\n    def _get_new_lengths(self, partitions_dict, *, axis: int) 
-> List[int]:\n        \"\"\"\n        Find lengths of new partitions.\n\n        Parameters\n        ----------\n        partitions_dict : dict\n        axis : int\n\n        Returns\n        -------\n        list[int]\n        \"\"\"\n        # Helper for take_2d_positional\n        if axis == 0:\n            axis_lengths = self.row_lengths\n        else:\n            axis_lengths = self.column_widths\n\n        new_lengths = [\n            len(\n                # Row lengths for slice are calculated as the length of the slice\n                # on the partition. Often this will be the same length as the current\n                # length, but sometimes it is different, thus the extra calculation.\n                range(*part_indexer.indices(axis_lengths[part_idx]))\n                if isinstance(part_indexer, slice)\n                else part_indexer\n            )\n            for part_idx, part_indexer in partitions_dict.items()\n        ]\n        return new_lengths\n\n    def _get_new_index_obj(\n        self, positions, sorted_positions, axis: int\n    ) -> tuple[pandas.Index, slice | npt.NDArray[np.intp]]:\n        \"\"\"\n        Find the new Index object for take_2d_positional result.\n\n        Parameters\n        ----------\n        positions : Sequence[int]\n        sorted_positions : Sequence[int]\n        axis : int\n\n        Returns\n        -------\n        pandas.Index\n        slice or Sequence[int]\n        \"\"\"\n        # Helper for take_2d_positional\n        # Use the slice to calculate the new columns\n        if axis == 0:\n            idx = self.index\n        else:\n            idx = self.columns\n\n        # TODO: Support fast processing of negative-step ranges\n        if is_range_like(positions) and positions.step > 0:\n            # pandas Index is more likely to preserve its metadata if the indexer\n            #  is slice\n            monotonic_idx = slice(positions.start, positions.stop, positions.step)\n        else:\n          
  monotonic_idx = np.asarray(sorted_positions, dtype=np.intp)\n\n        new_idx = idx[monotonic_idx]\n        return new_idx, monotonic_idx\n\n    def _take_2d_positional(\n        self,\n        row_positions: Optional[List[int]] = None,\n        col_positions: Optional[List[int]] = None,\n    ) -> PandasDataframe:\n        \"\"\"\n        Lazily select columns or rows from given indices.\n\n        Parameters\n        ----------\n        row_positions : list-like of ints, optional\n            The row positions to extract.\n        col_positions : list-like of ints, optional\n            The column positions to extract.\n\n        Returns\n        -------\n        PandasDataframe\n             A new PandasDataframe from the mask provided.\n        \"\"\"\n        indexers = []\n        for axis, indexer in enumerate((row_positions, col_positions)):\n            if is_range_like(indexer):\n                if indexer.step == 1 and len(indexer) == len(self.get_axis(axis)):\n                    # By this function semantics, `None` indexer is a full-axis access\n                    indexer = None\n                elif indexer is not None and not isinstance(indexer, pandas.RangeIndex):\n                    # Pure python's range is not fully compatible with a list of ints,\n                    # converting it to ``pandas.RangeIndex``` that is compatible.\n                    indexer = pandas.RangeIndex(\n                        indexer.start, indexer.stop, indexer.step\n                    )\n            else:\n                ErrorMessage.catch_bugs_and_request_email(\n                    failure_condition=not (indexer is None or is_list_like(indexer)),\n                    extra_log=\"Mask takes only list-like numeric indexers, \"\n                    + f\"received: {type(indexer)}\",\n                )\n                if isinstance(indexer, list):\n                    indexer = np.array(indexer, dtype=np.int64)\n            indexers.append(indexer)\n        
row_positions, col_positions = indexers\n\n        if col_positions is None and row_positions is None:\n            return self.copy()\n\n        # quite fast check that allows skip sorting\n        must_sort_row_pos = row_positions is not None and not np.all(\n            row_positions[1:] >= row_positions[:-1]\n        )\n        must_sort_col_pos = col_positions is not None and not np.all(\n            col_positions[1:] >= col_positions[:-1]\n        )\n\n        if col_positions is None and row_positions is not None:\n            # Check if the optimization that first takes part of the data using the mask\n            # operation so that later less data is concatenated into a whole column is useful.\n            # In the case when only a small portion of the data is discarded, the overhead of the\n            # engine (for putting data in and out of storage) can exceed the resulting speedup.\n            all_rows = None\n            if self.has_materialized_index:\n                all_rows = len(self.index)\n            elif self._row_lengths_cache or must_sort_row_pos:\n                all_rows = sum(self.row_lengths)\n\n            # 'base_num_cols' specifies the number of columns that the dataframe should have\n            # in order to jump to 'reordered_labels' in case of len(row_positions) / len(self) >= base_ratio;\n            # these variables may be a subject to change in order to tune performance more accurately\n            base_num_cols = 10\n            base_ratio = 0.2\n            # Example:\n            #   len(self.columns): 10 == base_num_cols -> min ratio to jump to reorder_labels: 0.2 == base_ratio\n            #   len(self.columns): 15 -> min ratio to jump to reorder_labels: 0.3\n            #   len(self.columns): 20 -> min ratio to jump to reorder_labels: 0.4\n            #   ...\n            #   len(self.columns): 49 -> min ratio to jump to reorder_labels: 0.98\n            #   len(self.columns): 50 -> min ratio to jump to 
reorder_labels: 1.0\n            #   len(self.columns): 55 -> min ratio to jump to reorder_labels: 1.0\n            #   ...\n            if (all_rows and len(row_positions) > 0.9 * all_rows) or (\n                must_sort_row_pos\n                and len(row_positions) * base_num_cols\n                >= min(\n                    all_rows * len(self.columns) * base_ratio,\n                    len(row_positions) * base_num_cols,\n                )\n            ):\n                return self._reorder_labels(\n                    row_positions=row_positions, col_positions=col_positions\n                )\n        sorted_row_positions = sorted_col_positions = None\n        if row_positions is not None:\n            if must_sort_row_pos:\n                sorted_row_positions = self._get_sorted_positions(row_positions)\n            else:\n                sorted_row_positions = row_positions\n            # Get dict of row_parts as {row_index: row_internal_indices}\n            row_partitions_dict = self._get_dict_of_block_index(\n                0, sorted_row_positions, are_indices_sorted=True\n            )\n            new_row_lengths = self._get_new_lengths(row_partitions_dict, axis=0)\n            new_index, _ = self._get_new_index_obj(\n                row_positions, sorted_row_positions, axis=0\n            )\n        else:\n            row_partitions_dict = {i: slice(None) for i in range(len(self._partitions))}\n            new_row_lengths = self._row_lengths_cache\n            new_index = self.copy_index_cache(copy_lengths=True)\n\n        if col_positions is not None:\n            if must_sort_col_pos:\n                sorted_col_positions = self._get_sorted_positions(col_positions)\n            else:\n                sorted_col_positions = col_positions\n            # Get dict of col_parts as {col_index: col_internal_indices}\n            col_partitions_dict = self._get_dict_of_block_index(\n                1, sorted_col_positions, are_indices_sorted=True\n    
        )\n            new_col_widths = self._get_new_lengths(col_partitions_dict, axis=1)\n            new_columns, monotonic_col_idx = self._get_new_index_obj(\n                col_positions, sorted_col_positions, axis=1\n            )\n\n            ErrorMessage.catch_bugs_and_request_email(\n                failure_condition=sum(new_col_widths) != len(new_columns),\n                extra_log=f\"{sum(new_col_widths)} != {len(new_columns)}.\\n\"\n                + f\"{col_positions}\\n{self.column_widths}\\n{col_partitions_dict}\",\n            )\n\n            if self.has_materialized_dtypes:\n                new_dtypes = self.dtypes.iloc[monotonic_col_idx]\n            elif isinstance(self._dtypes, ModinDtypes):\n                try:\n                    supported_monotonic_col_idx = monotonic_col_idx\n                    if isinstance(monotonic_col_idx, slice):\n                        supported_monotonic_col_idx = pandas.RangeIndex(\n                            monotonic_col_idx.start,\n                            monotonic_col_idx.stop,\n                            monotonic_col_idx.step,\n                        ).to_list()\n                    new_dtypes = self._dtypes.lazy_get(\n                        supported_monotonic_col_idx, numeric_index=True\n                    )\n                # can raise either on missing cache or on duplicated labels\n                except (ValueError, NotImplementedError):\n                    new_dtypes = None\n            else:\n                new_dtypes = None\n        else:\n            col_partitions_dict = {\n                i: slice(None) for i in range(len(self._partitions.T))\n            }\n            new_col_widths = self._column_widths_cache\n            new_columns = self.copy_columns_cache(copy_lengths=True)\n            new_dtypes = self.copy_dtypes_cache()\n\n        new_partitions = np.array(\n            [\n                [\n                    self._partitions[row_idx][col_idx].mask(\n                 
       row_internal_indices, col_internal_indices\n                    )\n                    for col_idx, col_internal_indices in col_partitions_dict.items()\n                ]\n                for row_idx, row_internal_indices in row_partitions_dict.items()\n            ]\n        )\n        intermediate = self.__constructor__(\n            new_partitions,\n            new_index,\n            new_columns,\n            new_row_lengths,\n            new_col_widths,\n            new_dtypes,\n            pandas_backend=self._pandas_backend,\n        )\n\n        return self._maybe_reorder_labels(\n            intermediate,\n            row_positions,\n            col_positions,\n        )\n\n    def _maybe_reorder_labels(\n        self,\n        intermediate: PandasDataframe,\n        row_positions,\n        col_positions,\n    ) -> PandasDataframe:\n        \"\"\"\n        Call re-order labels on take_2d_labels_or_positional result if necessary.\n\n        Parameters\n        ----------\n        intermediate : PandasDataFrame\n        row_positions : list-like of ints, optional\n            The row positions to extract.\n        col_positions : list-like of ints, optional\n            The column positions to extract.\n\n        Returns\n        -------\n        PandasDataframe\n        \"\"\"\n        # Check if monotonically increasing, return if it is. 
Fast track code path for\n        # common case to keep it fast.\n        if (\n            row_positions is None\n            # Fast range processing of non-positive-step ranges is not yet supported\n            or (is_range_like(row_positions) and row_positions.step > 0)\n            or len(row_positions) == 1\n            or np.all(row_positions[1:] >= row_positions[:-1])\n        ) and (\n            col_positions is None\n            # Fast range processing of non-positive-step ranges is not yet supported\n            or (is_range_like(col_positions) and col_positions.step > 0)\n            or len(col_positions) == 1\n            or np.all(col_positions[1:] >= col_positions[:-1])\n        ):\n            return intermediate\n\n        # The new labels are often smaller than the old labels, so we can't reuse the\n        # original order values because those were mapped to the original data. We have\n        # to reorder here based on the expected order from within the data.\n        # To do so, we \"unsort\" the indices by using np.argsort() twice, as inspired by\n        # https://stackoverflow.com/questions/2483696/undo-or-reverse-argsort-python,\n        # meaning that `new_row_order` must be such that `np.sort(row_positions)[new_row_order] == row_positions`.\n        # This is achieved by first calculating the indices which would sort `row_positions`,\n        # and then by calculating new indices that would sort \"sorting indices\" themselves.\n        # First argsort brings us to the proper \"index space\" (according to smaller labels count),\n        # and the second re-orders them to match the original data.\n        new_row_order, new_col_order = None, None\n        if is_range_like(row_positions):\n            if row_positions.step < 0:\n                # do not need to re-order positive-step-ranges\n                new_row_order = pandas.RangeIndex(len(row_positions) - 1, -1, -1)\n        elif row_positions is not None:\n            new_row_order = 
np.argsort(\n                np.argsort(np.asarray(row_positions, dtype=np.intp))\n            )\n        if is_range_like(col_positions):\n            if col_positions.step < 0:\n                new_col_order = pandas.RangeIndex(len(col_positions) - 1, -1, -1)\n        elif col_positions is not None:\n            new_col_order = np.argsort(\n                np.argsort(np.asarray(col_positions, dtype=np.intp))\n            )\n        return intermediate._reorder_labels(\n            row_positions=new_row_order, col_positions=new_col_order\n        )\n\n    @lazy_metadata_decorator(apply_axis=\"rows\")\n    def from_labels(self) -> PandasDataframe:\n        \"\"\"\n        Convert the row labels to a column of data, inserted at the first position.\n\n        Gives result by similar way as `pandas.DataFrame.reset_index`. Each level\n        of `self.index` will be added as separate column of data.\n\n        Returns\n        -------\n        PandasDataframe\n            A PandasDataframe with new columns from index labels.\n        \"\"\"\n        new_row_labels = pandas.RangeIndex(len(self.index))\n        if self.index.nlevels > 1:\n            level_names = [\n                (\n                    self.index.names[i]\n                    if self.index.names[i] is not None\n                    else \"level_{}\".format(i)\n                )\n                for i in range(self.index.nlevels)\n            ]\n        else:\n            level_names = [\n                (\n                    self.index.names[0]\n                    if self.index.names[0] is not None\n                    else (\n                        \"index\" if \"index\" not in self.columns else \"level_{}\".format(0)\n                    )\n                )\n            ]\n        names = tuple(level_names) if len(level_names) > 1 else level_names[0]\n        new_dtypes = self.index.to_frame(name=names).dtypes\n        try:\n            new_dtypes = ModinDtypes.concat([new_dtypes, 
self._dtypes])\n        except NotImplementedError:\n            # can raise on duplicated labels\n            new_dtypes = None\n\n        # We will also use the `new_column_names` in the calculation of the internal metadata, so this is a\n        # lightweight way of ensuring the metadata matches.\n        if self.columns.nlevels > 1:\n            # Column labels are different for multilevel index.\n            new_column_names = pandas.MultiIndex.from_tuples(\n                # Set level names on the 1st columns level and fill up empty level names with empty string.\n                # Expand tuples in level names. This is how reset_index works when col_level col_fill are not specified.\n                [\n                    tuple(\n                        list(level) + [\"\"] * (self.columns.nlevels - len(level))\n                        if isinstance(level, tuple)\n                        else [level] + [\"\"] * (self.columns.nlevels - 1)\n                    )\n                    for level in level_names\n                ],\n                names=self.columns.names,\n            )\n        else:\n            new_column_names = pandas.Index(level_names, tupleize_cols=False)\n        new_columns = new_column_names.append(self.columns)\n\n        def from_labels_executor(\n            df: pandas.DataFrame, **kwargs\n        ) -> pandas.DataFrame:  # pragma: no cover\n            # Setting the names here ensures that external and internal metadata always match.\n            df.index.names = new_column_names\n\n            # Handling of a case when columns have the same name as one of index levels names.\n            # In this case `df.reset_index` provides errors related to columns duplication.\n            # This case is possible because columns metadata updating is deferred. 
To workaround\n            # `df.reset_index` error we allow columns duplication in \"if\" branch via `concat`.\n            if any(name_level in df.columns for name_level in df.index.names):\n                columns_to_add = df.index.to_frame()\n                columns_to_add.reset_index(drop=True, inplace=True)\n                df = df.reset_index(drop=True)\n                result = pandas.concat([columns_to_add, df], axis=1, copy=False)\n            else:\n                result = df.reset_index()\n            # Put the index back to the original due to GH#4394\n            result.index = df.index\n            return result\n\n        new_parts = self._partition_mgr_cls.apply_func_to_select_indices(\n            0,\n            self._partitions,\n            from_labels_executor,\n            [0],\n            keep_remaining=True,\n        )\n        new_column_widths = [\n            self.index.nlevels + self.column_widths[0]\n        ] + self.column_widths[1:]\n        result = self.__constructor__(\n            new_parts,\n            new_row_labels,\n            new_columns,\n            row_lengths=self._row_lengths_cache,\n            column_widths=new_column_widths,\n            dtypes=new_dtypes,\n            pandas_backend=self._pandas_backend,\n        )\n        # Set flag for propagating deferred row labels across dataframe partitions\n        result.synchronize_labels(axis=0)\n        return result\n\n    def to_labels(self, column_list: List[Hashable]) -> PandasDataframe:\n        \"\"\"\n        Move one or more columns into the row labels. 
Previous labels are dropped.\n\n        Parameters\n        ----------\n        column_list : list of hashable\n            The list of column names to place as the new row labels.\n\n        Returns\n        -------\n        PandasDataframe\n            A new PandasDataframe that has the updated labels.\n        \"\"\"\n        extracted_columns = self.take_2d_labels_or_positional(\n            col_labels=column_list\n        ).to_pandas()\n\n        if len(column_list) == 1:\n            new_labels = pandas.Index(\n                extracted_columns.squeeze(axis=1), name=column_list[0]\n            )\n        else:\n            new_labels = pandas.MultiIndex.from_frame(\n                extracted_columns, names=column_list\n            )\n        result = self.take_2d_labels_or_positional(\n            col_labels=[i for i in self.columns if i not in extracted_columns.columns]\n        )\n        result.index = new_labels\n        return result\n\n    @lazy_metadata_decorator(apply_axis=\"both\")\n    def _reorder_labels(self, row_positions=None, col_positions=None):\n        \"\"\"\n        Reorder the columns and/or rows in this DataFrame.\n\n        Parameters\n        ----------\n        row_positions : list of int, optional\n            The ordered list of new row orders such that each position within the list\n            indicates the new position.\n        col_positions : list of int, optional\n            The ordered list of new column orders such that each position within the\n            list indicates the new position.\n\n        Returns\n        -------\n        PandasDataframe\n            A new PandasDataframe with reordered columns and/or rows.\n        \"\"\"\n        new_dtypes = self.copy_dtypes_cache()\n        if row_positions is not None:\n            # We want to preserve the frame's partitioning so passing in ``keep_partitioning=True``\n            # in order to use the cached `row_lengths` values for the new frame.\n            # If the 
frame is re-partitioned using the \"standard\" partitioning,\n            # then knowing that, we can compute new row lengths.\n            ordered_rows = self._partition_mgr_cls.map_axis_partitions(\n                0,\n                self._partitions,\n                lambda df: df.iloc[row_positions],\n                keep_partitioning=True,\n            )\n            row_idx = self.index[row_positions]\n\n            if len(row_idx) != len(self.index):\n                # The frame was re-partitioned along the 0 axis during reordering using\n                # the \"standard\" partitioning. Knowing the standard partitioning scheme\n                # we are able to compute new row lengths.\n                new_lengths = get_length_list(\n                    axis_len=len(row_idx),\n                    num_splits=ordered_rows.shape[0],\n                    min_block_size=MinRowPartitionSize.get(),\n                )\n            else:\n                # If the frame's partitioning was preserved then\n                # we can use previous row lengths cache\n                new_lengths = self._row_lengths_cache\n        else:\n            ordered_rows = self._partitions\n            row_idx = self.copy_index_cache(copy_lengths=True)\n            new_lengths = self._row_lengths_cache\n        if col_positions is not None:\n            # We want to preserve the frame's partitioning so passing in ``keep_partitioning=True``\n            # in order to use the cached `column_widths` values for the new frame.\n            # If the frame is re-partitioned using the \"standard\" partitioning,\n            # then knowing that, we can compute new column widths.\n            ordered_cols = self._partition_mgr_cls.map_axis_partitions(\n                1,\n                ordered_rows,\n                lambda df: df.iloc[:, col_positions],\n                keep_partitioning=True,\n            )\n            col_idx = self.columns[col_positions]\n            if 
self.has_materialized_dtypes:\n                new_dtypes = self.dtypes.iloc[col_positions]\n            elif isinstance(self._dtypes, ModinDtypes):\n                try:\n                    new_dtypes = self._dtypes.lazy_get(col_idx)\n                # can raise on duplicated labels\n                except NotImplementedError:\n                    new_dtypes = None\n\n            if len(col_idx) != len(self.columns):\n                # The frame was re-partitioned along the 1 axis during reordering using\n                # the \"standard\" partitioning. Knowing the standard partitioning scheme\n                # we are able to compute new column widths.\n                new_widths = get_length_list(\n                    axis_len=len(col_idx),\n                    num_splits=ordered_cols.shape[1],\n                    min_block_size=MinColumnPartitionSize.get(),\n                )\n            else:\n                # If the frame's partitioning was preserved then\n                # we can use previous column widths cache\n                new_widths = self._column_widths_cache\n        else:\n            ordered_cols = ordered_rows\n            col_idx = self.copy_columns_cache(copy_lengths=True)\n            new_widths = self._column_widths_cache\n        return self.__constructor__(\n            ordered_cols,\n            row_idx,\n            col_idx,\n            new_lengths,\n            new_widths,\n            new_dtypes,\n            pandas_backend=self._pandas_backend,\n        )\n\n    @lazy_metadata_decorator(apply_axis=None)\n    def copy(self):\n        \"\"\"\n        Copy this object.\n\n        Returns\n        -------\n        PandasDataframe\n            A copied version of this object.\n        \"\"\"\n        return self.__constructor__(\n            self._partitions,\n            self.copy_index_cache(copy_lengths=True),\n            self.copy_columns_cache(copy_lengths=True),\n            self._row_lengths_cache,\n            
self._column_widths_cache,\n            self.copy_dtypes_cache(),\n            pandas_backend=self._pandas_backend,\n        )\n\n    @lazy_metadata_decorator(apply_axis=\"both\")\n    def astype(self, col_dtypes, errors: str = \"raise\"):\n        \"\"\"\n        Convert the columns dtypes to given dtypes.\n\n        Parameters\n        ----------\n        col_dtypes : dictionary of {col: dtype,...} or str\n            Where col is the column name and dtype is a NumPy dtype.\n        errors : {'raise', 'ignore'}, default: 'raise'\n            Control raising of exceptions on invalid data for provided dtype.\n\n        Returns\n        -------\n        BaseDataFrame\n            Dataframe with updated dtypes.\n        \"\"\"\n        new_dtypes = None\n        self_dtypes = self.dtypes\n        # When casting to \"category\" we have to make up the whole axis partition\n        # to get the properly encoded table of categories. Every block partition\n        # will store the encoded table. 
That can lead to higher memory footprint.\n        # TODO: Revisit if this hurts users.\n        use_full_axis_cast = False\n        if isinstance(col_dtypes, dict):\n            for column, dtype in col_dtypes.items():\n                if not is_dtype_equal(dtype, self_dtypes[column]):\n                    if new_dtypes is None:\n                        new_dtypes = self_dtypes.copy()\n                    # Update the new dtype series to the proper pandas dtype\n                    new_dtype = pandas.api.types.pandas_dtype(dtype)\n                    if self.engine == \"Dask\" and hasattr(dtype, \"_is_materialized\"):\n                        # FIXME: https://github.com/dask/distributed/issues/8585\n                        _ = dtype._materialize_categories()\n\n                    # We cannot infer without computing the dtype if new dtype is categorical\n                    if isinstance(new_dtype, pandas.CategoricalDtype):\n                        new_dtypes[column] = LazyProxyCategoricalDtype._build_proxy(\n                            # Actual parent will substitute `None` at `.set_dtypes_cache`\n                            parent=None,\n                            column_name=column,\n                            materializer=lambda parent, column: parent._compute_dtypes(\n                                columns=[column]\n                            )[column],\n                        )\n                        use_full_axis_cast = True\n                    else:\n                        new_dtypes[column] = new_dtype\n\n            def astype_builder(df):\n                \"\"\"Compute new partition frame with dtypes updated.\"\"\"\n                return df.astype(\n                    {k: v for k, v in col_dtypes.items() if k in df}, errors=errors\n                )\n\n        else:\n            # Assume that the dtype is a scalar.\n            if not (self_dtypes == col_dtypes).all():\n                new_dtypes = self_dtypes.copy()\n                
new_dtype = pandas.api.types.pandas_dtype(col_dtypes)\n                if self.engine == \"Dask\" and hasattr(new_dtype, \"_is_materialized\"):\n                    # FIXME: https://github.com/dask/distributed/issues/8585\n                    _ = new_dtype._materialize_categories()\n                if isinstance(new_dtype, pandas.CategoricalDtype):\n                    new_dtypes[:] = new_dtypes.to_frame().apply(\n                        lambda column: LazyProxyCategoricalDtype._build_proxy(\n                            # Actual parent will substitute `None` at `.set_dtypes_cache`\n                            parent=None,\n                            column_name=column.index[0],\n                            materializer=lambda parent, column: parent._compute_dtypes(\n                                columns=[column]\n                            )[column],\n                        )\n                    )[0]\n                    use_full_axis_cast = True\n                else:\n                    new_dtypes[:] = new_dtype\n\n            def astype_builder(df):\n                \"\"\"Compute new partition frame with dtypes updated.\"\"\"\n                return df.astype(col_dtypes, errors=errors)\n\n        if new_dtypes is None:\n            return self.copy()\n        if use_full_axis_cast:\n            new_frame = self._partition_mgr_cls.map_axis_partitions(\n                0, self._partitions, astype_builder, keep_partitioning=True\n            )\n        else:\n            new_frame = self._partition_mgr_cls.lazy_map_partitions(\n                self._partitions, astype_builder\n            )\n        return self.__constructor__(\n            new_frame,\n            self.copy_index_cache(copy_lengths=True),\n            self.copy_columns_cache(copy_lengths=True),\n            self._row_lengths_cache,\n            self._column_widths_cache,\n            new_dtypes,\n            pandas_backend=get_pandas_backend(new_dtypes),\n        )\n\n    def 
numeric_columns(self, include_bool=True):\n        \"\"\"\n        Return the names of numeric columns in the frame.\n\n        Parameters\n        ----------\n        include_bool : bool, default: True\n            Whether to consider boolean columns as numeric.\n\n        Returns\n        -------\n        list\n            List of column names.\n        \"\"\"\n        columns = []\n        for col, dtype in zip(self.columns, self.dtypes):\n            if is_numeric_dtype(dtype) and (\n                include_bool or (not include_bool and dtype != np.bool_)\n            ):\n                columns.append(col)\n        return columns\n\n    def _get_dict_of_block_index(self, axis, indices, are_indices_sorted=False):\n        \"\"\"\n        Convert indices to an ordered dict mapping partition (or block) index to internal indices in said partition.\n\n        Parameters\n        ----------\n        axis : {0, 1}\n            The axis along which to get the indices (0 - rows, 1 - columns).\n        indices : list of int, slice\n            A list of global indices to convert.\n        are_indices_sorted : bool, default: False\n            Flag indicating whether the `indices` sequence is sorted by ascending or not.\n            Note: the internal algorithm requires for the `indices` to be sorted, this\n            flag is used for optimization in order to not sort already sorted data.\n            Be careful when passing ``True`` for this flag, if the data appears to be unsorted\n            with the flag set to ``True`` this would lead to undefined behavior.\n\n        Returns\n        -------\n        dict\n            A mapping from partition index to list of internal indices which correspond to `indices` in each\n            partition.\n        \"\"\"\n        # TODO: Support handling of slices with specified 'step'. 
For now, converting them into a range\n        if isinstance(indices, slice) and (\n            indices.step is not None and indices.step != 1\n        ):\n            indices = range(*indices.indices(len(self.get_axis(axis))))\n        # Fasttrack slices\n        if isinstance(indices, slice) or (is_range_like(indices) and indices.step == 1):\n            # Converting range-like indexer to slice\n            indices = slice(indices.start, indices.stop, indices.step)\n            if is_full_grab_slice(indices, sequence_len=len(self.get_axis(axis))):\n                return dict(\n                    zip(\n                        range(self._partitions.shape[axis]),\n                        [slice(None)] * self._partitions.shape[axis],\n                    )\n                )\n            # Empty selection case\n            if indices.start == indices.stop and indices.start is not None:\n                return dict()\n            if indices.start is None or indices.start == 0:\n                last_part, last_idx = list(\n                    self._get_dict_of_block_index(axis, [indices.stop]).items()\n                )[0]\n                dict_of_slices = dict(zip(range(last_part), [slice(None)] * last_part))\n                dict_of_slices.update({last_part: slice(last_idx[0])})\n                return dict_of_slices\n            elif indices.stop is None or indices.stop >= len(self.get_axis(axis)):\n                first_part, first_idx = list(\n                    self._get_dict_of_block_index(axis, [indices.start]).items()\n                )[0]\n                dict_of_slices = dict({first_part: slice(first_idx[0], None)})\n                num_partitions = np.size(self._partitions, axis=axis)\n                part_list = range(first_part + 1, num_partitions)\n                dict_of_slices.update(\n                    dict(zip(part_list, [slice(None)] * len(part_list)))\n                )\n                return dict_of_slices\n            else:\n               
 first_part, first_idx = list(\n                    self._get_dict_of_block_index(axis, [indices.start]).items()\n                )[0]\n                last_part, last_idx = list(\n                    self._get_dict_of_block_index(axis, [indices.stop]).items()\n                )[0]\n                if first_part == last_part:\n                    return dict({first_part: slice(first_idx[0], last_idx[0])})\n                else:\n                    if last_part - first_part == 1:\n                        return dict(\n                            # FIXME: this dictionary creation feels wrong - it might not maintain the order\n                            {\n                                first_part: slice(first_idx[0], None),\n                                last_part: slice(None, last_idx[0]),\n                            }\n                        )\n                    else:\n                        dict_of_slices = dict({first_part: slice(first_idx[0], None)})\n                        part_list = range(first_part + 1, last_part)\n                        dict_of_slices.update(\n                            dict(zip(part_list, [slice(None)] * len(part_list)))\n                        )\n                        dict_of_slices.update({last_part: slice(None, last_idx[0])})\n                        return dict_of_slices\n        if isinstance(indices, list):\n            # Converting python list to numpy for faster processing\n            indices = np.array(indices, dtype=np.int64)\n        # Fasttrack empty numpy array\n        if isinstance(indices, np.ndarray) and indices.size == 0:\n            # This will help preserve metadata stored in empty dataframes (indexes and dtypes)\n            # Otherwise, we will get an empty `new_partitions` array, from which it will\n            #  no longer be possible to obtain metadata\n            return dict([(0, np.array([], dtype=np.int64))])\n        negative_mask = np.less(indices, 0)\n        has_negative = 
np.any(negative_mask)\n        if has_negative:\n            # We're going to modify 'indices' inplace in a numpy way, so doing a copy/converting indices to numpy.\n            indices = (\n                indices.copy()\n                if isinstance(indices, np.ndarray)\n                else np.array(indices, dtype=np.int64)\n            )\n            indices[negative_mask] = indices[negative_mask] % len(self.get_axis(axis))\n        # If the `indices` array was modified because of the negative indices conversion\n        # then the original order was broken and so we have to sort anyway:\n        if has_negative or not are_indices_sorted:\n            indices = np.sort(indices)\n        if axis == 0:\n            bins = np.array(self.row_lengths)\n        else:\n            bins = np.array(self.column_widths)\n        # INT_MAX to make sure we don't try to compute on partitions that don't exist.\n        cumulative = np.append(bins[:-1].cumsum(), np.iinfo(bins.dtype).max)\n\n        def internal(block_idx: int, global_index):\n            \"\"\"Transform global index to internal one for given block (identified by its index).\"\"\"\n            return (\n                global_index\n                if not block_idx\n                else np.subtract(\n                    global_index, cumulative[min(block_idx, len(cumulative) - 1) - 1]\n                )\n            )\n\n        partition_ids = np.digitize(indices, cumulative)\n        count_for_each_partition = np.array(\n            [(partition_ids == i).sum() for i in range(len(cumulative))]\n        ).cumsum()\n        # Compute the internal indices and pair those with the partition index.\n        # If the first partition has any values we need to return, compute those\n        # first to make the list comprehension easier. 
Otherwise, just append the\n        # rest of the values to an empty list.\n        if count_for_each_partition[0] > 0:\n            first_partition_indices = [\n                (0, internal(0, indices[slice(count_for_each_partition[0])]))\n            ]\n        else:\n            first_partition_indices = []\n        partition_ids_with_indices = first_partition_indices + [\n            (\n                i,\n                internal(\n                    i,\n                    indices[\n                        slice(\n                            count_for_each_partition[i - 1],\n                            count_for_each_partition[i],\n                        )\n                    ],\n                ),\n            )\n            for i in range(1, len(count_for_each_partition))\n            if count_for_each_partition[i] > count_for_each_partition[i - 1]\n        ]\n        return dict(partition_ids_with_indices)\n\n    @staticmethod\n    def _join_index_objects(axis, indexes, how, sort, fill_value=None):\n        \"\"\"\n        Join the pair of index objects (columns or rows) by a given strategy.\n\n        Unlike Index.join() in pandas, if `axis` is 1, `sort` is False,\n        and `how` is \"outer\", the result will _not_ be sorted.\n\n        Parameters\n        ----------\n        axis : {0, 1}\n            The axis index object to join (0 - rows, 1 - columns).\n        indexes : list(Index)\n            The indexes to join on.\n        how : {'left', 'right', 'inner', 'outer', None}\n            The type of join to make. 
If `None` then joined index\n            considered to be the first index in the `indexes` list.\n        sort : boolean\n            Whether or not to sort the joined index.\n        fill_value : any, optional\n            Value to use for missing values.\n\n        Returns\n        -------\n        (Index, func)\n            Joined index with make_reindexer func.\n        \"\"\"\n        assert isinstance(indexes, list)\n\n        # define helper functions\n        def merge(left_index, right_index):\n            \"\"\"Combine a pair of indices depending on `axis`, `how` and `sort` from outside.\"\"\"\n            if axis == 1 and how == \"outer\" and not sort:\n                return left_index.union(right_index, sort=False)\n            else:\n                return left_index.join(right_index, how=how, sort=sort)\n\n        # define condition for joining indexes\n        all_indices_equal = all(indexes[0].equals(index) for index in indexes[1:])\n        do_join_index = how is not None and not all_indices_equal\n\n        # define condition for joining indexes with getting indexers\n        need_indexers = (\n            axis == 0\n            and not all_indices_equal\n            and any(not index.is_unique for index in indexes)\n        )\n        indexers = None\n\n        # perform joining indexes\n        if do_join_index:\n            if len(indexes) == 2 and need_indexers:\n                # in case of count of indexes > 2 we should perform joining all indexes\n                # after that get indexers\n                # in the fast path we can obtain joined_index and indexers in one call\n                indexers = [None, None]\n                joined_index, indexers[0], indexers[1] = indexes[0].join(\n                    indexes[1], how=how, sort=sort, return_indexers=True\n                )\n            else:\n                joined_index = indexes[0]\n                # TODO: revisit for performance\n                for index in indexes[1:]:\n        
            joined_index = merge(joined_index, index)\n        else:\n            joined_index = indexes[0].copy()\n\n        if need_indexers and indexers is None:\n            indexers = [index.get_indexer_for(joined_index) for index in indexes]\n\n        def make_reindexer(do_reindex: bool, frame_idx: int):\n            \"\"\"Create callback that reindexes the dataframe using newly computed index.\"\"\"\n            # the order of the frames must match the order of the indexes\n            if not do_reindex:\n                return lambda df: df\n\n            if need_indexers:\n                assert indexers is not None\n\n                return lambda df: df._reindex_with_indexers(\n                    {0: [joined_index, indexers[frame_idx]]},\n                    copy=True,\n                    allow_dups=True,\n                    fill_value=fill_value,\n                )\n            return lambda df: df.reindex(joined_index, axis=axis, fill_value=fill_value)\n\n        return joined_index, make_reindexer\n\n    # Internal methods\n    # These methods are for building the correct answer in a modular way.\n    # Please be careful when changing these!\n\n    def _build_treereduce_func(self, axis, func):\n        \"\"\"\n        Properly formats a TreeReduce result so that the partitioning is correct.\n\n        Parameters\n        ----------\n        axis : int\n            The axis along which to apply the function.\n        func : callable\n            The function to apply.\n\n        Returns\n        -------\n        callable\n            A function to be shipped to the partitions to be executed.\n\n        Notes\n        -----\n        This should be used for any TreeReduce style operation that results in a\n        reduced data dimensionality (dataframe -> series).\n        \"\"\"\n\n        def _tree_reduce_func(df, *args, **kwargs):\n            \"\"\"Tree-reducer function itself executing `func`, presenting the resulting pandas.Series as 
pandas.DataFrame.\"\"\"\n            series_result = func(df, *args, **kwargs)\n            if axis == 0 and isinstance(series_result, pandas.Series):\n                # In the case of axis=0, we need to keep the shape of the data\n                # consistent with what we have done. In the case of a reduce, the\n                # data for axis=0 should be a single value for each column. By\n                # transposing the data after we convert to a DataFrame, we ensure that\n                # the columns of the result line up with the columns from the data.\n                # axis=1 does not have this requirement because the index already will\n                # line up with the index of the data based on how pandas creates a\n                # DataFrame from a Series.\n                result = pandas.DataFrame(series_result).T\n                result.index = [MODIN_UNNAMED_SERIES_LABEL]\n            else:\n                result = pandas.DataFrame(series_result)\n                if isinstance(series_result, pandas.Series):\n                    result.columns = [MODIN_UNNAMED_SERIES_LABEL]\n            return result\n\n        return _tree_reduce_func\n\n    def _compute_tree_reduce_metadata(self, axis, new_parts, dtypes=None):\n        \"\"\"\n        Compute the metadata for the result of reduce function.\n\n        Parameters\n        ----------\n        axis : int\n            The axis on which reduce function was applied.\n        new_parts : NumPy 2D array\n            Partitions with the result of applied function.\n        dtypes : str, optional\n            The data types for the result. 
This is an optimization\n            because there are functions that always result in a particular data\n            type, and this allows us to avoid (re)computing it.\n\n        Returns\n        -------\n        PandasDataframe\n            Modin series (1xN frame) containing the reduced data.\n        \"\"\"\n        new_axes, new_axes_lengths = [0, 0], [0, 0]\n\n        new_axes[axis] = [MODIN_UNNAMED_SERIES_LABEL]\n        new_axes[axis ^ 1] = self.get_axis(axis ^ 1)\n\n        new_axes_lengths[axis] = [1]\n        new_axes_lengths[axis ^ 1] = self._get_axis_lengths(axis ^ 1)\n\n        if dtypes == \"copy\":\n            dtypes = self.copy_dtypes_cache()\n        elif dtypes is not None:\n            dtypes = pandas.Series(\n                [pandas.api.types.pandas_dtype(dtypes)] * len(new_axes[1]),\n                index=new_axes[1],\n            )\n\n        result = self.__constructor__(\n            new_parts,\n            *new_axes,\n            *new_axes_lengths,\n            dtypes,\n            pandas_backend=self._pandas_backend,\n        )\n        return result\n\n    @lazy_metadata_decorator(apply_axis=\"both\")\n    def reduce(\n        self,\n        axis: Union[int, Axis],\n        function: Callable,\n        dtypes: Optional[str] = None,\n    ) -> PandasDataframe:\n        \"\"\"\n        Perform a user-defined aggregation on the specified axis, where the axis reduces down to a singleton. Requires knowledge of the full axis for the reduction.\n\n        Parameters\n        ----------\n        axis : int or modin.core.dataframe.base.utils.Axis\n            The axis to perform the reduce over.\n        function : callable(row|col) -> single value\n            The reduce function to apply to each column.\n        dtypes : str, optional\n            The data types for the result. 
This is an optimization\n            because there are functions that always result in a particular data\n            type, and this allows us to avoid (re)computing it.\n\n        Returns\n        -------\n        PandasDataframe\n            Modin series (1xN frame) containing the reduced data.\n\n        Notes\n        -----\n        The user-defined function must reduce to a single value.\n        \"\"\"\n        axis = Axis(axis)\n        function = self._build_treereduce_func(axis.value, function)\n        new_parts = self._partition_mgr_cls.map_axis_partitions(\n            axis.value, self._partitions, function\n        )\n        return self._compute_tree_reduce_metadata(axis.value, new_parts, dtypes=dtypes)\n\n    @lazy_metadata_decorator(apply_axis=\"opposite\", axis_arg=0)\n    def tree_reduce(\n        self,\n        axis: Union[int, Axis],\n        map_func: Callable,\n        reduce_func: Optional[Callable] = None,\n        dtypes: Optional[str] = None,\n    ) -> PandasDataframe:\n        \"\"\"\n        Apply function that will reduce the data to a pandas Series.\n\n        Parameters\n        ----------\n        axis : int or modin.core.dataframe.base.utils.Axis\n            The axis to perform the tree reduce over.\n        map_func : callable(row|col) -> row|col\n            Callable function to map the dataframe.\n        reduce_func : callable(row|col) -> single value, optional\n            Callable function to reduce the dataframe.\n            If none, then apply map_func twice.\n        dtypes : str, optional\n            The data types for the result. 
This is an optimization\n            because there are functions that always result in a particular data\n            type, and this allows us to avoid (re)computing it.\n\n        Returns\n        -------\n        PandasDataframe\n            A new dataframe.\n        \"\"\"\n        axis = Axis(axis)\n        map_func = self._build_treereduce_func(axis.value, map_func)\n        if reduce_func is None:\n            reduce_func = map_func\n        else:\n            reduce_func = self._build_treereduce_func(axis.value, reduce_func)\n\n        map_parts = self._partition_mgr_cls.map_partitions(self._partitions, map_func)\n        reduce_parts = self._partition_mgr_cls.map_axis_partitions(\n            axis.value, map_parts, reduce_func\n        )\n        return self._compute_tree_reduce_metadata(\n            axis.value, reduce_parts, dtypes=dtypes\n        )\n\n    @lazy_metadata_decorator(apply_axis=None)\n    def map(\n        self,\n        func: Callable,\n        dtypes: Optional[str] = None,\n        new_columns: Optional[pandas.Index] = None,\n        func_args=None,\n        func_kwargs=None,\n        lazy=False,\n    ) -> PandasDataframe:\n        \"\"\"\n        Perform a function that maps across the entire dataset.\n\n        Parameters\n        ----------\n        func : callable(row|col|cell) -> row|col|cell\n            The function to apply.\n        dtypes : dtypes of the result, optional\n            The data types for the result. This is an optimization\n            because there are functions that always result in a particular data\n            type, and this allows us to avoid (re)computing it.\n        new_columns : pandas.Index, optional\n            New column labels of the result, its length has to be identical\n            to the older columns. 
If not specified, old column labels are preserved.\n        func_args : iterable, optional\n            Positional arguments for the 'func' callable.\n        func_kwargs : dict, optional\n            Keyword arguments for the 'func' callable.\n        lazy : bool, default: False\n            Whether to prefer lazy execution or not.\n\n        Returns\n        -------\n        PandasDataframe\n            A new dataframe.\n        \"\"\"\n        map_fn = (\n            self._partition_mgr_cls.lazy_map_partitions\n            if lazy\n            else self._partition_mgr_cls.map_partitions\n        )\n        new_partitions = map_fn(self._partitions, func, func_args, func_kwargs)\n\n        if new_columns is not None and self.has_materialized_columns:\n            assert len(new_columns) == len(\n                self.columns\n            ), \"New column's length must be identical to the previous columns\"\n        elif new_columns is None:\n            new_columns = self.copy_columns_cache(copy_lengths=True)\n        if isinstance(dtypes, str) and dtypes == \"copy\":\n            dtypes = self.copy_dtypes_cache()\n        elif dtypes is not None and not isinstance(dtypes, pandas.Series):\n            if isinstance(new_columns, ModinIndex):\n                # Materializing lazy columns in order to build dtype's index\n                new_columns = new_columns.get(return_lengths=False)\n            dtypes = pandas.Series(\n                [pandas.api.types.pandas_dtype(dtypes)] * len(new_columns),\n                index=new_columns,\n            )\n        return self.__constructor__(\n            new_partitions,\n            self.copy_index_cache(copy_lengths=True),\n            new_columns,\n            self._row_lengths_cache,\n            self._column_widths_cache,\n            dtypes=dtypes,\n            pandas_backend=self._pandas_backend,\n        )\n\n    def window(\n        self,\n        axis: Union[int, Axis],\n        reduce_fn: Callable,\n        
window_size: int,\n        result_schema: Optional[Dict[Hashable, type]] = None,\n    ) -> PandasDataframe:\n        \"\"\"\n        Apply a sliding window operator that acts as a GROUPBY on each window, and reduces down to a single row (column) per window.\n\n        Parameters\n        ----------\n        axis : int or modin.core.dataframe.base.utils.Axis\n            The axis to slide over.\n        reduce_fn : callable(rowgroup|colgroup) -> row|col\n            The reduce function to apply over the data.\n        window_size : int\n            The number of row/columns to pass to the function.\n            (The size of the sliding window).\n        result_schema : dict, optional\n            Mapping from column labels to data types that represents the types of the output dataframe.\n\n        Returns\n        -------\n        PandasDataframe\n            A new PandasDataframe with the reduce function applied over windows of the specified\n                axis.\n\n        Notes\n        -----\n        The user-defined reduce function must reduce each window’s column\n        (row if axis=1) down to a single value.\n        \"\"\"\n        pass\n\n    @lazy_metadata_decorator(apply_axis=\"both\")\n    def fold(self, axis, func, new_index=None, new_columns=None, shape_preserved=False):\n        \"\"\"\n        Perform a function across an entire axis.\n\n        Parameters\n        ----------\n        axis : int\n            The axis to apply over.\n        func : callable\n            The function to apply.\n        new_index : list-like, optional\n            The index of the result.\n        new_columns : list-like, optional\n            The columns of the result.\n        shape_preserved : bool, default: False\n            Whether the shape of the dataframe is preserved or not\n            after applying a function.\n\n        Returns\n        -------\n        PandasDataframe\n            A new dataframe.\n        \"\"\"\n        new_row_lengths = None\n       
 new_column_widths = None\n        if shape_preserved:\n            if new_index is None:\n                new_index = self.copy_index_cache(copy_lengths=True)\n            if new_columns is None:\n                new_columns = self.copy_columns_cache(copy_lengths=True)\n            new_row_lengths = self._row_lengths_cache\n            new_column_widths = self._column_widths_cache\n\n        new_partitions = self._partition_mgr_cls.map_axis_partitions(\n            axis, self._partitions, func, keep_partitioning=True\n        )\n        return self.__constructor__(\n            new_partitions,\n            new_index,\n            new_columns,\n            row_lengths=new_row_lengths,\n            column_widths=new_column_widths,\n            pandas_backend=self._pandas_backend,\n        )\n\n    def infer_objects(self) -> PandasDataframe:\n        \"\"\"\n        Attempt to infer better dtypes for object columns.\n\n        Attempts soft conversion of object-dtyped columns, leaving non-object and unconvertible\n        columns unchanged. 
The inference rules are the same as during normal Series/DataFrame\n        construction.\n\n        Returns\n        -------\n        PandasDataframe\n            A new PandasDataframe with the inferred schema.\n        \"\"\"\n        obj_cols = [\n            col for col, dtype in enumerate(self.dtypes) if is_object_dtype(dtype)\n        ]\n        return self.infer_types(obj_cols)\n\n    def infer_types(self, col_labels: List[str]) -> PandasDataframe:\n        \"\"\"\n        Determine the compatible type shared by all values in the specified columns, and coerce them to that type.\n\n        Parameters\n        ----------\n        col_labels : list\n            List of column labels to infer and induce types over.\n\n        Returns\n        -------\n        PandasDataframe\n            A new PandasDataframe with the inferred schema.\n        \"\"\"\n        # Compute dtypes on the specified columns, and then set those dtypes on a new frame\n        new_cols = self.take_2d_labels_or_positional(col_labels=col_labels)\n        new_cols_dtypes = new_cols.tree_reduce(0, pandas.DataFrame.infer_objects).dtypes\n        new_dtypes = self.dtypes.copy()\n        new_dtypes[col_labels] = new_cols_dtypes\n        return self.__constructor__(\n            self._partitions,\n            self.copy_index_cache(copy_lengths=True),\n            self.copy_columns_cache(copy_lengths=True),\n            self._row_lengths_cache,\n            self._column_widths_cache,\n            new_dtypes,\n            pandas_backend=self._pandas_backend,\n        )\n\n    def join(\n        self,\n        axis: Union[int, Axis],\n        condition: Callable,\n        other: ModinDataframe,\n        join_type: Union[str, JoinType],\n    ) -> PandasDataframe:\n        \"\"\"\n        Join this dataframe with the other.\n\n        Parameters\n        ----------\n        axis : int or modin.core.dataframe.base.utils.Axis\n            The axis to perform the join on.\n        condition : callable\n  
          Function that determines which rows should be joined. The condition can be a\n            simple equality, e.g. \"left.col1 == right.col1\" or can be arbitrarily complex.\n        other : ModinDataframe\n            The other data to join with, i.e. the right dataframe.\n        join_type : string {\"inner\", \"left\", \"right\", \"outer\"} or modin.core.dataframe.base.utils.JoinType\n            The type of join to perform.\n\n        Returns\n        -------\n        PandasDataframe\n            A new PandasDataframe that is the result of applying the specified join over the two\n            dataframes.\n\n        Notes\n        -----\n        During the join, this dataframe is considered the left, while the other is\n        treated as the right.\n\n        Only inner joins, left outer, right outer, and full outer joins are currently supported.\n        Support for other join types (e.g. natural join) may be implemented in the future.\n        \"\"\"\n        pass\n\n    def rename(\n        self,\n        new_row_labels: Optional[Union[Dict[Hashable, Hashable], Callable]] = None,\n        new_col_labels: Optional[Union[Dict[Hashable, Hashable], Callable]] = None,\n    ) -> PandasDataframe:\n        \"\"\"\n        Replace the row and column labels with the specified new labels.\n\n        Parameters\n        ----------\n        new_row_labels : dictionary or callable, optional\n            Mapping or callable that relates old row labels to new labels.\n        new_col_labels : dictionary or callable, optional\n            Mapping or callable that relates old col labels to new labels.\n\n        Returns\n        -------\n        PandasDataframe\n            A new PandasDataframe with the new row and column labels.\n        \"\"\"\n        result = self.copy()\n        if new_row_labels is not None:\n            if callable(new_row_labels):\n                new_row_labels = result.index.map(new_row_labels)\n            result.index = new_row_labels\n    
    if new_col_labels is not None:\n            if callable(new_col_labels):\n                new_col_labels = result.columns.map(new_col_labels)\n            result.columns = new_col_labels\n        return result\n\n    def combine_and_apply(\n        self, func, new_index=None, new_columns=None, new_dtypes=None\n    ):\n        \"\"\"\n        Combine all partitions into a single big one and apply the passed function to it.\n\n        Use this method with care as it collects all the data on the same worker,\n        it's only recommended to use this method on small or reduced datasets.\n\n        Parameters\n        ----------\n        func : callable(pandas.DataFrame) -> pandas.DataFrame\n            A function to apply to the combined partition.\n        new_index : sequence, optional\n            Index of the result.\n        new_columns : sequence, optional\n            Columns of the result.\n        new_dtypes : dict-like, optional\n            Dtypes of the result.\n\n        Returns\n        -------\n        PandasDataframe\n        \"\"\"\n        if self._partitions.shape[1] > 1:\n            new_partitions = self._partition_mgr_cls.row_partitions(self._partitions)\n            new_partitions = np.array([[partition] for partition in new_partitions])\n            modin_frame = self.__constructor__(\n                new_partitions,\n                self.copy_index_cache(copy_lengths=True),\n                self.copy_columns_cache(),\n                self._row_lengths_cache,\n                [len(self.columns)] if self.has_materialized_columns else None,\n                self.copy_dtypes_cache(),\n                pandas_backend=self._pandas_backend,\n            )\n        else:\n            modin_frame = self\n        return modin_frame.apply_full_axis(\n            axis=0,\n            func=func,\n            new_index=new_index,\n            new_columns=new_columns,\n            dtypes=new_dtypes,\n        )\n\n    
@lazy_metadata_decorator(apply_axis=\"both\")\n    def _apply_func_to_range_partitioning(\n        self,\n        key_columns,\n        func,\n        ascending=True,\n        preserve_columns=False,\n        data=None,\n        data_key_columns=None,\n        level=None,\n        shuffle_func_cls=ShuffleSortFunctions,\n        **kwargs,\n    ):\n        \"\"\"\n        Reshuffle data so it would be range partitioned and then apply the passed function row-wise.\n\n        Parameters\n        ----------\n        key_columns : list of hashables\n            Columns to build the range partitioning for. Can't be specified along with `level`.\n        func : callable(pandas.DataFrame) -> pandas.DataFrame\n            Function to apply against partitions.\n        ascending : bool, default: True\n            Whether the range should be built in ascending or descending order.\n        preserve_columns : bool, default: False\n            If the columns cache should be preserved (specify this flag if `func` doesn't change column labels).\n        data : PandasDataframe, optional\n            Dataframe to range-partition along with the `self` frame. If specified, the `func` will receive\n            a dataframe with an additional MultiIndex level in columns that separates `self` and `data`:\n            ``df[\"grouper\"] # self`` and ``df[\"data\"] # data``.\n        data_key_columns : list of hashables, optional\n            Additional key columns from `data`. Will be combined with `key_columns`.\n        level : list of ints or labels, optional\n            Index level(s) to build the range partitioning for. 
Can't be specified along with `key_columns`.\n        shuffle_func_cls : cls, default: ShuffleSortFunctions\n            A class implementing ``modin.core.dataframe.pandas.utils.ShuffleFunctions`` to be used\n            as a shuffle function.\n        **kwargs : dict\n            Additional arguments to forward to the range builder function.\n\n        Returns\n        -------\n        PandasDataframe\n            A new dataframe.\n        \"\"\"\n        if data is not None:\n            # adding an extra MultiIndex level in order to separate `self grouper` from the `data`\n            # after concatenation\n            new_grouper_cols = pandas.MultiIndex.from_tuples(\n                [\n                    (\"grouper\", *col) if isinstance(col, tuple) else (\"grouper\", col)\n                    for col in self.columns\n                ]\n            )\n            grouper = self.copy()\n            grouper.columns = new_grouper_cols\n\n            new_data_cols = pandas.MultiIndex.from_tuples(\n                [\n                    (\"data\", *col) if isinstance(col, tuple) else (\"data\", col)\n                    for col in data.columns\n                ]\n            )\n            data = data.copy()\n            data.columns = new_data_cols\n\n            grouper = grouper.concat(axis=1, others=[data], how=\"right\", sort=False)\n\n            # since original column names were modified, have to modify 'key_columns' as well\n            key_columns = [\n                (\"grouper\", *col) if isinstance(col, tuple) else (\"grouper\", col)\n                for col in key_columns\n            ]\n            if data_key_columns is None:\n                data_key_columns = []\n            else:\n                data_key_columns = [\n                    (\"data\", *col) if isinstance(col, tuple) else (\"data\", col)\n                    for col in data_key_columns\n                ]\n            key_columns += data_key_columns\n        else:\n            
grouper = self\n\n        # If there's only one row partition can simply apply the function row-wise without the need to reshuffle\n        if grouper._partitions.shape[0] == 1:\n            result = grouper.apply_full_axis(\n                axis=1,\n                func=func,\n                new_columns=grouper.copy_columns_cache() if preserve_columns else None,\n            )\n            if preserve_columns:\n                result._set_axis_lengths_cache(grouper._column_widths_cache, axis=1)\n            return result\n\n        # don't want to inherit over-partitioning so doing this 'min' check\n        ideal_num_new_partitions = min(len(grouper._partitions), NPartitions.get())\n        m = len(grouper) / ideal_num_new_partitions\n        sampling_probability = (1 / m) * np.log(ideal_num_new_partitions * len(grouper))\n        # If this df is overpartitioned, we try to sample each partition with probability\n        # greater than 1, which leads to an error. In this case, we can do one of the following\n        # two things. If there is only enough rows for one partition, and we have only 1 column\n        # partition, we can just combine the overpartitioned df into one partition, and sort that\n        # partition. 
If there is enough data for more than one partition, we can tell the sorting\n        # algorithm how many partitions we want to end up with, so it samples and finds pivots\n        # according to that.\n        if sampling_probability >= 1:\n            from modin.config import MinRowPartitionSize\n\n            ideal_num_new_partitions = round(len(grouper) / MinRowPartitionSize.get())\n            if len(grouper) < MinRowPartitionSize.get() or ideal_num_new_partitions < 2:\n                # If the data is too small, we shouldn't try reshuffling/repartitioning but rather\n                # simply combine all partitions and apply the sorting to the whole dataframe\n                return grouper.combine_and_apply(func=func)\n\n            if ideal_num_new_partitions < len(grouper._partitions):\n                if len(grouper._partitions) % ideal_num_new_partitions == 0:\n                    joining_partitions = np.split(\n                        grouper._partitions, ideal_num_new_partitions\n                    )\n                else:\n                    step = round(len(grouper._partitions) / ideal_num_new_partitions)\n                    joining_partitions = np.split(\n                        grouper._partitions,\n                        range(step, len(grouper._partitions), step),\n                    )\n\n                new_partitions = np.array(\n                    [\n                        grouper._partition_mgr_cls.column_partitions(\n                            ptn_grp, full_axis=False\n                        )\n                        for ptn_grp in joining_partitions\n                    ]\n                )\n            else:\n                new_partitions = grouper._partitions\n        else:\n            new_partitions = grouper._partitions\n\n        shuffling_functions = shuffle_func_cls(\n            grouper,\n            key_columns,\n            ascending[0] if is_list_like(ascending) else ascending,\n            ideal_num_new_partitions,\n 
           level=level,\n            **kwargs,\n        )\n\n        if key_columns:\n            # here we want to get indices of those partitions that hold the key columns\n            key_indices = grouper.columns.get_indexer_for(key_columns)\n            partition_indices = np.unique(\n                np.digitize(key_indices, np.cumsum(grouper.column_widths))\n            )\n        elif level is not None:\n            # each partition contains an index, so taking the first one\n            partition_indices = [0]\n        else:\n            raise ValueError(\"Must specify either 'level' or 'key_columns'\")\n\n        new_partitions = grouper._partition_mgr_cls.shuffle_partitions(\n            new_partitions,\n            partition_indices,\n            shuffling_functions,\n            func,\n        )\n\n        result = grouper.__constructor__(new_partitions)\n        if preserve_columns:\n            result.set_columns_cache(grouper.copy_columns_cache())\n            # We perform the final steps of the sort on full axis partitions, so we know that the\n            # length of each partition is the full length of the dataframe.\n            if grouper.has_materialized_columns:\n                result._set_axis_lengths_cache([len(grouper.columns)], axis=1)\n        return result\n\n    @lazy_metadata_decorator(apply_axis=\"both\")\n    def sort_by(\n        self,\n        axis: Union[int, Axis],\n        columns: Union[str, List[str]],\n        ascending: bool = True,\n        **kwargs,\n    ) -> PandasDataframe:\n        \"\"\"\n        Logically reorder rows (columns if axis=1) lexicographically by the data in a column or set of columns.\n\n        Parameters\n        ----------\n        axis : int or modin.core.dataframe.base.utils.Axis\n            The axis to perform the sort over.\n        columns : string or list\n            Column label(s) to use to determine lexicographical ordering.\n        ascending : boolean, default: True\n            Whether 
to sort in ascending or descending order.\n        **kwargs : dict\n            Keyword arguments to pass when sorting partitions.\n\n        Returns\n        -------\n        PandasDataframe\n            A new PandasDataframe sorted into lexicographical order by the specified column(s).\n        \"\"\"\n        if not isinstance(columns, list):\n            columns = [columns]\n\n        def sort_function(df):  # pragma: no cover\n            # When we do a sort on the result of Series.value_counts, we don't rename the index until\n            # after everything is done, which causes an error when sorting the partitions, since the\n            # index and the column share the same name, when in actuality, the index's name should be\n            # None. This fixes the indexes name beforehand in that case, so that the sort works.\n            index_renaming = None\n            if any(name in df.columns for name in df.index.names):\n                index_renaming = df.index.names\n                df.index = df.index.set_names([None] * len(df.index.names))\n            df = df.sort_values(by=columns, ascending=ascending, **kwargs)\n            if index_renaming is not None:\n                df.index = df.index.set_names(index_renaming)\n            return df\n\n        # If this df is empty, we don't want to try and shuffle or sort.\n        if len(self.get_axis(1)) == 0 or len(self) == 0:\n            return self.copy()\n\n        axis = Axis(axis)\n        if axis != Axis.ROW_WISE:\n            raise NotImplementedError(\n                f\"Algebra sort only implemented row-wise. 
{axis.name} sort not implemented yet!\"\n            )\n\n        result = self._apply_func_to_range_partitioning(\n            key_columns=[columns[0]],\n            func=sort_function,\n            ascending=ascending,\n            preserve_columns=True,\n            **kwargs,\n        )\n        result.set_dtypes_cache(self.copy_dtypes_cache())\n\n        if kwargs.get(\"ignore_index\", False):\n            result.index = RangeIndex(len(self.get_axis(axis.value)))\n\n        # Since the strategy to pick our pivots involves random sampling\n        # we could end up picking poor pivots, leading to skew in our partitions.\n        # We should add a fix to check if there is skew in the partitions and rebalance\n        # them if necessary. Calling `rebalance_partitions` won't do this, since it only\n        # resolves the case where there isn't the right amount of partitions - not where\n        # there is skew across the lengths of partitions.\n        return result\n\n    @lazy_metadata_decorator(apply_axis=\"both\")\n    def filter(self, axis: Union[Axis, int], condition: Callable) -> PandasDataframe:\n        \"\"\"\n        Filter data based on the function provided along an entire axis.\n\n        Parameters\n        ----------\n        axis : int or modin.core.dataframe.base.utils.Axis\n            The axis to filter over.\n        condition : callable(row|col) -> bool\n            The function to use for the filter. 
This function should filter the\n            data itself.\n\n        Returns\n        -------\n        PandasDataframe\n            A new filtered dataframe.\n        \"\"\"\n        axis = Axis(axis)\n        assert axis in (\n            Axis.ROW_WISE,\n            Axis.COL_WISE,\n        ), \"Axis argument to filter operator must be 0 (rows) or 1 (columns)\"\n\n        new_partitions = self._partition_mgr_cls.map_axis_partitions(\n            axis.value, self._partitions, condition, keep_partitioning=True\n        )\n\n        new_axes, new_lengths = [0, 0], [0, 0]\n\n        new_axes[axis.value] = self.copy_axis_cache(axis.value, copy_lengths=True)\n        new_lengths[axis.value] = (\n            self._row_lengths_cache if axis.value == 0 else self._column_widths_cache\n        )\n        new_axes[axis.value ^ 1], new_lengths[axis.value ^ 1] = None, None\n\n        return self.__constructor__(\n            new_partitions,\n            *new_axes,\n            *new_lengths,\n            self.copy_dtypes_cache() if axis == Axis.COL_WISE else None,\n            pandas_backend=self._pandas_backend,\n        )\n\n    def filter_by_types(self, types: List[Hashable]) -> PandasDataframe:\n        \"\"\"\n        Allow the user to specify a type or set of types by which to filter the columns.\n\n        Parameters\n        ----------\n        types : list\n            The types to filter columns by.\n\n        Returns\n        -------\n        PandasDataframe\n             A new PandasDataframe from the filter provided.\n        \"\"\"\n        return self.take_2d_labels_or_positional(\n            col_positions=[i for i, dtype in enumerate(self.dtypes) if dtype in types]\n        )\n\n    @lazy_metadata_decorator(apply_axis=\"both\")\n    def explode(self, axis: Union[int, Axis], func: Callable) -> PandasDataframe:\n        \"\"\"\n        Explode list-like entries along an entire axis.\n\n        Parameters\n        ----------\n        axis : int or 
modin.core.dataframe.base.utils.Axis\n            The axis specifying how to explode. If axis=1, explode according\n            to columns.\n        func : callable\n            The function to use to explode a single element.\n\n        Returns\n        -------\n        PandasDataframe\n            A new dataframe with the list-like entries exploded.\n        \"\"\"\n        axis = Axis(axis)\n        partitions = self._partition_mgr_cls.map_axis_partitions(\n            axis.value, self._partitions, func, keep_partitioning=True\n        )\n        if axis == Axis.COL_WISE:\n            new_index, row_lengths = self._compute_axis_labels_and_lengths(\n                0, partitions\n            )\n            new_columns, column_widths = self.columns, self._column_widths_cache\n        else:\n            new_index, row_lengths = self.index, self._row_lengths_cache\n            new_columns, column_widths = self._compute_axis_labels_and_lengths(\n                1, partitions\n            )\n        return self.__constructor__(\n            partitions,\n            new_index,\n            new_columns,\n            row_lengths,\n            column_widths,\n            pandas_backend=self._pandas_backend,\n        )\n\n    def combine(self) -> PandasDataframe:\n        \"\"\"\n        Create a single partition PandasDataframe from the partitions of the current dataframe.\n\n        Returns\n        -------\n        PandasDataframe\n            A single partition PandasDataframe.\n        \"\"\"\n        new_index = None\n        new_columns = None\n        if self._deferred_index:\n            new_index = self.index\n        if self._deferred_column:\n            new_columns = self.columns\n        partitions = self._partition_mgr_cls.combine(\n            self._partitions, new_index, new_columns\n        )\n        result = self.__constructor__(\n            partitions,\n            index=self.copy_index_cache(),\n            columns=self.copy_columns_cache(),\n            row_lengths=(\n            
    [sum(self._row_lengths_cache)]\n                if self._row_lengths_cache is not None\n                else None\n            ),\n            column_widths=(\n                [sum(self._column_widths_cache)]\n                if self._column_widths_cache is not None\n                else None\n            ),\n            dtypes=self.copy_dtypes_cache(),\n            pandas_backend=self._pandas_backend,\n        )\n        return result\n\n    @lazy_metadata_decorator(apply_axis=\"both\")\n    def apply_full_axis(\n        self,\n        axis,\n        func,\n        new_index=None,\n        new_columns=None,\n        apply_indices=None,\n        enumerate_partitions: bool = False,\n        dtypes=None,\n        keep_partitioning=True,\n        num_splits=None,\n        sync_labels=True,\n        pass_axis_lengths_to_partitions=False,\n    ) -> PandasDataframe:\n        \"\"\"\n        Perform a function across an entire axis.\n\n        Parameters\n        ----------\n        axis : {0, 1}\n            The axis to apply over (0 - rows, 1 - columns).\n        func : callable\n            The function to apply.\n        new_index : list-like, optional\n            The index of the result. We may know this in advance,\n            and if not provided it must be computed.\n        new_columns : list-like, optional\n            The columns of the result. We may know this in\n            advance, and if not provided it must be computed.\n        apply_indices : list-like, optional\n            Indices of `axis ^ 1` to apply function over.\n        enumerate_partitions : bool, default: False\n            Whether pass partition index into applied `func` or not.\n            Note that `func` must be able to obtain `partition_idx` kwarg.\n        dtypes : list-like or scalar, optional\n            The data types of the result. 
This is an optimization\n            because there are functions that always result in a particular data\n            type, and allows us to avoid (re)computing it.\n        keep_partitioning : boolean, default: True\n            The flag to keep partition boundaries for Modin Frame if possible.\n            Setting it to True disables shuffling data from one partition to another in case the resulting\n            number of splits is equal to the initial number of splits.\n        num_splits : int, optional\n            The number of partitions to split the result into across the `axis`. If None, then the number\n            of splits will be inferred automatically. If `num_splits` is None and `keep_partitioning=True`\n            then the number of splits is preserved.\n        sync_labels : boolean, default: True\n            Synchronize external indexes (`new_index`, `new_columns`) with internal indexes.\n            This could be used when you're certain that the indices in partitions are equal to\n            the provided hints in order to save time on syncing them.\n        pass_axis_lengths_to_partitions : bool, default: False\n            Whether pass partition lengths along `axis ^ 1` to the kernel `func`.\n            Note that `func` must be able to obtain `df, *axis_lengths`.\n\n        Returns\n        -------\n        PandasDataframe\n            A new dataframe.\n\n        Notes\n        -----\n        The data shape may change as a result of the function.\n        \"\"\"\n        return self.broadcast_apply_full_axis(\n            axis=axis,\n            func=func,\n            new_index=new_index,\n            new_columns=new_columns,\n            apply_indices=apply_indices,\n            enumerate_partitions=enumerate_partitions,\n            dtypes=dtypes,\n            other=None,\n            keep_partitioning=keep_partitioning,\n            num_splits=num_splits,\n            sync_labels=sync_labels,\n            
pass_axis_lengths_to_partitions=pass_axis_lengths_to_partitions,\n        )\n\n    @lazy_metadata_decorator(apply_axis=\"both\")\n    def apply_full_axis_select_indices(\n        self,\n        axis,\n        func,\n        apply_indices=None,\n        numeric_indices=None,\n        new_index=None,\n        new_columns=None,\n        keep_remaining=False,\n        new_dtypes: Optional[Union[pandas.Series, ModinDtypes]] = None,\n    ):\n        \"\"\"\n        Apply a function across an entire axis for a subset of the data.\n\n        Parameters\n        ----------\n        axis : int\n            The axis to apply over.\n        func : callable\n            The function to apply.\n        apply_indices : list-like, optional\n            The labels to apply over.\n        numeric_indices : list-like, optional\n            The indices to apply over.\n        new_index : list-like, optional\n            The index of the result. We may know this in advance,\n            and if not provided it must be computed.\n        new_columns : list-like, optional\n            The columns of the result. We may know this in\n            advance, and if not provided it must be computed.\n        keep_remaining : boolean, default: False\n            Whether or not to drop the data that is not computed over.\n        new_dtypes : ModinDtypes or pandas.Series, optional\n            The data types of the result. 
This is an optimization\n            because there are functions that always result in a particular data\n            type, and allows us to avoid (re)computing it.\n\n        Returns\n        -------\n        PandasDataframe\n            A new dataframe.\n        \"\"\"\n        assert apply_indices is not None or numeric_indices is not None\n        # Convert indices to numeric indices\n        old_index = self.index if axis else self.columns\n        if apply_indices is not None:\n            numeric_indices = old_index.get_indexer_for(apply_indices)\n        # Get the indices for the axis being applied to (it is the opposite of axis\n        # being applied over)\n        dict_indices = self._get_dict_of_block_index(axis ^ 1, numeric_indices)\n        new_partitions = (\n            self._partition_mgr_cls.apply_func_to_select_indices_along_full_axis(\n                axis,\n                self._partitions,\n                func,\n                dict_indices,\n                keep_remaining=keep_remaining,\n            )\n        )\n        # TODO Infer columns and index from `keep_remaining` and `apply_indices`\n        if new_index is None:\n            new_index = self.index if axis == 1 else None\n        if new_columns is None:\n            new_columns = self.columns if axis == 0 else None\n        return self.__constructor__(\n            new_partitions,\n            new_index,\n            new_columns,\n            None,\n            None,\n            dtypes=new_dtypes,\n            pandas_backend=self._pandas_backend,\n        )\n\n    @lazy_metadata_decorator(apply_axis=\"both\")\n    def apply_select_indices(\n        self,\n        axis,\n        func,\n        apply_indices=None,\n        row_labels=None,\n        col_labels=None,\n        new_index=None,\n        new_columns=None,\n        new_dtypes: Optional[pandas.Series] = None,\n        keep_remaining=False,\n        item_to_distribute=no_default,\n    ) -> PandasDataframe:\n        
\"\"\"\n        Apply a function for a subset of the data.\n\n        Parameters\n        ----------\n        axis : {0, 1}\n            The axis to apply over.\n        func : callable\n            The function to apply.\n        apply_indices : list-like, optional\n            The labels to apply over. Must be given if axis is provided.\n        row_labels : list-like, optional\n            The row labels to apply over. Must be provided with\n            `col_labels` to apply over both axes.\n        col_labels : list-like, optional\n            The column labels to apply over. Must be provided\n            with `row_labels` to apply over both axes.\n        new_index : list-like, optional\n            The index of the result, if known in advance.\n        new_columns : list-like, optional\n            The columns of the result, if known in advance.\n        new_dtypes : pandas.Series, optional\n            The dtypes of the result, if known in advance.\n        keep_remaining : boolean, default: False\n            Whether or not to drop the data that is not computed over.\n        item_to_distribute : np.ndarray or scalar, default: no_default\n            The item to split up so it can be applied over both axes.\n\n        Returns\n        -------\n        PandasDataframe\n            A new dataframe.\n        \"\"\"\n        # TODO Infer columns and index from `keep_remaining` and `apply_indices`\n        if new_index is None:\n            new_index = self.index if axis == 1 else None\n        if new_columns is None:\n            new_columns = self.columns if axis == 0 else None\n        if new_columns is not None and isinstance(new_dtypes, pandas.Series):\n            assert new_dtypes.index.equals(\n                new_columns\n            ), f\"{new_dtypes=} doesn't have the same columns as in {new_columns=}\"\n\n        if axis is not None:\n            assert apply_indices is not None\n            # Convert indices to numeric indices\n            old_index 
= self.index if axis else self.columns\n            numeric_indices = old_index.get_indexer_for(apply_indices)\n            # Get indices being applied to (opposite of indices being applied over)\n            dict_indices = self._get_dict_of_block_index(axis ^ 1, numeric_indices)\n            new_partitions = self._partition_mgr_cls.apply_func_to_select_indices(\n                axis,\n                self._partitions,\n                func,\n                dict_indices,\n                keep_remaining=keep_remaining,\n            )\n            # Length objects for new object creation. This is shorter than if..else\n            # This object determines the lengths and widths based on the given\n            # parameters and builds a dictionary used in the constructor below. 0 gives\n            # the row lengths and 1 gives the column widths. Since the dimension of\n            # `axis` given may have changed, we currently just recompute it.\n            # TODO Determine lengths from current lengths if `keep_remaining=False`\n            lengths_objs = {\n                axis: (\n                    [len(apply_indices)]\n                    if not keep_remaining\n                    else [self.row_lengths, self.column_widths][axis]\n                ),\n                axis ^ 1: [self.row_lengths, self.column_widths][axis ^ 1],\n            }\n            return self.__constructor__(\n                new_partitions,\n                new_index,\n                new_columns,\n                lengths_objs[0],\n                lengths_objs[1],\n                new_dtypes,\n                pandas_backend=self._pandas_backend,\n            )\n        else:\n            # We are applying over both axes here, so make sure we have all the right\n            # variables set.\n            assert row_labels is not None and col_labels is not None\n            assert keep_remaining\n            assert item_to_distribute is not no_default\n            row_partitions_list = 
self._get_dict_of_block_index(0, row_labels).items()\n            col_partitions_list = self._get_dict_of_block_index(1, col_labels).items()\n            new_partitions = self._partition_mgr_cls.apply_func_to_indices_both_axis(\n                self._partitions,\n                func,\n                row_partitions_list,\n                col_partitions_list,\n                item_to_distribute,\n                # Passing caches instead of values in order to not trigger shapes recomputation\n                # if they are not used inside this function.\n                self._row_lengths_cache,\n                self._column_widths_cache,\n            )\n            return self.__constructor__(\n                new_partitions,\n                new_index,\n                new_columns,\n                self._row_lengths_cache,\n                self._column_widths_cache,\n                new_dtypes,\n                pandas_backend=self._pandas_backend,\n            )\n\n    @lazy_metadata_decorator(apply_axis=\"both\")\n    def broadcast_apply(\n        self,\n        axis,\n        func,\n        other,\n        join_type=\"left\",\n        copartition=True,\n        labels=\"keep\",\n        dtypes=None,\n    ):\n        \"\"\"\n        Broadcast axis partitions of `other` to partitions of `self` and apply a function.\n\n        Parameters\n        ----------\n        axis : {0, 1}\n            Axis to broadcast over.\n        func : callable\n            Function to apply.\n        other : PandasDataframe\n            Modin DataFrame to broadcast.\n        join_type : str, default: \"left\"\n            Type of join to apply.\n        copartition : bool, default: True\n            Whether to align indices/partitioning of the `self` and `other` frame.\n            Disabling this may save some time, however, you have to be 100% sure that\n            the indexing and partitioning are identical along the broadcasting axis,\n            this might be the case for example 
if `other` is a projection of the `self`\n            or vice-versa. If copartitioning is disabled and partitioning/indexing are\n            incompatible then you may end up with undefined behavior.\n        labels : {\"keep\", \"replace\", \"drop\"}, default: \"keep\"\n            Whether keep labels from `self` Modin DataFrame, replace them with labels\n            from joined DataFrame or drop altogether to make them be computed lazily later.\n        dtypes : \"copy\", pandas.Series or None, optional\n            Dtypes of the result. \"copy\" to keep old dtypes and None to compute them on demand.\n\n        Returns\n        -------\n        PandasDataframe\n            New Modin DataFrame.\n        \"\"\"\n        if copartition:\n            # Only sort the indices if they do not match\n            (\n                left_parts,\n                right_parts,\n                joined_index,\n                partition_sizes_along_axis,\n            ) = self._copartition(\n                axis,\n                other,\n                join_type,\n            )\n            # unwrap list returned by `copartition`.\n            right_parts = right_parts[0]\n        else:\n            left_parts = self._partitions\n            right_parts = other._partitions\n            partition_sizes_along_axis, joined_index = self._get_axis_lengths_cache(\n                axis\n            ), self.copy_axis_cache(axis)\n\n        new_frame = self._partition_mgr_cls.broadcast_apply(\n            axis, func, left_parts, right_parts\n        )\n        if isinstance(dtypes, str) and dtypes == \"copy\":\n            dtypes = self.copy_dtypes_cache()\n\n        def _pick_axis(get_axis, sizes_cache):\n            if labels == \"keep\":\n                return get_axis(), sizes_cache\n            if labels == \"replace\":\n                return joined_index, partition_sizes_along_axis\n            assert labels == \"drop\", f\"Unexpected `labels`: {labels}\"\n            return None, 
None\n\n        if axis == 0:\n            # Pass shape caches instead of values in order to not trigger shape computation.\n            new_index, new_row_lengths = _pick_axis(\n                self.copy_index_cache, self._row_lengths_cache\n            )\n            new_columns, new_column_widths = (\n                self.copy_columns_cache(),\n                self._column_widths_cache,\n            )\n        else:\n            new_index, new_row_lengths = (\n                self.copy_index_cache(),\n                self._row_lengths_cache,\n            )\n            new_columns, new_column_widths = _pick_axis(\n                self.copy_columns_cache, self._column_widths_cache\n            )\n\n        return self.__constructor__(\n            new_frame,\n            new_index,\n            new_columns,\n            new_row_lengths,\n            new_column_widths,\n            dtypes=dtypes,\n            pandas_backend=self._pandas_backend,\n        )\n\n    def _prepare_frame_to_broadcast(self, axis, indices, broadcast_all):\n        \"\"\"\n        Compute the indices to broadcast `self` considering `indices`.\n\n        Parameters\n        ----------\n        axis : {0, 1}\n            Axis to broadcast along.\n        indices : dict\n            Dict of indices and internal indices of partitions where `self` must\n            be broadcasted.\n        broadcast_all : bool\n            Whether broadcast the whole axis of `self` frame or just a subset of it.\n\n        Returns\n        -------\n        dict\n            Dictionary with indices of partitions to broadcast.\n\n        Notes\n        -----\n        New dictionary of indices of `self` partitions represents that\n        you want to broadcast `self` at specified another partition named `other`. 
For example,\n        Dictionary {key: {key1: [0, 1], key2: [5]}} means, that in `other`[key] you want to\n        broadcast [self[key1], self[key2]] partitions and internal indices for `self` must be [[0, 1], [5]]\n        \"\"\"\n        if broadcast_all:\n            sizes = self.row_lengths if axis else self.column_widths\n            return {key: dict(enumerate(sizes)) for key in indices.keys()}\n        passed_len = 0\n        result_dict = {}\n        for part_num, internal in indices.items():\n            result_dict[part_num] = self._get_dict_of_block_index(\n                axis ^ 1, np.arange(passed_len, passed_len + len(internal))\n            )\n            passed_len += len(internal)\n        return result_dict\n\n    def _extract_partitions(self):\n        \"\"\"\n        Extract partitions if partitions are present.\n\n        If partitions are empty return a dummy partition with empty data but\n        index and columns of current dataframe.\n\n        Returns\n        -------\n        np.ndarray\n            NumPy array with extracted partitions.\n        \"\"\"\n        if self._partitions.size > 0:\n            return self._partitions\n        else:\n            dtypes = None\n            if self.has_materialized_dtypes:\n                dtypes = self.dtypes\n            return self._partition_mgr_cls.create_partition_from_metadata(\n                index=self.index, columns=self.columns, dtypes=dtypes\n            )\n\n    @lazy_metadata_decorator(apply_axis=\"both\")\n    def broadcast_apply_select_indices(\n        self,\n        axis,\n        func,\n        other: PandasDataframe,\n        apply_indices=None,\n        numeric_indices=None,\n        keep_remaining=False,\n        broadcast_all=True,\n        new_index=None,\n        new_columns=None,\n    ) -> PandasDataframe:\n        \"\"\"\n        Apply a function to select indices at specified axis and broadcast partitions of `other` Modin DataFrame.\n\n        Parameters\n        
----------\n        axis : {0, 1}\n            Axis to apply function along.\n        func : callable\n            Function to apply.\n        other : PandasDataframe\n            Partitions of which should be broadcasted.\n        apply_indices : list, optional\n            List of labels to apply (if `numeric_indices` are not specified).\n        numeric_indices : list, optional\n            Numeric indices to apply (if `apply_indices` are not specified).\n        keep_remaining : bool, default: False\n            Whether drop the data that is not computed over or not.\n        broadcast_all : bool, default: True\n            Whether broadcast the whole axis of right frame to every\n            partition or just a subset of it.\n        new_index : pandas.Index, optional\n            Index of the result. We may know this in advance,\n            and if not provided it must be computed.\n        new_columns : pandas.Index, optional\n            Columns of the result. We may know this in advance,\n            and if not provided it must be computed.\n\n        Returns\n        -------\n        PandasDataframe\n            New Modin DataFrame.\n        \"\"\"\n        assert (\n            apply_indices is not None or numeric_indices is not None\n        ), \"Indices to apply must be specified!\"\n\n        if other is None:\n            if apply_indices is None:\n                apply_indices = self.get_axis(axis)[numeric_indices]\n            return self.apply_select_indices(\n                axis=axis,\n                func=func,\n                apply_indices=apply_indices,\n                keep_remaining=keep_remaining,\n                new_index=new_index,\n                new_columns=new_columns,\n            )\n\n        if numeric_indices is None:\n            old_index = self.index if axis else self.columns\n            numeric_indices = old_index.get_indexer_for(apply_indices)\n\n        dict_indices = self._get_dict_of_block_index(axis ^ 1, 
numeric_indices)\n        broadcasted_dict = other._prepare_frame_to_broadcast(\n            axis, dict_indices, broadcast_all=broadcast_all\n        )\n        new_partitions = self._partition_mgr_cls.broadcast_apply_select_indices(\n            axis,\n            func,\n            self._partitions,\n            other._partitions,\n            dict_indices,\n            broadcasted_dict,\n            keep_remaining,\n        )\n        return self.__constructor__(\n            new_partitions,\n            index=new_index,\n            columns=new_columns,\n            pandas_backend=self._pandas_backend,\n        )\n\n    @lazy_metadata_decorator(apply_axis=\"both\")\n    def broadcast_apply_full_axis(\n        self,\n        axis,\n        func,\n        other,\n        new_index=None,\n        new_columns=None,\n        apply_indices=None,\n        enumerate_partitions=False,\n        dtypes=None,\n        keep_partitioning=True,\n        num_splits=None,\n        sync_labels=True,\n        pass_axis_lengths_to_partitions=False,\n    ):\n        \"\"\"\n        Broadcast partitions of `other` Modin DataFrame and apply a function along full axis.\n\n        Parameters\n        ----------\n        axis : {0, 1}\n            Axis to apply over (0 - rows, 1 - columns).\n        func : callable\n            Function to apply.\n        other : PandasDataframe or list\n            Modin DataFrame(s) to broadcast.\n        new_index : list-like, optional\n            Index of the result. We may know this in advance,\n            and if not provided it must be computed.\n        new_columns : list-like, optional\n            Columns of the result. 
We may know this in\n            advance, and if not provided it must be computed.\n        apply_indices : list-like, optional\n            Indices of `axis ^ 1` to apply function over.\n        enumerate_partitions : bool, default: False\n            Whether pass partition index into applied `func` or not.\n            Note that `func` must be able to obtain `partition_idx` kwarg.\n        dtypes : list-like or scalar, optional\n            Data types of the result. This is an optimization\n            because there are functions that always result in a particular data\n            type, and allows us to avoid (re)computing it.\n        keep_partitioning : boolean, default: True\n            The flag to keep partition boundaries for Modin Frame if possible.\n            Setting it to True disables shuffling data from one partition to another in case the resulting\n            number of splits is equal to the initial number of splits.\n        num_splits : int, optional\n            The number of partitions to split the result into across the `axis`. If None, then the number\n            of splits will be inferred automatically. 
If `num_splits` is None and `keep_partitioning=True`\n            then the number of splits is preserved.\n        sync_labels : boolean, default: True\n            Synchronize external indexes (`new_index`, `new_columns`) with internal indexes.\n            This could be used when you're certain that the indices in partitions are equal to\n            the provided hints in order to save time on syncing them.\n        pass_axis_lengths_to_partitions : bool, default: False\n            Whether pass partition lengths along `axis ^ 1` to the kernel `func`.\n            Note that `func` must be able to obtain `df, *axis_lengths`.\n\n        Returns\n        -------\n        PandasDataframe\n            New Modin DataFrame.\n        \"\"\"\n        if other is not None:\n            if not isinstance(other, list):\n                other = [other]\n            other = [o._extract_partitions() for o in other] if len(other) else None\n\n        if apply_indices is not None:\n            numeric_indices = self.get_axis(axis ^ 1).get_indexer_for(apply_indices)\n            apply_indices = self._get_dict_of_block_index(\n                axis ^ 1, numeric_indices\n            ).keys()\n\n        apply_func_args = None\n        if pass_axis_lengths_to_partitions:\n            if axis == 0:\n                apply_func_args = (\n                    self._column_widths_cache\n                    if self._column_widths_cache is not None\n                    else [part.width(materialize=False) for part in self._partitions[0]]\n                )\n            else:\n                apply_func_args = (\n                    self._row_lengths_cache\n                    if self._row_lengths_cache is not None\n                    else [\n                        part.length(materialize=False) for part in self._partitions.T[0]\n                    ]\n                )\n\n        new_partitions = self._partition_mgr_cls.broadcast_axis_partitions(\n            axis=axis,\n            
left=self._partitions,\n            right=other,\n            apply_func=self._build_treereduce_func(axis, func),\n            apply_indices=apply_indices,\n            enumerate_partitions=enumerate_partitions,\n            keep_partitioning=keep_partitioning,\n            num_splits=num_splits,\n            apply_func_args=apply_func_args,\n        )\n        kw = {\"row_lengths\": None, \"column_widths\": None}\n        if isinstance(dtypes, str) and dtypes == \"copy\":\n            kw[\"dtypes\"] = self.copy_dtypes_cache()\n        elif isinstance(dtypes, DtypesDescriptor):\n            kw[\"dtypes\"] = ModinDtypes(dtypes)\n        elif dtypes is not None:\n            if isinstance(dtypes, (pandas.Series, ModinDtypes)):\n                kw[\"dtypes\"] = dtypes.copy()\n            else:\n                if new_columns is None:\n                    assert not is_list_like(dtypes)\n                    dtype = pandas.api.types.pandas_dtype(dtypes)\n                    kw[\"dtypes\"] = ModinDtypes(DtypesDescriptor(remaining_dtype=dtype))\n                else:\n                    kw[\"dtypes\"] = (\n                        pandas.Series(dtypes, index=new_columns)\n                        if is_list_like(dtypes)\n                        else pandas.Series(\n                            [pandas.api.types.pandas_dtype(dtypes)] * len(new_columns),\n                            index=new_columns,\n                        )\n                    )\n        is_index_materialized = ModinIndex.is_materialized_index(new_index)\n        is_columns_materialized = ModinIndex.is_materialized_index(new_columns)\n        if axis == 0:\n            if (\n                is_columns_materialized\n                and len(new_partitions.shape) > 1\n                and new_partitions.shape[1] == 1\n            ):\n                kw[\"column_widths\"] = [len(new_columns)]\n        elif axis == 1:\n            if is_index_materialized and new_partitions.shape[0] == 1:\n                
kw[\"row_lengths\"] = [len(new_index)]\n        if not keep_partitioning:\n            if kw[\"row_lengths\"] is None and is_index_materialized:\n                if axis == 0:\n                    kw[\"row_lengths\"] = get_length_list(\n                        axis_len=len(new_index),\n                        num_splits=new_partitions.shape[0],\n                        min_block_size=MinRowPartitionSize.get(),\n                    )\n                elif axis == 1:\n                    if self._row_lengths_cache is not None and len(new_index) == sum(\n                        self._row_lengths_cache\n                    ):\n                        kw[\"row_lengths\"] = self._row_lengths_cache\n            if kw[\"column_widths\"] is None and is_columns_materialized:\n                if axis == 1:\n                    kw[\"column_widths\"] = get_length_list(\n                        axis_len=len(new_columns),\n                        num_splits=new_partitions.shape[1],\n                        min_block_size=MinColumnPartitionSize.get(),\n                    )\n                elif axis == 0:\n                    if self._column_widths_cache is not None and len(\n                        new_columns\n                    ) == sum(self._column_widths_cache):\n                        kw[\"column_widths\"] = self._column_widths_cache\n        else:\n            if axis == 0:\n                if (\n                    kw[\"row_lengths\"] is None\n                    and self._row_lengths_cache is not None\n                    and is_index_materialized\n                    and len(new_index) == sum(self._row_lengths_cache)\n                    # to avoid problems that may arise when filtering empty dataframes\n                    and all(r != 0 for r in self._row_lengths_cache)\n                ):\n                    kw[\"row_lengths\"] = self._row_lengths_cache\n            elif axis == 1:\n                if (\n                    kw[\"column_widths\"] is None\n          
          and self._column_widths_cache is not None\n                    and is_columns_materialized\n                    and len(new_columns) == sum(self._column_widths_cache)\n                    # to avoid problems that may arise when filtering empty dataframes\n                    and all(w != 0 for w in self._column_widths_cache)\n                ):\n                    kw[\"column_widths\"] = self._column_widths_cache\n\n        result = self.__constructor__(\n            new_partitions,\n            index=new_index,\n            columns=new_columns,\n            **kw,\n            pandas_backend=self._pandas_backend,\n        )\n        if sync_labels and new_index is not None:\n            result.synchronize_labels(axis=0)\n        if sync_labels and new_columns is not None:\n            result.synchronize_labels(axis=1)\n        return result\n\n    def _check_if_axes_identical(self, other: PandasDataframe, axis: int = 0) -> bool:\n        \"\"\"\n        Check whether indices/partitioning along the specified `axis` are identical when compared with `other`.\n\n        Parameters\n        ----------\n        other : PandasDataframe\n            Dataframe to compare indices/partitioning with.\n        axis : int, default: 0\n\n        Returns\n        -------\n        bool\n        \"\"\"\n        if self.has_axis_cache(axis) and other.has_axis_cache(axis):\n            self_cache, other_cache = self._get_axis_cache(axis), other._get_axis_cache(\n                axis\n            )\n            equal_indices = self_cache.equals(other_cache)\n            if equal_indices:\n                equal_lengths = self_cache.compare_partition_lengths_if_possible(\n                    other_cache\n                )\n                if isinstance(equal_lengths, bool):\n                    return equal_lengths\n                return self._get_axis_lengths(axis) == other._get_axis_lengths(axis)\n            return False\n        return self.get_axis(axis).equals(\n        
    other.get_axis(axis)\n        ) and self._get_axis_lengths(axis) == other._get_axis_lengths(axis)\n\n    def _copartition(\n        self, axis, other, how, sort=None, force_repartition=False, fill_value=None\n    ):\n        \"\"\"\n        Copartition two Modin DataFrames.\n\n        Perform aligning of partitions, index and partition blocks.\n\n        Parameters\n        ----------\n        axis : {0, 1}\n            Axis to copartition along (0 - rows, 1 - columns).\n        other : PandasDataframe\n            Other Modin DataFrame(s) to copartition against.\n        how : str\n            How to manage joining the index object (\"left\", \"right\", etc.).\n        sort : bool, default: None\n            Whether to sort the joined index or not.\n            If ``None``, sorting is enabled only when the labels along the axis differ between frames.\n        force_repartition : bool, default: False\n            Whether to force the repartitioning or not. By default,\n            this method will skip repartitioning if it is possible. This is because\n            reindexing is extremely inefficient. Because this method is used to\n            `join` or `append`, it is vital that the internal indices match.\n        fill_value : any, optional\n            Value to use for missing values.\n\n        Returns\n        -------\n        tuple\n            Tuple containing:\n                1) 2-d NumPy array of aligned left partitions\n                2) list of 2-d NumPy arrays of aligned right partitions\n                3) joined index along ``axis``, may be ``ModinIndex`` if not materialized\n                4) If materialized, list with sizes of partitions along axis that partitioning\n                   was done on, otherwise ``None``. 
This list will be empty if and only if all\n                   the frames are empty.\n        \"\"\"\n        if isinstance(other, type(self)):\n            other = [other]\n\n        if not force_repartition and all(\n            o._check_if_axes_identical(self, axis) for o in other\n        ):\n            return (\n                self._partitions,\n                [o._partitions for o in other],\n                self.copy_axis_cache(axis, copy_lengths=True),\n                self._get_axis_lengths_cache(axis),\n            )\n\n        if sort is None:\n            sort = not all(self.get_axis(axis).equals(o.get_axis(axis)) for o in other)\n\n        self_index = self.get_axis(axis)\n        others_index = [o.get_axis(axis) for o in other]\n        joined_index, make_reindexer = self._join_index_objects(\n            axis, [self_index] + others_index, how, sort, fill_value\n        )\n\n        frames = [self] + other\n        non_empty_frames_idx = [\n            i for i, o in enumerate(frames) if o._partitions.size != 0\n        ]\n\n        # If all frames are empty\n        if len(non_empty_frames_idx) == 0:\n            return (\n                self._partitions,\n                [o._partitions for o in other],\n                joined_index,\n                # There are no partition sizes because the resulting dataframe\n                # has no partitions.\n                [],\n            )\n\n        base_frame_idx = non_empty_frames_idx[0]\n        other_frames = frames[base_frame_idx + 1 :]\n\n        # Picking first non-empty frame\n        base_frame = frames[non_empty_frames_idx[0]]\n        base_index = base_frame.get_axis(axis)\n\n        # define conditions for reindexing and repartitioning `self` frame\n        do_reindex_base = not base_index.equals(joined_index)\n        do_repartition_base = force_repartition or do_reindex_base\n\n        # Perform repartitioning and reindexing for `base_frame` if needed.\n        # Also define length of 
base and frames. We will need to know the\n        # lengths for alignment.\n        if do_repartition_base:\n            reindexed_base = base_frame._partition_mgr_cls.map_axis_partitions(\n                axis,\n                base_frame._partitions,\n                make_reindexer(do_reindex_base, base_frame_idx),\n            )\n            if axis:\n                base_lengths = [obj.width() for obj in reindexed_base[0]]\n            else:\n                base_lengths = [obj.length() for obj in reindexed_base.T[0]]\n        else:\n            reindexed_base = base_frame._partitions\n            base_lengths = base_frame.column_widths if axis else base_frame.row_lengths\n\n        others_lengths = [o._get_axis_lengths(axis) for o in other_frames]\n\n        # define conditions for reindexing and repartitioning `other` frames\n        do_reindex_others = [\n            not o.get_axis(axis).equals(joined_index) for o in other_frames\n        ]\n\n        do_repartition_others = [None] * len(other_frames)\n        for i in range(len(other_frames)):\n            do_repartition_others[i] = (\n                force_repartition\n                or do_reindex_others[i]\n                or others_lengths[i] != base_lengths\n            )\n\n        # perform repartitioning and reindexing for `other_frames` if needed\n        reindexed_other_list = [None] * len(other_frames)\n        for i in range(len(other_frames)):\n            if do_repartition_others[i]:\n                # indices of others frame start from `base_frame_idx` + 1\n                reindexed_other_list[i] = other_frames[\n                    i\n                ]._partition_mgr_cls.map_axis_partitions(\n                    axis,\n                    other_frames[i]._partitions,\n                    make_reindexer(do_repartition_others[i], base_frame_idx + 1 + i),\n                    lengths=base_lengths,\n                )\n            else:\n                reindexed_other_list[i] = 
other_frames[i]._partitions\n        reindexed_frames = (\n            [frames[i]._partitions for i in range(base_frame_idx)]\n            + [reindexed_base]\n            + reindexed_other_list\n        )\n        return (reindexed_frames[0], reindexed_frames[1:], joined_index, base_lengths)\n\n    @lazy_metadata_decorator(apply_axis=\"both\")\n    def n_ary_op(\n        self,\n        op,\n        right_frames: list[PandasDataframe],\n        join_type=\"outer\",\n        sort=None,\n        copartition_along_columns=True,\n        labels=\"replace\",\n        dtypes: Optional[pandas.Series] = None,\n    ) -> PandasDataframe:\n        \"\"\"\n        Perform an n-ary operation by joining with other Modin DataFrame(s).\n\n        Parameters\n        ----------\n        op : callable\n            Function to apply after the join.\n        right_frames : list of PandasDataframe\n            Modin DataFrames to join with.\n        join_type : str, default: \"outer\"\n            Type of join to apply.\n        sort : bool, default: None\n            Whether to sort index and columns or not.\n        copartition_along_columns : bool, default: True\n            Whether to perform copartitioning along columns or not.\n            For some ops this isn't needed (e.g., `fillna`).\n        labels : {\"replace\", \"drop\"}, default: \"replace\"\n            Whether to use labels from the joined DataFrame or drop them altogether so that\n            they are computed lazily later.\n        dtypes : pandas.Series, optional\n            Dtypes of the resultant dataframe; this argument is\n            passed when the resultant dtypes of the n-ary operation are precomputed.\n\n        Returns\n        -------\n        PandasDataframe\n            New Modin DataFrame.\n        \"\"\"\n        left_parts, list_of_right_parts, joined_index, row_lengths = self._copartition(\n            0,\n            right_frames,\n            join_type,\n            sort=sort,\n        )\n        if 
copartition_along_columns:\n            new_left_frame = self.__constructor__(\n                left_parts,\n                joined_index,\n                self.copy_columns_cache(copy_lengths=True),\n                row_lengths,\n                self._column_widths_cache,\n                pandas_backend=self._pandas_backend,\n            )\n            new_right_frames = [\n                self.__constructor__(\n                    right_parts,\n                    joined_index,\n                    right_frame.copy_columns_cache(copy_lengths=True),\n                    row_lengths,\n                    right_frame._column_widths_cache,\n                    pandas_backend=self._pandas_backend,\n                )\n                for right_parts, right_frame in zip(list_of_right_parts, right_frames)\n            ]\n\n            (\n                left_parts,\n                list_of_right_parts,\n                joined_columns,\n                column_widths,\n            ) = new_left_frame._copartition(\n                1,\n                new_right_frames,\n                join_type,\n                sort=sort,\n            )\n        else:\n            joined_columns = self.copy_columns_cache(copy_lengths=True)\n            column_widths = self._column_widths_cache\n\n        new_frame = (\n            np.array([])\n            if len(left_parts) == 0\n            or any(len(right_parts) == 0 for right_parts in list_of_right_parts)\n            else self._partition_mgr_cls.n_ary_operation(\n                left_parts, op, list_of_right_parts\n            )\n        )\n        if labels == \"drop\":\n            joined_index = joined_columns = row_lengths = column_widths = None\n\n        return self.__constructor__(\n            new_frame,\n            joined_index,\n            joined_columns,\n            row_lengths,\n            column_widths,\n            dtypes,\n            pandas_backend=self._pandas_backend,\n        )\n\n    
@lazy_metadata_decorator(apply_axis=\"both\")\n    def concat(\n        self,\n        axis: Union[int, Axis],\n        others: Union[PandasDataframe, List[PandasDataframe]],\n        how,\n        sort,\n    ) -> PandasDataframe:\n        \"\"\"\n        Concatenate `self` with one or more other Modin DataFrames.\n\n        Parameters\n        ----------\n        axis : int or modin.core.dataframe.base.utils.Axis\n            Axis to concatenate over.\n        others : list\n            List of Modin DataFrames to concatenate with.\n        how : str\n            Type of join to use for the axis.\n        sort : bool\n            Whether sort the result or not.\n\n        Returns\n        -------\n        PandasDataframe\n            New Modin DataFrame.\n        \"\"\"\n        axis = Axis(axis)\n        new_widths = None\n        new_lengths = None\n\n        def _compute_new_widths():\n            widths = None\n            if self._column_widths_cache is not None and all(\n                o._column_widths_cache is not None for o in others\n            ):\n                widths = self._column_widths_cache + [\n                    width for o in others for width in o._column_widths_cache\n                ]\n            return widths\n\n        # Fast path for equivalent columns and partitioning\n        if axis == Axis.ROW_WISE and all(\n            o._check_if_axes_identical(self, axis=1) for o in others\n        ):\n            joined_index = self.copy_columns_cache(copy_lengths=True)\n            left_parts = self._partitions\n            right_parts = [o._partitions for o in others]\n            new_widths = self._column_widths_cache\n        elif axis == Axis.COL_WISE and all(\n            o._check_if_axes_identical(self, axis=0) for o in others\n        ):\n            joined_index = self.copy_index_cache(copy_lengths=True)\n            left_parts = self._partitions\n            right_parts = [o._partitions for o in others]\n            new_lengths = 
self._row_lengths_cache\n            # we can only do this for COL_WISE because `concat` might rebalance partitions for ROW_WISE\n            new_widths = _compute_new_widths()\n        else:\n            (\n                left_parts,\n                right_parts,\n                joined_index,\n                partition_sizes_along_axis,\n            ) = self._copartition(\n                axis.value ^ 1, others, how, sort=sort, force_repartition=False\n            )\n            if axis == Axis.COL_WISE:\n                new_lengths = partition_sizes_along_axis\n                new_widths = _compute_new_widths()\n            else:\n                new_widths = partition_sizes_along_axis\n        new_partitions, new_lengths2 = self._partition_mgr_cls.concat(\n            axis.value, left_parts, right_parts\n        )\n        if new_lengths is None:\n            new_lengths = new_lengths2\n        new_dtypes = None\n        new_index = None\n        new_columns = None\n        if axis == Axis.ROW_WISE:\n            if all(obj.has_materialized_index for obj in (self, *others)):\n                new_index = self.index.append([other.index for other in others])\n            new_columns = joined_index\n            frames = [self] + others\n            # TODO: should we wrap all `concat` call into \"try except\" block?\n            # `ModinDtypes.concat` can throw exception in case of duplicate values\n            new_dtypes = ModinDtypes.concat([frame._dtypes for frame in frames], axis=1)\n            # If we have already cached the length of each row in at least one\n            # of the row's partitions, we can build new_lengths for the new\n            # frame. Typically, if we know the length for any partition in a\n            # row, we know the length for the first partition in the row. 
So\n            # just check the lengths of the first column of partitions.\n            if not new_lengths:\n                new_lengths = []\n                if new_partitions.size > 0:\n                    if all(\n                        part._length_cache is not None for part in new_partitions.T[0]\n                    ):\n                        new_lengths = self._get_lengths(new_partitions.T[0], axis)\n                    else:\n                        new_lengths = None\n        else:\n            if all(obj.has_materialized_columns for obj in (self, *others)):\n                new_columns = self.columns.append([other.columns for other in others])\n            new_index = joined_index\n            try:\n                new_dtypes = ModinDtypes.concat(\n                    [self.copy_dtypes_cache()] + [o.copy_dtypes_cache() for o in others]\n                )\n            except NotImplementedError:\n                new_dtypes = None\n            # If we have already cached the width of each column in at least one\n            # of the column's partitions, we can build new_widths for the new\n            # frame. 
Typically, if we know the width for any partition in a\n            # column, we know the width for the first partition in the column.\n            # So just check the widths of the first row of partitions.\n            if not new_widths:\n                new_widths = []\n                if new_partitions.size > 0:\n                    if all(part._width_cache is not None for part in new_partitions[0]):\n                        new_widths = self._get_lengths(new_partitions[0], axis)\n                    else:\n                        new_widths = None\n\n        return self.__constructor__(\n            new_partitions,\n            new_index,\n            new_columns,\n            new_lengths,\n            new_widths,\n            new_dtypes,\n            pandas_backend=self._pandas_backend,\n        )\n\n    def _apply_func_to_range_partitioning_broadcast(\n        self,\n        right,\n        func,\n        key,\n        new_index=None,\n        new_columns=None,\n        new_dtypes: Optional[Union[ModinDtypes, pandas.Series]] = None,\n    ):\n        \"\"\"\n        Apply `func` against two dataframes using range-partitioning implementation.\n\n        The method first builds range-partitioning for both dataframes using the data from\n        `self[key]`, after that, it applies `func` row-wise to `self` frame and\n        broadcasts row-parts of `right` to `self`.\n\n        Parameters\n        ----------\n        right : PandasDataframe\n        func : callable(left : pandas.DataFrame, right : pandas.DataFrame) -> pandas.DataFrame\n        key : list of labels\n            Columns to use to build range-partitioning. 
Must be present in both dataframes.\n        new_index : pandas.Index, optional\n            Index values to write to the result's cache.\n        new_columns : pandas.Index, optional\n            Column values to write to the result's cache.\n        new_dtypes : pandas.Series or ModinDtypes, optional\n            Dtype values to write to the result's cache.\n\n        Returns\n        -------\n        PandasDataframe\n        \"\"\"\n        if self._partitions.shape[0] == 1:\n            result = self.broadcast_apply_full_axis(\n                axis=1,\n                func=func,\n                new_columns=new_columns,\n                dtypes=new_dtypes,\n                other=right,\n            )\n            return result\n\n        if not isinstance(key, list):\n            key = [key]\n\n        shuffling_functions = ShuffleSortFunctions(\n            self,\n            key,\n            ascending=True,\n            ideal_num_new_partitions=self._partitions.shape[0],\n        )\n\n        # here we want to get indices of those partitions that hold the key columns\n        key_indices = self.columns.get_indexer_for(key)\n        partition_indices = np.unique(\n            np.digitize(key_indices, np.cumsum(self.column_widths))\n        )\n\n        new_partitions = self._partition_mgr_cls.shuffle_partitions(\n            self._partitions,\n            partition_indices,\n            shuffling_functions,\n            func,\n            right_partitions=right._partitions,\n        )\n\n        return self.__constructor__(\n            new_partitions,\n            index=new_index,\n            columns=new_columns,\n            dtypes=new_dtypes,\n            pandas_backend=self._pandas_backend,\n        )\n\n    @lazy_metadata_decorator(apply_axis=\"both\")\n    def groupby(\n        self,\n        axis: Union[int, Axis],\n        internal_by: List[str],\n        external_by: List[PandasDataframe],\n        by_positions: List[int],\n        operator: Callable,\n 
       result_schema: Optional[Dict[Hashable, type]] = None,\n        align_result_columns: bool = False,\n        series_groupby: bool = False,\n        add_missing_cats: bool = False,\n        **kwargs: dict,\n    ) -> PandasDataframe:\n        \"\"\"\n        Generate groups based on values in the input column(s) and perform the specified operation on each.\n\n        Parameters\n        ----------\n        axis : int or modin.core.dataframe.base.utils.Axis\n            The axis to apply the grouping over.\n        internal_by : list of strings\n            One or more column labels from the `self` dataframe to use for grouping.\n        external_by : list of PandasDataframes\n            PandasDataframes to group by (may be specified along with or without `internal_by`).\n        by_positions : list of ints\n            Specifies the order of grouping by `internal_by` and `external_by` columns.\n            Each element in `by_positions` specifies an index from either `external_by` or `internal_by`.\n            Indices for `external_by` are positive and start from 0. Indices for `internal_by` are negative\n            and start from -1 (so in order to convert them to a valid indices one should do ``-idx - 1``).\n            '''\n            by_positions = [0, -1, 1, -2, 2, 3]\n            internal_by = [\"col1\", \"col2\"]\n            external_by = [sr1, sr2, sr3, sr4]\n\n            df.groupby([sr1, \"col1\", sr2, \"col2\", sr3, sr4])\n            '''.\n        operator : callable(pandas.core.groupby.DataFrameGroupBy) -> pandas.DataFrame\n            The operation to carry out on each of the groups. 
The operator is another\n            algebraic operator with its own user-defined function parameter, depending\n            on the output desired by the user.\n        result_schema : dict, optional\n            Mapping from column labels to data types that represents the types of the output dataframe.\n        align_result_columns : bool, default: False\n            Whether to manually align columns between all the resulted row partitions.\n            This flag is helpful when dealing with UDFs as they can change the partition's shape\n            and labeling unpredictably, resulting in an invalid dataframe.\n        series_groupby : bool, default: False\n            Whether to convert a one-column DataFrame to a Series before performing groupby.\n        add_missing_cats : bool, default: False\n            Whether to add missing categories from `by` columns to the result.\n        **kwargs : dict\n            Additional arguments to pass to the ``df.groupby`` method (besides the 'by' argument).\n\n        Returns\n        -------\n        PandasDataframe\n            A new PandasDataframe containing the groupings specified, with the operator\n                applied to each group.\n\n        Notes\n        -----\n        No communication between groups is allowed in this algebra implementation.\n\n        The number of rows (columns if axis=1) returned by the user-defined function\n        passed to the groupby may be at most the number of rows in the group, and\n        may be as small as a single row.\n\n        Unlike the pandas API, an intermediate \"GROUP BY\" object is not present in this\n        algebra implementation.\n        \"\"\"\n        axis = Axis(axis)\n        if axis != Axis.ROW_WISE:\n            raise NotImplementedError(\n                f\"Algebra groupby only implemented row-wise. 
{axis.name} axis groupby not implemented yet!\"\n            )\n\n        has_external_grouper = len(external_by) > 0\n        skip_on_aligning_flag = \"__skip_me_on_aligning__\"\n        duplicated_suffix = \"__duplicated_suffix__\"\n        duplicated_pattern = r\"_[\\d]*__duplicated_suffix__\"\n        kwargs[\"observed\"] = True\n        level = kwargs.get(\"level\")\n\n        if level is not None and not isinstance(level, list):\n            level = [level]\n\n        def apply_func(df):  # pragma: no cover\n            if has_external_grouper:\n                external_grouper = df[\"grouper\"]\n                external_grouper = [\n                    # `df.groupby()` can only take a list of Series'es, so splitting\n                    # the df into a list of individual Series'es\n                    external_grouper.iloc[:, i]\n                    for i in range(len(external_grouper.columns))\n                ]\n\n                # renaming 'None' and duplicated names back to their original names\n                for obj in external_grouper:\n                    if not isinstance(obj, pandas.Series):\n                        continue\n                    name = obj.name\n                    if isinstance(name, str):\n                        if name.startswith(MODIN_UNNAMED_SERIES_LABEL):\n                            name = None\n                        elif name.endswith(duplicated_suffix):\n                            name = re.sub(duplicated_pattern, \"\", name)\n                    elif isinstance(name, tuple):\n                        if name[-1].endswith(duplicated_suffix):\n                            name = (\n                                *name[:-1],\n                                re.sub(duplicated_pattern, \"\", name[-1]),\n                            )\n                    obj.name = name\n\n                df = df[\"data\"]\n            else:\n                external_grouper = []\n\n            by = []\n            # restoring original 
order of 'by' columns\n            for idx in by_positions:\n                if idx >= 0:\n                    by.append(external_grouper[idx])\n                else:\n                    by.append(internal_by[-idx - 1])\n\n            if series_groupby:\n                df = df.squeeze(axis=1)\n\n            if kwargs.get(\"level\") is not None:\n                assert len(by) == 0\n                # passing an empty list triggers an error\n                by = None\n\n            result = operator(df.groupby(by, **kwargs))\n\n            if align_result_columns and df.empty and result.empty:\n                # We want to align columns only of those frames that actually performed\n                # some groupby aggregation, if an empty frame was originally passed\n                # (an empty bin on reshuffling was created) then there were no groupby\n                # executed over this partition and so it has incorrect columns\n                # that shouldn't be considered on the aligning phase\n                result.attrs[skip_on_aligning_flag] = True\n            return result\n\n        if has_external_grouper:\n            grouper = (\n                external_by[0]\n                if len(external_by) == 1\n                else external_by[0].concat(\n                    axis=1, others=external_by[1:], how=\"left\", sort=False\n                )\n            )\n\n            new_grouper_cols = []\n            columns_were_changed = False\n            same_columns = {}\n            # duplicated names break range-partitioning mechanism, so renaming them.\n            # original names will be reverted in the actual groupby kernel\n            for col in grouper.columns:\n                suffix = same_columns.get(col)\n                if suffix is None:\n                    same_columns[col] = 0\n                else:\n                    same_columns[col] += 1\n                    col = (\n                        (*col[:-1], 
f\"{col[-1]}_{suffix}{duplicated_suffix}\")\n                        if isinstance(col, tuple)\n                        else f\"{col}_{suffix}{duplicated_suffix}\"\n                    )\n                    columns_were_changed = True\n                new_grouper_cols.append(col)\n\n            if columns_were_changed:\n                grouper.columns = pandas.Index(new_grouper_cols)\n            grouper_key_columns = grouper.columns\n            data = self\n            data_key_columns = internal_by\n        else:\n            grouper = self\n            grouper_key_columns = internal_by\n            data, data_key_columns = None, None\n\n        result = grouper._apply_func_to_range_partitioning(\n            key_columns=grouper_key_columns,\n            func=apply_func,\n            data=data,\n            data_key_columns=data_key_columns,\n            level=level,\n        )\n        # no need to align columns if there's only one row partition\n        if add_missing_cats or align_result_columns and result._partitions.shape[0] > 1:\n            # FIXME: the current reshuffling implementation guarantees us that there's only one column\n            # partition in the result, so we should never hit this exception for now, however\n            # in the future, we might want to make this implementation broader\n            if result._partitions.shape[1] > 1:\n                raise NotImplementedError(\n                    \"Aligning columns is not yet implemented for multiple column partitions.\"\n                )\n\n            # There are two implementations:\n            #   1. The first one works faster, but may stress the network a lot in cluster mode since\n            #      it gathers all the dataframes in a single ray-kernel.\n            #   2. 
The second one works slower, but only gathers light pandas.Index objects,\n            #      so there should be less stress on the network.\n            if add_missing_cats or not IsRayCluster.get():\n                if self.has_materialized_dtypes:\n                    original_dtypes = pandas.Series(\n                        {\n                            # lazy proxies hold a reference to another modin's DataFrame which can be\n                            # a problem during serialization, in this scenario we don't need actual\n                            # categorical values, so a \"category\" string will be enough\n                            name: (\n                                \"category\"\n                                if isinstance(dtype, LazyProxyCategoricalDtype)\n                                else dtype\n                            )\n                            for name, dtype in self.dtypes.items()\n                        }\n                    )\n                else:\n                    original_dtypes = None\n\n                def compute_aligned_columns(*dfs, initial_columns=None, by=None):\n                    \"\"\"Take row partitions, filter empty ones, and return joined columns for them.\"\"\"\n                    if align_result_columns:\n                        valid_dfs = [\n                            df\n                            for df in dfs\n                            if not df.attrs.get(skip_on_aligning_flag, False)\n                        ]\n\n                        if len(valid_dfs) == 0 and len(dfs) != 0:\n                            valid_dfs = dfs\n\n                        # Using '.concat()' on empty-slices instead of 'Index.join()'\n                        # in order to get identical behavior to pandas when it joins\n                        # results of different groups\n                        combined_cols = pandas.concat(\n                            [df.iloc[:0] for df in valid_dfs], axis=0, join=\"outer\"\n 
                       ).columns\n                    else:\n                        combined_cols = dfs[0].columns\n\n                    masks = None\n                    if add_missing_cats:\n                        masks, combined_cols = add_missing_categories_to_groupby(\n                            dfs,\n                            by,\n                            operator,\n                            initial_columns,\n                            combined_cols,\n                            is_udf_agg=align_result_columns,\n                            kwargs=kwargs.copy(),\n                            initial_dtypes=original_dtypes,\n                        )\n                    return (\n                        (combined_cols, masks)\n                        if align_result_columns\n                        else (None, masks)\n                    )\n\n                external_by_cols = [\n                    None if col.startswith(MODIN_UNNAMED_SERIES_LABEL) else col\n                    for obj in external_by\n                    for col in obj.columns\n                ]\n                by = []\n                # restoring original order of 'by' columns\n                for idx in by_positions:\n                    if idx >= 0:\n                        by.append(external_by_cols[idx])\n                    else:\n                        by.append(internal_by[-idx - 1])\n\n                # Passing all partitions to the 'compute_aligned_columns' kernel to get\n                # aligned columns\n                parts = result._partitions.flatten()\n                aligned_columns = parts[0].apply(\n                    compute_aligned_columns,\n                    *[part._data for part in parts[1:]],\n                    initial_columns=pandas.Index(external_by_cols).append(self.columns),\n                    by=by,\n                )\n\n                def apply_aligned(df, args, partition_idx):\n                    combined_cols, mask = args\n                
    if mask is not None and mask.get(partition_idx) is not None:\n                        values = mask[partition_idx]\n\n                        original_names = df.index.names\n                        # TODO: inserting 'values' based on 'searchsorted' result might be more efficient\n                        # in cases of small amount of 'values'\n                        df = pandas.concat([df, values])\n                        if kwargs[\"sort\"]:\n                            df = df.sort_index(axis=0)\n                        df.index.names = original_names\n                    if combined_cols is not None:\n                        df = df.reindex(columns=combined_cols)\n                    return df\n\n                # Lazily applying aligned columns to partitions\n                new_partitions = self._partition_mgr_cls.lazy_map_partitions(\n                    result._partitions,\n                    apply_aligned,\n                    func_args=(aligned_columns._data,),\n                    enumerate_partitions=True,\n                )\n            else:\n\n                def join_cols(df, *cols):\n                    \"\"\"Join `cols` and apply the joined columns to `df`.\"\"\"\n                    valid_cols = [\n                        pandas.DataFrame(columns=col) for col in cols if col is not None\n                    ]\n                    if len(valid_cols) == 0:\n                        return df\n                    # Using '.concat()' on empty-slices instead of 'Index.join()'\n                    # in order to get identical behavior to pandas when it joins\n                    # results of different groups\n                    result_col = pandas.concat(valid_cols, axis=0, join=\"outer\").columns\n                    return df.reindex(columns=result_col)\n\n                # Getting futures for columns of non-empty partitions\n                cols = [\n                    part.apply(\n                        lambda df: (\n                          
  None\n                            if df.attrs.get(skip_on_aligning_flag, False)\n                            else df.columns\n                        )\n                    )._data\n                    for part in result._partitions.flatten()\n                ]\n\n                # Lazily joining and applying the aligned columns\n                new_partitions = self._partition_mgr_cls.lazy_map_partitions(\n                    result._partitions,\n                    join_cols,\n                    func_args=cols,\n                )\n            result = self.__constructor__(\n                new_partitions,\n                index=result.copy_index_cache(),\n                row_lengths=result._row_lengths_cache,\n                pandas_backend=self._pandas_backend,\n            )\n\n        if (\n            not result.has_materialized_index\n            and not has_external_grouper\n            and level is None\n        ):\n            by_dtypes = ModinDtypes(self._dtypes).lazy_get(internal_by)\n            if by_dtypes.is_materialized:\n                new_index = ModinIndex(value=result, axis=0, dtypes=by_dtypes)\n                result.set_index_cache(new_index)\n\n        if result_schema is not None:\n            new_dtypes = pandas.Series(result_schema)\n\n            result.set_dtypes_cache(new_dtypes)\n            result.set_columns_cache(new_dtypes.index)\n\n        return result\n\n    @lazy_metadata_decorator(apply_axis=\"both\")\n    def groupby_reduce(\n        self,\n        axis,\n        by,\n        map_func,\n        reduce_func,\n        new_index=None,\n        new_columns=None,\n        apply_indices=None,\n    ):\n        \"\"\"\n        Groupby another Modin DataFrame dataframe and aggregate the result.\n\n        Parameters\n        ----------\n        axis : {0, 1}\n            Axis to groupby and aggregate over.\n        by : PandasDataframe or None\n            A Modin DataFrame to group by.\n        map_func : callable\n            
Map component of the aggregation.\n        reduce_func : callable\n            Reduce component of the aggregation.\n        new_index : pandas.Index, optional\n            Index of the result. We may know this in advance,\n            and if not provided it must be computed.\n        new_columns : pandas.Index, optional\n            Columns of the result. We may know this in advance,\n            and if not provided it must be computed.\n        apply_indices : list-like, optional\n            Indices of `axis ^ 1` to apply groupby over.\n\n        Returns\n        -------\n        PandasDataframe\n            New Modin DataFrame.\n        \"\"\"\n        by_parts = by if by is None else by._partitions\n        if by is None:\n            self._propagate_index_objs(axis=0)\n\n        if apply_indices is not None:\n            numeric_indices = self.get_axis(axis ^ 1).get_indexer_for(apply_indices)\n            apply_indices = list(\n                self._get_dict_of_block_index(axis ^ 1, numeric_indices).keys()\n            )\n\n        if by_parts is not None:\n            # inplace operation\n            if by_parts.shape[axis] != self._partitions.shape[axis]:\n                self._filter_empties(compute_metadata=False)\n        new_partitions = self._partition_mgr_cls.groupby_reduce(\n            axis, self._partitions, by_parts, map_func, reduce_func, apply_indices\n        )\n        return self.__constructor__(\n            new_partitions,\n            index=new_index,\n            columns=new_columns,\n            pandas_backend=self._pandas_backend,\n        )\n\n    @classmethod\n    def from_pandas(cls, df):\n        \"\"\"\n        Create a Modin DataFrame from a pandas DataFrame.\n\n        Parameters\n        ----------\n        df : pandas.DataFrame\n            A pandas DataFrame.\n\n        Returns\n        -------\n        PandasDataframe\n            New Modin DataFrame.\n        \"\"\"\n        new_index = df.index\n        new_columns = 
df.columns\n        new_dtypes = df.dtypes\n        new_frame, pandas_backend, new_lengths, new_widths = (\n            cls._partition_mgr_cls.from_pandas(df, True)\n        )\n        return cls(\n            new_frame,\n            new_index,\n            new_columns,\n            new_lengths,\n            new_widths,\n            dtypes=new_dtypes,\n            pandas_backend=pandas_backend,\n        )\n\n    @classmethod\n    def from_arrow(cls, at):\n        \"\"\"\n        Create a Modin DataFrame from an Arrow Table.\n\n        Parameters\n        ----------\n        at : pyarrow.table\n            Arrow Table.\n\n        Returns\n        -------\n        PandasDataframe\n            New Modin DataFrame.\n        \"\"\"\n        new_frame, pandas_backend, new_lengths, new_widths = (\n            cls._partition_mgr_cls.from_arrow(at, return_dims=True)\n        )\n        new_columns = Index.__new__(Index, data=at.column_names, dtype=\"O\")\n        new_index = Index.__new__(RangeIndex, data=range(at.num_rows))\n        new_dtypes = pandas.Series(\n            [cls._arrow_type_to_dtype(col.type) for col in at.columns],\n            index=at.column_names,\n        )\n        return cls(\n            partitions=new_frame,\n            index=new_index,\n            columns=new_columns,\n            row_lengths=new_lengths,\n            column_widths=new_widths,\n            dtypes=new_dtypes,\n            pandas_backend=pandas_backend,\n        )\n\n    @classmethod\n    def _arrow_type_to_dtype(cls, arrow_type):\n        \"\"\"\n        Convert an arrow data type to a pandas data type.\n\n        Parameters\n        ----------\n        arrow_type : arrow dtype\n            Arrow data type to be converted to a pandas data type.\n\n        Returns\n        -------\n        object\n            Any dtype compatible with pandas.\n        \"\"\"\n        import pyarrow\n\n        try:\n            # TODO: should we map arrow types to pyarrow-backed pandas types?\n     
       # It seems like this might help avoid the expense of transferring\n            # data between backends (numpy and pyarrow), but we need to be sure\n            # how this fits into the type inference system in pandas.\n            res = arrow_type.to_pandas_dtype()\n        # Conversion to pandas is not implemented for some arrow types,\n        # perform manual conversion for them:\n        except NotImplementedError:\n            if pyarrow.types.is_time(arrow_type):\n                res = np.dtype(datetime.time)\n            else:\n                raise\n\n        if not isinstance(res, (np.dtype, str)):\n            return np.dtype(res)\n        return res\n\n    @lazy_metadata_decorator(apply_axis=\"both\")\n    def to_pandas(self):\n        \"\"\"\n        Convert this Modin DataFrame to a pandas DataFrame.\n\n        Returns\n        -------\n        pandas.DataFrame\n        \"\"\"\n        df = self._partition_mgr_cls.to_pandas(self._partitions)\n        if df.empty:\n            df = pandas.DataFrame(columns=self.columns, index=self.index)\n            if len(df.columns) and self.has_materialized_dtypes:\n                df = df.astype(self.dtypes)\n        else:\n            for axis, has_external_index in enumerate(\n                [\"has_materialized_index\", \"has_materialized_columns\"]\n            ):\n                # no need to check external and internal axes since in that case\n                # external axes will be computed from internal partitions\n                if getattr(self, has_external_index):\n                    external_index = self.columns if axis else self.index\n                    ErrorMessage.catch_bugs_and_request_email(\n                        not df.axes[axis].equals(external_index),\n                        f\"Internal and external indices on axis {axis} do not match.\",\n                    )\n                    # have to do this in order to assign some potentially missing metadata,\n                    # the 
ones that were set to the external index but were never propagated\n                    # into the internal ones\n                    df = df.set_axis(axis=axis, labels=external_index, copy=False)\n\n        return df\n\n    def to_numpy(self, **kwargs):\n        \"\"\"\n        Convert this Modin DataFrame to a NumPy array.\n\n        Parameters\n        ----------\n        **kwargs : dict\n            Additional keyword arguments to be passed in `to_numpy`.\n\n        Returns\n        -------\n        np.ndarray\n        \"\"\"\n        arr = self._partition_mgr_cls.to_numpy(self._partitions, **kwargs)\n        ErrorMessage.catch_bugs_and_request_email(\n            self.has_materialized_index\n            and len(arr) != len(self.index)\n            or self.has_materialized_columns\n            and len(arr[0]) != len(self.columns)\n        )\n        return arr\n\n    @lazy_metadata_decorator(apply_axis=None, transpose=True)\n    def transpose(self):\n        \"\"\"\n        Transpose the index and columns of this Modin DataFrame.\n\n        Reflect this Modin DataFrame over its main diagonal\n        by writing rows as columns and vice-versa.\n\n        Returns\n        -------\n        PandasDataframe\n            New Modin DataFrame.\n        \"\"\"\n        new_partitions = self._partition_mgr_cls.lazy_map_partitions(\n            self._partitions, lambda df: df.T\n        ).T\n        if self.has_materialized_dtypes:\n            new_dtypes = pandas.Series(\n                np.full(len(self.index), find_common_type(self.dtypes.values)),\n                index=self.index,\n            )\n        else:\n            new_dtypes = None\n        return self.__constructor__(\n            new_partitions,\n            self.copy_columns_cache(copy_lengths=True),\n            self.copy_index_cache(copy_lengths=True),\n            self._column_widths_cache,\n            self._row_lengths_cache,\n            dtypes=new_dtypes,\n            
pandas_backend=self._pandas_backend,\n        )\n\n    @lazy_metadata_decorator(apply_axis=\"both\")\n    def finalize(self):\n        \"\"\"\n        Perform all deferred calls on partitions.\n\n        This makes `self` Modin Dataframe independent of a history of queries\n        that were used to build it.\n        \"\"\"\n        self._partition_mgr_cls.finalize(self._partitions)\n\n    def wait_computations(self):\n        \"\"\"Wait for all computations to complete without materializing data.\"\"\"\n        self._partition_mgr_cls.wait_partitions(self._partitions.flatten())\n\n    def support_materialization_in_worker_process(self) -> bool:\n        \"\"\"\n        Whether it's possible to call function `to_pandas` during the pickling process, at the moment of recreating the object.\n\n        Returns\n        -------\n        bool\n        \"\"\"\n        return True\n\n    def __dataframe__(self, nan_as_null: bool = False, allow_copy: bool = True):\n        \"\"\"\n        Get a Modin DataFrame that implements the dataframe exchange protocol.\n\n        See more about the protocol in https://data-apis.org/dataframe-protocol/latest/index.html.\n\n        Parameters\n        ----------\n        nan_as_null : bool, default: False\n            A keyword intended for the consumer to tell the producer\n            to overwrite null values in the data with ``NaN`` (or ``NaT``).\n            This currently has no effect; once support for nullable extension\n            dtypes is added, this value should be propagated to columns.\n        allow_copy : bool, default: True\n            A keyword that defines whether or not the library is allowed\n            to make a copy of the data. For example, copying data would be necessary\n            if a library supports strided buffers, given that this protocol\n            specifies contiguous buffers. 
Currently, if the flag is set to ``False``\n            and a copy is needed, a ``RuntimeError`` will be raised.\n\n        Returns\n        -------\n        ProtocolDataframe\n            A dataframe object following the dataframe protocol specification.\n        \"\"\"\n        from modin.core.dataframe.pandas.interchange.dataframe_protocol.dataframe import (\n            PandasProtocolDataframe,\n        )\n\n        return PandasProtocolDataframe(\n            self, nan_as_null=nan_as_null, allow_copy=allow_copy\n        )\n\n    @classmethod\n    def from_interchange_dataframe(cls, df: ProtocolDataframe) -> PandasDataframe:\n        \"\"\"\n        Convert a DataFrame implementing the dataframe exchange protocol to a Core Modin Dataframe.\n\n        See more about the protocol in https://data-apis.org/dataframe-protocol/latest/index.html.\n\n        Parameters\n        ----------\n        df : ProtocolDataframe\n            The DataFrame object supporting the dataframe exchange protocol.\n\n        Returns\n        -------\n        PandasDataframe\n            A new Core Modin Dataframe object.\n        \"\"\"\n        if type(df) is cls:\n            return df\n\n        if not hasattr(df, \"__dataframe__\"):\n            raise ValueError(\n                \"`df` does not support DataFrame exchange protocol, i.e. 
`__dataframe__` method\"\n            )\n\n        from modin.core.dataframe.pandas.interchange.dataframe_protocol.from_dataframe import (\n            from_dataframe_to_pandas,\n        )\n\n        ErrorMessage.default_to_pandas(message=\"`from_dataframe`\")\n        pandas_df = from_dataframe_to_pandas(df)\n        return cls.from_pandas(pandas_df)\n\n    def case_when(self, caselist):\n        \"\"\"\n        Replace values where the conditions are True.\n\n        This is Series.case_when() implementation and, thus, it's designed to work\n        only with single-column DataFrames.\n\n        Parameters\n        ----------\n        caselist : list of tuples\n\n        Returns\n        -------\n        PandasDataframe\n        \"\"\"\n        # The import is here to avoid an incorrect module initialization when running tests.\n        # This module is loaded before `pytest_configure()` is called. If `pytest_configure()`\n        # changes the engine, the `remote_function` decorator will not be valid.\n        from modin.core.execution.utils import remote_function\n\n        @remote_function\n        def remote_fn(df, name, caselist):  # pragma: no cover\n            caselist = [\n                tuple(\n                    (\n                        data.squeeze(axis=1)\n                        if isinstance(data, pandas.DataFrame)\n                        else data\n                    )\n                    for data in case_tuple\n                )\n                for case_tuple in caselist\n            ]\n            return pandas.DataFrame({name: df.squeeze(axis=1).case_when(caselist)})\n\n        cls = type(self)\n        use_map = True\n        is_trivial_idx = None\n        name = self.columns[0]\n        # Lists of modin frames: first for conditions, second for replacements\n        modin_lists = [[], []]\n        # Fill values for conditions and replacements respectively\n        fill_values = [True, None]\n        new_caselist = []\n        for 
case_tuple in caselist:\n            new_case = []\n            for data, modin_list, fill_value in zip(\n                case_tuple, modin_lists, fill_values\n            ):\n                if isinstance(data, cls):\n                    modin_list.append(data)\n                elif callable(data):\n                    data = remote_function(data)\n                elif isinstance(data, pandas.Series):\n                    use_map = False\n                    if is_trivial_idx is None:\n                        self_idx = self.index\n                        length = len(self_idx)\n                        is_trivial_idx = is_trivial_index(self_idx)\n                    if is_trivial_idx and is_trivial_index(data.index):\n                        data = data[:length]\n                        diff = length - len(data)\n                        if diff > 0:\n                            data = pandas.concat(\n                                [data, pandas.Series([fill_value] * diff)],\n                                ignore_index=True,\n                            )\n                    else:\n                        data = data.reindex(self_idx, fill_value=fill_value)\n                elif use_map and is_list_like(data):\n                    use_map = False\n                new_case.append(data)\n            new_caselist.append(tuple(new_case))\n\n        if modin_lists[0] or modin_lists[1]:\n            # Copartition modin frames\n            use_map = False\n            columns = self.columns\n            column_widths = [1]\n            for modin_list, fill_value in zip(modin_lists, fill_values):\n                _, list_of_right_parts, joined_index, row_lengths = self._copartition(\n                    Axis.ROW_WISE.value,\n                    modin_list,\n                    how=\"left\",\n                    sort=False,\n                    fill_value=fill_value,\n                )\n                modin_list.clear()\n                modin_list.extend(\n              
      self.__constructor__(\n                        part,\n                        joined_index,\n                        columns,\n                        row_lengths,\n                        column_widths,\n                        pandas_backend=self._pandas_backend,\n                    )\n                    for part in list_of_right_parts\n                )\n\n            # Replace modin frames with copartitioned\n            caselist = new_caselist\n            new_caselist = []\n            for i in range(2):\n                modin_lists[i] = iter(modin_lists[i])\n            for case_tuple in caselist:\n                new_case = tuple(\n                    next(modin_list) if isinstance(data, cls) else data\n                    for data, modin_list in zip(case_tuple, modin_lists)\n                )\n                new_caselist.append(new_case)\n\n        # If all the conditions are callable and the replacements are either\n        # callable or scalar, use map().\n        if use_map:\n            return self.map(func=remote_fn, func_args=[name, new_caselist], lazy=True)\n\n        # Get the chunk of data corresponding the the specified partition\n        def map_data(\n            part_idx,\n            part_len,\n            data,\n            data_offset,\n            fill_value,\n        ):\n            if isinstance(data, cls):\n                return data._partitions[part_idx][0]._data\n            if isinstance(data, pandas.Series):\n                return data[data_offset : data_offset + part_len]\n            return (\n                data[data_offset : data_offset + part_len]\n                if is_list_like(data)\n                else data\n            )\n\n        parts = [p[0] for p in self._partitions]\n        lengths = self.row_lengths\n        new_parts = []\n        data_offset = 0\n\n        # Split the data and apply the remote function to each partition\n        # with the corresponding chunk of data\n        for i, part, part_len in 
zip(range(len(parts)), parts, lengths):\n            cases = [\n                tuple(\n                    map_data(i, part_len, data, data_offset, fill_value)\n                    for data, fill_value in zip(c, (True, None))\n                )\n                for c in new_caselist\n            ]\n            new_parts.append(\n                part.add_to_apply_calls(\n                    remote_fn,\n                    name,\n                    cases,\n                    length=part_len,\n                    width=1,\n                )\n            )\n            data_offset += part_len\n        new_parts = np.array([[p] for p in new_parts])\n        return self.__constructor__(\n            new_parts,\n            columns=self.columns,\n            index=self.index,\n            row_lengths=lengths,\n            column_widths=[1],\n            pandas_backend=self._pandas_backend,\n        )\n"
  },
  {
    "path": "modin/core/dataframe/pandas/dataframe/utils.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Collection of algebra utility functions, used to shuffle data across partitions.\"\"\"\n\nimport abc\nfrom collections import namedtuple\nfrom typing import TYPE_CHECKING, Callable, Optional, Union\n\nimport numpy as np\nimport pandas\nfrom pandas._libs.tslibs import to_offset\nfrom pandas.core.dtypes.common import is_list_like, is_numeric_dtype\nfrom pandas.core.resample import _get_timestamp_range_edges\n\nfrom modin.error_message import ErrorMessage\nfrom modin.utils import _inherit_docstrings\n\nif TYPE_CHECKING:\n    from modin.core.dataframe.pandas.dataframe.dataframe import PandasDataframe\n\nColumnInfo = namedtuple(\"ColumnInfo\", [\"name\", \"pivots\", \"is_numeric\"])\n\n\nclass ShuffleFunctions:\n    \"\"\"\n    Defines an interface to perform the sampling, quantiles picking, and the splitting stages for the range-partitioning building.\n\n    Parameters\n    ----------\n    modin_frame : PandasDataframe\n        The frame to build the range-partitioning for.\n    columns : str or list of strings\n        The column/columns to use as a key.\n    ascending : bool\n        Whether the ranges should be in ascending or descending order.\n    
ideal_num_new_partitions : int\n        The ideal number of new partitions.\n    **kwargs : dict\n        Additional keyword arguments.\n    \"\"\"\n\n    def __init__(\n        self, modin_frame, columns, ascending, ideal_num_new_partitions, **kwargs\n    ):\n        pass\n\n    @abc.abstractmethod\n    def sample_fn(self, partition: pandas.DataFrame) -> pandas.DataFrame:\n        \"\"\"\n        Pick samples over the given partition.\n\n        Parameters\n        ----------\n        partition : pandas.DataFrame\n\n        Returns\n        -------\n        pandas.DataFrame:\n            The samples for the partition.\n        \"\"\"\n        pass\n\n    @abc.abstractmethod\n    def pivot_fn(self, samples: \"list[pandas.DataFrame]\") -> int:\n        \"\"\"\n        Determine quantiles from the given samples and save it for the future ``.split_fn()`` calls.\n\n        Parameters\n        ----------\n        samples : list of pandas.DataFrames\n\n        Returns\n        -------\n        int\n            The number of bins the ``.split_fn()`` will return.\n        \"\"\"\n        pass\n\n    @abc.abstractmethod\n    def split_fn(self, partition: pandas.DataFrame) -> \"tuple[pandas.DataFrame, ...]\":\n        \"\"\"\n        Split the given dataframe into the range-partitions defined by the preceding call of the ``.pivot_fn()``.\n\n        Parameters\n        ----------\n        partition : pandas.DataFrame\n\n        Returns\n        -------\n        tuple of pandas.DataFrames\n\n        Notes\n        -----\n        In order to call this method you must call the ``.pivot_fn()`` first.\n        \"\"\"\n        pass\n\n\n@_inherit_docstrings(ShuffleFunctions)\nclass ShuffleSortFunctions(ShuffleFunctions):\n    \"\"\"\n    Perform the sampling, quantiles picking, and the splitting stages for the range-partitioning building.\n\n    Parameters\n    ----------\n    modin_frame : PandasDataframe\n        The frame to build the range-partitioning for.\n    columns : str, 
list of strings or None\n        The column/columns to use as a key. Can't be specified along with `level`.\n    ascending : bool\n        Whether the ranges should be in ascending or descending order.\n    ideal_num_new_partitions : int\n        The ideal number of new partitions.\n    level : list of strings or ints, or None\n        Index level(s) to use as a key. Can't be specified along with `columns`.\n    closed_on_right : bool, default: False\n        Whether to include the right limit in range-partitioning.\n            True:  bins[i - 1] < x <= bins[i]\n            False: bins[i - 1] <= x < bins[i]\n    **kwargs : dict\n        Additional keyword arguments.\n    \"\"\"\n\n    def __init__(\n        self,\n        modin_frame: \"PandasDataframe\",\n        columns: Optional[Union[str, list]],\n        ascending: Union[list, bool],\n        ideal_num_new_partitions: int,\n        level: Optional[list[Union[str, int]]] = None,\n        closed_on_right: bool = False,\n        **kwargs: dict,\n    ):\n        self.frame_len = len(modin_frame)\n        self.ideal_num_new_partitions = ideal_num_new_partitions\n        self.columns = columns if is_list_like(columns) else [columns]\n        self.ascending = ascending\n        self.kwargs = kwargs.copy()\n        self.level = level\n        self.columns_info = None\n        self.closed_on_right = closed_on_right\n\n    def sample_fn(self, partition: pandas.DataFrame) -> pandas.DataFrame:\n        if self.level is not None:\n            partition = self._index_to_df_zero_copy(partition, self.level)\n        else:\n            partition = partition[self.columns]\n        return self.pick_samples_for_quantiles(\n            partition, self.ideal_num_new_partitions, self.frame_len\n        )\n\n    def pivot_fn(self, samples: \"list[pandas.DataFrame]\") -> int:\n        key = self.kwargs.get(\"key\", None)\n        samples = pandas.concat(samples, axis=0, copy=False)\n\n        columns_info: \"list[ColumnInfo]\" = []\n 
       number_of_groups = 1\n        cols = []\n        for i, col in enumerate(samples.columns):\n            num_pivots = int(self.ideal_num_new_partitions / number_of_groups)\n            if num_pivots < 2 and len(columns_info):\n                break\n            column_val = samples[col]\n            cols.append(col)\n            is_numeric = is_numeric_dtype(column_val.dtype)\n\n            # When we are not sorting numbers, we need our quantiles to not do arithmetic on the values\n            method = \"linear\" if is_numeric else \"inverted_cdf\"\n            pivots = self.pick_pivots_from_samples_for_sort(\n                column_val, num_pivots, method, key\n            )\n            columns_info.append(\n                ColumnInfo(\n                    self.level[i] if self.level is not None else col,\n                    pivots,\n                    is_numeric,\n                )\n            )\n            number_of_groups *= len(pivots) + 1\n        self.columns_info = columns_info\n        return number_of_groups\n\n    def split_fn(\n        self,\n        partition: pandas.DataFrame,\n    ) -> \"tuple[pandas.DataFrame, ...]\":\n        ErrorMessage.catch_bugs_and_request_email(\n            failure_condition=self.columns_info is None,\n            extra_log=\"The 'split_fn' doesn't have proper metadata, the probable reason is that it was called before 'pivot_fn'\",\n        )\n        return self.split_partitions_using_pivots_for_sort(\n            partition,\n            self.columns_info,\n            self.ascending,\n            keys_are_index_levels=self.level is not None,\n            closed_on_right=self.closed_on_right,\n            **self.kwargs,\n        )\n\n    @staticmethod\n    def _find_quantiles(\n        df: Union[pandas.DataFrame, pandas.Series], quantiles: list, method: str\n    ) -> np.ndarray:\n        \"\"\"\n        Find quantiles of a given dataframe using the specified method.\n\n        We use this method to provide 
backwards compatibility with NumPy versions < 1.23 (e.g. when\n        the user is using Modin in compat mode). This is basically a wrapper around `np.quantile` that\n        ensures we provide the correct `method` argument - i.e. if we are dealing with objects (which\n        may or may not support algebra), we do not want to use a method to find quantiles that will\n        involve algebra operations (e.g. mean) between the objects, since that may fail.\n\n        Parameters\n        ----------\n        df : pandas.DataFrame or pandas.Series\n            The data to pick quantiles from.\n        quantiles : list[float]\n            The quantiles to compute.\n        method : str\n            The method to use. `linear` if dealing with numeric types, otherwise `inverted_cdf`.\n\n        Returns\n        -------\n        np.ndarray\n            A NumPy array with the quantiles of the data.\n        \"\"\"\n        if method == \"linear\":\n            # This is the default method for finding quantiles, so it does not need to be specified,\n            # which keeps backwards compatibility with older versions of NumPy that do not have a\n            # `method` keyword argument in np.quantile.\n            return np.unique(np.quantile(df, quantiles))\n        else:\n            try:\n                return np.unique(np.quantile(df, quantiles, method=method))\n            except Exception:\n                # In this case, we're dealing with an array of objects, but the current version of\n                # NumPy does not have a `method` kwarg. 
We need to use the older kwarg, `interpolation`\n                # instead.\n                return np.unique(np.quantile(df, quantiles, interpolation=\"lower\"))\n\n    @staticmethod\n    def pick_samples_for_quantiles(\n        df: pandas.DataFrame,\n        num_partitions: int,\n        length: int,\n    ) -> pandas.DataFrame:\n        \"\"\"\n        Pick samples over the given partition.\n\n        This function picks samples from the given partition using the TeraSort algorithm - each\n        value is sampled with probability 1 / m * ln(n * t) where m = total_length / num_partitions,\n        t = num_partitions, and n = total_length.\n\n        Parameters\n        ----------\n        df : pandas.DataFrame\n            The masked dataframe to pick samples from.\n        num_partitions : int\n            The number of partitions.\n        length : int\n            The total length.\n\n        Returns\n        -------\n        pandas.DataFrame\n            The samples for the partition.\n\n        Notes\n        -----\n        This sampling algorithm is inspired by TeraSort. You can find more information about TeraSort\n        and the sampling algorithm at https://www.cse.cuhk.edu.hk/~taoyf/paper/sigmod13-mr.pdf.\n        \"\"\"\n        m = length / num_partitions\n        probability = (1 / m) * np.log(num_partitions * length)\n        return df.sample(frac=probability)\n\n    def pick_pivots_from_samples_for_sort(\n        self,\n        samples: pandas.Series,\n        ideal_num_new_partitions: int,\n        method: str = \"linear\",\n        key: Optional[Callable] = None,\n    ) -> np.ndarray:\n        \"\"\"\n        Determine quantiles from the given samples.\n\n        This function takes as input the quantiles calculated over all partitions from\n        `sample_fn` defined above, and determines a final NPartitions.get() quantiles\n        to use to roughly sort the entire dataframe. 
It does so by collating all the samples\n        and computing NPartitions.get() quantiles for the overall set.\n\n        Parameters\n        ----------\n        samples : pandas.Series\n            The samples computed by ``get_partition_quantiles_for_sort``.\n        ideal_num_new_partitions : int\n            The ideal number of new partitions.\n        method : str, default: linear\n            The method to use when picking quantiles.\n        key : Callable, default: None\n            The key to use on the samples when picking pivots.\n\n        Returns\n        -------\n        np.ndarray\n            A list of overall quantiles.\n        \"\"\"\n        samples = samples.to_numpy()\n        # We don't call `np.unique` on the samples, since if a quantile shows up in multiple\n        # partition's samples, this is probably an indicator of skew in the dataset, and we\n        # want our final partitions to take this into account.\n        if key is not None:\n            samples = key(samples)\n        # We don't want to pick very many quantiles if we have a very small dataframe.\n        num_quantiles = ideal_num_new_partitions\n        quantiles = [i / num_quantiles for i in range(1, num_quantiles)]\n        # If we only desire 1 partition, we need to ensure that we're not trying to find quantiles\n        # from an empty list of pivots.\n        if len(quantiles) > 0:\n            return self._find_quantiles(samples, quantiles, method)\n        return np.array([])\n\n    @staticmethod\n    def split_partitions_using_pivots_for_sort(\n        df: pandas.DataFrame,\n        columns_info: \"list[ColumnInfo]\",\n        ascending: bool,\n        keys_are_index_levels: bool = False,\n        closed_on_right: bool = False,\n        **kwargs: dict,\n    ) -> \"tuple[pandas.DataFrame, ...]\":\n        \"\"\"\n        Split the given dataframe into the partitions specified by `pivots` in `columns_info`.\n\n        This function takes as input a row-axis partition, 
as well as the quantiles determined\n        by the `pivot_func` defined above. It then splits the input dataframe into NPartitions.get()\n        dataframes, with the elements in the i-th split belonging to the i-th partition, as determined\n        by the quantiles we're using.\n\n        Parameters\n        ----------\n        df : pandas.Dataframe\n            The partition to split.\n        columns_info : list of ColumnInfo\n            Information regarding keys and pivots for range partitioning.\n        ascending : bool\n            The ascending flag.\n        keys_are_index_levels : bool, default: False\n            Whether `columns_info` describes index levels or actual columns from `df`.\n        closed_on_right : bool, default: False\n            Whether to include the right limit in range-partitioning.\n                True:  bins[i - 1] < x <= bins[i]\n                False: bins[i - 1] <= x < bins[i]\n        **kwargs : dict\n            Additional keyword arguments.\n\n        Returns\n        -------\n        tuple[pandas.DataFrame]\n            A tuple of the splits from this partition.\n        \"\"\"\n        if len(columns_info) == 0:\n            # We can return the dataframe with zero changes if there were no pivots passed\n            return (df,)\n\n        key_data = (\n            ShuffleSortFunctions._index_to_df_zero_copy(\n                df, [col_info.name for col_info in columns_info]\n            )\n            if keys_are_index_levels\n            else df[[col_info.name for col_info in columns_info]]\n        )\n        na_index = key_data.isna().squeeze(axis=1)\n        if na_index.ndim == 2:\n            na_index = na_index.any(axis=1)\n        na_rows = df[na_index]\n        non_na_rows = df[~na_index]\n\n        def get_group(grp, key, df):\n            \"\"\"Get a group with the `key` from the `grp`, if it doesn't exist return an empty slice of `df`.\"\"\"\n            try:\n                return grp.get_group(key)\n        
    except KeyError:\n                return pandas.DataFrame(index=df.index[:0], columns=df.columns).astype(\n                    df.dtypes\n                )\n\n        groupby_codes = []\n        group_keys = []\n        for col_info in columns_info:\n            pivots = col_info.pivots\n            if len(pivots) == 0:\n                continue\n            # If `ascending=False` and we are dealing with a numeric dtype, we can pass in a reversed list\n            # of pivots, and `np.digitize` will work correctly. For object dtypes, we use `np.searchsorted`\n            # which breaks when we reverse the pivots.\n            if not ascending and col_info.is_numeric:\n                # `key` is already applied to `pivots` in the `pick_pivots_from_samples_for_sort` function.\n                pivots = pivots[::-1]\n            group_keys.append(range(len(pivots) + 1))\n            key = kwargs.pop(\"key\", None)\n            cols_to_digitize = (\n                non_na_rows.index.get_level_values(col_info.name)\n                if keys_are_index_levels\n                else non_na_rows[col_info.name]\n            )\n            if key is not None:\n                cols_to_digitize = key(cols_to_digitize)\n\n            if cols_to_digitize.ndim == 2:\n                cols_to_digitize = cols_to_digitize.squeeze()\n\n            if col_info.is_numeric:\n                groupby_col = np.digitize(\n                    cols_to_digitize, pivots, right=closed_on_right\n                )\n                # `np.digitize` returns results based off of the sort order of the pivots it is passed.\n                # When we only have one unique value in our pivots, `np.digitize` assumes that the pivots\n                # are sorted in ascending order, and gives us results based off of that assumption - so if\n                # we actually want to sort in descending order, we need to swap the new indices.\n                if not ascending and len(np.unique(pivots)) == 1:\n        
            groupby_col = len(pivots) - groupby_col\n            else:\n                groupby_col = np.searchsorted(\n                    pivots,\n                    cols_to_digitize,\n                    side=\"left\" if closed_on_right else \"right\",\n                )\n                # Since np.searchsorted requires the pivots to be in ascending order, if we want to sort\n                # in descending order, we need to swap the new indices.\n                if not ascending:\n                    groupby_col = len(pivots) - groupby_col\n            groupby_codes.append(groupby_col)\n\n        if len(group_keys) == 0:\n            # We can return the dataframe with zero changes if there were no pivots passed\n            return (df,)\n        elif len(group_keys) == 1:\n            group_keys = group_keys[0]\n        else:\n            group_keys = pandas.MultiIndex.from_product(group_keys)\n\n        if len(non_na_rows) == 1:\n            groups = [\n                # taking an empty slice for an index's metadata\n                (\n                    pandas.DataFrame(index=df.index[:0], columns=df.columns).astype(\n                        df.dtypes\n                    )\n                    if key != groupby_codes[0]\n                    else non_na_rows\n                )\n                for key in group_keys\n            ]\n        else:\n            grouped = non_na_rows.groupby(groupby_codes)\n            groups = [get_group(grouped, key, df) for key in group_keys]\n        index_to_insert_na_vals = (\n            -1 if kwargs.get(\"na_position\", \"last\") == \"last\" else 0\n        )\n        groups[index_to_insert_na_vals] = pandas.concat(\n            [groups[index_to_insert_na_vals], na_rows]\n        ).astype(df.dtypes)\n        return tuple(groups)\n\n    @staticmethod\n    def _index_to_df_zero_copy(\n        df: pandas.DataFrame, levels: list[Union[str, int]]\n    ) -> pandas.DataFrame:\n        \"\"\"\n        Convert index `level` of 
`df` to a ``pandas.DataFrame``.\n\n        Parameters\n        ----------\n        df : pandas.DataFrame\n        levels : list of labels or ints\n            Index level to convert to a dataframe.\n\n        Returns\n        -------\n        pandas.DataFrame\n            The columns in the resulting dataframe use the same data arrays as the index levels\n            in the original `df`, so no copies.\n        \"\"\"\n        # calling 'df.index.to_frame()' creates a copy of the index, so doing the conversion manually\n        # to avoid the copy\n        data = {\n            (\n                df.index.names[lvl] if isinstance(lvl, int) else lvl\n            ): df.index.get_level_values(lvl)\n            for lvl in levels\n        }\n        index_data = pandas.DataFrame(data, index=df.index, copy=False)\n        return index_data\n\n\n@_inherit_docstrings(ShuffleSortFunctions)\nclass ShuffleResample(ShuffleSortFunctions):\n    def __init__(\n        self,\n        modin_frame: \"PandasDataframe\",\n        columns: Union[str, list],\n        ascending: Union[list, bool],\n        ideal_num_new_partitions: int,\n        resample_kwargs: dict,\n        **kwargs: dict,\n    ):\n        resample_kwargs = resample_kwargs.copy()\n        rule = resample_kwargs.pop(\"rule\")\n\n        if resample_kwargs[\"closed\"] is None:\n            # this rule regarding the default value of 'closed' is inherited\n            # from pandas documentation for 'pandas.DataFrame.resample'\n            if rule in (\"ME\", \"YE\", \"QE\", \"BME\", \"BA\", \"BQE\", \"W\"):\n                resample_kwargs[\"closed\"] = \"right\"\n            else:\n                resample_kwargs[\"closed\"] = \"left\"\n\n        super().__init__(\n            modin_frame,\n            columns,\n            ascending,\n            ideal_num_new_partitions,\n            closed_on_right=resample_kwargs[\"closed\"] == \"right\",\n            **kwargs,\n        )\n\n        resample_kwargs[\"freq\"] = 
to_offset(rule)\n        self.resample_kwargs = resample_kwargs\n\n    @staticmethod\n    def pick_samples_for_quantiles(\n        df: pandas.DataFrame,\n        num_partitions: int,\n        length: int,\n    ) -> pandas.DataFrame:\n        # to build proper bins we need min and max timestamp of the whole DatetimeIndex,\n        # so computing it in each partition\n        return pandas.concat([df.min().to_frame().T, df.max().to_frame().T])\n\n    def pick_pivots_from_samples_for_sort(\n        self,\n        samples: np.ndarray,\n        ideal_num_new_partitions: int,\n        method: str = \"linear\",\n        key: Optional[Callable] = None,\n    ) -> np.ndarray:\n        if key is not None:\n            raise NotImplementedError(key)\n\n        max_value = samples.max()\n\n        first, last = _get_timestamp_range_edges(\n            samples.min(),\n            max_value,\n            self.resample_kwargs[\"freq\"],\n            unit=samples.dt.unit,\n            closed=self.resample_kwargs[\"closed\"],\n            origin=self.resample_kwargs[\"origin\"],\n            offset=self.resample_kwargs[\"offset\"],\n        )\n\n        all_bins = pandas.date_range(\n            start=first,\n            end=last,\n            freq=self.resample_kwargs[\"freq\"],\n            ambiguous=True,\n            nonexistent=\"shift_forward\",\n            unit=samples.dt.unit,\n        )\n\n        all_bins = self._adjust_bin_edges(\n            all_bins,\n            max_value,\n            freq=self.resample_kwargs[\"freq\"],\n            closed=self.resample_kwargs[\"closed\"],\n        )\n\n        # take pivot values with an even interval\n        step = 1 / ideal_num_new_partitions\n        bins = [\n            all_bins[int(len(all_bins) * i * step)]\n            for i in range(1, ideal_num_new_partitions)\n        ]\n        return bins\n\n    def _adjust_bin_edges(\n        self,\n        binner: pandas.DatetimeIndex,\n        end_timestamp,\n        freq,\n        
closed,\n    ) -> pandas.DatetimeIndex:\n        \"\"\"\n        Adjust bin edges.\n\n        This function was copied & simplified from ``pandas.core.resample.TimeGrouper._adjuct_bin_edges()``.\n\n        Parameters\n        ----------\n        binner : pandas.DatetimeIndex\n        end_timestamp : pandas.Timestamp\n        freq : str\n        closed : bool\n\n        Returns\n        -------\n        pandas.DatetimeIndex\n        \"\"\"\n        # Some hacks for > daily data, see pandas-dev/pandas#1471, pandas-dev/pandas#1458, pandas-dev/pandas#1483\n\n        if freq.name not in (\"BME\", \"ME\", \"W\") and freq.name.split(\"-\")[0] not in (\n            \"BQE\",\n            \"BYE\",\n            \"QE\",\n            \"YE\",\n            \"W\",\n        ):\n            return binner\n\n        # If the right end-point is on the last day of the month, roll forwards\n        # until the last moment of that day. Note that we only do this for offsets\n        # which correspond to the end of a super-daily period - \"month start\", for\n        # example, is excluded.\n        if closed == \"right\":\n            # GH 21459, GH 9119: Adjust the bins relative to the wall time\n            edges_dti = binner.tz_localize(None)\n            edges_dti = (\n                edges_dti\n                + pandas.Timedelta(days=1, unit=edges_dti.unit).as_unit(edges_dti.unit)\n                - pandas.Timedelta(1, unit=edges_dti.unit).as_unit(edges_dti.unit)\n            )\n            binner = edges_dti.tz_localize(binner.tz)\n\n        # intraday values on last day\n        if binner[-2] > end_timestamp:\n            binner = binner[:-1]\n        return binner\n\n    @staticmethod\n    def split_partitions_using_pivots_for_sort(\n        df: pandas.DataFrame,\n        columns_info: \"list[ColumnInfo]\",\n        ascending: bool,\n        closed_on_right: bool = True,\n        **kwargs: dict,\n    ) -> \"tuple[pandas.DataFrame, ...]\":\n        def add_attr(df, timestamp):\n   
         if \"bin_bounds\" in df.attrs:\n                df.attrs[\"bin_bounds\"] = (*df.attrs[\"bin_bounds\"], timestamp)\n            else:\n                df.attrs[\"bin_bounds\"] = (timestamp,)\n            return df\n\n        result = ShuffleSortFunctions.split_partitions_using_pivots_for_sort(\n            df, columns_info, ascending, **kwargs\n        )\n        # it's required for each bin to know its bounds in order for resampling to work\n        # properly when down-sampling occurs. Reach here for an example:\n        # https://github.com/modin-project/modin/pull/7140#discussion_r1549246505\n        # We're writing the bounds as 'attrs' to avoid duplications in the final partition\n        for i, pivot in enumerate(columns_info[0].pivots):\n            add_attr(result[i], pivot - pandas.Timedelta(1, unit=\"ns\"))\n            if i + 1 <= len(result):\n                add_attr(result[i + 1], pivot + pandas.Timedelta(1, unit=\"ns\"))\n        return result\n\n\ndef lazy_metadata_decorator(apply_axis=None, axis_arg=-1, transpose=False):\n    \"\"\"\n    Lazily propagate metadata for the ``PandasDataframe``.\n\n    This decorator first adds the minimum required reindexing operations\n    to each partition's queue of functions to be lazily applied for\n    each PandasDataframe in the arguments by applying the function\n    run_f_on_minimally_updated_metadata. 
The decorator also sets the\n    flags for deferred metadata synchronization on the function result\n    if necessary.\n\n    Parameters\n    ----------\n    apply_axis : str, default: None\n        The axes on which to apply the reindexing operations to the `self._partitions` lazily.\n        Case None: No lazy metadata propagation.\n        Case \"both\": Add reindexing operations on both axes to partition queue.\n        Case \"opposite\": Add reindexing operations complementary to given axis.\n        Case \"rows\": Add reindexing operations on row axis to partition queue.\n    axis_arg : int, default: -1\n        The index or column axis.\n    transpose : bool, default: False\n        Boolean for if a transpose operation is being used.\n\n    Returns\n    -------\n    Wrapped Function.\n    \"\"\"\n\n    def decorator(f):\n        from functools import wraps\n\n        @wraps(f)\n        def run_f_on_minimally_updated_metadata(self, *args, **kwargs):\n            from .dataframe import PandasDataframe\n\n            for obj in (\n                [self]\n                + [o for o in args if isinstance(o, PandasDataframe)]\n                + [v for v in kwargs.values() if isinstance(v, PandasDataframe)]\n                + [\n                    d\n                    for o in args\n                    if isinstance(o, list)\n                    for d in o\n                    if isinstance(d, PandasDataframe)\n                ]\n                + [\n                    d\n                    for _, o in kwargs.items()\n                    if isinstance(o, list)\n                    for d in o\n                    if isinstance(d, PandasDataframe)\n                ]\n            ):\n                if apply_axis == \"both\":\n                    if obj._deferred_index and obj._deferred_column:\n                        obj._propagate_index_objs(axis=None)\n                    elif obj._deferred_index:\n                        obj._propagate_index_objs(axis=0)\n   
                 elif obj._deferred_column:\n                        obj._propagate_index_objs(axis=1)\n                elif apply_axis == \"opposite\":\n                    if \"axis\" not in kwargs:\n                        axis = args[axis_arg]\n                    else:\n                        axis = kwargs[\"axis\"]\n                    if axis == 0 and obj._deferred_column:\n                        obj._propagate_index_objs(axis=1)\n                    elif axis == 1 and obj._deferred_index:\n                        obj._propagate_index_objs(axis=0)\n                elif apply_axis == \"rows\":\n                    obj._propagate_index_objs(axis=0)\n            result = f(self, *args, **kwargs)\n            if apply_axis is None and not transpose:\n                result._deferred_index = self._deferred_index\n                result._deferred_column = self._deferred_column\n            elif apply_axis is None and transpose:\n                result._deferred_index = self._deferred_column\n                result._deferred_column = self._deferred_index\n            elif apply_axis == \"opposite\":\n                if axis == 0:\n                    result._deferred_index = self._deferred_index\n                else:\n                    result._deferred_column = self._deferred_column\n            elif apply_axis == \"rows\":\n                result._deferred_column = self._deferred_column\n            return result\n\n        return run_f_on_minimally_updated_metadata\n\n    return decorator\n\n\ndef add_missing_categories_to_groupby(\n    dfs,\n    by,\n    operator,\n    initial_columns,\n    combined_cols,\n    is_udf_agg,\n    kwargs,\n    initial_dtypes=None,\n):\n    \"\"\"\n    Generate values for missing categorical values to be inserted into groupby result.\n\n    This function is used to emulate behavior of ``groupby(observed=False)`` parameter,\n    it takes groupby result that was computed using ``groupby(observed=True)``\n    and computes results 
for categorical values that are not present in `dfs`.\n\n    Parameters\n    ----------\n    dfs : list of pandas.DataFrames\n        Row partitions containing groupby results.\n    by : list of hashable\n        Column labels that were used to perform groupby.\n    operator : callable\n        Aggregation function that was used during groupby.\n    initial_columns : pandas.Index\n        Column labels of the original dataframe.\n    combined_cols : pandas.Index\n        Column labels of the groupby result.\n    is_udf_agg : bool\n        Whether ``operator`` is a UDF.\n    kwargs : dict\n        Parameters that were passed to ``groupby(by, **kwargs)``.\n    initial_dtypes : pandas.Series, optional\n        Dtypes of the original dataframe. If not specified, assume it's ``int64``.\n\n    Returns\n    -------\n    masks : dict[int, pandas.DataFrame]\n        Mapping between partition idx and a dataframe with results for missing categorical values\n        to insert into this partition.\n    new_combined_cols : pandas.Index\n        New column labels of the groupby result. 
If ``is_udf_agg is True``, then ``operator``\n        may change the resulting columns.\n    \"\"\"\n    kwargs[\"observed\"] = False\n    new_combined_cols = combined_cols\n\n    ### At first we need to compute missing categorical values\n    indices = [df.index for df in dfs]\n    # total_index contains all categorical values that resided in the result,\n    # missing values are computed differently depending on whether we're grouping\n    # on multiple groupers or not\n    total_index = indices[0].append(indices[1:])\n    if isinstance(total_index, pandas.MultiIndex):\n        if all(\n            not isinstance(level, pandas.CategoricalIndex)\n            for level in total_index.levels\n        ):\n            return {}, new_combined_cols\n        missing_cats_dtype = {\n            name: (\n                level.dtype\n                if isinstance(level.dtype, pandas.CategoricalDtype)\n                # it's a bit confusing but we have to convert the remaining 'by' columns to categoricals\n                # in order to compute a proper fill value later in the code\n                else pandas.CategoricalDtype(level)\n            )\n            for level, name in zip(total_index.levels, total_index.names)\n        }\n        # if we're grouping on multiple groupers, then the missing categorical values are a\n        # Cartesian product of (actual_missing_categorical_values X all_values_of_other_groupers)\n        complete_index = pandas.MultiIndex.from_product(\n            [\n                value.categories.astype(total_level.dtype)\n                for total_level, value in zip(\n                    total_index.levels, missing_cats_dtype.values()\n                )\n            ],\n            names=by,\n        )\n        missing_index = complete_index[~complete_index.isin(total_index)]\n    else:\n        if not isinstance(total_index, pandas.CategoricalIndex):\n            return {}, new_combined_cols\n        # if we're grouping on a single grouper 
then we simply compute the difference\n        # between categorical values in the result and the values defined in categorical dtype\n        missing_index = total_index.categories.difference(total_index.values)\n        missing_cats_dtype = {by[0]: pandas.CategoricalDtype(missing_index)}\n    missing_index.names = by\n\n    if len(missing_index) == 0:\n        return {}, new_combined_cols\n\n    ### At this stage we want to get a fill_value for missing categorical values\n    if is_udf_agg and isinstance(total_index, pandas.MultiIndex):\n        # if grouping on multiple columns and aggregating with a UDF, then the\n        # fill value is always `np.nan`\n        missing_values = pandas.DataFrame({0: [np.nan]})\n    else:\n        # In case of a UDF aggregation we're forced to run the operator against each\n        # missing category, as in theory it can return different results for each\n        # empty group. In other cases it's enough to run the operator against a single\n        # missing categorical and then broadcast the fill value to each missing value\n        if not is_udf_agg:\n            missing_cats_dtype = {\n                key: pandas.CategoricalDtype(value.categories[:1])\n                for key, value in missing_cats_dtype.items()\n            }\n\n        empty_df = pandas.DataFrame(columns=initial_columns)\n        # HACK: default 'object' dtype doesn't fit our needs, as most of the aggregations\n        # fail on non-numeric columns. Ideally, we need dtypes of the original dataframe,\n        # however, 'int64' also works fine here if the original schema is not available\n        empty_df = empty_df.astype(\n            \"int64\" if initial_dtypes is None else initial_dtypes\n        )\n        empty_df = empty_df.astype(missing_cats_dtype)\n        missing_values = operator(empty_df.groupby(by, **kwargs))\n\n    if is_udf_agg and not isinstance(total_index, pandas.MultiIndex):\n        missing_values = missing_values.drop(columns=by, 
errors=\"ignore\")\n        new_combined_cols = pandas.concat(\n            [\n                pandas.DataFrame(columns=combined_cols),\n                missing_values.iloc[:0],\n            ],\n            axis=0,\n            join=\"outer\",\n        ).columns\n    else:\n        # HACK: If the aggregation has failed, the result would be empty. Assuming the\n        # fill value to be `np.nan` here (this may not always be correct!!!)\n        fill_value = np.nan if len(missing_values) == 0 else missing_values.iloc[0, 0]\n        missing_values = pandas.DataFrame(\n            fill_value, index=missing_index, columns=combined_cols\n        )\n\n    # restoring original categorical dtypes for the indices (MultiIndex already have proper dtypes)\n    if not isinstance(missing_values.index, pandas.MultiIndex):\n        missing_values.index = missing_values.index.astype(total_index.dtype)\n\n    ### Then we decide to which missing categorical values should go to which partition\n    if not kwargs[\"sort\"]:\n        # If the result is allowed to be unsorted, simply insert all the missing\n        # categories to the last partition\n        mask = {len(indices) - 1: missing_values}\n        return mask, new_combined_cols\n\n    # If the result has to be sorted, we have to assign missing categoricals to proper partitions.\n    # For that purpose we define bins with corner values of each partition and then using either\n    # np.digitize or np.searchsorted find correct bins for each missing categorical value.\n    # Example: part0-> [0, 1, 2]; part1-> [3, 4, 10, 12]; part2-> [15, 17, 20, 100]\n    #          bins -> [2, 12] # took last values of each partition excluding the last partition\n    #                            (every value that's matching 'x > part[-2][-1]' should go to the\n    #                             last partition, meaning that including the last value of the last\n    #                             partitions doesn't make sense)\n    #          
missing_cats ->                    [-2, 5, 6, 14, 21, 120]\n    #          np.digitize(missing_cats, bins) -> [ 0, 1, 1,  2,  2,  2]\n    #                                               ^-- mapping between values and partition idx to insert\n    bins = []\n    old_bins_to_new = {}\n    offset = 0\n    # building bins by taking last values of each partition excluding the last partition\n    for idx in indices[:-1]:\n        if len(idx) == 0:\n            # if a partition is empty, we can't use its values to define a bin, thus we simply\n            # skip it and remember the number of skipped partitions as an 'offset'\n            offset += 1\n            continue\n        # remember the number of skipped partitions before this bin, in order to restore original\n        # indexing at the end\n        old_bins_to_new[len(bins)] = offset\n        # for MultiIndices we always use the very first level for bins as using multiple levels\n        # doesn't affect the result\n        bins.append(idx[-1][0] if isinstance(idx, pandas.MultiIndex) else idx[-1])\n    old_bins_to_new[len(bins)] = offset\n\n    if len(bins) == 0:\n        # insert values to the first non-empty partition\n        return {old_bins_to_new.get(0, 0): missing_values}, new_combined_cols\n\n    # we used the very first level of MultiIndex to build bins, meaning that we also have\n    # to use values of the first index's level for 'digitize'\n    lvl_zero = (\n        missing_values.index.levels[0]\n        if isinstance(missing_values.index, pandas.MultiIndex)\n        else missing_values.index\n    )\n    if pandas.api.types.is_any_real_numeric_dtype(lvl_zero):\n        part_idx = np.digitize(lvl_zero, bins, right=True)\n    else:\n        part_idx = np.searchsorted(bins, lvl_zero)\n\n    ### In the end we build a dictionary mapping partition index to a dataframe with missing categoricals\n    ### to be inserted into this partition\n    masks = {}\n    if isinstance(total_index, pandas.MultiIndex):\n    
    for idx, values in pandas.RangeIndex(len(lvl_zero)).groupby(part_idx).items():\n            masks[idx] = missing_values[\n                pandas.Index(missing_values.index.codes[0]).isin(values)\n            ]\n    else:\n        frame_idx = missing_values.index.to_frame()\n        for idx, values in lvl_zero.groupby(part_idx).items():\n            masks[idx] = missing_values[frame_idx.iloc[:, 0].isin(values)]\n\n    # Restore the original indexing by adding the amount of skipped missing partitions\n    masks = {key + old_bins_to_new[key]: value for key, value in masks.items()}\n    return masks, new_combined_cols\n"
  },
  {
    "path": "modin/core/dataframe/pandas/interchange/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Base Modin Dataframe functionality related to data exchange protocols and optimized for pandas storage format.\"\"\"\n"
  },
  {
    "path": "modin/core/dataframe/pandas/interchange/dataframe_protocol/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"\nBase Modin Dataframe functionality related to the dataframe exchange protocol and optimized for pandas storage format.\n\nSee more in https://data-apis.org/dataframe-protocol/latest/index.html.\n\"\"\"\n"
  },
  {
    "path": "modin/core/dataframe/pandas/interchange/dataframe_protocol/buffer.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"\nDataframe exchange protocol implementation.\n\nSee more in https://data-apis.org/dataframe-protocol/latest/index.html.\n\nNotes\n-----\n- Interpreting a raw pointer (as in ``Buffer.ptr``) is annoying and unsafe to\n  do in pure Python. It's more general but definitely less friendly than having\n  ``to_arrow`` and ``to_numpy`` methods. So for the buffers which lack\n  ``__dlpack__`` (e.g., because the column dtype isn't supported by DLPack),\n  this is worth looking at again.\n\"\"\"\n\nimport enum\nfrom typing import Tuple\n\nimport numpy as np\n\nfrom modin.core.dataframe.base.interchange.dataframe_protocol.dataframe import (\n    ProtocolBuffer,\n)\nfrom modin.utils import _inherit_docstrings\n\n\n@_inherit_docstrings(ProtocolBuffer)\nclass PandasProtocolBuffer(ProtocolBuffer):\n    \"\"\"\n    Data in the buffer is guaranteed to be contiguous in memory.\n\n    Note that there is no dtype attribute present, a buffer can be thought of\n    as simply a block of memory. 
However, if the column that the buffer is\n    attached to has a dtype that's supported by DLPack and ``__dlpack__`` is\n    implemented, then that dtype information will be contained in the return\n    value from ``__dlpack__``.\n\n    This distinction is useful to support both (a) data exchange via DLPack on a\n    buffer and (b) dtypes like variable-length strings which do not have a\n    fixed number of bytes per element.\n\n    Parameters\n    ----------\n    x : np.ndarray\n        Data to be held by ``Buffer``.\n    allow_copy : bool, default: True\n        A keyword that defines whether or not the library is allowed\n        to make a copy of the data. For example, copying data would be necessary\n        if a library supports strided buffers, given that this protocol\n        specifies contiguous buffers. Currently, if the flag is set to ``False``\n        and a copy is needed, a ``RuntimeError`` will be raised.\n    \"\"\"\n\n    def __init__(self, x: np.ndarray, allow_copy: bool = True) -> None:\n        if not x.strides == (x.dtype.itemsize,):\n            # The protocol does not support strided buffers, so a copy is\n            # necessary. 
If that's not allowed, we need to raise an exception.\n            if allow_copy:\n                x = x.copy()\n            else:\n                raise RuntimeError(\n                    \"Exports cannot be zero-copy in the case \"\n                    + \"of a non-contiguous buffer\"\n                )\n\n        # Store the numpy array in which the data resides as a private\n        # attribute, so we can use it to retrieve the public attributes\n        self._x = x\n\n    @property\n    def bufsize(self) -> int:\n        return self._x.size * self._x.dtype.itemsize\n\n    @property\n    def ptr(self) -> int:\n        return self._x.__array_interface__[\"data\"][0]\n\n    def __dlpack__(self):\n        raise NotImplementedError(\"__dlpack__\")\n\n    def __dlpack_device__(self) -> Tuple[enum.IntEnum, int]:\n        class Device(enum.IntEnum):\n            CPU = 1\n\n        return (Device.CPU, None)\n\n    def __repr__(self) -> str:\n        \"\"\"\n        Return a string representation for a particular ``PandasProtocolBuffer``.\n\n        Returns\n        -------\n        str\n        \"\"\"\n        return (\n            \"Buffer(\"\n            + str(\n                {\n                    \"bufsize\": self.bufsize,\n                    \"ptr\": self.ptr,\n                    \"device\": self.__dlpack_device__()[0].name,\n                }\n            )\n            + \")\"\n        )\n"
  },
  {
    "path": "modin/core/dataframe/pandas/interchange/dataframe_protocol/column.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"\nDataframe exchange protocol implementation.\n\nSee more in https://data-apis.org/dataframe-protocol/latest/index.html.\n\nNotes\n-----\n- Interpreting a raw pointer (as in ``Buffer.ptr``) is annoying and unsafe to\n  do in pure Python. It's more general but definitely less friendly than having\n  ``to_arrow`` and ``to_numpy`` methods. 
So for the buffers which lack\n  ``__dlpack__`` (e.g., because the column dtype isn't supported by DLPack),\n  this is worth looking at again.\n\"\"\"\n\nfrom __future__ import annotations\n\nfrom functools import cached_property\nfrom typing import Any, Dict, Iterable, Optional, Tuple\n\nimport numpy as np\nimport pandas\n\nfrom modin.core.dataframe.base.interchange.dataframe_protocol.dataframe import (\n    CategoricalDescription,\n    ProtocolColumn,\n)\nfrom modin.core.dataframe.base.interchange.dataframe_protocol.utils import (\n    ColumnNullType,\n    DTypeKind,\n    pandas_dtype_to_arrow_c,\n)\nfrom modin.core.dataframe.pandas.dataframe.dataframe import PandasDataframe\nfrom modin.utils import _inherit_docstrings\n\nfrom .buffer import PandasProtocolBuffer\nfrom .exception import NoOffsetsBuffer, NoValidityBuffer\n\n_NO_VALIDITY_BUFFER = {\n    ColumnNullType.NON_NULLABLE: \"This column is non-nullable so does not have a mask\",\n    ColumnNullType.USE_NAN: \"This column uses NaN as null so does not have a separate mask\",\n    ColumnNullType.USE_SENTINEL: \"This column uses a sentinel value so does not have a mask\",\n}\n\n\n@_inherit_docstrings(ProtocolColumn)\nclass PandasProtocolColumn(ProtocolColumn):\n    \"\"\"\n    A column object, with only the methods and properties required by the interchange protocol defined.\n\n    A column can contain one or more chunks. Each chunk can contain up to three\n    buffers - a data buffer, a mask buffer (depending on null representation),\n    and an offsets buffer (if variable-size binary; e.g., variable-length strings).\n\n    TBD: Arrow has a separate \"null\" dtype, and has no separate mask concept.\n         Instead, it seems to use \"children\" for both columns with a bit mask,\n         and for nested dtypes. Unclear whether this is elegant or confusing.\n         This design requires checking the null representation explicitly.\n         The Arrow design requires checking:\n         1. 
the ARROW_FLAG_NULLABLE (for sentinel values)\n         2. if a column has two children, combined with one of those children\n            having a null dtype.\n         Making the mask concept explicit seems useful. One null dtype would\n         not be enough to cover both bit and byte masks, so that would mean\n         even more checking if we did it the Arrow way.\n    TBD: there's also the \"chunk\" concept here, which is implicit in Arrow as\n         multiple buffers per array (= column here). Semantically it may make\n         sense to have both: chunks were meant for example for lazy evaluation\n         of data which doesn't fit in memory, while multiple buffers per column\n         could also come from doing a selection operation on a single\n         contiguous buffer.\n         Given these concepts, one would expect chunks to be all of the same\n         size (say a 10,000 row dataframe could have 10 chunks of 1,000 rows),\n         while multiple buffers could have data-dependent lengths. Not an issue\n         in pandas if one column is backed by a single NumPy array, but in\n         Arrow it seems possible.\n         Are multiple chunks *and* multiple buffers per column necessary for\n         the purposes of this interchange protocol, or must producers either\n         reuse the chunk concept for this or copy the data?\n\n    Parameters\n    ----------\n    column : PandasDataframe\n        A ``PandasDataframe`` object.\n    allow_copy : bool, default: True\n        A keyword that defines whether or not the library is allowed\n        to make a copy of the data. For example, copying data would be necessary\n        if a library supports strided buffers, given that this protocol\n        specifies contiguous buffers. 
Currently, if the flag is set to ``False``\n        and a copy is needed, a ``RuntimeError`` will be raised.\n\n    Notes\n    -----\n    This Column object can only be produced by ``__dataframe__``,\n    so doesn't need its own version or ``__column__`` protocol.\n    \"\"\"\n\n    def __init__(self, column: PandasDataframe, allow_copy: bool = True) -> None:\n        if not isinstance(column, PandasDataframe):\n            raise NotImplementedError(f\"Columns of type {type(column)} not handled yet\")\n\n        self._col = column\n        self._allow_copy = allow_copy\n\n    def size(self) -> int:\n        return len(self._col.index)\n\n    @property\n    def offset(self) -> int:\n        return 0\n\n    @cached_property\n    def dtype(self) -> Tuple[DTypeKind, int, str, str]:\n        dtype = self._col.dtypes.iloc[0]\n\n        if isinstance(dtype, pandas.CategoricalDtype):\n            pandas_series = self._col.to_pandas().squeeze(axis=1)\n            codes = pandas_series.values.codes\n            (\n                _,\n                bitwidth,\n                c_arrow_dtype_f_str,\n                _,\n            ) = self._dtype_from_primitive_pandas_dtype(codes.dtype)\n            dtype_cache = (\n                DTypeKind.CATEGORICAL,\n                bitwidth,\n                c_arrow_dtype_f_str,\n                \"=\",\n            )\n        elif pandas.api.types.is_string_dtype(dtype):\n            dtype_cache = (DTypeKind.STRING, 8, pandas_dtype_to_arrow_c(dtype), \"=\")\n        else:\n            dtype_cache = self._dtype_from_primitive_pandas_dtype(dtype)\n\n        return dtype_cache\n\n    def _dtype_from_primitive_pandas_dtype(\n        self, dtype\n    ) -> Tuple[DTypeKind, int, str, str]:\n        \"\"\"\n        Deduce dtype specific for the protocol from pandas dtype.\n\n        See `self.dtype` for details.\n\n        Parameters\n        ----------\n        dtype : any\n            A pandas dtype.\n\n        Returns\n        -------\n       
 tuple\n        \"\"\"\n        _np_kinds = {\n            \"i\": DTypeKind.INT,\n            \"u\": DTypeKind.UINT,\n            \"f\": DTypeKind.FLOAT,\n            \"b\": DTypeKind.BOOL,\n            \"M\": DTypeKind.DATETIME,\n        }\n        kind = _np_kinds.get(dtype.kind, None)\n        if kind is None:\n            raise NotImplementedError(\n                f\"Data type {dtype} not supported by the dataframe exchange protocol\"\n            )\n        return (\n            kind,\n            dtype.itemsize * 8,\n            pandas_dtype_to_arrow_c(dtype),\n            dtype.byteorder,\n        )\n\n    @property\n    def describe_categorical(self) -> CategoricalDescription:\n        if self.dtype[0] != DTypeKind.CATEGORICAL:\n            raise TypeError(\n                \"`describe_categorical only works on a column with \"\n                + \"categorical dtype!\"\n            )\n\n        pandas_series = self._col.to_pandas().squeeze(axis=1)\n        cat_frame = type(self._col).from_pandas(\n            pandas.DataFrame({\"cat\": pandas_series.cat.categories})\n        )\n        return {\n            \"is_ordered\": pandas_series.cat.ordered,\n            \"is_dictionary\": True,\n            \"categories\": PandasProtocolColumn(cat_frame, self._allow_copy),\n        }\n\n    @property\n    def describe_null(self) -> Tuple[int, Any]:\n        nulls = {\n            DTypeKind.FLOAT: (ColumnNullType.USE_NAN, None),\n            DTypeKind.DATETIME: (ColumnNullType.USE_NAN, None),\n            DTypeKind.INT: (ColumnNullType.NON_NULLABLE, None),\n            DTypeKind.UINT: (ColumnNullType.NON_NULLABLE, None),\n            DTypeKind.BOOL: (ColumnNullType.NON_NULLABLE, None),\n            # Null values for categoricals are stored as `-1` sentinel values\n            # in the category data (e.g., `col.values.codes` is int8 np.ndarray)\n            DTypeKind.CATEGORICAL: (ColumnNullType.USE_SENTINEL, -1),\n            # follow Arrow in using 1 as valid 
value and 0 for missing/null value\n            DTypeKind.STRING: (ColumnNullType.USE_BYTEMASK, 0),\n        }\n\n        kind = self.dtype[0]\n        try:\n            null, value = nulls[kind]\n        except KeyError:\n            raise NotImplementedError(f\"Data type {kind} not yet supported\")\n\n        return null, value\n\n    @cached_property\n    def null_count(self) -> int:\n\n        def map_func(df):\n            return df.isna()\n\n        def reduce_func(df):\n            return pandas.DataFrame(df.sum())\n\n        intermediate_df = self._col.tree_reduce(0, map_func, reduce_func)\n        # Set ``pandas.RangeIndex(1)`` to index and column labels because\n        # 1) We internally use `MODIN_UNNAMED_SERIES_LABEL` for labels of a reduced axis\n        # 2) The return value of `reduce_func` is a pandas DataFrame with\n        # index and column labels set to ``pandas.RangeIndex(1)``\n        # 3) We further use `to_pandas().squeeze()` to get an integer value of the null count.\n        # Otherwise, we get mismatching internal and external indices for both axes\n        intermediate_df.index = pandas.RangeIndex(1)\n        intermediate_df.columns = pandas.RangeIndex(1)\n        return intermediate_df.to_pandas().squeeze(axis=1).item()\n\n    @property\n    def metadata(self) -> Dict[str, Any]:\n        return {\"modin.index\": self._col.index}\n\n    def num_chunks(self) -> int:\n        return self._col._partitions.shape[0]\n\n    def get_chunks(\n        self, n_chunks: Optional[int] = None\n    ) -> Iterable[\"PandasProtocolColumn\"]:\n        cur_n_chunks = self.num_chunks()\n        n_rows = self.size()\n        if n_chunks is None or n_chunks == cur_n_chunks:\n            cum_row_lengths = np.cumsum([0] + self._col.row_lengths)\n            for i in range(len(cum_row_lengths) - 1):\n                yield PandasProtocolColumn(\n                    self._col.take_2d_labels_or_positional(\n                        
row_positions=range(cum_row_lengths[i], cum_row_lengths[i + 1]),\n                        col_positions=None,\n                    ),\n                    allow_copy=self._col._allow_copy,\n                )\n            return\n\n        if n_chunks % cur_n_chunks != 0:\n            raise RuntimeError(\n                \"The passed `n_chunks` must be a multiple of `self.num_chunks()`.\"\n            )\n\n        if n_chunks > n_rows:\n            raise RuntimeError(\n                \"The passed `n_chunks` value is bigger than `self.num_rows()`.\"\n            )\n\n        chunksize = n_rows // n_chunks\n        new_lengths = [chunksize] * n_chunks\n        new_lengths[-1] = n_rows % n_chunks + new_lengths[-1]\n\n        new_partitions = self._col._partition_mgr_cls.map_axis_partitions(\n            0,\n            self._col._partitions,\n            lambda df: df,\n            keep_partitioning=False,\n            lengths=new_lengths,\n        )\n        new_df = self._col.__constructor__(\n            new_partitions,\n            self._col.index,\n            self._col.columns,\n            new_lengths,\n            self._col.column_widths,\n        )\n        cum_row_lengths = np.cumsum([0] + new_df.row_lengths)\n        for i in range(len(cum_row_lengths) - 1):\n            yield PandasProtocolColumn(\n                new_df.take_2d_labels_or_positional(\n                    row_positions=range(cum_row_lengths[i], cum_row_lengths[i + 1]),\n                    col_positions=None,\n                ),\n                allow_copy=self._allow_copy,\n            )\n\n    def get_buffers(self) -> Dict[str, Any]:\n        buffers = {}\n        buffers[\"data\"] = self._get_data_buffer()\n        try:\n            buffers[\"validity\"] = self._get_validity_buffer()\n        except NoValidityBuffer:\n            buffers[\"validity\"] = None\n\n        try:\n            buffers[\"offsets\"] = self._get_offsets_buffer()\n        except NoOffsetsBuffer:\n            
buffers[\"offsets\"] = None\n\n        return buffers\n\n    _data_buffer_cache = None\n\n    def _get_data_buffer(\n        self,\n    ) -> Tuple[PandasProtocolBuffer, Any]:  # Any is for self.dtype tuple\n        \"\"\"\n        Return the buffer containing the data and the buffer's associated dtype.\n\n        Returns\n        -------\n        tuple\n            The data buffer.\n        \"\"\"\n        if self._data_buffer_cache is not None:\n            return self._data_buffer_cache\n\n        dtype = self.dtype\n        if dtype[0] in (\n            DTypeKind.INT,\n            DTypeKind.UINT,\n            DTypeKind.FLOAT,\n            DTypeKind.BOOL,\n            DTypeKind.DATETIME,\n        ):\n            buffer = PandasProtocolBuffer(\n                self._col.to_numpy().flatten(), allow_copy=self._allow_copy\n            )\n        elif dtype[0] == DTypeKind.CATEGORICAL:\n            pandas_series = self._col.to_pandas().squeeze(axis=1)\n            codes = pandas_series.values.codes\n            buffer = PandasProtocolBuffer(codes, allow_copy=self._allow_copy)\n            dtype = self._dtype_from_primitive_pandas_dtype(codes.dtype)\n        elif dtype[0] == DTypeKind.STRING:\n            # Marshal the strings from a NumPy object array into a byte array\n            buf = self._col.to_numpy().flatten()\n            b = bytearray()\n\n            # TODO: this for-loop is slow; can be implemented in Cython/C/C++ later\n            for i in range(buf.size):\n                if type(buf[i]) is str:\n                    b.extend(buf[i].encode(encoding=\"utf-8\"))\n\n            # Convert the byte array to a pandas \"buffer\" using a NumPy array as the backing store\n            buffer = PandasProtocolBuffer(np.frombuffer(b, dtype=\"uint8\"))\n\n            # Define the dtype for the returned buffer\n            dtype = (\n                DTypeKind.STRING,\n                8,\n                \"u\",\n                \"=\",\n            )  # note: currently 
only support native endianness\n        else:\n            raise NotImplementedError(f\"Data type {self._col.dtype[0]} not handled yet\")\n\n        self._data_buffer_cache = (buffer, dtype)\n        return self._data_buffer_cache\n\n    _validity_buffer_cache = None\n\n    def _get_validity_buffer(self) -> Tuple[PandasProtocolBuffer, Any]:\n        \"\"\"\n        Get the validity buffer.\n\n        The buffer contains the mask values indicating\n        missing data and the buffer's associated dtype.\n\n        Returns\n        -------\n        tuple\n            The validity buffer.\n\n        Raises\n        ------\n        ``NoValidityBuffer`` if null representation is not a bit or byte mask.\n        \"\"\"\n        if self._validity_buffer_cache is not None:\n            return self._validity_buffer_cache\n\n        null, invalid = self.describe_null\n\n        if self.dtype[0] == DTypeKind.STRING:\n            # For now, have the mask array be comprised of bytes, rather than a bit array\n            buf = self._col.to_numpy().flatten()\n\n            # Determine the encoding for valid values\n            valid = invalid == 0\n            invalid = not valid\n\n            mask = np.empty(shape=(len(buf),), dtype=np.bool_)\n            for i, obj in enumerate(buf):\n                mask[i] = valid if isinstance(obj, str) else invalid\n\n            # Convert the mask array to a Pandas \"buffer\" using a NumPy array as the backing store\n            buffer = PandasProtocolBuffer(mask)\n\n            # Define the dtype of the returned buffer\n            dtype = (DTypeKind.BOOL, 8, \"b\", \"=\")\n\n            self._validity_buffer_cache = (buffer, dtype)\n            return self._validity_buffer_cache\n\n        try:\n            msg = _NO_VALIDITY_BUFFER[null]\n        except KeyError:\n            raise NotImplementedError(\"See self.describe_null\")\n\n        raise NoValidityBuffer(msg)\n\n    _offsets_buffer_cache = None\n\n    def 
_get_offsets_buffer(self) -> Tuple[PandasProtocolBuffer, Any]:\n        \"\"\"\n        Get the offsets buffer.\n\n        The buffer contains the offset values for variable-size binary data\n        (e.g., variable-length strings) and the buffer's associated dtype.\n\n        Returns\n        -------\n        tuple\n            The offsets buffer.\n\n        Raises\n        ------\n        ``NoOffsetsBuffer`` if the data buffer does not have an associated offsets buffer.\n        \"\"\"\n        if self._offsets_buffer_cache is not None:\n            return self._offsets_buffer_cache\n\n        if self.dtype[0] == DTypeKind.STRING:\n            # For each string, we need to manually determine the next offset\n            values = self._col.to_numpy().flatten()\n            ptr = 0\n            offsets = [ptr] + [None] * len(values)\n            for i, v in enumerate(values):\n                # For missing values (in this case, `np.nan` values), we don't increment the pointer\n                if type(v) is str:\n                    b = v.encode(encoding=\"utf-8\")\n                    ptr += len(b)\n\n                offsets[i + 1] = ptr\n\n            # Convert the list of offsets to a NumPy array of signed 64-bit integers (note: Arrow allows the offsets array to be either `int32` or `int64`; here, we default to the latter)\n            buf = np.asarray(offsets, dtype=\"int64\")\n\n            # Convert the offsets to a Pandas \"buffer\" using the NumPy array as the backing store\n            buffer = PandasProtocolBuffer(buf)\n\n            # Assemble the buffer dtype info\n            dtype = (\n                DTypeKind.INT,\n                64,\n                \"l\",\n                \"=\",\n            )  # note: currently only support native endianness\n        else:\n            raise NoOffsetsBuffer(\n                \"This column has a fixed-length dtype so does not have an offsets buffer\"\n            )\n\n        self._offsets_buffer_cache = (buffer, 
dtype)\n        return self._offsets_buffer_cache\n"
  },
  {
    "path": "modin/core/dataframe/pandas/interchange/dataframe_protocol/dataframe.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"\nDataframe exchange protocol implementation.\n\nSee more in https://data-apis.org/dataframe-protocol/latest/index.html.\n\nNotes\n-----\n- Interpreting a raw pointer (as in ``Buffer.ptr``) is annoying and unsafe to\n  do in pure Python. It's more general but definitely less friendly than having\n  ``to_arrow`` and ``to_numpy`` methods. 
So for the buffers which lack\n  ``__dlpack__`` (e.g., because the column dtype isn't supported by DLPack),\n  this is worth looking at again.\n\"\"\"\n\nimport collections\nfrom typing import Any, Dict, Iterable, Optional, Sequence\n\nimport numpy as np\n\nfrom modin.core.dataframe.base.interchange.dataframe_protocol.dataframe import (\n    ProtocolDataframe,\n)\nfrom modin.core.dataframe.pandas.dataframe.dataframe import PandasDataframe\nfrom modin.utils import _inherit_docstrings\n\nfrom .column import PandasProtocolColumn\n\n\n@_inherit_docstrings(ProtocolDataframe)\nclass PandasProtocolDataframe(ProtocolDataframe):\n    \"\"\"\n    A data frame class, with only the methods required by the interchange protocol defined.\n\n    Instances of this (private) class are returned from ``modin.pandas.DataFrame.__dataframe__``\n    as objects with the methods and attributes defined on this class.\n\n    A \"data frame\" represents an ordered collection of named columns.\n    A column's \"name\" must be a unique string. Columns may be accessed by name or by position.\n    This could be a public data frame class, or an object with the methods and\n    attributes defined on this DataFrame class could be returned from the\n    ``__dataframe__`` method of a public data frame class in a library adhering\n    to the dataframe interchange protocol specification.\n\n    Parameters\n    ----------\n    df : PandasDataframe\n        A ``PandasDataframe`` object.\n    nan_as_null : bool, default: False\n        A keyword intended for the consumer to tell the producer\n        to overwrite null values in the data with ``NaN`` (or ``NaT``).\n        This currently has no effect; once support for nullable extension\n        dtypes is added, this value should be propagated to columns.\n    allow_copy : bool, default: True\n        A keyword that defines whether or not the library is allowed\n        to make a copy of the data. 
For example, copying data would be necessary\n        if a library supports strided buffers, given that this protocol\n        specifies contiguous buffers. Currently, if the flag is set to ``False``\n        and a copy is needed, a ``RuntimeError`` will be raised.\n    \"\"\"\n\n    def __init__(\n        self,\n        df: PandasDataframe,\n        nan_as_null: bool = False,\n        allow_copy: bool = True,\n    ) -> None:\n        self._df = df\n        self._nan_as_null = nan_as_null\n        self._allow_copy = allow_copy\n\n    def __dataframe__(self, nan_as_null: bool = False, allow_copy: bool = True):\n        return PandasProtocolDataframe(\n            self._df, nan_as_null=nan_as_null, allow_copy=allow_copy\n        )\n\n    @property\n    def metadata(self) -> Dict[str, Any]:\n        return {\"modin.index\": self._df.index}\n\n    def num_columns(self) -> int:\n        return len(self._df.columns)\n\n    def num_rows(self) -> int:\n        return len(self._df.index)\n\n    def num_chunks(self) -> int:\n        return self._df._partitions.shape[0]\n\n    def column_names(self) -> Iterable[str]:\n        for col in self._df.columns:\n            yield col\n\n    def get_column(self, i: int) -> PandasProtocolColumn:\n        return PandasProtocolColumn(\n            self._df.take_2d_labels_or_positional(\n                row_positions=None, col_positions=[i]\n            ),\n            allow_copy=self._allow_copy,\n        )\n\n    def get_column_by_name(self, name: str) -> PandasProtocolColumn:\n        return PandasProtocolColumn(\n            self._df.take_2d_labels_or_positional(\n                row_positions=None, col_labels=[name]\n            ),\n            allow_copy=self._allow_copy,\n        )\n\n    def get_columns(self) -> Iterable[PandasProtocolColumn]:\n        for name in self._df.columns:\n            yield PandasProtocolColumn(\n                self._df.take_2d_labels_or_positional(\n                    row_positions=None, 
col_labels=[name]\n                ),\n                allow_copy=self._allow_copy,\n            )\n\n    def select_columns(self, indices: Sequence[int]) -> \"PandasProtocolDataframe\":\n        if not isinstance(indices, collections.abc.Sequence):\n            raise ValueError(\"`indices` is not a sequence\")\n\n        return PandasProtocolDataframe(\n            self._df.take_2d_labels_or_positional(\n                row_positions=None, col_positions=indices\n            ),\n            allow_copy=self._allow_copy,\n        )\n\n    def select_columns_by_name(self, names: Sequence[str]) -> \"PandasProtocolDataframe\":\n        if not isinstance(names, collections.abc.Sequence):\n            raise ValueError(\"`names` is not a sequence\")\n\n        return PandasProtocolDataframe(\n            self._df.take_2d_labels_or_positional(row_positions=None, col_labels=names),\n            allow_copy=self._allow_copy,\n        )\n\n    def get_chunks(\n        self, n_chunks: Optional[int] = None\n    ) -> Iterable[\"PandasProtocolDataframe\"]:\n        cur_n_chunks = self.num_chunks()\n        n_rows = self.num_rows()\n        if n_chunks is None or n_chunks == cur_n_chunks:\n            cum_row_lengths = np.cumsum([0] + self._df.row_lengths)\n            for i in range(len(cum_row_lengths) - 1):\n                yield PandasProtocolDataframe(\n                    self._df.take_2d_labels_or_positional(\n                        row_positions=range(cum_row_lengths[i], cum_row_lengths[i + 1]),\n                        col_positions=None,\n                    ),\n                    allow_copy=self._allow_copy,\n                )\n            return\n        if n_chunks % cur_n_chunks != 0:\n            raise RuntimeError(\n                \"The passed `n_chunks` must be a multiple of `self.num_chunks()`.\"\n            )\n\n        if n_chunks > n_rows:\n            raise RuntimeError(\n                \"The passed `n_chunks` value is bigger than `self.num_rows()`.\"\n    
        )\n\n        chunksize = n_rows // n_chunks\n        new_lengths = [chunksize] * n_chunks\n        new_lengths[-1] = n_rows % n_chunks + new_lengths[-1]\n\n        new_partitions = self._df._partition_mgr_cls.map_axis_partitions(\n            0,\n            self._df._partitions,\n            lambda df: df,\n            keep_partitioning=False,\n            lengths=new_lengths,\n        )\n        new_df = self._df.__constructor__(\n            new_partitions,\n            self._df.index,\n            self._df.columns,\n            new_lengths,\n            self._df.column_widths,\n        )\n        cum_row_lengths = np.cumsum([0] + new_df.row_lengths)\n        for i in range(len(cum_row_lengths) - 1):\n            yield PandasProtocolDataframe(\n                new_df.take_2d_labels_or_positional(\n                    row_positions=range(cum_row_lengths[i], cum_row_lengths[i + 1]),\n                    col_positions=None,\n                ),\n                allow_copy=self._allow_copy,\n            )\n"
  },
  {
    "path": "modin/core/dataframe/pandas/interchange/dataframe_protocol/exception.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Exceptions that can be caught by dataframe exchange protocol implementation for pandas storage format.\"\"\"\n\n\nclass NoValidityBuffer(Exception):\n    \"\"\"Exception to be raised if there is no validity buffer for ``PandasProtocolColumn``.\"\"\"\n\n    pass\n\n\nclass NoOffsetsBuffer(Exception):\n    \"\"\"Exception to be raised if there is no offsets buffer for ``PandasProtocolColumn``.\"\"\"\n\n    pass\n"
  },
  {
    "path": "modin/core/dataframe/pandas/interchange/dataframe_protocol/from_dataframe.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module houses the functions building a ``pandas.DataFrame`` from a DataFrame exchange protocol object.\"\"\"\n\nimport ctypes\nimport re\nfrom typing import Any, Optional, Tuple, Union\n\nimport numpy as np\nimport pandas\n\nfrom modin.core.dataframe.base.interchange.dataframe_protocol.dataframe import (\n    ProtocolBuffer,\n    ProtocolColumn,\n    ProtocolDataframe,\n)\nfrom modin.core.dataframe.base.interchange.dataframe_protocol.utils import (\n    ArrowCTypes,\n    ColumnNullType,\n    DTypeKind,\n    Endianness,\n)\n\nnp_types_map = {\n    DTypeKind.INT: {8: np.int8, 16: np.int16, 32: np.int32, 64: np.int64},\n    DTypeKind.UINT: {8: np.uint8, 16: np.uint16, 32: np.uint32, 64: np.uint64},\n    DTypeKind.FLOAT: {32: np.float32, 64: np.float64},\n    # Consider bitmask to be a uint8 dtype to parse the bits later\n    DTypeKind.BOOL: {1: np.uint8, 8: bool},\n}\n\n\ndef from_dataframe_to_pandas(df: ProtocolDataframe, n_chunks: Optional[int] = None):\n    \"\"\"\n    Build a ``pandas.DataFrame`` from an object supporting the DataFrame exchange protocol, i.e. 
`__dataframe__` method.\n\n    Parameters\n    ----------\n    df : ProtocolDataframe\n        Object supporting the exchange protocol, i.e. `__dataframe__` method.\n    n_chunks : int, optional\n        Number of chunks to split `df`.\n\n    Returns\n    -------\n    pandas.DataFrame\n    \"\"\"\n    if not hasattr(df, \"__dataframe__\"):\n        raise ValueError(\"`df` does not support __dataframe__\")\n\n    df = df.__dataframe__()\n    if isinstance(df, dict):\n        df = df[\"dataframe\"]\n\n    pandas_dfs = []\n    for chunk in df.get_chunks(n_chunks):\n        pandas_df = protocol_df_chunk_to_pandas(chunk)\n        pandas_dfs.append(pandas_df)\n\n    pandas_df = pandas.concat(pandas_dfs, axis=0, ignore_index=True)\n\n    index_obj = df.metadata.get(\"modin.index\", df.metadata.get(\"pandas.index\", None))\n    if index_obj is not None:\n        pandas_df.index = index_obj\n\n    return pandas_df\n\n\ndef protocol_df_chunk_to_pandas(df):\n    \"\"\"\n    Convert exchange protocol chunk to ``pandas.DataFrame``.\n\n    Parameters\n    ----------\n    df : ProtocolDataframe\n\n    Returns\n    -------\n    pandas.DataFrame\n    \"\"\"\n    # We need a dict of columns here, with each column being a NumPy array (at\n    # least for now, deal with non-NumPy dtypes later).\n    columns = dict()\n    buffers = []  # hold on to buffers, keeps memory alive\n    for name in df.column_names():\n        if not isinstance(name, str):\n            raise ValueError(f\"Column {name} is not a string\")\n        if name in columns:\n            raise ValueError(f\"Column {name} is not unique\")\n        col = df.get_column_by_name(name)\n        columns[name], buf = unpack_protocol_column(col)\n        buffers.append(buf)\n\n    pandas_df = pandas.DataFrame(columns)\n    pandas_df._buffers = buffers\n    return pandas_df\n\n\ndef unpack_protocol_column(\n    col: ProtocolColumn,\n) -> Tuple[Union[np.ndarray, pandas.Series], Any]:\n    \"\"\"\n    Unpack an interchange 
protocol column to a pandas-ready column.\n\n    Parameters\n    ----------\n    col : ProtocolColumn\n        Column to unpack.\n\n    Returns\n    -------\n    tuple\n        Tuple of resulting column (either an ndarray or a series) and the object\n        which keeps memory referenced by the column alive.\n    \"\"\"\n    dtype = col.dtype[0]\n    if dtype in (\n        DTypeKind.INT,\n        DTypeKind.UINT,\n        DTypeKind.FLOAT,\n        DTypeKind.BOOL,\n    ):\n        return primitive_column_to_ndarray(col)\n    elif dtype == DTypeKind.CATEGORICAL:\n        return categorical_column_to_series(col)\n    elif dtype == DTypeKind.STRING:\n        return string_column_to_ndarray(col)\n    elif dtype == DTypeKind.DATETIME:\n        return datetime_column_to_ndarray(col)\n    else:\n        raise NotImplementedError(f\"Data type {dtype} not handled yet\")\n\n\ndef primitive_column_to_ndarray(col: ProtocolColumn) -> Tuple[np.ndarray, Any]:\n    \"\"\"\n    Convert a column holding one of the primitive dtypes (int, uint, float or bool) to a NumPy array.\n\n    Parameters\n    ----------\n    col : ProtocolColumn\n\n    Returns\n    -------\n    tuple\n        Tuple of np.ndarray holding the data and the memory owner object that keeps the memory alive.\n    \"\"\"\n    buffers = col.get_buffers()\n\n    data_buff, data_dtype = buffers[\"data\"]\n    data = buffer_to_ndarray(data_buff, data_dtype, col.offset, col.size())\n\n    data = set_nulls(data, col, buffers[\"validity\"])\n    return data, buffers\n\n\ndef categorical_column_to_series(col: ProtocolColumn) -> Tuple[pandas.Series, Any]:\n    \"\"\"\n    Convert a column holding categorical data to a pandas Series.\n\n    Parameters\n    ----------\n    col : ProtocolColumn\n\n    Returns\n    -------\n    tuple\n        Tuple of pandas.Series holding the data and the memory owner object that keeps the memory alive.\n    \"\"\"\n    cat_descr = col.describe_categorical\n    ordered, is_dict, categories = (\n     
   cat_descr[\"is_ordered\"],\n        cat_descr[\"is_dictionary\"],\n        cat_descr[\"categories\"],\n    )\n\n    if not is_dict or categories is None:\n        raise NotImplementedError(\"Non-dictionary categoricals not supported yet\")\n\n    buffers = col.get_buffers()\n\n    codes_buff, codes_dtype = buffers[\"data\"]\n    codes = buffer_to_ndarray(codes_buff, codes_dtype, col.offset, col.size())\n\n    # Doing module in order to not get ``IndexError`` for out-of-bounds sentinel values in `codes`\n    cat_values, categories_buf = unpack_protocol_column(categories)\n    values = cat_values[codes % len(cat_values)]\n\n    cat = pandas.Categorical(values, categories=cat_values, ordered=ordered)\n    data = pandas.Series(cat)\n\n    data = set_nulls(data, col, buffers[\"validity\"])\n    return data, [buffers, categories_buf]\n\n\ndef _inverse_null_buf(buf: np.ndarray, null_kind: ColumnNullType) -> np.ndarray:\n    \"\"\"\n    Inverse the boolean value of buffer storing either bit- or bytemask.\n\n    Parameters\n    ----------\n    buf : np.ndarray\n        Buffer to inverse the boolean value for.\n    null_kind : {ColumnNullType.USE_BYTEMASK, ColumnNullType.USE_BITMASK}\n        How to treat the buffer.\n\n    Returns\n    -------\n    np.ndarray\n        Logically inversed buffer.\n    \"\"\"\n    if null_kind == ColumnNullType.USE_BITMASK:\n        return ~buf\n    assert (\n        null_kind == ColumnNullType.USE_BYTEMASK\n    ), f\"Unexpected null kind: {null_kind}\"\n    # bytemasks use 0 for `False` and anything else for `True`, so convert to bool\n    # by direct comparison instead of bitwise reversal like we do for bitmasks\n    return buf == 0\n\n\ndef string_column_to_ndarray(col: ProtocolColumn) -> Tuple[np.ndarray, Any]:\n    \"\"\"\n    Convert a column holding string data to a NumPy array.\n\n    Parameters\n    ----------\n    col : ProtocolColumn\n\n    Returns\n    -------\n    tuple\n        Tuple of np.ndarray holding the data and the 
memory owner object that keeps the memory alive.\n    \"\"\"\n    null_kind, sentinel_val = col.describe_null\n\n    if null_kind not in (\n        ColumnNullType.NON_NULLABLE,\n        ColumnNullType.USE_BITMASK,\n        ColumnNullType.USE_BYTEMASK,\n    ):\n        raise NotImplementedError(\n            f\"{null_kind} null kind is not yet supported for string columns.\"\n        )\n\n    buffers = col.get_buffers()\n\n    # Retrieve the data buffer containing the UTF-8 code units\n    data_buff, protocol_data_dtype = buffers[\"data\"]\n    # We're going to reinterpret the buffer as uint8, so making sure we can do it safely\n    assert protocol_data_dtype[1] == 8  # bitwidth == 8\n    assert protocol_data_dtype[2] == ArrowCTypes.STRING  # format_str == utf-8\n    # Convert the buffers to NumPy arrays, in order to go from STRING to an equivalent ndarray,\n    # we claim that the buffer is uint8 (i.e., a byte array)\n    data_dtype = (\n        DTypeKind.UINT,\n        8,\n        ArrowCTypes.UINT8,\n        Endianness.NATIVE,\n    )\n    # Specify zero offset as we don't want to chunk the string data\n    data = buffer_to_ndarray(data_buff, data_dtype, offset=0, length=col.size())\n\n    # Retrieve the offsets buffer containing the index offsets demarcating the beginning and end of each string\n    offset_buff, offset_dtype = buffers[\"offsets\"]\n    # Offsets buffer contains start-stop positions of strings in the data buffer,\n    # meaning that it has more elements than in the data buffer, do `col.size() + 1` here\n    # to pass a proper offsets buffer size\n    offsets = buffer_to_ndarray(\n        offset_buff, offset_dtype, col.offset, length=col.size() + 1\n    )\n\n    null_pos = None\n    if null_kind in (ColumnNullType.USE_BITMASK, ColumnNullType.USE_BYTEMASK):\n        valid_buff, valid_dtype = buffers[\"validity\"]\n        null_pos = buffer_to_ndarray(valid_buff, valid_dtype, col.offset, col.size())\n        if sentinel_val == 0:\n            null_pos 
= _inverse_null_buf(null_pos, null_kind)\n\n    # Assemble the strings from the code units\n    str_list = [None] * col.size()\n    for i in range(col.size()):\n        # Check for missing values\n        if null_pos is not None and null_pos[i]:\n            str_list[i] = np.nan\n            continue\n\n        # Extract a range of code units\n        units = data[offsets[i] : offsets[i + 1]]\n\n        # Convert the list of code units to bytes\n        str_bytes = bytes(units)\n\n        # Create the string\n        string = str_bytes.decode(encoding=\"utf-8\")\n\n        # Add to our list of strings\n        str_list[i] = string\n\n    # Convert the string list to a NumPy array\n    return np.asarray(str_list, dtype=\"object\"), buffers\n\n\ndef datetime_column_to_ndarray(col: ProtocolColumn) -> Tuple[np.ndarray, Any]:\n    \"\"\"\n    Convert a column holding DateTime data to a NumPy array.\n\n    Parameters\n    ----------\n    col : ProtocolColumn\n\n    Returns\n    -------\n    tuple\n        Tuple of np.ndarray holding the data and the memory owner object that keeps the memory alive.\n    \"\"\"\n    buffers = col.get_buffers()\n\n    _, _, format_str, _ = col.dtype\n    dbuf, dtype = buffers[\"data\"]\n    # Consider dtype being `uint` to get number of units passed since the 01.01.1970\n    data = buffer_to_ndarray(\n        dbuf,\n        (\n            DTypeKind.UINT,\n            dtype[1],\n            getattr(ArrowCTypes, f\"UINT{dtype[1]}\"),\n            Endianness.NATIVE,\n        ),\n        col.offset,\n        col.size(),\n    )\n\n    def parse_format_str(format_str, data):\n        \"\"\"Parse datetime `format_str` to interpret the `data`.\"\"\"\n        # timestamp 'ts{unit}:tz'\n        timestamp_meta = re.match(r\"ts([smun]):(.*)\", format_str)\n        if timestamp_meta:\n            unit, tz = timestamp_meta.group(1), timestamp_meta.group(2)\n            if tz != \"\":\n                raise NotImplementedError(\"Timezones are not 
supported yet\")\n            if unit != \"s\":\n                # the format string describes only a first letter of the unit, add one extra\n                # letter to make the unit in numpy-style: 'm' -> 'ms', 'u' -> 'us', 'n' -> 'ns'\n                unit += \"s\"\n            data = data.astype(f\"datetime64[{unit}]\")\n            return data\n\n        # date 'td{Days/Ms}'\n        date_meta = re.match(r\"td([Dm])\", format_str)\n        if date_meta:\n            unit = date_meta.group(1)\n            if unit == \"D\":\n                # NumPy doesn't support DAY unit, so converting days to seconds\n                # (converting to uint64 to avoid overflow)\n                data = (data.astype(np.uint64) * (24 * 60 * 60)).astype(\"datetime64[s]\")\n            elif unit == \"m\":\n                data = data.astype(\"datetime64[ms]\")\n            else:\n                raise NotImplementedError(f\"Date unit is not supported: {unit}\")\n            return data\n\n        raise NotImplementedError(f\"DateTime kind is not supported: {format_str}\")\n\n    data = parse_format_str(format_str, data)\n    data = set_nulls(data, col, buffers[\"validity\"])\n    return data, buffers\n\n\ndef buffer_to_ndarray(\n    buffer: ProtocolBuffer,\n    dtype: Tuple[DTypeKind, int, str, str],\n    offset: int = 0,\n    length: Optional[int] = None,\n) -> np.ndarray:\n    \"\"\"\n    Build a NumPy array from the passed buffer.\n\n    Parameters\n    ----------\n    buffer : ProtocolBuffer\n        Buffer to build a NumPy array from.\n    dtype : tuple\n        Data type of the buffer conforming protocol dtypes format.\n    offset : int, default: 0\n        Number of elements to offset from the start of the buffer.\n    length : int, optional\n        If the buffer is a bit-mask, specifies a number of bits to read\n        from the buffer. 
Has no effect otherwise.\n\n    Returns\n    -------\n    np.ndarray\n\n    Notes\n    -----\n    The returned array doesn't own the memory. A user of the function must keep the memory\n    owner object alive as long as the returned NumPy array is being used.\n    \"\"\"\n    kind, bit_width, _, _ = dtype\n\n    column_dtype = np_types_map.get(kind, {}).get(bit_width, None)\n    if column_dtype is None:\n        raise NotImplementedError(f\"Convertion for {dtype} is not yet supported.\")\n\n    # TODO: No DLPack yet, so need to construct a new ndarray from the data pointer\n    # and size in the buffer plus the dtype on the column. Use DLPack as NumPy supports\n    # it since https://github.com/numpy/numpy/pull/19083\n    ctypes_type = np.ctypeslib.as_ctypes_type(column_dtype)\n    data_pointer = ctypes.cast(\n        buffer.ptr + (offset * bit_width // 8), ctypes.POINTER(ctypes_type)\n    )\n\n    if bit_width == 1:\n        assert length is not None, \"`length` must be specified for a bit-mask buffer.\"\n        arr = np.ctypeslib.as_array(data_pointer, shape=(buffer.bufsize,))\n        return bitmask_to_bool_ndarray(arr, length, first_byte_offset=offset % 8)\n    else:\n        return np.ctypeslib.as_array(\n            data_pointer, shape=(buffer.bufsize // (bit_width // 8),)\n        )\n\n\ndef bitmask_to_bool_ndarray(\n    bitmask: np.ndarray, mask_length: int, first_byte_offset: int = 0\n) -> np.ndarray:\n    \"\"\"\n    Convert bit-mask to a boolean NumPy array.\n\n    Parameters\n    ----------\n    bitmask : np.ndarray[uint8]\n        NumPy array of uint8 dtype representing the bitmask.\n    mask_length : int\n        Number of elements in the mask to interpret.\n    first_byte_offset : int, default: 0\n        Number of elements to offset from the start of the first byte.\n\n    Returns\n    -------\n    np.ndarray[bool]\n    \"\"\"\n    bytes_to_skip = first_byte_offset // 8\n    bitmask = bitmask[bytes_to_skip:]\n    first_byte_offset %= 8\n\n    
bool_mask = np.zeros(mask_length, dtype=bool)\n\n    # Proccessing the first byte separately as it has its own offset\n    val = bitmask[0]\n    mask_idx = 0\n    bits_in_first_byte = min(8 - first_byte_offset, mask_length)\n    for j in range(bits_in_first_byte):\n        if val & (1 << (j + first_byte_offset)):\n            bool_mask[mask_idx] = True\n        mask_idx += 1\n\n    # `mask_length // 8` describes how many full bytes to process\n    for i in range((mask_length - bits_in_first_byte) // 8):\n        # doing `+ 1` as we already processed the first byte\n        val = bitmask[i + 1]\n        for j in range(8):\n            if val & (1 << j):\n                bool_mask[mask_idx] = True\n            mask_idx += 1\n\n    if len(bitmask) > 1:\n        # Processing reminder of last byte\n        val = bitmask[-1]\n        for j in range(len(bool_mask) - mask_idx):\n            if val & (1 << j):\n                bool_mask[mask_idx] = True\n            mask_idx += 1\n\n    return bool_mask\n\n\ndef set_nulls(\n    data: Union[np.ndarray, pandas.Series],\n    col: ProtocolColumn,\n    validity: Tuple[ProtocolBuffer, Tuple[DTypeKind, int, str, str]],\n    allow_modify_inplace: bool = True,\n):\n    \"\"\"\n    Set null values for the data according to the column null kind.\n\n    Parameters\n    ----------\n    data : np.ndarray or pandas.Series\n        Data to set nulls in.\n    col : ProtocolColumn\n        Column object that describes the `data`.\n    validity : tuple(ProtocolBuffer, dtype) or None\n        The return value of ``col.buffers()``. 
We do not access the ``col.buffers()``\n        here to not take the ownership of the memory of buffer objects.\n    allow_modify_inplace : bool, default: True\n        Whether to modify the `data` inplace when zero-copy is possible (True) or always\n        modify a copy of the `data` (False).\n\n    Returns\n    -------\n    np.ndarray or pandas.Series\n        Data with the nulls being set.\n    \"\"\"\n    null_kind, sentinel_val = col.describe_null\n    null_pos = None\n\n    if null_kind == ColumnNullType.USE_SENTINEL:\n        null_pos = data == sentinel_val\n    elif null_kind in (ColumnNullType.USE_BITMASK, ColumnNullType.USE_BYTEMASK):\n        valid_buff, valid_dtype = validity\n        null_pos = buffer_to_ndarray(valid_buff, valid_dtype, col.offset, col.size())\n        if sentinel_val == 0:\n            null_pos = _inverse_null_buf(null_pos, null_kind)\n    elif null_kind in (ColumnNullType.NON_NULLABLE, ColumnNullType.USE_NAN):\n        pass\n    else:\n        raise NotImplementedError(f\"Null kind {null_kind} is not yet supported.\")\n\n    if null_pos is not None and np.any(null_pos):\n        if not allow_modify_inplace:\n            data = data.copy()\n        try:\n            data[null_pos] = None\n        except TypeError:\n            # TypeError happens if the `data` dtype appears to be non-nullable in numpy notation\n            # (bool, int, uint), if such happens, cast the `data` to nullable float dtype.\n            data = data.astype(float)\n            data[null_pos] = None\n\n    return data\n"
  },
  {
    "path": "modin/core/dataframe/pandas/metadata/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Utilities and classes to handle work with metadata.\"\"\"\n\nfrom .dtypes import (\n    DtypesDescriptor,\n    LazyProxyCategoricalDtype,\n    ModinDtypes,\n    extract_dtype,\n)\nfrom .index import ModinIndex\n\n__all__ = [\n    \"ModinDtypes\",\n    \"ModinIndex\",\n    \"LazyProxyCategoricalDtype\",\n    \"DtypesDescriptor\",\n    \"extract_dtype\",\n]\n"
  },
  {
    "path": "modin/core/dataframe/pandas/metadata/dtypes.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module contains class ``ModinDtypes``.\"\"\"\n\nfrom __future__ import annotations\n\nfrom typing import TYPE_CHECKING, Callable, Optional, Union\n\nimport pandas\nfrom pandas._typing import DtypeObj, IndexLabel\nfrom pandas.core.dtypes.cast import find_common_type\n\nif TYPE_CHECKING:\n    from modin.core.dataframe.pandas.dataframe.dataframe import PandasDataframe\n    from .index import ModinIndex\n\nfrom modin.error_message import ErrorMessage\n\n\nclass DtypesDescriptor:\n    \"\"\"\n    Describes partial dtypes.\n\n    Parameters\n    ----------\n    known_dtypes : dict[IndexLabel, DtypeObj] or pandas.Series, optional\n        Columns that we know dtypes for.\n    cols_with_unknown_dtypes : list[IndexLabel], optional\n        Column names that have unknown dtypes. 
If specified together with `remaining_dtype`, must describe all\n        columns with unknown dtypes, otherwise, the missing columns will be assigned to `remaining_dtype`.\n        If `cols_with_unknown_dtypes` is incomplete, you must specify `know_all_names=False`.\n    remaining_dtype : DtypeObj, optional\n        Dtype for columns that are not present neither in `known_dtypes` nor in `cols_with_unknown_dtypes`.\n        This parameter is intended to describe columns that we known dtypes for, but don't know their\n        names yet. Note, that this parameter DOESN'T describe dtypes for columns from `cols_with_unknown_dtypes`.\n    parent_df : PandasDataframe, optional\n        Dataframe object for which we describe dtypes. This dataframe will be used to compute\n        missing dtypes on ``.materialize()``.\n    columns_order : dict[int, IndexLabel], optional\n        Order of columns in the dataframe. If specified, must describe all the columns of the dataframe.\n    know_all_names : bool, default: True\n        Whether `known_dtypes` and `cols_with_unknown_dtypes` contain all column names for this dataframe besides those,\n        that are being described by `remaining_dtype`.\n        One can't pass `know_all_names=False` together with `remaining_dtype` as this creates ambiguity\n        on how to interpret missing columns (whether they belong to `remaining_dtype` or not).\n    _schema_is_known : bool, optional\n        Whether `known_dtypes` describe all columns in the dataframe. 
This parameter intended mostly\n        for internal use.\n    \"\"\"\n\n    def __init__(\n        self,\n        known_dtypes: Optional[Union[dict[IndexLabel, DtypeObj], pandas.Series]] = None,\n        cols_with_unknown_dtypes: Optional[list[IndexLabel]] = None,\n        remaining_dtype: Optional[DtypeObj] = None,\n        parent_df: Optional[PandasDataframe] = None,\n        columns_order: Optional[dict[int, IndexLabel]] = None,\n        know_all_names: bool = True,\n        _schema_is_known: Optional[bool] = None,\n    ):\n        if not know_all_names and remaining_dtype is not None:\n            raise ValueError(\n                \"It's not allowed to pass 'remaining_dtype' and 'know_all_names=False' at the same time.\"\n            )\n        # columns with known dtypes\n        self._known_dtypes: dict[IndexLabel, DtypeObj] = (\n            {} if known_dtypes is None else dict(known_dtypes)\n        )\n        if known_dtypes is not None and len(self._known_dtypes) != len(known_dtypes):\n            raise NotImplementedError(\n                \"Duplicated column names are not yet supported by DtypesDescriptor\"\n            )\n        # columns with unknown dtypes (they're not described by 'remaining_dtype')\n        if cols_with_unknown_dtypes is not None and len(\n            set(cols_with_unknown_dtypes)\n        ) != len(cols_with_unknown_dtypes):\n            raise NotImplementedError(\n                \"Duplicated column names are not yet supported by DtypesDescriptor\"\n            )\n        self._cols_with_unknown_dtypes: list[IndexLabel] = (\n            [] if cols_with_unknown_dtypes is None else cols_with_unknown_dtypes\n        )\n        # whether 'known_dtypes' describe all columns in the dataframe\n        self._schema_is_known: Optional[bool] = _schema_is_known\n        if self._schema_is_known is None:\n            self._schema_is_known = False\n            if (\n                # if 'cols_with_unknown_dtypes' was explicitly specified as 
an empty list and\n                # we don't have any 'remaining_dtype', then we assume that 'known_dtypes' are complete\n                cols_with_unknown_dtypes is not None\n                and know_all_names\n                and remaining_dtype is None\n                and len(self._known_dtypes) > 0\n            ):\n                self._schema_is_known = len(cols_with_unknown_dtypes) == 0\n\n        self._know_all_names: bool = know_all_names\n        # a common dtype for columns that are not present in 'known_dtypes' nor in 'cols_with_unknown_dtypes'\n        self._remaining_dtype: Optional[DtypeObj] = remaining_dtype\n        self._parent_df: Optional[PandasDataframe] = parent_df\n        if columns_order is None:\n            self._columns_order: Optional[dict[int, IndexLabel]] = None\n            # try to compute '._columns_order' using 'parent_df'\n            self.columns_order\n        else:\n            if remaining_dtype is not None:\n                raise ValueError(\n                    \"Passing 'columns_order' and 'remaining_dtype' is ambiguous. You have to manually \"\n                    + \"complete 'known_dtypes' using the information from 'columns_order' and 'remaining_dtype'.\"\n                )\n            elif not self._know_all_names:\n                raise ValueError(\n                    \"Passing 'columns_order' and 'know_all_names=False' is ambiguous. 
You have to manually \"\n                    + \"complete 'cols_with_unknown_dtypes' using the information from 'columns_order' \"\n                    + \"and pass 'know_all_names=True'.\"\n                )\n            elif len(columns_order) != (\n                len(self._cols_with_unknown_dtypes) + len(self._known_dtypes)\n            ):\n                raise ValueError(\n                    \"The length of 'columns_order' doesn't match to 'known_dtypes' and 'cols_with_unknown_dtypes'\"\n                )\n            self._columns_order: Optional[dict[int, IndexLabel]] = columns_order\n\n    def update_parent(self, new_parent: PandasDataframe):\n        \"\"\"\n        Set new parent dataframe.\n\n        Parameters\n        ----------\n        new_parent : PandasDataframe\n        \"\"\"\n        self._parent_df = new_parent\n        LazyProxyCategoricalDtype.update_dtypes(self._known_dtypes, new_parent)\n        # try to compute '._columns_order' using 'new_parent'\n        self.columns_order\n\n    @property\n    def columns_order(self) -> Optional[dict[int, IndexLabel]]:\n        \"\"\"\n        Get order of columns for the described dataframe if available.\n\n        Returns\n        -------\n        dict[int, IndexLabel] or None\n        \"\"\"\n        if self._columns_order is not None:\n            return self._columns_order\n        if self._parent_df is None or not self._parent_df.has_materialized_columns:\n            return None\n\n        actual_columns = self._parent_df.columns\n        self._normalize_self_levels(actual_columns)\n\n        self._columns_order = {i: col for i, col in enumerate(actual_columns)}\n        # we got information about new columns and thus can potentially\n        # extend our knowledge about missing dtypes\n        if len(self._columns_order) > (\n            len(self._known_dtypes) + len(self._cols_with_unknown_dtypes)\n        ):\n            new_cols = [\n                col\n                for col in 
self._columns_order.values()\n                if col not in self._known_dtypes\n                and col not in self._cols_with_unknown_dtypes\n            ]\n            if self._remaining_dtype is not None:\n                self._known_dtypes.update(\n                    {col: self._remaining_dtype for col in new_cols}\n                )\n                self._remaining_dtype = None\n                if len(self._cols_with_unknown_dtypes) == 0:\n                    self._schema_is_known = True\n            else:\n                self._cols_with_unknown_dtypes.extend(new_cols)\n        self._know_all_names = True\n        return self._columns_order\n\n    def __repr__(self):  # noqa: GL08\n        return (\n            f\"DtypesDescriptor:\\n\\tknown dtypes: {self._known_dtypes};\\n\\t\"\n            + f\"remaining dtype: {self._remaining_dtype};\\n\\t\"\n            + f\"cols with unknown dtypes: {self._cols_with_unknown_dtypes};\\n\\t\"\n            + f\"schema is known: {self._schema_is_known};\\n\\t\"\n            + f\"has parent df: {self._parent_df is not None};\\n\\t\"\n            + f\"columns order: {self._columns_order};\\n\\t\"\n            + f\"know all names: {self._know_all_names}\"\n        )\n\n    def __str__(self):  # noqa: GL08\n        return self.__repr__()\n\n    def lazy_get(\n        self, ids: list[Union[IndexLabel, int]], numeric_index: bool = False\n    ) -> DtypesDescriptor:\n        \"\"\"\n        Get dtypes descriptor for a subset of columns without triggering any computations.\n\n        Parameters\n        ----------\n        ids : list of index labels or positional indexers\n            Columns for the subset.\n        numeric_index : bool, default: False\n            Whether `ids` are positional indixes or column labels to take.\n\n        Returns\n        -------\n        DtypesDescriptor\n            Descriptor that describes dtypes for columns specified in `ids`.\n        \"\"\"\n        if len(set(ids)) != len(ids):\n           
 raise NotImplementedError(\n                \"Duplicated column names are not yet supported by DtypesDescriptor\"\n            )\n\n        if numeric_index:\n            if self.columns_order is not None:\n                ids = [self.columns_order[i] for i in ids]\n            else:\n                raise ValueError(\n                    \"Can't lazily get columns by positional indixers if the columns order is unknown\"\n                )\n\n        result = {}\n        unknown_cols = []\n        columns_order = {}\n        for i, col in enumerate(ids):\n            columns_order[i] = col\n            if col in self._cols_with_unknown_dtypes:\n                unknown_cols.append(col)\n                continue\n            dtype = self._known_dtypes.get(col)\n            if dtype is None and self._remaining_dtype is None:\n                unknown_cols.append(col)\n            elif dtype is None and self._remaining_dtype is not None:\n                result[col] = self._remaining_dtype\n            else:\n                result[col] = dtype\n        remaining_dtype = self._remaining_dtype if len(unknown_cols) != 0 else None\n        return DtypesDescriptor(\n            result,\n            unknown_cols,\n            remaining_dtype,\n            self._parent_df,\n            columns_order=columns_order,\n        )\n\n    def copy(self) -> DtypesDescriptor:\n        \"\"\"\n        Get a copy of this descriptor.\n\n        Returns\n        -------\n        DtypesDescriptor\n        \"\"\"\n        return type(self)(\n            # should access '.columns_order' first, as it may compute columns order\n            # and complete the metadata for 'self'\n            columns_order=(\n                None if self.columns_order is None else self.columns_order.copy()\n            ),\n            known_dtypes=self._known_dtypes.copy(),\n            cols_with_unknown_dtypes=self._cols_with_unknown_dtypes.copy(),\n            remaining_dtype=self._remaining_dtype,\n          
  parent_df=self._parent_df,\n            know_all_names=self._know_all_names,\n            _schema_is_known=self._schema_is_known,\n        )\n\n    def set_index(self, new_index: Union[pandas.Index, ModinIndex]) -> DtypesDescriptor:\n        \"\"\"\n        Set new column names for this descriptor.\n\n        Parameters\n        ----------\n        new_index : pandas.Index or ModinIndex\n\n        Returns\n        -------\n        DtypesDescriptor\n            New descriptor with updated column names.\n\n        Notes\n        -----\n        Calling this method on a descriptor that returns ``None`` for ``.columns_order``\n        will result in information loss.\n        \"\"\"\n        if len(new_index) != len(set(new_index)):\n            raise NotImplementedError(\n                \"Duplicated column names are not yet supported by DtypesDescriptor\"\n            )\n\n        if self.columns_order is None:\n            # we can't map new columns to old columns and lost all dtypes :(\n            return DtypesDescriptor(\n                cols_with_unknown_dtypes=new_index,\n                columns_order={i: col for i, col in enumerate(new_index)},\n                parent_df=self._parent_df,\n                know_all_names=True,\n            )\n\n        new_self = self.copy()\n        renamer = {old_c: new_index[i] for i, old_c in new_self.columns_order.items()}\n        new_self._known_dtypes = {\n            renamer[old_col]: value for old_col, value in new_self._known_dtypes.items()\n        }\n        new_self._cols_with_unknown_dtypes = [\n            renamer[old_col] for old_col in new_self._cols_with_unknown_dtypes\n        ]\n        new_self._columns_order = {\n            i: renamer[old_col] for i, old_col in new_self._columns_order.items()\n        }\n        return new_self\n\n    def equals(self, other: DtypesDescriptor) -> bool:\n        \"\"\"\n        Compare two descriptors for equality.\n\n        Parameters\n        ----------\n        other 
: DtypesDescriptor\n\n        Returns\n        -------\n        bool\n        \"\"\"\n        return (\n            self._known_dtypes == other._known_dtypes\n            and set(self._cols_with_unknown_dtypes)\n            == set(other._cols_with_unknown_dtypes)\n            and self._remaining_dtype == other._remaining_dtype\n            and self._schema_is_known == other._schema_is_known\n            and self.columns_order == other.columns_order\n            and self._know_all_names == other._know_all_names\n        )\n\n    @property\n    def is_materialized(self) -> bool:\n        \"\"\"\n        Whether this descriptor contains information about all dtypes in the dataframe.\n\n        Returns\n        -------\n        bool\n        \"\"\"\n        return self._schema_is_known\n\n    def _materialize_all_names(self):\n        \"\"\"Materialize missing column names.\"\"\"\n        if self._know_all_names:\n            return\n\n        all_cols = self._parent_df.columns\n        self._normalize_self_levels(all_cols)\n        for col in all_cols:\n            if (\n                col not in self._known_dtypes\n                and col not in self._cols_with_unknown_dtypes\n            ):\n                self._cols_with_unknown_dtypes.append(col)\n\n        self._know_all_names = True\n\n    def _materialize_cols_with_unknown_dtypes(self):\n        \"\"\"Compute dtypes for cols specified in `._cols_with_unknown_dtypes`.\"\"\"\n        if (\n            len(self._known_dtypes) == 0\n            and len(self._cols_with_unknown_dtypes) == 0\n            and not self._know_all_names\n        ):\n            # here we have to compute dtypes for all columns in the dataframe,\n            # so avoiding columns materialization by setting 'subset=None'\n            subset = None\n        else:\n            if not self._know_all_names:\n                self._materialize_all_names()\n            subset = self._cols_with_unknown_dtypes\n\n        if subset is None or 
len(subset) > 0:\n            self._known_dtypes.update(self._parent_df._compute_dtypes(subset))\n\n        self._know_all_names = True\n        self._cols_with_unknown_dtypes = []\n\n    def materialize(self):\n        \"\"\"Complete information about dtypes.\"\"\"\n        if self.is_materialized:\n            return\n        if self._parent_df is None:\n            raise RuntimeError(\n                \"It's not allowed to call '.materialize()' before '._parent_df' is specified.\"\n            )\n\n        self._materialize_cols_with_unknown_dtypes()\n\n        if self._remaining_dtype is not None:\n            cols = self._parent_df.columns\n            self._normalize_self_levels(cols)\n            self._known_dtypes.update(\n                {\n                    col: self._remaining_dtype\n                    for col in cols\n                    if col not in self._known_dtypes\n                }\n            )\n\n        # we currently not guarantee for dtypes to be in a proper order:\n        # https://github.com/modin-project/modin/blob/8a332c1597c54d36f7ccbbd544e186b689f9ceb1/modin/pandas/test/utils.py#L644-L646\n        # so restoring the order only if it's possible\n        if self.columns_order is not None:\n            assert len(self.columns_order) == len(self._known_dtypes)\n            self._known_dtypes = {\n                self.columns_order[i]: self._known_dtypes[self.columns_order[i]]\n                for i in range(len(self.columns_order))\n            }\n\n        self._schema_is_known = True\n        self._remaining_dtype = None\n        self._parent_df = None\n\n    def to_series(self) -> pandas.Series:\n        \"\"\"\n        Convert descriptor to a pandas Series.\n\n        Returns\n        -------\n        pandas.Series\n        \"\"\"\n        self.materialize()\n        return pandas.Series(self._known_dtypes)\n\n    def get_dtypes_set(self) -> set[DtypeObj]:\n        \"\"\"\n        Get a set of dtypes from the descriptor.\n\n       
 Returns\n        -------\n        set[DtypeObj]\n        \"\"\"\n        if len(self._cols_with_unknown_dtypes) > 0 or not self._know_all_names:\n            self._materialize_cols_with_unknown_dtypes()\n        known_dtypes: set[DtypeObj] = set(self._known_dtypes.values())\n        if self._remaining_dtype is not None:\n            known_dtypes.add(self._remaining_dtype)\n        return known_dtypes\n\n    @classmethod\n    def _merge_dtypes(\n        cls, values: list[Union[DtypesDescriptor, pandas.Series, None]]\n    ) -> DtypesDescriptor:\n        \"\"\"\n        Union columns described by ``values`` and compute common dtypes for them.\n\n        Parameters\n        ----------\n        values : list of DtypesDescriptors, pandas.Series or Nones\n\n        Returns\n        -------\n        DtypesDescriptor\n        \"\"\"\n        known_dtypes = {}\n        cols_with_unknown_dtypes = []\n        know_all_names = True\n        dtypes_are_unknown = False\n\n        # index - joined column names, columns - dtypes taken from 'values'\n        #        0     1     2      3\n        #  col1  int   bool  float  int\n        #  col2  int   int   int    int\n        #  colN  bool  bool  bool   int\n        dtypes_matrix = pandas.DataFrame()\n\n        for i, val in enumerate(values):\n            if isinstance(val, cls):\n                know_all_names &= val._know_all_names\n                dtypes = val._known_dtypes.copy()\n                dtypes.update({col: \"unknown\" for col in val._cols_with_unknown_dtypes})\n                if val._remaining_dtype is not None:\n                    # we can't process remaining dtypes, so just discarding them\n                    know_all_names = False\n\n                # setting a custom name to the Series to prevent duplicated names\n                # in the 'dtypes_matrix'\n                series = pandas.Series(dtypes, name=i)\n                dtypes_matrix = pandas.concat([dtypes_matrix, series], axis=1)\n                if 
not (val._know_all_names and val._remaining_dtype is None):\n                    dtypes_matrix.fillna(\n                        value={\n                            # If we encountered a 'NaN' while 'val' describes all the columns, then\n                            # it means, that the missing columns for this instance will be filled with NaNs (floats),\n                            # otherwise, it may indicate missing columns that this 'val' has no info about,\n                            # meaning that we shouldn't try computing a new dtype for this column,\n                            # so marking it as 'unknown'\n                            i: \"unknown\",\n                        },\n                        inplace=True,\n                    )\n            elif isinstance(val, pandas.Series):\n                dtypes_matrix = pandas.concat([dtypes_matrix, val], axis=1)\n            elif val is None:\n                # one of the 'dtypes' is None, meaning that we wouldn't been infer a valid result dtype,\n                # however, we're continuing our loop so we would at least know the columns we're missing\n                # dtypes for\n                dtypes_are_unknown = True\n                know_all_names = False\n            else:\n                raise NotImplementedError(type(val))\n\n        if dtypes_are_unknown:\n            return DtypesDescriptor(\n                cols_with_unknown_dtypes=dtypes_matrix.index.tolist(),\n                know_all_names=know_all_names,\n            )\n\n        def combine_dtypes(row):\n            if (row == \"unknown\").any():\n                return \"unknown\"\n            row = row.fillna(pandas.api.types.pandas_dtype(\"float\"))\n            return find_common_type(list(row.values))\n\n        dtypes = dtypes_matrix.apply(combine_dtypes, axis=1)\n\n        for col, dtype in dtypes.items():\n            if dtype == \"unknown\":\n                cols_with_unknown_dtypes.append(col)\n            else:\n               
 known_dtypes[col] = dtype\n\n        return DtypesDescriptor(\n            known_dtypes,\n            cols_with_unknown_dtypes,\n            remaining_dtype=None,\n            know_all_names=know_all_names,\n        )\n\n    @classmethod\n    def concat(\n        cls, values: list[Union[DtypesDescriptor, pandas.Series, None]], axis: int = 0\n    ) -> DtypesDescriptor:\n        \"\"\"\n        Concatenate dtypes descriptors into a single descriptor.\n\n        Parameters\n        ----------\n        values : list of DtypesDescriptors and pandas.Series\n        axis : int, default: 0\n            If ``axis == 0``: concatenate column names. This implements the logic of\n            how dtypes are combined on ``pd.concat([df1, df2], axis=1)``.\n            If ``axis == 1``: perform a union join for the column names described by\n            `values` and then find common dtypes for the columns appeared to be in\n            an intersection. This implements the logic of how dtypes are combined on\n            ``pd.concat([df1, df2], axis=0).dtypes``.\n\n        Returns\n        -------\n        DtypesDescriptor\n        \"\"\"\n        if axis == 1:\n            return cls._merge_dtypes(values)\n        known_dtypes = {}\n        cols_with_unknown_dtypes = []\n        schema_is_known = True\n        # some default value to not mix it with 'None'\n        remaining_dtype = \"default\"\n        know_all_names = True\n\n        for val in values:\n            if isinstance(val, cls):\n                all_new_cols = (\n                    list(val._known_dtypes.keys()) + val._cols_with_unknown_dtypes\n                )\n                if any(\n                    col in known_dtypes or col in cols_with_unknown_dtypes\n                    for col in all_new_cols\n                ):\n                    raise NotImplementedError(\n                        \"Duplicated column names are not yet supported by DtypesDescriptor\"\n                    )\n                
know_all_names &= val._know_all_names\n                known_dtypes.update(val._known_dtypes)\n                cols_with_unknown_dtypes.extend(val._cols_with_unknown_dtypes)\n                if know_all_names:\n                    if (\n                        remaining_dtype == \"default\"\n                        and val._remaining_dtype is not None\n                    ):\n                        remaining_dtype = val._remaining_dtype\n                    elif (\n                        remaining_dtype != \"default\"\n                        and val._remaining_dtype is not None\n                        and remaining_dtype != val._remaining_dtype\n                    ):\n                        remaining_dtype = None\n                        know_all_names = False\n                else:\n                    remaining_dtype = None\n                schema_is_known &= val._schema_is_known\n            elif isinstance(val, pandas.Series):\n                if any(\n                    col in known_dtypes or col in cols_with_unknown_dtypes\n                    for col in val.index\n                ):\n                    raise NotImplementedError(\n                        \"Duplicated column names are not yet supported by DtypesDescriptor\"\n                    )\n                known_dtypes.update(val)\n            elif val is None:\n                remaining_dtype = None\n                schema_is_known = False\n                know_all_names = False\n            else:\n                raise NotImplementedError(type(val))\n        return cls(\n            known_dtypes,\n            cols_with_unknown_dtypes,\n            None if remaining_dtype == \"default\" else remaining_dtype,\n            parent_df=None,\n            _schema_is_known=schema_is_known,\n            know_all_names=know_all_names,\n        )\n\n    @staticmethod\n    def _normalize_levels(columns, reference=None):\n        \"\"\"\n        Normalize levels of MultiIndex column names.\n\n        The 
function fills missing levels with empty strings as pandas do:\n        '''\n        >>> columns = [\"a\", (\"l1\", \"l2\"), (\"l1a\", \"l2a\", \"l3a\")]\n        >>> _normalize_levels(columns)\n        [(\"a\", \"\", \"\"), (\"l1\", \"l2\", \"\"), (\"l1a\", \"l2a\", \"l3a\")]\n        >>> # with a reference\n        >>> idx = pandas.MultiIndex(...)\n        >>> idx.nlevels\n        4\n        >>> _normalize_levels(columns, reference=idx)\n        [(\"a\", \"\", \"\", \"\"), (\"l1\", \"l2\", \"\", \"\"), (\"l1a\", \"l2a\", \"l3a\", \"\")]\n        '''\n\n        Parameters\n        ----------\n        columns : sequence\n            Labels to normalize. If dictionary, will replace keys with normalized columns.\n        reference : pandas.Index, optional\n            An index to match the number of levels with. If reference is a MultiIndex, then the reference number\n            of levels should not be greater than the maximum number of levels in `columns`. If not specified,\n            the `columns` themselves become a `reference`.\n\n        Returns\n        -------\n        sequence\n            Column values with normalized levels.\n        dict[hashable, hashable]\n            Mapping from old column names to new names, only contains column names that\n            were changed.\n\n        Raises\n        ------\n        ValueError\n            When the reference number of levels is greater than the maximum number of levels\n            in `columns`.\n        \"\"\"\n        if reference is None:\n            reference = columns\n\n        if isinstance(reference, pandas.Index):\n            max_nlevels = reference.nlevels\n        else:\n            max_nlevels = 1\n            for col in reference:\n                if isinstance(col, tuple):\n                    max_nlevels = max(max_nlevels, len(col))\n\n        # if the reference is a regular flat index, then no actions are required (the result will be\n        # a flat index containing tuples of different 
lengths, this behavior fully matches pandas).\n        # Yes, this shortcut skips the 'if max_columns_nlevels > max_nlevels' below check on purpose.\n        if max_nlevels == 1:\n            return columns, {}\n\n        max_columns_nlevels = 1\n        for col in columns:\n            if isinstance(col, tuple):\n                max_columns_nlevels = max(max_columns_nlevels, len(col))\n\n        if max_columns_nlevels > max_nlevels:\n            raise ValueError(\n                f\"The reference number of levels is greater than the maximum number of levels in columns: {max_columns_nlevels} > {max_nlevels}\"\n            )\n\n        new_columns = []\n        old_to_new_mapping = {}\n        for col in columns:\n            old_col = col\n            if not isinstance(col, tuple):\n                col = (col,)\n            col = col + (\"\",) * (max_nlevels - len(col))\n            new_columns.append(col)\n            if old_col != col:\n                old_to_new_mapping[old_col] = col\n\n        return new_columns, old_to_new_mapping\n\n    def _normalize_self_levels(self, reference=None):\n        \"\"\"\n        Call ``self._normalize_levels()`` for known and unknown dtypes of this object.\n\n        Parameters\n        ----------\n        reference : pandas.Index, optional\n        \"\"\"\n        _, old_to_new_mapping = self._normalize_levels(\n            self._known_dtypes.keys(), reference\n        )\n        for old_col, new_col in old_to_new_mapping.items():\n            value = self._known_dtypes.pop(old_col)\n            self._known_dtypes[new_col] = value\n        self._cols_with_unknown_dtypes, _ = self._normalize_levels(\n            self._cols_with_unknown_dtypes, reference\n        )\n\n\nclass ModinDtypes:\n    \"\"\"\n    A class that hides the various implementations of the dtypes needed for optimization.\n\n    Parameters\n    ----------\n    value : pandas.Series, callable, DtypesDescriptor or ModinDtypes, optional\n    \"\"\"\n\n    def 
__init__(\n        self,\n        value: Optional[Union[Callable, pandas.Series, DtypesDescriptor, ModinDtypes]],\n    ):\n        if callable(value) or isinstance(value, pandas.Series):\n            self._value = value\n        elif isinstance(value, DtypesDescriptor):\n            self._value = value.to_series() if value.is_materialized else value\n        elif isinstance(value, type(self)):\n            self._value = value.copy()._value\n        elif isinstance(value, None):\n            self._value = DtypesDescriptor()\n        else:\n            raise ValueError(f\"ModinDtypes doesn't work with '{value}'\")\n\n    def __repr__(self):  # noqa: GL08\n        return f\"ModinDtypes:\\n\\tvalue type: {type(self._value)};\\n\\tvalue:\\n\\t{self._value}\"\n\n    def __str__(self):  # noqa: GL08\n        return self.__repr__()\n\n    @property\n    def is_materialized(self) -> bool:\n        \"\"\"\n        Check if the internal representation is materialized.\n\n        Returns\n        -------\n        bool\n        \"\"\"\n        return isinstance(self._value, pandas.Series)\n\n    def get_dtypes_set(self) -> set[DtypeObj]:\n        \"\"\"\n        Get a set of dtypes from the descriptor.\n\n        Returns\n        -------\n        set[DtypeObj]\n        \"\"\"\n        if isinstance(self._value, DtypesDescriptor):\n            return self._value.get_dtypes_set()\n        if not self.is_materialized:\n            self.get()\n        return set(self._value.values)\n\n    def maybe_specify_new_frame_ref(self, new_parent: PandasDataframe) -> ModinDtypes:\n        \"\"\"\n        Set a new parent for the stored value if needed.\n\n        Parameters\n        ----------\n        new_parent : PandasDataframe\n\n        Returns\n        -------\n        ModinDtypes\n            A copy of ``ModinDtypes`` with updated parent.\n        \"\"\"\n        new_self = self.copy()\n        if new_self.is_materialized:\n            
LazyProxyCategoricalDtype.update_dtypes(new_self._value, new_parent)\n            return new_self\n        if isinstance(self._value, DtypesDescriptor):\n            new_self._value.update_parent(new_parent)\n            return new_self\n        return new_self\n\n    def lazy_get(self, ids: list, numeric_index: bool = False) -> ModinDtypes:\n        \"\"\"\n        Get new ``ModinDtypes`` for a subset of columns without triggering any computations.\n\n        Parameters\n        ----------\n        ids : list of index labels or positional indexers\n            Columns for the subset.\n        numeric_index : bool, default: False\n            Whether `ids` are positional indices or column labels to take.\n\n        Returns\n        -------\n        ModinDtypes\n            ``ModinDtypes`` that describes dtypes for columns specified in `ids`.\n        \"\"\"\n        if isinstance(self._value, DtypesDescriptor):\n            res = self._value.lazy_get(ids, numeric_index)\n            return ModinDtypes(res)\n        elif callable(self._value):\n            new_self = self.copy()\n            old_value = new_self._value\n            new_self._value = lambda: (\n                old_value().iloc[ids] if numeric_index else old_value()[ids]\n            )\n            return new_self\n        ErrorMessage.catch_bugs_and_request_email(\n            failure_condition=not self.is_materialized\n        )\n        return ModinDtypes(self._value.iloc[ids] if numeric_index else self._value[ids])\n\n    @classmethod\n    def concat(cls, values: list, axis: int = 0) -> ModinDtypes:\n        \"\"\"\n        Concatenate dtypes.\n\n        Parameters\n        ----------\n        values : list of DtypesDescriptors, pandas.Series, ModinDtypes and Nones\n        axis : int, default: 0\n            If ``axis == 0``: concatenate column names. 
This implements the logic of\n            how dtypes are combined on ``pd.concat([df1, df2], axis=1)``.\n            If ``axis == 1``: perform a union join for the column names described by\n            `values` and then find common dtypes for the columns appeared to be in\n            an intersection. This implements the logic of how dtypes are combined on\n            ``pd.concat([df1, df2], axis=0).dtypes``.\n\n        Returns\n        -------\n        ModinDtypes\n        \"\"\"\n        preprocessed_vals = []\n        for val in values:\n            if isinstance(val, cls):\n                val = val._value\n            if isinstance(val, (DtypesDescriptor, pandas.Series)) or val is None:\n                preprocessed_vals.append(val)\n            else:\n                raise NotImplementedError(type(val))\n\n        try:\n            desc = DtypesDescriptor.concat(preprocessed_vals, axis=axis)\n        except NotImplementedError as e:\n            # 'DtypesDescriptor' doesn't support duplicated labels, however, if all values are pandas Series,\n            # we still can perform concatenation using pure pandas\n            if (\n                # 'pd.concat(axis=1)' fails on duplicated labels anyway, so doing this logic\n                # only in case 'axis=0'\n                axis == 0\n                and \"duplicated\" not in e.args[0].lower()\n                or not all(isinstance(val, pandas.Series) for val in values)\n            ):\n                raise e\n            desc = pandas.concat(values)\n        return ModinDtypes(desc)\n\n    def set_index(self, new_index: Union[pandas.Index, ModinIndex]) -> ModinDtypes:\n        \"\"\"\n        Set new column names for stored dtypes.\n\n        Parameters\n        ----------\n        new_index : pandas.Index or ModinIndex\n\n        Returns\n        -------\n        ModinDtypes\n            New ``ModinDtypes`` with updated column names.\n        \"\"\"\n        new_self = self.copy()\n        if 
self.is_materialized:\n            new_self._value.index = new_index\n            return new_self\n        elif callable(self._value):\n            old_val = new_self._value\n            new_self._value = lambda: old_val().set_axis(new_index)\n            return new_self\n        elif isinstance(new_self._value, DtypesDescriptor):\n            new_self._value = new_self._value.set_index(new_index)\n            return new_self\n        else:\n            raise NotImplementedError()\n\n    def get(self) -> pandas.Series:\n        \"\"\"\n        Get the materialized internal representation.\n\n        Returns\n        -------\n        pandas.Series\n        \"\"\"\n        if not self.is_materialized:\n            if callable(self._value):\n                self._value = self._value()\n                if self._value is None:\n                    self._value = pandas.Series([])\n            elif isinstance(self._value, DtypesDescriptor):\n                self._value = self._value.to_series()\n            else:\n                raise NotImplementedError(type(self._value))\n        return self._value\n\n    def __len__(self):\n        \"\"\"\n        Redirect the 'len' request to the internal representation.\n\n        Returns\n        -------\n        int\n\n        Notes\n        -----\n        Executing this function materializes the data.\n        \"\"\"\n        if not self.is_materialized:\n            self.get()\n        return len(self._value)\n\n    def __reduce__(self):\n        \"\"\"\n        Serialize an object of this class.\n\n        Returns\n        -------\n        tuple\n\n        Notes\n        -----\n        The default implementation generates a recursion error. 
In a short:\n        during the construction of the object, `__getattr__` function is called, which\n        is not intended to be used in situations where the object is not initialized.\n        \"\"\"\n        return (self.__class__, (self._value,))\n\n    def __getattr__(self, name):\n        \"\"\"\n        Redirect access to non-existent attributes to the internal representation.\n\n        This is necessary so that objects of this class in most cases mimic the behavior\n        of the ``pandas.Series``. The main limitations of the current approach are type\n        checking and the use of this object where pandas dtypes are supposed to be used.\n\n        Parameters\n        ----------\n        name : str\n            Attribute name.\n\n        Returns\n        -------\n        object\n            Attribute.\n\n        Notes\n        -----\n        Executing this function materializes the data.\n        \"\"\"\n        if not self.is_materialized:\n            self.get()\n        return self._value.__getattribute__(name)\n\n    def copy(self) -> ModinDtypes:\n        \"\"\"\n        Copy an object without materializing the internal representation.\n\n        Returns\n        -------\n        ModinDtypes\n        \"\"\"\n        idx_cache = self._value\n        if not callable(idx_cache):\n            idx_cache = idx_cache.copy()\n        return ModinDtypes(idx_cache)\n\n    def __getitem__(self, key):  # noqa: GL08\n        if not self.is_materialized:\n            self.get()\n        return self._value.__getitem__(key)\n\n    def __setitem__(self, key, item):  # noqa: GL08\n        if not self.is_materialized:\n            self.get()\n        self._value.__setitem__(key, item)\n\n    def __iter__(self):  # noqa: GL08\n        if not self.is_materialized:\n            self.get()\n        return iter(self._value)\n\n    def __contains__(self, key):  # noqa: GL08\n        if not self.is_materialized:\n            self.get()\n        return key in 
self._value\n\n\nclass LazyProxyCategoricalDtype(pandas.CategoricalDtype):\n    \"\"\"\n    A lazy proxy representing ``pandas.CategoricalDtype``.\n\n    Parameters\n    ----------\n    categories : list-like, optional\n    ordered : bool, default: False\n\n    Notes\n    -----\n    Important note! One shouldn't use the class' constructor to instantiate a proxy instance,\n    it's intended only for compatibility purposes! In order to create a new proxy instance\n    use the appropriate class method `._build_proxy(...)`.\n    \"\"\"\n\n    def __init__(self, categories=None, ordered=False):\n        # These will be initialized later inside of the `._build_proxy()` method\n        self._parent, self._column_name, self._categories_val, self._materializer = (\n            None,\n            None,\n            None,\n            None,\n        )\n        super().__init__(categories, ordered)\n\n    @staticmethod\n    def update_dtypes(dtypes, new_parent):\n        \"\"\"\n        Update a parent for categorical proxies in a dtype object.\n\n        Parameters\n        ----------\n        dtypes : dict-like\n            A dict-like object describing dtypes. 
The method will walk through every dtype\n            an update parents for categorical proxies inplace.\n        new_parent : object\n        \"\"\"\n        for key, value in dtypes.items():\n            if isinstance(value, LazyProxyCategoricalDtype):\n                dtypes[key] = value._update_proxy(new_parent, column_name=key)\n\n    def _update_proxy(self, parent, column_name):\n        \"\"\"\n        Create a new proxy, if either parent or column name are different.\n\n        Parameters\n        ----------\n        parent : object\n            Source object to extract categories on demand.\n        column_name : str\n            Column name of the categorical column in the source object.\n\n        Returns\n        -------\n        pandas.CategoricalDtype or LazyProxyCategoricalDtype\n        \"\"\"\n        if self._is_materialized:\n            # The parent has been materialized, we don't need a proxy anymore.\n            return pandas.CategoricalDtype(self.categories, ordered=self._ordered)\n        elif parent is self._parent and column_name == self._column_name:\n            return self\n        else:\n            return self._build_proxy(parent, column_name, self._materializer)\n\n    @classmethod\n    def _build_proxy(cls, parent, column_name, materializer, dtype=None):\n        \"\"\"\n        Construct a lazy proxy.\n\n        Parameters\n        ----------\n        parent : object\n            Source object to extract categories on demand.\n        column_name : str\n            Column name of the categorical column in the source object.\n        materializer : callable(parent, column_name) -> pandas.CategoricalDtype\n            A function to call in order to extract categorical values.\n        dtype : dtype, optional\n            The categories dtype.\n\n        Returns\n        -------\n        LazyProxyCategoricalDtype\n        \"\"\"\n        result = cls()\n        result._parent = parent\n        result._column_name = column_name\n      
  result._materializer = materializer\n        result._dtype = dtype\n        return result\n\n    def _get_dtype(self):\n        \"\"\"\n        Get the categories dtype.\n\n        Returns\n        -------\n        dtype\n        \"\"\"\n        if self._dtype is None:\n            self._dtype = self.categories.dtype\n        return self._dtype\n\n    def __reduce__(self):\n        \"\"\"\n        Serialize an object of this class.\n\n        Returns\n        -------\n        tuple\n\n        Notes\n        -----\n        This object is serialized into a ``pandas.CategoricalDtype`` as an actual proxy can't be\n        properly serialized because of the references it stores for its potentially distributed parent.\n        \"\"\"\n        return (pandas.CategoricalDtype, (self.categories, self.ordered))\n\n    @property\n    def _categories(self):\n        \"\"\"\n        Get materialized categorical values.\n\n        Returns\n        -------\n        pandas.Index\n        \"\"\"\n        if not self._is_materialized:\n            self._materialize_categories()\n        return self._categories_val\n\n    @_categories.setter\n    def _categories(self, categories):\n        \"\"\"\n        Set new categorical values.\n\n        Parameters\n        ----------\n        categories : list-like\n        \"\"\"\n        self._categories_val = categories\n        self._parent = None  # The parent is not required any more\n        self._materializer = None\n        self._dtype = None\n\n    @property\n    def _is_materialized(self) -> bool:\n        \"\"\"\n        Check whether categorical values were already materialized.\n\n        Returns\n        -------\n        bool\n        \"\"\"\n        return self._categories_val is not None\n\n    def _materialize_categories(self):\n        \"\"\"Materialize actual categorical values.\"\"\"\n        ErrorMessage.catch_bugs_and_request_email(\n            failure_condition=self._parent is None,\n            extra_log=\"attempted 
to materialize categories with parent being 'None'\",\n        )\n        categoricals = self._materializer(self._parent, self._column_name)\n        self._categories = categoricals.categories\n        self._ordered = categoricals.ordered\n\n\ndef get_categories_dtype(\n    cdt: Union[LazyProxyCategoricalDtype, pandas.CategoricalDtype],\n) -> DtypeObj:\n    \"\"\"\n    Get the categories dtype.\n\n    Parameters\n    ----------\n    cdt : LazyProxyCategoricalDtype or pandas.CategoricalDtype\n\n    Returns\n    -------\n    dtype\n    \"\"\"\n    return (\n        cdt._get_dtype()\n        if isinstance(cdt, LazyProxyCategoricalDtype)\n        else cdt.categories.dtype\n    )\n\n\ndef extract_dtype(value) -> DtypeObj | pandas.Series:\n    \"\"\"\n    Extract dtype(s) from the passed `value`.\n\n    Parameters\n    ----------\n    value : object\n\n    Returns\n    -------\n    DtypeObj or pandas.Series of DtypeObj\n    \"\"\"\n    try:\n        dtype = pandas.api.types.pandas_dtype(value)\n    except (TypeError, ValueError):\n        dtype = pandas.Series(value).dtype\n\n    return dtype\n"
  },
  {
    "path": "modin/core/dataframe/pandas/metadata/index.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module contains class ModinIndex.\"\"\"\n\nimport uuid\nfrom typing import Optional\n\nimport pandas\nfrom pandas.core.dtypes.common import is_list_like\nfrom pandas.core.indexes.api import ensure_index\n\n\nclass ModinIndex:\n    \"\"\"\n    A class that hides the various implementations of the index needed for optimization.\n\n    Parameters\n    ----------\n    value : sequence, PandasDataframe or callable() -> (pandas.Index, list of ints), optional\n        If a sequence passed this will be considered as the index values.\n        If a ``PandasDataframe`` passed then it will be used to lazily extract indices\n        when required, note that the `axis` parameter must be passed in this case.\n        If a callable passed then it's expected to return a pandas Index and a list of\n        partition lengths along the index axis.\n        If ``None`` was passed, the index will be considered an incomplete and will raise\n        a ``RuntimeError`` on an attempt of materialization. 
To complete the index object\n        you have to use ``.maybe_specify_new_frame_ref()`` method.\n\n    axis : int, optional\n        Specifies an axis the object represents, serves as an optional hint. This parameter\n        must be passed in case value is a ``PandasDataframe``.\n    dtypes : pandas.Series, optional\n        Materialized dtypes of index levels.\n    \"\"\"\n\n    def __init__(self, value=None, axis=None, dtypes: Optional[pandas.Series] = None):\n        from modin.core.dataframe.pandas.dataframe.dataframe import PandasDataframe\n\n        self._is_default_callable = False\n        self._axis = axis\n        self._dtypes = dtypes\n\n        if callable(value):\n            self._value = value\n        elif isinstance(value, PandasDataframe):\n            assert axis is not None\n            self._value = self._get_default_callable(value, axis)\n            self._is_default_callable = True\n        elif value is None:\n            assert axis is not None\n            self._value = value\n        else:\n            self._value = ensure_index(value)\n\n        self._lengths_cache = None\n        # index/lengths ID's for faster comparison between other ModinIndex objects,\n        # these should be propagated to the copies of the index\n        self._index_id = uuid.uuid4()\n        self._lengths_id = uuid.uuid4()\n\n    def maybe_get_dtypes(self) -> Optional[pandas.Series]:\n        \"\"\"\n        Get index dtypes if available.\n\n        Returns\n        -------\n        pandas.Series or None\n        \"\"\"\n        if self._dtypes is not None:\n            return self._dtypes\n        if self.is_materialized:\n            self._dtypes = (\n                self._value.dtypes\n                if isinstance(self._value, pandas.MultiIndex)\n                else pandas.Series([self._value.dtype], index=[self._value.name])\n            )\n            return self._dtypes\n        return None\n\n    @staticmethod\n    def 
_get_default_callable(dataframe_obj, axis):\n        \"\"\"\n        Build a callable extracting index labels and partitions lengths for the specified axis.\n\n        Parameters\n        ----------\n        dataframe_obj : PandasDataframe\n        axis : int\n            0 - extract indices, 1 - extract columns.\n\n        Returns\n        -------\n        callable() -> tuple(pandas.Index, list[ints])\n        \"\"\"\n        return lambda: dataframe_obj._compute_axis_labels_and_lengths(axis)\n\n    def maybe_specify_new_frame_ref(self, value, axis) -> \"ModinIndex\":\n        \"\"\"\n        Set a new reference for a frame used to lazily extract index labels if it's needed.\n\n        The method sets a new reference only if the indices are not yet materialized and\n        if a PandasDataframe was originally passed to construct this index (so the ModinIndex\n        object holds a reference to it). The reason the reference should be updated is that\n        we don't want to hold in memory those frames that are already not needed. 
Once the\n        reference is updated, the old frame will be garbage collected if there are no\n        more references to it.\n\n        Parameters\n        ----------\n        value : PandasDataframe\n            New dataframe to reference.\n        axis : int\n            Axis to extract labels from.\n\n        Returns\n        -------\n        ModinIndex\n            New ModinIndex with the reference updated.\n        \"\"\"\n        if self._value is not None and (\n            not callable(self._value) or not self._is_default_callable\n        ):\n            return self\n\n        new_index = self.copy(copy_lengths=True)\n        new_index._axis = axis\n        new_index._value = self._get_default_callable(value, new_index._axis)\n        # if the '._value' was 'None' initially, then the '_is_default_callable' flag was\n        # also being set to 'False', since now the '._value' is a default callable,\n        # so we want to ensure that the flag is set to 'True'\n        new_index._is_default_callable = True\n        return new_index\n\n    @property\n    def is_materialized(self) -> bool:\n        \"\"\"\n        Check if the internal representation is materialized.\n\n        Returns\n        -------\n        bool\n        \"\"\"\n        return self.is_materialized_index(self)\n\n    @classmethod\n    def is_materialized_index(cls, index) -> bool:\n        \"\"\"\n        Check if the passed object represents a materialized index.\n\n        Parameters\n        ----------\n        index : object\n            An object to check.\n\n        Returns\n        -------\n        bool\n        \"\"\"\n        # importing here to avoid circular import issue\n        from modin.pandas.indexing import is_range_like\n\n        if isinstance(index, cls):\n            index = index._value\n        return is_list_like(index) or is_range_like(index) or isinstance(index, slice)\n\n    def get(self, return_lengths=False) -> pandas.Index:\n        \"\"\"\n        Get the 
materialized internal representation.\n\n        Parameters\n        ----------\n        return_lengths : bool, default: False\n            In some cases, during the index calculation, it's possible to get\n            the lengths of the partitions. This flag allows this data to be used\n            for optimization.\n\n        Returns\n        -------\n        pandas.Index\n        \"\"\"\n        if not self.is_materialized:\n            if callable(self._value):\n                index, self._lengths_cache = self._value()\n                self._value = ensure_index(index)\n            elif self._value is None:\n                raise RuntimeError(\n                    \"It's not allowed to call '.materialize()' before '._value' is specified.\"\n                )\n            else:\n                raise NotImplementedError(type(self._value))\n        if return_lengths:\n            return self._value, self._lengths_cache\n        else:\n            return self._value\n\n    def equals(self, other: \"ModinIndex\") -> bool:\n        \"\"\"\n        Check equality of the index values.\n\n        Parameters\n        ----------\n        other : ModinIndex\n\n        Returns\n        -------\n        bool\n            The result of the comparison.\n        \"\"\"\n        if self._index_id == other._index_id:\n            return True\n\n        if not self.is_materialized:\n            self.get()\n\n        if not other.is_materialized:\n            other.get()\n\n        return self._value.equals(other._value)\n\n    def compare_partition_lengths_if_possible(self, other: \"ModinIndex\"):\n        \"\"\"\n        Compare the partition lengths cache for the index being stored if possible.\n\n        The ``ModinIndex`` object may sometimes store the information about partition\n        lengths along the axis the index belongs to. 
If both `self` and `other` have\n        this information or it can be inferred from them, the method returns\n        a boolean - the result of the comparison, otherwise it returns ``None``\n        as an indication that the comparison cannot be made.\n\n        Parameters\n        ----------\n        other : ModinIndex\n\n        Returns\n        -------\n        bool or None\n            The result of the comparison if both `self` and `other` contain\n            the lengths data, ``None`` otherwise.\n        \"\"\"\n        if self._lengths_id == other._lengths_id:\n            return True\n\n        can_extract_lengths_from_self = self._lengths_cache is not None or callable(\n            self._value\n        )\n        can_extract_lengths_from_other = other._lengths_cache is not None or callable(\n            other._value\n        )\n        if can_extract_lengths_from_self and can_extract_lengths_from_other:\n            return self.get(return_lengths=True)[1] == other.get(return_lengths=True)[1]\n        return None\n\n    def __len__(self):\n        \"\"\"\n        Redirect the 'len' request to the internal representation.\n\n        Returns\n        -------\n        int\n\n        Notes\n        -----\n        Executing this function materializes the data.\n        \"\"\"\n        if not self.is_materialized:\n            self.get()\n        return len(self._value)\n\n    def __reduce__(self):\n        \"\"\"\n        Serialize an object of this class.\n\n        Returns\n        -------\n        tuple\n\n        Notes\n        -----\n        The default implementation generates a recursion error. 
In short:\n        during the construction of the object, `__getattr__` function is called, which\n        is not intended to be used in situations where the object is not initialized.\n        \"\"\"\n        return (\n            self.__class__,\n            (self._value, self._axis),\n            {\n                \"_lengths_cache\": self._lengths_cache,\n                \"_index_id\": self._index_id,\n                \"_lengths_id\": self._lengths_id,\n                \"_is_default_callable\": self._is_default_callable,\n            },\n        )\n\n    def __getitem__(self, key):\n        \"\"\"\n        Get an index value at the position of `key`.\n\n        Parameters\n        ----------\n        key : int\n\n        Returns\n        -------\n        label\n        \"\"\"\n        if not self.is_materialized:\n            self.get()\n        return self._value[key]\n\n    def __getattr__(self, name):\n        \"\"\"\n        Redirect access to non-existent attributes to the internal representation.\n\n        This is necessary so that objects of this class in most cases mimic the behavior\n        of the ``pandas.Index``. 
The main limitations of the current approach are type\n        checking and the use of this object where pandas indexes are supposed to be used.\n\n        Parameters\n        ----------\n        name : str\n            Attribute name.\n\n        Returns\n        -------\n        object\n            Attribute.\n\n        Notes\n        -----\n        Executing this function materializes the data.\n        \"\"\"\n        if not self.is_materialized:\n            self.get()\n        return self._value.__getattribute__(name)\n\n    def copy(self, copy_lengths=False) -> \"ModinIndex\":\n        \"\"\"\n        Copy an object without materializing the internal representation.\n\n        Parameters\n        ----------\n        copy_lengths : bool, default: False\n            Whether to copy the stored partition lengths to the\n            new index object.\n\n        Returns\n        -------\n        ModinIndex\n        \"\"\"\n        idx_cache = self._value\n        if idx_cache is not None and not callable(idx_cache):\n            idx_cache = idx_cache.copy()\n        result = ModinIndex(idx_cache, axis=self._axis, dtypes=self._dtypes)\n        result._index_id = self._index_id\n        result._is_default_callable = self._is_default_callable\n        if copy_lengths:\n            result._lengths_cache = self._lengths_cache\n            result._lengths_id = self._lengths_id\n        return result\n"
  },
  {
    "path": "modin/core/dataframe/pandas/partitioning/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Base Modin Dataframe classes related to its partitioning and optimized for pandas storage format.\"\"\"\n"
  },
  {
    "path": "modin/core/dataframe/pandas/partitioning/axis_partition.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"The module defines base interface for an axis partition of a Modin DataFrame.\"\"\"\n\nimport warnings\n\nimport numpy as np\nimport pandas\n\nfrom modin.config import MinColumnPartitionSize, MinRowPartitionSize\nfrom modin.core.dataframe.base.partitioning.axis_partition import (\n    BaseDataframeAxisPartition,\n)\nfrom modin.core.storage_formats.pandas.utils import (\n    generate_result_of_axis_func_pandas,\n    split_result_of_axis_func_pandas,\n)\n\nfrom .partition import PandasDataframePartition\n\n\nclass PandasDataframeAxisPartition(BaseDataframeAxisPartition):\n    \"\"\"\n    An abstract class is created to simplify and consolidate the code for axis partition that run pandas.\n\n    Because much of the code is similar, this allows us to reuse this code.\n\n    Parameters\n    ----------\n    list_of_partitions : Union[list, PandasDataframePartition]\n        List of ``PandasDataframePartition`` and\n        ``PandasDataframeAxisPartition`` objects, or a single\n        ``PandasDataframePartition``.\n    get_ip : bool, default: False\n        Whether to get node IP addresses to conforming partitions or not.\n    full_axis : bool, default: True\n      
  Whether or not the axis partition encompasses the whole axis.\n    call_queue : list, optional\n        A list of tuples (callable, args, kwargs) that contains deferred calls.\n    length : the future's type or int, optional\n        Length, or reference to length, of wrapped ``pandas.DataFrame``.\n    width : the future's type or int, optional\n        Width, or reference to width, of wrapped ``pandas.DataFrame``.\n    \"\"\"\n\n    def __init__(\n        self,\n        list_of_partitions,\n        get_ip=False,\n        full_axis=True,\n        call_queue=None,\n        length=None,\n        width=None,\n    ):\n        if isinstance(list_of_partitions, PandasDataframePartition):\n            list_of_partitions = [list_of_partitions]\n        self.full_axis = full_axis\n        self.call_queue = call_queue or []\n        self._length_cache = length\n        self._width_cache = width\n        # Check that all axis partition axes are the same in `list_of_partitions`\n        # We should never have mismatching axis in the current implementation. 
We add this\n        # defensive assertion to ensure that undefined behavior does not happen.\n        assert (\n            len(\n                set(\n                    obj.axis\n                    for obj in list_of_partitions\n                    if isinstance(obj, PandasDataframeAxisPartition)\n                )\n            )\n            <= 1\n        )\n        self._list_of_constituent_partitions = list_of_partitions\n        # Defer computing _list_of_block_partitions because we might need to\n        # drain call queues for that.\n        self._list_of_block_partitions = None\n\n    @property\n    def list_of_blocks(self):\n        \"\"\"\n        Get the list of physical partition objects that compose this partition.\n\n        Returns\n        -------\n        list\n            A list of physical partition objects (``ray.ObjectRef``, ``distributed.Future`` e.g.).\n        \"\"\"\n        # Defer draining call queue (which is hidden in `partition.list_of_blocks` call) until we get the partitions.\n        # TODO Look into draining call queue at the same time as the task\n        return [\n            partition.list_of_blocks[0] for partition in self.list_of_block_partitions\n        ]\n\n    @property\n    def list_of_block_partitions(self) -> list:\n        \"\"\"\n        Get the list of block partitions that compose this partition.\n\n        Returns\n        -------\n        List\n            A list of ``PandasDataframePartition``.\n        \"\"\"\n        if self._list_of_block_partitions is not None:\n            return self._list_of_block_partitions\n        self._list_of_block_partitions = []\n        # Extract block partitions from the block and axis partitions that\n        # constitute this partition.\n        for partition in self._list_of_constituent_partitions:\n            if isinstance(partition, PandasDataframeAxisPartition):\n                if partition.axis == self.axis:\n                    # We are building an axis partition out 
of another\n                    # axis partition `partition` that contains its own list\n                    # of block partitions, partition.list_of_block_partitions.\n                    # `partition` may have its own call queue, which has to be\n                    # applied to the entire `partition` before we execute any\n                    # further operations on its block partitions.\n                    partition.drain_call_queue()\n                    self._list_of_block_partitions.extend(\n                        partition.list_of_block_partitions\n                    )\n                else:\n                    # If this axis partition is made of axis partitions\n                    # for the other axes, squeeze such partitions into a single\n                    # block so that this partition only holds a one-dimensional\n                    # list of blocks. We could change this implementation to\n                    # hold a 2-d list of blocks, but that would complicate the\n                    # code quite a bit.\n                    self._list_of_block_partitions.append(\n                        partition.force_materialization().list_of_block_partitions[0]\n                    )\n            else:\n                self._list_of_block_partitions.append(partition)\n        return self._list_of_block_partitions\n\n    @classmethod\n    def _get_drain_func(cls):  # noqa: GL08\n        return PandasDataframeAxisPartition.drain\n\n    def drain_call_queue(self, num_splits=None):\n        \"\"\"\n        Execute all operations stored in this partition's call queue.\n\n        Parameters\n        ----------\n        num_splits : int, default: None\n            The number of times to split the result object.\n        \"\"\"\n        if len(self.call_queue) == 0:\n            # this implicitly calls `drain_call_queue` for block partitions,\n            # which might have deferred call queues\n            _ = self.list_of_blocks\n            return\n        
call_queue = self.call_queue\n        try:\n            # Clearing the queue before calling `.apply()` so it won't try to drain it repeatedly\n            self.call_queue = []\n            drained = self.apply(\n                self._get_drain_func(), num_splits=num_splits, call_queue=call_queue\n            )\n        except Exception:\n            # Restoring the call queue in case of an exception as it most likely wasn't drained\n            self.call_queue = call_queue\n            raise\n        if not isinstance(drained, list):\n            drained = [drained]\n        self._list_of_block_partitions = drained\n\n    def force_materialization(self, get_ip=False):\n        \"\"\"\n        Materialize partitions into a single partition.\n\n        Parameters\n        ----------\n        get_ip : bool, default: False\n            Whether to get node ip address to a single partition or not.\n\n        Returns\n        -------\n        PandasDataframeAxisPartition\n            An axis partition containing only a single materialized partition.\n        \"\"\"\n        materialized = super().force_materialization(get_ip=get_ip)\n        self._list_of_block_partitions = materialized.list_of_block_partitions\n        return materialized\n\n    def apply(\n        self,\n        func,\n        *args,\n        num_splits=None,\n        other_axis_partition=None,\n        maintain_partitioning=True,\n        lengths=None,\n        manual_partition=False,\n        **kwargs,\n    ):\n        \"\"\"\n        Apply a function to this axis partition along full axis.\n\n        Parameters\n        ----------\n        func : callable\n            The function to apply.\n        *args : iterable\n            Positional arguments to pass to `func`.\n        num_splits : int, default: None\n            The number of times to split the result object.\n        other_axis_partition : PandasDataframeAxisPartition, default: None\n            Another `PandasDataframeAxisPartition` object 
to be applied\n            to func. This is for operations that are between two data sets.\n        maintain_partitioning : bool, default: True\n            Whether to keep the partitioning in the same\n            orientation as it was previously or not. This is important because we may be\n            operating on an individual AxisPartition and not touching the rest.\n            In this case, we have to return the partitioning to its previous\n            orientation (the lengths will remain the same). This is ignored between\n            two axis partitions.\n        lengths : iterable, default: None\n            The list of lengths to shuffle the object.\n        manual_partition : bool, default: False\n            If True, partition the result with `lengths`.\n        **kwargs : dict\n            Additional keywords arguments to be passed in `func`.\n\n        Returns\n        -------\n        list\n            A list of `PandasDataframePartition` objects.\n        \"\"\"\n        if not self.full_axis:\n            # If this is not a full axis partition, it already contains a subset of\n            # the full axis, so we shouldn't split the result further.\n            num_splits = 1\n        if len(self.call_queue) > 0:\n            self.drain_call_queue()\n\n        if num_splits is None:\n            num_splits = len(self.list_of_blocks)\n\n        if other_axis_partition is not None:\n            if not isinstance(other_axis_partition, list):\n                other_axis_partition = [other_axis_partition]\n\n            # (other_shape[i-1], other_shape[i]) will indicate slice\n            # to restore i-1 axis partition\n            other_shape = np.cumsum(\n                [0] + [len(o.list_of_blocks) for o in other_axis_partition]\n            )\n\n            return self._wrap_partitions(\n                self.deploy_func_between_two_axis_partitions(\n                    self.axis,\n                    func,\n                    args,\n                
    kwargs,\n                    num_splits,\n                    len(self.list_of_blocks),\n                    other_shape,\n                    *tuple(\n                        self.list_of_blocks\n                        + [\n                            part\n                            for axis_partition in other_axis_partition\n                            for part in axis_partition.list_of_blocks\n                        ]\n                    ),\n                    min_block_size=(\n                        MinRowPartitionSize.get()\n                        if self.axis == 0\n                        else MinColumnPartitionSize.get()\n                    ),\n                )\n            )\n        result = self._wrap_partitions(\n            self.deploy_axis_func(\n                self.axis,\n                func,\n                args,\n                kwargs,\n                num_splits,\n                maintain_partitioning,\n                *self.list_of_blocks,\n                min_block_size=(\n                    MinRowPartitionSize.get()\n                    if self.axis == 0\n                    else MinColumnPartitionSize.get()\n                ),\n                lengths=lengths,\n                manual_partition=manual_partition,\n            )\n        )\n        if self.full_axis:\n            return result\n        else:\n            # If this is not a full axis partition, just take out the single split in the result.\n            return result[0]\n\n    def split(\n        self, split_func, num_splits, f_args=None, f_kwargs=None, extract_metadata=False\n    ):\n        \"\"\"\n        Split axis partition into multiple partitions using the `split_func`.\n\n        Parameters\n        ----------\n        split_func : callable(pandas.DataFrame) -> list[pandas.DataFrame]\n            A function that takes partition's content and split it into multiple chunks.\n        num_splits : int\n            The number of splits the `split_func` 
returns.\n        f_args : iterable, optional\n            Positional arguments to pass to the `split_func`.\n        f_kwargs : dict, optional\n            Keyword arguments to pass to the `split_func`.\n        extract_metadata : bool, default: False\n            Whether to return metadata (length, width, ip) of the result. Passing `False` may relax\n            the load on object storage as the remote function would return X times fewer futures\n            (where X is the number of metadata values). Passing `False` makes sense for temporary\n            results where you know for sure that the metadata will never be requested.\n\n        Returns\n        -------\n        list\n            List of wrapped remote partition objects.\n        \"\"\"\n        f_args = tuple() if f_args is None else f_args\n        f_kwargs = {} if f_kwargs is None else f_kwargs\n        return self._wrap_partitions(\n            self.deploy_splitting_func(\n                self.axis,\n                split_func,\n                f_args,\n                f_kwargs,\n                num_splits,\n                *self.list_of_blocks,\n                extract_metadata=extract_metadata,\n            ),\n            extract_metadata=extract_metadata,\n        )\n\n    @classmethod\n    def deploy_splitting_func(\n        cls,\n        axis,\n        split_func,\n        f_args,\n        f_kwargs,\n        num_splits,\n        *partitions,\n        extract_metadata=False,\n    ):\n        \"\"\"\n        Deploy a splitting function along a full axis.\n\n        Parameters\n        ----------\n        axis : {0, 1}\n            The axis to perform the function along.\n        split_func : callable(pandas.DataFrame) -> list[pandas.DataFrame]\n            The function to perform.\n        f_args : list or tuple\n            Positional arguments to pass to `split_func`.\n        f_kwargs : dict\n            Keyword arguments to pass to `split_func`.\n        num_splits : int\n            The 
number of splits the `split_func` return.\n        *partitions : iterable\n            All partitions that make up the full axis (row or column).\n        extract_metadata : bool, default: False\n            Whether to return metadata (length, width, ip) of the result. Note that `True` value\n            is not supported in `PandasDataframeAxisPartition` class.\n\n        Returns\n        -------\n        list\n            A list of pandas DataFrames.\n        \"\"\"\n        dataframe = pandas.concat(list(partitions), axis=axis, copy=False)\n        # to reduce peak memory consumption\n        del partitions\n        return split_func(dataframe, *f_args, **f_kwargs)\n\n    @classmethod\n    def deploy_axis_func(\n        cls,\n        axis,\n        func,\n        f_args,\n        f_kwargs,\n        num_splits,\n        maintain_partitioning,\n        *partitions,\n        min_block_size,\n        lengths=None,\n        manual_partition=False,\n        return_generator=False,\n    ):\n        \"\"\"\n        Deploy a function along a full axis.\n\n        Parameters\n        ----------\n        axis : {0, 1}\n            The axis to perform the function along.\n        func : callable\n            The function to perform.\n        f_args : list or tuple\n            Positional arguments to pass to ``func``.\n        f_kwargs : dict\n            Keyword arguments to pass to ``func``.\n        num_splits : int\n            The number of splits to return (see `split_result_of_axis_func_pandas`).\n        maintain_partitioning : bool\n            If True, keep the old partitioning if possible.\n            If False, create a new partition layout.\n        *partitions : iterable\n            All partitions that make up the full axis (row or column).\n        min_block_size : int\n            Minimum number of rows/columns in a single split.\n        lengths : list, optional\n            The list of lengths to shuffle the object.\n        manual_partition : bool, 
default: False\n            If True, partition the result with `lengths`.\n        return_generator : bool, default: False\n            Return a generator from the function, set to `True` for Ray backend\n            as Ray remote functions can return Generators.\n\n        Returns\n        -------\n        list | Generator\n            A list or generator of pandas DataFrames.\n        \"\"\"\n        len_partitions = len(partitions)\n        lengths_partitions = [len(part) for part in partitions]\n        widths_partitions = [len(part.columns) for part in partitions]\n\n        dataframe = pandas.concat(list(partitions), axis=axis, copy=False)\n\n        # to reduce peak memory consumption\n        del partitions\n\n        with warnings.catch_warnings():\n            warnings.filterwarnings(\"ignore\", category=FutureWarning)\n            try:\n                result = func(dataframe, *f_args, **f_kwargs)\n            except ValueError as err:\n                if \"assignment destination is read-only\" in str(err):\n                    result = func(dataframe.copy(), *f_args, **f_kwargs)\n                else:\n                    raise err\n\n        # to reduce peak memory consumption\n        del dataframe\n\n        if num_splits == 1:\n            # If we're not going to split the result, we don't need to specify\n            # split lengths.\n            lengths = None\n        elif manual_partition:\n            # The split function is expecting a list\n            lengths = list(lengths)\n        # We set lengths to None so we don't use the old lengths for the resulting partition\n        # layout. 
This is done if the number of splits is changing or we are told not to\n        # keep the old partitioning.\n        elif num_splits != len_partitions or not maintain_partitioning:\n            lengths = None\n        else:\n            if axis == 0:\n                lengths = lengths_partitions\n                if sum(lengths) != len(result):\n                    lengths = None\n            else:\n                lengths = widths_partitions\n                if sum(lengths) != len(result.columns):\n                    lengths = None\n        if return_generator:\n            return generate_result_of_axis_func_pandas(\n                axis,\n                num_splits,\n                result,\n                min_block_size,\n                lengths,\n            )\n        else:\n            return split_result_of_axis_func_pandas(\n                axis, num_splits, result, min_block_size, lengths\n            )\n\n    @classmethod\n    def deploy_func_between_two_axis_partitions(\n        cls,\n        axis,\n        func,\n        f_args,\n        f_kwargs,\n        num_splits,\n        len_of_left,\n        other_shape,\n        *partitions,\n        min_block_size,\n        return_generator=False,\n    ):\n        \"\"\"\n        Deploy a function along a full axis between two data sets.\n\n        Parameters\n        ----------\n        axis : {0, 1}\n            The axis to perform the function along.\n        func : callable\n            The function to perform.\n        f_args : list or tuple\n            Positional arguments to pass to ``func``.\n        f_kwargs : dict\n            Keyword arguments to pass to ``func``.\n        num_splits : int\n            The number of splits to return (see `split_result_of_axis_func_pandas`).\n        len_of_left : int\n            The number of values in `partitions` that belong to the left data set.\n        other_shape : np.ndarray\n            The shape of right frame in terms of partitions, i.e.\n            
(other_shape[i-1], other_shape[i]) will indicate slice to restore i-1 axis partition.\n        *partitions : iterable\n            All partitions that make up the full axis (row or column) for both data sets.\n        min_block_size : int\n            Minimum number of rows/columns in a single split.\n        return_generator : bool, default: False\n            Return a generator from the function, set to `True` for Ray backend\n            as Ray remote functions can return Generators.\n\n        Returns\n        -------\n        list | Generator\n            A list or generator of pandas DataFrames.\n        \"\"\"\n        lt_frame = pandas.concat(partitions[:len_of_left], axis=axis, copy=False)\n\n        rt_parts = partitions[len_of_left:]\n\n        # to reduce peak memory consumption\n        del partitions\n\n        # reshaping flattened `rt_parts` array into a frame with shape `other_shape`\n        combined_axis = [\n            pandas.concat(\n                rt_parts[other_shape[i - 1] : other_shape[i]],\n                axis=axis,\n                copy=False,\n            )\n            for i in range(1, len(other_shape))\n        ]\n\n        # to reduce peak memory consumption\n        del rt_parts\n\n        rt_frame = pandas.concat(combined_axis, axis=axis ^ 1, copy=False)\n\n        # to reduce peak memory consumption\n        del combined_axis\n\n        with warnings.catch_warnings():\n            warnings.filterwarnings(\"ignore\", category=FutureWarning)\n            result = func(lt_frame, rt_frame, *f_args, **f_kwargs)\n\n        # to reduce peak memory consumption\n        del lt_frame, rt_frame\n\n        if return_generator:\n            return generate_result_of_axis_func_pandas(\n                axis,\n                num_splits,\n                result,\n                min_block_size,\n            )\n        else:\n            return split_result_of_axis_func_pandas(\n                axis,\n                num_splits,\n               
 result,\n                min_block_size,\n            )\n\n    @classmethod\n    def drain(cls, df: pandas.DataFrame, call_queue: list):\n        \"\"\"\n        Execute all operations stored in the call queue on the pandas object (helper function).\n\n        Parameters\n        ----------\n        df : pandas.DataFrame\n        call_queue : list\n            Call queue that needs to be executed on pandas DataFrame.\n\n        Returns\n        -------\n        pandas.DataFrame\n        \"\"\"\n        for func, args, kwargs in call_queue:\n            df = func(df, *args, **kwargs)\n        return df\n\n    def mask(self, row_indices, col_indices):\n        \"\"\"\n        Create (synchronously) a mask that extracts the indices provided.\n\n        Parameters\n        ----------\n        row_indices : list-like, slice or label\n            The row labels for the rows to extract.\n        col_indices : list-like, slice or label\n            The column labels for the columns to extract.\n\n        Returns\n        -------\n        PandasDataframeAxisPartition\n            A new ``PandasDataframeAxisPartition`` object, materialized.\n        \"\"\"\n        return (\n            self.force_materialization()\n            .list_of_block_partitions[0]\n            .mask(row_indices, col_indices)\n        )\n\n    def to_pandas(self):\n        \"\"\"\n        Convert the data in this partition to a ``pandas.DataFrame``.\n\n        Returns\n        -------\n        pandas DataFrame.\n        \"\"\"\n        return self.force_materialization().list_of_block_partitions[0].to_pandas()\n\n    def to_numpy(self):\n        \"\"\"\n        Convert the data in this partition to a ``numpy.array``.\n\n        Returns\n        -------\n        NumPy array.\n        \"\"\"\n        return self.force_materialization().list_of_block_partitions[0].to_numpy()\n\n    _length_cache = None\n\n    def length(self, materialize=True):\n        \"\"\"\n        Get the length of this 
partition.\n\n        Parameters\n        ----------\n        materialize : bool, default: True\n            Whether to forcibly materialize the result into an integer. If ``False``\n            was specified, may return a future of the result if it hasn't been\n            materialized yet.\n\n        Returns\n        -------\n        int\n            The length of the partition.\n        \"\"\"\n        if self._length_cache is None:\n            if self.axis == 0:\n                self._length_cache = sum(\n                    obj.length() for obj in self.list_of_block_partitions\n                )\n            else:\n                self._length_cache = self.list_of_block_partitions[0].length(\n                    materialize\n                )\n        return self._length_cache\n\n    _width_cache = None\n\n    def width(self, materialize=True):\n        \"\"\"\n        Get the width of this partition.\n\n        Parameters\n        ----------\n        materialize : bool, default: True\n            Whether to forcibly materialize the result into an integer. 
If ``False``\n            was specified, may return a future of the result if it hasn't been\n            materialized yet.\n\n        Returns\n        -------\n        int\n            The width of the partition.\n        \"\"\"\n        if self._width_cache is None:\n            if self.axis == 1:\n                self._width_cache = sum(\n                    obj.width() for obj in self.list_of_block_partitions\n                )\n            else:\n                self._width_cache = self.list_of_block_partitions[0].width(materialize)\n        return self._width_cache\n\n    def wait(self):\n        \"\"\"Wait for completion of computations on the object wrapped by the partition.\"\"\"\n        pass\n\n    def add_to_apply_calls(self, func, *args, length=None, width=None, **kwargs):\n        \"\"\"\n        Add a function to the call queue.\n\n        Parameters\n        ----------\n        func : callable or a future type\n            Function to be added to the call queue.\n        *args : iterable\n            Additional positional arguments to be passed in `func`.\n        length : A future type or int, optional\n            Length, or reference to it, of wrapped ``pandas.DataFrame``.\n        width : A future type or int, optional\n            Width, or reference to it, of wrapped ``pandas.DataFrame``.\n        **kwargs : dict\n            Additional keyword arguments to be passed in `func`.\n\n        Returns\n        -------\n        PandasDataframeAxisPartition\n            A new ``PandasDataframeAxisPartition`` object.\n        \"\"\"\n        return type(self)(\n            self.list_of_block_partitions,\n            full_axis=self.full_axis,\n            call_queue=self.call_queue + [[func, args, kwargs]],\n            length=length,\n            width=width,\n        )\n"
  },
  {
    "path": "modin/core/dataframe/pandas/partitioning/partition.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"The module defines base interface for a partition of a Modin DataFrame.\"\"\"\n\nfrom __future__ import annotations\n\nimport logging\nimport uuid\nfrom abc import ABC\nfrom copy import copy\nfrom functools import cached_property\n\nimport pandas\nfrom pandas.api.types import is_scalar\n\nfrom modin.core.storage_formats.pandas.utils import length_fn_pandas, width_fn_pandas\nfrom modin.logging import ClassLogger, get_logger\nfrom modin.logging.config import LogLevel\nfrom modin.pandas.indexing import compute_sliced_len\n\n\nclass PandasDataframePartition(\n    ABC, ClassLogger, modin_layer=\"BLOCK-PARTITION\", log_level=LogLevel.DEBUG\n):  # pragma: no cover\n    \"\"\"\n    An abstract class that is base for any partition class of ``pandas`` storage format.\n\n    The class providing an API that has to be overridden by child classes.\n    \"\"\"\n\n    _length_cache = None\n    _width_cache = None\n    _identity_cache = None\n    _data = None\n    execution_wrapper = None\n\n    # these variables are intentionally initialized at runtime\n    # so as not to initialize the engine during import\n    _iloc_func = None\n\n    def __init__(self):\n        if 
type(self)._iloc_func is None:\n            # Places `_iloc` function into the storage to speed up\n            # remote function calls and caches the result.\n            # It also postpones engine initialization, which happens\n            # implicitly when `execution_wrapper.put` is called.\n            if self.execution_wrapper is not None:\n                type(self)._iloc_func = staticmethod(\n                    self.execution_wrapper.put(self._iloc)\n                )\n            else:\n                type(self)._iloc_func = staticmethod(self._iloc)\n\n    @cached_property\n    def __constructor__(self) -> type[PandasDataframePartition]:\n        \"\"\"\n        Create a new instance of this object.\n\n        Returns\n        -------\n        PandasDataframePartition\n            New instance of pandas partition.\n        \"\"\"\n        return type(self)\n\n    def get(self):\n        \"\"\"\n        Get the object wrapped by this partition.\n\n        Returns\n        -------\n        object\n            The object that was wrapped by this partition.\n\n        Notes\n        -----\n        This is the opposite of the classmethod `put`.\n        E.g. 
if you assign `x = PandasDataframePartition.put(1)`, `x.get()` should\n        always return 1.\n        \"\"\"\n        log = get_logger()\n        self._is_debug(log) and log.debug(f\"ENTER::Partition.get::{self._identity}\")\n        self.drain_call_queue()\n        result = self.execution_wrapper.materialize(self._data)\n        self._is_debug(log) and log.debug(f\"EXIT::Partition.get::{self._identity}\")\n        return result\n\n    @property\n    def list_of_blocks(self):\n        \"\"\"\n        Get the list of physical partition objects that compose this partition.\n\n        Returns\n        -------\n        list\n            A list of physical partition objects (``ray.ObjectRef``, ``distributed.Future`` e.g.).\n        \"\"\"\n        # Defer draining call queue until we get the partitions.\n        # TODO Look into draining call queue at the same time as the task\n        self.drain_call_queue()\n        return [self._data]\n\n    def apply(self, func, *args, **kwargs):\n        \"\"\"\n        Apply a function to the object wrapped by this partition.\n\n        Parameters\n        ----------\n        func : callable\n            Function to apply.\n        *args : iterable\n            Additional positional arguments to be passed in `func`.\n        **kwargs : dict\n            Additional keyword arguments to be passed in `func`.\n\n        Returns\n        -------\n        PandasDataframePartition\n            New `PandasDataframePartition` object.\n\n        Notes\n        -----\n        It is up to the implementation how `kwargs` are handled. They are\n        an important part of many implementations. 
As of right now, they\n        are not serialized.\n        \"\"\"\n        pass\n\n    def add_to_apply_calls(self, func, *args, length=None, width=None, **kwargs):\n        \"\"\"\n        Add a function to the call queue.\n\n        Parameters\n        ----------\n        func : callable\n            Function to be added to the call queue.\n        *args : iterable\n            Additional positional arguments to be passed in `func`.\n        length : reference or int, optional\n            Length, or reference to length, of wrapped ``pandas.DataFrame``.\n        width : reference or int, optional\n            Width, or reference to width, of wrapped ``pandas.DataFrame``.\n        **kwargs : dict\n            Additional keyword arguments to be passed in `func`.\n\n        Returns\n        -------\n        PandasDataframePartition\n            New `PandasDataframePartition` object with the function added to the call queue.\n\n        Notes\n        -----\n        This function will be executed when `apply` is called. 
It will be executed\n        in the order inserted; apply's func operates the last and return.\n        \"\"\"\n        return self.__constructor__(\n            self._data,\n            call_queue=self.call_queue + [[func, args, kwargs]],\n            length=length,\n            width=width,\n        )\n\n    def drain_call_queue(self):\n        \"\"\"Execute all operations stored in the call queue on the object wrapped by this partition.\"\"\"\n        pass\n\n    def wait(self):\n        \"\"\"Wait for completion of computations on the object wrapped by the partition.\"\"\"\n        pass\n\n    def to_pandas(self):\n        \"\"\"\n        Convert the object wrapped by this partition to a ``pandas.DataFrame``.\n\n        Returns\n        -------\n        pandas.DataFrame\n\n        Notes\n        -----\n        If the underlying object is a pandas DataFrame, this will likely\n        only need to call `get`.\n        \"\"\"\n        dataframe = self.get()\n        assert isinstance(dataframe, (pandas.DataFrame, pandas.Series))\n        return dataframe\n\n    def to_numpy(self, **kwargs):\n        \"\"\"\n        Convert the object wrapped by this partition to a NumPy array.\n\n        Parameters\n        ----------\n        **kwargs : dict\n            Additional keyword arguments to be passed in ``to_numpy``.\n\n        Returns\n        -------\n        np.ndarray\n\n        Notes\n        -----\n        If the underlying object is a pandas DataFrame, this will return\n        a 2D NumPy array.\n        \"\"\"\n        return self.apply(lambda df: df.to_numpy(**kwargs)).get()\n\n    @staticmethod\n    def _iloc(df, row_labels, col_labels):  # noqa: RT01, PR01\n        \"\"\"Perform `iloc` on dataframes wrapped in partitions (helper function).\"\"\"\n        return df.iloc[row_labels, col_labels]\n\n    def mask(self, row_labels, col_labels):\n        \"\"\"\n        Lazily create a mask that extracts the indices provided.\n\n        Parameters\n        
----------\n        row_labels : list-like, slice or label\n            The row labels for the rows to extract.\n        col_labels : list-like, slice or label\n            The column labels for the columns to extract.\n\n        Returns\n        -------\n        PandasDataframePartition\n            New `PandasDataframePartition` object.\n        \"\"\"\n\n        def is_full_axis_mask(index, axis_length):\n            \"\"\"Check whether `index` mask grabs `axis_length` amount of elements.\"\"\"\n            if isinstance(index, slice):\n                return index == slice(None) or (\n                    isinstance(axis_length, int)\n                    and compute_sliced_len(index, axis_length) == axis_length\n                )\n            return (\n                hasattr(index, \"__len__\")\n                and isinstance(axis_length, int)\n                and len(index) == axis_length\n            )\n\n        row_labels = [row_labels] if is_scalar(row_labels) else row_labels\n        col_labels = [col_labels] if is_scalar(col_labels) else col_labels\n\n        if is_full_axis_mask(row_labels, self._length_cache) and is_full_axis_mask(\n            col_labels, self._width_cache\n        ):\n            return copy(self)\n\n        new_obj = self.add_to_apply_calls(self._iloc_func, row_labels, col_labels)\n\n        def try_recompute_cache(indices, previous_cache):\n            \"\"\"Compute new axis-length cache for the masked frame based on its previous cache.\"\"\"\n            if not isinstance(indices, slice):\n                return len(indices)\n            if not isinstance(previous_cache, int):\n                return None\n            return compute_sliced_len(indices, previous_cache)\n\n        new_obj._length_cache = try_recompute_cache(row_labels, self._length_cache)\n        new_obj._width_cache = try_recompute_cache(col_labels, self._width_cache)\n        return new_obj\n\n    @classmethod\n    def put(cls, obj):\n        \"\"\"\n        Put 
an object into a store and wrap it with partition object.\n\n        Parameters\n        ----------\n        obj : object\n            An object to be put.\n\n        Returns\n        -------\n        PandasDataframePartition\n            New `PandasDataframePartition` object.\n        \"\"\"\n        pass\n\n    @classmethod\n    def preprocess_func(cls, func):\n        \"\"\"\n        Preprocess a function before an `apply` call.\n\n        Parameters\n        ----------\n        func : callable\n            Function to preprocess.\n\n        Returns\n        -------\n        callable\n            An object that can be accepted by `apply`.\n\n        Notes\n        -----\n        This is a classmethod because the definition of how to preprocess\n        should be class-wide. Also, we may want to use this before we\n        deploy a preprocessed function to multiple `PandasDataframePartition`\n        objects.\n        \"\"\"\n        pass\n\n    @classmethod\n    def _length_extraction_fn(cls):\n        \"\"\"\n        Return the function that computes the length of the object wrapped by this partition.\n\n        Returns\n        -------\n        callable\n            The function that computes the length of the object wrapped by this partition.\n        \"\"\"\n        return length_fn_pandas\n\n    @classmethod\n    def _width_extraction_fn(cls):\n        \"\"\"\n        Return the function that computes the width of the object wrapped by this partition.\n\n        Returns\n        -------\n        callable\n            The function that computes the width of the object wrapped by this partition.\n        \"\"\"\n        return width_fn_pandas\n\n    def length(self, materialize=True):\n        \"\"\"\n        Get the length of the object wrapped by this partition.\n\n        Parameters\n        ----------\n        materialize : bool, default: True\n            Whether to forcibly materialize the result into an integer. 
If ``False``\n            was specified, may return a future of the result if it hasn't been\n            materialized yet.\n\n        Returns\n        -------\n        int or its Future\n            The length of the object.\n        \"\"\"\n        if self._length_cache is None:\n            self._length_cache = self.apply(self._length_extraction_fn()).get()\n        return self._length_cache\n\n    def width(self, materialize=True):\n        \"\"\"\n        Get the width of the object wrapped by the partition.\n\n        Parameters\n        ----------\n        materialize : bool, default: True\n            Whether to forcibly materialize the result into an integer. If ``False``\n            was specified, may return a future of the result if it hasn't been\n            materialized yet.\n\n        Returns\n        -------\n        int or its Future\n            The width of the object.\n        \"\"\"\n        if self._width_cache is None:\n            self._width_cache = self.apply(self._width_extraction_fn()).get()\n        return self._width_cache\n\n    @property\n    def _identity(self):\n        \"\"\"\n        Calculate identifier on request for debug logging mode.\n\n        Returns\n        -------\n        str\n        \"\"\"\n        if self._identity_cache is None:\n            self._identity_cache = uuid.uuid4().hex\n        return self._identity_cache\n\n    def split(self, split_func, num_splits, *args):\n        \"\"\"\n        Split the object wrapped by the partition into multiple partitions.\n\n        Parameters\n        ----------\n        split_func : Callable[pandas.DataFrame, List[Any]] -> List[pandas.DataFrame]\n            The function that will split this partition into multiple partitions. 
The list contains\n            pivots to split by, and will have the same dtype as the major column we are shuffling on.\n        num_splits : int\n            The number of resulting partitions (may be empty).\n        *args : List[Any]\n            Arguments to pass to ``split_func``.\n\n        Returns\n        -------\n        list\n            A list of partitions.\n        \"\"\"\n        log = get_logger()\n        self._is_debug(log) and log.debug(f\"ENTER::Partition.split::{self._identity}\")\n\n        self._is_debug(log) and log.debug(f\"SUBMIT::_split_df::{self._identity}\")\n        outputs = self.execution_wrapper.deploy(\n            split_func, [self._data] + list(args), num_returns=num_splits\n        )\n        self._is_debug(log) and log.debug(f\"EXIT::Partition.split::{self._identity}\")\n        return [self.__constructor__(output) for output in outputs]\n\n    @classmethod\n    def empty(cls):\n        \"\"\"\n        Create a new partition that wraps an empty pandas DataFrame.\n\n        Returns\n        -------\n        PandasDataframePartition\n            New `PandasDataframePartition` object.\n        \"\"\"\n        return cls.put(pandas.DataFrame(), 0, 0)\n\n    def _is_debug(self, logger=None):\n        \"\"\"\n        Check that the logger is set to debug mode.\n\n        Parameters\n        ----------\n        logger : logging.Logger, optional\n            Logger obtained from Modin's `get_logger` utility.\n            Passing it explicitly is useful when the caller has already\n            obtained a logger via `get_logger`, as it avoids a second\n            `get_logger` call. This is an optimization.\n\n        Returns\n        -------\n        bool\n        \"\"\"\n        if logger is None:\n            logger = get_logger()\n        return logger.isEnabledFor(logging.DEBUG)\n"
  },
  {
    "path": "modin/core/dataframe/pandas/partitioning/partition_manager.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"\nModule holding base PartitionManager class - the thing that tracks partitions across the distribution.\n\nThe manager also allows manipulating the data - running functions at each partition, shuffle over the distribution, etc.\n\"\"\"\n\nimport os\nimport warnings\nfrom abc import ABC\nfrom functools import wraps\nfrom typing import TYPE_CHECKING, Optional\n\nimport numpy as np\nimport pandas\nfrom pandas._libs.lib import no_default\n\nfrom modin.config import (\n    BenchmarkMode,\n    CpuCount,\n    DynamicPartitioning,\n    Engine,\n    MinColumnPartitionSize,\n    MinRowPartitionSize,\n    NPartitions,\n    PersistentPickle,\n    ProgressBar,\n)\nfrom modin.core.dataframe.pandas.utils import create_pandas_df_from_partitions\nfrom modin.core.storage_formats.pandas.utils import compute_chunksize\nfrom modin.error_message import ErrorMessage\nfrom modin.logging import ClassLogger\nfrom modin.logging.config import LogLevel\nfrom modin.pandas.utils import get_pandas_backend\n\nif TYPE_CHECKING:\n    from modin.core.dataframe.pandas.dataframe.utils import ShuffleFunctions\n\n\ndef wait_computations_if_benchmark_mode(func):\n    \"\"\"\n    Make sure a `func` 
finished its computations in benchmark mode.\n\n    Parameters\n    ----------\n    func : callable\n        A function that should be performed in syncronous mode.\n\n    Returns\n    -------\n    callable\n        Wrapped function that executes eagerly (if benchmark mode) or original `func`.\n\n    Notes\n    -----\n    `func` should return NumPy array with partitions.\n    \"\"\"\n\n    @wraps(func)\n    def wait(cls, *args, **kwargs):\n        \"\"\"Wait for computation results.\"\"\"\n        result = func(cls, *args, **kwargs)\n        if BenchmarkMode.get():\n            if isinstance(result, tuple):\n                partitions = result[0]\n            else:\n                partitions = result\n            # When partitions have a deferred call queue, calling\n            # partition.wait() on each partition serially will serially kick\n            # off each deferred computation and wait for each partition to\n            # finish before kicking off the next one. Instead, we want to\n            # serially kick off all the deferred computations so that they can\n            # all run asynchronously, then wait on all the results.\n            cls.finalize(partitions)\n            # The partition manager invokes the relevant .wait() method under\n            # the hood, which should wait in parallel for all computations to finish\n            cls.wait_partitions(partitions.flatten())\n        return result\n\n    return wait\n\n\nclass PandasDataframePartitionManager(\n    ClassLogger, ABC, modin_layer=\"PARTITION-MANAGER\", log_level=LogLevel.DEBUG\n):\n    \"\"\"\n    Base class for managing the dataframe data layout and operators across the distribution of partitions.\n\n    Partition class is the class to use for storing each partition.\n    Each partition must extend the `PandasDataframePartition` class.\n    \"\"\"\n\n    _partition_class = None\n    # Column partitions class is the class to use to create the column partitions.\n    
_column_partitions_class = None\n    # Row partitions class is the class to use to create the row partitions.\n    _row_partition_class = None\n    _execution_wrapper = None\n\n    @classmethod\n    def materialize_futures(cls, input_list):\n        \"\"\"\n        Materialize all futures in the input list.\n\n        Parameters\n        ----------\n        input_list : list\n            The list that has to be manipulated.\n\n        Returns\n        -------\n        list\n           A new list with materialized objects.\n        \"\"\"\n        # Do nothing if input_list is None or [].\n        if input_list is None:\n            return None\n        filtered_list = []\n        filtered_idx = []\n        for idx, item in enumerate(input_list):\n            if cls._execution_wrapper.is_future(item):\n                filtered_idx.append(idx)\n                filtered_list.append(item)\n        filtered_list = cls._execution_wrapper.materialize(filtered_list)\n        result = input_list.copy()\n        for idx, item in zip(filtered_idx, filtered_list):\n            result[idx] = item\n        return result\n\n    @classmethod\n    def preprocess_func(cls, map_func):\n        \"\"\"\n        Preprocess a function to be applied to `PandasDataframePartition` objects.\n\n        Parameters\n        ----------\n        map_func : callable\n            The function to be preprocessed.\n\n        Returns\n        -------\n        callable\n            The preprocessed version of the `map_func` provided.\n\n        Notes\n        -----\n        Preprocessing does not require any specific format, only that the\n        `PandasDataframePartition.apply` method will recognize it (for the subclass\n        being used).\n\n        If your `PandasDataframePartition` objects assume that a function provided\n        is serialized or wrapped or in some other format, this is the place\n        to add that logic. 
It is possible that this can also just return\n        `map_func` if the `apply` method of the `PandasDataframePartition` object\n        you are using does not require any modification to a given function.\n        \"\"\"\n        if cls._execution_wrapper.is_future(map_func):\n            return map_func  # Has already been preprocessed\n\n        old_value = PersistentPickle.get()\n        # When performing a function with Modin objects, it is more profitable to\n        # do the conversion to pandas once on the main process than several times\n        # on worker processes. Details: https://github.com/modin-project/modin/pull/6673/files#r1391086755\n        # For Dask, otherwise there may be an error: `coroutine 'Client._gather' was never awaited`\n        need_update = not PersistentPickle.get() and Engine.get() != \"Dask\"\n        if need_update:\n            PersistentPickle.put(True)\n        try:\n            result = cls._partition_class.preprocess_func(map_func)\n        finally:\n            if need_update:\n                PersistentPickle.put(old_value)\n        return result\n\n    # END Abstract Methods\n\n    @classmethod\n    def create_partition_from_metadata(\n        cls, dtypes: Optional[pandas.Series] = None, **metadata\n    ):\n        \"\"\"\n        Create NumPy array of partitions that holds an empty dataframe with given metadata.\n\n        Parameters\n        ----------\n        dtypes : pandas.Series, optional\n            Column dtypes.\n            Upon creating a pandas DataFrame from `metadata` we call `astype` since\n            pandas doesn't allow to pass a list of dtypes directly in the constructor.\n        **metadata : dict\n            Metadata that has to be wrapped in a partition.\n\n        Returns\n        -------\n        np.ndarray\n            A NumPy 2D array of a single partition which contains the data.\n        \"\"\"\n        metadata_dataframe = pandas.DataFrame(**metadata)\n        if dtypes is not None:\n     
       metadata_dataframe = metadata_dataframe.astype(dtypes)\n        return np.array([[cls._partition_class.put(metadata_dataframe)]])\n\n    @classmethod\n    def column_partitions(cls, partitions, full_axis=True):\n        \"\"\"\n        Get the list of `BaseDataframeAxisPartition` objects representing column-wise partitions.\n\n        Parameters\n        ----------\n        partitions : list-like\n            List of (smaller) partitions to be combined to column-wise partitions.\n        full_axis : bool, default: True\n            Whether or not this partition contains the entire column axis.\n\n        Returns\n        -------\n        list\n            A list of `BaseDataframeAxisPartition` objects.\n\n        Notes\n        -----\n        Each value in this list will be a `BaseDataframeAxisPartition` object.\n        `BaseDataframeAxisPartition` is located in `axis_partition.py`.\n        \"\"\"\n        if not isinstance(partitions, list):\n            partitions = [partitions]\n        return [\n            cls._column_partitions_class(col, full_axis=full_axis)\n            for frame in partitions\n            for col in frame.T\n        ]\n\n    @classmethod\n    def row_partitions(cls, partitions):\n        \"\"\"\n        List of `BaseDataframeAxisPartition` objects representing row-wise partitions.\n\n        Parameters\n        ----------\n        partitions : list-like\n            List of (smaller) partitions to be combined to row-wise partitions.\n\n        Returns\n        -------\n        list\n            A list of `BaseDataframeAxisPartition` objects.\n\n        Notes\n        -----\n        Each value in this list will be a `BaseDataframeAxisPartition` object.\n        `BaseDataframeAxisPartition` is located in `axis_partition.py`.\n        \"\"\"\n        if not isinstance(partitions, list):\n            partitions = [partitions]\n        return [cls._row_partition_class(row) for frame in partitions for row in frame]\n\n    @classmethod\n  
  def axis_partition(cls, partitions, axis, full_axis: bool = True):\n        \"\"\"\n        Logically partition along given axis (columns or rows).\n\n        Parameters\n        ----------\n        partitions : list-like\n            List of partitions to be combined.\n        axis : {0, 1}\n            0 for column partitions, 1 for row partitions.\n        full_axis : bool, default: True\n            Whether or not this partition contains the entire column axis.\n\n        Returns\n        -------\n        list\n            A list of `BaseDataframeAxisPartition` objects.\n        \"\"\"\n        make_column_partitions = axis == 0\n        if not full_axis and not make_column_partitions:\n            raise NotImplementedError(\n                (\n                    \"Row partitions must contain the entire axis. We don't \"\n                    + \"support virtual partitioning for row partitions yet.\"\n                )\n            )\n        return (\n            cls.column_partitions(partitions)\n            if make_column_partitions\n            else cls.row_partitions(partitions)\n        )\n\n    @classmethod\n    def groupby_reduce(\n        cls, axis, partitions, by, map_func, reduce_func, apply_indices=None\n    ):\n        \"\"\"\n        Groupby data using the `map_func` provided along the `axis` over the `partitions` then reduce using `reduce_func`.\n\n        Parameters\n        ----------\n        axis : {0, 1}\n            Axis to groupby over.\n        partitions : NumPy 2D array\n            Partitions of the ModinFrame to groupby.\n        by : NumPy 2D array\n            Partitions of 'by' to broadcast.\n        map_func : callable\n            Map function.\n        reduce_func : callable,\n            Reduce function.\n        apply_indices : list of ints, default: None\n            Indices of `axis ^ 1` to apply function over.\n\n        Returns\n        -------\n        NumPy array\n            Partitions with applied groupby.\n        
\"\"\"\n        if apply_indices is not None:\n            partitions = (\n                partitions[apply_indices] if axis else partitions[:, apply_indices]\n            )\n\n        if by is not None:\n            # need to make sure that the partitioning of the following objects\n            # coincides in the required axis, because `partition_manager.broadcast_apply`\n            # doesn't call `_copartition` unlike `modin_frame.broadcast_apply`\n            assert partitions.shape[axis] == by.shape[axis], (\n                f\"the number of partitions along {axis=} is not equal: \"\n                + f\"{partitions.shape[axis]} != {by.shape[axis]}\"\n            )\n            mapped_partitions = cls.broadcast_apply(\n                axis, map_func, left=partitions, right=by\n            )\n        else:\n            mapped_partitions = cls.map_partitions(partitions, map_func)\n\n        # Assuming, that the output will not be larger than the input,\n        # keep the current number of partitions.\n        num_splits = min(len(partitions), NPartitions.get())\n        return cls.map_axis_partitions(\n            axis,\n            mapped_partitions,\n            reduce_func,\n            enumerate_partitions=True,\n            num_splits=num_splits,\n        )\n\n    @classmethod\n    @wait_computations_if_benchmark_mode\n    def broadcast_apply_select_indices(\n        cls,\n        axis,\n        apply_func,\n        left,\n        right,\n        left_indices,\n        right_indices,\n        keep_remaining=False,\n    ):\n        \"\"\"\n        Broadcast the `right` partitions to `left` and apply `apply_func` to selected indices.\n\n        Parameters\n        ----------\n        axis : {0, 1}\n            Axis to apply and broadcast over.\n        apply_func : callable\n            Function to apply.\n        left : NumPy 2D array\n            Left partitions.\n        right : NumPy 2D array\n            Right partitions.\n        left_indices : 
list-like\n            Indices to apply function to.\n        right_indices : dictionary of indices of right partitions\n            Indices that you want to bring at specified left partition, for example\n            dict {key: {key1: [0, 1], key2: [5]}} means that in left[key] you want to\n            broadcast [right[key1], right[key2]] partitions and internal indices\n            for `right` must be [[0, 1], [5]].\n        keep_remaining : bool, default: False\n            Whether or not to keep the other partitions.\n            Some operations may want to drop the remaining partitions and\n            keep only the results.\n\n        Returns\n        -------\n        NumPy array\n            An array of partition objects.\n\n        Notes\n        -----\n        Your internal function must take these kwargs:\n        [`internal_indices`, `other`, `internal_other_indices`] to work correctly!\n        \"\"\"\n        if not axis:\n            partitions_for_apply = left.T\n            right = right.T\n        else:\n            partitions_for_apply = left\n\n        [obj.drain_call_queue() for row in right for obj in row]\n\n        def get_partitions(index):\n            \"\"\"Grab required partitions and indices from `right` and `right_indices`.\"\"\"\n            must_grab = right_indices[index]\n            partitions_list = np.array([right[i] for i in must_grab.keys()])\n            indices_list = list(must_grab.values())\n            return {\"other\": partitions_list, \"internal_other_indices\": indices_list}\n\n        new_partitions = np.array(\n            [\n                (\n                    partitions_for_apply[i]\n                    if i not in left_indices\n                    else cls._apply_func_to_list_of_partitions_broadcast(\n                        apply_func,\n                        partitions_for_apply[i],\n                        internal_indices=left_indices[i],\n                        **get_partitions(i),\n                    
)\n                )\n                for i in range(len(partitions_for_apply))\n                if i in left_indices or keep_remaining\n            ]\n        )\n        if not axis:\n            new_partitions = new_partitions.T\n        return new_partitions\n\n    @classmethod\n    @wait_computations_if_benchmark_mode\n    def base_broadcast_apply(cls, axis, apply_func, left, right):\n        \"\"\"\n        Broadcast the `right` partitions to `left` and apply `apply_func` function.\n\n        Parameters\n        ----------\n        axis : {0, 1}\n            Axis to apply and broadcast over.\n        apply_func : callable\n            Function to apply.\n        left : np.ndarray\n            NumPy array of left partitions.\n        right : np.ndarray\n            NumPy array of right partitions.\n\n        Returns\n        -------\n        np.ndarray\n            NumPy array of result partition objects.\n\n        Notes\n        -----\n        This will often be overridden by implementations. 
It materializes the\n        entire partitions of the right and applies them to the left through `apply`.\n        \"\"\"\n\n        def map_func(df, *others):\n            other = (\n                pandas.concat(others, axis=axis ^ 1) if len(others) > 1 else others[0]\n            )\n            # to reduce peak memory consumption\n            del others\n            return apply_func(df, other)\n\n        map_func = cls.preprocess_func(map_func)\n        rt_axis_parts = cls.axis_partition(right, axis ^ 1)\n        return np.array(\n            [\n                [\n                    part.apply(\n                        map_func,\n                        *(\n                            rt_axis_parts[col_idx].list_of_blocks\n                            if axis\n                            else rt_axis_parts[row_idx].list_of_blocks\n                        ),\n                    )\n                    for col_idx, part in enumerate(left[row_idx])\n                ]\n                for row_idx in range(len(left))\n            ]\n        )\n\n    @classmethod\n    @wait_computations_if_benchmark_mode\n    def broadcast_axis_partitions(\n        cls,\n        axis,\n        apply_func,\n        left,\n        right,\n        keep_partitioning=False,\n        num_splits=None,\n        apply_indices=None,\n        broadcast_all=True,\n        enumerate_partitions=False,\n        lengths=None,\n        apply_func_args=None,\n        **kwargs,\n    ):\n        \"\"\"\n        Broadcast the `right` partitions to `left` and apply `apply_func` along full `axis`.\n\n        Parameters\n        ----------\n        axis : {0, 1}\n            Axis to apply and broadcast over.\n        apply_func : callable\n            Function to apply.\n        left : NumPy 2D array\n            Left partitions.\n        right : NumPy 2D array\n            Right partitions.\n        keep_partitioning : boolean, default: False\n            The flag to keep partition boundaries for Modin 
Frame if possible.\n            Setting it to True disables shuffling data from one partition to another in case the resulting\n            number of splits is equal to the initial number of splits.\n        num_splits : int, optional\n            The number of partitions to split the result into across the `axis`. If None, then the number\n            of splits will be inferred automatically. If `num_splits` is None and `keep_partitioning=True`\n            then the number of splits is preserved.\n        apply_indices : list of ints, default: None\n            Indices of `axis ^ 1` to apply function over.\n        broadcast_all : bool, default: True\n            Whether or not to pass all right axis partitions to each of the left axis partitions.\n        enumerate_partitions : bool, default: False\n            Whether or not to pass partition index into `apply_func`.\n            Note that `apply_func` must be able to accept `partition_idx` kwarg.\n        lengths : list of ints, default: None\n            The list of lengths to shuffle the object. Note:\n                1. Passing `lengths` omits the `num_splits` parameter as the number of splits\n                will now be inferred from the number of integers present in `lengths`.\n                2. When passing lengths you must explicitly specify `keep_partitioning=False`.\n        apply_func_args : list-like, optional\n            Positional arguments to pass to the `func`.\n        **kwargs : dict\n            Additional options that could be used by different engines.\n\n        Returns\n        -------\n        NumPy array\n            An array of partition objects.\n        \"\"\"\n        ErrorMessage.catch_bugs_and_request_email(\n            failure_condition=keep_partitioning and lengths is not None,\n            extra_log=f\"`keep_partitioning` must be set to `False` when passing `lengths`. 
Got: {keep_partitioning=} | {lengths=}\",\n        )\n\n        # Since we are already splitting the DataFrame back up after an\n        # operation, we will just use this time to compute the number of\n        # partitions as best we can right now.\n        if keep_partitioning and num_splits is None:\n            num_splits = len(left) if axis == 0 else len(left.T)\n        elif lengths:\n            num_splits = len(lengths)\n        elif num_splits is None:\n            num_splits = NPartitions.get()\n        else:\n            ErrorMessage.catch_bugs_and_request_email(\n                failure_condition=not isinstance(num_splits, int),\n                extra_log=f\"Expected `num_splits` to be an integer, got: {type(num_splits)} | {num_splits=}\",\n            )\n        preprocessed_map_func = cls.preprocess_func(apply_func)\n        left_partitions = cls.axis_partition(left, axis)\n        right_partitions = None if right is None else cls.axis_partition(right, axis)\n        # For mapping across the entire axis, we don't maintain partitioning because we\n        # may want to line the partitioning up with another BlockPartitions object. 
Since\n        # we don't need to maintain the partitioning, this gives us the opportunity to\n        # load-balance the data as well.\n        kw = {\n            \"num_splits\": num_splits,\n            \"maintain_partitioning\": keep_partitioning,\n        }\n        if lengths:\n            kw[\"lengths\"] = lengths\n            kw[\"manual_partition\"] = True\n\n        if apply_indices is None:\n            apply_indices = np.arange(len(left_partitions))\n\n        result_blocks = np.array(\n            [\n                left_partitions[i].apply(\n                    preprocessed_map_func,\n                    *(apply_func_args if apply_func_args else []),\n                    other_axis_partition=(\n                        right_partitions if broadcast_all else right_partitions[i]\n                    ),\n                    **kw,\n                    **({\"partition_idx\": idx} if enumerate_partitions else {}),\n                    **kwargs,\n                )\n                for idx, i in enumerate(apply_indices)\n            ]\n        )\n        # If we are mapping over columns, they are returned to use the same as\n        # rows, so we need to transpose the returned 2D NumPy array to return\n        # the structure to the correct order.\n        return result_blocks.T if not axis else result_blocks\n\n    @classmethod\n    @wait_computations_if_benchmark_mode\n    def base_map_partitions(\n        cls,\n        partitions,\n        map_func,\n        func_args=None,\n        func_kwargs=None,\n    ):\n        \"\"\"\n        Apply `map_func` to every partition in `partitions`.\n\n        Parameters\n        ----------\n        partitions : NumPy 2D array\n            Partitions housing the data of Modin Frame.\n        map_func : callable\n            Function to apply.\n        func_args : iterable, optional\n            Positional arguments for the 'map_func'.\n        func_kwargs : dict, optional\n            Keyword arguments for the 
'map_func'.\n\n        Returns\n        -------\n        NumPy array\n            An array of partitions\n        \"\"\"\n        preprocessed_map_func = cls.preprocess_func(map_func)\n        return np.array(\n            [\n                [\n                    part.apply(\n                        preprocessed_map_func,\n                        *func_args if func_args is not None else (),\n                        **func_kwargs if func_kwargs is not None else {},\n                    )\n                    for part in row_of_parts\n                ]\n                for row_of_parts in partitions\n            ]\n        )\n\n    @classmethod\n    @wait_computations_if_benchmark_mode\n    def broadcast_apply(\n        cls,\n        axis,\n        apply_func,\n        left,\n        right,\n    ):\n        \"\"\"\n        Broadcast the `right` partitions to `left` and apply `apply_func` function using different approaches to achieve the best performance.\n\n        Parameters\n        ----------\n        axis : {0, 1}\n            Axis to apply and broadcast over.\n        apply_func : callable\n            Function to apply.\n        left : np.ndarray\n            NumPy array of left partitions.\n        right : np.ndarray\n            NumPy array of right partitions.\n\n        Returns\n        -------\n        np.ndarray\n            NumPy array of result partition objects.\n        \"\"\"\n        if not DynamicPartitioning.get():\n            # block-wise broadcast\n            new_partitions = cls.base_broadcast_apply(\n                axis,\n                apply_func,\n                left,\n                right,\n            )\n        else:\n            # The dynamic partitioning behavior of `broadcast_apply` differs from that of `map_partitions`,\n            # since the columnar approach for `broadcast_apply` results in slowdown.\n            # axis-wise broadcast\n            new_partitions = cls.broadcast_axis_partitions(\n                axis=axis ^ 
1,\n                left=left,\n                right=right,\n                apply_func=apply_func,\n                broadcast_all=False,\n                keep_partitioning=True,\n            )\n        return new_partitions\n\n    @classmethod\n    @wait_computations_if_benchmark_mode\n    def map_partitions(\n        cls,\n        partitions,\n        map_func,\n        func_args=None,\n        func_kwargs=None,\n    ):\n        \"\"\"\n        Apply `map_func` to `partitions` using different approaches to achieve the best performance.\n\n        Parameters\n        ----------\n        partitions : NumPy 2D array\n            Partitions housing the data of Modin Frame.\n        map_func : callable\n            Function to apply.\n        func_args : iterable, optional\n            Positional arguments for the 'map_func'.\n        func_kwargs : dict, optional\n            Keyword arguments for the 'map_func'.\n\n        Returns\n        -------\n        NumPy array\n            An array of partitions\n        \"\"\"\n        if not DynamicPartitioning.get():\n            # block-wise map\n            new_partitions = cls.base_map_partitions(\n                partitions, map_func, func_args, func_kwargs\n            )\n        else:\n            # axis-wise map\n            # we choose an axis for a combination of partitions\n            # whose size is closer to the number of CPUs\n            if abs(partitions.shape[0] - CpuCount.get()) < abs(\n                partitions.shape[1] - CpuCount.get()\n            ):\n                axis = 1\n            else:\n                axis = 0\n\n            column_splits = CpuCount.get() // partitions.shape[1]\n\n            if axis == 0 and column_splits > 1:\n                # splitting by parts of columnar partitions\n                new_partitions = cls.map_partitions_joined_by_column(\n                    partitions, column_splits, map_func, func_args, func_kwargs\n                )\n            else:\n                
# splitting by full axis partitions\n                new_partitions = cls.map_axis_partitions(\n                    axis,\n                    partitions,\n                    lambda df: map_func(\n                        df,\n                        *(func_args if func_args is not None else ()),\n                        **(func_kwargs if func_kwargs is not None else {}),\n                    ),\n                    keep_partitioning=True,\n                )\n        return new_partitions\n\n    @classmethod\n    @wait_computations_if_benchmark_mode\n    def lazy_map_partitions(\n        cls,\n        partitions,\n        map_func,\n        func_args=None,\n        func_kwargs=None,\n        enumerate_partitions=False,\n    ):\n        \"\"\"\n        Apply `map_func` to every partition in `partitions` *lazily*.\n\n        Parameters\n        ----------\n        partitions : NumPy 2D array\n            Partitions of Modin Frame.\n        map_func : callable\n            Function to apply.\n        func_args : iterable, optional\n            Positional arguments for the 'map_func'.\n        func_kwargs : dict, optional\n            Keyword arguments for the 'map_func'.\n        enumerate_partitions : bool, default: False\n\n        Returns\n        -------\n        NumPy array\n            An array of partitions\n        \"\"\"\n        preprocessed_map_func = cls.preprocess_func(map_func)\n        return np.array(\n            [\n                [\n                    part.add_to_apply_calls(\n                        preprocessed_map_func,\n                        *(tuple() if func_args is None else func_args),\n                        **func_kwargs if func_kwargs is not None else {},\n                        **({\"partition_idx\": i} if enumerate_partitions else {}),\n                    )\n                    for part in row\n                ]\n                for i, row in enumerate(partitions)\n            ]\n        )\n\n    @classmethod\n    def 
map_axis_partitions(\n        cls,\n        axis,\n        partitions,\n        map_func,\n        keep_partitioning=False,\n        num_splits=None,\n        lengths=None,\n        enumerate_partitions=False,\n        **kwargs,\n    ):\n        \"\"\"\n        Apply `map_func` to every partition in `partitions` along given `axis`.\n\n        Parameters\n        ----------\n        axis : {0, 1}\n            Axis to perform the map across (0 - index, 1 - columns).\n        partitions : NumPy 2D array\n            Partitions of Modin Frame.\n        map_func : callable\n            Function to apply.\n        keep_partitioning : boolean, default: False\n            The flag to keep partition boundaries for Modin Frame if possible.\n            Setting it to True disables shuffling data from one partition to another in case the resulting\n            number of splits is equal to the initial number of splits.\n        num_splits : int, optional\n            The number of partitions to split the result into across the `axis`. If None, then the number\n            of splits will be inferred automatically. If `num_splits` is None and `keep_partitioning=True`\n            then the number of splits is preserved.\n        lengths : list of ints, default: None\n            The list of lengths to shuffle the object. Note:\n                1. Passing `lengths` omits the `num_splits` parameter as the number of splits\n                will now be inferred from the number of integers present in `lengths`.\n                2. 
When passing lengths you must explicitly specify `keep_partitioning=False`.\n        enumerate_partitions : bool, default: False\n            Whether or not to pass partition index into `map_func`.\n            Note that `map_func` must be able to accept `partition_idx` kwarg.\n        **kwargs : dict\n            Additional options that could be used by different engines.\n\n        Returns\n        -------\n        NumPy array\n            An array of new partitions for Modin Frame.\n\n        Notes\n        -----\n        This method should be used in the case when `map_func` relies on\n        some global information about the axis.\n        \"\"\"\n        return cls.broadcast_axis_partitions(\n            axis=axis,\n            left=partitions,\n            apply_func=map_func,\n            keep_partitioning=keep_partitioning,\n            num_splits=num_splits,\n            right=None,\n            lengths=lengths,\n            enumerate_partitions=enumerate_partitions,\n            **kwargs,\n        )\n\n    @classmethod\n    def map_partitions_joined_by_column(\n        cls,\n        partitions,\n        column_splits,\n        map_func,\n        map_func_args=None,\n        map_func_kwargs=None,\n    ):\n        \"\"\"\n        Combine several blocks by column into one virtual partition and apply \"map_func\" to them.\n\n        Parameters\n        ----------\n        partitions : NumPy 2D array\n            Partitions of Modin Frame.\n        column_splits : int\n            The number of splits by column.\n        map_func : callable\n            Function to apply.\n        map_func_args : iterable, optional\n            Positional arguments for the 'map_func'.\n        map_func_kwargs : dict, optional\n            Keyword arguments for the 'map_func'.\n\n        Returns\n        -------\n        NumPy array\n            An array of new partitions for Modin Frame.\n        \"\"\"\n        if column_splits < 1:\n            raise ValueError(\n          
      \"The value of columns_splits must be greater than or equal to 1.\"\n            )\n        # step cannot be less than 1\n        step = max(partitions.shape[0] // column_splits, 1)\n        preprocessed_map_func = cls.preprocess_func(map_func)\n        result = np.empty(partitions.shape, dtype=object)\n        for i in range(\n            0,\n            partitions.shape[0],\n            step,\n        ):\n            partitions_subset = partitions[i : i + step]\n            # This is necessary when ``partitions.shape[0]`` is not divisible\n            # by `column_splits` without a remainder.\n            actual_step = len(partitions_subset)\n            kw = {\n                \"num_splits\": actual_step,\n            }\n            joined_column_partitions = cls.column_partitions(partitions_subset)\n            for j in range(partitions.shape[1]):\n                result[i : i + actual_step, j] = joined_column_partitions[j].apply(\n                    preprocessed_map_func,\n                    *map_func_args if map_func_args is not None else (),\n                    **kw,\n                    **map_func_kwargs if map_func_kwargs is not None else {},\n                )\n\n        return result\n\n    @classmethod\n    def concat(cls, axis, left_parts, right_parts):\n        \"\"\"\n        Concatenate the blocks of partitions with another set of blocks.\n\n        Parameters\n        ----------\n        axis : int\n            The axis to concatenate to.\n        left_parts : np.ndarray\n            NumPy array of partitions to concatenate with.\n        right_parts : np.ndarray or list\n            NumPy array of partitions to be concatenated.\n\n        Returns\n        -------\n        np.ndarray\n            A new NumPy array with concatenated partitions.\n        list[int] or None\n            Row lengths if possible to compute it.\n\n        Notes\n        -----\n        Assumes that the blocks are already the same shape on the\n        dimension 
being concatenated. A ValueError will be thrown if this\n        condition is not met.\n        \"\"\"\n        # TODO: Possible change is `isinstance(right_parts, list)`\n        if type(right_parts) is list:\n            # `np.array` with partitions of empty ModinFrame has a shape (0,)\n            # but `np.concatenate` can concatenate arrays only if its shapes at\n            # specified axis are equals, so filtering empty frames to avoid concat error\n            right_parts = [o for o in right_parts if o.size != 0]\n            to_concat = (\n                [left_parts] + right_parts if left_parts.size != 0 else right_parts\n            )\n            result = (\n                np.concatenate(to_concat, axis=axis) if len(to_concat) else left_parts\n            )\n        else:\n            result = np.append(left_parts, right_parts, axis=axis)\n        if axis == 0:\n            return cls.rebalance_partitions(result)\n        else:\n            return result, None\n\n    @classmethod\n    def to_pandas(cls, partitions):\n        \"\"\"\n        Convert NumPy array of PandasDataframePartition to pandas DataFrame.\n\n        Parameters\n        ----------\n        partitions : np.ndarray\n            NumPy array of PandasDataframePartition.\n\n        Returns\n        -------\n        pandas.DataFrame\n            A pandas DataFrame\n        \"\"\"\n        return create_pandas_df_from_partitions(\n            cls.get_objects_from_partitions(partitions.flatten()), partitions.shape\n        )\n\n    @classmethod\n    def to_numpy(cls, partitions, **kwargs):\n        \"\"\"\n        Convert NumPy array of PandasDataframePartition to NumPy array of data stored within `partitions`.\n\n        Parameters\n        ----------\n        partitions : np.ndarray\n            NumPy array of PandasDataframePartition.\n        **kwargs : dict\n            Keyword arguments for PandasDataframePartition.to_numpy function.\n\n        Returns\n        -------\n        
np.ndarray\n            A NumPy array.\n        \"\"\"\n        return np.block(\n            [[block.to_numpy(**kwargs) for block in row] for row in partitions]\n        )\n\n    @classmethod\n    def split_pandas_df_into_partitions(\n        cls, df, row_chunksize, col_chunksize, update_bar\n    ):\n        \"\"\"\n        Split given pandas DataFrame according to the row/column chunk sizes into distributed partitions.\n\n        Parameters\n        ----------\n        df : pandas.DataFrame\n        row_chunksize : int\n        col_chunksize : int\n        update_bar : callable(x) -> x\n            Function that updates a progress bar.\n\n        Returns\n        -------\n        2D np.ndarray[PandasDataframePartition]\n        \"\"\"\n        put_func = cls._partition_class.put\n        # even a full-axis slice can cost something (https://github.com/pandas-dev/pandas/issues/55202)\n        # so we try not to do it if unnecessary.\n        if col_chunksize >= len(df.columns):\n            col_parts = [df]\n        else:\n            col_parts = [\n                df.iloc[:, i : i + col_chunksize]\n                for i in range(0, len(df.columns), col_chunksize)\n            ]\n        parts = [\n            [\n                update_bar(\n                    put_func(col_part.iloc[i : i + row_chunksize]),\n                )\n                for col_part in col_parts\n            ]\n            for i in range(0, len(df), row_chunksize)\n        ]\n        return np.array(parts)\n\n    @classmethod\n    @wait_computations_if_benchmark_mode\n    def from_pandas(cls, df, return_dims=False):\n        \"\"\"\n        Return the partitions from pandas.DataFrame.\n\n        Parameters\n        ----------\n        df : pandas.DataFrame\n            A pandas.DataFrame.\n        return_dims : bool, default: False\n            If it's True, return as (np.ndarray, row_lengths, col_widths),\n            else np.ndarray.\n\n        Returns\n        -------\n        
(np.ndarray, backend) or (np.ndarray, backend, row_lengths, col_widths)\n            A NumPy array with partitions (with dimensions or not).\n        \"\"\"\n        num_splits = NPartitions.get()\n        min_row_block_size = MinRowPartitionSize.get()\n        min_column_block_size = MinColumnPartitionSize.get()\n        row_chunksize = compute_chunksize(df.shape[0], num_splits, min_row_block_size)\n        col_chunksize = compute_chunksize(\n            df.shape[1], num_splits, min_column_block_size\n        )\n\n        bar_format = (\n            \"{l_bar}{bar}{r_bar}\"\n            if os.environ.get(\"DEBUG_PROGRESS_BAR\", \"False\") == \"True\"\n            else \"{desc}: {percentage:3.0f}%{bar} Elapsed time: {elapsed}, estimated remaining time: {remaining}\"\n        )\n        if ProgressBar.get():\n            with warnings.catch_warnings():\n                warnings.simplefilter(\"ignore\")\n                try:\n                    from tqdm.autonotebook import tqdm as tqdm_notebook\n                except ImportError:\n                    raise ImportError(\"Please pip install tqdm to use the progress bar\")\n\n            rows = max(1, round(len(df) / row_chunksize))\n            cols = max(1, round(len(df.columns) / col_chunksize))\n            update_count = rows * cols\n            pbar = tqdm_notebook(\n                total=round(update_count),\n                desc=\"Distributing Dataframe\",\n                bar_format=bar_format,\n            )\n        else:\n            pbar = None\n\n        def update_bar(f):\n            if ProgressBar.get():\n                pbar.update(1)\n            return f\n\n        parts = cls.split_pandas_df_into_partitions(\n            df, row_chunksize, col_chunksize, update_bar\n        )\n        backend = get_pandas_backend(df.dtypes)\n        if ProgressBar.get():\n            pbar.close()\n        if not return_dims:\n            return parts, backend\n        else:\n            row_lengths = [\n           
     (\n                    row_chunksize\n                    if i + row_chunksize < len(df)\n                    else len(df) % row_chunksize or row_chunksize\n                )\n                for i in range(0, len(df), row_chunksize)\n            ]\n            col_widths = [\n                (\n                    col_chunksize\n                    if i + col_chunksize < len(df.columns)\n                    else len(df.columns) % col_chunksize or col_chunksize\n                )\n                for i in range(0, len(df.columns), col_chunksize)\n            ]\n            return parts, backend, row_lengths, col_widths\n\n    @classmethod\n    def from_arrow(cls, at, return_dims=False):\n        \"\"\"\n        Return the partitions from Apache Arrow (PyArrow).\n\n        Parameters\n        ----------\n        at : pyarrow.table\n            Arrow Table.\n        return_dims : bool, default: False\n            If it's True, return as (np.ndarray, row_lengths, col_widths),\n            else np.ndarray.\n\n        Returns\n        -------\n        (np.ndarray, backend) or (np.ndarray, backend, row_lengths, col_widths)\n            A NumPy array with partitions (with dimensions or not).\n        \"\"\"\n        return cls.from_pandas(at.to_pandas(), return_dims=return_dims)\n\n    @classmethod\n    def get_objects_from_partitions(cls, partitions):\n        \"\"\"\n        Get the objects wrapped by `partitions` (in parallel if supported).\n\n        Parameters\n        ----------\n        partitions : np.ndarray\n            NumPy array with ``PandasDataframePartition``-s.\n\n        Returns\n        -------\n        list\n            The objects wrapped by `partitions`.\n        \"\"\"\n        if hasattr(cls, \"_execution_wrapper\"):\n            # more efficient parallel implementation\n            for idx, part in enumerate(partitions):\n                if hasattr(part, \"force_materialization\"):\n                    partitions[idx] = 
part.force_materialization()\n            assert all(\n                [len(partition.list_of_blocks) == 1 for partition in partitions]\n            ), \"Implementation assumes that each partition contains a single block.\"\n            return cls._execution_wrapper.materialize(\n                [partition.list_of_blocks[0] for partition in partitions]\n            )\n        return [partition.get() for partition in partitions]\n\n    @classmethod\n    def wait_partitions(cls, partitions):\n        \"\"\"\n        Wait on the objects wrapped by `partitions`, without materializing them.\n\n        This method will block until all computations in the list have completed.\n\n        Parameters\n        ----------\n        partitions : np.ndarray\n            NumPy array with ``PandasDataframePartition``-s.\n\n        Notes\n        -----\n        This method should be implemented in a more efficient way for engines that supports\n        waiting on objects in parallel.\n        \"\"\"\n        for partition in partitions:\n            partition.wait()\n\n    @classmethod\n    def get_indices(cls, axis, partitions, index_func=None):\n        \"\"\"\n        Get the internal indices stored in the partitions.\n\n        Parameters\n        ----------\n        axis : {0, 1}\n            Axis to extract the labels over.\n        partitions : np.ndarray\n            NumPy array with PandasDataframePartition's.\n        index_func : callable, default: None\n            The function to be used to extract the indices.\n\n        Returns\n        -------\n        pandas.Index\n            A pandas Index object.\n        list of pandas.Index\n            The list of internal indices for each partition.\n\n        Notes\n        -----\n        These are the global indices of the object. 
This is mostly useful\n        when you have deleted rows/columns internally, but do not know\n        which ones were deleted.\n        \"\"\"\n        if index_func is None:\n            index_func = lambda df: df.axes[axis]  # noqa: E731\n        ErrorMessage.catch_bugs_and_request_email(not callable(index_func))\n        func = cls.preprocess_func(index_func)\n        target = partitions.T if axis == 0 else partitions\n        if len(target):\n            new_idx = [idx.apply(func) for idx in target[0]]\n            new_idx = cls.get_objects_from_partitions(new_idx)\n        else:\n            new_idx = [pandas.Index([])]\n\n        # filter empty indexes in case there are multiple partitions\n        total_idx = list(filter(len, new_idx))\n        if len(total_idx) > 0:\n            # TODO FIX INFORMATION LEAK!!!!1!!1!!\n            total_idx = total_idx[0].append(total_idx[1:])\n        else:\n            # Meaning that all partitions returned a zero-length index,\n            # in this case, we return an index of any partition to preserve\n            # the index's metadata\n            total_idx = new_idx[0]\n        return total_idx, new_idx\n\n    @classmethod\n    def _apply_func_to_list_of_partitions_broadcast(\n        cls, func, partitions, other, **kwargs\n    ):\n        \"\"\"\n        Apply a function to a list of remote partitions.\n\n        `other` partitions will be broadcasted to `partitions`\n        and `func` will be applied.\n\n        Parameters\n        ----------\n        func : callable\n            The func to apply.\n        partitions : np.ndarray\n            The partitions to which the `func` will apply.\n        other : np.ndarray\n            The partitions to be broadcasted to `partitions`.\n        **kwargs : dict\n            Keyword arguments for PandasDataframePartition.apply function.\n\n        Returns\n        -------\n        list\n            A list of PandasDataframePartition objects.\n        \"\"\"\n        
preprocessed_func = cls.preprocess_func(func)\n        return [\n            obj.apply(preprocessed_func, other=[o.get() for o in broadcasted], **kwargs)\n            for obj, broadcasted in zip(partitions, other.T)\n        ]\n\n    @classmethod\n    def _apply_func_to_list_of_partitions(cls, func, partitions, **kwargs):\n        \"\"\"\n        Apply a function to a list of remote partitions.\n\n        Parameters\n        ----------\n        func : callable\n            The func to apply.\n        partitions : np.ndarray\n            The partitions to which the `func` will apply.\n        **kwargs : dict\n            Keyword arguments for PandasDataframePartition.apply function.\n\n        Returns\n        -------\n        list\n            A list of PandasDataframePartition objects.\n\n        Notes\n        -----\n        This preprocesses the `func` first before applying it to the partitions.\n        \"\"\"\n        preprocessed_func = cls.preprocess_func(func)\n        return [obj.apply(preprocessed_func, **kwargs) for obj in partitions]\n\n    @classmethod\n    def combine(cls, partitions, new_index=None, new_columns=None):\n        \"\"\"\n        Convert a NumPy 2D array of partitions to a NumPy 2D array of a single partition.\n\n        Parameters\n        ----------\n        partitions : np.ndarray\n            The partitions which have to be converted to a single partition.\n        new_index : pandas.Index, optional\n            Index for propagation into internal partitions.\n            Optimization allowing to do this in one remote kernel.\n        new_columns : pandas.Index, optional\n            Columns for propagation into internal partitions.\n            Optimization allowing to do this in one remote kernel.\n\n        Returns\n        -------\n        np.ndarray\n            A NumPy 2D array of a single partition.\n        \"\"\"\n        if partitions.size <= 1 and new_index is None and new_columns is None:\n            return 
partitions\n\n        def to_pandas_remote(df, partition_shape, *dfs):\n            \"\"\"Copy of ``cls.to_pandas()`` method adapted for a remote function.\"\"\"\n            return create_pandas_df_from_partitions(\n                (df,) + dfs,\n                partition_shape,\n                called_from_remote=True,\n                new_index=new_index,\n                new_columns=new_columns,\n            )\n\n        preprocessed_func = cls.preprocess_func(to_pandas_remote)\n        partition_shape = partitions.shape\n        partitions_flattened = partitions.flatten()\n        for idx, part in enumerate(partitions_flattened):\n            if hasattr(part, \"force_materialization\"):\n                partitions_flattened[idx] = part.force_materialization()\n        partition_refs = [\n            partition.list_of_blocks[0] for partition in partitions_flattened[1:]\n        ]\n        combined_partition = partitions.flat[0].apply(\n            preprocessed_func, partition_shape, *partition_refs\n        )\n        return np.array([combined_partition]).reshape(1, -1)\n\n    @classmethod\n    @wait_computations_if_benchmark_mode\n    def apply_func_to_select_indices(\n        cls, axis, partitions, func, indices, keep_remaining=False\n    ):\n        \"\"\"\n        Apply a function to select indices.\n\n        Parameters\n        ----------\n        axis : {0, 1}\n            Axis to apply the `func` over.\n        partitions : np.ndarray\n            The partitions to which the `func` will apply.\n        func : callable\n            The function to apply to these indices of partitions.\n        indices : dict\n            The indices to apply the function to.\n        keep_remaining : bool, default: False\n            Whether or not to keep the other partitions. 
Some operations\n            may want to drop the remaining partitions and keep\n            only the results.\n\n        Returns\n        -------\n        np.ndarray\n            A NumPy array with partitions.\n\n        Notes\n        -----\n        Your internal function must take a kwarg `internal_indices` for\n        this to work correctly. This prevents information leakage of the\n        internal index to the external representation.\n        \"\"\"\n        if partitions.size == 0:\n            return np.array([[]])\n        # Handling dictionaries has to be done differently, but we still want\n        # to figure out the partitions that need to be applied to, so we will\n        # store the dictionary in a separate variable and assign `indices` to\n        # the keys to handle it the same as we normally would.\n        if isinstance(func, dict):\n            dict_func = func\n        else:\n            dict_func = None\n        if not axis:\n            partitions_for_apply = partitions.T\n        else:\n            partitions_for_apply = partitions\n        # We may have a command to perform different functions on different\n        # columns at the same time. We attempt to handle this as efficiently as\n        # possible here. 
Functions that use this in the dictionary format must\n        # accept a keyword argument `func_dict`.\n        if dict_func is not None:\n            if not keep_remaining:\n                result = np.array(\n                    [\n                        cls._apply_func_to_list_of_partitions(\n                            func,\n                            partitions_for_apply[o_idx],\n                            func_dict={\n                                i_idx: dict_func[i_idx]\n                                for i_idx in list_to_apply\n                                if i_idx >= 0\n                            },\n                        )\n                        for o_idx, list_to_apply in indices.items()\n                    ]\n                )\n            else:\n                result = np.array(\n                    [\n                        (\n                            partitions_for_apply[i]\n                            if i not in indices\n                            else cls._apply_func_to_list_of_partitions(\n                                func,\n                                partitions_for_apply[i],\n                                func_dict={\n                                    idx: dict_func[idx]\n                                    for idx in indices[i]\n                                    if idx >= 0\n                                },\n                            )\n                        )\n                        for i in range(len(partitions_for_apply))\n                    ]\n                )\n        else:\n            if not keep_remaining:\n                # We are passing internal indices in here. In order for func to\n                # actually be able to use this information, it must be able to take in\n                # the internal indices. 
This might mean an iloc in the case of Pandas\n                # or some other way to index into the internal representation.\n                result = np.array(\n                    [\n                        cls._apply_func_to_list_of_partitions(\n                            func,\n                            partitions_for_apply[idx],\n                            internal_indices=list_to_apply,\n                        )\n                        for idx, list_to_apply in indices.items()\n                    ]\n                )\n            else:\n                # The difference here is that we modify a subset and return the\n                # remaining (non-updated) blocks in their original position.\n                result = np.array(\n                    [\n                        (\n                            partitions_for_apply[i]\n                            if i not in indices\n                            else cls._apply_func_to_list_of_partitions(\n                                func,\n                                partitions_for_apply[i],\n                                internal_indices=indices[i],\n                            )\n                        )\n                        for i in range(len(partitions_for_apply))\n                    ]\n                )\n        return result.T if not axis else result\n\n    @classmethod\n    @wait_computations_if_benchmark_mode\n    def apply_func_to_select_indices_along_full_axis(\n        cls, axis, partitions, func, indices, keep_remaining=False\n    ):\n        \"\"\"\n        Apply a function to a select subset of full columns/rows.\n\n        Parameters\n        ----------\n        axis : {0, 1}\n            The axis to apply the function over.\n        partitions : np.ndarray\n            The partitions to which the `func` will apply.\n        func : callable\n            The function to apply.\n        indices : list-like\n            The global indices to apply the func to.\n        
keep_remaining : bool, default: False\n            Whether or not to keep the other partitions.\n            Some operations may want to drop the remaining partitions and\n            keep only the results.\n\n        Returns\n        -------\n        np.ndarray\n            A NumPy array with partitions.\n\n        Notes\n        -----\n        This should be used when you need to apply a function that relies\n        on some global information for the entire column/row, but only need\n        to apply a function to a subset.\n        For your func to operate directly on the indices provided,\n        it must use `internal_indices` as a keyword argument.\n        \"\"\"\n        if partitions.size == 0:\n            return np.array([[]])\n        # Handling dictionaries has to be done differently, but we still want\n        # to figure out the partitions that need to be applied to, so we will\n        # store the dictionary in a separate variable and assign `indices` to\n        # the keys to handle it the same as we normally would.\n        if isinstance(func, dict):\n            dict_func = func\n        else:\n            dict_func = None\n        preprocessed_func = cls.preprocess_func(func)\n        # Since we might be keeping the remaining blocks that are not modified,\n        # we have to also keep the block_partitions object in the correct\n        # direction (transpose for columns).\n        if not keep_remaining:\n            selected_partitions = partitions.T if not axis else partitions\n            selected_partitions = np.array([selected_partitions[i] for i in indices])\n            selected_partitions = (\n                selected_partitions.T if not axis else selected_partitions\n            )\n        else:\n            selected_partitions = partitions\n        if not axis:\n            partitions_for_apply = cls.column_partitions(selected_partitions)\n            partitions_for_remaining = partitions.T\n        else:\n            
partitions_for_apply = cls.row_partitions(selected_partitions)\n            partitions_for_remaining = partitions\n        # We may have a command to perform different functions on different\n        # columns at the same time. We attempt to handle this as efficiently as\n        # possible here. Functions that use this in the dictionary format must\n        # accept a keyword argument `func_dict`.\n        if dict_func is not None:\n            if not keep_remaining:\n                result = np.array(\n                    [\n                        part.apply(\n                            preprocessed_func,\n                            func_dict={idx: dict_func[idx] for idx in indices[i]},\n                        )\n                        for i, part in zip(indices, partitions_for_apply)\n                    ]\n                )\n            else:\n                result = np.array(\n                    [\n                        (\n                            partitions_for_remaining[i]\n                            if i not in indices\n                            else cls._apply_func_to_list_of_partitions(\n                                preprocessed_func,\n                                partitions_for_apply[i],\n                                func_dict={idx: dict_func[idx] for idx in indices[i]},\n                            )\n                        )\n                        for i in range(len(partitions_for_apply))\n                    ]\n                )\n        else:\n            if not keep_remaining:\n                # See notes in `apply_func_to_select_indices`\n                result = np.array(\n                    [\n                        part.apply(preprocessed_func, internal_indices=indices[i])\n                        for i, part in zip(indices, partitions_for_apply)\n                    ]\n                )\n            else:\n                # See notes in `apply_func_to_select_indices`\n                result = np.array(\n             
       [\n                        (\n                            partitions_for_remaining[i]\n                            if i not in indices\n                            else partitions_for_apply[i].apply(\n                                preprocessed_func, internal_indices=indices[i]\n                            )\n                        )\n                        for i in range(len(partitions_for_remaining))\n                    ]\n                )\n        return result.T if not axis else result\n\n    @classmethod\n    @wait_computations_if_benchmark_mode\n    def apply_func_to_indices_both_axis(\n        cls,\n        partitions,\n        func,\n        row_partitions_list,\n        col_partitions_list,\n        item_to_distribute=no_default,\n        row_lengths=None,\n        col_widths=None,\n    ):\n        \"\"\"\n        Apply a function along both axes.\n\n        Parameters\n        ----------\n        partitions : np.ndarray\n            The partitions to which the `func` will apply.\n        func : callable\n            The function to apply.\n        row_partitions_list : iterable of tuples\n            Iterable of tuples, containing 2 values:\n                1. Integer row partition index.\n                2. Internal row indexer of this partition.\n        col_partitions_list : iterable of tuples\n            Iterable of tuples, containing 2 values:\n                1. Integer column partition index.\n                2. Internal column indexer of this partition.\n        item_to_distribute : np.ndarray or scalar, default: no_default\n            The item to split up so it can be applied over both axes.\n        row_lengths : list of ints, optional\n            Lengths of partitions for every row. If not specified this information\n            is extracted from partitions itself.\n        col_widths : list of ints, optional\n            Widths of partitions for every column. 
If not specified this information\n            is extracted from partitions itself.\n\n        Returns\n        -------\n        np.ndarray\n            A NumPy array with partitions.\n\n        Notes\n        -----\n        For your func to operate directly on the indices provided,\n        it must use `row_internal_indices`, `col_internal_indices` as keyword\n        arguments.\n        \"\"\"\n        partition_copy = partitions.copy()\n        row_position_counter = 0\n\n        if row_lengths is None:\n            row_lengths = [None] * len(row_partitions_list)\n        if col_widths is None:\n            col_widths = [None] * len(col_partitions_list)\n\n        def compute_part_size(indexer, remote_part, part_idx, axis):\n            \"\"\"Compute indexer length along the specified axis for the passed partition.\"\"\"\n            if isinstance(indexer, slice):\n                shapes_container = row_lengths if axis == 0 else col_widths\n                part_size = shapes_container[part_idx]\n                if part_size is None:\n                    part_size = (\n                        remote_part.length() if axis == 0 else remote_part.width()\n                    )\n                    shapes_container[part_idx] = part_size\n                indexer = range(*indexer.indices(part_size))\n            return len(indexer)\n\n        for row_idx, row_values in enumerate(row_partitions_list):\n            row_blk_idx, row_internal_idx = row_values\n            col_position_counter = 0\n            row_offset = 0\n            for col_idx, col_values in enumerate(col_partitions_list):\n                col_blk_idx, col_internal_idx = col_values\n                remote_part = partition_copy[row_blk_idx, col_blk_idx]\n\n                row_offset = compute_part_size(\n                    row_internal_idx, remote_part, row_idx, axis=0\n                )\n                col_offset = compute_part_size(\n                    col_internal_idx, remote_part, col_idx, 
axis=1\n                )\n\n                if item_to_distribute is not no_default:\n                    if isinstance(item_to_distribute, np.ndarray):\n                        item = item_to_distribute[\n                            row_position_counter : row_position_counter + row_offset,\n                            col_position_counter : col_position_counter + col_offset,\n                        ]\n                    else:\n                        item = item_to_distribute\n                    item = {\"item\": item}\n                else:\n                    item = {}\n                block_result = remote_part.add_to_apply_calls(\n                    func,\n                    row_internal_indices=row_internal_idx,\n                    col_internal_indices=col_internal_idx,\n                    **item,\n                )\n                partition_copy[row_blk_idx, col_blk_idx] = block_result\n                col_position_counter += col_offset\n            row_position_counter += row_offset\n        return partition_copy\n\n    @classmethod\n    @wait_computations_if_benchmark_mode\n    def n_ary_operation(cls, left, func, right: list):\n        r\"\"\"\n        Apply an n-ary operation to multiple ``PandasDataframe`` objects.\n\n        This method assumes that all the partitions of the dataframes in left\n        and right have the same dimensions. 
For each position i, j in each\n        dataframe's partitions, the result has a partition at (i, j) whose data\n        is func(left_partitions[i,j], \\*each_right_partitions[i,j]).\n\n        Parameters\n        ----------\n        left : np.ndarray\n            The partitions of left ``PandasDataframe``.\n        func : callable\n            The function to apply.\n        right : list of np.ndarray\n            The list of partitions of other ``PandasDataframe``.\n\n        Returns\n        -------\n        np.ndarray\n            A NumPy array with new partitions.\n        \"\"\"\n        func = cls.preprocess_func(func)\n\n        def get_right_block(right_partitions, row_idx, col_idx):\n            partition = right_partitions[row_idx][col_idx]\n            blocks = partition.list_of_blocks\n            \"\"\"\n            NOTE:\n            Currently we do one remote call per right virtual partition to\n            materialize the partitions' blocks, then another remote call to do\n            the n_ary operation. we could get better performance if we\n            assembled the other partition within the remote `apply` call, by\n            passing the partition in as `other_axis_partition`. 
However,\n            passing `other_axis_partition` requires some extra care that would\n            complicate the code quite a bit:\n            - block partitions don't know how to deal with `other_axis_partition`\n            - the right axis partition's axis could be different from the axis\n              of the corresponding left partition\n            - there can be multiple other_axis_partition because this is an n-ary\n              operation and n can be > 2.\n            So for now just do the materialization in a separate remote step.\n            \"\"\"\n            if len(blocks) > 1:\n                partition.force_materialization()\n            assert len(partition.list_of_blocks) == 1\n            return partition.list_of_blocks[0]\n\n        return np.array(\n            [\n                [\n                    part.apply(\n                        func,\n                        *(\n                            get_right_block(right_partitions, row_idx, col_idx)\n                            for right_partitions in right\n                        ),\n                    )\n                    for col_idx, part in enumerate(left[row_idx])\n                ]\n                for row_idx in range(len(left))\n            ]\n        )\n\n    @classmethod\n    def finalize(cls, partitions):\n        \"\"\"\n        Perform all deferred calls on partitions.\n\n        Parameters\n        ----------\n        partitions : np.ndarray\n            Partitions of Modin Dataframe on which all deferred calls should be performed.\n        \"\"\"\n        [part.drain_call_queue() for row in partitions for part in row]\n\n    @classmethod\n    def rebalance_partitions(cls, partitions):\n        \"\"\"\n        Rebalance a 2-d array of partitions if we are using ``PandasOnRay`` or ``PandasOnDask`` executions.\n\n        For all other executions, the partitions are returned unchanged.\n\n        Rebalance the partitions by building a new array\n        of partitions 
out of the original ones so that:\n\n        - If all partitions have a length, each new partition has roughly the same number of rows.\n        - Otherwise, each new partition spans roughly the same number of old partitions.\n\n        Parameters\n        ----------\n        partitions : np.ndarray\n            The 2-d array of partitions to rebalance.\n\n        Returns\n        -------\n        np.ndarray\n            A NumPy array with the same; or new, rebalanced, partitions, depending on the execution\n            engine and storage format.\n        list[int] or None\n            Row lengths if possible to compute it.\n        \"\"\"\n        # We rebalance when the ratio of the number of existing partitions to\n        # the ideal number of partitions is larger than this threshold. The\n        # threshold is a heuristic that may need to be tuned for performance.\n        max_excess_of_num_partitions = 1.5\n        num_existing_partitions = partitions.shape[0]\n        ideal_num_new_partitions = NPartitions.get()\n        if (\n            num_existing_partitions\n            <= ideal_num_new_partitions * max_excess_of_num_partitions\n        ):\n            return partitions, None\n        # If any partition has an unknown length, give each axis partition\n        # roughly the same number of row partitions. 
We use `_length_cache` here\n        # to avoid materializing any unmaterialized lengths.\n        if any(\n            partition._length_cache is None for row in partitions for partition in row\n        ):\n            # We need each partition to go into an axis partition, but the\n            # number of axis partitions may not evenly divide the number of\n            # partitions.\n            chunk_size = compute_chunksize(\n                num_existing_partitions, ideal_num_new_partitions, min_block_size=1\n            )\n            new_partitions = np.array(\n                [\n                    cls.column_partitions(\n                        partitions[i : i + chunk_size],\n                        full_axis=False,\n                    )\n                    for i in range(\n                        0,\n                        num_existing_partitions,\n                        chunk_size,\n                    )\n                ]\n            )\n            return new_partitions, None\n\n        # If we know the number of rows in every partition, then we should try\n        # instead to give each new partition roughly the same number of rows.\n        new_partitions = []\n        # `start` is the index of the first existing partition that we want to\n        # put into the current new partition.\n        start = 0\n        total_rows = sum(part.length() for part in partitions[:, 0])\n        ideal_partition_size = compute_chunksize(\n            total_rows, ideal_num_new_partitions, min_block_size=1\n        )\n        for _ in range(ideal_num_new_partitions):\n            # We might pick up old partitions too quickly and exhaust all of them.\n            if start >= len(partitions):\n                break\n            # `stop` is the index of the last existing partition so far that we\n            # want to put into the current new partition.\n            stop = start\n            partition_size = partitions[start][0].length()\n            # Add existing 
partitions into the current new partition until the\n            # number of rows in the new partition hits `ideal_partition_size`.\n            while stop < len(partitions) and partition_size < ideal_partition_size:\n                stop += 1\n                if stop < len(partitions):\n                    partition_size += partitions[stop][0].length()\n            # If the new partition is larger than we want, split the last\n            # current partition that it contains into two partitions, where\n            # the first partition has just enough rows to make the current\n            # new partition have length `ideal_partition_size`, and the second\n            # partition has the remainder.\n            if partition_size > ideal_partition_size * max_excess_of_num_partitions:\n                prev_length = sum(row[0].length() for row in partitions[start:stop])\n                new_last_partition_size = ideal_partition_size - prev_length\n                partitions = np.insert(\n                    partitions,\n                    stop + 1,\n                    [\n                        obj.mask(slice(new_last_partition_size, None), slice(None))\n                        for obj in partitions[stop]\n                    ],\n                    0,\n                )\n                # TODO: explicit `_length_cache` computing may be avoided after #4903 is merged\n                for obj in partitions[stop + 1]:\n                    obj._length_cache = partition_size - (\n                        prev_length + new_last_partition_size\n                    )\n\n                partitions[stop, :] = [\n                    obj.mask(slice(None, new_last_partition_size), slice(None))\n                    for obj in partitions[stop]\n                ]\n                # TODO: explicit `_length_cache` computing may be avoided after #4903 is merged\n                for obj in partitions[stop]:\n                    obj._length_cache = new_last_partition_size\n\n            
# The new virtual partitions are not `full_axis`, even if they\n            # happen to span all rows in the dataframe, because they are\n            # meant to be the final partitions of the dataframe. They've\n            # already been split up correctly along axis 0, but using the\n            # default full_axis=True would cause partition.apply() to split\n            # its result along axis 0.\n            new_partitions.append(\n                cls.column_partitions(partitions[start : stop + 1], full_axis=False)\n            )\n            start = stop + 1\n        new_partitions = np.array(new_partitions)\n        lengths = [part.length() for part in new_partitions[:, 0]]\n        return new_partitions, lengths\n\n    @classmethod\n    @wait_computations_if_benchmark_mode\n    def shuffle_partitions(\n        cls,\n        partitions,\n        index,\n        shuffle_functions: \"ShuffleFunctions\",\n        final_shuffle_func,\n        right_partitions=None,\n    ):\n        \"\"\"\n        Return shuffled partitions.\n\n        Parameters\n        ----------\n        partitions : np.ndarray\n            The 2-d array of partitions to shuffle.\n        index : int or list of ints\n            The index(es) of the column partitions corresponding to the partitions that contain the column to sample.\n        shuffle_functions : ShuffleFunctions\n            An object implementing the functions that we will be using to perform this shuffle.\n        final_shuffle_func : Callable(pandas.DataFrame) -> pandas.DataFrame\n            Function that shuffles the data within each new partition.\n        right_partitions : np.ndarray, optional\n            Partitions to broadcast to `self` partitions. 
If specified, the method builds range-partitioning\n            for `right_partitions` based on bins calculated for `partitions`, then performs broadcasting.\n\n        Returns\n        -------\n        np.ndarray\n            A list of row-partitions that have been shuffled.\n        \"\"\"\n        # Mask the partition that contains the column that will be sampled.\n        masked_partitions = partitions[:, index]\n        # Sample each partition\n        sample_func = cls.preprocess_func(shuffle_functions.sample_fn)\n        if masked_partitions.ndim == 1:\n            samples = [partition.apply(sample_func) for partition in masked_partitions]\n        else:\n            samples = [\n                cls._row_partition_class(row_part, full_axis=False).apply(sample_func)\n                for row_part in masked_partitions\n            ]\n        # Get each sample to pass in to the pivot function\n        samples = cls.get_objects_from_partitions(samples)\n        num_bins = shuffle_functions.pivot_fn(samples)\n        # Convert our list of block partitions to row partitions. 
We need to create full-axis\n        # row partitions since we need to send the whole partition to the split step as otherwise\n        # we wouldn't know how to split the block partitions that don't contain the shuffling key.\n        row_partitions = cls.row_partitions(partitions)\n        if num_bins > 1:\n            # Gather together all of the sub-partitions\n            split_row_partitions = np.array(\n                [\n                    partition.split(\n                        shuffle_functions.split_fn,\n                        num_splits=num_bins,\n                        # The partition's metadata will never be accessed for the split partitions,\n                        # thus no need to compute it.\n                        extract_metadata=False,\n                    )\n                    for partition in row_partitions\n                ]\n            ).T\n\n            if right_partitions is None:\n                # We need to convert every partition that came from the splits into a column partition.\n                return np.array(\n                    [\n                        [\n                            cls._column_partitions_class(\n                                row_partition, full_axis=False\n                            ).apply(final_shuffle_func)\n                        ]\n                        for row_partition in split_row_partitions\n                    ]\n                )\n\n            right_row_parts = cls.row_partitions(right_partitions)\n            right_split_row_partitions = np.array(\n                [\n                    partition.split(\n                        shuffle_functions.split_fn,\n                        num_splits=num_bins,\n                        extract_metadata=False,\n                    )\n                    for partition in right_row_parts\n                ]\n            ).T\n            return np.array(\n                [\n                    cls._column_partitions_class(row_partition, 
full_axis=False).apply(\n                        final_shuffle_func,\n                        other_axis_partition=cls._column_partitions_class(\n                            right_row_partitions\n                        ),\n                    )\n                    for right_row_partitions, row_partition in zip(\n                        right_split_row_partitions, split_row_partitions\n                    )\n                ]\n            )\n\n        else:\n            # If there are no pivots, we can simply apply the function row-wise\n            if right_partitions is None:\n                return np.array(\n                    [row_part.apply(final_shuffle_func) for row_part in row_partitions]\n                )\n            right_row_parts = cls.row_partitions(right_partitions)\n            return np.array(\n                [\n                    row_part.apply(\n                        final_shuffle_func, other_axis_partition=right_row_part\n                    )\n                    for right_row_part, row_part in zip(right_row_parts, row_partitions)\n                ]\n            )\n"
  },
  {
    "path": "modin/core/dataframe/pandas/utils.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\n\"\"\"Collection of utility functions for the PandasDataFrame.\"\"\"\n\nimport pandas\nfrom pandas.api.types import union_categoricals\n\nfrom modin.error_message import ErrorMessage\n\n\ndef concatenate(dfs, copy=True):\n    \"\"\"\n    Concatenate pandas DataFrames with saving 'category' dtype.\n\n    All dataframes' columns must be equal to each other.\n\n    Parameters\n    ----------\n    dfs : list\n        List of pandas DataFrames to concatenate.\n    copy : bool, default: True\n        Make explicit copy when creating dataframe.\n\n    Returns\n    -------\n    pandas.DataFrame\n        A pandas DataFrame.\n    \"\"\"\n    for df in dfs:\n        assert df.columns.equals(dfs[0].columns)\n    for i in dfs[0].columns.get_indexer_for(dfs[0].select_dtypes(\"category\").columns):\n        columns = [df.iloc[:, i] for df in dfs]\n        all_categorical_parts_are_empty = None\n        has_non_categorical_parts = False\n        for col in columns:\n            if isinstance(col.dtype, pandas.CategoricalDtype):\n                if all_categorical_parts_are_empty is None:\n                    all_categorical_parts_are_empty = len(col) == 0\n                    
continue\n                all_categorical_parts_are_empty &= len(col) == 0\n            else:\n                has_non_categorical_parts = True\n        # 'union_categoricals' raises an error if some of the passed values don't have categorical dtype,\n        # if it happens, we only want to continue when all parts with categorical dtypes are actually empty.\n        # This can happen if there were an aggregation that discards categorical dtypes and that aggregation\n        # doesn't properly do so for empty partitions\n        if has_non_categorical_parts and all_categorical_parts_are_empty:\n            continue\n        union = union_categoricals(columns)\n        for df in dfs:\n            df.isetitem(\n                i, pandas.Categorical(df.iloc[:, i], categories=union.categories)\n            )\n    # `ValueError: buffer source array is read-only` if copy==False\n    if len(dfs) == 1 and copy:\n        # concat doesn't make a copy if len(dfs) == 1,\n        # so do it explicitly\n        return dfs[0].copy()\n    return pandas.concat(dfs, copy=copy)\n\n\ndef create_pandas_df_from_partitions(\n    partition_data,\n    partition_shape,\n    called_from_remote=False,\n    new_index=None,\n    new_columns=None,\n):\n    \"\"\"\n    Convert partition data of multiple dataframes to a single dataframe.\n\n    Parameters\n    ----------\n    partition_data : list\n        List of pandas DataFrames or list of Object references holding pandas DataFrames.\n    partition_shape : int or tuple\n        Shape of the partitions NumPy array.\n    called_from_remote : bool, default: False\n        Flag used to check if explicit copy should be done in concat.\n    new_index : pandas.Index, optional\n        Index for propagation into internal partitions.\n        Optimization allowing to do this in one remote kernel.\n    new_columns : pandas.Index, optional\n        Columns for propagation into internal partitions.\n        Optimization allowing to do this in one remote 
kernel.\n\n    Returns\n    -------\n    pandas.DataFrame\n        A pandas DataFrame.\n    \"\"\"\n    if all(\n        isinstance(obj, (pandas.DataFrame, pandas.Series)) for obj in partition_data\n    ):\n        height, width, *_ = tuple(partition_shape) + (0,)\n        # restore 2d array\n        objs = iter(partition_data)\n        partition_data = [[next(objs) for _ in range(width)] for __ in range(height)]\n    else:\n        # Partitions do not always contain pandas objects.\n        # This implementation comes from the fact that calling `partition.get`\n        # function is not always equivalent to `partition.to_pandas`.\n        partition_data = [[obj.to_pandas() for obj in part] for part in partition_data]\n    if all(isinstance(part, pandas.Series) for row in partition_data for part in row):\n        axis = 0\n    elif all(\n        isinstance(part, pandas.DataFrame) for row in partition_data for part in row\n    ):\n        axis = 1\n    else:\n        ErrorMessage.catch_bugs_and_request_email(True)\n\n    def is_part_empty(part):\n        return part.empty and (\n            not isinstance(part, pandas.DataFrame) or (len(part.columns) == 0)\n        )\n\n    df_rows = [\n        pandas.concat([part for part in row], axis=axis, copy=False)\n        for row in partition_data\n        if not all(is_part_empty(part) for part in row)\n    ]\n\n    # to reduce peak memory consumption\n    del partition_data\n\n    if len(df_rows) == 0:\n        res = pandas.DataFrame()\n    else:\n        res = concatenate(df_rows, copy=not called_from_remote)\n\n    if new_index is not None:\n        res.index = new_index\n    if new_columns is not None:\n        res.columns = new_columns\n\n    return res\n"
  },
  {
    "path": "modin/core/execution/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Modin's functionality related to execution engines supported.\"\"\"\n"
  },
  {
    "path": "modin/core/execution/dask/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Modin's functionality related to Dask execution engine.\"\"\"\n"
  },
  {
    "path": "modin/core/execution/dask/common/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Common utilities for Dask execution engine.\"\"\"\n\nfrom .engine_wrapper import DaskWrapper\nfrom .utils import initialize_dask\n\n__all__ = [\n    \"initialize_dask\",\n    \"DaskWrapper\",\n]\n"
  },
  {
    "path": "modin/core/execution/dask/common/engine_wrapper.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module houses class responsible for execution of remote operations.\"\"\"\n\nfrom collections import UserDict\n\nimport pandas\nfrom dask.distributed import wait\nfrom distributed import Future\nfrom distributed.client import default_client\nfrom distributed.worker import get_worker\n\n\ndef get_dask_client():\n    \"\"\"\n    Get the Dask client, reusing the worker's client if execution is on a Dask worker.\n\n    Returns\n    -------\n    distributed.Client\n        The Dask client.\n    \"\"\"\n    try:\n        client = default_client()\n    except ValueError:\n        # We ought to be in a worker process\n        worker = get_worker()\n        client = worker.client\n    return client\n\n\ndef _deploy_dask_func(func, *args, return_pandas_df=None, **kwargs):  # pragma: no cover\n    \"\"\"\n    Wrap `func` to ease calling it remotely.\n\n    Parameters\n    ----------\n    func : callable\n        A local function that we want to call remotely.\n    *args : iterable\n        Positional arguments to pass to `func` when calling remotely.\n    return_pandas_df : bool, optional\n        Whether to convert the result of `func` to a pandas DataFrame or not.\n   
 **kwargs : dict\n        Keyword arguments to pass to `func` when calling remotely.\n\n    Returns\n    -------\n    distributed.Future or list\n        Dask identifier of the result being put into distributed memory.\n    \"\"\"\n    result = func(*args, **kwargs)\n    if return_pandas_df and not isinstance(result, pandas.DataFrame):\n        result = pandas.DataFrame(result)\n    return result\n\n\nclass DaskWrapper:\n    \"\"\"The class responsible for execution of remote operations.\"\"\"\n\n    @classmethod\n    def deploy(\n        cls,\n        func,\n        f_args=None,\n        f_kwargs=None,\n        return_pandas_df=None,\n        num_returns=1,\n        pure=True,\n    ):\n        \"\"\"\n        Deploy a function in a worker process.\n\n        Parameters\n        ----------\n        func : callable or distributed.Future\n            Function to be deployed in a worker process.\n        f_args : list or tuple, optional\n            Positional arguments to pass to ``func``.\n        f_kwargs : dict, optional\n            Keyword arguments to pass to ``func``.\n        return_pandas_df : bool, optional\n            Whether to convert the result of `func` to a pandas DataFrame or not.\n        num_returns : int, default: 1\n            The number of returned objects.\n        pure : bool, default: True\n            Whether or not `func` is pure. 
See `Client.submit` for details.\n\n        Returns\n        -------\n        list\n            The result of ``func`` split into parts in accordance with ``num_returns``.\n        \"\"\"\n        client = get_dask_client()\n        args = [] if f_args is None else f_args\n        kwargs = {} if f_kwargs is None else f_kwargs\n        if callable(func):\n            remote_task_future = client.submit(func, *args, pure=pure, **kwargs)\n        else:\n            # for the case where type(func) is distributed.Future\n            remote_task_future = client.submit(\n                _deploy_dask_func,\n                func,\n                *args,\n                pure=pure,\n                return_pandas_df=return_pandas_df,\n                **kwargs,\n            )\n        if num_returns != 1:\n            return [\n                client.submit(lambda tup, i: tup[i], remote_task_future, i)\n                for i in range(num_returns)\n            ]\n        return remote_task_future\n\n    @classmethod\n    def is_future(cls, item):\n        \"\"\"\n        Check if the item is a Future.\n\n        Parameters\n        ----------\n        item : distributed.Future or object\n            Future or object to check.\n\n        Returns\n        -------\n        boolean\n            If the value is a future.\n        \"\"\"\n        return isinstance(item, Future)\n\n    @classmethod\n    def materialize(cls, future):\n        \"\"\"\n        Materialize data matching `future` object.\n\n        Parameters\n        ----------\n        future : distributed.Future or list\n            Future object or list of future objects whose data needs to be materialized.\n\n        Returns\n        -------\n        Any\n            An object(s) from the distributed memory.\n        \"\"\"\n        client = get_dask_client()\n        return client.gather(future)\n\n    @classmethod\n    def put(cls, data, **kwargs):\n        \"\"\"\n        Put data into distributed memory.\n\n      
  Parameters\n        ----------\n        data : list, dict, or object\n            Data to scatter out to workers. Output type matches input type.\n        **kwargs : dict\n            Additional keyword arguments to be passed in `Client.scatter`.\n\n        Returns\n        -------\n        List, dict, iterator, or queue of futures matching the type of input.\n        \"\"\"\n        if isinstance(data, dict):\n            # there is a bug that looks similar to https://github.com/dask/distributed/issues/3965;\n            # to avoid this we could change behaviour for serialization:\n            # <Future: finished, type: collections.UserDict, key: UserDict-b8a15c164319c1d32fd28481125de455>\n            # vs\n            # {'sep': <Future: finished, type: pandas._libs.lib._NoDefault, key: sep>, \\\n            #  'delimiter': <Future: finished, type: NoneType, key: delimiter> ...\n            data = UserDict(data)\n        client = get_dask_client()\n        return client.scatter(data, **kwargs)\n\n    @classmethod\n    def wait(cls, obj_ids, num_returns=None):\n        \"\"\"\n        Wait on the objects without materializing them (blocking operation).\n\n        Parameters\n        ----------\n        obj_ids : list, scalar\n        num_returns : int, optional\n        \"\"\"\n        if not isinstance(obj_ids, list):\n            obj_ids = [obj_ids]\n        if num_returns is None:\n            num_returns = len(obj_ids)\n        if num_returns == len(obj_ids):\n            wait(obj_ids, return_when=\"ALL_COMPLETED\")\n        else:\n            # Dask doesn't natively support `num_returns` as int.\n            # `wait` function doesn't always return only one finished future,\n            # so a simple loop is not enough here\n            done, not_done = wait(obj_ids, return_when=\"FIRST_COMPLETED\")\n            while len(done) < num_returns and (i := 0 < num_returns):\n                extra_done, not_done = wait(not_done, return_when=\"FIRST_COMPLETED\")\n   
             done.update(extra_done)\n                i += 1\n"
  },
  {
    "path": "modin/core/execution/dask/common/utils.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module houses utility function to initialize Dask environment.\"\"\"\n\nimport os\n\nfrom modin.config import (\n    CIAWSAccessKeyID,\n    CIAWSSecretAccessKey,\n    CpuCount,\n    DaskThreadsPerWorker,\n    GithubCI,\n    Memory,\n    NPartitions,\n)\nfrom modin.core.execution.utils import set_env\n\n\ndef initialize_dask():\n    \"\"\"Initialize Dask environment.\"\"\"\n    from distributed.client import default_client\n    from distributed.worker import get_worker\n\n    try:\n        # Check if running within a Dask worker process\n        get_worker()\n        # If the above line does not raise an error, we are in a worker process\n        # and should not create a new client\n        return\n    except ValueError:\n        # Not in a Dask worker, proceed to check for or create a client\n        pass\n\n    try:\n        client = default_client()\n\n        def _disable_warnings():\n            import warnings\n\n            warnings.simplefilter(\"ignore\", category=FutureWarning)\n\n        client.run(_disable_warnings)\n\n    except ValueError:\n        from distributed import Client\n\n        num_cpus = CpuCount.get()\n        threads_per_worker = 
DaskThreadsPerWorker.get()\n        memory_limit = Memory.get()\n        worker_memory_limit = memory_limit // num_cpus if memory_limit else \"auto\"\n\n        # when the client is initialized, environment variables are inherited\n        with set_env(PYTHONWARNINGS=\"ignore::FutureWarning\"):\n            client = Client(\n                n_workers=num_cpus,\n                threads_per_worker=threads_per_worker,\n                memory_limit=worker_memory_limit,\n            )\n\n        if GithubCI.get():\n            # set these keys to run tests that write to the mock s3 service. this seems\n            # to be the way to pass environment variables to the workers:\n            # https://jacobtomlinson.dev/posts/2021/bio-for-2021/\n            access_key = CIAWSAccessKeyID.get()\n            aws_secret = CIAWSSecretAccessKey.get()\n            client.run(\n                lambda: os.environ.update(\n                    {\n                        \"AWS_ACCESS_KEY_ID\": access_key,\n                        \"AWS_SECRET_ACCESS_KEY\": aws_secret,\n                    }\n                )\n            )\n\n    num_cpus = len(client.ncores())\n    NPartitions._put(num_cpus)\n    CpuCount._put(num_cpus)\n"
  },
  {
    "path": "modin/core/execution/dask/implementations/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Modin's functionality related to Dask execution engine and optimized for specific storage formats.\"\"\"\n"
  },
  {
    "path": "modin/core/execution/dask/implementations/pandas_on_dask/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Modin's functionality related to Dask execution engine and optimized for pandas storage format.\"\"\"\n"
  },
  {
    "path": "modin/core/execution/dask/implementations/pandas_on_dask/dataframe/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Base Modin Dataframe class optimized for pandas on Dask execution.\"\"\"\n\nfrom .dataframe import PandasOnDaskDataframe\n\n__all__ = [\"PandasOnDaskDataframe\"]\n"
  },
  {
    "path": "modin/core/execution/dask/implementations/pandas_on_dask/dataframe/dataframe.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module houses class that implements ``PandasDataframe``.\"\"\"\n\nfrom modin.core.dataframe.pandas.dataframe.dataframe import PandasDataframe\nfrom modin.utils import _inherit_docstrings\n\nfrom ..partitioning.partition_manager import PandasOnDaskDataframePartitionManager\n\n\nclass PandasOnDaskDataframe(PandasDataframe):\n    \"\"\"\n    The class implements the interface in ``PandasDataframe``.\n\n    Parameters\n    ----------\n    partitions : np.ndarray\n        A 2D NumPy array of partitions.\n    index : sequence\n        The index for the dataframe. Converted to a pandas.Index.\n    columns : sequence\n        The columns object for the dataframe. Converted to a pandas.Index.\n    row_lengths : list, optional\n        The length of each partition in the rows. The \"height\" of\n        each of the block partitions. Is computed if not provided.\n    column_widths : list, optional\n        The width of each partition in the columns. The \"width\" of\n        each of the block partitions. 
Is computed if not provided.\n    dtypes : pandas.Series, optional\n        The data types for the dataframe columns.\n    pandas_backend : {\"pyarrow\", None}, optional\n        Backend used by pandas. None - means default NumPy backend.\n    \"\"\"\n\n    _partition_mgr_cls = PandasOnDaskDataframePartitionManager\n\n    @classmethod\n    def reconnect(cls, address, attributes):  # noqa: GL08\n        # The main goal is to configure the client for the worker process\n        # using the address passed by the custom `__reduce__` function\n        try:\n            from distributed import default_client\n\n            default_client()\n        except ValueError:\n            from distributed import Client\n\n            # setup `default_client` for worker process\n            _ = Client(address)\n        obj = cls.__new__(cls)\n        obj.__dict__.update(attributes)\n        return obj\n\n    def __reduce__(self):  # noqa: GL08\n        from distributed import default_client\n\n        address = default_client().scheduler_info()[\"address\"]\n        return self.reconnect, (address, self.__dict__)\n\n    @property\n    @_inherit_docstrings(PandasDataframe.engine)\n    def engine(self) -> str:\n        return \"Dask\"\n"
  },
  {
    "path": "modin/core/execution/dask/implementations/pandas_on_dask/io/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Base IO classes optimized for pandas on Dask execution.\"\"\"\n\nfrom .io import PandasOnDaskIO\n\n__all__ = [\n    \"PandasOnDaskIO\",\n]\n"
  },
  {
    "path": "modin/core/execution/dask/implementations/pandas_on_dask/io/io.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module houses class that implements ``BaseIO`` using Dask as an execution engine.\"\"\"\n\nimport numpy as np\nfrom distributed.client import default_client\n\nfrom modin.core.execution.dask.common import DaskWrapper\nfrom modin.core.execution.dask.implementations.pandas_on_dask.dataframe import (\n    PandasOnDaskDataframe,\n)\nfrom modin.core.execution.dask.implementations.pandas_on_dask.partitioning import (\n    PandasOnDaskDataframePartition,\n)\nfrom modin.core.io import (\n    BaseIO,\n    CSVDispatcher,\n    ExcelDispatcher,\n    FeatherDispatcher,\n    FWFDispatcher,\n    JSONDispatcher,\n    ParquetDispatcher,\n    SQLDispatcher,\n)\nfrom modin.core.storage_formats.pandas.parsers import (\n    PandasCSVParser,\n    PandasExcelParser,\n    PandasFeatherParser,\n    PandasFWFParser,\n    PandasJSONParser,\n    PandasParquetParser,\n    PandasSQLParser,\n)\nfrom modin.core.storage_formats.pandas.query_compiler import PandasQueryCompiler\nfrom modin.distributed.dataframe.pandas.partitions import (\n    from_partitions,\n    unwrap_partitions,\n)\nfrom modin.experimental.core.io import (\n    ExperimentalCSVGlobDispatcher,\n    
ExperimentalCustomTextDispatcher,\n    ExperimentalGlobDispatcher,\n    ExperimentalSQLDispatcher,\n)\nfrom modin.experimental.core.storage_formats.pandas.parsers import (\n    ExperimentalCustomTextParser,\n    ExperimentalPandasCSVGlobParser,\n    ExperimentalPandasJsonParser,\n    ExperimentalPandasParquetParser,\n    ExperimentalPandasPickleParser,\n    ExperimentalPandasXmlParser,\n)\nfrom modin.pandas.series import Series\nfrom modin.utils import MODIN_UNNAMED_SERIES_LABEL\n\n\nclass PandasOnDaskIO(BaseIO):\n    \"\"\"The class implements interface in ``BaseIO`` using Dask as an execution engine.\"\"\"\n\n    frame_cls = PandasOnDaskDataframe\n    frame_partition_cls = PandasOnDaskDataframePartition\n    query_compiler_cls = PandasQueryCompiler\n    build_args = dict(\n        frame_cls=PandasOnDaskDataframe,\n        frame_partition_cls=PandasOnDaskDataframePartition,\n        query_compiler_cls=PandasQueryCompiler,\n        base_io=BaseIO,\n    )\n\n    def __make_read(*classes, build_args=build_args):\n        # used to reduce code duplication\n        return type(\"\", (DaskWrapper, *classes), build_args).read\n\n    def __make_write(*classes, build_args=build_args):\n        # used to reduce code duplication\n        return type(\"\", (DaskWrapper, *classes), build_args).write\n\n    read_csv = __make_read(PandasCSVParser, CSVDispatcher)\n    read_fwf = __make_read(PandasFWFParser, FWFDispatcher)\n    read_json = __make_read(PandasJSONParser, JSONDispatcher)\n    read_parquet = __make_read(PandasParquetParser, ParquetDispatcher)\n    to_parquet = __make_write(ParquetDispatcher)\n    # Blocked on pandas-dev/pandas#12236. 
It is faster to default to pandas.\n    # read_hdf = __make_read(PandasHDFParser, HDFReader)\n    read_feather = __make_read(PandasFeatherParser, FeatherDispatcher)\n    read_sql = __make_read(PandasSQLParser, SQLDispatcher)\n    to_sql = __make_write(SQLDispatcher)\n    read_excel = __make_read(PandasExcelParser, ExcelDispatcher)\n\n    # experimental methods that don't exist in pandas\n    read_csv_glob = __make_read(\n        ExperimentalPandasCSVGlobParser, ExperimentalCSVGlobDispatcher\n    )\n    read_parquet_glob = __make_read(\n        ExperimentalPandasParquetParser, ExperimentalGlobDispatcher\n    )\n    to_parquet_glob = __make_write(\n        ExperimentalGlobDispatcher,\n        build_args={**build_args, \"base_write\": BaseIO.to_parquet},\n    )\n    read_json_glob = __make_read(\n        ExperimentalPandasJsonParser, ExperimentalGlobDispatcher\n    )\n    to_json_glob = __make_write(\n        ExperimentalGlobDispatcher,\n        build_args={**build_args, \"base_write\": BaseIO.to_json},\n    )\n    read_xml_glob = __make_read(ExperimentalPandasXmlParser, ExperimentalGlobDispatcher)\n    to_xml_glob = __make_write(\n        ExperimentalGlobDispatcher,\n        build_args={**build_args, \"base_write\": BaseIO.to_xml},\n    )\n    read_pickle_glob = __make_read(\n        ExperimentalPandasPickleParser, ExperimentalGlobDispatcher\n    )\n    to_pickle_glob = __make_write(\n        ExperimentalGlobDispatcher,\n        build_args={**build_args, \"base_write\": BaseIO.to_pickle},\n    )\n    read_custom_text = __make_read(\n        ExperimentalCustomTextParser, ExperimentalCustomTextDispatcher\n    )\n    read_sql_distributed = __make_read(\n        ExperimentalSQLDispatcher, build_args={**build_args, \"base_read\": read_sql}\n    )\n\n    del __make_read  # to not pollute class namespace\n    del __make_write  # to not pollute class namespace\n\n    @classmethod\n    def from_dask(cls, dask_obj):\n        \"\"\"\n        Create a Modin `query_compiler` from 
a Dask DataFrame.\n\n        Parameters\n        ----------\n        dask_obj : dask.dataframe.DataFrame\n            The Dask DataFrame to convert from.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            QueryCompiler containing data from the Dask DataFrame.\n        \"\"\"\n        client = default_client()\n        dask_fututures = client.compute(dask_obj.to_delayed())\n        modin_df = from_partitions(dask_fututures, axis=0)._query_compiler\n        return modin_df\n\n    @classmethod\n    def to_dask(cls, modin_obj):\n        \"\"\"\n        Convert a Modin DataFrame/Series to a Dask DataFrame/Series.\n\n        Parameters\n        ----------\n        modin_obj : modin.pandas.DataFrame, modin.pandas.Series\n            The Modin DataFrame/Series to convert.\n\n        Returns\n        -------\n        dask.dataframe.DataFrame or dask.dataframe.Series\n            Converted object with type depending on input.\n        \"\"\"\n        from dask.dataframe import from_delayed\n\n        partitions = unwrap_partitions(modin_obj, axis=0)\n\n        # partitions must be converted to pandas Series\n        if isinstance(modin_obj, Series):\n            client = default_client()\n\n            def df_to_series(df):\n                series = df[df.columns[0]]\n                if df.columns[0] == MODIN_UNNAMED_SERIES_LABEL:\n                    series.name = None\n                return series\n\n            partitions = [client.submit(df_to_series, part) for part in partitions]\n\n        return from_delayed(partitions)\n\n    @classmethod\n    def from_map(cls, func, iterable, *args, **kwargs):\n        \"\"\"\n        Create a Modin `query_compiler` from a map function.\n\n        This method will construct a Modin `query_compiler` split by row partitions.\n        The number of row partitions matches the number of elements in the iterable object.\n\n        Parameters\n        ----------\n        func : callable\n            Function to 
map across the iterable object.\n        iterable : Iterable\n            An iterable object.\n        *args : tuple\n            Positional arguments to pass in `func`.\n        **kwargs : dict\n            Keyword arguments to pass in `func`.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            QueryCompiler containing data returned by map function.\n        \"\"\"\n        func = cls.frame_cls._partition_mgr_cls.preprocess_func(func)\n        partitions = np.array(\n            [\n                [\n                    cls.frame_partition_cls(\n                        DaskWrapper.deploy(\n                            func,\n                            f_args=(obj,) + args,\n                            f_kwargs=kwargs,\n                            return_pandas_df=True,\n                        )\n                    )\n                ]\n                for obj in iterable\n            ]\n        )\n        return cls.query_compiler_cls(cls.frame_cls(partitions))\n"
  },
  {
    "path": "modin/core/execution/dask/implementations/pandas_on_dask/partitioning/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Base Modin Dataframe classes related to its partitioning and optimized for pandas on Dask execution.\"\"\"\n\nfrom .partition import PandasOnDaskDataframePartition\nfrom .partition_manager import PandasOnDaskDataframePartitionManager\nfrom .virtual_partition import (\n    PandasOnDaskDataframeColumnPartition,\n    PandasOnDaskDataframeRowPartition,\n    PandasOnDaskDataframeVirtualPartition,\n)\n\n__all__ = [\n    \"PandasOnDaskDataframePartition\",\n    \"PandasOnDaskDataframePartitionManager\",\n    \"PandasOnDaskDataframeVirtualPartition\",\n    \"PandasOnDaskDataframeColumnPartition\",\n    \"PandasOnDaskDataframeRowPartition\",\n]\n"
  },
  {
    "path": "modin/core/execution/dask/implementations/pandas_on_dask/partitioning/partition.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module houses class that wraps data (block partition) and its metadata.\"\"\"\n\nimport pandas\nfrom distributed import Future\nfrom distributed.utils import get_ip\n\nfrom modin.core.dataframe.pandas.partitioning.partition import PandasDataframePartition\nfrom modin.core.execution.dask.common import DaskWrapper\nfrom modin.logging import get_logger\nfrom modin.pandas.indexing import compute_sliced_len\n\n\nclass PandasOnDaskDataframePartition(PandasDataframePartition):\n    \"\"\"\n    The class implements the interface in ``PandasDataframePartition``.\n\n    Parameters\n    ----------\n    data : distributed.Future\n        A reference to pandas DataFrame that need to be wrapped with this class.\n    length : distributed.Future or int, optional\n        Length or reference to it of wrapped pandas DataFrame.\n    width : distributed.Future or int, optional\n        Width or reference to it of wrapped pandas DataFrame.\n    ip : distributed.Future or str, optional\n        Node IP address or reference to it that holds wrapped pandas DataFrame.\n    call_queue : list, optional\n        Call queue that needs to be executed on wrapped pandas DataFrame.\n    
\"\"\"\n\n    execution_wrapper = DaskWrapper\n\n    def __init__(self, data, length=None, width=None, ip=None, call_queue=None):\n        super().__init__()\n        assert isinstance(data, Future)\n        self._data = data\n        if call_queue is None:\n            call_queue = []\n        self.call_queue = call_queue\n        self._length_cache = length\n        self._width_cache = width\n        self._ip_cache = ip\n\n        log = get_logger()\n        self._is_debug(log) and log.debug(\n            \"Partition ID: {}, Height: {}, Width: {}, Node IP: {}\".format(\n                self._identity,\n                str(self._length_cache),\n                str(self._width_cache),\n                str(self._ip_cache),\n            )\n        )\n\n    def apply(self, func, *args, **kwargs):\n        \"\"\"\n        Apply a function to the object wrapped by this partition.\n\n        Parameters\n        ----------\n        func : callable or distributed.Future\n            A function to apply.\n        *args : iterable\n            Additional positional arguments to be passed in `func`.\n        **kwargs : dict\n            Additional keyword arguments to be passed in `func`.\n\n        Returns\n        -------\n        PandasOnDaskDataframePartition\n            A new ``PandasOnDaskDataframePartition`` object.\n\n        Notes\n        -----\n        The keyword arguments are sent as a dictionary.\n        \"\"\"\n        log = get_logger()\n        self._is_debug(log) and log.debug(f\"ENTER::Partition.apply::{self._identity}\")\n        call_queue = self.call_queue + [[func, args, kwargs]]\n        if len(call_queue) > 1:\n            self._is_debug(log) and log.debug(\n                f\"SUBMIT::_apply_list_of_funcs::{self._identity}\"\n            )\n            futures = self.execution_wrapper.deploy(\n                func=apply_list_of_funcs,\n                f_args=(call_queue, self._data),\n                num_returns=2,\n                pure=False,\n     
       )\n        else:\n            # We handle `len(call_queue) == 1` in a different way because\n            # this improves performance a bit.\n            func, f_args, f_kwargs = call_queue[0]\n            futures = self.execution_wrapper.deploy(\n                func=apply_func,\n                f_args=(self._data, func, *f_args),\n                f_kwargs=f_kwargs,\n                num_returns=2,\n                pure=False,\n            )\n            self._is_debug(log) and log.debug(f\"SUBMIT::_apply_func::{self._identity}\")\n        self._is_debug(log) and log.debug(f\"EXIT::Partition.apply::{self._identity}\")\n        return self.__constructor__(futures[0], ip=futures[1])\n\n    def drain_call_queue(self):\n        \"\"\"Execute all operations stored in the call queue on the object wrapped by this partition.\"\"\"\n        log = get_logger()\n        self._is_debug(log) and log.debug(\n            f\"ENTER::Partition.drain_call_queue::{self._identity}\"\n        )\n        if len(self.call_queue) == 0:\n            return\n        call_queue = self.call_queue\n        if len(call_queue) > 1:\n            self._is_debug(log) and log.debug(\n                f\"SUBMIT::_apply_list_of_funcs::{self._identity}\"\n            )\n            futures = self.execution_wrapper.deploy(\n                func=apply_list_of_funcs,\n                f_args=(call_queue, self._data),\n                num_returns=2,\n                pure=False,\n            )\n        else:\n            # We handle `len(call_queue) == 1` in a different way because\n            # this improves performance a bit.\n            func, f_args, f_kwargs = call_queue[0]\n            self._is_debug(log) and log.debug(f\"SUBMIT::_apply_func::{self._identity}\")\n            futures = self.execution_wrapper.deploy(\n                func=apply_func,\n                f_args=(self._data, func, *f_args),\n                f_kwargs=f_kwargs,\n                num_returns=2,\n                pure=False,\n 
           )\n        self._data = futures[0]\n        self._ip_cache = futures[1]\n        self._is_debug(log) and log.debug(\n            f\"EXIT::Partition.drain_call_queue::{self._identity}\"\n        )\n        self.call_queue = []\n\n    def wait(self):\n        \"\"\"Wait for computations on the object wrapped by this partition to complete.\"\"\"\n        self.drain_call_queue()\n        self.execution_wrapper.wait(self._data)\n\n    def mask(self, row_labels, col_labels):\n        \"\"\"\n        Lazily create a mask that extracts the indices provided.\n\n        Parameters\n        ----------\n        row_labels : list-like, slice or label\n            The row labels for the rows to extract.\n        col_labels : list-like, slice or label\n            The column labels for the columns to extract.\n\n        Returns\n        -------\n        PandasOnDaskDataframePartition\n            A new ``PandasOnDaskDataframePartition`` object.\n        \"\"\"\n        log = get_logger()\n        self._is_debug(log) and log.debug(f\"ENTER::Partition.mask::{self._identity}\")\n        new_obj = super().mask(row_labels, col_labels)\n        if isinstance(row_labels, slice) and isinstance(self._length_cache, Future):\n            if row_labels == slice(None):\n                # fast path - full axis take\n                new_obj._length_cache = self._length_cache\n            else:\n                new_obj._length_cache = self.execution_wrapper.deploy(\n                    func=compute_sliced_len, f_args=(row_labels, self._length_cache)\n                )\n        if isinstance(col_labels, slice) and isinstance(self._width_cache, Future):\n            if col_labels == slice(None):\n                # fast path - full axis take\n                new_obj._width_cache = self._width_cache\n            else:\n                new_obj._width_cache = self.execution_wrapper.deploy(\n                    func=compute_sliced_len, f_args=(col_labels, self._width_cache)\n                )\n    
    self._is_debug(log) and log.debug(f\"EXIT::Partition.mask::{self._identity}\")\n        return new_obj\n\n    def __copy__(self):\n        \"\"\"\n        Create a copy of this partition.\n\n        Returns\n        -------\n        PandasOnDaskDataframePartition\n            A copy of this partition.\n        \"\"\"\n        return self.__constructor__(\n            self._data,\n            length=self._length_cache,\n            width=self._width_cache,\n            ip=self._ip_cache,\n            call_queue=self.call_queue,\n        )\n\n    @classmethod\n    def put(cls, obj):\n        \"\"\"\n        Put an object into distributed memory and wrap it with partition object.\n\n        Parameters\n        ----------\n        obj : any\n            An object to be put.\n\n        Returns\n        -------\n        PandasOnDaskDataframePartition\n            A new ``PandasOnDaskDataframePartition`` object.\n        \"\"\"\n        return cls(\n            cls.execution_wrapper.put(obj, hash=False),\n            len(obj.index),\n            len(obj.columns),\n        )\n\n    @classmethod\n    def preprocess_func(cls, func):\n        \"\"\"\n        Preprocess a function before an ``apply`` call.\n\n        Parameters\n        ----------\n        func : callable\n            The function to preprocess.\n\n        Returns\n        -------\n        callable\n            An object that can be accepted by ``apply``.\n        \"\"\"\n        return cls.execution_wrapper.put(func, hash=False, broadcast=True)\n\n    def length(self, materialize=True):\n        \"\"\"\n        Get the length of the object wrapped by this partition.\n\n        Parameters\n        ----------\n        materialize : bool, default: True\n            Whether to forcibly materialize the result into an integer. 
If ``False``\n            was specified, may return a future of the result if it hasn't been\n            materialized yet.\n\n        Returns\n        -------\n        int or distributed.Future\n            The length of the object.\n        \"\"\"\n        if self._length_cache is None:\n            self._length_cache = self.apply(len)._data\n        if isinstance(self._length_cache, Future) and materialize:\n            self._length_cache = self.execution_wrapper.materialize(self._length_cache)\n        return self._length_cache\n\n    def width(self, materialize=True):\n        \"\"\"\n        Get the width of the object wrapped by the partition.\n\n        Parameters\n        ----------\n        materialize : bool, default: True\n            Whether to forcibly materialize the result into an integer. If ``False``\n            was specified, may return a future of the result if it hasn't been\n            materialized yet.\n\n        Returns\n        -------\n        int or distributed.Future\n            The width of the object.\n        \"\"\"\n        if self._width_cache is None:\n            self._width_cache = self.apply(lambda df: len(df.columns))._data\n        if isinstance(self._width_cache, Future) and materialize:\n            self._width_cache = self.execution_wrapper.materialize(self._width_cache)\n        return self._width_cache\n\n    def ip(self, materialize=True):\n        \"\"\"\n        Get the node IP address of the object wrapped by this partition.\n\n        Parameters\n        ----------\n        materialize : bool, default: True\n            Whether to forcibly materialize the result into a string. 
If ``False``\n            was specified, may return a future of the result if it hasn't been\n            materialized yet.\n\n        Returns\n        -------\n        str or distributed.Future\n            IP address of the node that holds the data.\n        \"\"\"\n        if self._ip_cache is None:\n            self._ip_cache = self.apply(lambda df: pandas.DataFrame([]))._ip_cache\n        if materialize and isinstance(self._ip_cache, Future):\n            self._ip_cache = self.execution_wrapper.materialize(self._ip_cache)\n        return self._ip_cache\n\n\ndef apply_func(partition, func, *args, **kwargs):\n    \"\"\"\n    Execute a function on the partition in a worker process.\n\n    Parameters\n    ----------\n    partition : pandas.DataFrame\n        A pandas DataFrame the function needs to be executed on.\n    func : callable\n        The function to perform.\n    *args : list\n        Positional arguments to pass to ``func``.\n    **kwargs : dict\n        Keyword arguments to pass to ``func``.\n\n    Returns\n    -------\n    pandas.DataFrame\n        The resulting pandas DataFrame.\n    str\n        The node IP address of the worker process.\n\n    Notes\n    -----\n    Directly passing a call queue entry (i.e. 
a list of [func, args, kwargs]) instead of\n    destructuring it causes a performance penalty.\n    \"\"\"\n    result = func(partition, *args, **kwargs)\n    return result, get_ip()\n\n\ndef apply_list_of_funcs(call_queue, partition):\n    \"\"\"\n    Execute all operations stored in the call queue on the partition in a worker process.\n\n    Parameters\n    ----------\n    call_queue : list\n        A call queue of ``[func, args, kwargs]`` triples that needs to be executed on the partition.\n    partition : pandas.DataFrame\n        A pandas DataFrame the call queue needs to be executed on.\n\n    Returns\n    -------\n    pandas.DataFrame\n        The resulting pandas DataFrame.\n    str\n        The node IP address of the worker process.\n    \"\"\"\n    for func, f_args, f_kwargs in call_queue:\n        partition = func(partition, *f_args, **f_kwargs)\n    return partition, get_ip()\n"
  },
  {
    "path": "modin/core/execution/dask/implementations/pandas_on_dask/partitioning/partition_manager.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module houses class that implements ``PandasDataframePartitionManager``.\"\"\"\n\nfrom modin.core.dataframe.pandas.partitioning.partition_manager import (\n    PandasDataframePartitionManager,\n)\nfrom modin.core.execution.dask.common import DaskWrapper\n\nfrom .partition import PandasOnDaskDataframePartition\nfrom .virtual_partition import (\n    PandasOnDaskDataframeColumnPartition,\n    PandasOnDaskDataframeRowPartition,\n)\n\n\nclass PandasOnDaskDataframePartitionManager(PandasDataframePartitionManager):\n    \"\"\"The class implements the interface in `PandasDataframePartitionManager`.\"\"\"\n\n    # This object uses PandasOnDaskDataframePartition objects as the underlying store.\n    _partition_class = PandasOnDaskDataframePartition\n    _column_partitions_class = PandasOnDaskDataframeColumnPartition\n    _row_partition_class = PandasOnDaskDataframeRowPartition\n    _execution_wrapper = DaskWrapper\n\n    @classmethod\n    def wait_partitions(cls, partitions):\n        \"\"\"\n        Wait on the objects wrapped by `partitions` in parallel, without materializing them.\n\n        This method will block until all computations in the list have 
completed.\n\n        Parameters\n        ----------\n        partitions : np.ndarray\n            NumPy array with ``PandasDataframePartition``-s.\n        \"\"\"\n        cls._execution_wrapper.wait(\n            [block for partition in partitions for block in partition.list_of_blocks]\n        )\n"
  },
  {
    "path": "modin/core/execution/dask/implementations/pandas_on_dask/partitioning/virtual_partition.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module houses classes responsible for storing a virtual partition and applying a function to it.\"\"\"\n\nimport pandas\nfrom distributed.utils import get_ip\n\nfrom modin.core.dataframe.pandas.partitioning.axis_partition import (\n    PandasDataframeAxisPartition,\n)\nfrom modin.core.execution.dask.common import DaskWrapper\nfrom modin.utils import _inherit_docstrings\n\nfrom .partition import PandasOnDaskDataframePartition\n\n\nclass PandasOnDaskDataframeVirtualPartition(PandasDataframeAxisPartition):\n    \"\"\"\n    The class implements the interface in ``PandasDataframeAxisPartition``.\n\n    Parameters\n    ----------\n    list_of_partitions : Union[list, PandasOnDaskDataframePartition]\n        List of ``PandasOnDaskDataframePartition`` and\n        ``PandasOnDaskDataframeVirtualPartition`` objects, or a single\n        ``PandasOnDaskDataframePartition``.\n    get_ip : bool, default: False\n        Whether to get node IP addresses of conforming partitions or not.\n    full_axis : bool, default: True\n        Whether or not the virtual partition encompasses the whole axis.\n    call_queue : list, optional\n        A list of tuples (callable, args, 
kwargs) that contains deferred calls.\n    length : distributed.Future or int, optional\n        Length, or reference to length, of wrapped ``pandas.DataFrame``.\n    width : distributed.Future or int, optional\n        Width, or reference to width, of wrapped ``pandas.DataFrame``.\n    \"\"\"\n\n    axis = None\n    _PARTITIONS_METADATA_LEN = 3  # (length, width, ip)\n    partition_type = PandasOnDaskDataframePartition\n\n    @property\n    def list_of_ips(self):\n        \"\"\"\n        Get the IPs holding the physical objects composing this partition.\n\n        Returns\n        -------\n        List\n            A list of IPs as ``distributed.Future`` or str.\n        \"\"\"\n        # Defer draining call queue until we get the ip address\n        result = [None] * len(self.list_of_block_partitions)\n        for idx, partition in enumerate(self.list_of_block_partitions):\n            partition.drain_call_queue()\n            result[idx] = partition.ip(materialize=False)\n        return result\n\n    @classmethod\n    @_inherit_docstrings(PandasDataframeAxisPartition.deploy_splitting_func)\n    def deploy_splitting_func(\n        cls,\n        axis,\n        func,\n        f_args,\n        f_kwargs,\n        num_splits,\n        *partitions,\n        extract_metadata=False,\n    ):\n        return DaskWrapper.deploy(\n            func=_deploy_dask_func,\n            f_args=(\n                PandasDataframeAxisPartition.deploy_splitting_func,\n                axis,\n                func,\n                f_args,\n                f_kwargs,\n                num_splits,\n                *partitions,\n            ),\n            f_kwargs={\"extract_metadata\": extract_metadata},\n            num_returns=(\n                num_splits * (1 + cls._PARTITIONS_METADATA_LEN)\n                if extract_metadata\n                else num_splits\n            ),\n            pure=False,\n        )\n\n    @classmethod\n    def deploy_axis_func(\n        cls,\n        axis,\n  
      func,\n        f_args,\n        f_kwargs,\n        num_splits,\n        maintain_partitioning,\n        *partitions,\n        min_block_size,\n        lengths=None,\n        manual_partition=False,\n    ):\n        \"\"\"\n        Deploy a function along a full axis.\n\n        Parameters\n        ----------\n        axis : {0, 1}\n            The axis to perform the function along.\n        func : callable\n            The function to perform.\n        f_args : list or tuple\n            Positional arguments to pass to ``func``.\n        f_kwargs : dict\n            Keyword arguments to pass to ``func``.\n        num_splits : int\n            The number of splits to return (see `split_result_of_axis_func_pandas`).\n        maintain_partitioning : bool\n            If True, keep the old partitioning if possible.\n            If False, create a new partition layout.\n        *partitions : iterable\n            All partitions that make up the full axis (row or column).\n        min_block_size : int\n            Minimum number of rows/columns in a single split.\n        lengths : iterable, default: None\n            The list of lengths to shuffle the partition into.\n        manual_partition : bool, default: False\n            If True, partition the result with `lengths`.\n\n        Returns\n        -------\n        list\n            A list of distributed.Future.\n        \"\"\"\n        result_num_splits = len(lengths) if lengths else num_splits\n        return DaskWrapper.deploy(\n            func=_deploy_dask_func,\n            f_args=(\n                PandasDataframeAxisPartition.deploy_axis_func,\n                axis,\n                func,\n                f_args,\n                f_kwargs,\n                num_splits,\n                maintain_partitioning,\n                *partitions,\n            ),\n            f_kwargs={\n                \"min_block_size\": min_block_size,\n                \"lengths\": lengths,\n                
\"manual_partition\": manual_partition,\n            },\n            num_returns=result_num_splits * (1 + cls._PARTITIONS_METADATA_LEN),\n            pure=False,\n        )\n\n    @classmethod\n    def deploy_func_between_two_axis_partitions(\n        cls,\n        axis,\n        func,\n        f_args,\n        f_kwargs,\n        num_splits,\n        len_of_left,\n        other_shape,\n        *partitions,\n        min_block_size,\n    ):\n        \"\"\"\n        Deploy a function along a full axis between two data sets.\n\n        Parameters\n        ----------\n        axis : {0, 1}\n            The axis to perform the function along.\n        func : callable\n            The function to perform.\n        f_args : list or tuple\n            Positional arguments to pass to ``func``.\n        f_kwargs : dict\n            Keyword arguments to pass to ``func``.\n        num_splits : int\n            The number of splits to return (see `split_result_of_axis_func_pandas`).\n        len_of_left : int\n            The number of values in `partitions` that belong to the left data set.\n        other_shape : np.ndarray\n            The shape of right frame in terms of partitions, i.e.\n            (other_shape[i-1], other_shape[i]) will indicate slice to restore i-1 axis partition.\n        *partitions : iterable\n            All partitions that make up the full axis (row or column) for both data sets.\n        min_block_size : int\n            Minimum number of rows/columns in a single split.\n\n        Returns\n        -------\n        list\n            A list of distributed.Future.\n        \"\"\"\n        return DaskWrapper.deploy(\n            func=_deploy_dask_func,\n            f_args=(\n                PandasDataframeAxisPartition.deploy_func_between_two_axis_partitions,\n                axis,\n                func,\n                f_args,\n                f_kwargs,\n                num_splits,\n                len_of_left,\n                other_shape,\n          
      *partitions,\n            ),\n            f_kwargs={\n                \"min_block_size\": min_block_size,\n            },\n            num_returns=num_splits * (1 + cls._PARTITIONS_METADATA_LEN),\n            pure=False,\n        )\n\n    def wait(self):\n        \"\"\"Wait for computations on the object wrapped by the partition to complete.\"\"\"\n        self.drain_call_queue()\n        DaskWrapper.wait(self.list_of_blocks)\n\n\n@_inherit_docstrings(PandasOnDaskDataframeVirtualPartition)\nclass PandasOnDaskDataframeColumnPartition(PandasOnDaskDataframeVirtualPartition):\n    axis = 0\n\n\n@_inherit_docstrings(PandasOnDaskDataframeVirtualPartition)\nclass PandasOnDaskDataframeRowPartition(PandasOnDaskDataframeVirtualPartition):\n    axis = 1\n\n\ndef _deploy_dask_func(\n    deployer,\n    axis,\n    f_to_deploy,\n    f_args,\n    f_kwargs,\n    *args,\n    extract_metadata=True,\n    **kwargs,\n):\n    \"\"\"\n    Execute a function on an axis partition in a worker process.\n\n    This is ALWAYS called on one of ``PandasDataframeAxisPartition.deploy_splitting_func``,\n    ``PandasDataframeAxisPartition.deploy_axis_func`` or\n    ``PandasDataframeAxisPartition.deploy_func_between_two_axis_partitions``, which all\n    serve to deploy another dataframe function on a Dask worker process.\n\n    Parameters\n    ----------\n    deployer : callable\n        A `PandasDataframeAxisPartition.deploy_*` method that will call `f_to_deploy`.\n    axis : {0, 1}\n        The axis to perform the function along.\n    f_to_deploy : callable or distributed.Future\n        The function to deploy.\n    f_args : list or tuple\n        Positional arguments to pass to ``f_to_deploy``.\n    f_kwargs : dict\n        Keyword arguments to pass to ``f_to_deploy``.\n    *args : list\n        Positional arguments to pass to ``deployer``.\n    extract_metadata : bool, default: True\n        Whether to return metadata (length, width, ip) of the result. 
Passing `False` may relax\n        the load on object storage as the remote function would return 4 times fewer futures.\n        Passing `False` makes sense for temporary results where you know for sure that the\n        metadata will never be requested.\n    **kwargs : dict\n        Keyword arguments to pass to ``deployer``.\n\n    Returns\n    -------\n    list\n        The result of ``deployer`` and, if `extract_metadata` is True, metadata for it.\n    \"\"\"\n    result = deployer(axis, f_to_deploy, f_args, f_kwargs, *args, **kwargs)\n    if not extract_metadata:\n        return result\n    ip = get_ip()\n    if isinstance(result, pandas.DataFrame):\n        return result, len(result), len(result.columns), ip\n    elif all(isinstance(r, pandas.DataFrame) for r in result):\n        return [i for r in result for i in [r, len(r), len(r.columns), ip]]\n    else:\n        return [i for r in result for i in [r, None, None, ip]]\n"
  },
  {
    "path": "modin/core/execution/dispatching/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Modin's functionality related to dispatching to specific execution.\"\"\"\n"
  },
  {
    "path": "modin/core/execution/dispatching/factories/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Factories responsible for dispatching to specific execution.\"\"\"\n"
  },
  {
    "path": "modin/core/execution/dispatching/factories/dispatcher.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"\nContain IO dispatcher class.\n\nDispatcher routes the work to execution-specific functions.\n\"\"\"\n\nfrom typing import Union\n\nfrom pandas._libs.lib import NoDefault, no_default\n\nfrom modin.config import Backend, Engine, IsExperimental, StorageFormat\nfrom modin.core.execution.dispatching.factories import factories\nfrom modin.core.storage_formats.base import BaseQueryCompiler\nfrom modin.utils import _inherit_docstrings\n\n\nclass FactoryNotFoundError(AttributeError):\n    \"\"\"\n    ``FactoryNotFound`` exception class.\n\n    Raise when no matching factory could be found.\n    \"\"\"\n\n    pass\n\n\nclass StubIoEngine(object):\n    \"\"\"\n    IO-Engine that does nothing more than raise NotImplementedError when any method is called.\n\n    Parameters\n    ----------\n    factory_name : str\n        Factory name, which will be reflected in error messages.\n\n    Notes\n    -----\n    Used for testing purposes.\n    \"\"\"\n\n    def __init__(self, factory_name=\"\"):\n        self.factory_name = factory_name or \"Unknown\"\n\n    def __getattr__(self, name):\n        \"\"\"\n        Return a function that raises `NotImplementedError` for the `name` 
method.\n\n        Parameters\n        ----------\n        name : str\n            Method name to indicate in `NotImplementedError`.\n\n        Returns\n        -------\n        callable\n        \"\"\"\n\n        def stub(*args, **kw):\n            raise NotImplementedError(\n                f\"Method {self.factory_name}.{name} is not implemented\"\n            )\n\n        return stub\n\n\nclass StubFactory(factories.BaseFactory):\n    \"\"\"\n    Factory that does nothing more than raise NotImplementedError when any method is called.\n\n    Notes\n    -----\n    Used for testing purposes.\n    \"\"\"\n\n    io_cls = StubIoEngine()\n\n    @classmethod\n    def set_failing_name(cls, factory_name):\n        \"\"\"\n        Fill in `.io_cls` class attribute with ``StubIoEngine`` engine.\n\n        Parameters\n        ----------\n        factory_name : str\n            Name to pass to the ``StubIoEngine`` constructor.\n        \"\"\"\n        cls.io_cls = StubIoEngine(factory_name)\n        return cls\n\n\nclass FactoryDispatcher(object):\n    \"\"\"\n    Class that routes IO-work to the factories.\n\n    This class is responsible for keeping selected factory up-to-date and dispatching\n    calls of IO-functions to its actual execution-specific implementations.\n    \"\"\"\n\n    __factory: factories.BaseFactory = None\n\n    @classmethod\n    def get_factory(cls) -> factories.BaseFactory:\n        \"\"\"Get current factory.\"\"\"\n        if cls.__factory is None:\n\n            from modin.pandas import _initialize_engine\n\n            Engine.subscribe(\n                lambda engine_parameter: _initialize_engine(engine_parameter.get())\n            )\n            Backend.subscribe(cls._update_factory)\n        return_value = cls.__factory\n        return return_value\n\n    @classmethod\n    def _get_prepared_factory_for_backend(cls, backend) -> factories.BaseFactory:\n        \"\"\"\n        Get factory for the specified backend.\n\n        Parameters\n        
----------\n        backend : str\n            Backend name.\n\n        Returns\n        -------\n        factories.BaseFactory\n            Factory for the specified backend.\n        \"\"\"\n        execution = Backend.get_execution_for_backend(backend)\n        from modin.pandas import _initialize_engine\n\n        _initialize_engine(execution.engine)\n        factory_name = f\"{execution.storage_format}On{execution.engine}Factory\"\n        experimental_factory_name = \"Experimental\" + factory_name\n        try:\n            factory = getattr(factories, factory_name, None) or getattr(\n                factories, experimental_factory_name\n            )\n        except AttributeError:\n            if not IsExperimental.get():\n                # allow missing factories in experimental mode only\n                msg = (\n                    \"Cannot find neither factory {} nor experimental factory {}. \"\n                    + \"Potential reason might be incorrect environment variable value for \"\n                    + f\"{StorageFormat.varname} or {Engine.varname}\"\n                )\n                raise FactoryNotFoundError(\n                    msg.format(factory_name, experimental_factory_name)\n                )\n            factory = StubFactory.set_failing_name(factory_name)\n        else:\n            try:\n                factory.prepare()\n            except ModuleNotFoundError as err:\n                raise ModuleNotFoundError(\n                    f\"Make sure all required packages are installed: {str(err)}\"\n                ) from err\n        return factory\n\n    @classmethod\n    def _update_factory(cls, *args):\n        \"\"\"\n        Update and prepare factory with a new one specified via Modin config.\n\n        Parameters\n        ----------\n        *args : iterable\n            These parameters serve a compatibility purpose only.\n            Does not affect the result.\n        \"\"\"\n        cls.__factory = 
cls._get_prepared_factory_for_backend(Backend.get())\n\n    @classmethod\n    def from_pandas(\n        cls, df, backend: Union[str, NoDefault] = no_default\n    ) -> BaseQueryCompiler:\n        \"\"\"\n        Create a Modin query compiler from a pandas DataFrame.\n\n        Parameters\n        ----------\n        df : pandas.DataFrame\n            The pandas DataFrame to convert.\n        backend : str or NoDefault, default: NoDefault\n            The backend to use for the resulting query compiler. If NoDefault,\n            use the current global default ``Backend`` from the Modin config.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            A Modin query compiler that wraps the input pandas DataFrame.\n        \"\"\"\n        return (\n            cls.get_factory()\n            if backend is no_default\n            else cls._get_prepared_factory_for_backend(backend)\n        )._from_pandas(df)\n\n    @classmethod\n    @_inherit_docstrings(factories.BaseFactory._from_arrow)\n    def from_arrow(cls, at):\n        return cls.get_factory()._from_arrow(at)\n\n    @classmethod\n    @_inherit_docstrings(factories.BaseFactory._from_non_pandas)\n    def from_non_pandas(cls, *args, **kwargs):\n        return cls.get_factory()._from_non_pandas(*args, **kwargs)\n\n    @classmethod\n    @_inherit_docstrings(factories.BaseFactory._from_interchange_dataframe)\n    def from_interchange_dataframe(cls, *args, **kwargs):\n        return cls.get_factory()._from_interchange_dataframe(*args, **kwargs)\n\n    @classmethod\n    @_inherit_docstrings(factories.BaseFactory._from_ray)\n    def from_ray(cls, ray_obj):\n        return cls.get_factory()._from_ray(ray_obj)\n\n    @classmethod\n    @_inherit_docstrings(factories.BaseFactory._from_dask)\n    def from_dask(cls, dask_obj):\n        return cls.get_factory()._from_dask(dask_obj)\n\n    @classmethod\n    @_inherit_docstrings(factories.BaseFactory._from_map)\n    def from_map(cls, func, iterable, *args, 
**kwargs):\n        return cls.get_factory()._from_map(func, iterable, *args, **kwargs)\n\n    @classmethod\n    @_inherit_docstrings(factories.BaseFactory._read_parquet)\n    def read_parquet(cls, **kwargs):\n        return cls.get_factory()._read_parquet(**kwargs)\n\n    @classmethod\n    @_inherit_docstrings(factories.BaseFactory._read_csv)\n    def read_csv(cls, **kwargs):\n        return cls.get_factory()._read_csv(**kwargs)\n\n    @classmethod\n    @_inherit_docstrings(factories.PandasOnRayFactory._read_csv_glob)\n    def read_csv_glob(cls, **kwargs):\n        return cls.get_factory()._read_csv_glob(**kwargs)\n\n    @classmethod\n    @_inherit_docstrings(factories.PandasOnRayFactory._read_pickle_glob)\n    def read_pickle_glob(cls, **kwargs):\n        return cls.get_factory()._read_pickle_glob(**kwargs)\n\n    @classmethod\n    @_inherit_docstrings(factories.BaseFactory._read_json)\n    def read_json(cls, **kwargs):\n        return cls.get_factory()._read_json(**kwargs)\n\n    @classmethod\n    @_inherit_docstrings(factories.BaseFactory._read_gbq)\n    def read_gbq(cls, **kwargs):\n        return cls.get_factory()._read_gbq(**kwargs)\n\n    @classmethod\n    @_inherit_docstrings(factories.BaseFactory._read_html)\n    def read_html(cls, **kwargs):\n        return cls.get_factory()._read_html(**kwargs)\n\n    @classmethod\n    @_inherit_docstrings(factories.BaseFactory._read_clipboard)\n    def read_clipboard(cls, **kwargs):\n        return cls.get_factory()._read_clipboard(**kwargs)\n\n    @classmethod\n    @_inherit_docstrings(factories.BaseFactory._read_excel)\n    def read_excel(cls, **kwargs):\n        return cls.get_factory()._read_excel(**kwargs)\n\n    @classmethod\n    @_inherit_docstrings(factories.BaseFactory._read_hdf)\n    def read_hdf(cls, **kwargs):\n        return cls.get_factory()._read_hdf(**kwargs)\n\n    @classmethod\n    @_inherit_docstrings(factories.BaseFactory._read_feather)\n    def read_feather(cls, **kwargs):\n        return 
cls.get_factory()._read_feather(**kwargs)\n\n    @classmethod\n    @_inherit_docstrings(factories.BaseFactory._read_stata)\n    def read_stata(cls, **kwargs):\n        return cls.get_factory()._read_stata(**kwargs)\n\n    @classmethod\n    @_inherit_docstrings(factories.BaseFactory._read_sas)\n    def read_sas(cls, **kwargs):  # pragma: no cover\n        return cls.get_factory()._read_sas(**kwargs)\n\n    @classmethod\n    @_inherit_docstrings(factories.BaseFactory._read_pickle)\n    def read_pickle(cls, **kwargs):\n        return cls.get_factory()._read_pickle(**kwargs)\n\n    @classmethod\n    @_inherit_docstrings(factories.BaseFactory._read_sql)\n    def read_sql(cls, **kwargs):\n        return cls.get_factory()._read_sql(**kwargs)\n\n    @classmethod\n    @_inherit_docstrings(factories.PandasOnRayFactory._read_sql_distributed)\n    def read_sql_distributed(cls, **kwargs):\n        return cls.get_factory()._read_sql_distributed(**kwargs)\n\n    @classmethod\n    @_inherit_docstrings(factories.BaseFactory._read_fwf)\n    def read_fwf(cls, **kwargs):\n        return cls.get_factory()._read_fwf(**kwargs)\n\n    @classmethod\n    @_inherit_docstrings(factories.BaseFactory._read_sql_table)\n    def read_sql_table(cls, **kwargs):\n        return cls.get_factory()._read_sql_table(**kwargs)\n\n    @classmethod\n    @_inherit_docstrings(factories.BaseFactory._read_sql_query)\n    def read_sql_query(cls, **kwargs):\n        return cls.get_factory()._read_sql_query(**kwargs)\n\n    @classmethod\n    @_inherit_docstrings(factories.BaseFactory._read_spss)\n    def read_spss(cls, **kwargs):\n        return cls.get_factory()._read_spss(**kwargs)\n\n    @classmethod\n    @_inherit_docstrings(factories.BaseFactory._to_sql)\n    def to_sql(cls, *args, **kwargs):\n        return cls.get_factory()._to_sql(*args, **kwargs)\n\n    @classmethod\n    @_inherit_docstrings(factories.BaseFactory._to_pickle)\n    def to_pickle(cls, *args, **kwargs):\n        return 
cls.get_factory()._to_pickle(*args, **kwargs)\n\n    @classmethod\n    @_inherit_docstrings(factories.PandasOnRayFactory._to_pickle_glob)\n    def to_pickle_glob(cls, *args, **kwargs):\n        return cls.get_factory()._to_pickle_glob(*args, **kwargs)\n\n    @classmethod\n    @_inherit_docstrings(factories.PandasOnRayFactory._read_parquet_glob)\n    def read_parquet_glob(cls, *args, **kwargs):\n        return cls.get_factory()._read_parquet_glob(*args, **kwargs)\n\n    @classmethod\n    @_inherit_docstrings(factories.PandasOnRayFactory._to_parquet_glob)\n    def to_parquet_glob(cls, *args, **kwargs):\n        return cls.get_factory()._to_parquet_glob(*args, **kwargs)\n\n    @classmethod\n    @_inherit_docstrings(factories.PandasOnRayFactory._read_json_glob)\n    def read_json_glob(cls, *args, **kwargs):\n        return cls.get_factory()._read_json_glob(*args, **kwargs)\n\n    @classmethod\n    @_inherit_docstrings(factories.PandasOnRayFactory._to_json_glob)\n    def to_json_glob(cls, *args, **kwargs):\n        return cls.get_factory()._to_json_glob(*args, **kwargs)\n\n    @classmethod\n    @_inherit_docstrings(factories.PandasOnRayFactory._read_xml_glob)\n    def read_xml_glob(cls, *args, **kwargs):\n        return cls.get_factory()._read_xml_glob(*args, **kwargs)\n\n    @classmethod\n    @_inherit_docstrings(factories.PandasOnRayFactory._to_xml_glob)\n    def to_xml_glob(cls, *args, **kwargs):\n        return cls.get_factory()._to_xml_glob(*args, **kwargs)\n\n    @classmethod\n    @_inherit_docstrings(factories.PandasOnRayFactory._read_custom_text)\n    def read_custom_text(cls, **kwargs):\n        return cls.get_factory()._read_custom_text(**kwargs)\n\n    @classmethod\n    @_inherit_docstrings(factories.BaseFactory._to_csv)\n    def to_csv(cls, *args, **kwargs):\n        return cls.get_factory()._to_csv(*args, **kwargs)\n\n    @classmethod\n    @_inherit_docstrings(factories.BaseFactory._to_json)\n    def to_json(cls, *args, **kwargs):\n        return 
cls.get_factory()._to_json(*args, **kwargs)\n\n    @classmethod\n    @_inherit_docstrings(factories.BaseFactory._to_json)\n    def to_json_series(cls, *args, **kwargs):\n        return cls.get_factory()._to_json_series(*args, **kwargs)\n\n    @classmethod\n    @_inherit_docstrings(factories.BaseFactory._to_xml)\n    def to_xml(cls, *args, **kwargs):\n        return cls.get_factory()._to_xml(*args, **kwargs)\n\n    @classmethod\n    @_inherit_docstrings(factories.BaseFactory._to_parquet)\n    def to_parquet(cls, *args, **kwargs):\n        return cls.get_factory()._to_parquet(*args, **kwargs)\n\n    @classmethod\n    @_inherit_docstrings(factories.BaseFactory._to_ray)\n    def to_ray(cls, modin_obj):\n        return cls.get_factory()._to_ray(modin_obj)\n\n    @classmethod\n    @_inherit_docstrings(factories.BaseFactory._to_dask)\n    def to_dask(cls, modin_obj):\n        return cls.get_factory()._to_dask(modin_obj)\n"
  },
  {
    "path": "modin/core/execution/dispatching/factories/factories.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"\nModule contains Factories for all of the supported Modin executions.\n\nFactory is a bridge between calls of IO function from high-level API and its\nactual implementation in the execution, bound to that factory. Each execution is represented\nwith a Factory class.\n\"\"\"\n\nimport re\nimport typing\nimport warnings\n\nimport pandas\nfrom pandas.util._decorators import doc\n\nfrom modin.core.io import BaseIO\nfrom modin.core.storage_formats.pandas.native_query_compiler import NativeQueryCompiler\nfrom modin.utils import get_current_execution\n\n_doc_abstract_factory_class = \"\"\"\nAbstract {role} factory which allows to override the IO module easily.\n\nThis class is responsible for dispatching calls of IO-functions to its\nactual execution-specific implementations.\n\nAttributes\n----------\nio_cls : BaseIO\n    IO module class of the underlying execution. 
The place to dispatch calls to.\n\"\"\"\n\n_doc_factory_class = \"\"\"\nFactory of {execution_name} execution.\n\nThis class is responsible for dispatching calls of IO-functions to its\nactual execution-specific implementations.\n\nAttributes\n----------\nio_cls : {execution_name}IO\n    IO module class of the underlying execution. The place to dispatch calls to.\n\"\"\"\n\n_doc_factory_prepare_method = \"\"\"\nInitialize Factory.\n\nFills in `.io_cls` class attribute with {io_module_name} lazily.\n\"\"\"\n\n_doc_io_method_raw_template = \"\"\"\nBuild query compiler from {source}.\n\nParameters\n----------\n{params}\n\nReturns\n-------\nQueryCompiler\n    Query compiler of the selected storage format.\n\"\"\"\n\n_doc_io_method_template = (\n    _doc_io_method_raw_template\n    + \"\"\"\nSee Also\n--------\nmodin.pandas.{method}\n\"\"\"\n)\n\n_doc_io_method_all_params = \"\"\"*args : args\n    Arguments to pass to the QueryCompiler builder method.\n**kwargs : kwargs\n    Arguments to pass to the QueryCompiler builder method.\"\"\"\n\n_doc_io_method_kwargs_params = \"\"\"**kwargs : kwargs\n    Arguments to pass to the QueryCompiler builder method.\"\"\"\n\n\ntypes_dictionary = {\"pandas\": {\"category\": pandas.CategoricalDtype}}\n\nsupported_executions = (\n    \"PandasOnRay\",\n    \"PandasOnUnidist\",\n    \"PandasOnDask\",\n)\n\n\nclass FactoryInfo(typing.NamedTuple):\n    \"\"\"\n    Structure that stores information about factory.\n\n    Parameters\n    ----------\n    engine : str\n        Name of underlying execution engine.\n    partition : str\n        Name of the partition format.\n    experimental : bool\n        Whether underlying engine is experimental-only.\n    \"\"\"\n\n    engine: str\n    partition: str\n    experimental: bool\n\n\nclass NotRealFactory(Exception):\n    \"\"\"\n    ``NotRealFactory`` exception class.\n\n    Raise when the class name does not follow the factory naming pattern\n    expected by ``BaseFactory.get_info``.\n    \"\"\"\n\n    pass\n\n\n@doc(_doc_abstract_factory_class, role=\"\")\nclass 
BaseFactory(object):\n    io_cls: typing.Type[BaseIO] = None  # The module where the I/O functionality exists.\n\n    @classmethod\n    def get_info(cls) -> FactoryInfo:\n        \"\"\"\n        Get information about current factory.\n\n        Notes\n        -----\n        It parses factory name, so it must be conformant with how ``FactoryDispatcher``\n        class constructs factory names.\n        \"\"\"\n        try:\n            experimental, partition, engine = re.match(\n                r\"^(Experimental)?(.*)On(.*)Factory$\", cls.__name__\n            ).groups()\n        except AttributeError:\n            raise NotRealFactory()\n        return FactoryInfo(\n            engine=engine, partition=partition, experimental=bool(experimental)\n        )\n\n    @classmethod\n    @doc(\n        _doc_factory_prepare_method,\n        io_module_name=\"an underlying execution's IO-module\",\n    )\n    def prepare(cls):\n        raise NotImplementedError(\"Subclasses of BaseFactory must implement prepare\")\n\n    @classmethod\n    @doc(\n        _doc_io_method_template,\n        source=\"pandas DataFrame\",\n        params=\"df : pandas.DataFrame\",\n        method=\"io.from_pandas\",\n    )\n    def _from_pandas(cls, df):\n        return cls.io_cls.from_pandas(df)\n\n    @classmethod\n    @doc(\n        _doc_io_method_template,\n        source=\"Arrow Table\",\n        params=\"at : pyarrow.Table\",\n        method=\"io.from_arrow\",\n    )\n    def _from_arrow(cls, at):\n        return cls.io_cls.from_arrow(at)\n\n    @classmethod\n    @doc(\n        _doc_io_method_template,\n        source=\"a non-pandas object (dict, list, np.array etc...)\",\n        params=_doc_io_method_all_params,\n        method=\"io.from_non_pandas\",\n    )\n    def _from_non_pandas(cls, *args, **kwargs):\n        return cls.io_cls.from_non_pandas(*args, **kwargs)\n\n    @classmethod\n    @doc(\n        _doc_io_method_template,\n        source=\"a DataFrame object supporting exchange 
protocol `__dataframe__()`\",\n        params=_doc_io_method_all_params,\n        method=\"io.from_interchange_dataframe\",\n    )\n    def _from_interchange_dataframe(cls, *args, **kwargs):\n        return cls.io_cls.from_interchange_dataframe(*args, **kwargs)\n\n    @classmethod\n    @doc(\n        _doc_io_method_template,\n        source=\"a Ray Dataset\",\n        params=\"ray_obj : ray.data.Dataset\",\n        method=\"modin.core.execution.ray.implementations.pandas_on_ray.io.PandasOnRayIO.from_ray\",\n    )\n    def _from_ray(cls, ray_obj):\n        return cls.io_cls.from_ray(ray_obj)\n\n    @classmethod\n    @doc(\n        _doc_io_method_template,\n        source=\"a Dask DataFrame\",\n        params=\"dask_obj : dask.dataframe.DataFrame\",\n        method=\"modin.core.execution.dask.implementations.pandas_on_dask.io.PandasOnDaskIO.from_dask\",\n    )\n    def _from_dask(cls, dask_obj):\n        return cls.io_cls.from_dask(dask_obj)\n\n    @classmethod\n    def _from_map(cls, func, iterable, *args, **kwargs):\n        \"\"\"\n        Create a Modin `query_compiler` from a map function.\n\n        This method will construct a Modin `query_compiler` split by row partitions.\n        The number of row partitions matches the number of elements in the iterable object.\n\n        Parameters\n        ----------\n        func : callable\n            Function to map across the iterable object.\n        iterable : Iterable\n            An iterable object.\n        *args : tuple\n            Positional arguments to pass in `func`.\n        **kwargs : dict\n            Keyword arguments to pass in `func`.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            QueryCompiler containing data returned by map function.\n        \"\"\"\n        return cls.io_cls.from_map(func, iterable, *args, **kwargs)\n\n    @classmethod\n    @doc(\n        _doc_io_method_template,\n        source=\"a Parquet file\",\n        params=_doc_io_method_kwargs_params,\n       
 method=\"read_parquet\",\n    )\n    def _read_parquet(cls, **kwargs):\n        return cls.io_cls.read_parquet(**kwargs)\n\n    @classmethod\n    @doc(\n        _doc_io_method_template,\n        source=\"a CSV file\",\n        params=_doc_io_method_kwargs_params,\n        method=\"read_csv\",\n    )\n    def _read_csv(cls, **kwargs):\n        return cls.io_cls.read_csv(**kwargs)\n\n    @classmethod\n    @doc(\n        _doc_io_method_template,\n        source=\"a JSON file\",\n        params=_doc_io_method_kwargs_params,\n        method=\"read_json\",\n    )\n    def _read_json(cls, **kwargs):\n        return cls.io_cls.read_json(**kwargs)\n\n    @classmethod\n    @doc(\n        _doc_io_method_template,\n        source=\"a Google BigQuery\",\n        params=_doc_io_method_kwargs_params,\n        method=\"read_gbq\",\n    )\n    def _read_gbq(cls, **kwargs):\n        return cls.io_cls.read_gbq(**kwargs)\n\n    @classmethod\n    @doc(\n        _doc_io_method_template,\n        source=\"an HTML document\",\n        params=_doc_io_method_kwargs_params,\n        method=\"read_html\",\n    )\n    def _read_html(cls, **kwargs):\n        return cls.io_cls.read_html(**kwargs)\n\n    @classmethod\n    @doc(\n        _doc_io_method_template,\n        source=\"clipboard\",\n        params=_doc_io_method_kwargs_params,\n        method=\"read_clipboard\",\n    )\n    def _read_clipboard(cls, **kwargs):  # pragma: no cover\n        return cls.io_cls.read_clipboard(**kwargs)\n\n    @classmethod\n    @doc(\n        _doc_io_method_template,\n        source=\"an Excel file\",\n        params=_doc_io_method_kwargs_params,\n        method=\"read_excel\",\n    )\n    def _read_excel(cls, **kwargs):\n        return cls.io_cls.read_excel(**kwargs)\n\n    @classmethod\n    @doc(\n        _doc_io_method_template,\n        source=\"an HDFStore\",\n        params=_doc_io_method_kwargs_params,\n        method=\"read_hdf\",\n    )\n    def _read_hdf(cls, **kwargs):\n        return 
cls.io_cls.read_hdf(**kwargs)\n\n    @classmethod\n    @doc(\n        _doc_io_method_template,\n        source=\"a feather-format object\",\n        params=_doc_io_method_kwargs_params,\n        method=\"read_feather\",\n    )\n    def _read_feather(cls, **kwargs):\n        return cls.io_cls.read_feather(**kwargs)\n\n    @classmethod\n    @doc(\n        _doc_io_method_template,\n        source=\"a Stata file\",\n        params=_doc_io_method_kwargs_params,\n        method=\"read_stata\",\n    )\n    def _read_stata(cls, **kwargs):\n        return cls.io_cls.read_stata(**kwargs)\n\n    @classmethod\n    @doc(\n        _doc_io_method_template,\n        source=\"a SAS file\",\n        params=_doc_io_method_kwargs_params,\n        method=\"read_sas\",\n    )\n    def _read_sas(cls, **kwargs):  # pragma: no cover\n        return cls.io_cls.read_sas(**kwargs)\n\n    @classmethod\n    @doc(\n        _doc_io_method_template,\n        source=\"a pickled Modin or pandas DataFrame\",\n        params=_doc_io_method_kwargs_params,\n        method=\"read_pickle\",\n    )\n    def _read_pickle(cls, **kwargs):\n        return cls.io_cls.read_pickle(**kwargs)\n\n    @classmethod\n    @doc(\n        _doc_io_method_template,\n        source=\"a SQL query or database table\",\n        params=_doc_io_method_kwargs_params,\n        method=\"read_sql\",\n    )\n    def _read_sql(cls, **kwargs):\n        return cls.io_cls.read_sql(**kwargs)\n\n    @classmethod\n    @doc(\n        _doc_io_method_template,\n        source=\"a table of fixed-width formatted lines\",\n        params=_doc_io_method_kwargs_params,\n        method=\"read_fwf\",\n    )\n    def _read_fwf(cls, **kwargs):\n        return cls.io_cls.read_fwf(**kwargs)\n\n    @classmethod\n    @doc(\n        _doc_io_method_template,\n        source=\"a SQL database table\",\n        params=_doc_io_method_kwargs_params,\n        method=\"read_sql_table\",\n    )\n    def _read_sql_table(cls, **kwargs):\n        return 
cls.io_cls.read_sql_table(**kwargs)\n\n    @classmethod\n    @doc(\n        _doc_io_method_template,\n        source=\"a SQL query\",\n        params=_doc_io_method_kwargs_params,\n        method=\"read_sql_query\",\n    )\n    def _read_sql_query(cls, **kwargs):\n        return cls.io_cls.read_sql_query(**kwargs)\n\n    @classmethod\n    @doc(\n        _doc_io_method_template,\n        source=\"an SPSS file\",\n        params=_doc_io_method_kwargs_params,\n        method=\"read_spss\",\n    )\n    def _read_spss(cls, **kwargs):\n        return cls.io_cls.read_spss(**kwargs)\n\n    @classmethod\n    def _to_sql(cls, *args, **kwargs):\n        \"\"\"\n        Write query compiler content to a SQL database.\n\n        Parameters\n        ----------\n        *args : args\n            Arguments to the writer method.\n        **kwargs : kwargs\n            Arguments to the writer method.\n        \"\"\"\n        return cls.io_cls.to_sql(*args, **kwargs)\n\n    @classmethod\n    def _to_pickle(cls, *args, **kwargs):\n        \"\"\"\n        Pickle query compiler object.\n\n        Parameters\n        ----------\n        *args : args\n            Arguments to the writer method.\n        **kwargs : kwargs\n            Arguments to the writer method.\n        \"\"\"\n        return cls.io_cls.to_pickle(*args, **kwargs)\n\n    @classmethod\n    def _to_csv(cls, *args, **kwargs):\n        \"\"\"\n        Write query compiler content to a CSV file.\n\n        Parameters\n        ----------\n        *args : args\n            Arguments to pass to the writer method.\n        **kwargs : kwargs\n            Arguments to pass to the writer method.\n        \"\"\"\n        return cls.io_cls.to_csv(*args, **kwargs)\n\n    @classmethod\n    def _to_json(cls, *args, **kwargs):\n        \"\"\"\n        Write query compiler content to a JSON file.\n\n        Parameters\n        ----------\n        *args : args\n            Arguments to pass to the writer method.\n        **kwargs : 
kwargs\n            Arguments to pass to the writer method.\n        \"\"\"\n        return cls.io_cls.to_json(*args, **kwargs)\n\n    @classmethod\n    def _to_json_series(cls, *args, **kwargs):\n        \"\"\"\n        Write query compiler content of a Series to a JSON file.\n\n        Parameters\n        ----------\n        *args : args\n            Arguments to pass to the writer method.\n        **kwargs : kwargs\n            Arguments to pass to the writer method.\n        \"\"\"\n        return cls.io_cls.to_json_series(*args, **kwargs)\n\n    @classmethod\n    def _to_xml(cls, *args, **kwargs):\n        \"\"\"\n        Write query compiler content to a XML file.\n\n        Parameters\n        ----------\n        *args : args\n            Arguments to pass to the writer method.\n        **kwargs : kwargs\n            Arguments to pass to the writer method.\n        \"\"\"\n        return cls.io_cls.to_xml(*args, **kwargs)\n\n    @classmethod\n    def _to_parquet(cls, *args, **kwargs):\n        \"\"\"\n        Write query compiler content to a parquet file.\n\n        Parameters\n        ----------\n        *args : args\n            Arguments to pass to the writer method.\n        **kwargs : kwargs\n            Arguments to pass to the writer method.\n        \"\"\"\n        return cls.io_cls.to_parquet(*args, **kwargs)\n\n    @classmethod\n    def _to_ray(cls, modin_obj):\n        \"\"\"\n        Write query compiler content to a Ray Dataset.\n\n        Parameters\n        ----------\n        modin_obj : modin.pandas.DataFrame, modin.pandas.Series\n            The Modin DataFrame/Series to write.\n\n        Returns\n        -------\n        ray.data.Dataset\n            A Ray Dataset object.\n\n        Notes\n        -----\n        Modin DataFrame/Series can only be converted to a Ray Dataset if Modin uses a Ray engine.\n        \"\"\"\n        return cls.io_cls.to_ray(modin_obj)\n\n    @classmethod\n    def _to_dask(cls, modin_obj):\n        \"\"\"\n        
Write query compiler content to a Dask DataFrame/Series.\n\n        Parameters\n        ----------\n        modin_obj : modin.pandas.DataFrame, modin.pandas.Series\n            The Modin DataFrame/Series to write.\n\n        Returns\n        -------\n        dask.dataframe.DataFrame or dask.dataframe.Series\n            A Dask DataFrame/Series object.\n\n        Notes\n        -----\n        Modin DataFrame/Series can only be converted to a Dask DataFrame/Series if Modin uses a Dask engine.\n        \"\"\"\n        return cls.io_cls.to_dask(modin_obj)\n\n    # experimental methods that don't exist in pandas\n    @classmethod\n    @doc(\n        _doc_io_method_raw_template,\n        source=\"CSV files\",\n        params=_doc_io_method_kwargs_params,\n    )\n    def _read_csv_glob(cls, **kwargs):\n        current_execution = get_current_execution()\n        if current_execution not in supported_executions:\n            raise NotImplementedError(\n                f\"`_read_csv_glob()` is not implemented for {current_execution} execution.\"\n            )\n        return cls.io_cls.read_csv_glob(**kwargs)\n\n    @classmethod\n    @doc(\n        _doc_io_method_raw_template,\n        source=\"Pickle files\",\n        params=_doc_io_method_kwargs_params,\n    )\n    def _read_pickle_glob(cls, **kwargs):\n        current_execution = get_current_execution()\n        if current_execution not in supported_executions:\n            raise NotImplementedError(\n                f\"`_read_pickle_glob()` is not implemented for {current_execution} execution.\"\n            )\n        return cls.io_cls.read_pickle_glob(**kwargs)\n\n    @classmethod\n    @doc(\n        _doc_io_method_raw_template,\n        source=\"SQL files\",\n        params=_doc_io_method_kwargs_params,\n    )\n    def _read_sql_distributed(cls, **kwargs):\n        current_execution = get_current_execution()\n        if current_execution not in supported_executions:\n            extra_parameters = (\n                
\"partition_column\",\n                \"lower_bound\",\n                \"upper_bound\",\n                \"max_sessions\",\n            )\n            if any(\n                param in kwargs and kwargs[param] is not None\n                for param in extra_parameters\n            ):\n                warnings.warn(\n                    f\"Distributed read_sql() was only implemented for {', '.join(supported_executions)} executions.\"\n                )\n            for param in extra_parameters:\n                del kwargs[param]\n            return cls.io_cls.read_sql(**kwargs)\n        return cls.io_cls.read_sql_distributed(**kwargs)\n\n    @classmethod\n    @doc(\n        _doc_io_method_raw_template,\n        source=\"Custom text files\",\n        params=_doc_io_method_kwargs_params,\n    )\n    def _read_custom_text(cls, **kwargs):\n        current_execution = get_current_execution()\n        if current_execution not in supported_executions:\n            raise NotImplementedError(\n                f\"`_read_custom_text()` is not implemented for {current_execution} execution.\"\n            )\n        return cls.io_cls.read_custom_text(**kwargs)\n\n    @classmethod\n    def _to_pickle_glob(cls, *args, **kwargs):\n        \"\"\"\n        Distributed pickle query compiler object.\n\n        Parameters\n        ----------\n        *args : args\n            Arguments to the writer method.\n        **kwargs : kwargs\n            Arguments to the writer method.\n        \"\"\"\n        # TODO(https://github.com/modin-project/modin/issues/7429): Use\n        # frame-level execution instead of the global, default execution.\n        current_execution = get_current_execution()\n        if current_execution not in supported_executions:\n            raise NotImplementedError(\n                f\"`_to_pickle_glob()` is not implemented for {current_execution} execution.\"\n            )\n        return cls.io_cls.to_pickle_glob(*args, **kwargs)\n\n    @classmethod\n    
@doc(\n        _doc_io_method_raw_template,\n        source=\"Parquet files\",\n        params=_doc_io_method_kwargs_params,\n    )\n    def _read_parquet_glob(cls, **kwargs):\n        current_execution = get_current_execution()\n        if current_execution not in supported_executions:\n            raise NotImplementedError(\n                f\"`_read_parquet_glob()` is not implemented for {current_execution} execution.\"\n            )\n        return cls.io_cls.read_parquet_glob(**kwargs)\n\n    @classmethod\n    def _to_parquet_glob(cls, *args, **kwargs):\n        \"\"\"\n        Write query compiler content to several parquet files.\n\n        Parameters\n        ----------\n        *args : args\n            Arguments to pass to the writer method.\n        **kwargs : kwargs\n            Arguments to pass to the writer method.\n        \"\"\"\n        current_execution = get_current_execution()\n        if current_execution not in supported_executions:\n            raise NotImplementedError(\n                f\"`_to_parquet_glob()` is not implemented for {current_execution} execution.\"\n            )\n        return cls.io_cls.to_parquet_glob(*args, **kwargs)\n\n    @classmethod\n    @doc(\n        _doc_io_method_raw_template,\n        source=\"Json files\",\n        params=_doc_io_method_kwargs_params,\n    )\n    def _read_json_glob(cls, **kwargs):\n        current_execution = get_current_execution()\n        if current_execution not in supported_executions:\n            raise NotImplementedError(\n                f\"`_read_json_glob()` is not implemented for {current_execution} execution.\"\n            )\n        return cls.io_cls.read_json_glob(**kwargs)\n\n    @classmethod\n    def _to_json_glob(cls, *args, **kwargs):\n        \"\"\"\n        Write query compiler content to several json files.\n\n        Parameters\n        ----------\n        *args : args\n            Arguments to pass to the writer method.\n        **kwargs : kwargs\n            
Arguments to pass to the writer method.\n        \"\"\"\n        current_execution = get_current_execution()\n        if current_execution not in supported_executions:\n            raise NotImplementedError(\n                f\"`_to_json_glob()` is not implemented for {current_execution} execution.\"\n            )\n        return cls.io_cls.to_json_glob(*args, **kwargs)\n\n    @classmethod\n    @doc(\n        _doc_io_method_raw_template,\n        source=\"XML files\",\n        params=_doc_io_method_kwargs_params,\n    )\n    def _read_xml_glob(cls, **kwargs):\n        current_execution = get_current_execution()\n        if current_execution not in supported_executions:\n            raise NotImplementedError(\n                f\"`_read_xml_glob()` is not implemented for {current_execution} execution.\"\n            )\n        return cls.io_cls.read_xml_glob(**kwargs)\n\n    @classmethod\n    def _to_xml_glob(cls, *args, **kwargs):\n        \"\"\"\n        Write query compiler content to several XML files.\n\n        Parameters\n        ----------\n        *args : args\n            Arguments to pass to the writer method.\n        **kwargs : kwargs\n            Arguments to pass to the writer method.\n        \"\"\"\n        current_execution = get_current_execution()\n        if current_execution not in supported_executions:\n            raise NotImplementedError(\n                f\"`_to_xml_glob()` is not implemented for {current_execution} execution.\"\n            )\n        return cls.io_cls.to_xml_glob(*args, **kwargs)\n\n\n@doc(_doc_factory_class, execution_name=\"PandasOnRay\")\nclass PandasOnRayFactory(BaseFactory):\n    @classmethod\n    @doc(_doc_factory_prepare_method, io_module_name=\"``PandasOnRayIO``\")\n    def prepare(cls):\n        from modin.core.execution.ray.implementations.pandas_on_ray.io import (\n            PandasOnRayIO,\n        )\n\n        cls.io_cls = PandasOnRayIO\n\n\n@doc(_doc_factory_class, execution_name=\"PandasOnPython\")\nclass 
PandasOnPythonFactory(BaseFactory):\n    @classmethod\n    @doc(_doc_factory_prepare_method, io_module_name=\"``PandasOnPythonIO``\")\n    def prepare(cls):\n        from modin.core.execution.python.implementations.pandas_on_python.io import (\n            PandasOnPythonIO,\n        )\n\n        cls.io_cls = PandasOnPythonIO\n\n\n@doc(_doc_factory_class, execution_name=\"PandasOnDask\")\nclass PandasOnDaskFactory(BaseFactory):\n    @classmethod\n    @doc(_doc_factory_prepare_method, io_module_name=\"``PandasOnDaskIO``\")\n    def prepare(cls):\n        from modin.core.execution.dask.implementations.pandas_on_dask.io import (\n            PandasOnDaskIO,\n        )\n\n        cls.io_cls = PandasOnDaskIO\n\n\n@doc(_doc_factory_class, execution_name=\"PandasOnUnidist\")\nclass PandasOnUnidistFactory(BaseFactory):\n    @classmethod\n    @doc(_doc_factory_prepare_method, io_module_name=\"``PandasOnUnidistIO``\")\n    def prepare(cls):\n        from modin.core.execution.unidist.implementations.pandas_on_unidist.io import (\n            PandasOnUnidistIO,\n        )\n\n        cls.io_cls = PandasOnUnidistIO\n\n\nclass NativeIO(BaseIO):\n    \"\"\"\n    I/O class for native pandas execution.\n\n    This class inherits the default function implementations from the\n    ``BaseIO`` parent class.\n    \"\"\"\n\n    _should_warn_on_default_to_pandas: bool = False\n    query_compiler_cls = NativeQueryCompiler\n\n\n@doc(_doc_factory_class, execution_name=\"NativeOnNative\")\nclass NativeOnNativeFactory(BaseFactory):\n\n    @classmethod\n    @doc(_doc_factory_prepare_method, io_module_name=\"`NativeIO`\")\n    def prepare(cls):\n        cls.io_cls = NativeIO\n"
  },
  {
    "path": "modin/core/execution/modin_aqp.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"\nThe module for working with displaying progress bars for Modin execution engines.\n\nModin Automatic Query Progress (AQP).\n\"\"\"\n\nimport inspect\nimport os\nimport threading\nimport time\nimport warnings\n\nfrom modin.config import Engine, ProgressBar\n\nprogress_bars = {}\nbar_lock = threading.Lock()\n\n\ndef call_progress_bar(result_parts, line_no):\n    \"\"\"\n    Attach a progress bar to given `result_parts`.\n\n    The progress bar is expected to be shown in a Jupyter Notebook cell.\n\n    Parameters\n    ----------\n    result_parts : list of list of object refs (futures)\n        Objects which are being computed for which progress is requested.\n    line_no : int\n        Line number in the call stack which we're displaying progress for.\n    \"\"\"\n    with warnings.catch_warnings():\n        warnings.simplefilter(\"ignore\")\n        try:\n            from tqdm.autonotebook import tqdm as tqdm_notebook\n        except ImportError:\n            raise ImportError(\"Please pip install tqdm to use the progress bar\")\n        from IPython import get_ipython\n\n    try:\n        cell_no = get_ipython().execution_count\n    # This happens if we are 
not in ipython or jupyter.\n    # No progress bar is supported in that case.\n    except AttributeError:\n        return\n    pbar_id = f\"{cell_no}-{line_no}\"\n    futures = [\n        block\n        for row in result_parts\n        for partition in row\n        for block in partition.list_of_blocks\n    ]\n    bar_format = (\n        \"{l_bar}{bar}{r_bar}\"\n        if \"DEBUG_PROGRESS_BAR\" in os.environ\n        and os.environ[\"DEBUG_PROGRESS_BAR\"] == \"True\"\n        else \"{desc}: {percentage:3.0f}%{bar} Elapsed time: {elapsed}, estimated remaining time: {remaining}\"\n    )\n    bar_lock.acquire()\n    if pbar_id in progress_bars:\n        if hasattr(progress_bars[pbar_id], \"container\"):\n            if hasattr(progress_bars[pbar_id].container.children[0], \"max\"):\n                index = 0\n            else:\n                index = 1\n            progress_bars[pbar_id].container.children[index].max = progress_bars[\n                pbar_id\n            ].container.children[index].max + len(futures)\n        progress_bars[pbar_id].total = progress_bars[pbar_id].total + len(futures)\n        progress_bars[pbar_id].refresh()\n    else:\n        progress_bars[pbar_id] = tqdm_notebook(\n            total=len(futures),\n            desc=\"Estimated completion of line \" + str(line_no),\n            bar_format=bar_format,\n        )\n    bar_lock.release()\n\n    threading.Thread(target=_show_time_updates, args=(progress_bars[pbar_id],)).start()\n\n    # TODO(https://github.com/modin-project/modin/issues/7429): Use\n    # frame-level engine config.\n    modin_engine = Engine.get()\n    engine_wrapper = None\n    if modin_engine == \"Ray\":\n        from modin.core.execution.ray.common.engine_wrapper import RayWrapper\n\n        engine_wrapper = RayWrapper\n    elif modin_engine == \"Unidist\":\n        from modin.core.execution.unidist.common.engine_wrapper import UnidistWrapper\n\n        engine_wrapper = UnidistWrapper\n    else:\n        raise 
NotImplementedError(\n            f\"ProgressBar feature is not supported for {modin_engine} engine.\"\n        )\n\n    for i in range(1, len(futures) + 1):\n        engine_wrapper.wait(futures, num_returns=i)\n        progress_bars[pbar_id].update(1)\n        progress_bars[pbar_id].refresh()\n    if progress_bars[pbar_id].n == progress_bars[pbar_id].total:\n        progress_bars[pbar_id].close()\n\n\ndef display_time_updates(bar):\n    \"\"\"\n    Start displaying the progress `bar` in a notebook.\n\n    Parameters\n    ----------\n    bar : tqdm.tqdm\n        The progress bar wrapper to display in a notebook cell.\n    \"\"\"\n    threading.Thread(target=_show_time_updates, args=(bar,)).start()\n\n\ndef _show_time_updates(p_bar):\n    \"\"\"\n    Refresh displayed progress bar `p_bar` periodically until it is complete.\n\n    Parameters\n    ----------\n    p_bar : tqdm.tqdm\n        The progress bar wrapper being displayed to refresh.\n    \"\"\"\n    while p_bar.total > p_bar.n:\n        time.sleep(1)\n        if p_bar.total > p_bar.n:\n            p_bar.refresh()\n\n\ndef progress_bar_wrapper(f):\n    \"\"\"\n    Wrap computation function inside a progress bar.\n\n    Spawns another thread which displays a progress bar showing\n    estimated completion time.\n\n    Parameters\n    ----------\n    f : callable\n        The name of the function to be wrapped.\n\n    Returns\n    -------\n    callable\n        Decorated version of `f` which reports progress.\n    \"\"\"\n    from functools import wraps\n\n    @wraps(f)\n    def magic(*args, **kwargs):\n        result_parts = f(*args, **kwargs)\n        if ProgressBar.get():\n            current_frame = inspect.currentframe()\n            function_name = None\n            while function_name != \"<module>\":\n                (\n                    filename,\n                    line_number,\n                    function_name,\n                    lines,\n                    index,\n                ) = 
inspect.getframeinfo(current_frame)\n                current_frame = current_frame.f_back\n            t = threading.Thread(\n                target=call_progress_bar,\n                args=(result_parts, line_number),\n            )\n            t.start()\n            # We need to know whether or not we are in a jupyter notebook\n            from IPython import get_ipython\n\n            try:\n                ipy_str = str(type(get_ipython()))\n                if \"zmqshell\" not in ipy_str:\n                    t.join()\n            except Exception:\n                pass\n        return result_parts\n\n    return magic\n"
  },
  {
    "path": "modin/core/execution/python/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Modin's functionality related to Python execution engine.\"\"\"\n"
  },
  {
    "path": "modin/core/execution/python/common/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Modin's functionality related to Python execution engine.\"\"\"\n\nfrom .engine_wrapper import PythonWrapper\n\n__all__ = [\"PythonWrapper\"]\n"
  },
  {
    "path": "modin/core/execution/python/common/engine_wrapper.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Modin's functionality related to Python execution engine.\"\"\"\n\n\nclass PythonWrapper:\n    \"\"\"Python engine wrapper serving for the compatibility purpose with other engines.\"\"\"\n\n    @classmethod\n    def deploy(cls, func, f_args=None, f_kwargs=None, num_returns=1):\n        \"\"\"\n        Run the passed function.\n\n        Parameters\n        ----------\n        func : callable\n        f_args : sequence, optional\n            Positional arguments to pass to the `func`.\n        f_kwargs : dict, optional\n            Keyword arguments to pass to the `func`.\n        num_returns : int, default: 1\n            Number of return values from the `func`.\n\n        Returns\n        -------\n        object\n            Returns the result of the `func`.\n        \"\"\"\n        args = [] if f_args is None else f_args\n        kwargs = {} if f_kwargs is None else f_kwargs\n        return func(*args, **kwargs)\n\n    @classmethod\n    def is_future(cls, item):\n        \"\"\"\n        Check if the item is a Future.\n\n        Parameters\n        ----------\n        item : object\n\n        Returns\n        -------\n        boolean\n            Always 
return false.\n        \"\"\"\n        return False\n\n    @classmethod\n    def materialize(cls, obj_id):\n        \"\"\"\n        Get the data from the data storage.\n\n        The method only serves for the compatibility purpose, what it actually\n        does is just return the passed value as is.\n\n        Parameters\n        ----------\n        obj_id : object\n\n        Returns\n        -------\n        object\n            The passed `obj_id` itself.\n        \"\"\"\n        return obj_id\n\n    @classmethod\n    def put(cls, data, **kwargs):\n        \"\"\"\n        Put data into the data storage.\n\n        The method only serves for the compatibility purpose, what it actually\n        does is just return the passed value as is.\n\n        Parameters\n        ----------\n        data : object\n        **kwargs : dict\n\n        Returns\n        -------\n        object\n            The passed `data` itself.\n        \"\"\"\n        return data\n"
  },
  {
    "path": "modin/core/execution/python/implementations/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Modin's functionality related to Python execution engine and optimized for specific storage formats.\"\"\"\n"
  },
  {
    "path": "modin/core/execution/python/implementations/pandas_on_python/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Modin's functionality related to Python execution engine and optimized for pandas storage format.\"\"\"\n"
  },
  {
    "path": "modin/core/execution/python/implementations/pandas_on_python/dataframe/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Base Modin Dataframe class optimized for pandas on Python execution.\"\"\"\n"
  },
  {
    "path": "modin/core/execution/python/implementations/pandas_on_python/dataframe/dataframe.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"\nModule contains class ``PandasOnPythonDataframe``.\n\n``PandasOnPythonDataframe`` is dataframe class with pandas storage format and Python engine.\n\"\"\"\n\nfrom modin.core.dataframe.pandas.dataframe.dataframe import PandasDataframe\nfrom modin.utils import _inherit_docstrings\n\nfrom ..partitioning.partition_manager import PandasOnPythonDataframePartitionManager\n\n\nclass PandasOnPythonDataframe(PandasDataframe):\n    \"\"\"\n    Class for dataframes with pandas storage format and Python engine.\n\n    ``PandasOnPythonDataframe`` doesn't implement any specific interfaces,\n    all functionality is inherited from the ``PandasDataframe`` class.\n\n    Parameters\n    ----------\n    partitions : np.ndarray\n        A 2D NumPy array of partitions.\n    index : sequence\n        The index for the dataframe. Converted to a ``pandas.Index``.\n    columns : sequence\n        The columns object for the dataframe. Converted to a ``pandas.Index``.\n    row_lengths : list, optional\n        The length of each partition in the rows. The \"height\" of\n        each of the block partitions. 
Is computed if not provided.\n    column_widths : list, optional\n        The width of each partition in the columns. The \"width\" of\n        each of the block partitions. Is computed if not provided.\n    dtypes : pandas.Series, optional\n        The data types for the dataframe columns.\n    pandas_backend : {\"pyarrow\", None}, optional\n        Backend used by pandas. None - means default NumPy backend.\n    \"\"\"\n\n    _partition_mgr_cls = PandasOnPythonDataframePartitionManager\n\n    @property\n    @_inherit_docstrings(PandasDataframe.engine)\n    def engine(self) -> str:\n        return \"Python\"\n"
  },
  {
    "path": "modin/core/execution/python/implementations/pandas_on_python/io/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Base IO classes optimized for pandas on Python execution.\"\"\"\n\nfrom .io import PandasOnPythonIO\n\n__all__ = [\n    \"PandasOnPythonIO\",\n]\n"
  },
  {
    "path": "modin/core/execution/python/implementations/pandas_on_python/io/io.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module for housing IO classes with pandas storage format and Python engine.\"\"\"\n\nfrom modin.core.execution.python.implementations.pandas_on_python.dataframe.dataframe import (\n    PandasOnPythonDataframe,\n)\nfrom modin.core.io import BaseIO\nfrom modin.core.storage_formats.pandas.query_compiler import PandasQueryCompiler\n\n\nclass PandasOnPythonIO(BaseIO):\n    \"\"\"\n    Class for storing IO functions operating on pandas storage format and Python engine.\n\n    Inherits default function implementations from ``BaseIO`` parent class.\n    \"\"\"\n\n    frame_cls = PandasOnPythonDataframe\n    query_compiler_cls = PandasQueryCompiler\n"
  },
  {
    "path": "modin/core/execution/python/implementations/pandas_on_python/partitioning/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Base Modin Dataframe classes related to its partitioning and optimized for pandas on Python execution.\"\"\"\n\nfrom .partition import PandasOnPythonDataframePartition\nfrom .partition_manager import PandasOnPythonDataframePartitionManager\nfrom .virtual_partition import (\n    PandasOnPythonDataframeAxisPartition,\n    PandasOnPythonDataframeColumnPartition,\n    PandasOnPythonDataframeRowPartition,\n)\n\n__all__ = [\n    \"PandasOnPythonDataframePartition\",\n    \"PandasOnPythonDataframePartitionManager\",\n    \"PandasOnPythonDataframeAxisPartition\",\n    \"PandasOnPythonDataframeColumnPartition\",\n    \"PandasOnPythonDataframeRowPartition\",\n]\n"
  },
  {
    "path": "modin/core/execution/python/implementations/pandas_on_python/partitioning/partition.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"The module defines interface for a partition with pandas storage format and Python engine.\"\"\"\n\nimport warnings\n\nfrom modin.core.dataframe.pandas.partitioning.partition import PandasDataframePartition\nfrom modin.core.execution.python.common import PythonWrapper\n\n\nclass PandasOnPythonDataframePartition(PandasDataframePartition):\n    \"\"\"\n    Partition class with interface for pandas storage format and Python engine.\n\n    Class holds the data and metadata for a single partition and implements\n    methods of parent abstract class ``PandasDataframePartition``.\n\n    Parameters\n    ----------\n    data : pandas.DataFrame\n        ``pandas.DataFrame`` that should be wrapped with this class.\n    length : int, optional\n        Length of `data` (number of rows in the input dataframe).\n    width : int, optional\n        Width of `data` (number of columns in the input dataframe).\n    call_queue : list, optional\n        Call queue of the partition (list with entities that should be called\n        before partition materialization).\n\n    Notes\n    -----\n    Objects of this class are treated as immutable by partition manager\n    subclasses. 
There is no logic for updating in-place.\n    \"\"\"\n\n    execution_wrapper = PythonWrapper\n\n    def __init__(self, data, length=None, width=None, call_queue=None):\n        super().__init__()\n        if hasattr(data, \"copy\"):\n            data = data.copy()\n        self._data = data\n        if call_queue is None:\n            call_queue = []\n        self.call_queue = call_queue\n        self._length_cache = length\n        self._width_cache = width\n\n    def get(self):\n        \"\"\"\n        Flush the `call_queue` and return copy of the data.\n\n        Returns\n        -------\n        pandas.DataFrame\n            Copy of DataFrame that was wrapped by this partition.\n\n        Notes\n        -----\n        Since this object is a simple wrapper, just return the copy of data.\n        \"\"\"\n        self.drain_call_queue()\n        return self._data.copy() if hasattr(self._data, \"copy\") else self._data\n\n    def apply(self, func, *args, **kwargs):\n        \"\"\"\n        Apply a function to the object wrapped by this partition.\n\n        Parameters\n        ----------\n        func : callable\n            Function to apply.\n        *args : iterable\n            Additional positional arguments to be passed in `func`.\n        **kwargs : dict\n            Additional keyword arguments to be passed in `func`.\n\n        Returns\n        -------\n        PandasOnPythonDataframePartition\n            New ``PandasOnPythonDataframePartition`` object.\n        \"\"\"\n\n        def call_queue_closure(data, call_queue):\n            \"\"\"\n            Apply callables from `call_queue` on copy of the `data` and return the result.\n\n            Parameters\n            ----------\n            data : pandas.DataFrame or pandas.Series\n                Data to use for computations.\n            call_queue : array-like\n                Array with callables and their args and kwargs to be applied to the `data`.\n\n            Returns\n            -------\n            
pandas.DataFrame or pandas.Series\n            \"\"\"\n            result = data.copy()\n            for func, f_args, f_kwargs in call_queue:\n                try:\n                    result = func(result, *f_args, **f_kwargs)\n                except Exception as err:\n                    self.call_queue = []\n                    raise err\n            return result\n\n        self._data = call_queue_closure(self._data, self.call_queue)\n        self.call_queue = []\n        with warnings.catch_warnings():\n            warnings.filterwarnings(\"ignore\", category=FutureWarning)\n            return self.__constructor__(func(self._data.copy(), *args, **kwargs))\n\n    def drain_call_queue(self):\n        \"\"\"Execute all operations stored in the call queue on the object wrapped by this partition.\"\"\"\n        if len(self.call_queue) == 0:\n            return\n        self.apply(lambda x: x)\n\n    def wait(self):\n        \"\"\"\n        Wait for completion of computations on the object wrapped by the partition.\n\n        Internally will be done by flushing the call queue.\n        \"\"\"\n        self.drain_call_queue()\n\n    @classmethod\n    def put(cls, obj):\n        \"\"\"\n        Create partition containing `obj`.\n\n        Parameters\n        ----------\n        obj : pandas.DataFrame\n            DataFrame to be put into the new partition.\n\n        Returns\n        -------\n        PandasOnPythonDataframePartition\n            New ``PandasOnPythonDataframePartition`` object.\n        \"\"\"\n        return cls(obj.copy(), len(obj.index), len(obj.columns))\n\n    @classmethod\n    def preprocess_func(cls, func):\n        \"\"\"\n        Preprocess a function before an ``apply`` call.\n\n        Parameters\n        ----------\n        func : callable\n            Function to preprocess.\n\n        Returns\n        -------\n        callable\n            An object that can be accepted by ``apply``.\n\n        Notes\n        -----\n        No special 
preprocessing action is required, so unmodified\n        `func` will be returned.\n        \"\"\"\n        return func\n"
  },
  {
    "path": "modin/core/execution/python/implementations/pandas_on_python/partitioning/partition_manager.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module houses class for managing partitions with pandas storage format and Python engine.\"\"\"\n\nfrom modin.core.dataframe.pandas.partitioning.partition_manager import (\n    PandasDataframePartitionManager,\n)\nfrom modin.core.execution.python.common import PythonWrapper\n\nfrom .partition import PandasOnPythonDataframePartition\nfrom .virtual_partition import (\n    PandasOnPythonDataframeColumnPartition,\n    PandasOnPythonDataframeRowPartition,\n)\n\n\nclass PandasOnPythonDataframePartitionManager(PandasDataframePartitionManager):\n    \"\"\"\n    Class for managing partitions with pandas storage format and Python engine.\n\n    Inherits all functionality from ``PandasDataframePartitionManager`` base class.\n    \"\"\"\n\n    _partition_class = PandasOnPythonDataframePartition\n    _column_partitions_class = PandasOnPythonDataframeColumnPartition\n    _row_partition_class = PandasOnPythonDataframeRowPartition\n    _execution_wrapper = PythonWrapper\n"
  },
  {
    "path": "modin/core/execution/python/implementations/pandas_on_python/partitioning/virtual_partition.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"The module defines interface for a virtual partition with pandas storage format and python engine.\"\"\"\n\nfrom modin.core.dataframe.pandas.partitioning.axis_partition import (\n    PandasDataframeAxisPartition,\n)\nfrom modin.utils import _inherit_docstrings\n\nfrom .partition import PandasOnPythonDataframePartition\n\n\nclass PandasOnPythonDataframeAxisPartition(PandasDataframeAxisPartition):\n    \"\"\"\n    Class defines axis partition interface with pandas storage format and Python engine.\n\n    Inherits functionality from ``PandasDataframeAxisPartition`` class.\n\n    Parameters\n    ----------\n    list_of_partitions : Union[list, PandasOnPythonDataframePartition]\n        List of ``PandasOnPythonDataframePartition`` and\n        ``PandasOnPythonDataframeVirtualPartition`` objects, or a single\n        ``PandasOnPythonDataframePartition``.\n    get_ip : bool, default: False\n        Whether to get node IP addresses to conforming partitions or not.\n    full_axis : bool, default: True\n        Whether or not the virtual partition encompasses the whole axis.\n    call_queue : list, optional\n        A list of tuples (callable, args, kwargs) that 
contains deferred calls.\n    length : int, optional\n        Length, or reference to length, of wrapped ``pandas.DataFrame``.\n    width : int, optional\n        Width, or reference to width, of wrapped ``pandas.DataFrame``.\n    \"\"\"\n\n    partition_type = PandasOnPythonDataframePartition\n\n\n@_inherit_docstrings(PandasOnPythonDataframeAxisPartition)\nclass PandasOnPythonDataframeColumnPartition(PandasOnPythonDataframeAxisPartition):\n    axis = 0\n\n\n@_inherit_docstrings(PandasOnPythonDataframeAxisPartition)\nclass PandasOnPythonDataframeRowPartition(PandasOnPythonDataframeAxisPartition):\n    axis = 1\n"
  },
  {
    "path": "modin/core/execution/ray/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Modin's functionality related to Ray execution engine.\"\"\"\n"
  },
  {
    "path": "modin/core/execution/ray/common/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Common utilities for Ray execution engine.\"\"\"\n\nfrom .engine_wrapper import MaterializationHook, RayWrapper, SignalActor\nfrom .utils import initialize_ray\n\n__all__ = [\n    \"initialize_ray\",\n    \"RayWrapper\",\n    \"MaterializationHook\",\n    \"SignalActor\",\n]\n"
  },
  {
    "path": "modin/core/execution/ray/common/deferred_execution.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module with classes and utilities for deferred remote execution in Ray workers.\"\"\"\n\nfrom enum import Enum\nfrom itertools import islice\nfrom typing import (\n    Any,\n    Callable,\n    Dict,\n    Generator,\n    Iterable,\n    List,\n    Optional,\n    Tuple,\n    Union,\n)\n\nimport pandas\nimport ray\nfrom ray._private.services import get_node_ip_address\n\nfrom modin.config import RayTaskCustomResources\nfrom modin.core.execution.ray.common import MaterializationHook, RayWrapper\nfrom modin.logging import get_logger\n\nObjectRefType = Union[ray.ObjectRef, None]\nObjectRefOrListType = Union[ObjectRefType, List[ObjectRefType]]\nListOrTuple = (list, tuple)\n\n\nclass DeferredExecution:\n    \"\"\"\n    Deferred execution task.\n\n    This class represents a single node in the execution tree. The input is either\n    an object reference or another node on which this node depends.\n    The output is calculated by the specified Callable.\n\n    If the input is a DeferredExecution node, it is executed first and the execution\n    output is used as the input for this one. All the executions are performed in a\n    single batch (i.e. 
using a single remote call) and the results are saved in all\n    the nodes that have multiple subscribers.\n\n    Parameters\n    ----------\n    data : ObjectRefType or DeferredExecution\n        The execution input.\n    func : callable or ObjectRefType\n        A function to be executed.\n    args : list or tuple\n        Additional positional arguments to be passed in `func`.\n    kwargs : dict\n        Additional keyword arguments to be passed in `func`.\n    num_returns : int, optional\n        The number of the return values.\n\n    Attributes\n    ----------\n    data : ObjectRefType or DeferredExecution\n        The execution input.\n    func : callable or ObjectRefType\n        A function to be executed.\n    args : list or tuple\n        Additional positional arguments to be passed in `func`.\n    kwargs : dict\n        Additional keyword arguments to be passed in `func`.\n    num_returns : int\n        The number of the return values.\n    flat_args : bool\n        True means that there are no lists or DeferredExecution objects in `args`.\n        In this case, no arguments processing is performed and `args` is passed\n        to the remote method as is.\n    flat_kwargs : bool\n        The same as `flat_args` but for the `kwargs` values.\n    \"\"\"\n\n    def __init__(\n        self,\n        data: Union[\n            ObjectRefType,\n            \"DeferredExecution\",\n            List[Union[ObjectRefType, \"DeferredExecution\"]],\n        ],\n        func: Union[Callable, ObjectRefType],\n        args: Union[List[Any], Tuple[Any]],\n        kwargs: Dict[str, Any],\n        num_returns=1,\n    ):\n        if isinstance(data, DeferredExecution):\n            data.subscribe()\n        self.data = data\n        self.func = func\n        self.args = args\n        self.kwargs = kwargs\n        self.num_returns = num_returns\n        self.flat_args = self._flat_args(args)\n        self.flat_kwargs = self._flat_args(kwargs.values())\n        
self.subscribers = 0\n\n    @classmethod\n    def _flat_args(cls, args: Iterable):\n        \"\"\"\n        Check if the arguments list is flat and subscribe to all `DeferredExecution` objects.\n\n        Parameters\n        ----------\n        args : Iterable\n\n        Returns\n        -------\n        bool\n        \"\"\"\n        flat = True\n        for arg in args:\n            if isinstance(arg, DeferredExecution):\n                flat = False\n                arg.subscribe()\n            elif isinstance(arg, ListOrTuple):\n                flat = False\n                cls._flat_args(arg)\n        return flat\n\n    def exec(\n        self,\n    ) -> Tuple[ObjectRefOrListType, Union[\"MetaList\", List], Union[int, List[int]]]:\n        \"\"\"\n        Execute this task, if required.\n\n        Returns\n        -------\n        tuple\n            The execution result, MetaList, containing the length, width and\n            the worker's ip address (the last value in the list) and the values\n            offset in the list. I.e. length = meta_list[offset],\n            width = meta_list[offset + 1], ip = meta_list[-1].\n        \"\"\"\n        if self.has_result:\n            return self.data, self.meta, self.meta_offset\n\n        if (\n            not isinstance(self.data, DeferredExecution)\n            and self.flat_args\n            and self.flat_kwargs\n            and self.num_returns == 1\n        ):\n            result, length, width, ip = remote_exec_func.options(\n                resources=RayTaskCustomResources.get()\n            ).remote(self.func, self.data, *self.args, **self.kwargs)\n            meta = MetaList([length, width, ip])\n            self._set_result(result, meta, 0)\n            return result, meta, 0\n\n        # If there are no subscribers, we still need the result here. We don't need to decrement\n        # it back. 
After the execution, the result is saved and the counter has no effect.\n        self.subscribers += 2\n        consumers, output = self._deconstruct()\n        # The last result is the MetaList, so adding +1 here.\n        num_returns = sum(c.num_returns for c in consumers) + 1\n        results = self._remote_exec_chain(num_returns, *output)\n        meta = MetaList(results.pop())\n        meta_offset = 0\n        results = iter(results)\n        for de in consumers:\n            if de.num_returns == 1:\n                de._set_result(next(results), meta, meta_offset)\n                meta_offset += 2\n            else:\n                res = list(islice(results, num_returns))\n                offsets = list(range(0, 2 * num_returns, 2))\n                de._set_result(res, meta, offsets)\n                meta_offset += 2 * num_returns\n        return self.data, self.meta, self.meta_offset\n\n    @property\n    def has_result(self):\n        \"\"\"\n        Return true if this task has already been executed and the result is set.\n\n        Returns\n        -------\n        bool\n        \"\"\"\n        return not hasattr(self, \"func\")\n\n    def subscribe(self):\n        \"\"\"\n        Increment the `subscribers` counter.\n\n        Subscriber is any instance that could trigger the execution of this task.\n        In case of multiple subscribers, the execution could be triggered multiple\n        times. To prevent multiple executions, the execution result is returned\n        from the worker and saved in this instance. 
Subsequent calls to `exec()`\n        return the previously saved result.\n        \"\"\"\n        self.subscribers += 1\n\n    def unsubscribe(self):\n        \"\"\"Decrement the `subscribers` counter.\"\"\"\n        self.subscribers -= 1\n        assert self.subscribers >= 0\n\n    def _deconstruct(self) -> Tuple[List[\"DeferredExecution\"], List[Any]]:\n        \"\"\"\n        Convert the specified execution tree to a flat list.\n\n        This is required for the automatic Ray object references\n        materialization before passing the list to a Ray worker.\n\n        The format of the list is the following:\n        <input object> sequence<<function> <n><args> <n><kwargs> <ref> <nret>>...\n        If <n> before <args> is >= 0, then the next n objects are the function arguments.\n        If it is -1, it means that the method arguments contain list and/or\n        DeferredExecution (chain) objects. In this case the next values are read\n        one by one until `_Tag.END` is encountered. If the value is `_Tag.LIST`,\n        then the next sequence of values up to `_Tag.END` is converted to list.\n        If the value is `_Tag.CHAIN`, then the next sequence of values up to\n        `_Tag.END` has exactly the same format, as described here.\n        If the value is `_Tag.REF`, then the next value is a reference id, i.e.\n        the actual value should be retrieved by this id from the previously\n        saved objects. The <input object> could also be `_Tag.REF` or `_Tag.LIST`.\n\n        If <n> before <kwargs> is >=0, then the next 2*n values are the argument\n        names and values in the following format - [name1, value1, name2, value2...].\n        If it's -1, then the next values are converted to list in the same way as\n        <args> and the argument names are the next len(<args>) values.\n\n        <ref> is an integer reference id. 
If it's not 0, then there is another\n        chain referring to the execution result of this method and, thus, it must\n        be saved so that other chains could retrieve the object by the id.\n\n        <nret> field contains either the `num_returns` value or 0. If it's 0, the\n        execution result is not returned, but is just passed to the next task in the\n        chain. If it's 1, the result is returned as is. Otherwise, it's expected that\n        the result is iterable and the specified number of values is returned from\n        the iterator. The values lengths and widths are added to the meta list.\n\n        Returns\n        -------\n        tuple of list\n            * The first list is the result consumers.\n                If a DeferredExecution has multiple subscribers, the execution result\n                should be returned and saved in order to avoid duplicate executions.\n                These DeferredExecution tasks are added to this list and, after the\n                execution, the results are passed to the ``_set_result()`` method of\n                each task.\n            * The second is a flat list of arguments that could be passed to the remote executor.\n        \"\"\"\n        stack = []\n        result_consumers = []\n        output = []\n        # Using stack and generators to avoid the ``RecursionError``s.\n        stack.append(self._deconstruct_chain(self, output, stack, result_consumers))\n        while stack:\n            try:\n                gen = stack.pop()\n                next_gen = next(gen)\n                stack.append(gen)\n                stack.append(next_gen)\n            except StopIteration:\n                pass\n        return result_consumers, output\n\n    @classmethod\n    def _deconstruct_chain(\n        cls,\n        de: \"DeferredExecution\",\n        output: List,\n        stack: List,\n        result_consumers: List[\"DeferredExecution\"],\n    ):\n        \"\"\"\n        Deconstruct the specified 
DeferredExecution chain.\n\n        Parameters\n        ----------\n        de : DeferredExecution\n            The chain to be deconstructed.\n        output : list\n            Put the arguments to this list.\n        stack : list\n            Used to eliminate recursive calls, that may lead to the RecursionError.\n        result_consumers : list of DeferredExecution\n            The result consumers.\n\n        Yields\n        ------\n        Generator\n            The ``_deconstruct_list()`` generator.\n        \"\"\"\n        out_append = output.append\n        out_extend = output.extend\n        while True:\n            de.unsubscribe()\n            if (out_pos := getattr(de, \"out_pos\", None)) and not de.has_result:\n                out_append(_Tag.REF)\n                out_append(out_pos)\n                output[out_pos] = out_pos\n                if de.subscribers == 0:\n                    # We may have subscribed to the same node multiple times.\n                    # It could happen, for example, if it's passed to the args\n                    # multiple times, or it's one of the parent nodes and also\n                    # passed to the args. 
In this case, there are no multiple\n                    # subscribers, and we don't need to return the result.\n                    output[out_pos + 1] = 0\n                    result_consumers.remove(de)\n                break\n            elif not isinstance(data := de.data, DeferredExecution):\n                if isinstance(data, ListOrTuple):\n                    yield cls._deconstruct_list(\n                        data, output, stack, result_consumers, out_append\n                    )\n                else:\n                    out_append(data)\n                if not de.has_result:\n                    stack.append(de)\n                break\n            else:\n                stack.append(de)\n                de = data\n\n        while stack and isinstance(stack[-1], DeferredExecution):\n            de: DeferredExecution = stack.pop()\n            args = de.args\n            kwargs = de.kwargs\n            out_append(de.func)\n            if de.flat_args:\n                out_append(len(args))\n                out_extend(args)\n            else:\n                out_append(-1)\n                yield cls._deconstruct_list(\n                    args, output, stack, result_consumers, out_append\n                )\n            if de.flat_kwargs:\n                out_append(len(kwargs))\n                for item in kwargs.items():\n                    out_extend(item)\n            else:\n                out_append(-1)\n                yield cls._deconstruct_list(\n                    kwargs.values(), output, stack, result_consumers, out_append\n                )\n                out_extend(kwargs)\n\n            out_append(0)  # Placeholder for ref id\n            if de.subscribers > 0:\n                # Ref id. 
This is the index in the output list.\n                de.out_pos = len(output) - 1\n                result_consumers.append(de)\n                out_append(de.num_returns)  # Return result for this node\n            else:\n                out_append(0)  # Do not return result for this node\n\n    @classmethod\n    def _deconstruct_list(\n        cls,\n        lst: Iterable,\n        output: List,\n        stack: List,\n        result_consumers: List[\"DeferredExecution\"],\n        out_append: Callable,\n    ):\n        \"\"\"\n        Deconstruct the specified list.\n\n        Parameters\n        ----------\n        lst : list\n        output : list\n        stack : list\n        result_consumers : list\n        out_append : Callable\n            The reference to the ``list.append()`` method.\n\n        Yields\n        ------\n        Generator\n            Either ``_deconstruct_list()`` or ``_deconstruct_chain()`` generator.\n        \"\"\"\n        for obj in lst:\n            if isinstance(obj, DeferredExecution):\n                if out_pos := getattr(obj, \"out_pos\", None):\n                    obj.unsubscribe()\n                    if obj.has_result:\n                        out_append(obj.data)\n                    else:\n                        out_append(_Tag.REF)\n                        out_append(out_pos)\n                        output[out_pos] = out_pos\n                        if obj.subscribers == 0:\n                            output[out_pos + 1] = 0\n                            result_consumers.remove(obj)\n                else:\n                    out_append(_Tag.CHAIN)\n                    yield cls._deconstruct_chain(obj, output, stack, result_consumers)\n                    out_append(_Tag.END)\n            elif isinstance(obj, ListOrTuple):\n                out_append(_Tag.LIST)\n                yield cls._deconstruct_list(\n                    obj, output, stack, result_consumers, out_append\n                )\n            else:\n       
         out_append(obj)\n        out_append(_Tag.END)\n\n    @staticmethod\n    def _remote_exec_chain(num_returns: int, *args: Tuple) -> List[Any]:\n        \"\"\"\n        Execute the deconstructed chain in a worker process.\n\n        Parameters\n        ----------\n        num_returns : int\n            The number of return values.\n        *args : tuple\n            A deconstructed chain to be executed.\n\n        Returns\n        -------\n        list\n            The execution results. The last element of this list is the ``MetaList``.\n        \"\"\"\n        # Prefer _remote_exec_single_chain(). It has fewer arguments and\n        # does not require the num_returns to be specified in options.\n        if num_returns == 2:\n            return _remote_exec_single_chain.options(\n                resources=RayTaskCustomResources.get()\n            ).remote(*args)\n        else:\n            return _remote_exec_multi_chain.options(\n                num_returns=num_returns, resources=RayTaskCustomResources.get()\n            ).remote(num_returns, *args)\n\n    def _set_result(\n        self,\n        result: ObjectRefOrListType,\n        meta: \"MetaList\",\n        meta_offset: Union[int, List[int]],\n    ):\n        \"\"\"\n        Set the execution result.\n\n        Parameters\n        ----------\n        result : ObjectRefOrListType\n        meta : MetaList\n        meta_offset : int or list of int\n        \"\"\"\n        del self.func, self.args, self.kwargs, self.flat_args, self.flat_kwargs\n        self.data = result\n        self.meta = meta\n        self.meta_offset = meta_offset\n\n    def __reduce__(self):\n        \"\"\"Not serializable.\"\"\"\n        raise NotImplementedError(\"DeferredExecution is not serializable!\")\n\n\nclass MetaList:\n    \"\"\"\n    Meta information, containing the result lengths and the worker address.\n\n    Parameters\n    ----------\n    obj : ray.ObjectID or list\n    \"\"\"\n\n    def __init__(self, obj: 
Union[ray.ObjectID, List]):\n        self._obj = obj\n\n    def __getitem__(self, index):\n        \"\"\"\n        Get item at the specified index.\n\n        Parameters\n        ----------\n        index : int\n\n        Returns\n        -------\n        Any\n        \"\"\"\n        obj = self._obj\n        return obj[index] if isinstance(obj, list) else MetaListHook(self, index)\n\n    def __setitem__(self, index, value):\n        \"\"\"\n        Set item at the specified index.\n\n        Parameters\n        ----------\n        index : int\n        value : Any\n        \"\"\"\n        obj = self._obj\n        if not isinstance(obj, list):\n            self._obj = obj = RayWrapper.materialize(obj)\n        obj[index] = value\n\n\nclass MetaListHook(MaterializationHook):\n    \"\"\"\n    Used by MetaList.__getitem__() for lazy materialization and getting a single value from the list.\n\n    Parameters\n    ----------\n    meta : MetaList\n        Non-materialized list to get the value from.\n    idx : int\n        The value index in the list.\n    \"\"\"\n\n    def __init__(self, meta: MetaList, idx: int):\n        self.meta = meta\n        self.idx = idx\n\n    def pre_materialize(self):\n        \"\"\"\n        Get item at self.idx or object ref if not materialized.\n\n        Returns\n        -------\n        object\n        \"\"\"\n        obj = self.meta._obj\n        return obj[self.idx] if isinstance(obj, list) else obj\n\n    def post_materialize(self, materialized):\n        \"\"\"\n        Save the materialized list in self.meta and get the item at self.idx.\n\n        Parameters\n        ----------\n        materialized : list\n\n        Returns\n        -------\n        object\n        \"\"\"\n        self.meta._obj = materialized\n        return materialized[self.idx]\n\n\nclass _Tag(Enum):  # noqa: PR01\n    \"\"\"\n    A set of special values used for the method arguments de/construction.\n\n    See ``DeferredExecution._deconstruct()`` for 
details.\n    \"\"\"\n\n    # The next item is an execution chain\n    CHAIN = 0\n    # The next item is a reference\n    REF = 1\n    # The next item is a list\n    LIST = 2\n    # End of list or chain\n    END = 3\n\n\nclass _RemoteExecutor:\n    \"\"\"Remote functions for DeferredExecution.\"\"\"\n\n    @staticmethod\n    def exec_func(fn: Callable, obj: Any, args: Tuple, kwargs: Dict) -> Any:\n        \"\"\"\n        Execute the specified function.\n\n        Parameters\n        ----------\n        fn : Callable\n        obj : Any\n        args : Tuple\n        kwargs : dict\n\n        Returns\n        -------\n        Any\n        \"\"\"\n        try:\n            try:\n                return fn(obj, *args, **kwargs)\n                # Sometimes Arrow forces us to make a copy of an object before we operate on it. We\n                # don't want the error to propagate to the user, and we want to avoid copying unless\n                # we absolutely have to.\n            except ValueError as err:\n                if isinstance(obj, (pandas.DataFrame, pandas.Series)):\n                    return fn(obj.copy(), *args, **kwargs)\n                else:\n                    raise err\n        except Exception as err:\n            get_logger().error(\n                f\"{err}. fn={fn}, obj={obj}, args={args}, kwargs={kwargs}\"\n            )\n            raise err\n\n    @classmethod\n    def construct(cls, num_returns: int, args: Tuple):  # pragma: no cover\n        \"\"\"\n        Construct and execute the specified chain.\n\n        This function is called in a worker process. 
The last value, returned by\n        this generator, is the meta list, containing the objects lengths and widths\n        and the worker ip address, as the last value in the list.\n\n        Parameters\n        ----------\n        num_returns : int\n        args : tuple\n\n        Yields\n        ------\n        Any\n            The execution results and the MetaList as the last value.\n        \"\"\"\n        chain = list(reversed(args))\n        meta = []\n        try:\n            stack = [cls.construct_chain(chain, {}, meta, None)]\n            while stack:\n                try:\n                    gen = stack.pop()\n                    obj = next(gen)\n                    stack.append(gen)\n                    if isinstance(obj, Generator):\n                        stack.append(obj)\n                    else:\n                        yield obj\n                except StopIteration:\n                    pass\n        except Exception as err:\n            get_logger().error(f\"{err}. 
args={args}, chain={list(reversed(chain))}\")\n            raise err\n        meta.append(get_node_ip_address())\n        yield meta\n\n    @classmethod\n    def construct_chain(\n        cls,\n        chain: List,\n        refs: Dict[int, Any],\n        meta: List,\n        lst: Optional[List],\n    ):  # pragma: no cover\n        \"\"\"\n        Construct the chain and execute it one by one.\n\n        Parameters\n        ----------\n        chain : list\n            A flat list containing the execution tree, deconstructed by\n            ``DeferredExecution._deconstruct()``.\n        refs : dict\n            If an execution result is required for multiple chains, the\n            reference to this result is saved in this dict.\n        meta : list\n            The lengths of the returned objects are added to this list.\n        lst : list\n            If specified, the execution result is added to this list.\n            This is used when a chain is passed as an argument to a\n            DeferredExecution task.\n\n        Yields\n        ------\n        Any\n            Either the ``construct_list()`` generator or the execution results.\n        \"\"\"\n        pop = chain.pop\n        tg_e = _Tag.END\n\n        obj = pop()\n        if obj is _Tag.REF:\n            obj = refs[pop()]\n        elif obj is _Tag.LIST:\n            obj = []\n            yield cls.construct_list(obj, chain, refs, meta)\n\n        while chain:\n            fn = pop()\n            if fn == tg_e:\n                lst.append(obj)\n                break\n\n            if (args_len := pop()) >= 0:\n                if args_len == 0:\n                    args = []\n                else:\n                    args = chain[-args_len:]\n                    del chain[-args_len:]\n                    args.reverse()\n            else:\n                args = []\n                yield cls.construct_list(args, chain, refs, meta)\n            if (args_len := pop()) >= 0:\n                kwargs = 
{pop(): pop() for _ in range(args_len)}\n            else:\n                values = []\n                yield cls.construct_list(values, chain, refs, meta)\n                kwargs = {pop(): v for v in values}\n\n            obj = cls.exec_func(fn, obj, args, kwargs)\n\n            if ref := pop():  # <ref> is not 0 - adding the result to refs\n                refs[ref] = obj\n            if (num_returns := pop()) == 0:\n                continue\n\n            itr = iter([obj] if num_returns == 1 else obj)\n            for _ in range(num_returns):\n                obj = next(itr)\n                meta.append(len(obj) if hasattr(obj, \"__len__\") else 0)\n                meta.append(len(obj.columns) if hasattr(obj, \"columns\") else 0)\n                yield obj\n\n    @classmethod\n    def construct_list(\n        cls,\n        lst: List,\n        chain: List,\n        refs: Dict[int, Any],\n        meta: List,\n    ):  # pragma: no cover\n        \"\"\"\n        Construct the list.\n\n        Parameters\n        ----------\n        lst : list\n        chain : list\n        refs : dict\n        meta : list\n\n        Yields\n        ------\n        Any\n            Either ``construct_chain()`` or ``construct_list()`` generator.\n        \"\"\"\n        pop = chain.pop\n        lst_append = lst.append\n        while True:\n            obj = pop()\n            if isinstance(obj, _Tag):\n                if obj == _Tag.END:\n                    break\n                elif obj == _Tag.CHAIN:\n                    yield cls.construct_chain(chain, refs, meta, lst)\n                elif obj == _Tag.LIST:\n                    lst_append([])\n                    yield cls.construct_list(lst[-1], chain, refs, meta)\n                elif obj is _Tag.REF:\n                    lst_append(refs[pop()])\n                else:\n                    raise ValueError(f\"Unexpected tag {obj}\")\n            else:\n                lst_append(obj)\n\n    def __reduce__(self):\n        
\"\"\"\n        Use a single instance on deserialization.\n\n        Returns\n        -------\n        str\n            Returns the ``_REMOTE_EXEC`` attribute name.\n        \"\"\"\n        return \"_REMOTE_EXEC\"\n\n\n_REMOTE_EXEC = _RemoteExecutor()\n\n\n@ray.remote(num_returns=4)\ndef remote_exec_func(\n    fn: Callable,\n    obj: Any,\n    *flat_args: Tuple,\n    remote_executor=_REMOTE_EXEC,\n    **flat_kwargs: Dict,\n):  # pragma: no cover\n    \"\"\"\n    Execute the specified function with the arguments in a worker process.\n\n    The object `obj` is passed to the function as the first argument.\n    Note: all the arguments must be flat, i.e. no lists, no chains.\n\n    Parameters\n    ----------\n    fn : Callable\n    obj : Any\n    *flat_args : list\n    remote_executor : _RemoteExecutor, default: _REMOTE_EXEC\n        Do not change, it's used to avoid excessive serializations.\n    **flat_kwargs : dict\n\n    Returns\n    -------\n    tuple[Any, int, int, str]\n    The execution result, the result length and width, the worked address.\n    \"\"\"\n    obj = remote_executor.exec_func(fn, obj, flat_args, flat_kwargs)\n    return (\n        obj,\n        len(obj) if hasattr(obj, \"__len__\") else 0,\n        len(obj.columns) if hasattr(obj, \"columns\") else 0,\n        get_node_ip_address(),\n    )\n\n\n@ray.remote(num_returns=2)\ndef _remote_exec_single_chain(\n    *args: Tuple, remote_executor=_REMOTE_EXEC\n) -> Generator:  # pragma: no cover\n    \"\"\"\n    Execute the deconstructed chain with a single return value in a worker process.\n\n    Parameters\n    ----------\n    *args : tuple\n        A deconstructed chain to be executed.\n    remote_executor : _RemoteExecutor, default: _REMOTE_EXEC\n        Do not change, it's used to avoid excessive serializations.\n\n    Returns\n    -------\n    Generator\n    \"\"\"\n    return remote_executor.construct(num_returns=2, args=args)\n\n\n@ray.remote\ndef _remote_exec_multi_chain(\n    num_returns: int, 
*args: Tuple, remote_executor=_REMOTE_EXEC\n) -> Generator:  # pragma: no cover\n    \"\"\"\n    Execute the deconstructed chain with multiple return values in a worker process.\n\n    Parameters\n    ----------\n    num_returns : int\n        The number of return values.\n    *args : tuple\n        A deconstructed chain to be executed.\n    remote_executor : _RemoteExecutor, default: _REMOTE_EXEC\n        Do not change, it's used to avoid excessive serializations.\n\n    Returns\n    -------\n    Generator\n    \"\"\"\n    return remote_executor.construct(num_returns, args)\n"
  },
  {
    "path": "modin/core/execution/ray/common/engine_wrapper.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"\nThe module with helper mixin for executing functions remotely.\n\nTo be used as a piece of building a Ray-based engine.\n\"\"\"\n\nimport asyncio\nimport os\nfrom types import FunctionType\nfrom typing import Sequence\n\nimport pandas\nimport ray\n\nfrom modin.config import RayTaskCustomResources\nfrom modin.error_message import ErrorMessage\n\n\n@ray.remote\ndef _deploy_ray_func(func, *args, return_pandas_df=None, **kwargs):  # pragma: no cover\n    \"\"\"\n    Wrap `func` to ease calling it remotely.\n\n    Parameters\n    ----------\n    func : callable\n        A local function that we want to call remotely.\n    *args : iterable\n        Positional arguments to pass to `func` when calling remotely.\n    return_pandas_df : bool, optional\n        Whether to convert the result of `func` to a pandas DataFrame or not.\n    **kwargs : dict\n        Keyword arguments to pass to `func` when calling remotely.\n\n    Returns\n    -------\n    ray.ObjectRef or list\n        Ray identifier of the result being put to Plasma store.\n    \"\"\"\n    result = func(*args, **kwargs)\n    if return_pandas_df and not isinstance(result, pandas.DataFrame):\n        result 
= pandas.DataFrame(result)\n    return result\n\n\nclass RayWrapper:\n    \"\"\"Mixin that provides means of running functions remotely and getting local results.\"\"\"\n\n    _func_cache = {}\n\n    @classmethod\n    def deploy(\n        cls, func, f_args=None, f_kwargs=None, return_pandas_df=None, num_returns=1\n    ):\n        \"\"\"\n        Run local `func` remotely.\n\n        Parameters\n        ----------\n        func : callable or ray.ObjectID\n            The function to perform.\n        f_args : list or tuple, optional\n            Positional arguments to pass to ``func``.\n        f_kwargs : dict, optional\n            Keyword arguments to pass to ``func``.\n        return_pandas_df : bool, optional\n            Whether to convert the result of `func` to a pandas DataFrame or not.\n        num_returns : int, default: 1\n            Amount of return values expected from `func`.\n\n        Returns\n        -------\n        ray.ObjectRef or list\n            Ray identifier of the result being put to Plasma store.\n        \"\"\"\n        args = [] if f_args is None else f_args\n        kwargs = {} if f_kwargs is None else f_kwargs\n        return _deploy_ray_func.options(\n            num_returns=num_returns, resources=RayTaskCustomResources.get()\n        ).remote(func, *args, return_pandas_df=return_pandas_df, **kwargs)\n\n    @classmethod\n    def is_future(cls, item):\n        \"\"\"\n        Check if the item is a Future.\n\n        Parameters\n        ----------\n        item : ray.ObjectID or object\n            Future or object to check.\n\n        Returns\n        -------\n        boolean\n            If the value is a future.\n        \"\"\"\n        return isinstance(item, ObjectRefTypes)\n\n    @classmethod\n    def materialize(cls, obj_id):\n        \"\"\"\n        Get the value of object from the Plasma store.\n\n        Parameters\n        ----------\n        obj_id : ray.ObjectID\n            Ray object identifier to get the value by.\n\n 
       Returns\n        -------\n        object\n            Whatever was identified by `obj_id`.\n        \"\"\"\n        if isinstance(obj_id, MaterializationHook):\n            obj = obj_id.pre_materialize()\n            return (\n                obj_id.post_materialize(ray.get(obj))\n                if isinstance(obj, ray.ObjectRef)\n                else obj\n            )\n\n        if not isinstance(obj_id, Sequence):\n            return ray.get(obj_id) if isinstance(obj_id, ray.ObjectRef) else obj_id\n\n        if all(isinstance(obj, ray.ObjectRef) for obj in obj_id):\n            return ray.get(obj_id)\n\n        ids = {}\n        result = []\n        for obj in obj_id:\n            if not isinstance(obj, ObjectRefTypes):\n                result.append(obj)\n                continue\n            if isinstance(obj, MaterializationHook):\n                oid = obj.pre_materialize()\n                if isinstance(oid, ray.ObjectRef):\n                    hook = obj\n                    obj = oid\n                else:\n                    result.append(oid)\n                    continue\n            else:\n                hook = None\n\n            idx = ids.get(obj, None)\n            if idx is None:\n                ids[obj] = idx = len(ids)\n            if hook is None:\n                result.append(obj)\n            else:\n                hook._materialized_idx = idx\n                result.append(hook)\n\n        if len(ids) == 0:\n            return result\n\n        materialized = ray.get(list(ids.keys()))\n        for i in range(len(result)):\n            if isinstance((obj := result[i]), ObjectRefTypes):\n                if isinstance(obj, MaterializationHook):\n                    result[i] = obj.post_materialize(\n                        materialized[obj._materialized_idx]\n                    )\n                else:\n                    result[i] = materialized[ids[obj]]\n        return result\n\n    @classmethod\n    def put(cls, data, 
**kwargs):\n        \"\"\"\n        Store an object in the object store.\n\n        Parameters\n        ----------\n        data : object\n            The Python object to be stored.\n        **kwargs : dict\n            Additional keyword arguments.\n\n        Returns\n        -------\n        ray.ObjectID\n            Ray object identifier to get the value by.\n        \"\"\"\n        if isinstance(data, FunctionType):\n            qname = data.__qualname__\n            if \"<locals>\" not in qname and \"<lambda>\" not in qname:\n                ref = cls._func_cache.get(data, None)\n                if ref is None:\n                    if len(cls._func_cache) < 1024:\n                        ref = ray.put(data)\n                        cls._func_cache[data] = ref\n                    else:\n                        msg = \"To many functions in the RayWrapper cache!\"\n                        assert \"MODIN_GITHUB_CI\" not in os.environ, msg\n                        ErrorMessage.warn(msg)\n                return ref\n        return ray.put(data, **kwargs)\n\n    @classmethod\n    def wait(cls, obj_ids, num_returns=None):\n        \"\"\"\n        Wait on the objects without materializing them (blocking operation).\n\n        ``ray.wait`` assumes a list of unique object references: see\n        https://github.com/modin-project/modin/issues/5045\n\n        Parameters\n        ----------\n        obj_ids : list, scalar\n        num_returns : int, optional\n        \"\"\"\n        if not isinstance(obj_ids, Sequence):\n            obj_ids = list(obj_ids)\n\n        ids = set()\n        for obj in obj_ids:\n            if isinstance(obj, MaterializationHook):\n                obj = obj.pre_materialize()\n            if isinstance(obj, ray.ObjectRef):\n                ids.add(obj)\n\n        if num_ids := len(ids):\n            ray.wait(list(ids), num_returns=num_returns or num_ids)\n\n\n@ray.remote\nclass SignalActor:  # pragma: no cover\n    \"\"\"\n    Help synchronize 
across tasks and actors on cluster.\n\n    For details see: https://docs.ray.io/en/latest/advanced.html?highlight=signalactor#multi-node-synchronization-using-an-actor\n\n    Parameters\n    ----------\n    event_count : int\n        Number of events required for synchronization.\n    \"\"\"\n\n    def __init__(self, event_count: int):\n        self.events = [asyncio.Event() for _ in range(event_count)]\n\n    def send(self, event_idx: int):\n        \"\"\"\n        Indicate that event with `event_idx` has occurred.\n\n        Parameters\n        ----------\n        event_idx : int\n        \"\"\"\n        self.events[event_idx].set()\n\n    async def wait(self, event_idx: int):\n        \"\"\"\n        Wait until event with `event_idx` has occurred.\n\n        Parameters\n        ----------\n        event_idx : int\n        \"\"\"\n        await self.events[event_idx].wait()\n\n    def is_set(self, event_idx: int) -> bool:\n        \"\"\"\n        Check that event with `event_idx` had occurred or not.\n\n        Parameters\n        ----------\n        event_idx : int\n\n        Returns\n        -------\n        bool\n        \"\"\"\n        return self.events[event_idx].is_set()\n\n\nclass MaterializationHook:\n    \"\"\"The Hook is called during the materialization and allows performing pre/post computations.\"\"\"\n\n    def pre_materialize(self):\n        \"\"\"\n        Get an object reference to be materialized or a pre-computed value.\n\n        Returns\n        -------\n        ray.ObjectRef or object\n        \"\"\"\n        raise NotImplementedError()\n\n    def post_materialize(self, materialized):\n        \"\"\"\n        Perform computations on the materialized object.\n\n        Parameters\n        ----------\n        materialized : object\n            The materialized object to be post-computed.\n\n        Returns\n        -------\n        object\n            The post-computed object.\n        \"\"\"\n        raise NotImplementedError()\n\n    def 
__reduce__(self):\n        \"\"\"\n        Replace this hook with the materialized object on serialization.\n\n        Returns\n        -------\n        tuple\n        \"\"\"\n        data = RayWrapper.materialize(self)\n        if not isinstance(data, int):\n            raise NotImplementedError(\"Only integers are currently supported\")\n        return int, (data,)\n\n\nObjectRefTypes = (ray.ObjectRef, MaterializationHook)\n"
  },
  {
    "path": "modin/core/execution/ray/common/utils.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"The module holds utility and initialization routines for Modin on Ray.\"\"\"\n\nimport os\nimport sys\nimport warnings\nfrom typing import Optional\n\nimport psutil\nimport ray\nfrom packaging import version\n\nfrom modin.config import (\n    CIAWSAccessKeyID,\n    CIAWSSecretAccessKey,\n    CpuCount,\n    GithubCI,\n    GpuCount,\n    IsRayCluster,\n    Memory,\n    NPartitions,\n    RayInitCustomResources,\n    RayRedisAddress,\n    RayRedisPassword,\n    ValueSource,\n)\nfrom modin.core.execution.utils import set_env\nfrom modin.error_message import ErrorMessage\n\nfrom .engine_wrapper import ObjectRefTypes, RayWrapper\n\n_OBJECT_STORE_TO_SYSTEM_MEMORY_RATIO = 0.6\n# This constant should be in sync with the limit in ray, which is private,\n# not exposed to users, and not documented:\n# https://github.com/ray-project/ray/blob/4692e8d8023e789120d3f22b41ffb136b50f70ea/python/ray/_private/ray_constants.py#L57-L62\n_MAC_OBJECT_STORE_LIMIT_BYTES = 2 * 2**30\n\n_RAY_IGNORE_UNHANDLED_ERRORS_VAR = \"RAY_IGNORE_UNHANDLED_ERRORS\"\n\nObjectIDType = ObjectRefTypes\n\n\ndef initialize_ray(\n    override_is_cluster=False,\n    override_redis_address: str = None,\n    
override_redis_password: str = None,\n):\n    \"\"\"\n    Initialize Ray based on parameters, ``modin.config`` variables and internal defaults.\n\n    Parameters\n    ----------\n    override_is_cluster : bool, default: False\n        Whether to override the detection of Modin being run in a cluster\n        and always assume this runs on cluster head node.\n        This also overrides Ray worker detection and always runs the initialization\n        function (runs from main thread only by default).\n        If not specified, ``modin.config.IsRayCluster`` variable is used.\n    override_redis_address : str, optional\n        What Redis address to connect to when running in Ray cluster.\n        If not specified, ``modin.config.RayRedisAddress`` is used.\n    override_redis_password : str, optional\n        What password to use when connecting to Redis.\n        If not specified, ``modin.config.RayRedisPassword`` is used.\n    \"\"\"\n    # We need these vars to be set for each Ray's worker in order to ensure that\n    # the `pandas` module has been fully imported inside of each process before\n    # any execution begins:\n    # https://github.com/modin-project/modin/pull/4603\n    env_vars = {\n        \"__MODIN_AUTOIMPORT_PANDAS__\": \"1\",\n        \"PYTHONWARNINGS\": \"ignore::FutureWarning\",\n    }\n    if GithubCI.get():\n        # need these to write parquet to the moto service mocking s3.\n        env_vars.update(\n            {\n                \"AWS_ACCESS_KEY_ID\": CIAWSAccessKeyID.get(),\n                \"AWS_SECRET_ACCESS_KEY\": CIAWSSecretAccessKey.get(),\n            }\n        )\n    extra_init_kw = {}\n    is_cluster = override_is_cluster or IsRayCluster.get()\n    if not ray.is_initialized() or override_is_cluster:\n        redis_address = override_redis_address or RayRedisAddress.get()\n        redis_password = (\n            (\n                ray.ray_constants.REDIS_DEFAULT_PASSWORD\n                if is_cluster\n                else 
RayRedisPassword.get()\n            )\n            if override_redis_password is None\n            and RayRedisPassword.get_value_source() == ValueSource.DEFAULT\n            else override_redis_password or RayRedisPassword.get()\n        )\n\n        if is_cluster:\n            extra_init_kw[\"runtime_env\"] = {\"env_vars\": env_vars}\n            # We only start ray in a cluster setting for the head node.\n            ray.init(\n                address=redis_address or \"auto\",\n                include_dashboard=False,\n                ignore_reinit_error=True,\n                _redis_password=redis_password,\n                **extra_init_kw,\n            )\n        else:\n            object_store_memory = _get_object_store_memory()\n            ray_init_kwargs = {\n                \"num_cpus\": CpuCount.get(),\n                \"num_gpus\": GpuCount.get(),\n                \"include_dashboard\": False,\n                \"ignore_reinit_error\": True,\n                \"object_store_memory\": object_store_memory,\n                \"_redis_password\": redis_password,\n                \"_memory\": object_store_memory,\n                \"resources\": RayInitCustomResources.get(),\n                **extra_init_kw,\n            }\n            # It should be enough to simply set the required variables for the main process\n            # for Ray to automatically propagate them to each new worker on the same node.\n            # Although Ray doesn't guarantee this behavior it works as expected most of the\n            # time and doesn't enforce us with any overhead that Ray's native `runtime_env`\n            # is usually causing. 
You can visit this gh-issue for more info:\n            # https://github.com/modin-project/modin/issues/5157#issuecomment-1500225150\n            with set_env(**env_vars):\n                ray.init(**ray_init_kwargs)\n\n    # Now ray is initialized, check runtime env config - especially useful if we join\n    # an externally pre-configured cluster\n    runtime_env_vars = ray.get_runtime_context().runtime_env.get(\"env_vars\", {})\n    for varname, varvalue in env_vars.items():\n        if str(runtime_env_vars.get(varname, \"\")) != str(varvalue):\n            if is_cluster:\n                ErrorMessage.single_warning(\n                    \"When using a pre-initialized Ray cluster, please ensure that the runtime env \"\n                    + f\"sets environment variable {varname} to {varvalue}\"\n                )\n\n    num_cpus = int(ray.cluster_resources()[\"CPU\"])\n    NPartitions._put(num_cpus)\n    CpuCount._put(num_cpus)\n\n    # TODO(https://github.com/ray-project/ray/issues/28216): remove this\n    # workaround once Ray gives a better way to suppress task errors.\n    # Ideally we would not set global environment variables.\n    # If user has explicitly set _RAY_IGNORE_UNHANDLED_ERRORS_VAR, don't\n    # override its value.\n    if _RAY_IGNORE_UNHANDLED_ERRORS_VAR not in os.environ:\n        os.environ[_RAY_IGNORE_UNHANDLED_ERRORS_VAR] = \"1\"\n\n\ndef _get_object_store_memory() -> Optional[int]:\n    \"\"\"\n    Get the object store memory we should start Ray with, in bytes.\n\n    - If the ``Memory`` config variable is set, return that.\n    - On Linux, take system memory from /dev/shm. 
On other systems use total\n      virtual memory.\n    - On Mac, never return more than Ray-specified upper limit.\n\n    Returns\n    -------\n    Optional[int]\n        The object store memory size in bytes, or None if we should use the Ray\n        default.\n    \"\"\"\n    object_store_memory = Memory.get()\n    if object_store_memory is not None:\n        return object_store_memory\n    virtual_memory = psutil.virtual_memory().total\n    if sys.platform.startswith(\"linux\"):\n        shm_fd = os.open(\"/dev/shm\", os.O_RDONLY)\n        try:\n            shm_stats = os.fstatvfs(shm_fd)\n            system_memory = shm_stats.f_bsize * shm_stats.f_bavail\n            if system_memory / (virtual_memory / 2) < 0.99:\n                warnings.warn(\n                    f\"The size of /dev/shm is too small ({system_memory} bytes). The required size \"\n                    + f\"at least half of RAM ({virtual_memory // 2} bytes). Please, delete files in /dev/shm or \"\n                    + \"increase size of /dev/shm with --shm-size in Docker. Also, you can can override the memory \"\n                    + \"size for each Ray worker (in bytes) to the MODIN_MEMORY environment variable.\"\n                )\n        finally:\n            os.close(shm_fd)\n    else:\n        system_memory = virtual_memory\n    bytes_per_gb = 1e9\n    object_store_memory = int(\n        _OBJECT_STORE_TO_SYSTEM_MEMORY_RATIO\n        * system_memory\n        // bytes_per_gb\n        * bytes_per_gb\n    )\n    if object_store_memory == 0:\n        return None\n    # Newer versions of ray don't allow us to initialize ray with object store\n    # size larger than that _MAC_OBJECT_STORE_LIMIT_BYTES. It seems that\n    # object store > the limit is too slow even on ray 1.0.0. However, limiting\n    # the object store to _MAC_OBJECT_STORE_LIMIT_BYTES only seems to start\n    # helping at ray version 1.3.0. 
So if ray version is at least 1.3.0, cap\n    # the object store at _MAC_OBJECT_STORE_LIMIT_BYTES.\n    # For background on the ray bug see:\n    # - https://github.com/ray-project/ray/issues/20388\n    # - https://github.com/modin-project/modin/issues/4872\n    if sys.platform == \"darwin\" and version.parse(ray.__version__) >= version.parse(\n        \"1.3.0\"\n    ):\n        object_store_memory = min(object_store_memory, _MAC_OBJECT_STORE_LIMIT_BYTES)\n    return object_store_memory\n\n\ndef deserialize(obj):  # pragma: no cover\n    \"\"\"\n    Deserialize a Ray object.\n\n    Parameters\n    ----------\n    obj : ObjectIDType, iterable of ObjectIDType, or mapping of keys to ObjectIDTypes\n        Object(s) to deserialize.\n\n    Returns\n    -------\n    obj\n        The deserialized object.\n    \"\"\"\n    if isinstance(obj, ObjectIDType):\n        return RayWrapper.materialize(obj)\n    elif isinstance(obj, (tuple, list)):\n        # Ray will error if any elements are not ObjectIDType, but we still want ray to\n        # perform batch deserialization for us -- thus, we must submit only the list elements\n        # that are ObjectIDType, deserialize them, and restore them to their correct list index\n        oid_indices, oids = [], []\n        for i, ray_id in enumerate(obj):\n            if isinstance(ray_id, ObjectIDType):\n                oid_indices.append(i)\n                oids.append(ray_id)\n        ray_result = RayWrapper.materialize(oids)\n        new_lst = list(obj[:])\n        for i, deser_item in zip(oid_indices, ray_result):\n            new_lst[i] = deser_item\n        # Check that all objects have been deserialized\n        assert not any([isinstance(o, ObjectIDType) for o in new_lst])\n        return new_lst\n    elif isinstance(obj, dict) and any(\n        isinstance(val, ObjectIDType) for val in obj.values()\n    ):\n        return dict(zip(obj.keys(), RayWrapper.materialize(list(obj.values()))))\n    else:\n        return obj\n"
  },
  {
    "path": "modin/core/execution/ray/generic/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Generic functionality for Ray execution engine.\"\"\"\n"
  },
  {
    "path": "modin/core/execution/ray/generic/io/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Generic IO functionality for Ray execution engine.\"\"\"\n\nfrom .io import RayIO\n\n__all__ = [\"RayIO\"]\n"
  },
  {
    "path": "modin/core/execution/ray/generic/io/io.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"The module holds base class implementing required I/O over Ray.\"\"\"\n\nfrom modin.core.io import BaseIO\n\n\nclass RayIO(BaseIO):\n    \"\"\"Base class for doing I/O operations over Ray.\"\"\"\n\n    @classmethod\n    def from_ray(cls, ray_obj):\n        \"\"\"\n        Create a Modin `query_compiler` from a Ray Dataset.\n\n        Parameters\n        ----------\n        ray_obj : ray.data.Dataset\n            The Ray Dataset to convert from.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            QueryCompiler containing data from the Ray Dataset.\n\n        Notes\n        -----\n        This function must be implemented in every subclass\n        otherwise NotImplementedError will be raised.\n        \"\"\"\n        raise NotImplementedError(\n            f\"Modin dataset can't be created from `ray.data.Dataset` using {cls}.\"\n        )\n\n    @classmethod\n    def to_ray(cls, modin_obj):\n        \"\"\"\n        Convert a Modin DataFrame/Series to a Ray Dataset.\n\n        Parameters\n        ----------\n        modin_obj : modin.pandas.DataFrame, modin.pandas.Series\n            The Modin DataFrame/Series to convert.\n\n        
Returns\n        -------\n        ray.data.Dataset\n            Converted object with type depending on input.\n\n        Notes\n        -----\n        This function must be implemented in every subclass\n        otherwise NotImplementedError will be raised.\n        \"\"\"\n        raise NotImplementedError(\n            f\"`ray.data.Dataset` can't be created from Modin DataFrame/Series using {cls}.\"\n        )\n"
  },
  {
    "path": "modin/core/execution/ray/generic/partitioning/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Generic partitioning functionality for Ray execution engine.\"\"\"\n\nfrom .partition_manager import GenericRayDataframePartitionManager\n\n__all__ = [\n    \"GenericRayDataframePartitionManager\",\n]\n"
  },
  {
    "path": "modin/core/execution/ray/generic/partitioning/partition_manager.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"The module holds Modin partition manager implemented for Ray.\"\"\"\n\nimport numpy as np\n\nfrom modin.core.dataframe.pandas.partitioning.partition_manager import (\n    PandasDataframePartitionManager,\n)\nfrom modin.core.execution.ray.common import RayWrapper\n\n\nclass GenericRayDataframePartitionManager(PandasDataframePartitionManager):\n    \"\"\"The class implements the interface in `PandasDataframePartitionManager`.\"\"\"\n\n    @classmethod\n    def to_numpy(cls, partitions, **kwargs):\n        \"\"\"\n        Convert `partitions` into a NumPy array.\n\n        Parameters\n        ----------\n        partitions : NumPy array\n            A 2-D array of partitions to convert to local NumPy array.\n        **kwargs : dict\n            Keyword arguments to pass to each partition ``.to_numpy()`` call.\n\n        Returns\n        -------\n        NumPy array\n        \"\"\"\n        if partitions.shape[1] == 1:\n            parts = cls.get_objects_from_partitions(partitions.flatten())\n            parts = [part.to_numpy(**kwargs) for part in parts]\n        else:\n            parts = RayWrapper.materialize(\n                [\n                    
obj.apply(\n                        lambda df, **kwargs: df.to_numpy(**kwargs)\n                    ).list_of_blocks[0]\n                    for row in partitions\n                    for obj in row\n                ]\n            )\n        rows, cols = partitions.shape\n        parts = [parts[i * cols : (i + 1) * cols] for i in range(rows)]\n        return np.block(parts)\n"
  },
  {
    "path": "modin/core/execution/ray/implementations/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Modin's functionality related to Ray execution engine and optimized for specific storage formats.\"\"\"\n"
  },
  {
    "path": "modin/core/execution/ray/implementations/pandas_on_ray/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Modin's functionality related to Ray execution engine and optimized for pandas storage format.\"\"\"\n"
  },
  {
    "path": "modin/core/execution/ray/implementations/pandas_on_ray/dataframe/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Base Modin Dataframe class optimized for pandas on Ray execution.\"\"\"\n\nfrom .dataframe import PandasOnRayDataframe\n\n__all__ = [\"PandasOnRayDataframe\"]\n"
  },
  {
    "path": "modin/core/execution/ray/implementations/pandas_on_ray/dataframe/dataframe.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module houses class that implements ``PandasDataframe`` using Ray.\"\"\"\n\nfrom modin.core.dataframe.base.dataframe.utils import Axis\nfrom modin.core.dataframe.pandas.dataframe.dataframe import PandasDataframe\nfrom modin.utils import _inherit_docstrings\n\nfrom ..partitioning.partition_manager import PandasOnRayDataframePartitionManager\n\n\nclass PandasOnRayDataframe(PandasDataframe):\n    \"\"\"\n    The class implements the interface in ``PandasDataframe`` using Ray.\n\n    Parameters\n    ----------\n    partitions : np.ndarray\n        A 2D NumPy array of partitions.\n    index : sequence\n        The index for the dataframe. Converted to a ``pandas.Index``.\n    columns : sequence\n        The columns object for the dataframe. Converted to a ``pandas.Index``.\n    row_lengths : list, optional\n        The length of each partition in the rows. The \"height\" of\n        each of the block partitions. Is computed if not provided.\n    column_widths : list, optional\n        The width of each partition in the columns. The \"width\" of\n        each of the block partitions. 
Is computed if not provided.\n    dtypes : pandas.Series, optional\n        The data types for the dataframe columns.\n    pandas_backend : {\"pyarrow\", None}, optional\n        Backend used by pandas. None - means default NumPy backend.\n    \"\"\"\n\n    _partition_mgr_cls = PandasOnRayDataframePartitionManager\n\n    def _get_lengths(self, parts, axis):\n        \"\"\"\n        Get list of dimensions for all the provided parts.\n\n        Parameters\n        ----------\n        parts : list\n            List of partitions.\n        axis : {0, 1}\n            The axis along which to get the lengths (0 - length across rows, or 1 - width across columns).\n\n        Returns\n        -------\n        list\n        \"\"\"\n        if axis == Axis.ROW_WISE:\n            dims = [part.length(False) for part in parts]\n        else:\n            dims = [part.width(False) for part in parts]\n\n        return self._partition_mgr_cls.materialize_futures(dims)\n\n    @property\n    @_inherit_docstrings(PandasDataframe.engine)\n    def engine(self) -> str:\n        return \"Ray\"\n"
  },
  {
    "path": "modin/core/execution/ray/implementations/pandas_on_ray/io/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Base IO classes optimized for pandas on Ray execution.\"\"\"\n\nfrom .io import PandasOnRayIO\n\n__all__ = [\"PandasOnRayIO\"]\n"
  },
  {
    "path": "modin/core/execution/ray/implementations/pandas_on_ray/io/io.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"The module holds the factory which performs I/O using pandas on Ray.\"\"\"\n\nimport io\n\nimport numpy as np\nimport pandas\nfrom pandas.io.common import get_handle, stringify_path\nfrom ray.data import from_pandas_refs\n\nfrom modin.config import RayTaskCustomResources\nfrom modin.core.execution.ray.common import RayWrapper, SignalActor\nfrom modin.core.execution.ray.generic.io import RayIO\nfrom modin.core.io import (\n    CSVDispatcher,\n    ExcelDispatcher,\n    FeatherDispatcher,\n    FWFDispatcher,\n    JSONDispatcher,\n    ParquetDispatcher,\n    SQLDispatcher,\n)\nfrom modin.core.storage_formats.pandas.parsers import (\n    PandasCSVParser,\n    PandasExcelParser,\n    PandasFeatherParser,\n    PandasFWFParser,\n    PandasJSONParser,\n    PandasParquetParser,\n    PandasSQLParser,\n)\nfrom modin.core.storage_formats.pandas.query_compiler import PandasQueryCompiler\nfrom modin.distributed.dataframe.pandas.partitions import (\n    from_partitions,\n    unwrap_partitions,\n)\nfrom modin.experimental.core.io import (\n    ExperimentalCSVGlobDispatcher,\n    ExperimentalCustomTextDispatcher,\n    ExperimentalGlobDispatcher,\n    
ExperimentalSQLDispatcher,\n)\nfrom modin.experimental.core.storage_formats.pandas.parsers import (\n    ExperimentalCustomTextParser,\n    ExperimentalPandasCSVGlobParser,\n    ExperimentalPandasJsonParser,\n    ExperimentalPandasParquetParser,\n    ExperimentalPandasPickleParser,\n    ExperimentalPandasXmlParser,\n)\n\nfrom ..dataframe import PandasOnRayDataframe\nfrom ..partitioning import PandasOnRayDataframePartition\n\n\nclass PandasOnRayIO(RayIO):\n    \"\"\"Factory providing methods for performing I/O operations using pandas as storage format on Ray as engine.\"\"\"\n\n    frame_cls = PandasOnRayDataframe\n    frame_partition_cls = PandasOnRayDataframePartition\n    query_compiler_cls = PandasQueryCompiler\n    build_args = dict(\n        frame_partition_cls=PandasOnRayDataframePartition,\n        query_compiler_cls=PandasQueryCompiler,\n        frame_cls=PandasOnRayDataframe,\n        base_io=RayIO,\n    )\n\n    def __make_read(*classes, build_args=build_args):\n        # used to reduce code duplication\n        return type(\"\", (RayWrapper, *classes), build_args).read\n\n    def __make_write(*classes, build_args=build_args):\n        # used to reduce code duplication\n        return type(\"\", (RayWrapper, *classes), build_args).write\n\n    read_csv = __make_read(PandasCSVParser, CSVDispatcher)\n    read_fwf = __make_read(PandasFWFParser, FWFDispatcher)\n    read_json = __make_read(PandasJSONParser, JSONDispatcher)\n    read_parquet = __make_read(PandasParquetParser, ParquetDispatcher)\n    to_parquet = __make_write(ParquetDispatcher)\n    # Blocked on pandas-dev/pandas#12236. 
It is faster to default to pandas.\n    # read_hdf = __make_read(PandasHDFParser, HDFReader)\n    read_feather = __make_read(PandasFeatherParser, FeatherDispatcher)\n    read_sql = __make_read(PandasSQLParser, SQLDispatcher)\n    to_sql = __make_write(SQLDispatcher)\n    read_excel = __make_read(PandasExcelParser, ExcelDispatcher)\n\n    # experimental methods that don't exist in pandas\n    read_csv_glob = __make_read(\n        ExperimentalPandasCSVGlobParser, ExperimentalCSVGlobDispatcher\n    )\n    read_parquet_glob = __make_read(\n        ExperimentalPandasParquetParser, ExperimentalGlobDispatcher\n    )\n    to_parquet_glob = __make_write(\n        ExperimentalGlobDispatcher,\n        build_args={**build_args, \"base_write\": RayIO.to_parquet},\n    )\n    read_json_glob = __make_read(\n        ExperimentalPandasJsonParser, ExperimentalGlobDispatcher\n    )\n    to_json_glob = __make_write(\n        ExperimentalGlobDispatcher,\n        build_args={**build_args, \"base_write\": RayIO.to_json},\n    )\n    read_xml_glob = __make_read(ExperimentalPandasXmlParser, ExperimentalGlobDispatcher)\n    to_xml_glob = __make_write(\n        ExperimentalGlobDispatcher,\n        build_args={**build_args, \"base_write\": RayIO.to_xml},\n    )\n    read_pickle_glob = __make_read(\n        ExperimentalPandasPickleParser, ExperimentalGlobDispatcher\n    )\n    to_pickle_glob = __make_write(\n        ExperimentalGlobDispatcher,\n        build_args={**build_args, \"base_write\": RayIO.to_pickle},\n    )\n    read_custom_text = __make_read(\n        ExperimentalCustomTextParser, ExperimentalCustomTextDispatcher\n    )\n    read_sql_distributed = __make_read(\n        ExperimentalSQLDispatcher, build_args={**build_args, \"base_read\": read_sql}\n    )\n\n    del __make_read  # to not pollute class namespace\n    del __make_write  # to not pollute class namespace\n\n    @staticmethod\n    def _to_csv_check_support(kwargs):\n        \"\"\"\n        Check if parallel version of 
``to_csv`` could be used.\n\n        Parameters\n        ----------\n        kwargs : dict\n            Keyword arguments passed to ``.to_csv()``.\n\n        Returns\n        -------\n        bool\n            Whether parallel version of ``to_csv`` is applicable.\n        \"\"\"\n        path_or_buf = kwargs[\"path_or_buf\"]\n        compression = kwargs[\"compression\"]\n        if not isinstance(path_or_buf, str):\n            return False\n        # case when the pointer is placed at the beginning of the file.\n        if \"r\" in kwargs[\"mode\"] and \"+\" in kwargs[\"mode\"]:\n            return False\n        # encodings with a BOM are not supported;\n        # instead of one mark in the result bytes we would get one per partition,\n        # so we should fall back to pandas for `utf-16`, `utf-32` with all aliases, for instance\n        # (`utf_32_be`, `utf_16_le` and so on)\n        if kwargs[\"encoding\"] is not None:\n            encoding = kwargs[\"encoding\"].lower()\n            if \"u\" in encoding or \"utf\" in encoding:\n                if \"16\" in encoding or \"32\" in encoding:\n                    return False\n        if compression is None or not compression == \"infer\":\n            return False\n        if any((path_or_buf.endswith(ext) for ext in [\".gz\", \".bz2\", \".zip\", \".xz\"])):\n            return False\n        return True\n\n    @classmethod\n    def to_csv(cls, qc, **kwargs):\n        \"\"\"\n        Write records stored in the `qc` to a CSV file.\n\n        Parameters\n        ----------\n        qc : BaseQueryCompiler\n            The query compiler of the Modin dataframe that we want to run ``to_csv`` on.\n        **kwargs : dict\n            Parameters for ``pandas.to_csv(**kwargs)``.\n        \"\"\"\n        kwargs[\"path_or_buf\"] = stringify_path(kwargs[\"path_or_buf\"])\n        if not cls._to_csv_check_support(kwargs):\n            return RayIO.to_csv(qc, **kwargs)\n\n        signals = 
SignalActor.options(resources=RayTaskCustomResources.get()).remote(\n            len(qc._modin_frame._partitions) + 1\n        )\n\n        def func(df, **kw):  # pragma: no cover\n            \"\"\"\n            Dump a chunk of rows as csv, then save them to target maintaining order.\n\n            Parameters\n            ----------\n            df : pandas.DataFrame\n                A chunk of rows to write to a CSV file.\n            **kw : dict\n                Arguments to pass to ``pandas.to_csv(**kw)`` plus an extra argument\n                `partition_idx` serving as chunk index to maintain rows order.\n            \"\"\"\n            partition_idx = kw[\"partition_idx\"]\n            # the copy is made to not implicitly change the input parameters;\n            # to write to an intermediate buffer, we need to change `path_or_buf` in kwargs\n            csv_kwargs = kwargs.copy()\n            if partition_idx != 0:\n                # we need to create a new file only for first recording\n                # all the rest should be recorded in appending mode\n                if \"w\" in csv_kwargs[\"mode\"]:\n                    csv_kwargs[\"mode\"] = csv_kwargs[\"mode\"].replace(\"w\", \"a\")\n                # It is enough to write the header for the first partition\n                csv_kwargs[\"header\"] = False\n\n            # for parallelization purposes, each partition is written to an intermediate buffer\n            path_or_buf = csv_kwargs[\"path_or_buf\"]\n            is_binary = \"b\" in csv_kwargs[\"mode\"]\n            csv_kwargs[\"path_or_buf\"] = io.BytesIO() if is_binary else io.StringIO()\n            storage_options = csv_kwargs.pop(\"storage_options\", None)\n            df.to_csv(**csv_kwargs)\n            csv_kwargs.update({\"storage_options\": storage_options})\n            content = csv_kwargs[\"path_or_buf\"].getvalue()\n            csv_kwargs[\"path_or_buf\"].close()\n\n            # each process waits for its turn to write to a file\n 
           RayWrapper.materialize(signals.wait.remote(partition_idx))\n\n            # preparing to write data from the buffer to a file\n            with get_handle(\n                path_or_buf,\n                # in case when using URL in implicit text mode\n                # pandas try to open `path_or_buf` in binary mode\n                csv_kwargs[\"mode\"] if is_binary else csv_kwargs[\"mode\"] + \"t\",\n                encoding=kwargs[\"encoding\"],\n                errors=kwargs[\"errors\"],\n                compression=kwargs[\"compression\"],\n                storage_options=kwargs.get(\"storage_options\", None),\n                is_text=not is_binary,\n            ) as handles:\n                handles.handle.write(content)\n\n            # signal that the next process can start writing to the file\n            RayWrapper.materialize(signals.send.remote(partition_idx + 1))\n            # used for synchronization purposes\n            return pandas.DataFrame()\n\n        # signaling that the partition with id==0 can be written to the file\n        RayWrapper.materialize(signals.send.remote(0))\n        # Ensure that the metadata is syncrhonized\n        qc._modin_frame._propagate_index_objs(axis=None)\n        result = qc._modin_frame._partition_mgr_cls.map_axis_partitions(\n            axis=1,\n            partitions=qc._modin_frame._partitions,\n            map_func=func,\n            keep_partitioning=True,\n            lengths=None,\n            enumerate_partitions=True,\n            max_retries=0,\n        )\n        # pending completion\n        RayWrapper.materialize(\n            [part.list_of_blocks[0] for row in result for part in row]\n        )\n\n    @classmethod\n    def from_ray(cls, ray_obj):\n        \"\"\"\n        Create a Modin `query_compiler` from a Ray Dataset.\n\n        Parameters\n        ----------\n        ray_obj : ray.data.Dataset\n            The Ray Dataset to convert from.\n\n        Returns\n        -------\n        
BaseQueryCompiler\n            QueryCompiler containing data from the Ray Dataset.\n        \"\"\"\n        pd_objs = ray_obj.to_pandas_refs()\n        return from_partitions(pd_objs, axis=0)._query_compiler\n\n    @classmethod\n    def to_ray(cls, modin_obj):\n        \"\"\"\n        Convert a Modin DataFrame/Series to a Ray Dataset.\n\n        Parameters\n        ----------\n        modin_obj : modin.pandas.DataFrame, modin.pandas.Series\n            The Modin DataFrame/Series to convert.\n\n        Returns\n        -------\n        ray.data.Dataset\n            Converted object with type depending on input.\n        \"\"\"\n        parts = unwrap_partitions(modin_obj, axis=0)\n        return from_pandas_refs(parts)\n\n    @classmethod\n    def from_map(cls, func, iterable, *args, **kwargs):\n        \"\"\"\n        Create a Modin `query_compiler` from a map function.\n\n        This method will construct a Modin `query_compiler` split by row partitions.\n        The number of row partitions matches the number of elements in the iterable object.\n\n        Parameters\n        ----------\n        func : callable\n            Function to map across the iterable object.\n        iterable : Iterable\n            An iterable object.\n        *args : tuple\n            Positional arguments to pass in `func`.\n        **kwargs : dict\n            Keyword arguments to pass in `func`.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            QueryCompiler containing data returned by map function.\n        \"\"\"\n        func = cls.frame_cls._partition_mgr_cls.preprocess_func(func)\n        partitions = np.array(\n            [\n                [\n                    cls.frame_partition_cls(\n                        RayWrapper.deploy(\n                            func, f_args=(obj,) + args, return_pandas_df=True, **kwargs\n                        )\n                    )\n                ]\n                for obj in iterable\n            ]\n        )\n  
      return cls.query_compiler_cls(cls.frame_cls(partitions))\n"
  },
  {
    "path": "modin/core/execution/ray/implementations/pandas_on_ray/partitioning/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Base Modin Dataframe classes related to its partitioning and optimized for pandas on Ray execution.\"\"\"\n\nfrom .partition import PandasOnRayDataframePartition\nfrom .partition_manager import PandasOnRayDataframePartitionManager\nfrom .virtual_partition import (\n    PandasOnRayDataframeColumnPartition,\n    PandasOnRayDataframeRowPartition,\n    PandasOnRayDataframeVirtualPartition,\n)\n\n__all__ = [\n    \"PandasOnRayDataframePartition\",\n    \"PandasOnRayDataframePartitionManager\",\n    \"PandasOnRayDataframeVirtualPartition\",\n    \"PandasOnRayDataframeColumnPartition\",\n    \"PandasOnRayDataframeRowPartition\",\n]\n"
  },
  {
    "path": "modin/core/execution/ray/implementations/pandas_on_ray/partitioning/partition.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module houses class that wraps data (block partition) and its metadata.\"\"\"\n\nfrom typing import Callable, Union\n\nimport pandas\nimport ray\n\nfrom modin.config import LazyExecution, RayTaskCustomResources\nfrom modin.core.dataframe.pandas.partitioning.partition import PandasDataframePartition\nfrom modin.core.execution.ray.common import MaterializationHook, RayWrapper\nfrom modin.core.execution.ray.common.deferred_execution import (\n    DeferredExecution,\n    MetaList,\n    MetaListHook,\n)\nfrom modin.core.execution.ray.common.utils import ObjectIDType\nfrom modin.logging import disable_logging, get_logger\nfrom modin.pandas.indexing import compute_sliced_len\nfrom modin.utils import _inherit_docstrings\n\n\nclass PandasOnRayDataframePartition(PandasDataframePartition):\n    \"\"\"\n    The class implements the interface in ``PandasDataframePartition``.\n\n    Parameters\n    ----------\n    data : ObjectIDType or DeferredExecution\n        A reference to ``pandas.DataFrame`` that needs to be wrapped with this class\n        or a reference to DeferredExecution that needs to be executed on demand.\n    length : ObjectIDType or int, optional\n        
Length or reference to it of wrapped ``pandas.DataFrame``.\n    width : ObjectIDType or int, optional\n        Width or reference to it of wrapped ``pandas.DataFrame``.\n    ip : ObjectIDType or str, optional\n        Node IP address or reference to it that holds wrapped ``pandas.DataFrame``.\n    meta : MetaList\n        Meta information, containing the lengths and the worker address (the last value).\n    meta_offset : int\n        The lengths offset in the meta list.\n    \"\"\"\n\n    execution_wrapper = RayWrapper\n\n    def __init__(\n        self,\n        data: Union[ray.ObjectRef, DeferredExecution],\n        length: int = None,\n        width: int = None,\n        ip: str = None,\n        meta: MetaList = None,\n        meta_offset: int = 0,\n    ):\n        super().__init__()\n        if isinstance(data, DeferredExecution):\n            data.subscribe()\n        self._data_ref = data\n        # The metadata is stored in the MetaList at 0 offset. If the data is\n        # a DeferredExecution, the _meta will be replaced with the list, returned\n        # by the remote function. 
The returned list may contain data for multiple\n        # results and, in this case, _meta_offset corresponds to the meta related to\n        # this partition.\n        if meta is None:\n            self._meta = MetaList([length, width, ip])\n            self._meta_offset = 0\n        else:\n            self._meta = meta\n            self._meta_offset = meta_offset\n\n        log = get_logger()\n        self._is_debug(log) and log.debug(\n            \"Partition ID: {}, Height: {}, Width: {}, Node IP: {}\".format(\n                self._identity,\n                str(self._length_cache),\n                str(self._width_cache),\n                str(self._ip_cache),\n            )\n        )\n\n    @disable_logging\n    def __del__(self):\n        \"\"\"Unsubscribe from DeferredExecution.\"\"\"\n        if isinstance(self._data_ref, DeferredExecution):\n            self._data_ref.unsubscribe()\n\n    def apply(self, func: Union[Callable, ray.ObjectRef], *args, **kwargs):\n        \"\"\"\n        Apply a function to the object wrapped by this partition.\n\n        Parameters\n        ----------\n        func : callable or ray.ObjectRef\n            A function to apply.\n        *args : iterable\n            Additional positional arguments to be passed in `func`.\n        **kwargs : dict\n            Additional keyword arguments to be passed in `func`.\n\n        Returns\n        -------\n        PandasOnRayDataframePartition\n            A new ``PandasOnRayDataframePartition`` object.\n\n        Notes\n        -----\n        It does not matter if `func` is callable or an ``ray.ObjectRef``. Ray will\n        handle it correctly either way. 
The keyword arguments are sent as a dictionary.\n        \"\"\"\n        log = get_logger()\n        self._is_debug(log) and log.debug(f\"ENTER::Partition.apply::{self._identity}\")\n        de = DeferredExecution(self._data_ref, func, args, kwargs)\n        data, meta, meta_offset = de.exec()\n        self._is_debug(log) and log.debug(f\"EXIT::Partition.apply::{self._identity}\")\n        return self.__constructor__(data, meta=meta, meta_offset=meta_offset)\n\n    @_inherit_docstrings(PandasDataframePartition.add_to_apply_calls)\n    def add_to_apply_calls(\n        self,\n        func: Union[Callable, ray.ObjectRef],\n        *args,\n        length=None,\n        width=None,\n        **kwargs,\n    ):\n        return self.__constructor__(\n            data=DeferredExecution(self._data_ref, func, args, kwargs),\n            length=length,\n            width=width,\n        )\n\n    @_inherit_docstrings(PandasDataframePartition.drain_call_queue)\n    def drain_call_queue(self):\n        data = self._data_ref\n        if not isinstance(data, DeferredExecution):\n            return data\n\n        log = get_logger()\n        self._is_debug(log) and log.debug(\n            f\"ENTER::Partition.drain_call_queue::{self._identity}\"\n        )\n        self._data_ref, self._meta, self._meta_offset = data.exec()\n        self._is_debug(log) and log.debug(\n            f\"EXIT::Partition.drain_call_queue::{self._identity}\"\n        )\n\n    @_inherit_docstrings(PandasDataframePartition.wait)\n    def wait(self):\n        self.drain_call_queue()\n        RayWrapper.wait(self._data_ref)\n\n    def __copy__(self):\n        \"\"\"\n        Create a copy of this partition.\n\n        Returns\n        -------\n        PandasOnRayDataframePartition\n            A copy of this partition.\n        \"\"\"\n        return self.__constructor__(\n            self._data_ref,\n            meta=self._meta,\n            meta_offset=self._meta_offset,\n        )\n\n    def mask(self, 
row_labels, col_labels):\n        \"\"\"\n        Lazily create a mask that extracts the indices provided.\n\n        Parameters\n        ----------\n        row_labels : list-like, slice or label\n            The row labels for the rows to extract.\n        col_labels : list-like, slice or label\n            The column labels for the columns to extract.\n\n        Returns\n        -------\n        PandasOnRayDataframePartition\n            A new ``PandasOnRayDataframePartition`` object.\n        \"\"\"\n        log = get_logger()\n        self._is_debug(log) and log.debug(f\"ENTER::Partition.mask::{self._identity}\")\n        new_obj = super().mask(row_labels, col_labels)\n        if isinstance(row_labels, slice) and isinstance(\n            (len_cache := self._length_cache), ObjectIDType\n        ):\n            if row_labels == slice(None):\n                # fast path - full axis take\n                new_obj._length_cache = len_cache\n            else:\n                new_obj._length_cache = SlicerHook(len_cache, row_labels)\n        if isinstance(col_labels, slice) and isinstance(\n            (width_cache := self._width_cache), ObjectIDType\n        ):\n            if col_labels == slice(None):\n                # fast path - full axis take\n                new_obj._width_cache = width_cache\n            else:\n                new_obj._width_cache = SlicerHook(width_cache, col_labels)\n        self._is_debug(log) and log.debug(f\"EXIT::Partition.mask::{self._identity}\")\n        return new_obj\n\n    @classmethod\n    def put(cls, obj: pandas.DataFrame):\n        \"\"\"\n        Put the data frame into Plasma store and wrap it with partition object.\n\n        Parameters\n        ----------\n        obj : pandas.DataFrame\n            A data frame to be put.\n\n        Returns\n        -------\n        PandasOnRayDataframePartition\n            A new ``PandasOnRayDataframePartition`` object.\n        \"\"\"\n        return 
cls(cls.execution_wrapper.put(obj), len(obj.index), len(obj.columns))\n\n    @classmethod\n    def preprocess_func(cls, func):\n        \"\"\"\n        Put a function into the Plasma store to use in ``apply``.\n\n        Parameters\n        ----------\n        func : callable\n            A function to preprocess.\n\n        Returns\n        -------\n        ray.ObjectRef\n            A reference to `func`.\n        \"\"\"\n        return cls.execution_wrapper.put(func)\n\n    def length(self, materialize=True):\n        \"\"\"\n        Get the length of the object wrapped by this partition.\n\n        Parameters\n        ----------\n        materialize : bool, default: True\n            Whether to forcibly materialize the result into an integer. If ``False``\n            was specified, may return a future of the result if it hasn't been\n            materialized yet.\n\n        Returns\n        -------\n        int or ray.ObjectRef\n            The length of the object.\n        \"\"\"\n        if (length := self._length_cache) is None:\n            self.drain_call_queue()\n            if (length := self._length_cache) is None:\n                length, self._width_cache = _get_index_and_columns.options(\n                    resources=RayTaskCustomResources.get()\n                ).remote(self._data_ref)\n                self._length_cache = length\n        if materialize and isinstance(length, ObjectIDType):\n            self._length_cache = length = RayWrapper.materialize(length)\n        return length\n\n    def width(self, materialize=True):\n        \"\"\"\n        Get the width of the object wrapped by the partition.\n\n        Parameters\n        ----------\n        materialize : bool, default: True\n            Whether to forcibly materialize the result into an integer. 
If ``False``\n            was specified, may return a future of the result if it hasn't been\n            materialized yet.\n\n        Returns\n        -------\n        int or ray.ObjectRef\n            The width of the object.\n        \"\"\"\n        if (width := self._width_cache) is None:\n            self.drain_call_queue()\n            if (width := self._width_cache) is None:\n                self._length_cache, width = _get_index_and_columns.options(\n                    resources=RayTaskCustomResources.get()\n                ).remote(self._data_ref)\n                self._width_cache = width\n        if materialize and isinstance(width, ObjectIDType):\n            self._width_cache = width = RayWrapper.materialize(width)\n        return width\n\n    def ip(self, materialize=True):\n        \"\"\"\n        Get the node IP address of the object wrapped by this partition.\n\n        Parameters\n        ----------\n        materialize : bool, default: True\n            Whether to forcibly materialize the result into a string. 
If ``False``\n            was specified, may return a future of the result if it hasn't been\n            materialized yet.\n\n        Returns\n        -------\n        str\n            IP address of the node that holds the data.\n        \"\"\"\n        if (ip := self._ip_cache) is None:\n            self.drain_call_queue()\n        if materialize and isinstance(ip, ObjectIDType):\n            self._ip_cache = ip = RayWrapper.materialize(ip)\n        return ip\n\n    @property\n    def _data(self) -> ray.ObjectRef:  # noqa: GL08\n        self.drain_call_queue()\n        return self._data_ref\n\n    @property\n    def _length_cache(self):  # noqa: GL08\n        return self._meta[self._meta_offset]\n\n    @_length_cache.setter\n    def _length_cache(self, value):  # noqa: GL08\n        self._meta[self._meta_offset] = value\n\n    @property\n    def _width_cache(self):  # noqa: GL08\n        return self._meta[self._meta_offset + 1]\n\n    @_width_cache.setter\n    def _width_cache(self, value):  # noqa: GL08\n        self._meta[self._meta_offset + 1] = value\n\n    @property\n    def _ip_cache(self):  # noqa: GL08\n        return self._meta[-1]\n\n    @_ip_cache.setter\n    def _ip_cache(self, value):  # noqa: GL08\n        self._meta[-1] = value\n\n\n@ray.remote(num_returns=2)\ndef _get_index_and_columns(df):  # pragma: no cover\n    \"\"\"\n    Get the number of rows and columns of a pandas DataFrame.\n\n    Parameters\n    ----------\n    df : pandas.DataFrame\n        A pandas DataFrame which dimensions are needed.\n\n    Returns\n    -------\n    int\n        The number of rows.\n    int\n        The number of columns.\n    \"\"\"\n    return len(df.index), len(df.columns)\n\n\nPandasOnRayDataframePartition._eager_exec_func = PandasOnRayDataframePartition.apply\nPandasOnRayDataframePartition._lazy_exec_func = (\n    PandasOnRayDataframePartition.add_to_apply_calls\n)\n\n\ndef _configure_lazy_exec(cls: LazyExecution):\n    \"\"\"Configure lazy execution mode for 
PandasOnRayDataframePartition.\"\"\"\n    mode = cls.get()\n    get_logger().debug(f\"Ray lazy execution mode: {mode}\")\n    if mode == \"Auto\":\n        PandasOnRayDataframePartition.apply = (\n            PandasOnRayDataframePartition._eager_exec_func\n        )\n        PandasOnRayDataframePartition.add_to_apply_calls = (\n            PandasOnRayDataframePartition._lazy_exec_func\n        )\n    elif mode == \"On\":\n\n        def lazy_exec(self, func, *args, **kwargs):\n            return self._lazy_exec_func(func, *args, length=None, width=None, **kwargs)\n\n        PandasOnRayDataframePartition.apply = lazy_exec\n        PandasOnRayDataframePartition.add_to_apply_calls = (\n            PandasOnRayDataframePartition._lazy_exec_func\n        )\n    elif mode == \"Off\":\n\n        def eager_exec(self, func, *args, length=None, width=None, **kwargs):\n            return self._eager_exec_func(func, *args, **kwargs)\n\n        PandasOnRayDataframePartition.apply = (\n            PandasOnRayDataframePartition._eager_exec_func\n        )\n        PandasOnRayDataframePartition.add_to_apply_calls = eager_exec\n    else:\n        raise ValueError(f\"Invalid lazy execution mode: {mode}\")\n\n\nLazyExecution.subscribe(_configure_lazy_exec)\n\n\nclass SlicerHook(MaterializationHook):\n    \"\"\"\n    Used by mask() for the sliced length computation.\n\n    Parameters\n    ----------\n    ref : ObjectIDType\n        Non-materialized length to be sliced.\n    slc : slice\n        The slice to be applied.\n    \"\"\"\n\n    def __init__(self, ref: ObjectIDType, slc: slice):\n        self.ref = ref\n        self.slc = slc\n\n    def pre_materialize(self):\n        \"\"\"\n        Get the sliced length or object ref if not materialized.\n\n        Returns\n        -------\n        int or ObjectIDType\n        \"\"\"\n        if isinstance(self.ref, MetaListHook):\n            len_or_ref = self.ref.pre_materialize()\n            return (\n                
compute_sliced_len(self.slc, len_or_ref)\n                if isinstance(len_or_ref, int)\n                else len_or_ref\n            )\n        return self.ref\n\n    def post_materialize(self, materialized):\n        \"\"\"\n        Get the sliced length.\n\n        Parameters\n        ----------\n        materialized : list or int\n\n        Returns\n        -------\n        int\n        \"\"\"\n        if isinstance(self.ref, MetaListHook):\n            materialized = self.ref.post_materialize(materialized)\n        return compute_sliced_len(self.slc, materialized)\n"
  },
  {
    "path": "modin/core/execution/ray/implementations/pandas_on_ray/partitioning/partition_manager.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module houses class that implements ``GenericRayDataframePartitionManager`` using Ray.\"\"\"\n\nimport numpy as np\nfrom pandas.core.dtypes.common import is_numeric_dtype\n\nfrom modin.config import AsyncReadMode\nfrom modin.core.execution.modin_aqp import progress_bar_wrapper\nfrom modin.core.execution.ray.common import RayWrapper\nfrom modin.core.execution.ray.generic.partitioning import (\n    GenericRayDataframePartitionManager,\n)\nfrom modin.logging import get_logger\nfrom modin.utils import _inherit_docstrings\n\nfrom .partition import PandasOnRayDataframePartition\nfrom .virtual_partition import (\n    PandasOnRayDataframeColumnPartition,\n    PandasOnRayDataframeRowPartition,\n)\n\n\nclass PandasOnRayDataframePartitionManager(GenericRayDataframePartitionManager):\n    \"\"\"The class implements the interface in `PandasDataframePartitionManager`.\"\"\"\n\n    # This object uses RayRemotePartition objects as the underlying store.\n    _partition_class = PandasOnRayDataframePartition\n    _column_partitions_class = PandasOnRayDataframeColumnPartition\n    _row_partition_class = PandasOnRayDataframeRowPartition\n    _execution_wrapper = RayWrapper\n    
materialize_futures = RayWrapper.materialize\n\n    @classmethod\n    def wait_partitions(cls, partitions):\n        \"\"\"\n        Wait on the objects wrapped by `partitions` in parallel, without materializing them.\n\n        This method will block until all computations in the list have completed.\n\n        Parameters\n        ----------\n        partitions : np.ndarray\n            NumPy array with ``PandasDataframePartition``-s.\n        \"\"\"\n        RayWrapper.wait(\n            [block for partition in partitions for block in partition.list_of_blocks]\n        )\n\n    @classmethod\n    @_inherit_docstrings(\n        GenericRayDataframePartitionManager.split_pandas_df_into_partitions\n    )\n    def split_pandas_df_into_partitions(\n        cls, df, row_chunksize, col_chunksize, update_bar\n    ):\n        # it was found out, that with the following condition it's more beneficial\n        # to use the distributed splitting, let's break them down:\n        #   1. The distributed splitting is used only when there's more than 6mln elements\n        #   in the `df`, as with fewer data it's better to use the sequential splitting\n        #   2. Only used with numerical data, as with other dtypes, putting the whole big\n        #   dataframe into the storage takes too much time.\n        #   3. The distributed splitting consumes more memory than the sequential one.\n        #   It was estimated that it requires ~2.5x of the dataframe size, for now there\n        #   was no good way found to automatically fall back to the sequential\n        #   implementation in case of not enough memory, so currently we're enabling\n        #   the distributed version only if 'AsyncReadMode' is set to True. 
Follow this\n        #   discussion for more info on why automatic dispatching is hard:\n        #   https://github.com/modin-project/modin/pull/6640#issuecomment-1759932664\n        enough_elements = (len(df) * len(df.columns)) > 6_000_000\n        all_numeric_types = all(is_numeric_dtype(dtype) for dtype in df.dtypes)\n        async_mode_on = AsyncReadMode.get()\n\n        distributed_splitting = enough_elements and all_numeric_types and async_mode_on\n\n        log = get_logger()\n\n        if not distributed_splitting:\n            log.info(\n                \"Using sequential splitting in '.from_pandas()' because of some of the conditions are False: \"\n                + f\"{enough_elements=}; {all_numeric_types=}; {async_mode_on=}\"\n            )\n            return super().split_pandas_df_into_partitions(\n                df, row_chunksize, col_chunksize, update_bar\n            )\n\n        log.info(\"Using distributed splitting in '.from_pandas()'\")\n        put_func = cls._partition_class.put\n\n        def mask(part, row_loc, col_loc):\n            # 2D iloc works surprisingly slow, so doing this chained iloc calls:\n            # https://github.com/pandas-dev/pandas/issues/55202\n            return part.apply(lambda df: df.iloc[row_loc, :].iloc[:, col_loc])\n\n        main_part = put_func(df)\n        parts = [\n            [\n                update_bar(\n                    mask(\n                        main_part,\n                        slice(i, i + row_chunksize),\n                        slice(j, j + col_chunksize),\n                    ),\n                )\n                for j in range(0, len(df.columns), col_chunksize)\n            ]\n            for i in range(0, len(df), row_chunksize)\n        ]\n        return np.array(parts)\n\n\ndef _make_wrapped_method(name: str):\n    \"\"\"\n    Define new attribute that should work with progress bar.\n\n    Parameters\n    ----------\n    name : str\n        Name of 
`GenericRayDataframePartitionManager` attribute that should be reused.\n\n    Notes\n    -----\n    - `classmethod` decorator shouldn't be applied twice, so we refer to `__func__` attribute.\n    - New attribute is defined for `PandasOnRayDataframePartitionManager`.\n    \"\"\"\n    setattr(\n        PandasOnRayDataframePartitionManager,\n        name,\n        classmethod(\n            progress_bar_wrapper(\n                getattr(GenericRayDataframePartitionManager, name).__func__\n            )\n        ),\n    )\n\n\nfor method in (\n    \"map_partitions\",\n    \"lazy_map_partitions\",\n    \"map_axis_partitions\",\n    \"_apply_func_to_list_of_partitions\",\n    \"apply_func_to_select_indices\",\n    \"apply_func_to_select_indices_along_full_axis\",\n    \"apply_func_to_indices_both_axis\",\n    \"n_ary_operation\",\n):\n    _make_wrapped_method(method)\n"
  },
  {
    "path": "modin/core/execution/ray/implementations/pandas_on_ray/partitioning/virtual_partition.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module houses classes responsible for storing a virtual partition and applying a function to it.\"\"\"\n\nimport pandas\nimport ray\nfrom ray.util import get_node_ip_address\n\nfrom modin.config import RayTaskCustomResources\nfrom modin.core.dataframe.pandas.partitioning.axis_partition import (\n    PandasDataframeAxisPartition,\n)\nfrom modin.core.execution.ray.common import RayWrapper\nfrom modin.utils import _inherit_docstrings\n\nfrom .partition import PandasOnRayDataframePartition\n\n\nclass PandasOnRayDataframeVirtualPartition(PandasDataframeAxisPartition):\n    \"\"\"\n    The class implements the interface in ``PandasDataframeAxisPartition``.\n\n    Parameters\n    ----------\n    list_of_partitions : Union[list, PandasOnRayDataframePartition]\n        List of ``PandasOnRayDataframePartition`` and\n        ``PandasOnRayDataframeVirtualPartition`` objects, or a single\n        ``PandasOnRayDataframePartition``.\n    get_ip : bool, default: False\n        Whether to get node IP addresses to conforming partitions or not.\n    full_axis : bool, default: True\n        Whether or not the virtual partition encompasses the whole axis.\n    call_queue : list, 
optional\n        A list of tuples (callable, args, kwargs) that contains deferred calls.\n    length : ray.ObjectRef or int, optional\n        Length, or reference to length, of wrapped ``pandas.DataFrame``.\n    width : ray.ObjectRef or int, optional\n        Width, or reference to width, of wrapped ``pandas.DataFrame``.\n    \"\"\"\n\n    _PARTITIONS_METADATA_LEN = 3  # (length, width, ip)\n    partition_type = PandasOnRayDataframePartition\n    axis = None\n\n    # these variables are intentionally initialized at runtime (see #6023)\n    _DEPLOY_AXIS_FUNC = None\n    _DEPLOY_SPLIT_FUNC = None\n    _DRAIN_FUNC = None\n\n    @classmethod\n    def _get_deploy_axis_func(cls):  # noqa: GL08\n        if cls._DEPLOY_AXIS_FUNC is None:\n            cls._DEPLOY_AXIS_FUNC = RayWrapper.put(\n                PandasDataframeAxisPartition.deploy_axis_func\n            )\n        return cls._DEPLOY_AXIS_FUNC\n\n    @classmethod\n    def _get_deploy_split_func(cls):  # noqa: GL08\n        if cls._DEPLOY_SPLIT_FUNC is None:\n            cls._DEPLOY_SPLIT_FUNC = RayWrapper.put(\n                PandasDataframeAxisPartition.deploy_splitting_func\n            )\n        return cls._DEPLOY_SPLIT_FUNC\n\n    @classmethod\n    def _get_drain_func(cls):  # noqa: GL08\n        if cls._DRAIN_FUNC is None:\n            cls._DRAIN_FUNC = RayWrapper.put(PandasDataframeAxisPartition.drain)\n        return cls._DRAIN_FUNC\n\n    @property\n    def list_of_ips(self):\n        \"\"\"\n        Get the IPs holding the physical objects composing this partition.\n\n        Returns\n        -------\n        List\n            A list of IPs as ``ray.ObjectRef`` or str.\n        \"\"\"\n        # Defer draining call queue until we get the ip address\n        result = [None] * len(self.list_of_block_partitions)\n        for idx, partition in enumerate(self.list_of_block_partitions):\n            partition.drain_call_queue()\n            result[idx] = partition.ip(materialize=False)\n        return 
result\n\n    @classmethod\n    @_inherit_docstrings(PandasDataframeAxisPartition.deploy_splitting_func)\n    def deploy_splitting_func(\n        cls,\n        axis,\n        func,\n        f_args,\n        f_kwargs,\n        num_splits,\n        *partitions,\n        extract_metadata=False,\n    ):\n        return _deploy_ray_func.options(\n            num_returns=(\n                num_splits * (1 + cls._PARTITIONS_METADATA_LEN)\n                if extract_metadata\n                else num_splits\n            ),\n            resources=RayTaskCustomResources.get(),\n        ).remote(\n            cls._get_deploy_split_func(),\n            *f_args,\n            num_splits,\n            *partitions,\n            axis=axis,\n            f_to_deploy=func,\n            f_len_args=len(f_args),\n            f_kwargs=f_kwargs,\n            extract_metadata=extract_metadata,\n        )\n\n    @classmethod\n    def deploy_axis_func(\n        cls,\n        axis,\n        func,\n        f_args,\n        f_kwargs,\n        num_splits,\n        maintain_partitioning,\n        *partitions,\n        min_block_size,\n        lengths=None,\n        manual_partition=False,\n        max_retries=None,\n    ):\n        \"\"\"\n        Deploy a function along a full axis.\n\n        Parameters\n        ----------\n        axis : {0, 1}\n            The axis to perform the function along.\n        func : callable\n            The function to perform.\n        f_args : list or tuple\n            Positional arguments to pass to ``func``.\n        f_kwargs : dict\n            Keyword arguments to pass to ``func``.\n        num_splits : int\n            The number of splits to return (see ``split_result_of_axis_func_pandas``).\n        maintain_partitioning : bool\n            If True, keep the old partitioning if possible.\n            If False, create a new partition layout.\n        *partitions : iterable\n            All partitions that make up the full axis (row or column).\n        
min_block_size : int\n            Minimum number of rows/columns in a single split.\n        lengths : list, optional\n            The list of lengths to shuffle the object.\n        manual_partition : bool, default: False\n            If True, partition the result with `lengths`.\n        max_retries : int, default: None\n            The max number of times to retry the func.\n\n        Returns\n        -------\n        list\n            A list of ``ray.ObjectRef``-s.\n        \"\"\"\n        return _deploy_ray_func.options(\n            num_returns=(num_splits if lengths is None else len(lengths))\n            * (1 + cls._PARTITIONS_METADATA_LEN),\n            **({\"max_retries\": max_retries} if max_retries is not None else {}),\n            resources=RayTaskCustomResources.get(),\n        ).remote(\n            cls._get_deploy_axis_func(),\n            *f_args,\n            num_splits,\n            maintain_partitioning,\n            *partitions,\n            axis=axis,\n            f_to_deploy=func,\n            f_len_args=len(f_args),\n            f_kwargs=f_kwargs,\n            manual_partition=manual_partition,\n            min_block_size=min_block_size,\n            lengths=lengths,\n            return_generator=True,\n        )\n\n    @classmethod\n    def deploy_func_between_two_axis_partitions(\n        cls,\n        axis,\n        func,\n        f_args,\n        f_kwargs,\n        num_splits,\n        len_of_left,\n        other_shape,\n        *partitions,\n        min_block_size,\n    ):\n        \"\"\"\n        Deploy a function along a full axis between two data sets.\n\n        Parameters\n        ----------\n        axis : {0, 1}\n            The axis to perform the function along.\n        func : callable\n            The function to perform.\n        f_args : list or tuple\n            Positional arguments to pass to ``func``.\n        f_kwargs : dict\n            Keyword arguments to pass to ``func``.\n        num_splits : int\n            The 
number of splits to return (see ``split_result_of_axis_func_pandas``).\n        len_of_left : int\n            The number of values in `partitions` that belong to the left data set.\n        other_shape : np.ndarray\n            The shape of right frame in terms of partitions, i.e.\n            (other_shape[i-1], other_shape[i]) will indicate slice to restore i-1 axis partition.\n        *partitions : iterable\n            All partitions that make up the full axis (row or column) for both data sets.\n        min_block_size : int\n            Minimum number of rows/columns in a single split.\n\n        Returns\n        -------\n        list\n            A list of ``ray.ObjectRef``-s.\n        \"\"\"\n        return _deploy_ray_func.options(\n            num_returns=num_splits * (1 + cls._PARTITIONS_METADATA_LEN),\n            resources=RayTaskCustomResources.get(),\n        ).remote(\n            PandasDataframeAxisPartition.deploy_func_between_two_axis_partitions,\n            *f_args,\n            num_splits,\n            len_of_left,\n            other_shape,\n            *partitions,\n            axis=axis,\n            f_to_deploy=func,\n            f_len_args=len(f_args),\n            f_kwargs=f_kwargs,\n            min_block_size=min_block_size,\n            return_generator=True,\n        )\n\n    def wait(self):\n        \"\"\"Wait completing computations on the object wrapped by the partition.\"\"\"\n        self.drain_call_queue()\n        futures = self.list_of_blocks\n        RayWrapper.wait(futures)\n\n\n@_inherit_docstrings(PandasOnRayDataframeVirtualPartition)\nclass PandasOnRayDataframeColumnPartition(PandasOnRayDataframeVirtualPartition):\n    axis = 0\n\n\n@_inherit_docstrings(PandasOnRayDataframeVirtualPartition)\nclass PandasOnRayDataframeRowPartition(PandasOnRayDataframeVirtualPartition):\n    axis = 1\n\n\n@ray.remote\ndef _deploy_ray_func(\n    deployer,\n    *positional_args,\n    axis,\n    f_to_deploy,\n    f_len_args,\n    f_kwargs,\n    
extract_metadata=True,\n    **kwargs,\n):  # pragma: no cover\n    \"\"\"\n    Execute a function on an axis partition in a worker process.\n\n    This is ALWAYS called on one of ``PandasDataframeAxisPartition.deploy_splitting_func``,\n    ``PandasDataframeAxisPartition.deploy_axis_func`` or\n    ``PandasDataframeAxisPartition.deploy_func_between_two_axis_partitions``, which all\n    serve to deploy another dataframe function on a Ray worker process. The provided `positional_args`\n    contains positional arguments for both: `deployer` and for `f_to_deploy`, the parameters can be separated\n    using the `f_len_args` value. The parameters are combined so they will be deserialized by Ray before the\n    kernel is executed (`f_kwargs` will never contain more Ray objects, and thus does not require deserialization).\n\n    Parameters\n    ----------\n    deployer : callable\n        A `PandasDataframeAxisPartition.deploy_*` method that will call ``f_to_deploy``.\n    *positional_args : list\n        The first `f_len_args` elements in this list represent positional arguments\n        to pass to the `f_to_deploy`. The rest are positional arguments that will be\n        passed to `deployer`.\n    axis : {0, 1}\n        The axis to perform the function along. This argument is keyword only.\n    f_to_deploy : callable or RayObjectID\n        The function to deploy. This argument is keyword only.\n    f_len_args : int\n        Number of positional arguments to pass to ``f_to_deploy``. This argument is keyword only.\n    f_kwargs : dict\n        Keyword arguments to pass to ``f_to_deploy``. This argument is keyword only.\n    extract_metadata : bool, default: True\n        Whether to return metadata (length, width, ip) of the result. Passing `False` may relax\n        the load on object storage as the remote function would return 4 times fewer futures.\n        Passing `False` makes sense for temporary results where you know for sure that the\n        metadata will never be requested. 
This argument is keyword only.\n    **kwargs : dict\n        Keyword arguments to pass to ``deployer``.\n\n    Returns\n    -------\n    list : Union[tuple, list]\n        The result of the function call, and metadata for it.\n\n    Notes\n    -----\n    Ray functions are not detected by codecov (thus pragma: no cover).\n    \"\"\"\n    f_args = positional_args[:f_len_args]\n    deploy_args = positional_args[f_len_args:]\n    result = deployer(axis, f_to_deploy, f_args, f_kwargs, *deploy_args, **kwargs)\n\n    if not extract_metadata:\n        for item in result:\n            yield item\n    else:\n        ip = get_node_ip_address()\n        for r in result:\n            if isinstance(r, pandas.DataFrame):\n                for item in [r, len(r), len(r.columns), ip]:\n                    yield item\n            else:\n                for item in [r, None, None, ip]:\n                    yield item\n"
  },
  {
    "path": "modin/core/execution/unidist/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Modin's functionality related to unidist execution engine.\"\"\"\n"
  },
  {
    "path": "modin/core/execution/unidist/common/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Common utilities for unidist execution engine.\"\"\"\n\nfrom .engine_wrapper import SignalActor, UnidistWrapper\nfrom .utils import initialize_unidist\n\n__all__ = [\n    \"initialize_unidist\",\n    \"UnidistWrapper\",\n    \"SignalActor\",\n]\n"
  },
  {
    "path": "modin/core/execution/unidist/common/engine_wrapper.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"\nThe module with helper mixin for executing functions remotely.\n\nTo be used as a piece of building a unidist-based engine.\n\"\"\"\n\nimport asyncio\n\nimport pandas\nimport unidist\n\n\n@unidist.remote\ndef _deploy_unidist_func(\n    func, *args, return_pandas_df=None, **kwargs\n):  # pragma: no cover\n    \"\"\"\n    Wrap `func` to ease calling it remotely.\n\n    Parameters\n    ----------\n    func : callable\n        A local function that we want to call remotely.\n    *args : iterable\n        Positional arguments to pass to `func` when calling remotely.\n    return_pandas_df : bool, optional\n        Whether to convert the result of `func` to a pandas DataFrame or not.\n    **kwargs : dict\n        Keyword arguments to pass to `func` when calling remotely.\n\n    Returns\n    -------\n    unidist.ObjectRef or list[unidist.ObjectRef]\n        Unidist identifier of the result being put to object store.\n    \"\"\"\n    result = func(*args, **kwargs)\n    if return_pandas_df and not isinstance(result, pandas.DataFrame):\n        result = pandas.DataFrame(result)\n    return result\n\n\nclass UnidistWrapper:\n    \"\"\"Mixin that provides means of 
running functions remotely and getting local results.\"\"\"\n\n    @classmethod\n    def deploy(\n        cls, func, f_args=None, f_kwargs=None, return_pandas_df=None, num_returns=1\n    ):\n        \"\"\"\n        Run local `func` remotely.\n\n        Parameters\n        ----------\n        func : callable or unidist.ObjectRef\n            The function to perform.\n        f_args : list or tuple, optional\n            Positional arguments to pass to ``func``.\n        f_kwargs : dict, optional\n            Keyword arguments to pass to ``func``.\n        return_pandas_df : bool, optional\n            Whether to convert the result of `func` to a pandas DataFrame or not.\n        num_returns : int, default: 1\n            Amount of return values expected from `func`.\n\n        Returns\n        -------\n        unidist.ObjectRef or list\n            Unidist identifier of the result being put to object store.\n        \"\"\"\n        args = [] if f_args is None else f_args\n        kwargs = {} if f_kwargs is None else f_kwargs\n        return _deploy_unidist_func.options(num_returns=num_returns).remote(\n            func, *args, return_pandas_df=return_pandas_df, **kwargs\n        )\n\n    @classmethod\n    def is_future(cls, item):\n        \"\"\"\n        Check if the item is a Future.\n\n        Parameters\n        ----------\n        item : unidist.ObjectRef or object\n            Future or object to check.\n\n        Returns\n        -------\n        boolean\n            If the value is a future.\n        \"\"\"\n        return unidist.is_object_ref(item)\n\n    @classmethod\n    def materialize(cls, obj_id):\n        \"\"\"\n        Get the value of object from the object store.\n\n        Parameters\n        ----------\n        obj_id : unidist.ObjectRef\n            Unidist object identifier to get the value by.\n\n        Returns\n        -------\n        object\n            Whatever was identified by `obj_id`.\n        \"\"\"\n        return 
unidist.get(obj_id)\n\n    @classmethod\n    def put(cls, data, **kwargs):\n        \"\"\"\n        Put data into the object store.\n\n        Parameters\n        ----------\n        data : object\n            Data to be put.\n        **kwargs : dict\n            Additional keyword arguments (mostly for compatibility).\n\n        Returns\n        -------\n        unidist.ObjectRef\n            A reference to `data`.\n        \"\"\"\n        return unidist.put(data)\n\n    @classmethod\n    def wait(cls, obj_ids, num_returns=None):\n        \"\"\"\n        Wait on the objects without materializing them (blocking operation).\n\n        ``unidist.wait`` assumes a list of unique object references: see\n        https://github.com/modin-project/modin/issues/5045\n\n        Parameters\n        ----------\n        obj_ids : list, scalar\n        num_returns : int, optional\n        \"\"\"\n        if not isinstance(obj_ids, list):\n            obj_ids = [obj_ids]\n        unique_ids = list(set(obj_ids))\n        if num_returns is None:\n            num_returns = len(unique_ids)\n        if num_returns > 0:\n            unidist.wait(unique_ids, num_returns=num_returns)\n\n\n@unidist.remote\nclass SignalActor:  # pragma: no cover\n    \"\"\"\n    Help synchronize across tasks and actors on cluster.\n\n    Parameters\n    ----------\n    event_count : int\n        Number of events required for synchronization.\n\n    Notes\n    -----\n    For details see: https://docs.ray.io/en/latest/advanced.html?highlight=signalactor#multi-node-synchronization-using-an-actor.\n    \"\"\"\n\n    def __init__(self, event_count: int):\n        self.events = [asyncio.Event() for _ in range(event_count)]\n\n    def send(self, event_idx: int):\n        \"\"\"\n        Indicate that event with `event_idx` has occurred.\n\n        Parameters\n        ----------\n        event_idx : int\n        \"\"\"\n        self.events[event_idx].set()\n\n    async def wait(self, event_idx: int):\n        
\"\"\"\n        Wait until event with `event_idx` has occurred.\n\n        Parameters\n        ----------\n        event_idx : int\n        \"\"\"\n        await self.events[event_idx].wait()\n"
  },
  {
    "path": "modin/core/execution/unidist/common/utils.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"The module holds utility and initialization routines for Modin on unidist.\"\"\"\n\nimport unidist\nimport unidist.config as unidist_cfg\n\nimport modin.config as modin_cfg\n\nfrom .engine_wrapper import UnidistWrapper\n\n\ndef initialize_unidist():\n    \"\"\"\n    Initialize unidist based on ``modin.config`` variables and internal defaults.\n    \"\"\"\n\n    if unidist_cfg.Backend.get() != \"mpi\":\n        raise RuntimeError(\n            f\"Modin only supports MPI through unidist for now, got unidist backend '{unidist_cfg.Backend.get()}'\"\n        )\n\n    if not unidist.is_initialized():\n        modin_cfg.CpuCount.subscribe(\n            lambda cpu_count: unidist_cfg.CpuCount.put(cpu_count.get())\n        )\n        unidist_cfg.MpiRuntimeEnv.put(\n            {\"env_vars\": {\"PYTHONWARNINGS\": \"ignore::FutureWarning\"}}\n        )\n        unidist.init()\n\n    num_cpus = sum(v[\"CPU\"] for v in unidist.cluster_resources().values())\n    modin_cfg.NPartitions._put(num_cpus)\n    modin_cfg.CpuCount._put(num_cpus)\n\n\ndef deserialize(obj):  # pragma: no cover\n    \"\"\"\n    Deserialize a unidist object.\n\n    Parameters\n    ----------\n    obj : 
unidist.ObjectRef, iterable of unidist.ObjectRef, or mapping of keys to unidist.ObjectRef\n        Object(s) to deserialize.\n\n    Returns\n    -------\n    obj\n        The deserialized object(s).\n    \"\"\"\n    if unidist.is_object_ref(obj):\n        return UnidistWrapper.materialize(obj)\n    elif isinstance(obj, (tuple, list)):\n        # Unidist will error if any elements are not ObjectRef, but we still want unidist to\n        # perform batch deserialization for us -- thus, we must submit only the list elements\n        # that are ObjectRef, deserialize them, and restore them to their correct list index\n        ref_indices, refs = [], []\n        for i, unidist_ref in enumerate(obj):\n            if unidist.is_object_ref(unidist_ref):\n                ref_indices.append(i)\n                refs.append(unidist_ref)\n        unidist_result = UnidistWrapper.materialize(refs)\n        new_lst = list(obj)\n        for i, deser_item in zip(ref_indices, unidist_result):\n            new_lst[i] = deser_item\n        # Check that all objects have been deserialized\n        assert not any(unidist.is_object_ref(o) for o in new_lst)\n        return new_lst\n    elif isinstance(obj, dict) and any(\n        unidist.is_object_ref(val) for val in obj.values()\n    ):\n        return dict(zip(obj.keys(), deserialize(tuple(obj.values()))))\n    else:\n        return obj\n"
  },
  {
    "path": "modin/core/execution/unidist/generic/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Generic functionality for unidist execution engine.\"\"\"\n"
  },
  {
    "path": "modin/core/execution/unidist/generic/io/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Generic IO functionality for unidist execution engine.\"\"\"\n\nfrom .io import UnidistIO\n\n__all__ = [\"UnidistIO\"]\n"
  },
  {
    "path": "modin/core/execution/unidist/generic/io/io.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"The module holds base class implementing required I/O over unidist.\"\"\"\n\nfrom modin.core.io import BaseIO\n\n\nclass UnidistIO(BaseIO):\n    \"\"\"Base class for doing I/O operations over unidist.\"\"\"\n"
  },
  {
    "path": "modin/core/execution/unidist/generic/partitioning/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Generic partitioning functionality for unidist execution engine.\"\"\"\n\nfrom .partition_manager import GenericUnidistDataframePartitionManager\n\n__all__ = [\n    \"GenericUnidistDataframePartitionManager\",\n]\n"
  },
  {
    "path": "modin/core/execution/unidist/generic/partitioning/partition_manager.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"The module holds Modin partition manager implemented for unidist.\"\"\"\n\nimport numpy as np\n\nfrom modin.core.dataframe.pandas.partitioning.partition_manager import (\n    PandasDataframePartitionManager,\n)\nfrom modin.core.execution.unidist.common import UnidistWrapper\n\n\nclass GenericUnidistDataframePartitionManager(PandasDataframePartitionManager):\n    \"\"\"The class implements the interface in `PandasDataframePartitionManager`.\"\"\"\n\n    @classmethod\n    def to_numpy(cls, partitions, **kwargs):\n        \"\"\"\n        Convert `partitions` into a NumPy array.\n\n        Parameters\n        ----------\n        partitions : NumPy array\n            A 2-D array of partitions to convert to local NumPy array.\n        **kwargs : dict\n            Keyword arguments to pass to each partition ``.to_numpy()`` call.\n\n        Returns\n        -------\n        NumPy array\n        \"\"\"\n        if partitions.shape[1] == 1:\n            parts = cls.get_objects_from_partitions(partitions.flatten())\n            parts = [part.to_numpy(**kwargs) for part in parts]\n        else:\n            parts = UnidistWrapper.materialize(\n                [\n         
           obj.apply(\n                        lambda df, **kwargs: df.to_numpy(**kwargs)\n                    ).list_of_blocks[0]\n                    for row in partitions\n                    for obj in row\n                ]\n            )\n        rows, cols = partitions.shape\n        parts = [parts[i * cols : (i + 1) * cols] for i in range(rows)]\n        return np.block(parts)\n"
  },
  {
    "path": "modin/core/execution/unidist/implementations/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Experimental Modin's functionality related to unidist execution engine and optimized for specific storage formats.\"\"\"\n"
  },
  {
    "path": "modin/core/execution/unidist/implementations/pandas_on_unidist/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Modin's functionality related to unidist execution engine and optimized for pandas storage format.\"\"\"\n"
  },
  {
    "path": "modin/core/execution/unidist/implementations/pandas_on_unidist/dataframe/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Base Modin Dataframe class optimized for pandas on unidist execution.\"\"\"\n\nfrom .dataframe import PandasOnUnidistDataframe\n\n__all__ = [\"PandasOnUnidistDataframe\"]\n"
  },
  {
    "path": "modin/core/execution/unidist/implementations/pandas_on_unidist/dataframe/dataframe.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module houses class that implements ``PandasDataframe`` using unidist.\"\"\"\n\nfrom modin.core.dataframe.pandas.dataframe.dataframe import PandasDataframe\nfrom modin.utils import _inherit_docstrings\n\nfrom ..partitioning.partition_manager import PandasOnUnidistDataframePartitionManager\n\n\nclass PandasOnUnidistDataframe(PandasDataframe):\n    \"\"\"\n    The class implements the interface in ``PandasDataframe`` using unidist.\n\n    Parameters\n    ----------\n    partitions : np.ndarray\n        A 2D NumPy array of partitions.\n    index : sequence\n        The index for the dataframe. Converted to a ``pandas.Index``.\n    columns : sequence\n        The columns object for the dataframe. Converted to a ``pandas.Index``.\n    row_lengths : list, optional\n        The length of each partition in the rows. The \"height\" of\n        each of the block partitions. Is computed if not provided.\n    column_widths : list, optional\n        The width of each partition in the columns. The \"width\" of\n        each of the block partitions. 
Is computed if not provided.\n    dtypes : pandas.Series, optional\n        The data types for the dataframe columns.\n    pandas_backend : {\"pyarrow\", None}, optional\n        Backend used by pandas. None - means default NumPy backend.\n    \"\"\"\n\n    _partition_mgr_cls = PandasOnUnidistDataframePartitionManager\n\n    def support_materialization_in_worker_process(self) -> bool:\n        # more details why this is not `True` in https://github.com/modin-project/modin/pull/6673\n        return False\n\n    @property\n    @_inherit_docstrings(PandasDataframe.engine)\n    def engine(self) -> str:\n        return \"Unidist\"\n"
  },
  {
    "path": "modin/core/execution/unidist/implementations/pandas_on_unidist/io/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Base IO classes optimized for pandas on unidist execution.\"\"\"\n\nfrom .io import PandasOnUnidistIO\n\n__all__ = [\"PandasOnUnidistIO\"]\n"
  },
  {
    "path": "modin/core/execution/unidist/implementations/pandas_on_unidist/io/io.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"The module holds the factory which performs I/O using pandas on unidist.\"\"\"\n\nimport io\n\nimport numpy as np\nimport pandas\nfrom pandas.io.common import get_handle, stringify_path\n\nfrom modin.core.execution.unidist.common import SignalActor, UnidistWrapper\nfrom modin.core.execution.unidist.generic.io import UnidistIO\nfrom modin.core.io import (\n    CSVDispatcher,\n    ExcelDispatcher,\n    FeatherDispatcher,\n    FWFDispatcher,\n    JSONDispatcher,\n    ParquetDispatcher,\n    SQLDispatcher,\n)\nfrom modin.core.storage_formats.pandas.parsers import (\n    PandasCSVParser,\n    PandasExcelParser,\n    PandasFeatherParser,\n    PandasFWFParser,\n    PandasJSONParser,\n    PandasParquetParser,\n    PandasSQLParser,\n)\nfrom modin.core.storage_formats.pandas.query_compiler import PandasQueryCompiler\nfrom modin.experimental.core.io import (\n    ExperimentalCSVGlobDispatcher,\n    ExperimentalCustomTextDispatcher,\n    ExperimentalGlobDispatcher,\n    ExperimentalSQLDispatcher,\n)\nfrom modin.experimental.core.storage_formats.pandas.parsers import (\n    ExperimentalCustomTextParser,\n    ExperimentalPandasCSVGlobParser,\n    
ExperimentalPandasJsonParser,\n    ExperimentalPandasParquetParser,\n    ExperimentalPandasPickleParser,\n    ExperimentalPandasXmlParser,\n)\n\nfrom ..dataframe import PandasOnUnidistDataframe\nfrom ..partitioning import PandasOnUnidistDataframePartition\n\n\nclass PandasOnUnidistIO(UnidistIO):\n    \"\"\"Factory providing methods for performing I/O operations using pandas as storage format on unidist as engine.\"\"\"\n\n    frame_cls = PandasOnUnidistDataframe\n    frame_partition_cls = PandasOnUnidistDataframePartition\n    query_compiler_cls = PandasQueryCompiler\n    build_args = dict(\n        frame_partition_cls=PandasOnUnidistDataframePartition,\n        query_compiler_cls=PandasQueryCompiler,\n        frame_cls=PandasOnUnidistDataframe,\n        base_io=UnidistIO,\n    )\n\n    def __make_read(*classes, build_args=build_args):\n        # used to reduce code duplication\n        return type(\"\", (UnidistWrapper, *classes), build_args).read\n\n    def __make_write(*classes, build_args=build_args):\n        # used to reduce code duplication\n        return type(\"\", (UnidistWrapper, *classes), build_args).write\n\n    read_csv = __make_read(PandasCSVParser, CSVDispatcher)\n    read_fwf = __make_read(PandasFWFParser, FWFDispatcher)\n    read_json = __make_read(PandasJSONParser, JSONDispatcher)\n    read_parquet = __make_read(PandasParquetParser, ParquetDispatcher)\n    to_parquet = __make_write(ParquetDispatcher)\n    # Blocked on pandas-dev/pandas#12236. 
It is faster to default to pandas.\n    # read_hdf = __make_read(PandasHDFParser, HDFReader)\n    read_feather = __make_read(PandasFeatherParser, FeatherDispatcher)\n    read_sql = __make_read(PandasSQLParser, SQLDispatcher)\n    to_sql = __make_write(SQLDispatcher)\n    read_excel = __make_read(PandasExcelParser, ExcelDispatcher)\n\n    # experimental methods that don't exist in pandas\n    read_csv_glob = __make_read(\n        ExperimentalPandasCSVGlobParser, ExperimentalCSVGlobDispatcher\n    )\n    read_parquet_glob = __make_read(\n        ExperimentalPandasParquetParser, ExperimentalGlobDispatcher\n    )\n    to_parquet_glob = __make_write(\n        ExperimentalGlobDispatcher,\n        build_args={**build_args, \"base_write\": UnidistIO.to_parquet},\n    )\n    read_json_glob = __make_read(\n        ExperimentalPandasJsonParser, ExperimentalGlobDispatcher\n    )\n    to_json_glob = __make_write(\n        ExperimentalGlobDispatcher,\n        build_args={**build_args, \"base_write\": UnidistIO.to_json},\n    )\n    read_xml_glob = __make_read(ExperimentalPandasXmlParser, ExperimentalGlobDispatcher)\n    to_xml_glob = __make_write(\n        ExperimentalGlobDispatcher,\n        build_args={**build_args, \"base_write\": UnidistIO.to_xml},\n    )\n    read_pickle_glob = __make_read(\n        ExperimentalPandasPickleParser, ExperimentalGlobDispatcher\n    )\n    to_pickle_glob = __make_write(\n        ExperimentalGlobDispatcher,\n        build_args={**build_args, \"base_write\": UnidistIO.to_pickle},\n    )\n    read_custom_text = __make_read(\n        ExperimentalCustomTextParser, ExperimentalCustomTextDispatcher\n    )\n    read_sql_distributed = __make_read(\n        ExperimentalSQLDispatcher, build_args={**build_args, \"base_read\": read_sql}\n    )\n\n    del __make_read  # to not pollute class namespace\n    del __make_write  # to not pollute class namespace\n\n    @staticmethod\n    def _to_csv_check_support(kwargs):\n        \"\"\"\n        Check if parallel 
version of ``to_csv`` could be used.\n\n        Parameters\n        ----------\n        kwargs : dict\n            Keyword arguments passed to ``.to_csv()``.\n\n        Returns\n        -------\n        bool\n            Whether parallel version of ``to_csv`` is applicable.\n        \"\"\"\n        path_or_buf = kwargs[\"path_or_buf\"]\n        compression = kwargs[\"compression\"]\n        if not isinstance(path_or_buf, str):\n            return False\n        # case when the pointer is placed at the beginning of the file.\n        if \"r\" in kwargs[\"mode\"] and \"+\" in kwargs[\"mode\"]:\n            return False\n        # encodings with a BOM are not supported:\n        # instead of one mark in the result bytes we would get one mark per partition,\n        # so we fall back to pandas for `utf-16`, `utf-32` and all their aliases, e.g.\n        # (`utf_32_be`, `utf_16_le` and so on)\n        if kwargs[\"encoding\"] is not None:\n            encoding = kwargs[\"encoding\"].lower()\n            if \"u\" in encoding or \"utf\" in encoding:\n                if \"16\" in encoding or \"32\" in encoding:\n                    return False\n        if compression is None or not compression == \"infer\":\n            return False\n        if any((path_or_buf.endswith(ext) for ext in [\".gz\", \".bz2\", \".zip\", \".xz\"])):\n            return False\n        return True\n\n    @classmethod\n    def to_csv(cls, qc, **kwargs):\n        \"\"\"\n        Write records stored in the `qc` to a CSV file.\n\n        Parameters\n        ----------\n        qc : BaseQueryCompiler\n            The query compiler of the Modin dataframe that we want to run ``to_csv`` on.\n        **kwargs : dict\n            Parameters for ``pandas.to_csv(**kwargs)``.\n        \"\"\"\n        kwargs[\"path_or_buf\"] = stringify_path(kwargs[\"path_or_buf\"])\n        if not cls._to_csv_check_support(kwargs):\n            return UnidistIO.to_csv(qc, **kwargs)\n\n        signals = 
SignalActor.remote(len(qc._modin_frame._partitions) + 1)\n\n        def func(df, **kw):  # pragma: no cover\n            \"\"\"\n            Dump a chunk of rows as csv, then save them to target maintaining order.\n\n            Parameters\n            ----------\n            df : pandas.DataFrame\n                A chunk of rows to write to a CSV file.\n            **kw : dict\n                Arguments to pass to ``pandas.to_csv(**kw)`` plus an extra argument\n                `partition_idx` serving as chunk index to maintain rows order.\n            \"\"\"\n            partition_idx = kw[\"partition_idx\"]\n            # the copy is made to not implicitly change the input parameters;\n            # to write to an intermediate buffer, we need to change `path_or_buf` in kwargs\n            csv_kwargs = kwargs.copy()\n            if partition_idx != 0:\n                # we need to create a new file only for first recording\n                # all the rest should be recorded in appending mode\n                if \"w\" in csv_kwargs[\"mode\"]:\n                    csv_kwargs[\"mode\"] = csv_kwargs[\"mode\"].replace(\"w\", \"a\")\n                # It is enough to write the header for the first partition\n                csv_kwargs[\"header\"] = False\n\n            # for parallelization purposes, each partition is written to an intermediate buffer\n            path_or_buf = csv_kwargs[\"path_or_buf\"]\n            is_binary = \"b\" in csv_kwargs[\"mode\"]\n            csv_kwargs[\"path_or_buf\"] = io.BytesIO() if is_binary else io.StringIO()\n            storage_options = csv_kwargs.pop(\"storage_options\", None)\n            df.to_csv(**csv_kwargs)\n            csv_kwargs.update({\"storage_options\": storage_options})\n            content = csv_kwargs[\"path_or_buf\"].getvalue()\n            csv_kwargs[\"path_or_buf\"].close()\n\n            # each process waits for its turn to write to a file\n            
UnidistWrapper.materialize(signals.wait.remote(partition_idx))\n\n            # preparing to write data from the buffer to a file\n            with get_handle(\n                path_or_buf,\n                # in case when using URL in implicit text mode\n                # pandas try to open `path_or_buf` in binary mode\n                csv_kwargs[\"mode\"] if is_binary else csv_kwargs[\"mode\"] + \"t\",\n                encoding=kwargs[\"encoding\"],\n                errors=kwargs[\"errors\"],\n                compression=kwargs[\"compression\"],\n                storage_options=kwargs.get(\"storage_options\", None),\n                is_text=not is_binary,\n            ) as handles:\n                handles.handle.write(content)\n\n            # signal that the next process can start writing to the file\n            UnidistWrapper.materialize(signals.send.remote(partition_idx + 1))\n            # used for synchronization purposes\n            return pandas.DataFrame()\n\n        # signaling that the partition with id==0 can be written to the file\n        UnidistWrapper.materialize(signals.send.remote(0))\n        # Ensure that the metadata is synchronized\n        qc._modin_frame._propagate_index_objs(axis=None)\n        result = qc._modin_frame._partition_mgr_cls.map_axis_partitions(\n            axis=1,\n            partitions=qc._modin_frame._partitions,\n            map_func=func,\n            keep_partitioning=True,\n            lengths=None,\n            enumerate_partitions=True,\n            max_retries=0,\n        )\n        # pending completion\n        UnidistWrapper.materialize(\n            [part.list_of_blocks[0] for row in result for part in row]\n        )\n\n    @classmethod\n    def from_map(cls, func, iterable, *args, **kwargs):\n        \"\"\"\n        Create a Modin `query_compiler` from a map function.\n\n        This method will construct a Modin `query_compiler` split by row partitions.\n        The number of row partitions matches the 
number of elements in the iterable object.\n\n        Parameters\n        ----------\n        func : callable\n            Function to map across the iterable object.\n        iterable : Iterable\n            An iterable object.\n        *args : tuple\n            Positional arguments to pass in `func`.\n        **kwargs : dict\n            Keyword arguments to pass in `func`.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            QueryCompiler containing data returned by map function.\n        \"\"\"\n        func = cls.frame_cls._partition_mgr_cls.preprocess_func(func)\n        partitions = np.array(\n            [\n                [\n                    cls.frame_partition_cls(\n                        UnidistWrapper.deploy(\n                            func,\n                            f_args=(obj,) + args,\n                            f_kwargs=kwargs,\n                            return_pandas_df=True,\n                        )\n                    )\n                ]\n                for obj in iterable\n            ]\n        )\n        return cls.query_compiler_cls(cls.frame_cls(partitions))\n"
  },
  {
    "path": "modin/core/execution/unidist/implementations/pandas_on_unidist/partitioning/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Base Modin Dataframe classes related to its partitioning and optimized for pandas on unidist execution.\"\"\"\n\nfrom .partition import PandasOnUnidistDataframePartition\nfrom .partition_manager import PandasOnUnidistDataframePartitionManager\nfrom .virtual_partition import (\n    PandasOnUnidistDataframeColumnPartition,\n    PandasOnUnidistDataframeRowPartition,\n    PandasOnUnidistDataframeVirtualPartition,\n)\n\n__all__ = [\n    \"PandasOnUnidistDataframePartitionManager\",\n    \"PandasOnUnidistDataframePartition\",\n    \"PandasOnUnidistDataframeVirtualPartition\",\n    \"PandasOnUnidistDataframeColumnPartition\",\n    \"PandasOnUnidistDataframeRowPartition\",\n]\n"
  },
  {
    "path": "modin/core/execution/unidist/implementations/pandas_on_unidist/partitioning/partition.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module houses class that wraps data (block partition) and its metadata.\"\"\"\n\nimport warnings\n\nimport pandas\nimport unidist\n\nfrom modin.core.dataframe.pandas.partitioning.partition import PandasDataframePartition\nfrom modin.core.execution.unidist.common import UnidistWrapper\nfrom modin.core.execution.unidist.common.utils import deserialize\nfrom modin.logging import get_logger\nfrom modin.pandas.indexing import compute_sliced_len\n\ncompute_sliced_len = unidist.remote(compute_sliced_len)\n\n\nclass PandasOnUnidistDataframePartition(PandasDataframePartition):\n    \"\"\"\n    The class implements the interface in ``PandasDataframePartition``.\n\n    Parameters\n    ----------\n    data : unidist.ObjectRef\n        A reference to ``pandas.DataFrame`` that need to be wrapped with this class.\n    length : unidist.ObjectRef or int, optional\n        Length or reference to it of wrapped ``pandas.DataFrame``.\n    width : unidist.ObjectRef or int, optional\n        Width or reference to it of wrapped ``pandas.DataFrame``.\n    ip : unidist.ObjectRef or str, optional\n        Node IP address or reference to it that holds wrapped ``pandas.DataFrame``.\n    
call_queue : list\n        Call queue that needs to be executed on wrapped ``pandas.DataFrame``.\n    \"\"\"\n\n    execution_wrapper = UnidistWrapper\n\n    def __init__(self, data, length=None, width=None, ip=None, call_queue=None):\n        super().__init__()\n        assert unidist.is_object_ref(data)\n        self._data = data\n        self.call_queue = call_queue if call_queue is not None else []\n        self._length_cache = length\n        self._width_cache = width\n        self._ip_cache = ip\n\n        log = get_logger()\n        self._is_debug(log) and log.debug(\n            \"Partition ID: {}, Height: {}, Width: {}, Node IP: {}\".format(\n                self._identity,\n                str(self._length_cache),\n                str(self._width_cache),\n                str(self._ip_cache),\n            )\n        )\n\n    def apply(self, func, *args, **kwargs):\n        \"\"\"\n        Apply a function to the object wrapped by this partition.\n\n        Parameters\n        ----------\n        func : callable or unidist.ObjectRef\n            A function to apply.\n        *args : iterable\n            Additional positional arguments to be passed in `func`.\n        **kwargs : dict\n            Additional keyword arguments to be passed in `func`.\n\n        Returns\n        -------\n        PandasOnUnidistDataframePartition\n            A new ``PandasOnUnidistDataframePartition`` object.\n\n        Notes\n        -----\n        It does not matter if `func` is callable or an ``unidist.ObjectRef``. Unidist will\n        handle it correctly either way. 
The keyword arguments are sent as a dictionary.\n        \"\"\"\n        log = get_logger()\n        self._is_debug(log) and log.debug(f\"ENTER::Partition.apply::{self._identity}\")\n        data = self._data\n        call_queue = self.call_queue + [[func, args, kwargs]]\n        if len(call_queue) > 1:\n            self._is_debug(log) and log.debug(\n                f\"SUBMIT::_apply_list_of_funcs::{self._identity}\"\n            )\n            result, length, width, ip = _apply_list_of_funcs.remote(call_queue, data)\n        else:\n            # We handle `len(call_queue) == 1` in a different way because\n            # this dramatically improves performance.\n            result, length, width, ip = _apply_func.remote(data, func, *args, **kwargs)\n            self._is_debug(log) and log.debug(f\"SUBMIT::_apply_func::{self._identity}\")\n        self._is_debug(log) and log.debug(f\"EXIT::Partition.apply::{self._identity}\")\n        return self.__constructor__(result, length, width, ip)\n\n    def drain_call_queue(self):\n        \"\"\"Execute all operations stored in the call queue on the object wrapped by this partition.\"\"\"\n        log = get_logger()\n        self._is_debug(log) and log.debug(\n            f\"ENTER::Partition.drain_call_queue::{self._identity}\"\n        )\n        if len(self.call_queue) == 0:\n            return\n        data = self._data\n        call_queue = self.call_queue\n        if len(call_queue) > 1:\n            self._is_debug(log) and log.debug(\n                f\"SUBMIT::_apply_list_of_funcs::{self._identity}\"\n            )\n            (\n                self._data,\n                new_length,\n                new_width,\n                self._ip_cache,\n            ) = _apply_list_of_funcs.remote(call_queue, data)\n        else:\n            # We handle `len(call_queue) == 1` in a different way because\n            # this dramatically improves performance.\n            func, f_args, f_kwargs = call_queue[0]\n            
self._is_debug(log) and log.debug(f\"SUBMIT::_apply_func::{self._identity}\")\n            (\n                self._data,\n                new_length,\n                new_width,\n                self._ip_cache,\n            ) = _apply_func.remote(data, func, *f_args, **f_kwargs)\n        self._is_debug(log) and log.debug(\n            f\"EXIT::Partition.drain_call_queue::{self._identity}\"\n        )\n        self.call_queue = []\n\n        # GH#4732 if we already have evaluated width/length cached as ints,\n        #  don't overwrite that cache with non-evaluated values.\n        if not isinstance(self._length_cache, int):\n            self._length_cache = new_length\n        if not isinstance(self._width_cache, int):\n            self._width_cache = new_width\n\n    def wait(self):\n        \"\"\"Wait completing computations on the object wrapped by the partition.\"\"\"\n        self.drain_call_queue()\n        UnidistWrapper.wait(self._data)\n\n    def mask(self, row_labels, col_labels):\n        \"\"\"\n        Lazily create a mask that extracts the indices provided.\n\n        Parameters\n        ----------\n        row_labels : list-like, slice or label\n            The row labels for the rows to extract.\n        col_labels : list-like, slice or label\n            The column labels for the columns to extract.\n\n        Returns\n        -------\n        PandasOnUnidistDataframePartition\n            A new ``PandasOnUnidistDataframePartition`` object.\n        \"\"\"\n        log = get_logger()\n        self._is_debug(log) and log.debug(f\"ENTER::Partition.mask::{self._identity}\")\n        new_obj = super().mask(row_labels, col_labels)\n        if isinstance(row_labels, slice) and unidist.is_object_ref(self._length_cache):\n            if row_labels == slice(None):\n                # fast path - full axis take\n                new_obj._length_cache = self._length_cache\n            else:\n                new_obj._length_cache = compute_sliced_len.remote(\n  
                  row_labels, self._length_cache\n                )\n        if isinstance(col_labels, slice) and unidist.is_object_ref(self._width_cache):\n            if col_labels == slice(None):\n                # fast path - full axis take\n                new_obj._width_cache = self._width_cache\n            else:\n                new_obj._width_cache = compute_sliced_len.remote(\n                    col_labels, self._width_cache\n                )\n        self._is_debug(log) and log.debug(f\"EXIT::Partition.mask::{self._identity}\")\n        return new_obj\n\n    @classmethod\n    def put(cls, obj):\n        \"\"\"\n        Put an object into object store and wrap it with partition object.\n\n        Parameters\n        ----------\n        obj : any\n            An object to be put.\n\n        Returns\n        -------\n        PandasOnUnidistDataframePartition\n            A new ``PandasOnUnidistDataframePartition`` object.\n        \"\"\"\n        return cls(cls.execution_wrapper.put(obj), len(obj.index), len(obj.columns))\n\n    @classmethod\n    def preprocess_func(cls, func):\n        \"\"\"\n        Put a function into the object store to use in ``apply``.\n\n        Parameters\n        ----------\n        func : callable\n            A function to preprocess.\n\n        Returns\n        -------\n        unidist.ObjectRef\n            A reference to `func`.\n        \"\"\"\n        return cls.execution_wrapper.put(func)\n\n    def length(self, materialize=True):\n        \"\"\"\n        Get the length of the object wrapped by this partition.\n\n        Parameters\n        ----------\n        materialize : bool, default: True\n            Whether to forcibly materialize the result into an integer. 
If ``False``\n            was specified, may return a future of the result if it hasn't been\n            materialized yet.\n\n        Returns\n        -------\n        int or unidist.ObjectRef\n            The length of the object.\n        \"\"\"\n        if self._length_cache is None:\n            if len(self.call_queue):\n                self.drain_call_queue()\n            else:\n                (\n                    self._length_cache,\n                    self._width_cache,\n                ) = _get_index_and_columns_size.remote(self._data)\n        if unidist.is_object_ref(self._length_cache) and materialize:\n            self._length_cache = UnidistWrapper.materialize(self._length_cache)\n        return self._length_cache\n\n    def width(self, materialize=True):\n        \"\"\"\n        Get the width of the object wrapped by the partition.\n\n        Parameters\n        ----------\n        materialize : bool, default: True\n            Whether to forcibly materialize the result into an integer. 
If ``False``\n            was specified, may return a future of the result if it hasn't been\n            materialized yet.\n\n        Returns\n        -------\n        int or unidist.ObjectRef\n            The width of the object.\n        \"\"\"\n        if self._width_cache is None:\n            if len(self.call_queue):\n                self.drain_call_queue()\n            else:\n                (\n                    self._length_cache,\n                    self._width_cache,\n                ) = _get_index_and_columns_size.remote(self._data)\n        if unidist.is_object_ref(self._width_cache) and materialize:\n            self._width_cache = UnidistWrapper.materialize(self._width_cache)\n        return self._width_cache\n\n    def ip(self, materialize=True):\n        \"\"\"\n        Get the node IP address of the object wrapped by this partition.\n\n        Parameters\n        ----------\n        materialize : bool, default: True\n            Whether to forcibly materialize the result into a string. 
If ``False``\n            was specified, may return a future of the result if it hasn't been\n            materialized yet.\n\n        Returns\n        -------\n        str\n            IP address of the node that holds the data.\n        \"\"\"\n        if self._ip_cache is None:\n            if len(self.call_queue):\n                self.drain_call_queue()\n            else:\n                self._ip_cache = self.apply(lambda df: pandas.DataFrame([]))._ip_cache\n        if materialize and unidist.is_object_ref(self._ip_cache):\n            self._ip_cache = UnidistWrapper.materialize(self._ip_cache)\n        return self._ip_cache\n\n\n@unidist.remote(num_returns=2)\ndef _get_index_and_columns_size(df):  # pragma: no cover\n    \"\"\"\n    Get the number of rows and columns of a pandas DataFrame.\n\n    Parameters\n    ----------\n    df : pandas.DataFrame\n        A pandas DataFrame which dimensions are needed.\n\n    Returns\n    -------\n    int\n        The number of rows.\n    int\n        The number of columns.\n    \"\"\"\n    return len(df.index), len(df.columns)\n\n\n@unidist.remote(num_returns=4)\ndef _apply_func(partition, func, *args, **kwargs):  # pragma: no cover\n    \"\"\"\n    Execute a function on the partition in a worker process.\n\n    Parameters\n    ----------\n    partition : pandas.DataFrame\n        A pandas DataFrame the function needs to be executed on.\n    func : callable\n        The function to perform on the partition.\n    *args : list\n        Positional arguments to pass to ``func``.\n    **kwargs : dict\n        Keyword arguments to pass to ``func``.\n\n    Returns\n    -------\n    pandas.DataFrame\n        The resulting pandas DataFrame.\n    int\n        The number of rows of the resulting pandas DataFrame.\n    int\n        The number of columns of the resulting pandas DataFrame.\n    str\n        The node IP address of the worker process.\n\n    Notes\n    -----\n    Directly passing a call queue entry (i.e. 
a list of [func, args, kwargs]) instead of\n    destructuring it causes a performance penalty.\n    \"\"\"\n    try:\n        with warnings.catch_warnings():\n            warnings.filterwarnings(\"ignore\", category=FutureWarning)\n            result = func(partition, *args, **kwargs)\n    # Sometimes Arrow forces us to make a copy of an object before we operate on it. We\n    # don't want the error to propagate to the user, and we want to avoid copying unless\n    # we absolutely have to.\n    except ValueError:\n        with warnings.catch_warnings():\n            warnings.filterwarnings(\"ignore\", category=FutureWarning)\n            result = func(partition.copy(), *args, **kwargs)\n    return (\n        result,\n        len(result) if hasattr(result, \"__len__\") else 0,\n        len(getattr(result, \"columns\", ())),\n        unidist.get_ip(),\n    )\n\n\n@unidist.remote(num_returns=4)\ndef _apply_list_of_funcs(call_queue, partition):  # pragma: no cover\n    \"\"\"\n    Execute all operations stored in the call queue on the partition in a worker process.\n\n    Parameters\n    ----------\n    call_queue : list\n        A call queue that needs to be executed on the partition.\n    partition : pandas.DataFrame\n        A pandas DataFrame the call queue needs to be executed on.\n\n    Returns\n    -------\n    pandas.DataFrame\n        The resulting pandas DataFrame.\n    int\n        The number of rows of the resulting pandas DataFrame.\n    int\n        The number of columns of the resulting pandas DataFrame.\n    str\n        The node IP address of the worker process.\n    \"\"\"\n    for func, f_args, f_kwargs in call_queue:\n        func = deserialize(func)\n        args = deserialize(f_args)\n        kwargs = deserialize(f_kwargs)\n        try:\n            with warnings.catch_warnings():\n                warnings.filterwarnings(\"ignore\", category=FutureWarning)\n                partition = func(partition, *args, **kwargs)\n        # Sometimes Arrow 
forces us to make a copy of an object before we operate on it. We\n        # don't want the error to propagate to the user, and we want to avoid copying unless\n        # we absolutely have to.\n        except ValueError:\n            with warnings.catch_warnings():\n                warnings.filterwarnings(\"ignore\", category=FutureWarning)\n                partition = func(partition.copy(), *args, **kwargs)\n\n    return (\n        partition,\n        len(partition) if hasattr(partition, \"__len__\") else 0,\n        len(partition.columns) if hasattr(partition, \"columns\") else 0,\n        unidist.get_ip(),\n    )\n"
  },
  {
    "path": "modin/core/execution/unidist/implementations/pandas_on_unidist/partitioning/partition_manager.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module houses class that implements ``GenericUnidistDataframePartitionManager`` using Unidist.\"\"\"\n\nfrom modin.core.execution.modin_aqp import progress_bar_wrapper\nfrom modin.core.execution.unidist.common import UnidistWrapper\nfrom modin.core.execution.unidist.generic.partitioning import (\n    GenericUnidistDataframePartitionManager,\n)\n\nfrom .partition import PandasOnUnidistDataframePartition\nfrom .virtual_partition import (\n    PandasOnUnidistDataframeColumnPartition,\n    PandasOnUnidistDataframeRowPartition,\n)\n\n\nclass PandasOnUnidistDataframePartitionManager(GenericUnidistDataframePartitionManager):\n    \"\"\"The class implements the interface in `PandasDataframePartitionManager`.\"\"\"\n\n    # This object uses PandasOnUnidistDataframePartition objects as the underlying store.\n    _partition_class = PandasOnUnidistDataframePartition\n    _column_partitions_class = PandasOnUnidistDataframeColumnPartition\n    _row_partition_class = PandasOnUnidistDataframeRowPartition\n    _execution_wrapper = UnidistWrapper\n\n    @classmethod\n    def wait_partitions(cls, partitions):\n        \"\"\"\n        Wait on the objects wrapped by `partitions` 
in parallel, without materializing them.\n\n        This method will block until all computations in the list have completed.\n\n        Parameters\n        ----------\n        partitions : np.ndarray\n            NumPy array with ``PandasDataframePartition``-s.\n        \"\"\"\n        UnidistWrapper.wait(\n            [block for partition in partitions for block in partition.list_of_blocks]\n        )\n\n\ndef _make_wrapped_method(name: str):\n    \"\"\"\n    Define new attribute that should work with progress bar.\n\n    Parameters\n    ----------\n    name : str\n        Name of `GenericUnidistDataframePartitionManager` attribute that should be reused.\n\n    Notes\n    -----\n    - `classmethod` decorator shouldn't be applied twice, so we refer to `__func__` attribute.\n    - New attribute is defined for `PandasOnUnidistDataframePartitionManager`.\n    \"\"\"\n    setattr(\n        PandasOnUnidistDataframePartitionManager,\n        name,\n        classmethod(\n            progress_bar_wrapper(\n                getattr(GenericUnidistDataframePartitionManager, name).__func__\n            )\n        ),\n    )\n\n\nfor method in (\n    \"map_partitions\",\n    \"lazy_map_partitions\",\n    \"map_axis_partitions\",\n    \"_apply_func_to_list_of_partitions\",\n    \"apply_func_to_select_indices\",\n    \"apply_func_to_select_indices_along_full_axis\",\n    \"apply_func_to_indices_both_axis\",\n    \"n_ary_operation\",\n):\n    _make_wrapped_method(method)\n"
  },
  {
    "path": "modin/core/execution/unidist/implementations/pandas_on_unidist/partitioning/virtual_partition.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module houses classes responsible for storing a virtual partition and applying a function to it.\"\"\"\n\nimport warnings\n\nimport pandas\nimport unidist\n\nfrom modin.core.dataframe.pandas.partitioning.axis_partition import (\n    PandasDataframeAxisPartition,\n)\nfrom modin.core.execution.unidist.common import UnidistWrapper\nfrom modin.core.execution.unidist.common.utils import deserialize\nfrom modin.utils import _inherit_docstrings\n\nfrom .partition import PandasOnUnidistDataframePartition\n\n\nclass PandasOnUnidistDataframeVirtualPartition(PandasDataframeAxisPartition):\n    \"\"\"\n    The class implements the interface in ``PandasDataframeAxisPartition``.\n\n    Parameters\n    ----------\n    list_of_partitions : Union[list, PandasOnUnidistDataframePartition]\n        List of ``PandasOnUnidistDataframePartition`` and\n        ``PandasOnUnidistDataframeVirtualPartition`` objects, or a single\n        ``PandasOnUnidistDataframePartition``.\n    get_ip : bool, default: False\n        Whether to get node IP addresses to conforming partitions or not.\n    full_axis : bool, default: True\n        Whether or not the virtual partition encompasses the whole 
axis.\n    call_queue : list, optional\n        A list of tuples (callable, args, kwargs) that contains deferred calls.\n    length : unidist.ObjectRef or int, optional\n        Length, or reference to length, of wrapped ``pandas.DataFrame``.\n    width : unidist.ObjectRef or int, optional\n        Width, or reference to width, of wrapped ``pandas.DataFrame``.\n    \"\"\"\n\n    _PARTITIONS_METADATA_LEN = 3  # (length, width, ip)\n    partition_type = PandasOnUnidistDataframePartition\n    axis = None\n\n    # these variables are intentionally initialized at runtime (see #6023)\n    _DEPLOY_AXIS_FUNC = None\n    _DEPLOY_SPLIT_FUNC = None\n    _DRAIN_FUNC = None\n\n    @classmethod\n    def _get_deploy_axis_func(cls):  # noqa: GL08\n        if cls._DEPLOY_AXIS_FUNC is None:\n            cls._DEPLOY_AXIS_FUNC = UnidistWrapper.put(\n                PandasDataframeAxisPartition.deploy_axis_func\n            )\n        return cls._DEPLOY_AXIS_FUNC\n\n    @classmethod\n    def _get_deploy_split_func(cls):  # noqa: GL08\n        if cls._DEPLOY_SPLIT_FUNC is None:\n            cls._DEPLOY_SPLIT_FUNC = UnidistWrapper.put(\n                PandasDataframeAxisPartition.deploy_splitting_func\n            )\n        return cls._DEPLOY_SPLIT_FUNC\n\n    @classmethod\n    def _get_drain_func(cls):  # noqa: GL08\n        if cls._DRAIN_FUNC is None:\n            cls._DRAIN_FUNC = UnidistWrapper.put(PandasDataframeAxisPartition.drain)\n        return cls._DRAIN_FUNC\n\n    @property\n    def list_of_ips(self):\n        \"\"\"\n        Get the IPs holding the physical objects composing this partition.\n\n        Returns\n        -------\n        List\n            A list of IPs as ``unidist.ObjectRef`` or str.\n        \"\"\"\n        # Defer draining call queue until we get the ip address\n        result = [None] * len(self.list_of_block_partitions)\n        for idx, partition in enumerate(self.list_of_block_partitions):\n            partition.drain_call_queue()\n            
result[idx] = partition.ip(materialize=False)\n        return result\n\n    @classmethod\n    @_inherit_docstrings(PandasDataframeAxisPartition.deploy_splitting_func)\n    def deploy_splitting_func(\n        cls,\n        axis,\n        func,\n        f_args,\n        f_kwargs,\n        num_splits,\n        *partitions,\n        extract_metadata=False,\n    ):\n        return _deploy_unidist_func.options(\n            num_returns=(\n                num_splits * (1 + cls._PARTITIONS_METADATA_LEN)\n                if extract_metadata\n                else num_splits\n            ),\n        ).remote(\n            cls._get_deploy_split_func(),\n            axis,\n            func,\n            f_args,\n            f_kwargs,\n            num_splits,\n            *partitions,\n            extract_metadata=extract_metadata,\n        )\n\n    @classmethod\n    def deploy_axis_func(\n        cls,\n        axis,\n        func,\n        f_args,\n        f_kwargs,\n        num_splits,\n        maintain_partitioning,\n        *partitions,\n        min_block_size,\n        lengths=None,\n        manual_partition=False,\n        max_retries=None,\n    ):\n        \"\"\"\n        Deploy a function along a full axis.\n\n        Parameters\n        ----------\n        axis : {0, 1}\n            The axis to perform the function along.\n        func : callable\n            The function to perform.\n        f_args : list or tuple\n            Positional arguments to pass to ``func``.\n        f_kwargs : dict\n            Keyword arguments to pass to ``func``.\n        num_splits : int\n            The number of splits to return (see ``split_result_of_axis_func_pandas``).\n        maintain_partitioning : bool\n            If True, keep the old partitioning if possible.\n            If False, create a new partition layout.\n        *partitions : iterable\n            All partitions that make up the full axis (row or column).\n        min_block_size : int\n            Minimum number of 
rows/columns in a single split.\n        lengths : list, optional\n            The list of lengths to shuffle the object.\n        manual_partition : bool, default: False\n            If True, partition the result with `lengths`.\n        max_retries : int, default: None\n            The max number of times to retry the func.\n\n        Returns\n        -------\n        list\n            A list of ``unidist.ObjectRef``-s.\n        \"\"\"\n        return _deploy_unidist_func.options(\n            num_returns=(num_splits if lengths is None else len(lengths))\n            * (1 + cls._PARTITIONS_METADATA_LEN),\n            **({\"max_retries\": max_retries} if max_retries is not None else {}),\n        ).remote(\n            cls._get_deploy_axis_func(),\n            axis,\n            func,\n            f_args,\n            f_kwargs,\n            num_splits,\n            maintain_partitioning,\n            *partitions,\n            manual_partition=manual_partition,\n            min_block_size=min_block_size,\n            lengths=lengths,\n        )\n\n    @classmethod\n    def deploy_func_between_two_axis_partitions(\n        cls,\n        axis,\n        func,\n        f_args,\n        f_kwargs,\n        num_splits,\n        len_of_left,\n        other_shape,\n        *partitions,\n        min_block_size,\n    ):\n        \"\"\"\n        Deploy a function along a full axis between two data sets.\n\n        Parameters\n        ----------\n        axis : {0, 1}\n            The axis to perform the function along.\n        func : callable\n            The function to perform.\n        f_args : list or tuple\n            Positional arguments to pass to ``func``.\n        f_kwargs : dict\n            Keyword arguments to pass to ``func``.\n        num_splits : int\n            The number of splits to return (see ``split_result_of_axis_func_pandas``).\n        len_of_left : int\n            The number of values in `partitions` that belong to the left data set.\n        
other_shape : np.ndarray\n            The shape of right frame in terms of partitions, i.e.\n            (other_shape[i-1], other_shape[i]) will indicate slice to restore i-1 axis partition.\n        *partitions : iterable\n            All partitions that make up the full axis (row or column) for both data sets.\n        min_block_size : int\n            Minimum number of rows/columns in a single split.\n\n        Returns\n        -------\n        list\n            A list of ``unidist.ObjectRef``-s.\n        \"\"\"\n        return _deploy_unidist_func.options(\n            num_returns=num_splits * (1 + cls._PARTITIONS_METADATA_LEN)\n        ).remote(\n            PandasDataframeAxisPartition.deploy_func_between_two_axis_partitions,\n            axis,\n            func,\n            f_args,\n            f_kwargs,\n            num_splits,\n            len_of_left,\n            other_shape,\n            *partitions,\n            min_block_size=min_block_size,\n        )\n\n    def wait(self):\n        \"\"\"Wait completing computations on the object wrapped by the partition.\"\"\"\n        self.drain_call_queue()\n        futures = self.list_of_blocks\n        UnidistWrapper.wait(futures)\n\n\n@_inherit_docstrings(PandasOnUnidistDataframeVirtualPartition)\nclass PandasOnUnidistDataframeColumnPartition(PandasOnUnidistDataframeVirtualPartition):\n    axis = 0\n\n\n@_inherit_docstrings(PandasOnUnidistDataframeVirtualPartition)\nclass PandasOnUnidistDataframeRowPartition(PandasOnUnidistDataframeVirtualPartition):\n    axis = 1\n\n\n@unidist.remote\ndef _deploy_unidist_func(\n    deployer,\n    axis,\n    f_to_deploy,\n    f_args,\n    f_kwargs,\n    *args,\n    extract_metadata=True,\n    **kwargs,\n):  # pragma: no cover\n    \"\"\"\n    Execute a function on an axis partition in a worker process.\n\n    This is ALWAYS called on either ``PandasDataframeAxisPartition.deploy_axis_func``\n    or ``PandasDataframeAxisPartition.deploy_func_between_two_axis_partitions``, which 
both\n    serve to deploy another dataframe function on a unidist worker process. The provided ``f_args``\n    is thus are deserialized here (on the unidist worker) before the function is called (``f_kwargs``\n    will never contain more unidist objects, and thus does not require deserialization).\n\n    Parameters\n    ----------\n    deployer : callable\n        A `PandasDataFrameAxisPartition.deploy_*` method that will call ``f_to_deploy``.\n    axis : {0, 1}\n        The axis to perform the function along.\n    f_to_deploy : callable or unidist.ObjectRef\n        The function to deploy.\n    f_args : list or tuple\n        Positional arguments to pass to ``f_to_deploy``.\n    f_kwargs : dict\n        Keyword arguments to pass to ``f_to_deploy``.\n    *args : list\n        Positional arguments to pass to ``deployer``.\n    extract_metadata : bool, default: True\n        Whether to return metadata (length, width, ip) of the result. Passing `False` may relax\n        the load on object storage as the remote function would return 4 times fewer futures.\n        Passing `False` makes sense for temporary results where you know for sure that the\n        metadata will never be requested.\n    **kwargs : dict\n        Keyword arguments to pass to ``deployer``.\n\n    Returns\n    -------\n    list : Union[tuple, list]\n        The result of the function call, and metadata for it.\n\n    Notes\n    -----\n    Unidist functions are not detected by codecov (thus pragma: no cover).\n    \"\"\"\n    f_args = deserialize(f_args)\n    with warnings.catch_warnings():\n        warnings.filterwarnings(\"ignore\", category=FutureWarning)\n        result = deployer(axis, f_to_deploy, f_args, f_kwargs, *args, **kwargs)\n    if not extract_metadata:\n        return result\n    ip = unidist.get_ip()\n    if isinstance(result, pandas.DataFrame):\n        return result, len(result), len(result.columns), ip\n    elif all(isinstance(r, pandas.DataFrame) for r in result):\n        return 
[i for r in result for i in [r, len(r), len(r.columns), ip]]\n    else:\n        return [i for r in result for i in [r, None, None, ip]]\n"
  },
  {
    "path": "modin/core/execution/utils.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"General utils for execution module.\"\"\"\n\nimport contextlib\nimport os\n\nfrom modin.error_message import ErrorMessage\n\n\n@contextlib.contextmanager\ndef set_env(**environ):\n    \"\"\"\n    Temporarily set the process environment variables.\n    \"\"\"\n    old_environ = os.environ.copy()\n    os.environ.update(environ)\n    try:\n        yield\n    finally:\n        os.environ.clear()\n        os.environ.update(old_environ)\n\n\nif \"_MODIN_DOC_CHECKER_\" in os.environ:\n\n    # The doc checker should get the non-processed functions\n    def remote_function(func, ignore_defaults=False):\n        return func\n\n\n# Check if the function already exists to avoid circular imports\nelif \"remote_function\" not in dir():\n    # TODO(https://github.com/modin-project/modin/issues/7429): Use\n    # frame-level engine config.\n\n    from modin.config import Engine\n\n    if Engine.get() == \"Ray\":\n        from modin.core.execution.ray.common import RayWrapper\n\n        _preprocess_func = RayWrapper.put\n    elif Engine.get() == \"Unidist\":\n        from modin.core.execution.unidist.common import UnidistWrapper\n\n        _preprocess_func = 
UnidistWrapper.put\n    elif Engine.get() == \"Dask\":\n        from modin.core.execution.dask.common import DaskWrapper\n\n        # The function cache is not supported for Dask\n        def remote_function(func, ignore_defaults=False):\n            return DaskWrapper.put(func)\n\n    else:\n\n        def remote_function(func, ignore_defaults=False):\n            return func\n\n    if \"remote_function\" not in dir():\n        _remote_function_cache = {}\n\n        def remote_function(func, ignore_defaults=False):  # noqa: F811\n            if \"<locals>\" in func.__qualname__:  # Nested function\n                if func.__closure__:\n                    ErrorMessage.single_warning(\n                        f\"The nested function {func} can not be cached, because \"\n                        + \"it captures objects from the outer scope.\"\n                    )\n                    return func\n                if not ignore_defaults and func.__defaults__:\n                    ErrorMessage.single_warning(\n                        f\"The nested function {func} can not be cached, because it has \"\n                        + \"default values. Use `ignore_defaults` to forcibly enable caching.\"\n                    )\n                    return func\n                # For the nested functions, use __code__ as the key, because it's the same\n                # object for each instance of the function.\n                key = id(func.__code__)\n            else:\n                key = func\n            ref = _remote_function_cache.get(key, None)\n            if ref is None:\n                ref = _preprocess_func(func)\n                _remote_function_cache[key] = ref\n            return ref\n"
  },
  {
    "path": "modin/core/io/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"IO functions implementations.\"\"\"\n\nfrom .column_stores.feather_dispatcher import FeatherDispatcher\nfrom .column_stores.hdf_dispatcher import HDFDispatcher\nfrom .column_stores.parquet_dispatcher import ParquetDispatcher\nfrom .file_dispatcher import FileDispatcher\nfrom .io import BaseIO\nfrom .sql.sql_dispatcher import SQLDispatcher\nfrom .text.csv_dispatcher import CSVDispatcher\nfrom .text.excel_dispatcher import ExcelDispatcher\nfrom .text.fwf_dispatcher import FWFDispatcher\nfrom .text.json_dispatcher import JSONDispatcher\nfrom .text.text_file_dispatcher import TextFileDispatcher\n\n__all__ = [\n    \"BaseIO\",\n    \"CSVDispatcher\",\n    \"FWFDispatcher\",\n    \"JSONDispatcher\",\n    \"FileDispatcher\",\n    \"TextFileDispatcher\",\n    \"ParquetDispatcher\",\n    \"HDFDispatcher\",\n    \"FeatherDispatcher\",\n    \"SQLDispatcher\",\n    \"ExcelDispatcher\",\n]\n"
  },
  {
    "path": "modin/core/io/column_stores/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Columnar store format type IO functions implementations.\"\"\"\n"
  },
  {
    "path": "modin/core/io/column_stores/column_store_dispatcher.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"\nModule houses `ColumnStoreDispatcher` class.\n\n`ColumnStoreDispatcher` contains utils for handling columnar store format files,\ninherits util functions for handling files from `FileDispatcher` class and can be\nused as base class for dipatchers of specific columnar store formats.\n\"\"\"\n\nimport numpy as np\nimport pandas\n\nfrom modin.config import MinColumnPartitionSize, MinRowPartitionSize, NPartitions\nfrom modin.core.io.file_dispatcher import FileDispatcher\nfrom modin.core.storage_formats.pandas.utils import compute_chunksize\n\n\nclass ColumnStoreDispatcher(FileDispatcher):\n    \"\"\"\n    Class handles utils for reading columnar store format files.\n\n    Inherits some util functions for processing files from `FileDispatcher` class.\n    \"\"\"\n\n    @classmethod\n    def call_deploy(cls, fname, col_partitions, **kwargs):\n        \"\"\"\n        Deploy remote tasks to the workers with passed parameters.\n\n        Parameters\n        ----------\n        fname : str, path object or file-like object\n            Name of the file to read.\n        col_partitions : list\n            List of arrays with columns names that should be read\n          
  by each partition.\n        **kwargs : dict\n            Parameters of deploying read_* function.\n\n        Returns\n        -------\n        np.ndarray\n            Array with references to the task deploy result for each partition.\n        \"\"\"\n        return np.array(\n            [\n                cls.deploy(\n                    func=cls.parse,\n                    f_kwargs={\n                        \"fname\": fname,\n                        \"columns\": cols,\n                        \"num_splits\": NPartitions.get(),\n                        **kwargs,\n                    },\n                    num_returns=NPartitions.get() + 2,\n                )\n                for cols in col_partitions\n            ]\n        ).T\n\n    @classmethod\n    def build_partition(cls, partition_ids, row_lengths, column_widths):\n        \"\"\"\n        Build array with partitions of `cls.frame_partition_cls` class.\n\n        Parameters\n        ----------\n        partition_ids : list\n            Array with references to the partitions data.\n        row_lengths : list\n            Partitions rows lengths.\n        column_widths : list\n            Number of columns in each partition.\n\n        Returns\n        -------\n        np.ndarray\n            array with shape equals to the shape of `partition_ids` and\n            filed with partition objects.\n        \"\"\"\n        return np.array(\n            [\n                [\n                    cls.frame_partition_cls(\n                        partition_ids[i][j],\n                        length=row_lengths[i],\n                        width=column_widths[j],\n                    )\n                    for j in range(len(partition_ids[i]))\n                ]\n                for i in range(len(partition_ids))\n            ]\n        )\n\n    @classmethod\n    def build_index(cls, partition_ids):\n        \"\"\"\n        Compute index and its split sizes of resulting Modin DataFrame.\n\n        Parameters\n     
   ----------\n        partition_ids : list\n            Array with references to the partitions data.\n\n        Returns\n        -------\n        index : pandas.Index\n            Index of resulting Modin DataFrame.\n        row_lengths : list\n            List with lengths of index chunks.\n        \"\"\"\n        index_len = (\n            0 if len(partition_ids) == 0 else cls.materialize(partition_ids[-2][0])\n        )\n        if isinstance(index_len, int):\n            index = pandas.RangeIndex(index_len)\n        else:\n            index = index_len\n            index_len = len(index)\n        num_partitions = NPartitions.get()\n        min_block_size = MinRowPartitionSize.get()\n        index_chunksize = compute_chunksize(index_len, num_partitions, min_block_size)\n        if index_chunksize > index_len:\n            row_lengths = [index_len] + [0 for _ in range(num_partitions - 1)]\n        else:\n            row_lengths = [\n                (\n                    index_chunksize\n                    if (i + 1) * index_chunksize < index_len\n                    else max(0, index_len - (index_chunksize * i))\n                )\n                for i in range(num_partitions)\n            ]\n        return index, row_lengths\n\n    @classmethod\n    def build_columns(cls, columns, num_row_parts=None):\n        \"\"\"\n        Split columns into chunks that should be read by workers.\n\n        Parameters\n        ----------\n        columns : list\n            List of columns that should be read from file.\n        num_row_parts : int, optional\n            Number of parts the dataset is split into. 
This parameter is used\n            to align the column partitioning with it so we won't end up with an\n            over partitioned frame.\n\n        Returns\n        -------\n        col_partitions : list\n            List of lists with columns for reading by workers.\n        column_widths : list\n            List with lengths of `col_partitions` subarrays\n            (number of columns that should be read by workers).\n        \"\"\"\n        columns_length = len(columns)\n        if columns_length == 0:\n            return [], []\n        if num_row_parts is None:\n            # in column formats we mostly read columns in parallel rather than rows,\n            # so we try to chunk columns as much as possible\n            min_block_size = 1\n        else:\n            num_remaining_parts = round(NPartitions.get() / num_row_parts)\n            min_block_size = min(\n                columns_length // num_remaining_parts, MinColumnPartitionSize.get()\n            )\n        column_splits = compute_chunksize(\n            columns_length, NPartitions.get(), max(1, min_block_size)\n        )\n        col_partitions = [\n            columns[i : i + column_splits]\n            for i in range(0, columns_length, column_splits)\n        ]\n        column_widths = [len(c) for c in col_partitions]\n        return col_partitions, column_widths\n\n    @classmethod\n    def build_dtypes(cls, partition_ids, columns):\n        \"\"\"\n        Compute common for all partitions `dtypes` for each of the DataFrame column.\n\n        Parameters\n        ----------\n        partition_ids : list\n            Array with references to the partitions data.\n        columns : list\n            List of columns that should be read from file.\n\n        Returns\n        -------\n        dtypes : pandas.Series\n            Series with dtypes for columns.\n        \"\"\"\n        dtypes = pandas.concat(cls.materialize(list(partition_ids)), axis=0)\n        dtypes.index = columns\n        
return dtypes\n\n    @classmethod\n    def build_query_compiler(cls, path, columns, **kwargs):\n        \"\"\"\n        Build query compiler from deployed tasks outputs.\n\n        Parameters\n        ----------\n        path : str, path object or file-like object\n            Path to the file to read.\n        columns : list\n            List of columns that should be read from file.\n        **kwargs : dict\n            Parameters of deploying read_* function.\n\n        Returns\n        -------\n        new_query_compiler : BaseQueryCompiler\n            Query compiler with imported data for further processing.\n        \"\"\"\n        col_partitions, column_widths = cls.build_columns(columns)\n        partition_ids = cls.call_deploy(path, col_partitions, **kwargs)\n        index, row_lens = cls.build_index(partition_ids)\n        remote_parts = cls.build_partition(partition_ids[:-2], row_lens, column_widths)\n        dtypes = (\n            cls.build_dtypes(partition_ids[-1], columns)\n            if len(partition_ids) > 0\n            else None\n        )\n        new_query_compiler = cls.query_compiler_cls(\n            cls.frame_cls(\n                remote_parts,\n                index,\n                columns,\n                row_lens,\n                column_widths,\n                dtypes=dtypes,\n            )\n        )\n        return new_query_compiler\n"
  },
  {
    "path": "modin/core/io/column_stores/feather_dispatcher.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module houses `FeatherDispatcher` class, that is used for reading `.feather` files.\"\"\"\n\nfrom pandas.io.common import stringify_path\n\nfrom modin.core.io.column_stores.column_store_dispatcher import ColumnStoreDispatcher\nfrom modin.core.io.file_dispatcher import OpenFile\nfrom modin.utils import import_optional_dependency\n\n\nclass FeatherDispatcher(ColumnStoreDispatcher):\n    \"\"\"Class handles utils for reading `.feather` files.\"\"\"\n\n    @classmethod\n    def _read(cls, path, columns=None, **kwargs):\n        \"\"\"\n        Read data from the file path, returning a query compiler.\n\n        Parameters\n        ----------\n        path : str or file-like object\n            The filepath of the feather file.\n        columns : array-like, optional\n            Columns to read from file. 
If not provided, all columns are read.\n        **kwargs : dict\n            `read_feather` function kwargs.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            Query compiler with imported data for further processing.\n\n        Notes\n        -----\n        `PyArrow` engine and local files only are supported for now,\n        multi threading is set to False by default.\n        PyArrow feather is used. Please refer to the documentation here\n        https://arrow.apache.org/docs/python/api.html#feather-format\n        \"\"\"\n        path = stringify_path(path)\n        path = cls.get_path(path)\n        if columns is None:\n            import_optional_dependency(\n                \"pyarrow\", \"pyarrow is required to read feather files.\"\n            )\n            from pyarrow import ipc\n\n            with OpenFile(\n                path,\n                **(kwargs.get(\"storage_options\", None) or {}),\n            ) as file:\n                # Opens the file to extract its metadata\n                reader = ipc.open_file(file)\n            # TODO: pyarrow's schema contains much more metadata than just column names, it also\n            # has dtypes and index information that we could use when building a dataframe\n            index_cols = frozenset(\n                col\n                for col in reader.schema.pandas_metadata[\"index_columns\"]\n                # 'index_columns' field may also contain dictionary fields describing actual\n                # RangeIndices, so we're only filtering here for string column names\n                if isinstance(col, str)\n            )\n            # Filtering out the columns that describe the frame's index\n            columns = [col for col in reader.schema.names if col not in index_cols]\n        return cls.build_query_compiler(\n            path,\n            columns,\n            use_threads=False,\n            storage_options=kwargs[\"storage_options\"],\n            
dtype_backend=kwargs[\"dtype_backend\"],\n        )\n"
  },
  {
    "path": "modin/core/io/column_stores/hdf_dispatcher.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module houses `HDFDispatcher` class, that is used for reading hdf data.\"\"\"\n\nimport pandas\n\nfrom modin.core.io.column_stores.column_store_dispatcher import ColumnStoreDispatcher\n\n\nclass HDFDispatcher(ColumnStoreDispatcher):  # pragma: no cover\n    \"\"\"\n    Class handles utils for reading hdf data.\n\n    Inherits some common for columnar store files util functions from\n    `ColumnStoreDispatcher` class.\n    \"\"\"\n\n    @classmethod\n    def _validate_hdf_format(cls, path_or_buf):\n        \"\"\"\n        Validate `path_or_buf` and then return `table_type` parameter of store group attribute.\n\n        Parameters\n        ----------\n        path_or_buf : str, buffer or path object\n            Path to the file to open, or an open :class:`pandas.HDFStore` object.\n\n        Returns\n        -------\n        str\n            `table_type` parameter of store group attribute.\n        \"\"\"\n        s = pandas.HDFStore(path_or_buf)\n        groups = s.groups()\n        if len(groups) == 0:\n            raise ValueError(\"No dataset in HDF5 file.\")\n        candidate_only_group = groups[0]\n        format = getattr(candidate_only_group._v_attrs, 
\"table_type\", None)\n        s.close()\n        return format\n\n    @classmethod\n    def _read(cls, path_or_buf, **kwargs):\n        \"\"\"\n        Load an h5 file from the file path or buffer, returning a query compiler.\n\n        Parameters\n        ----------\n        path_or_buf : str, buffer or path object\n            Path to the file to open, or an open :class:`pandas.HDFStore` object.\n        **kwargs : dict\n            Pass into pandas.read_hdf function.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            Query compiler with imported data for further processing.\n        \"\"\"\n        if cls._validate_hdf_format(path_or_buf=path_or_buf) is None:\n            return cls.single_worker_read(\n                path_or_buf,\n                reason=\"File format seems to be `fixed`. For better distribution consider \"\n                + \"saving the file in `table` format. df.to_hdf(format=`table`).\",\n                **kwargs\n            )\n\n        columns = kwargs.pop(\"columns\", None)\n        # Have to do this because of Dask's keyword arguments\n        kwargs[\"_key\"] = kwargs.pop(\"key\", None)\n        if not columns:\n            start = kwargs.pop(\"start\", None)\n            stop = kwargs.pop(\"stop\", None)\n            empty_pd_df = pandas.read_hdf(path_or_buf, start=0, stop=0, **kwargs)\n            if start is not None:\n                kwargs[\"start\"] = start\n            if stop is not None:\n                kwargs[\"stop\"] = stop\n            columns = empty_pd_df.columns\n        return cls.build_query_compiler(path_or_buf, columns, **kwargs)\n"
  },
  {
    "path": "modin/core/io/column_stores/parquet_dispatcher.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module houses `ParquetDispatcher` class, that is used for reading `.parquet` files.\"\"\"\n\nfrom __future__ import annotations\n\nimport functools\nimport json\nimport os\nimport re\nfrom typing import TYPE_CHECKING\n\nimport fsspec\nimport numpy as np\nimport pandas\nimport pandas._libs.lib as lib\nfrom fsspec.core import url_to_fs\nfrom fsspec.spec import AbstractBufferedFile\nfrom packaging import version\nfrom pandas.io.common import stringify_path\n\nfrom modin.config import MinColumnPartitionSize, MinRowPartitionSize, NPartitions\nfrom modin.core.io.column_stores.column_store_dispatcher import ColumnStoreDispatcher\nfrom modin.error_message import ErrorMessage\nfrom modin.utils import _inherit_docstrings\n\nif TYPE_CHECKING:\n    from modin.core.storage_formats.pandas.parsers import ParquetFileToRead\n\n\nclass ColumnStoreDataset:\n    \"\"\"\n    Base class that encapsulates Parquet engine-specific details.\n\n    This class exposes a set of functions that are commonly used in the\n    `read_parquet` implementation.\n\n    Attributes\n    ----------\n    path : str, path object or file-like object\n        The filepath of the parquet file in local 
filesystem or hdfs.\n    storage_options : dict\n        Parameters for specific storage engine.\n    _fs_path : str, path object or file-like object\n        The filepath or handle of the parquet dataset specific to the\n        filesystem implementation. E.g. for `s3://test/example`, _fs\n        would be set to S3FileSystem and _fs_path would be `test/example`.\n    _fs : Filesystem\n        Filesystem object specific to the given parquet file/dataset.\n    dataset : ParquetDataset or ParquetFile\n        Underlying dataset implementation for PyArrow and fastparquet\n        respectively.\n    \"\"\"\n\n    def __init__(self, path, storage_options):  # noqa : PR01\n        self.path = path.__fspath__() if isinstance(path, os.PathLike) else path\n        self.storage_options = storage_options\n        self._fs_path = None\n        self._fs = None\n        self.dataset = self._init_dataset()\n\n    @property\n    def pandas_metadata(self):\n        \"\"\"Return the pandas metadata of the dataset.\"\"\"\n        raise NotImplementedError\n\n    @property\n    def columns(self):\n        \"\"\"Return the list of columns in the dataset.\"\"\"\n        raise NotImplementedError\n\n    @property\n    def engine(self):\n        \"\"\"Return string representing what engine is being used.\"\"\"\n        raise NotImplementedError\n\n    @functools.cached_property\n    def files(self):\n        \"\"\"Return the list of formatted file paths of the dataset.\"\"\"\n        raise NotImplementedError\n\n    @functools.cached_property\n    def row_groups_per_file(self):\n        \"\"\"Return a list with the number of row groups per file.\"\"\"\n        raise NotImplementedError\n\n    @property\n    def fs(self):\n        \"\"\"\n        Return the filesystem object associated with the dataset path.\n\n        Returns\n        -------\n        filesystem\n            Filesystem object.\n        \"\"\"\n        if self._fs is None:\n            if isinstance(self.path, 
AbstractBufferedFile):\n                self._fs = self.path.fs\n            else:\n                self._fs, self._fs_path = url_to_fs(self.path, **self.storage_options)\n        return self._fs\n\n    @property\n    def fs_path(self):\n        \"\"\"\n        Return the filesystem-specific path or file handle.\n\n        Returns\n        -------\n        fs_path : str, path object or file-like object\n            String path specific to filesystem or a file handle.\n        \"\"\"\n        if self._fs_path is None:\n            if isinstance(self.path, AbstractBufferedFile):\n                self._fs_path = self.path\n            else:\n                self._fs, self._fs_path = url_to_fs(self.path, **self.storage_options)\n        return self._fs_path\n\n    def to_pandas_dataframe(self, columns):\n        \"\"\"\n        Read the given columns as a pandas dataframe.\n\n        Parameters\n        ----------\n        columns : list\n            List of columns that should be read from file.\n        \"\"\"\n        raise NotImplementedError\n\n    def _get_files(self, files):\n        \"\"\"\n        Retrieve list of formatted file names in dataset path.\n\n        Parameters\n        ----------\n        files : list\n            List of files from path.\n\n        Returns\n        -------\n        fs_files : list\n            List of files from path with fs-protocol prepended.\n        \"\"\"\n        # Older versions of fsspec don't support unstrip_protocol(). 
It\n        # was only added relatively recently:\n        # https://github.com/fsspec/filesystem_spec/pull/828\n\n        def _unstrip_protocol(protocol, path):\n            protos = (protocol,) if isinstance(protocol, str) else protocol\n            for protocol in protos:\n                if path.startswith(f\"{protocol}://\"):\n                    return path\n            return f\"{protos[0]}://{path}\"\n\n        if isinstance(self.path, AbstractBufferedFile):\n            return [self.path]\n        # version.parse() is expensive, so we can split this into two separate loops\n        if version.parse(fsspec.__version__) < version.parse(\"2022.5.0\"):\n            fs_files = [_unstrip_protocol(self.fs.protocol, fpath) for fpath in files]\n        else:\n            fs_files = [self.fs.unstrip_protocol(fpath) for fpath in files]\n\n        return fs_files\n\n\n@_inherit_docstrings(ColumnStoreDataset)\nclass PyArrowDataset(ColumnStoreDataset):\n    def _init_dataset(self):  # noqa: GL08\n        from pyarrow.parquet import ParquetDataset\n\n        return ParquetDataset(self.fs_path, filesystem=self.fs)\n\n    @property\n    def pandas_metadata(self):\n        return self.dataset.schema.pandas_metadata\n\n    @property\n    def columns(self):\n        return self.dataset.schema.names\n\n    @property\n    def engine(self):\n        return \"pyarrow\"\n\n    @functools.cached_property\n    def row_groups_per_file(self):\n        from pyarrow.parquet import ParquetFile\n\n        row_groups_per_file = []\n        # Count up the total number of row groups across all files and\n        # keep track of row groups per file to use later.\n        for file in self.files:\n            with self.fs.open(file) as f:\n                row_groups = ParquetFile(f).num_row_groups\n                row_groups_per_file.append(row_groups)\n        return row_groups_per_file\n\n    @functools.cached_property\n    def files(self):\n        files = self.dataset.files\n        return 
self._get_files(files)\n\n    def to_pandas_dataframe(\n        self,\n        columns,\n    ):\n        from pyarrow.parquet import read_table\n\n        return read_table(\n            self._fs_path, columns=columns, filesystem=self.fs\n        ).to_pandas()\n\n\n@_inherit_docstrings(ColumnStoreDataset)\nclass FastParquetDataset(ColumnStoreDataset):\n    def _init_dataset(self):  # noqa: GL08\n        from fastparquet import ParquetFile\n\n        return ParquetFile(self.fs_path, fs=self.fs)\n\n    @property\n    def pandas_metadata(self):\n        if \"pandas\" not in self.dataset.key_value_metadata:\n            return {}\n        return json.loads(self.dataset.key_value_metadata[\"pandas\"])\n\n    @property\n    def columns(self):\n        return self.dataset.columns\n\n    @property\n    def engine(self):\n        return \"fastparquet\"\n\n    @functools.cached_property\n    def row_groups_per_file(self):\n        from fastparquet import ParquetFile\n\n        row_groups_per_file = []\n        # Count up the total number of row groups across all files and\n        # keep track of row groups per file to use later.\n        for file in self.files:\n            with self.fs.open(file) as f:\n                row_groups = ParquetFile(f).info[\"row_groups\"]\n                row_groups_per_file.append(row_groups)\n        return row_groups_per_file\n\n    @functools.cached_property\n    def files(self):\n        return self._get_files(self._get_fastparquet_files())\n\n    def to_pandas_dataframe(self, columns):\n        return self.dataset.to_pandas(columns=columns)\n\n    # Karthik Velayutham writes:\n    #\n    # fastparquet doesn't have a nice method like PyArrow, so we\n    # have to copy some of their logic here while we work on getting\n    # an easier method to get a list of valid files.\n    # See: https://github.com/dask/fastparquet/issues/795\n    def _get_fastparquet_files(self):  # noqa: GL08\n        if \"*\" in self.path:\n            files = 
self.fs.glob(self.path)\n        else:\n            # (Resolving issue #6778)\n            #\n            # Users will pass in a directory to a delta table, which stores parquet\n            # files in various directories along with other, non-parquet files. We\n            # need to identify those parquet files and not the non-parquet files.\n            #\n            # However, we also need to support users passing in explicit files that\n            # don't necessarily have the `.parq` or `.parquet` extension -- if a user\n            # says that a file is parquet, then we should probably give it a shot.\n            if self.fs.isfile(self.path):\n                files = self.fs.find(self.path)\n            else:\n                files = [\n                    f\n                    for f in self.fs.find(self.path)\n                    if f.endswith(\".parquet\") or f.endswith(\".parq\")\n                ]\n        return files\n\n\nclass ParquetDispatcher(ColumnStoreDispatcher):\n    \"\"\"Class handles utils for reading `.parquet` files.\"\"\"\n\n    index_regex = re.compile(r\"__index_level_\\d+__\")\n\n    @classmethod\n    def get_dataset(cls, path, engine, storage_options):\n        \"\"\"\n        Retrieve Parquet engine specific Dataset implementation.\n\n        Parameters\n        ----------\n        path : str, path object or file-like object\n            The filepath of the parquet file in local filesystem or hdfs.\n        engine : str\n            Parquet library to use (only 'PyArrow' is supported for now).\n        storage_options : dict\n            Parameters for specific storage engine.\n\n        Returns\n        -------\n        Dataset\n            Either a PyArrowDataset or FastParquetDataset object.\n        \"\"\"\n        if engine == \"auto\":\n            # We follow in concordance with pandas\n            engine_classes = [PyArrowDataset, FastParquetDataset]\n\n            error_msgs = \"\"\n            for engine_class in 
engine_classes:\n                try:\n                    return engine_class(path, storage_options)\n                except ImportError as err:\n                    error_msgs += \"\\n - \" + str(err)\n\n            raise ImportError(\n                \"Unable to find a usable engine; \"\n                + \"tried using: 'pyarrow', 'fastparquet'.\\n\"\n                + \"A suitable version of \"\n                + \"pyarrow or fastparquet is required for parquet \"\n                + \"support.\\n\"\n                + \"Trying to import the above resulted in these errors:\"\n                + f\"{error_msgs}\"\n            )\n        elif engine == \"pyarrow\":\n            return PyArrowDataset(path, storage_options)\n        elif engine == \"fastparquet\":\n            return FastParquetDataset(path, storage_options)\n        else:\n            raise ValueError(\"engine must be one of 'pyarrow', 'fastparquet'\")\n\n    @classmethod\n    def _determine_partitioning(\n        cls, dataset: ColumnStoreDataset\n    ) -> \"list[list[ParquetFileToRead]]\":\n        \"\"\"\n        Determine which partition will read certain files/row groups of the dataset.\n\n        Parameters\n        ----------\n        dataset : ColumnStoreDataset\n\n        Returns\n        -------\n        list[list[ParquetFileToRead]]\n            Each element in the returned list describes a list of files that a partition has to read.\n        \"\"\"\n        from modin.core.storage_formats.pandas.parsers import ParquetFileToRead\n\n        parquet_files = dataset.files\n        row_groups_per_file = dataset.row_groups_per_file\n        num_row_groups = sum(row_groups_per_file)\n\n        if num_row_groups == 0:\n            return []\n\n        num_splits = min(NPartitions.get(), num_row_groups)\n        part_size = num_row_groups // num_splits\n        # If 'num_splits' does not divide 'num_row_groups' then we can't cover all of\n        # the row groups using the original 'part_size'. 
According to the 'reminder'\n        # there has to be that number of partitions that should read 'part_size + 1'\n        # number of row groups.\n        reminder = num_row_groups % num_splits\n        part_sizes = [part_size] * (num_splits - reminder) + [part_size + 1] * reminder\n\n        partition_files = []\n        file_idx = 0\n        row_group_idx = 0\n        row_groups_left_in_current_file = row_groups_per_file[file_idx]\n        # this is used for sanity check at the end, verifying that we indeed added all of the row groups\n        total_row_groups_added = 0\n        for size in part_sizes:\n            row_groups_taken = 0\n            part_files = []\n            while row_groups_taken != size:\n                if row_groups_left_in_current_file < 1:\n                    file_idx += 1\n                    row_group_idx = 0\n                    row_groups_left_in_current_file = row_groups_per_file[file_idx]\n\n                to_take = min(size - row_groups_taken, row_groups_left_in_current_file)\n                part_files.append(\n                    ParquetFileToRead(\n                        parquet_files[file_idx],\n                        row_group_start=row_group_idx,\n                        row_group_end=row_group_idx + to_take,\n                    )\n                )\n                row_groups_left_in_current_file -= to_take\n                row_groups_taken += to_take\n                row_group_idx += to_take\n\n            total_row_groups_added += row_groups_taken\n            partition_files.append(part_files)\n\n        sanity_check = (\n            len(partition_files) == num_splits\n            and total_row_groups_added == num_row_groups\n        )\n        ErrorMessage.catch_bugs_and_request_email(\n            failure_condition=not sanity_check,\n            extra_log=\"row groups added does not match total num of row groups across parquet files\",\n        )\n        return partition_files\n\n    @classmethod\n    def 
call_deploy(\n        cls,\n        partition_files: \"list[list[ParquetFileToRead]]\",\n        col_partitions: \"list[list[str]]\",\n        storage_options: dict,\n        engine: str,\n        **kwargs,\n    ):\n        \"\"\"\n        Deploy remote tasks to the workers with passed parameters.\n\n        Parameters\n        ----------\n        partition_files : list[list[ParquetFileToRead]]\n            List of arrays with files that should be read by each partition.\n        col_partitions : list[list[str]]\n            List of arrays with columns names that should be read\n            by each partition.\n        storage_options : dict\n            Parameters for specific storage engine.\n        engine : {\"auto\", \"pyarrow\", \"fastparquet\"}\n            Parquet library to use for reading.\n        **kwargs : dict\n            Parameters of deploying read_* function.\n\n        Returns\n        -------\n        List\n            Array with references to the task deploy result for each partition.\n        \"\"\"\n        # If we don't have any columns to read, we should just return an empty\n        # set of references.\n        if len(col_partitions) == 0:\n            return []\n\n        all_partitions = []\n        for files_to_read in partition_files:\n            all_partitions.append(\n                [\n                    cls.deploy(\n                        func=cls.parse,\n                        f_kwargs={\n                            \"files_for_parser\": files_to_read,\n                            \"columns\": cols,\n                            \"engine\": engine,\n                            \"storage_options\": storage_options,\n                            **kwargs,\n                        },\n                        num_returns=3,\n                    )\n                    for cols in col_partitions\n                ]\n            )\n        return all_partitions\n\n    @classmethod\n    def build_partition(cls, partition_ids, 
column_widths):\n        \"\"\"\n        Build array with partitions of `cls.frame_partition_cls` class.\n\n        Parameters\n        ----------\n        partition_ids : list\n            Array with references to the partitions data.\n        column_widths : list\n            Number of columns in each partition.\n\n        Returns\n        -------\n        np.ndarray\n            array with shape equal to the shape of `partition_ids` and\n            filled with partition objects.\n\n        Notes\n        -----\n        The second level of partition_ids contains a list of object references\n        for each read call:\n        partition_ids[i][j] -> [ObjectRef(df), ObjectRef(df.index), ObjectRef(len(df))].\n        \"\"\"\n        return np.array(\n            [\n                [\n                    cls.frame_partition_cls(\n                        part_id[0],\n                        length=part_id[2],\n                        width=col_width,\n                    )\n                    for part_id, col_width in zip(part_ids, column_widths)\n                ]\n                for part_ids in partition_ids\n            ]\n        )\n\n    @classmethod\n    def build_index(cls, dataset, partition_ids, index_columns, filters):\n        \"\"\"\n        Compute index and its split sizes of resulting Modin DataFrame.\n\n        Parameters\n        ----------\n        dataset : Dataset\n            Dataset object of Parquet file/files.\n        partition_ids : list\n            Array with references to the partitions data.\n        index_columns : list\n            List of index columns specified by pandas metadata.\n        filters : list\n            List of filters to be used in reading the Parquet file/files.\n\n        Returns\n        -------\n        index : pandas.Index\n            Index of resulting Modin DataFrame.\n        needs_index_sync : bool\n            Whether the partition indices need to be synced with frame\n            index because there's 
no index column, or at least one\n            index column is a RangeIndex.\n\n        Notes\n        -----\n        See `build_partition` for more detail on the contents of partition_ids.\n        \"\"\"\n        range_index = True\n        range_index_metadata = None\n        column_names_to_read = []\n        for column in index_columns:\n            # https://pandas.pydata.org/docs/development/developer.html#storing-pandas-dataframe-objects-in-apache-parquet-format\n            # describes the format of the index column metadata.\n            # It is a list, where each entry is either a string or a dictionary.\n            # A string means that a column stored in the dataset is (part of) the index.\n            # A dictionary is metadata about a RangeIndex, which is metadata-only and not stored\n            # in the dataset as a column.\n            # There cannot be both for a single dataframe, because a MultiIndex can only contain\n            # \"actual data\" columns and not RangeIndex objects.\n            # See similar code in pyarrow: https://github.com/apache/arrow/blob/44811ba18477560711d512939535c8389dd7787b/python/pyarrow/pandas_compat.py#L912-L926\n            # and in fastparquet, here is where RangeIndex is handled: https://github.com/dask/fastparquet/blob/df1219300a96bc1baf9ebad85f4f5676a130c9e8/fastparquet/api.py#L809-L815\n            if isinstance(column, str):\n                column_names_to_read.append(column)\n                range_index = False\n            elif column[\"kind\"] == \"range\":\n                range_index_metadata = column\n\n        # When the index has meaningful values, stored in a column, we will replicate those\n        # exactly in the Modin dataframe's index. This index may have repeated values, be unsorted,\n        # etc. This is all fine.\n        # A range index is the special case: we want the Modin dataframe to have a single range,\n        # not a range that keeps restarting. i.e. 
if the partitions have index 0-9, 0-19, 0-29,\n        # we want our Modin dataframe to have 0-59.\n        # When there are no filters, it is relatively cheap to construct the index by\n        # actually reading in the necessary data, here in the main process.\n        # When there are filters, we let the workers materialize the indices before combining to\n        # get a single range.\n\n        # For the second check, let us consider the case where we have an empty dataframe,\n        # that has a valid index.\n        if (range_index and filters is None) or (\n            len(partition_ids) == 0 and len(column_names_to_read) != 0\n        ):\n            complete_index = dataset.to_pandas_dataframe(\n                columns=column_names_to_read\n            ).index\n        # Empty DataFrame case\n        elif len(partition_ids) == 0:\n            return [], False\n        else:\n            index_ids = [part_id[0][1] for part_id in partition_ids if len(part_id) > 0]\n            index_objs = cls.materialize(index_ids)\n            if range_index:\n                # There are filters, so we had to materialize in order to\n                # determine how many items there actually are\n                total_filtered_length = sum(\n                    len(index_part) for index_part in index_objs\n                )\n\n                metadata_length_mismatch = False\n                if range_index_metadata is not None:\n                    metadata_implied_length = (\n                        range_index_metadata[\"stop\"] - range_index_metadata[\"start\"]\n                    ) / range_index_metadata[\"step\"]\n                    metadata_length_mismatch = (\n                        total_filtered_length != metadata_implied_length\n                    )\n\n                # pyarrow ignores the RangeIndex metadata if it is not consistent with data length.\n                # 
https://github.com/apache/arrow/blob/44811ba18477560711d512939535c8389dd7787b/python/pyarrow/pandas_compat.py#L924-L926\n                # fastparquet keeps the start and step from the metadata and just adjusts to the length.\n                # https://github.com/dask/fastparquet/blob/df1219300a96bc1baf9ebad85f4f5676a130c9e8/fastparquet/api.py#L815\n                if range_index_metadata is None or (\n                    isinstance(dataset, PyArrowDataset) and metadata_length_mismatch\n                ):\n                    complete_index = pandas.RangeIndex(total_filtered_length)\n                else:\n                    complete_index = pandas.RangeIndex(\n                        start=range_index_metadata[\"start\"],\n                        step=range_index_metadata[\"step\"],\n                        stop=(\n                            range_index_metadata[\"start\"]\n                            + (total_filtered_length * range_index_metadata[\"step\"])\n                        ),\n                        name=range_index_metadata[\"name\"],\n                    )\n            else:\n                complete_index = index_objs[0].append(index_objs[1:])\n        return complete_index, range_index or (len(index_columns) == 0)\n\n    @classmethod\n    def _normalize_partitioning(cls, remote_parts, row_lengths, column_widths):\n        \"\"\"\n        Normalize partitioning according to the default partitioning scheme in Modin.\n\n        The result of 'read_parquet()' is often under partitioned over rows and over partitioned\n        over columns, so this method expands the number of row splits and shrinks the number of column splits.\n\n        Parameters\n        ----------\n        remote_parts : np.ndarray\n        row_lengths : list of ints or None\n            Row lengths, if 'None', won't repartition across rows.\n        column_widths : list of ints\n\n        Returns\n        -------\n        remote_parts : np.ndarray\n        row_lengths : list of 
ints or None\n        column_widths : list of ints\n        \"\"\"\n        if len(remote_parts) == 0:\n            return remote_parts, row_lengths, column_widths\n\n        from modin.core.storage_formats.pandas.utils import get_length_list\n\n        # The code in this function is actually a duplication of what 'BaseQueryCompiler.repartition()' does,\n        # however this implementation works much faster for some reason\n\n        actual_row_nparts = remote_parts.shape[0]\n\n        if row_lengths is not None:\n            desired_row_nparts = max(\n                1, min(sum(row_lengths) // MinRowPartitionSize.get(), NPartitions.get())\n            )\n        else:\n            desired_row_nparts = actual_row_nparts\n\n        # only repartition along rows if the actual number of row splits 1.5 times SMALLER than desired\n        if 1.5 * actual_row_nparts < desired_row_nparts:\n            # assuming that the sizes of parquet's row groups are more or less equal,\n            # so trying to use the same number of splits for each partition\n            splits_per_partition = desired_row_nparts // actual_row_nparts\n            remainder = desired_row_nparts % actual_row_nparts\n\n            new_parts = []\n            new_row_lengths = []\n\n            for row_idx, (part_len, row_parts) in enumerate(\n                zip(row_lengths, remote_parts)\n            ):\n                num_splits = splits_per_partition\n                # 'remainder' indicates how many partitions have to be split into 'num_splits + 1' splits\n                # to have exactly 'desired_row_nparts' in the end\n                if row_idx < remainder:\n                    num_splits += 1\n\n                if num_splits == 1:\n                    new_parts.append(row_parts)\n                    new_row_lengths.append(part_len)\n                    continue\n\n                offset = len(new_parts)\n                # adding empty row parts according to the number of splits\n            
    new_parts.extend([[] for _ in range(num_splits)])\n                for part in row_parts:\n                    split = cls.frame_cls._partition_mgr_cls._column_partitions_class(\n                        [part]\n                    ).apply(\n                        lambda df: df,\n                        num_splits=num_splits,\n                        maintain_partitioning=False,\n                    )\n                    for i in range(num_splits):\n                        new_parts[offset + i].append(split[i])\n\n                new_row_lengths.extend(\n                    get_length_list(part_len, num_splits, MinRowPartitionSize.get())\n                )\n\n            remote_parts = np.array(new_parts)\n            row_lengths = new_row_lengths\n\n        desired_col_nparts = max(\n            1,\n            min(sum(column_widths) // MinColumnPartitionSize.get(), NPartitions.get()),\n        )\n        # only repartition along cols if the actual number of col splits 1.5 times BIGGER than desired\n        if 1.5 * desired_col_nparts < remote_parts.shape[1]:\n            remote_parts = np.array(\n                [\n                    (\n                        cls.frame_cls._partition_mgr_cls._row_partition_class(\n                            row_parts\n                        ).apply(\n                            lambda df: df,\n                            num_splits=desired_col_nparts,\n                            maintain_partitioning=False,\n                        )\n                    )\n                    for row_parts in remote_parts\n                ]\n            )\n            column_widths = get_length_list(\n                sum(column_widths), desired_col_nparts, MinColumnPartitionSize.get()\n            )\n\n        return remote_parts, row_lengths, column_widths\n\n    @classmethod\n    def build_query_compiler(cls, dataset, columns, index_columns, **kwargs):\n        \"\"\"\n        Build query compiler from deployed tasks outputs.\n\n     
   Parameters\n        ----------\n        dataset : Dataset\n            Dataset object of Parquet file/files.\n        columns : list\n            List of columns that should be read from file.\n        index_columns : list\n            List of index columns specified by pandas metadata.\n        **kwargs : dict\n            Parameters of deploying read_* function.\n\n        Returns\n        -------\n        new_query_compiler : BaseQueryCompiler\n            Query compiler with imported data for further processing.\n        \"\"\"\n        storage_options = kwargs.pop(\"storage_options\", {}) or {}\n        filters = kwargs.get(\"filters\", None)\n\n        partition_files = cls._determine_partitioning(dataset)\n        col_partitions, column_widths = cls.build_columns(\n            columns,\n            num_row_parts=len(partition_files),\n        )\n        partition_ids = cls.call_deploy(\n            partition_files, col_partitions, storage_options, dataset.engine, **kwargs\n        )\n        index, sync_index = cls.build_index(\n            dataset, partition_ids, index_columns, filters\n        )\n        remote_parts = cls.build_partition(partition_ids, column_widths)\n        if len(partition_ids) > 0:\n            row_lengths = [part.length() for part in remote_parts.T[0]]\n        else:\n            row_lengths = None\n\n        remote_parts, row_lengths, column_widths = cls._normalize_partitioning(\n            remote_parts, row_lengths, column_widths\n        )\n\n        if (\n            dataset.pandas_metadata\n            and \"column_indexes\" in dataset.pandas_metadata\n            and len(dataset.pandas_metadata[\"column_indexes\"]) == 1\n            and dataset.pandas_metadata[\"column_indexes\"][0][\"numpy_type\"] == \"int64\"\n        ):\n            columns = pandas.Index(columns).astype(\"int64\").to_list()\n\n        frame = cls.frame_cls(\n            remote_parts,\n            index,\n            columns,\n            
row_lengths=row_lengths,\n            column_widths=column_widths,\n            dtypes=None,\n        )\n        if sync_index:\n            frame.synchronize_labels(axis=0)\n        return cls.query_compiler_cls(frame)\n\n    @classmethod\n    def _read(cls, path, engine, columns, use_nullable_dtypes, dtype_backend, **kwargs):\n        \"\"\"\n        Load a parquet object from the file path, returning a query compiler.\n\n        Parameters\n        ----------\n        path : str, path object or file-like object\n            The filepath of the parquet file in local filesystem or hdfs.\n        engine : {\"auto\", \"pyarrow\", \"fastparquet\"}\n            Parquet library to use.\n        columns : list\n            If not None, only these columns will be read from the file.\n        use_nullable_dtypes : Union[bool, lib.NoDefault]\n        dtype_backend : {\"numpy_nullable\", \"pyarrow\", lib.no_default}\n        **kwargs : dict\n            Keyword arguments.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            A new Query Compiler.\n\n        Notes\n        -----\n        ParquetFile API is used. 
Please refer to the documentation here\n        https://arrow.apache.org/docs/python/parquet.html\n        \"\"\"\n        if (\n            (set(kwargs) - {\"storage_options\", \"filters\", \"filesystem\"})\n            or use_nullable_dtypes != lib.no_default\n            or kwargs.get(\"filesystem\") is not None\n        ):\n            return cls.single_worker_read(\n                path,\n                engine=engine,\n                columns=columns,\n                use_nullable_dtypes=use_nullable_dtypes,\n                dtype_backend=dtype_backend,\n                reason=\"Parquet options that are not currently supported\",\n                **kwargs,\n            )\n\n        path = stringify_path(path)\n        if isinstance(path, list):\n            # TODO(https://github.com/modin-project/modin/issues/5723): read all\n            # files in parallel.\n            compilers: list[cls.query_compiler_cls] = [\n                cls._read(\n                    p, engine, columns, use_nullable_dtypes, dtype_backend, **kwargs\n                )\n                for p in path\n            ]\n            return compilers[0].concat(axis=0, other=compilers[1:], ignore_index=True)\n        if isinstance(path, str):\n            if os.path.isdir(path):\n                path_generator = os.walk(path)\n            else:\n                storage_options = kwargs.get(\"storage_options\")\n                if storage_options is not None:\n                    fs, fs_path = url_to_fs(path, **storage_options)\n                else:\n                    fs, fs_path = url_to_fs(path)\n                path_generator = fs.walk(fs_path)\n            partitioned_columns = set()\n            # We do a tree walk of the path directory because partitioned\n            # parquet directories have a unique column at each directory level.\n            # Thus, we can use os.walk(), which does a dfs search, to walk\n            # through the different columns that the data is partitioned 
on\n            for _, dir_names, files in path_generator:\n                if dir_names:\n                    partitioned_columns.add(dir_names[0].split(\"=\")[0])\n                if files:\n                    # Metadata files, git files, .DSStore\n                    # TODO: fix conditional for column partitioning, see issue #4637\n                    if len(files[0]) > 0 and files[0][0] == \".\":\n                        continue\n                    break\n            partitioned_columns = list(partitioned_columns)\n            if len(partitioned_columns):\n                return cls.single_worker_read(\n                    path,\n                    engine=engine,\n                    columns=columns,\n                    use_nullable_dtypes=use_nullable_dtypes,\n                    dtype_backend=dtype_backend,\n                    reason=\"Mixed partitioning columns in Parquet\",\n                    **kwargs,\n                )\n\n        dataset = cls.get_dataset(path, engine, kwargs.get(\"storage_options\") or {})\n        index_columns = (\n            dataset.pandas_metadata.get(\"index_columns\", [])\n            if dataset.pandas_metadata\n            else []\n        )\n        # If we have columns as None, then we default to reading in all the columns\n        column_names = columns if columns else dataset.columns\n        columns = [\n            c\n            for c in column_names\n            if c not in index_columns and not cls.index_regex.match(c)\n        ]\n\n        return cls.build_query_compiler(\n            dataset, columns, index_columns, dtype_backend=dtype_backend, **kwargs\n        )\n\n    @classmethod\n    def write(cls, qc, **kwargs):\n        \"\"\"\n        Write a ``DataFrame`` to the binary parquet format.\n\n        Parameters\n        ----------\n        qc : BaseQueryCompiler\n            The query compiler of the Modin dataframe that we want to run `to_parquet` on.\n        **kwargs : dict\n            Parameters for 
`pandas.to_parquet(**kwargs)`.\n        \"\"\"\n        kwargs[\"path\"] = stringify_path(kwargs[\"path\"])\n        output_path = kwargs[\"path\"]\n        if not isinstance(output_path, str):\n            return cls.base_io.to_parquet(qc, **kwargs)\n        client_kwargs = (kwargs.get(\"storage_options\") or {}).get(\"client_kwargs\", {})\n        fs, url = fsspec.core.url_to_fs(output_path, client_kwargs=client_kwargs)\n        fs.mkdirs(url, exist_ok=True)\n\n        def func(df, **kw):  # pragma: no cover\n            \"\"\"\n            Dump a chunk of rows as parquet, then save them to target maintaining order.\n\n            Parameters\n            ----------\n            df : pandas.DataFrame\n                A chunk of rows to write to a parquet file.\n            **kw : dict\n                Arguments to pass to ``pandas.to_parquet(**kwargs)`` plus an extra argument\n                `partition_idx` serving as chunk index to maintain rows order.\n            \"\"\"\n            compression = kwargs[\"compression\"]\n            partition_idx = kw[\"partition_idx\"]\n            kwargs[\"path\"] = (\n                f\"{output_path}/part-{partition_idx:04d}.{compression}.parquet\"\n            )\n            df.to_parquet(**kwargs)\n            return pandas.DataFrame()\n\n        # Ensure that the metadata is synchronized\n        qc._modin_frame._propagate_index_objs(axis=None)\n        result = qc._modin_frame._partition_mgr_cls.map_axis_partitions(\n            axis=1,\n            partitions=qc._modin_frame._partitions,\n            map_func=func,\n            keep_partitioning=True,\n            lengths=None,\n            enumerate_partitions=True,\n        )\n        # pending completion\n        cls.materialize([part.list_of_blocks[0] for row in result for part in row])\n"
  },
  {
    "path": "modin/core/io/file_dispatcher.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"\nModule houses `FileDispatcher` class.\n\n`FileDispatcher` can be used as abstract base class for dispatchers of specific file formats or\nfor direct files processing.\n\"\"\"\n\nimport os\n\nimport fsspec\nimport numpy as np\nfrom pandas.io.common import is_fsspec_url, is_url\n\nfrom modin.config import AsyncReadMode\nfrom modin.logging import ClassLogger\nfrom modin.logging.config import LogLevel\nfrom modin.utils import ModinAssumptionError\n\nNOT_IMPLEMENTED_MESSAGE = \"Implement in children classes!\"\n\n\nclass OpenFile:\n    \"\"\"\n    OpenFile is a context manager for an input file.\n\n    OpenFile uses fsspec to open files on __enter__. On __exit__, it closes the\n    fsspec file. 
This class exists to encapsulate the special behavior in\n    __enter__ around anon=False and anon=True for s3 buckets.\n\n    Parameters\n    ----------\n    file_path : str\n        String that represents the path to the file (paths to S3 buckets\n        are also acceptable).\n    mode : str, default: \"rb\"\n        String, which defines which mode file should be open.\n    compression : str, default: \"infer\"\n        File compression name.\n    **kwargs : dict\n        Keywords arguments to be passed into ``fsspec.open`` function.\n\n    Attributes\n    ----------\n    file_path : str\n        String that represents the path to the file\n    mode : str\n        String that defines which mode the file should be opened in.\n    compression : str\n        File compression name.\n    file : fsspec.core.OpenFile\n        The opened file.\n    kwargs : dict\n        Keywords arguments to be passed into ``fsspec.open`` function.\n    \"\"\"\n\n    def __init__(self, file_path, mode=\"rb\", compression=\"infer\", **kwargs):\n        self.file_path = file_path\n        self.mode = mode\n        self.compression = compression\n        self.kwargs = kwargs\n\n    def __enter__(self):\n        \"\"\"\n        Open the file with fsspec and return the opened file.\n\n        Returns\n        -------\n        fsspec.core.OpenFile\n            The opened file.\n        \"\"\"\n        try:\n            from botocore.exceptions import NoCredentialsError\n\n            credential_error_type = (\n                NoCredentialsError,\n                PermissionError,\n            )\n        except ModuleNotFoundError:\n            credential_error_type = (PermissionError,)\n\n        args = (self.file_path, self.mode, self.compression)\n\n        self.file = fsspec.open(*args, **self.kwargs)\n        try:\n            return self.file.open()\n        except credential_error_type:\n            self.kwargs[\"anon\"] = True\n            self.file = fsspec.open(*args, 
**self.kwargs)\n        return self.file.open()\n\n    def __exit__(self, *args):\n        \"\"\"\n        Close the file.\n\n        Parameters\n        ----------\n        *args : any type\n            Variable positional arguments, all unused.\n        \"\"\"\n        self.file.close()\n\n\nclass FileDispatcher(ClassLogger, modin_layer=\"CORE-IO\", log_level=LogLevel.DEBUG):\n    \"\"\"\n    Class handles util functions for reading data from different kinds of files.\n\n    Notes\n    -----\n    `_read`, `deploy`, `parse` and `materialize` are abstract methods and should be\n    implemented in the child classes (functions signatures can differ between child\n    classes).\n    \"\"\"\n\n    BUFFER_UNSUPPORTED_MSG = (\n        \"Reading from buffers or other non-path-like objects is not supported\"\n    )\n\n    frame_cls = None\n    frame_partition_cls = None\n    query_compiler_cls = None\n\n    @classmethod\n    def read(cls, *args, **kwargs):\n        \"\"\"\n        Read data according passed `args` and `kwargs`.\n\n        Parameters\n        ----------\n        *args : iterable\n            Positional arguments to be passed into `_read` function.\n        **kwargs : dict\n            Keywords arguments to be passed into `_read` function.\n\n        Returns\n        -------\n        query_compiler : BaseQueryCompiler\n            Query compiler with imported data for further processing.\n\n        Notes\n        -----\n        `read` is high-level function that calls specific for defined storage format, engine and\n        dispatcher class `_read` function with passed parameters and performs some\n        postprocessing work on the resulting query_compiler object.\n        \"\"\"\n        try:\n            query_compiler = cls._read(*args, **kwargs)\n        except ModinAssumptionError as err:\n            param_name = \"path_or_buf\" if \"path_or_buf\" in kwargs else \"fname\"\n            fname = kwargs.pop(param_name)\n            return 
cls.single_worker_read(fname, *args, reason=str(err), **kwargs)\n        # TextFileReader can also be returned from `_read`.\n        if not AsyncReadMode.get() and hasattr(query_compiler, \"dtypes\"):\n            # at the moment it is not possible to use `wait_partitions` function;\n            # in a situation where the reading function is called in a row with the\n            # same parameters, `wait_partitions` considers that we have waited for\n            # the end of remote calculations, however, when trying to materialize the\n            # received data, it is clear that the calculations have not yet ended.\n            # for example, `test_io_exp.py::test_read_evaluated_dict` is failed because of that.\n            # see #5944 for details\n            _ = query_compiler.dtypes\n        return query_compiler\n\n    @classmethod\n    def _read(cls, *args, **kwargs):\n        \"\"\"\n        Perform reading of the data from file.\n\n        Should be implemented in the child class.\n\n        Parameters\n        ----------\n        *args : iterable\n            Positional arguments of the function.\n        **kwargs : dict\n            Keywords arguments of the function.\n        \"\"\"\n        raise NotImplementedError(NOT_IMPLEMENTED_MESSAGE)\n\n    @classmethod\n    def get_path(cls, file_path):\n        \"\"\"\n        Process `file_path` in accordance to it's type.\n\n        Parameters\n        ----------\n        file_path : str, os.PathLike[str] object or file-like object\n            The file, or a path to the file. 
Paths to S3 buckets are also\n            acceptable.\n\n        Returns\n        -------\n        str\n            Updated or verified `file_path` parameter.\n\n        Notes\n        -----\n        if `file_path` is a URL, parameter will be returned as is, otherwise\n        absolute path will be returned.\n        \"\"\"\n        if is_fsspec_url(file_path) or is_url(file_path):\n            return file_path\n        else:\n            return os.path.abspath(file_path)\n\n    @classmethod\n    def file_size(cls, f):\n        \"\"\"\n        Get the size of file associated with file handle `f`.\n\n        Parameters\n        ----------\n        f : file-like object\n            File-like object, that should be used to get file size.\n\n        Returns\n        -------\n        int\n            File size in bytes.\n        \"\"\"\n        cur_pos = f.tell()\n        f.seek(0, os.SEEK_END)\n        size = f.tell()\n        f.seek(cur_pos, os.SEEK_SET)\n        return size\n\n    @classmethod\n    def file_exists(cls, file_path, storage_options=None):\n        \"\"\"\n        Check if `file_path` exists.\n\n        Parameters\n        ----------\n        file_path : str\n            String that represents the path to the file (paths to S3 buckets\n            are also acceptable).\n        storage_options : dict, optional\n            Keyword from `read_*` functions.\n\n        Returns\n        -------\n        bool\n            Whether file exists or not.\n        \"\"\"\n        if not is_fsspec_url(file_path) and not is_url(file_path):\n            return os.path.exists(file_path)\n\n        try:\n            from botocore.exceptions import (\n                ConnectTimeoutError,\n                EndpointConnectionError,\n                NoCredentialsError,\n            )\n\n            credential_error_type = (\n                NoCredentialsError,\n                PermissionError,\n                EndpointConnectionError,\n                ConnectTimeoutError,\n  
          )\n        except ModuleNotFoundError:\n            credential_error_type = (PermissionError,)\n\n        if storage_options is not None:\n            new_storage_options = dict(storage_options)\n            new_storage_options.pop(\"anon\", None)\n        else:\n            new_storage_options = {}\n\n        fs, _ = fsspec.core.url_to_fs(file_path, **new_storage_options)\n        exists = False\n        try:\n            exists = fs.exists(file_path)\n        except credential_error_type:\n            fs, _ = fsspec.core.url_to_fs(file_path, anon=True, **new_storage_options)\n            exists = fs.exists(file_path)\n\n        return exists\n\n    @classmethod\n    def deploy(cls, func, *args, num_returns=1, **kwargs):  # noqa: PR01\n        \"\"\"\n        Deploy remote task.\n\n        Should be implemented in the task class (for example in the `RayWrapper`).\n        \"\"\"\n        raise NotImplementedError(NOT_IMPLEMENTED_MESSAGE)\n\n    def parse(self, func, args, num_returns):  # noqa: PR01\n        \"\"\"\n        Parse file's data in the worker process.\n\n        Should be implemented in the parser class (for example in the `PandasCSVParser`).\n        \"\"\"\n        raise NotImplementedError(NOT_IMPLEMENTED_MESSAGE)\n\n    @classmethod\n    def materialize(cls, obj_id):  # noqa: PR01\n        \"\"\"\n        Get results from worker.\n\n        Should be implemented in the task class (for example in the `RayWrapper`).\n        \"\"\"\n        raise NotImplementedError(NOT_IMPLEMENTED_MESSAGE)\n\n    @classmethod\n    def build_partition(cls, partition_ids, row_lengths, column_widths):\n        \"\"\"\n        Build array with partitions of `cls.frame_partition_cls` class.\n\n        Parameters\n        ----------\n        partition_ids : list\n            Array with references to the partitions data.\n        row_lengths : list\n            Partitions rows lengths.\n        column_widths : list\n            Number of columns in each 
partition.\n\n        Returns\n        -------\n        np.ndarray\n            array with shape equal to the shape of `partition_ids` and\n            filled with partition objects.\n        \"\"\"\n        return np.array(\n            [\n                [\n                    cls.frame_partition_cls(\n                        partition_ids[i][j],\n                        length=row_lengths[i],\n                        width=column_widths[j],\n                    )\n                    for j in range(len(partition_ids[i]))\n                ]\n                for i in range(len(partition_ids))\n            ]\n        )\n\n    @classmethod\n    def _file_not_found_msg(cls, filename: str):  # noqa: GL08\n        return f\"No such file: '{filename}'\"\n"
  },
  {
    "path": "modin/core/io/io.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"\nModule houses `BaseIO` class.\n\n`BaseIO` is base class for IO classes, that stores IO functions.\n\"\"\"\n\nfrom typing import Any\n\nimport pandas\nfrom pandas._libs.lib import no_default\nfrom pandas.util._decorators import doc\n\nfrom modin.core.storage_formats import BaseQueryCompiler\nfrom modin.db_conn import ModinDatabaseConnection\nfrom modin.error_message import ErrorMessage\nfrom modin.pandas.io import ExcelFile\nfrom modin.utils import _inherit_docstrings\n\n_doc_default_io_method = \"\"\"\n{summary} using pandas.\nFor parameters description please refer to pandas API.\n\nReturns\n-------\n{returns}\n\"\"\"\n\n_doc_returns_qc = \"\"\"BaseQueryCompiler\n    QueryCompiler with read data.\"\"\"\n\n_doc_returns_qc_or_parser = \"\"\"BaseQueryCompiler or TextParser\n    QueryCompiler or TextParser with read data.\"\"\"\n\n\nclass BaseIO:\n    \"\"\"Class for basic utils and default implementation of IO functions.\"\"\"\n\n    query_compiler_cls: BaseQueryCompiler = None\n    frame_cls = None\n    _should_warn_on_default_to_pandas: bool = True\n\n    @classmethod\n    def _maybe_warn_on_default(cls, *, message: str = \"\", reason: str = \"\") -> 
None:\n        \"\"\"\n        If this class is configured to warn on default to pandas, warn.\n\n        Parameters\n        ----------\n        message : str, default: \"\"\n            Method that is causing a default to pandas.\n        reason : str, default: \"\"\n            Reason for default.\n        \"\"\"\n        if cls._should_warn_on_default_to_pandas:\n            ErrorMessage.default_to_pandas(message=message, reason=reason)\n\n    @classmethod\n    def from_non_pandas(cls, *args, **kwargs):\n        \"\"\"\n        Create a Modin `query_compiler` from a non-pandas `object`.\n\n        Parameters\n        ----------\n        *args : iterable\n            Positional arguments to be passed into `func`.\n        **kwargs : dict\n            Keyword arguments to be passed into `func`.\n        \"\"\"\n        return None\n\n    @classmethod\n    def from_pandas(cls, df) -> BaseQueryCompiler:\n        \"\"\"\n        Create a Modin `query_compiler` from a `pandas.DataFrame`.\n\n        Parameters\n        ----------\n        df : pandas.DataFrame\n            The pandas DataFrame to convert from.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            QueryCompiler containing data from the `pandas.DataFrame`.\n        \"\"\"\n        return cls.query_compiler_cls.from_pandas(df, cls.frame_cls)\n\n    @classmethod\n    def from_arrow(cls, at):\n        \"\"\"\n        Create a Modin `query_compiler` from a `pyarrow.Table`.\n\n        Parameters\n        ----------\n        at : Arrow Table\n            The Arrow Table to convert from.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            QueryCompiler containing data from the Arrow Table.\n        \"\"\"\n        return cls.query_compiler_cls.from_arrow(at, cls.frame_cls)\n\n    @classmethod\n    def from_interchange_dataframe(cls, df):\n        \"\"\"\n        Create a Modin QueryCompiler from a DataFrame supporting the DataFrame exchange protocol 
`__dataframe__()`.\n\n        Parameters\n        ----------\n        df : DataFrame\n            The DataFrame object supporting the DataFrame exchange protocol.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            QueryCompiler containing data from the DataFrame.\n        \"\"\"\n        return cls.query_compiler_cls.from_interchange_dataframe(df, cls.frame_cls)\n\n    @classmethod\n    def from_ray(cls, ray_obj):\n        \"\"\"\n        Create a Modin `query_compiler` from a Ray Dataset.\n\n        Parameters\n        ----------\n        ray_obj : ray.data.Dataset\n            The Ray Dataset to convert from.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            QueryCompiler containing data from the Ray Dataset.\n\n        Notes\n        -----\n        Ray Dataset can only be converted to a Modin Dataframe if Modin uses a Ray engine.\n        If another engine is used, the runtime exception will be raised.\n        \"\"\"\n        raise RuntimeError(\n            \"Modin Dataframe can only be converted to a Ray Dataset if Modin uses a Ray engine.\"\n        )\n\n    @classmethod\n    def from_dask(cls, dask_obj):\n        \"\"\"\n        Create a Modin `query_compiler` from a Dask DataFrame.\n\n        Parameters\n        ----------\n        dask_obj : dask.dataframe.DataFrame\n            The Dask DataFrame to convert from.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            QueryCompiler containing data from the Dask DataFrame.\n\n        Notes\n        -----\n        Dask DataFrame can only be converted to a Modin DataFrame if Modin uses a Dask engine.\n        If another engine is used, the runtime exception will be raised.\n        \"\"\"\n        raise RuntimeError(\n            \"Modin DataFrame can only be converted to a Dask DataFrame if Modin uses a Dask engine.\"\n        )\n\n    @classmethod\n    def from_map(cls, func, iterable, *args, **kwargs):\n        \"\"\"\n        Create 
a Modin `query_compiler` from a map function.\n\n        This method will construct a Modin `query_compiler` split by row partitions.\n        The number of row partitions matches the number of elements in the iterable object.\n\n        Parameters\n        ----------\n        func : callable\n            Function to map across the iterable object.\n        iterable : Iterable\n            An iterable object.\n        *args : tuple\n            Positional arguments to pass in `func`.\n        **kwargs : dict\n            Keyword arguments to pass in `func`.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            QueryCompiler containing data returned by map function.\n        \"\"\"\n        raise RuntimeError(\n            \"Modin DataFrame can only be created if Modin uses Ray, Dask or MPI engine.\"\n        )\n\n    @classmethod\n    @_inherit_docstrings(pandas.read_parquet, apilink=\"pandas.read_parquet\")\n    @doc(\n        _doc_default_io_method,\n        summary=\"Load a parquet object from the file path, returning a query compiler\",\n        returns=_doc_returns_qc,\n    )\n    def read_parquet(cls, **kwargs):  # noqa: PR01\n        cls._maybe_warn_on_default(message=\"`read_parquet`\")\n        return cls.from_pandas(pandas.read_parquet(**kwargs))\n\n    @classmethod\n    @_inherit_docstrings(pandas.read_csv, apilink=\"pandas.read_csv\")\n    @doc(\n        _doc_default_io_method,\n        summary=\"Read a comma-separated values (CSV) file into query compiler\",\n        returns=_doc_returns_qc_or_parser,\n    )\n    def read_csv(\n        cls,\n        filepath_or_buffer,\n        **kwargs,\n    ):  # noqa: PR01\n        cls._maybe_warn_on_default(message=\"`read_csv`\")\n        pd_obj = pandas.read_csv(filepath_or_buffer, **kwargs)\n        if isinstance(pd_obj, pandas.DataFrame):\n            return cls.from_pandas(pd_obj)\n        if isinstance(pd_obj, pandas.io.parsers.TextFileReader):\n            # Overwriting the read method 
should return a Modin DataFrame for calls\n            # to __next__ and get_chunk\n            pd_read = pd_obj.read\n            pd_obj.read = lambda *args, **kw: cls.from_pandas(pd_read(*args, **kw))\n        return pd_obj\n\n    @classmethod\n    @_inherit_docstrings(pandas.read_json, apilink=\"pandas.read_json\")\n    @doc(\n        _doc_default_io_method,\n        summary=\"Convert a JSON string to query compiler\",\n        returns=_doc_returns_qc,\n    )\n    def read_json(\n        cls,\n        **kwargs,\n    ):  # noqa: PR01\n        cls._maybe_warn_on_default(message=\"`read_json`\")\n        return cls.from_pandas(pandas.read_json(**kwargs))\n\n    @classmethod\n    @_inherit_docstrings(pandas.read_gbq, apilink=\"pandas.read_gbq\")\n    @doc(\n        _doc_default_io_method,\n        summary=\"Load data from Google BigQuery into query compiler\",\n        returns=_doc_returns_qc,\n    )\n    def read_gbq(\n        cls,\n        query: str,\n        project_id=None,\n        index_col=None,\n        col_order=None,\n        reauth=False,\n        auth_local_webserver=False,\n        dialect=None,\n        location=None,\n        configuration=None,\n        credentials=None,\n        use_bqstorage_api=None,\n        private_key=None,\n        verbose=None,\n        progress_bar_type=None,\n        max_results=None,\n    ):  # noqa: PR01\n        cls._maybe_warn_on_default(message=\"`read_gbq`\")\n        return cls.from_pandas(\n            pandas.read_gbq(\n                query,\n                project_id=project_id,\n                index_col=index_col,\n                col_order=col_order,\n                reauth=reauth,\n                auth_local_webserver=auth_local_webserver,\n                dialect=dialect,\n                location=location,\n                configuration=configuration,\n                credentials=credentials,\n                use_bqstorage_api=use_bqstorage_api,\n                progress_bar_type=progress_bar_type,\n       
         max_results=max_results,\n            )\n        )\n\n    @classmethod\n    @_inherit_docstrings(pandas.read_html, apilink=\"pandas.read_html\")\n    @doc(\n        _doc_default_io_method,\n        summary=\"Read HTML tables into query compiler\",\n        returns=_doc_returns_qc,\n    )\n    def read_html(\n        cls,\n        io,\n        *,\n        match=\".+\",\n        flavor=None,\n        header=None,\n        index_col=None,\n        skiprows=None,\n        attrs=None,\n        parse_dates=False,\n        thousands=\",\",\n        encoding=None,\n        decimal=\".\",\n        converters=None,\n        na_values=None,\n        keep_default_na=True,\n        displayed_only=True,\n        **kwargs,\n    ):  # noqa: PR01\n        cls._maybe_warn_on_default(message=\"`read_html`\")\n        result = pandas.read_html(\n            io=io,\n            match=match,\n            flavor=flavor,\n            header=header,\n            index_col=index_col,\n            skiprows=skiprows,\n            attrs=attrs,\n            parse_dates=parse_dates,\n            thousands=thousands,\n            encoding=encoding,\n            decimal=decimal,\n            converters=converters,\n            na_values=na_values,\n            keep_default_na=keep_default_na,\n            displayed_only=displayed_only,\n            **kwargs,\n        )\n        return (cls.from_pandas(df) for df in result)\n\n    @classmethod\n    @_inherit_docstrings(pandas.read_clipboard, apilink=\"pandas.read_clipboard\")\n    @doc(\n        _doc_default_io_method,\n        summary=\"Read text from clipboard into query compiler\",\n        returns=_doc_returns_qc,\n    )\n    def read_clipboard(cls, sep=r\"\\s+\", **kwargs):  # pragma: no cover # noqa: PR01\n        cls._maybe_warn_on_default(message=\"`read_clipboard`\")\n        return cls.from_pandas(pandas.read_clipboard(sep=sep, **kwargs))\n\n    @classmethod\n    @_inherit_docstrings(pandas.read_excel, 
apilink=\"pandas.read_excel\")\n    @doc(\n        _doc_default_io_method,\n        summary=\"Read an Excel file into query compiler\",\n        returns=\"\"\"BaseQueryCompiler or dict :\n    QueryCompiler or dict with read data.\"\"\",\n    )\n    def read_excel(cls, **kwargs):  # noqa: PR01\n        cls._maybe_warn_on_default(message=\"`read_excel`\")\n        if isinstance(kwargs[\"io\"], ExcelFile):\n            # otherwise, Modin objects may be passed to the pandas context, resulting\n            # in undefined behavior\n            # for example in the case: pd.read_excel(pd.ExcelFile), since reading from\n            # pd.ExcelFile in `read_excel` isn't supported\n            kwargs[\"io\"]._set_pandas_mode()\n        intermediate = pandas.read_excel(**kwargs)\n        if isinstance(intermediate, dict):\n            parsed = type(intermediate)()\n            for key in intermediate.keys():\n                parsed[key] = cls.from_pandas(intermediate.get(key))\n            return parsed\n        else:\n            return cls.from_pandas(intermediate)\n\n    @classmethod\n    @_inherit_docstrings(pandas.read_hdf, apilink=\"pandas.read_hdf\")\n    @doc(\n        _doc_default_io_method,\n        summary=\"Read data from hdf store into query compiler\",\n        returns=_doc_returns_qc,\n    )\n    def read_hdf(\n        cls,\n        path_or_buf,\n        key=None,\n        mode: str = \"r\",\n        errors: str = \"strict\",\n        where=None,\n        start=None,\n        stop=None,\n        columns=None,\n        iterator=False,\n        chunksize=None,\n        **kwargs,\n    ):  # noqa: PR01\n        from modin.pandas.io import HDFStore\n\n        cls._maybe_warn_on_default(message=\"`read_hdf`\")\n        modin_store = isinstance(path_or_buf, HDFStore)\n        if modin_store:\n            path_or_buf._return_modin_dataframe = False\n        df = pandas.read_hdf(\n            path_or_buf,\n            key=key,\n            mode=mode,\n            
columns=columns,\n            errors=errors,\n            where=where,\n            start=start,\n            stop=stop,\n            iterator=iterator,\n            chunksize=chunksize,\n            **kwargs,\n        )\n        if modin_store:\n            path_or_buf._return_modin_dataframe = True\n\n        return cls.from_pandas(df)\n\n    @classmethod\n    @_inherit_docstrings(pandas.read_feather, apilink=\"pandas.read_feather\")\n    @doc(\n        _doc_default_io_method,\n        summary=\"Load a feather-format object from the file path into query compiler\",\n        returns=_doc_returns_qc,\n    )\n    def read_feather(\n        cls,\n        path,\n        **kwargs,\n    ):  # noqa: PR01\n        cls._maybe_warn_on_default(message=\"`read_feather`\")\n        return cls.from_pandas(\n            pandas.read_feather(\n                path,\n                **kwargs,\n            )\n        )\n\n    @classmethod\n    @_inherit_docstrings(pandas.read_stata, apilink=\"pandas.read_stata\")\n    @doc(\n        _doc_default_io_method,\n        summary=\"Read Stata file into query compiler\",\n        returns=_doc_returns_qc,\n    )\n    def read_stata(\n        cls,\n        filepath_or_buffer,\n        **kwargs,\n    ):  # noqa: PR01\n        cls._maybe_warn_on_default(message=\"`read_stata`\")\n        return cls.from_pandas(pandas.read_stata(filepath_or_buffer, **kwargs))\n\n    @classmethod\n    @_inherit_docstrings(pandas.read_sas, apilink=\"pandas.read_sas\")\n    @doc(\n        _doc_default_io_method,\n        summary=\"Read SAS files stored as either XPORT or SAS7BDAT format files\\ninto query compiler\",\n        returns=_doc_returns_qc,\n    )\n    def read_sas(\n        cls,\n        filepath_or_buffer,\n        *,\n        format=None,\n        index=None,\n        encoding=None,\n        chunksize=None,\n        iterator=False,\n        **kwargs,\n    ):  # pragma: no cover # noqa: PR01\n        cls._maybe_warn_on_default(message=\"`read_sas`\")\n  
      return cls.from_pandas(\n            pandas.read_sas(\n                filepath_or_buffer,\n                format=format,\n                index=index,\n                encoding=encoding,\n                chunksize=chunksize,\n                iterator=iterator,\n                **kwargs,\n            )\n        )\n\n    @classmethod\n    @_inherit_docstrings(pandas.read_pickle, apilink=\"pandas.read_pickle\")\n    @doc(\n        _doc_default_io_method,\n        summary=\"Load pickled pandas object (or any object) from file into query compiler\",\n        returns=_doc_returns_qc,\n    )\n    def read_pickle(\n        cls,\n        filepath_or_buffer,\n        **kwargs,\n    ):  # noqa: PR01\n        cls._maybe_warn_on_default(message=\"`read_pickle`\")\n        return cls.from_pandas(\n            pandas.read_pickle(\n                filepath_or_buffer,\n                **kwargs,\n            )\n        )\n\n    @classmethod\n    @_inherit_docstrings(pandas.read_sql, apilink=\"pandas.read_sql\")\n    @doc(\n        _doc_default_io_method,\n        summary=\"Read SQL query or database table into query compiler\",\n        returns=_doc_returns_qc,\n    )\n    def read_sql(\n        cls,\n        sql,\n        con,\n        index_col=None,\n        coerce_float=True,\n        params=None,\n        parse_dates=None,\n        columns=None,\n        chunksize=None,\n        dtype_backend=no_default,\n        dtype=None,\n    ):  # noqa: PR01\n        cls._maybe_warn_on_default(message=\"`read_sql`\")\n        if isinstance(con, ModinDatabaseConnection):\n            con = con.get_connection()\n        result = pandas.read_sql(\n            sql,\n            con,\n            index_col=index_col,\n            coerce_float=coerce_float,\n            params=params,\n            parse_dates=parse_dates,\n            columns=columns,\n            chunksize=chunksize,\n            dtype_backend=dtype_backend,\n            dtype=dtype,\n        )\n\n        if 
isinstance(result, (pandas.DataFrame, pandas.Series)):\n            return cls.from_pandas(result)\n        return (cls.from_pandas(df) for df in result)\n\n    @classmethod\n    @_inherit_docstrings(pandas.read_fwf, apilink=\"pandas.read_fwf\")\n    @doc(\n        _doc_default_io_method,\n        summary=\"Read a table of fixed-width formatted lines into query compiler\",\n        returns=_doc_returns_qc_or_parser,\n    )\n    def read_fwf(\n        cls,\n        filepath_or_buffer,\n        *,\n        colspecs=\"infer\",\n        widths=None,\n        infer_nrows=100,\n        dtype_backend=no_default,\n        iterator=False,\n        chunksize=None,\n        **kwds,\n    ):  # noqa: PR01\n        cls._maybe_warn_on_default(message=\"`read_fwf`\")\n        pd_obj = pandas.read_fwf(\n            filepath_or_buffer,\n            colspecs=colspecs,\n            widths=widths,\n            infer_nrows=infer_nrows,\n            dtype_backend=dtype_backend,\n            iterator=iterator,\n            chunksize=chunksize,\n            **kwds,\n        )\n        if isinstance(pd_obj, pandas.DataFrame):\n            return cls.from_pandas(pd_obj)\n        if isinstance(pd_obj, pandas.io.parsers.TextFileReader):\n            # Overwriting the read method should return a Modin DataFrame for calls\n            # to __next__ and get_chunk\n            pd_read = pd_obj.read\n            pd_obj.read = lambda *args, **kwargs: cls.from_pandas(\n                pd_read(*args, **kwargs)\n            )\n        return pd_obj\n\n    @classmethod\n    @_inherit_docstrings(pandas.read_sql_table, apilink=\"pandas.read_sql_table\")\n    @doc(\n        _doc_default_io_method,\n        summary=\"Read SQL database table into query compiler\",\n        returns=_doc_returns_qc,\n    )\n    def read_sql_table(\n        cls,\n        table_name,\n        con,\n        schema=None,\n        index_col=None,\n        coerce_float=True,\n        parse_dates=None,\n        columns=None,\n        
chunksize=None,\n        dtype_backend=no_default,\n    ):  # noqa: PR01\n        cls._maybe_warn_on_default(message=\"`read_sql_table`\")\n        return cls.from_pandas(\n            pandas.read_sql_table(\n                table_name,\n                con,\n                schema=schema,\n                index_col=index_col,\n                coerce_float=coerce_float,\n                parse_dates=parse_dates,\n                columns=columns,\n                chunksize=chunksize,\n                dtype_backend=dtype_backend,\n            )\n        )\n\n    @classmethod\n    @_inherit_docstrings(pandas.read_sql_query, apilink=\"pandas.read_sql_query\")\n    @doc(\n        _doc_default_io_method,\n        summary=\"Read SQL query into query compiler\",\n        returns=_doc_returns_qc,\n    )\n    def read_sql_query(\n        cls,\n        sql,\n        con,\n        **kwargs,\n    ):  # noqa: PR01\n        cls._maybe_warn_on_default(message=\"`read_sql_query`\")\n        return cls.from_pandas(\n            pandas.read_sql_query(\n                sql,\n                con,\n                **kwargs,\n            )\n        )\n\n    @classmethod\n    @_inherit_docstrings(pandas.read_spss, apilink=\"pandas.read_spss\")\n    @doc(\n        _doc_default_io_method,\n        summary=\"Load an SPSS file from the file path, returning a query compiler\",\n        returns=_doc_returns_qc,\n    )\n    def read_spss(\n        cls, path, usecols, convert_categoricals, dtype_backend\n    ):  # noqa: PR01\n        cls._maybe_warn_on_default(message=\"`read_spss`\")\n        return cls.from_pandas(\n            pandas.read_spss(\n                path,\n                usecols=usecols,\n                convert_categoricals=convert_categoricals,\n                dtype_backend=dtype_backend,\n            )\n        )\n\n    @classmethod\n    @_inherit_docstrings(pandas.DataFrame.to_sql, apilink=\"pandas.DataFrame.to_sql\")\n    def to_sql(\n        cls,\n        qc,\n        
name,\n        con,\n        schema=None,\n        if_exists=\"fail\",\n        index=True,\n        index_label=None,\n        chunksize=None,\n        dtype=None,\n        method=None,\n    ):  # noqa: PR01\n        \"\"\"\n        Write records stored in a DataFrame to a SQL database using pandas.\n\n        For parameters description please refer to pandas API.\n        \"\"\"\n        cls._maybe_warn_on_default(message=\"`to_sql`\")\n        df = qc.to_pandas()\n        df.to_sql(\n            name=name,\n            con=con,\n            schema=schema,\n            if_exists=if_exists,\n            index=index,\n            index_label=index_label,\n            chunksize=chunksize,\n            dtype=dtype,\n            method=method,\n        )\n\n    @classmethod\n    @_inherit_docstrings(\n        pandas.DataFrame.to_pickle, apilink=\"pandas.DataFrame.to_pickle\"\n    )\n    def to_pickle(\n        cls,\n        obj: Any,\n        filepath_or_buffer,\n        **kwargs,\n    ):  # noqa: PR01, D200\n        \"\"\"\n        Pickle (serialize) object to file.\n        \"\"\"\n        cls._maybe_warn_on_default(message=\"`to_pickle`\")\n        if isinstance(obj, BaseQueryCompiler):\n            obj = obj.to_pandas()\n\n        return pandas.to_pickle(\n            obj,\n            filepath_or_buffer=filepath_or_buffer,\n            **kwargs,\n        )\n\n    @classmethod\n    @_inherit_docstrings(pandas.DataFrame.to_csv, apilink=\"pandas.DataFrame.to_csv\")\n    def to_csv(cls, obj, **kwargs):  # noqa: PR01\n        \"\"\"\n        Write object to a comma-separated values (CSV) file using pandas.\n\n        For parameters description please refer to pandas API.\n        \"\"\"\n        cls._maybe_warn_on_default(message=\"`to_csv`\")\n        if isinstance(obj, BaseQueryCompiler):\n            obj = obj.to_pandas()\n\n        return obj.to_csv(**kwargs)\n\n    @classmethod\n    @_inherit_docstrings(pandas.DataFrame.to_json, 
apilink=\"pandas.DataFrame.to_json\")\n    def to_json(cls, obj, path, **kwargs):  # noqa: PR01\n        \"\"\"\n        Convert the object to a JSON string.\n\n        For parameters description please refer to pandas API.\n        \"\"\"\n        cls._maybe_warn_on_default(message=\"`to_json`\")\n        if isinstance(obj, BaseQueryCompiler):\n            obj = obj.to_pandas()\n\n        return obj.to_json(path, **kwargs)\n\n    @classmethod\n    @_inherit_docstrings(pandas.Series.to_json, apilink=\"pandas.Series.to_json\")\n    def to_json_series(cls, obj, path, **kwargs):  # noqa: PR01\n        \"\"\"\n        Convert the object to a JSON string.\n\n        For parameters description please refer to pandas API.\n        \"\"\"\n        cls._maybe_warn_on_default(message=\"`to_json`\")\n        if isinstance(obj, BaseQueryCompiler):\n            obj = obj.to_pandas().squeeze(axis=1)\n\n        return obj.to_json(path, **kwargs)\n\n    @classmethod\n    @_inherit_docstrings(pandas.DataFrame.to_xml, apilink=\"pandas.DataFrame.to_xml\")\n    def to_xml(cls, obj, path_or_buffer, **kwargs):  # noqa: PR01\n        \"\"\"\n        Convert the object to a XML string.\n\n        For parameters description please refer to pandas API.\n        \"\"\"\n        cls._maybe_warn_on_default(message=\"`to_xml`\")\n        if isinstance(obj, BaseQueryCompiler):\n            obj = obj.to_pandas()\n\n        return obj.to_xml(path_or_buffer, **kwargs)\n\n    @classmethod\n    @_inherit_docstrings(\n        pandas.DataFrame.to_parquet, apilink=\"pandas.DataFrame.to_parquet\"\n    )\n    def to_parquet(cls, obj, path, **kwargs):  # noqa: PR01\n        \"\"\"\n        Write object to the binary parquet format using pandas.\n\n        For parameters description please refer to pandas API.\n        \"\"\"\n        cls._maybe_warn_on_default(message=\"`to_parquet`\")\n        if isinstance(obj, BaseQueryCompiler):\n            obj = obj.to_pandas()\n\n        return obj.to_parquet(path, 
**kwargs)\n\n    @classmethod\n    def to_ray(cls, modin_obj):\n        \"\"\"\n        Convert a Modin DataFrame/Series to a Ray Dataset.\n\n        Parameters\n        ----------\n        modin_obj : modin.pandas.DataFrame, modin.pandas.Series\n            The Modin DataFrame/Series to convert.\n\n        Returns\n        -------\n        ray.data.Dataset\n            Converted object with type depending on input.\n\n        Notes\n        -----\n        Modin DataFrame/Series can only be converted to a Ray Dataset if Modin uses a Ray engine.\n        If another engine is used, the runtime exception will be raised.\n        \"\"\"\n        raise RuntimeError(\n            \"Modin Dataframe can only be converted to a Ray Dataset if Modin uses a Ray engine.\"\n        )\n\n    @classmethod\n    def to_dask(cls, modin_obj):\n        \"\"\"\n        Convert a Modin DataFrame to a Dask DataFrame.\n\n        Parameters\n        ----------\n        modin_obj : modin.pandas.DataFrame, modin.pandas.Series\n            The Modin DataFrame/Series to convert.\n\n        Returns\n        -------\n        dask.dataframe.DataFrame or dask.dataframe.Series\n            Converted object with type depending on input.\n\n        Notes\n        -----\n        Modin DataFrame/Series can only be converted to a Dask DataFrame/Series if Modin uses a Dask engine.\n        If another engine is used, the runtime exception will be raised.\n        \"\"\"\n        raise RuntimeError(\n            \"Modin DataFrame can only be converted to a Dask DataFrame if Modin uses a Dask engine.\"\n        )\n"
  },
  {
    "path": "modin/core/io/sql/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"SQL format type IO functions implementations.\"\"\"\n"
  },
  {
    "path": "modin/core/io/sql/sql_dispatcher.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"\nModule houses `SQLDispatcher` class.\n\n`SQLDispatcher` contains utils for handling SQL queries or database tables,\ninherits util functions for handling files from `FileDispatcher` class and can be\nused as base class for dipatchers of SQL queries.\n\"\"\"\n\nimport math\n\nimport numpy as np\nimport pandas\n\nfrom modin.config import NPartitions, ReadSqlEngine\nfrom modin.core.io.file_dispatcher import FileDispatcher\nfrom modin.db_conn import ModinDatabaseConnection\n\n\nclass SQLDispatcher(FileDispatcher):\n    \"\"\"Class handles utils for reading SQL queries or database tables.\"\"\"\n\n    @classmethod\n    def _is_supported_sqlalchemy_object(cls, obj):  # noqa: GL08\n        supported = None\n        try:\n            import sqlalchemy as sa\n\n            supported = isinstance(obj, (sa.engine.Engine, sa.engine.Connection))\n        except ImportError:\n            supported = False\n        return supported\n\n    @classmethod\n    def _read(cls, sql, con, index_col=None, **kwargs):\n        \"\"\"\n        Read a SQL query or database table into a query compiler.\n\n        Parameters\n        ----------\n        sql : str or SQLAlchemy 
Selectable (select or text object)\n            SQL query to be executed or a table name.\n        con : SQLAlchemy connectable, str, sqlite3 connection, or ModinDatabaseConnection\n            Connection object to database.\n        index_col : str or list of str, optional\n            Column(s) to set as index(MultiIndex).\n        **kwargs : dict\n            Parameters to pass into `pandas.read_sql` function.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            Query compiler with imported data for further processing.\n        \"\"\"\n        if isinstance(con, str):\n            con = ModinDatabaseConnection(\"sqlalchemy\", con)\n\n        if cls._is_supported_sqlalchemy_object(con):\n            con = ModinDatabaseConnection(\n                \"sqlalchemy\", con.engine.url.render_as_string(hide_password=False)\n            )\n\n        if not isinstance(con, ModinDatabaseConnection):\n            return cls.single_worker_read(\n                sql,\n                con=con,\n                index_col=index_col,\n                read_sql_engine=ReadSqlEngine.get(),\n                reason=\"To use the parallel implementation of `read_sql`, pass either \"\n                + \"a SQLAlchemy connectable, the SQL connection string, or a ModinDatabaseConnection \"\n                + \"with the arguments required to make a connection, instead \"\n                + f\"of {type(con)}. 
For documentation on the ModinDatabaseConnection, see \"\n                + \"https://modin.readthedocs.io/en/latest/supported_apis/io_supported.html#connecting-to-a-database-for-read-sql\",\n                **kwargs,\n            )\n        row_count_query = con.row_count_query(sql)\n        connection_for_pandas = con.get_connection()\n        colum_names_query = con.column_names_query(sql)\n        row_cnt = pandas.read_sql(row_count_query, connection_for_pandas).squeeze()\n        cols_names_df = pandas.read_sql(\n            colum_names_query, connection_for_pandas, index_col=index_col\n        )\n        cols_names = cols_names_df.columns\n        num_partitions = NPartitions.get()\n        partition_ids = [None] * num_partitions\n        index_ids = [None] * num_partitions\n        dtypes_ids = [None] * num_partitions\n        limit = math.ceil(row_cnt / num_partitions)\n        for part in range(num_partitions):\n            offset = part * limit\n            query = con.partition_query(sql, limit, offset)\n            *partition_ids[part], index_ids[part], dtypes_ids[part] = cls.deploy(\n                func=cls.parse,\n                f_kwargs={\n                    \"num_splits\": num_partitions,\n                    \"sql\": query,\n                    \"con\": con,\n                    \"index_col\": index_col,\n                    \"read_sql_engine\": ReadSqlEngine.get(),\n                    **kwargs,\n                },\n                num_returns=num_partitions + 2,\n            )\n            partition_ids[part] = [\n                cls.frame_partition_cls(obj) for obj in partition_ids[part]\n            ]\n        if index_col is None:  # sum all lens returned from partitions\n            index_lens = cls.materialize(index_ids)\n            new_index = pandas.RangeIndex(sum(index_lens))\n        else:  # concat index returned from partitions\n            index_lst = [\n                x for part_index in cls.materialize(index_ids) for x in 
part_index\n            ]\n            new_index = pandas.Index(index_lst).set_names(index_col)\n        new_frame = cls.frame_cls(np.array(partition_ids), new_index, cols_names)\n        new_frame.synchronize_labels(axis=0)\n        return cls.query_compiler_cls(new_frame)\n\n    @classmethod\n    def write(cls, qc, **kwargs):\n        \"\"\"\n        Write records stored in the `qc` to a SQL database.\n\n        Parameters\n        ----------\n        qc : BaseQueryCompiler\n            The query compiler of the Modin dataframe that we want to run ``to_sql`` on.\n        **kwargs : dict\n            Parameters for ``pandas.to_sql(**kwargs)``.\n        \"\"\"\n        # we first insert an empty DF in order to create the full table in the database\n        # This also helps to validate the input against pandas\n        # we would like to_sql() to complete only when all rows have been inserted into the database\n        # since the mapping operation is non-blocking, each partition will return an empty DF\n        # so at the end, the blocking operation will be this empty DF to_pandas\n\n        if not isinstance(\n            kwargs[\"con\"], str\n        ) and not cls._is_supported_sqlalchemy_object(kwargs[\"con\"]):\n            return cls.base_io.to_sql(qc, **kwargs)\n\n        # In the case that we are given a SQLAlchemy Connection or Engine, the objects\n        # are not pickleable. 
We have to convert it to the URL string and connect from\n        # each of the workers.\n        if cls._is_supported_sqlalchemy_object(kwargs[\"con\"]):\n            kwargs[\"con\"] = kwargs[\"con\"].engine.url.render_as_string(\n                hide_password=False\n            )\n\n        empty_df = qc.getitem_row_array([0]).to_pandas().head(0)\n        empty_df.to_sql(**kwargs)\n        # so each partition will append its respective DF\n        kwargs[\"if_exists\"] = \"append\"\n        columns = qc.columns\n\n        def func(df):  # pragma: no cover\n            \"\"\"\n            Override column names in the wrapped dataframe and convert it to SQL.\n\n            Notes\n            -----\n            This function returns an empty ``pandas.DataFrame`` because ``apply_full_axis``\n            expects a Frame object as a result of operation (and ``to_sql`` has no dataframe result).\n            \"\"\"\n            df.columns = columns\n            df.to_sql(**kwargs)\n            return pandas.DataFrame()\n\n        # Ensure that the metadata is synchronized\n        qc._modin_frame._propagate_index_objs(axis=None)\n        result = qc._modin_frame.apply_full_axis(1, func, new_index=[], new_columns=[])\n        cls.materialize(\n            [part.list_of_blocks[0] for row in result._partitions for part in row]\n        )\n"
  },
  {
    "path": "modin/core/io/text/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Text format type IO functions implementations.\"\"\"\n"
  },
  {
    "path": "modin/core/io/text/csv_dispatcher.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module houses `CSVDispatcher` class, that is used for reading `.csv` files.\"\"\"\n\nfrom modin.core.io.text.text_file_dispatcher import TextFileDispatcher\n\n\nclass CSVDispatcher(TextFileDispatcher):\n    \"\"\"Class handles utils for reading `.csv` files.\"\"\"\n"
  },
  {
    "path": "modin/core/io/text/excel_dispatcher.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module houses `ExcelDispatcher` class, that is used for reading excel files.\"\"\"\n\nimport os\nimport re\nimport warnings\nfrom io import BytesIO\n\nimport pandas\nfrom pandas.io.common import stringify_path\n\nfrom modin.config import NPartitions\nfrom modin.core.io.text.text_file_dispatcher import TextFileDispatcher\nfrom modin.pandas.io import ExcelFile\n\nEXCEL_READ_BLOCK_SIZE = 4096\n\n\nclass ExcelDispatcher(TextFileDispatcher):\n    \"\"\"Class handles utils for reading excel files.\"\"\"\n\n    @classmethod\n    def _read(cls, io, **kwargs):\n        \"\"\"\n        Read data from `io` according to the passed `read_excel` `kwargs` parameters.\n\n        Parameters\n        ----------\n        io : str, bytes, ExcelFile, xlrd.Book, path object, or file-like object\n            `io` parameter of `read_excel` function.\n        **kwargs : dict\n            Parameters of `read_excel` function.\n\n        Returns\n        -------\n        new_query_compiler : BaseQueryCompiler\n            Query compiler with imported data for further processing.\n        \"\"\"\n        io = stringify_path(io)\n        if (\n            kwargs.get(\"engine\", None) is 
not None\n            and kwargs.get(\"engine\") != \"openpyxl\"\n        ):\n            return cls.single_worker_read(\n                io,\n                reason=\"Modin only implements parallel `read_excel` with `openpyxl` engine, \"\n                + 'please specify `engine=None` or `engine=\"openpyxl\"` to '\n                + \"use Modin's parallel implementation.\",\n                **kwargs\n            )\n\n        if kwargs.get(\"skiprows\") is not None:\n            return cls.single_worker_read(\n                io,\n                reason=\"Modin doesn't support 'skiprows' parameter of `read_excel`\",\n                **kwargs\n            )\n\n        if isinstance(io, bytes):\n            io = BytesIO(io)\n\n        # isinstance(ExcelFile, os.PathLike) == True\n        if not isinstance(io, (str, os.PathLike, BytesIO)) or isinstance(\n            io, (ExcelFile, pandas.ExcelFile)\n        ):\n            if isinstance(io, ExcelFile):\n                io._set_pandas_mode()\n            return cls.single_worker_read(\n                io,\n                reason=\"Modin only implements parallel `read_excel` the following types of `io`: \"\n                + \"str, os.PathLike, io.BytesIO.\",\n                **kwargs\n            )\n\n        from zipfile import ZipFile\n\n        from openpyxl.reader.excel import ExcelReader\n        from openpyxl.worksheet._reader import WorksheetReader\n        from openpyxl.worksheet.worksheet import Worksheet\n\n        from modin.core.storage_formats.pandas.parsers import PandasExcelParser\n\n        sheet_name = kwargs.get(\"sheet_name\", 0)\n        if sheet_name is None or isinstance(sheet_name, list):\n            return cls.single_worker_read(\n                io,\n                reason=\"`read_excel` functionality is only implemented for a single sheet at a \"\n                + \"time. 
Multiple sheet reading coming soon!\",\n                **kwargs\n            )\n\n        warnings.warn(\n            \"Parallel `read_excel` is a new feature! If you run into any \"\n            + \"problems, please visit https://github.com/modin-project/modin/issues. \"\n            + \"If you find a new issue and can't file it on GitHub, please \"\n            + \"email bug_reports@modin.org.\"\n        )\n\n        # NOTE: ExcelReader() in read-only mode does not close file handle by itself\n        # work around that by passing file object if we received some path\n        io_file = open(io, \"rb\") if isinstance(io, (str, os.PathLike)) else io\n        try:\n            ex = ExcelReader(io_file, read_only=True)\n            ex.read()\n            wb = ex.wb\n\n            # Get shared strings\n            ex.read_manifest()\n            ex.read_strings()\n            ws = Worksheet(wb)\n        finally:\n            if isinstance(io, (str, os.PathLike)):\n                # close only if it were us who opened the object\n                io_file.close()\n\n        pandas_kw = dict(kwargs)  # preserve original kwargs\n        with ZipFile(io) as z:\n            # Convert index to sheet name in file\n            if isinstance(sheet_name, int):\n                sheet_name = \"sheet{}\".format(sheet_name + 1)\n            else:\n                sheet_name = \"sheet{}\".format(wb.sheetnames.index(sheet_name) + 1)\n            if any(sheet_name.lower() in name for name in z.namelist()):\n                sheet_name = sheet_name.lower()\n            elif any(sheet_name.title() in name for name in z.namelist()):\n                sheet_name = sheet_name.title()\n            else:\n                raise ValueError(\"Sheet {} not found\".format(sheet_name.lower()))\n            # Pass this value to the workers\n            kwargs[\"sheet_name\"] = sheet_name\n\n            f = z.open(\"xl/worksheets/{}.xml\".format(sheet_name))\n            f = BytesIO(f.read())\n         
   total_bytes = cls.file_size(f)\n\n            # Read some bytes from the sheet so we can extract the XML header and first\n            # line. We need to make sure we get the first line of the data as well\n            # because that is where the column names are. The header information will\n            # be extracted and sent to all of the nodes.\n            sheet_block = f.read(EXCEL_READ_BLOCK_SIZE)\n            end_of_row_tag = b\"</row>\"\n            while end_of_row_tag not in sheet_block:\n                sheet_block += f.read(EXCEL_READ_BLOCK_SIZE)\n            idx_of_header_end = sheet_block.index(end_of_row_tag) + len(end_of_row_tag)\n            sheet_header_with_first_row = sheet_block[:idx_of_header_end]\n\n            if kwargs[\"header\"] is not None:\n                # Reset the file pointer to begin at the end of the header information.\n                f.seek(idx_of_header_end)\n                sheet_header = sheet_header_with_first_row\n            else:\n                start_of_row_tag = b\"<row\"\n                idx_of_header_start = sheet_block.index(start_of_row_tag)\n                sheet_header = sheet_block[:idx_of_header_start]\n                # Reset the file pointer to begin at the end of the header information.\n                f.seek(idx_of_header_start)\n\n            kwargs[\"_header\"] = sheet_header\n            footer = b\"</sheetData></worksheet>\"\n            # Use openpyxml to parse the data\n            common_args = (\n                ws,\n                BytesIO(sheet_header_with_first_row + footer),\n                ex.shared_strings,\n                False,\n            )\n            if cls.need_rich_text_param():\n                reader = WorksheetReader(*common_args, rich_text=False)\n            else:\n                reader = WorksheetReader(*common_args)\n            # Attach cells to the worksheet\n            reader.bind_cells()\n            data = PandasExcelParser.get_sheet_data(\n                ws, 
kwargs.get(\"convert_float\", True)\n            )\n            # Extract column names from parsed data.\n            if kwargs[\"header\"] is None:\n                column_names = pandas.RangeIndex(len(data[0]))\n            else:\n                column_names = pandas.Index(data[0])\n            index_col = kwargs.get(\"index_col\", None)\n            # Remove column names that are specified as `index_col`\n            if index_col is not None:\n                column_names = column_names.drop(column_names[index_col])\n\n            if not all(column_names) or kwargs.get(\"usecols\"):\n                # some column names are empty, use pandas reader to take the names from it\n                pandas_kw[\"nrows\"] = 1\n                df = pandas.read_excel(io, **pandas_kw)\n                column_names = df.columns\n\n            # Compute partition metadata upfront so it is uniform for all partitions\n            chunk_size = max(1, (total_bytes - f.tell()) // NPartitions.get())\n            column_widths, num_splits = cls._define_metadata(\n                pandas.DataFrame(columns=column_names), column_names\n            )\n            kwargs[\"fname\"] = io\n            # Skiprows will be used to inform a partition how many rows come before it.\n            kwargs[\"skiprows\"] = 0\n            row_count = 0\n            data_ids = []\n            index_ids = []\n            dtypes_ids = []\n\n            kwargs[\"num_splits\"] = num_splits\n\n            while f.tell() < total_bytes:\n                args = kwargs\n                args[\"skiprows\"] = row_count + args[\"skiprows\"]\n                args[\"start\"] = f.tell()\n                chunk = f.read(chunk_size)\n                # This edge case can happen when we have reached the end of the data\n                # but not the end of the file.\n                if b\"<row\" not in chunk:\n                    break\n                row_close_tag = b\"</row>\"\n                row_count = 
re.subn(row_close_tag, b\"\", chunk)[1]\n\n                # Make sure we are reading at least one row.\n                while row_count == 0:\n                    chunk += f.read(chunk_size)\n                    row_count += re.subn(row_close_tag, b\"\", chunk)[1]\n\n                last_index = chunk.rindex(row_close_tag)\n                f.seek(-(len(chunk) - last_index) + len(row_close_tag), 1)\n                args[\"end\"] = f.tell()\n\n                # If there is no data, exit before triggering computation.\n                if b\"</row>\" not in chunk and b\"</sheetData>\" in chunk:\n                    break\n                remote_results_list = cls.deploy(\n                    func=cls.parse,\n                    f_kwargs=args,\n                    num_returns=num_splits + 2,\n                )\n                data_ids.append(remote_results_list[:-2])\n                index_ids.append(remote_results_list[-2])\n                dtypes_ids.append(remote_results_list[-1])\n\n                # The end of the spreadsheet\n                if b\"</sheetData>\" in chunk:\n                    break\n\n        # Compute the index based on a sum of the lengths of each partition (by default)\n        # or based on the column(s) that were requested.\n        if index_col is None:\n            row_lengths = cls.materialize(index_ids)\n            new_index = pandas.RangeIndex(sum(row_lengths))\n        else:\n            index_objs = cls.materialize(index_ids)\n            row_lengths = [len(o) for o in index_objs]\n            new_index = index_objs[0].append(index_objs[1:])\n\n        data_ids = cls.build_partition(data_ids, row_lengths, column_widths)\n\n        # Compute dtypes by getting collecting and combining all of the partitions. The\n        # reported dtypes from differing rows can be different based on the inference in\n        # the limited data seen by each worker. 
We use pandas to compute the exact dtype\n        # over the whole column for each column. The index is set below.\n        dtypes = cls.get_dtypes(dtypes_ids, column_names)\n\n        new_frame = cls.frame_cls(\n            data_ids,\n            new_index,\n            column_names,\n            row_lengths,\n            column_widths,\n            dtypes=dtypes,\n        )\n        new_query_compiler = cls.query_compiler_cls(new_frame)\n        if index_col is None:\n            new_query_compiler._modin_frame.synchronize_labels(axis=0)\n        return new_query_compiler\n"
  },
  {
    "path": "modin/core/io/text/fwf_dispatcher.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module houses `FWFDispatcher` class, that is used for reading of tables with fixed-width formatted lines.\"\"\"\n\nfrom typing import Optional, Sequence, Tuple, Union\n\nfrom modin.core.io.text.text_file_dispatcher import TextFileDispatcher\n\n\nclass FWFDispatcher(TextFileDispatcher):\n    \"\"\"Class handles utils for reading of tables with fixed-width formatted lines.\"\"\"\n\n    @classmethod\n    def check_parameters_support(\n        cls,\n        filepath_or_buffer,\n        read_kwargs: dict,\n        skiprows_md: Union[Sequence, callable, int],\n        header_size: int,\n    ) -> Tuple[bool, Optional[str]]:\n        \"\"\"\n        Check support of parameters of `read_fwf` function.\n\n        Parameters\n        ----------\n        filepath_or_buffer : str, path object or file-like object\n            `filepath_or_buffer` parameter of `read_fwf` function.\n        read_kwargs : dict\n            Parameters of `read_fwf` function.\n        skiprows_md : int, array or callable\n            `skiprows` parameter modified for easier handling by Modin.\n        header_size : int\n            Number of rows that are used by header.\n\n        Returns\n    
    -------\n        bool\n            Whether passed parameters are supported or not.\n        Optional[str]\n            `None` if parameters are supported, otherwise an error\n            message describing why parameters are not supported.\n        \"\"\"\n        if read_kwargs[\"infer_nrows\"] > 100:\n            return (\n                False,\n                \"`infer_nrows` is a significant portion of the number of rows, so Pandas may be faster\",\n            )\n        return super().check_parameters_support(\n            filepath_or_buffer, read_kwargs, skiprows_md, header_size\n        )\n"
  },
  {
    "path": "modin/core/io/text/json_dispatcher.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module houses `JSONDispatcher` class, that is used for reading `.json` files.\"\"\"\n\nfrom io import BytesIO\n\nimport numpy as np\nimport pandas\nfrom pandas.io.common import stringify_path\n\nfrom modin.config import NPartitions\nfrom modin.core.io.file_dispatcher import OpenFile\nfrom modin.core.io.text.text_file_dispatcher import TextFileDispatcher\n\n\nclass JSONDispatcher(TextFileDispatcher):\n    \"\"\"Class handles utils for reading `.json` files.\"\"\"\n\n    @classmethod\n    def _read(cls, path_or_buf, **kwargs):\n        \"\"\"\n        Read data from `path_or_buf` according to the passed `read_json` `kwargs` parameters.\n\n        Parameters\n        ----------\n        path_or_buf : str, path object or file-like object\n            `path_or_buf` parameter of `read_json` function.\n        **kwargs : dict\n            Parameters of `read_json` function.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            Query compiler with imported data for further processing.\n        \"\"\"\n        path_or_buf = stringify_path(path_or_buf)\n        path_or_buf = cls.get_path_or_buffer(path_or_buf)\n        if isinstance(path_or_buf, 
str):\n            if not cls.file_exists(\n                path_or_buf, storage_options=kwargs.get(\"storage_options\")\n            ):\n                return cls.single_worker_read(\n                    path_or_buf, reason=cls._file_not_found_msg(path_or_buf), **kwargs\n                )\n            path_or_buf = cls.get_path(path_or_buf)\n        elif not cls.pathlib_or_pypath(path_or_buf):\n            return cls.single_worker_read(\n                path_or_buf, reason=cls.BUFFER_UNSUPPORTED_MSG, **kwargs\n            )\n        if not kwargs.get(\"lines\", False):\n            return cls.single_worker_read(\n                path_or_buf, reason=\"`lines` argument not supported\", **kwargs\n            )\n        with OpenFile(\n            path_or_buf,\n            \"rb\",\n            **(kwargs.get(\"storage_options\", None) or {}),\n        ) as f:\n            columns = pandas.read_json(BytesIO(b\"\" + f.readline()), lines=True).columns\n        kwargs[\"columns\"] = columns\n        empty_pd_df = pandas.DataFrame(columns=columns)\n\n        with OpenFile(\n            path_or_buf,\n            \"rb\",\n            kwargs.get(\"compression\", \"infer\"),\n            **(kwargs.get(\"storage_options\", None) or {}),\n        ) as f:\n            column_widths, num_splits = cls._define_metadata(empty_pd_df, columns)\n            args = {\"fname\": path_or_buf, \"num_splits\": num_splits, **kwargs}\n            splits, _ = cls.partitioned_file(\n                f,\n                num_partitions=NPartitions.get(),\n            )\n            partition_ids = [None] * len(splits)\n            index_ids = [None] * len(splits)\n            dtypes_ids = [None] * len(splits)\n            for idx, (start, end) in enumerate(splits):\n                args.update({\"start\": start, \"end\": end})\n                *partition_ids[idx], index_ids[idx], dtypes_ids[idx], _ = cls.deploy(\n                    func=cls.parse,\n                    f_kwargs=args,\n               
     num_returns=num_splits + 3,\n                )\n        # partition_id[-1] contains the columns for each partition, which will be useful\n        # for implementing when `lines=False`.\n        row_lengths = cls.materialize(index_ids)\n        new_index = pandas.RangeIndex(sum(row_lengths))\n\n        partition_ids = cls.build_partition(partition_ids, row_lengths, column_widths)\n\n        # Compute dtypes by getting collecting and combining all of the partitions. The\n        # reported dtypes from differing rows can be different based on the inference in\n        # the limited data seen by each worker. We use pandas to compute the exact dtype\n        # over the whole column for each column. The index is set below.\n        dtypes = cls.get_dtypes(dtypes_ids, columns)\n\n        new_frame = cls.frame_cls(\n            np.array(partition_ids),\n            new_index,\n            columns,\n            row_lengths,\n            column_widths,\n            dtypes=dtypes,\n        )\n        new_frame.synchronize_labels(axis=0)\n        return cls.query_compiler_cls(new_frame)\n"
  },
  {
    "path": "modin/core/io/text/text_file_dispatcher.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"\nModule houses `TextFileDispatcher` class.\n\n`TextFileDispatcher` contains utils for text formats files, inherits util functions for\nfiles from `FileDispatcher` class and can be used as base class for dipatchers of SQL queries.\n\"\"\"\nimport codecs\nimport io\nimport os\nimport warnings\nfrom csv import QUOTE_NONE\nfrom typing import Callable, Optional, Sequence, Tuple, Union\n\nimport numpy as np\nimport pandas\nimport pandas._libs.lib as lib\nfrom pandas.core.dtypes.common import is_list_like\nfrom pandas.io.common import stringify_path\n\nfrom modin.config import MinColumnPartitionSize, NPartitions\nfrom modin.core.io.file_dispatcher import FileDispatcher, OpenFile\nfrom modin.core.io.text.utils import CustomNewlineIterator\nfrom modin.core.storage_formats.pandas.utils import compute_chunksize\nfrom modin.utils import _inherit_docstrings\n\nColumnNamesTypes = Tuple[Union[pandas.Index, pandas.MultiIndex]]\nIndexColType = Union[int, str, bool, Sequence[int], Sequence[str], None]\n\n\nclass TextFileDispatcher(FileDispatcher):\n    \"\"\"Class handles utils for reading text formats files.\"\"\"\n\n    @classmethod\n    def get_path_or_buffer(cls, 
filepath_or_buffer):\n        \"\"\"\n        Extract path from `filepath_or_buffer`.\n\n        Parameters\n        ----------\n        filepath_or_buffer : str, path object or file-like object\n            `filepath_or_buffer` parameter of `read_csv` function.\n\n        Returns\n        -------\n        str or path object\n            verified `filepath_or_buffer` parameter.\n\n        Notes\n        -----\n        Given a buffer, try and extract the filepath from it so that we can\n        use it without having to fall back to pandas and share file objects between\n        workers. Given a filepath, return it immediately.\n        \"\"\"\n        if (\n            hasattr(filepath_or_buffer, \"name\")\n            and hasattr(filepath_or_buffer, \"seekable\")\n            and filepath_or_buffer.seekable()\n            and filepath_or_buffer.tell() == 0\n        ):\n            buffer_filepath = filepath_or_buffer.name\n            if cls.file_exists(buffer_filepath):\n                warnings.warn(\n                    \"For performance reasons, the filepath will be \"\n                    + \"used in place of the file handle passed in \"\n                    + \"to load the data\"\n                )\n                return cls.get_path(buffer_filepath)\n        return filepath_or_buffer\n\n    @classmethod\n    def build_partition(cls, partition_ids, row_lengths, column_widths):\n        \"\"\"\n        Build array with partitions of `cls.frame_partition_cls` class.\n\n        Parameters\n        ----------\n        partition_ids : list\n                Array with references to the partitions data.\n        row_lengths : list\n                Partitions rows lengths.\n        column_widths : list\n                Number of columns in each partition.\n\n        Returns\n        -------\n        np.ndarray\n            array with shape equals to the shape of `partition_ids` and\n            filed with partitions objects.\n        \"\"\"\n        return 
np.array(\n            [\n                [\n                    cls.frame_partition_cls(\n                        partition_ids[i][j],\n                        length=row_lengths[i],\n                        width=column_widths[j],\n                    )\n                    for j in range(len(partition_ids[i]))\n                ]\n                for i in range(len(partition_ids))\n            ]\n        )\n\n    @classmethod\n    def pathlib_or_pypath(cls, filepath_or_buffer):\n        \"\"\"\n        Check if `filepath_or_buffer` is instance of `py.path.local` or `pathlib.Path`.\n\n        Parameters\n        ----------\n        filepath_or_buffer : str, path object or file-like object\n            `filepath_or_buffer` parameter of `read_csv` function.\n\n        Returns\n        -------\n        bool\n            Whether or not `filepath_or_buffer` is instance of `py.path.local`\n            or `pathlib.Path`.\n        \"\"\"\n        try:\n            import py\n\n            if isinstance(filepath_or_buffer, py.path.local):\n                return True\n        except ImportError:  # pragma: no cover\n            pass\n        try:\n            import pathlib\n\n            if isinstance(filepath_or_buffer, pathlib.Path):\n                return True\n        except ImportError:  # pragma: no cover\n            pass\n        return False\n\n    @classmethod\n    def offset(\n        cls,\n        f,\n        offset_size: int,\n        quotechar: bytes = b'\"',\n        is_quoting: bool = True,\n        encoding: str = None,\n        newline: bytes = None,\n    ):\n        \"\"\"\n        Move the file offset at the specified amount of bytes.\n\n        Parameters\n        ----------\n        f : file-like object\n            File handle that should be used for offset movement.\n        offset_size : int\n            Number of bytes to read and ignore.\n        quotechar : bytes, default: b'\"'\n            Indicate quote in a file.\n        is_quoting : 
bool, default: True\n            Whether or not to consider quotes.\n        encoding : str, optional\n            Encoding of `f`.\n        newline : bytes, optional\n            Byte or sequence of bytes indicating line endings.\n\n        Returns\n        -------\n        bool\n            If file pointer reached the end of the file, but did not find\n            closing quote returns `False`. `True` in any other case.\n        \"\"\"\n        if is_quoting:\n            chunk = f.read(offset_size)\n            outside_quotes = not chunk.count(quotechar) % 2\n        else:\n            f.seek(offset_size, os.SEEK_CUR)\n            outside_quotes = True\n\n        # after we read `offset_size` bytes, we most likely break the line but\n        # the modin implementation doesn't work correctly in the case, so we must\n        # make sure that the line is read completely to the lineterminator,\n        # which is what the `_read_rows` does\n        outside_quotes, _ = cls._read_rows(\n            f,\n            nrows=1,\n            quotechar=quotechar,\n            is_quoting=is_quoting,\n            outside_quotes=outside_quotes,\n            encoding=encoding,\n            newline=newline,\n        )\n\n        return outside_quotes\n\n    @classmethod\n    def partitioned_file(\n        cls,\n        f,\n        num_partitions: int = None,\n        nrows: int = None,\n        skiprows: int = None,\n        quotechar: bytes = b'\"',\n        is_quoting: bool = True,\n        encoding: str = None,\n        newline: bytes = None,\n        header_size: int = 0,\n        pre_reading: int = 0,\n        get_metadata_kw: dict = None,\n    ):\n        \"\"\"\n        Compute chunk sizes in bytes for every partition.\n\n        Parameters\n        ----------\n        f : file-like object\n            File handle of file to be partitioned.\n        num_partitions : int, optional\n            For what number of partitions split a file.\n            If not specified grabs 
the value from `modin.config.NPartitions.get()`.\n        nrows : int, optional\n            Number of rows of file to read.\n        skiprows : int, optional\n            Specifies rows to skip.\n        quotechar : bytes, default: b'\"'\n            Indicate quote in a file.\n        is_quoting : bool, default: True\n            Whether or not to consider quotes.\n        encoding : str, optional\n            Encoding of `f`.\n        newline : bytes, optional\n            Byte or sequence of bytes indicating line endings.\n        header_size : int, default: 0\n            Number of rows, that occupied by header.\n        pre_reading : int, default: 0\n            Number of rows between header and skipped rows, that should be read.\n        get_metadata_kw : dict, optional\n            Keyword arguments for `cls.read_callback` to compute metadata if needed.\n            This option is not compatible with `pre_reading!=0`.\n\n        Returns\n        -------\n        list\n            List with the next elements:\n                int : partition start read byte\n                int : partition end read byte\n        pandas.DataFrame or None\n            Dataframe from which metadata can be retrieved. 
Can be None if `get_metadata_kw=None`.\n        \"\"\"\n        if get_metadata_kw is not None and pre_reading != 0:\n            raise ValueError(\n                f\"Incompatible combination of parameters: {get_metadata_kw=}, {pre_reading=}\"\n            )\n        read_rows_counter = 0\n        outside_quotes = True\n\n        if num_partitions is None:\n            num_partitions = NPartitions.get() - 1 if pre_reading else NPartitions.get()\n\n        rows_skipper = cls.rows_skipper_builder(\n            f, quotechar, is_quoting=is_quoting, encoding=encoding, newline=newline\n        )\n        result = []\n\n        file_size = cls.file_size(f)\n\n        pd_df_metadata = None\n        if pre_reading:\n            rows_skipper(header_size)\n            pre_reading_start = f.tell()\n            outside_quotes, read_rows = cls._read_rows(\n                f,\n                nrows=pre_reading,\n                quotechar=quotechar,\n                is_quoting=is_quoting,\n                outside_quotes=outside_quotes,\n                encoding=encoding,\n                newline=newline,\n            )\n            read_rows_counter += read_rows\n\n            result.append((pre_reading_start, f.tell()))\n\n            # add outside_quotes\n            if is_quoting and not outside_quotes:\n                warnings.warn(\"File has mismatched quotes\")\n            rows_skipper(skiprows)\n        else:\n            rows_skipper(skiprows)\n            if get_metadata_kw:\n                start = f.tell()\n                # For correct behavior, if we want to avoid double skipping rows,\n                # we need to get metadata after skipping.\n                pd_df_metadata = cls.read_callback(f, **get_metadata_kw)\n                f.seek(start)\n            rows_skipper(header_size)\n\n        start = f.tell()\n        if nrows:\n            partition_size = max(1, num_partitions, nrows // num_partitions)\n            while f.tell() < file_size and 
read_rows_counter < nrows:\n                if read_rows_counter + partition_size > nrows:\n                    # it's possible only if is_quoting==True\n                    partition_size = nrows - read_rows_counter\n                outside_quotes, read_rows = cls._read_rows(\n                    f,\n                    nrows=partition_size,\n                    quotechar=quotechar,\n                    is_quoting=is_quoting,\n                    encoding=encoding,\n                    newline=newline,\n                )\n                result.append((start, f.tell()))\n                start = f.tell()\n                read_rows_counter += read_rows\n\n                # add outside_quotes\n                if is_quoting and not outside_quotes:\n                    warnings.warn(\"File has mismatched quotes\")\n        else:\n            partition_size = max(1, num_partitions, file_size // num_partitions)\n            while f.tell() < file_size:\n                outside_quotes = cls.offset(\n                    f,\n                    offset_size=partition_size,\n                    quotechar=quotechar,\n                    is_quoting=is_quoting,\n                    encoding=encoding,\n                    newline=newline,\n                )\n\n                result.append((start, f.tell()))\n                start = f.tell()\n\n                # add outside_quotes\n                if is_quoting and not outside_quotes:\n                    warnings.warn(\"File has mismatched quotes\")\n        return result, pd_df_metadata\n\n    @classmethod\n    def _read_rows(\n        cls,\n        f,\n        nrows: int,\n        quotechar: bytes = b'\"',\n        is_quoting: bool = True,\n        outside_quotes: bool = True,\n        encoding: str = None,\n        newline: bytes = None,\n    ):\n        \"\"\"\n        Move the file offset at the specified amount of rows.\n\n        Parameters\n        ----------\n        f : file-like object\n            File handle that 
should be used for offset movement.\n        nrows : int\n            Number of rows to read.\n        quotechar : bytes, default: b'\"'\n            Indicate quote in a file.\n        is_quoting : bool, default: True\n            Whether or not to consider quotes.\n        outside_quotes : bool, default: True\n            Whether the file pointer is within quotes or not at the time this function is called.\n        encoding : str, optional\n            Encoding of `f`.\n        newline : bytes, optional\n            Byte or sequence of bytes indicating line endings.\n\n        Returns\n        -------\n        bool\n            If file pointer reached the end of the file, but did not find closing quote\n            returns `False`. `True` in any other case.\n        int\n            Number of rows that were read.\n        \"\"\"\n        if nrows is not None and nrows <= 0:\n            return True, 0\n\n        rows_read = 0\n\n        if encoding and (\n            \"utf\" in encoding\n            and \"8\" not in encoding\n            or encoding == \"unicode_escape\"\n            or encoding.replace(\"-\", \"_\") == \"utf_8_sig\"\n        ):\n            iterator = CustomNewlineIterator(f, newline)\n        else:\n            iterator = f\n\n        for line in iterator:\n            if is_quoting and line.count(quotechar) % 2:\n                outside_quotes = not outside_quotes\n            if outside_quotes:\n                rows_read += 1\n                if rows_read >= nrows:\n                    break\n\n        if isinstance(iterator, CustomNewlineIterator):\n            iterator.seek()\n\n        # case when EOF\n        if not outside_quotes:\n            rows_read += 1\n\n        return outside_quotes, rows_read\n\n    @classmethod\n    def compute_newline(cls, file_like, encoding, quotechar):\n        \"\"\"\n        Compute byte or sequence of bytes indicating line endings.\n\n        Parameters\n        ----------\n        file_like : file-like 
object\n            File handle that should be used for line endings computing.\n        encoding : str\n            Encoding of `file_like`.\n        quotechar : str\n            Quotechar used for parsing `file-like`.\n\n        Returns\n        -------\n        bytes\n            line endings\n        \"\"\"\n        newline = None\n\n        if encoding is None:\n            return newline, quotechar.encode(\"UTF-8\")\n\n        quotechar = quotechar.encode(encoding)\n        encoding = encoding.replace(\"-\", \"_\")\n\n        if (\n            \"utf\" in encoding\n            and \"8\" not in encoding\n            or encoding == \"unicode_escape\"\n            or encoding == \"utf_8_sig\"\n        ):\n            # trigger for computing f.newlines\n            file_like.readline()\n            # in bytes\n            newline = file_like.newlines.encode(encoding)\n            boms = ()\n            if encoding == \"utf_8_sig\":\n                boms = (codecs.BOM_UTF8,)\n            elif \"16\" in encoding:\n                boms = (codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE)\n            elif \"32\" in encoding:\n                boms = (codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE)\n\n            for bom in boms:\n                if newline.startswith(bom):\n                    bom_len = len(bom)\n                    newline = newline[bom_len:]\n                    quotechar = quotechar[bom_len:]\n                    break\n\n        return newline, quotechar\n\n    # _read helper functions\n    @classmethod\n    def rows_skipper_builder(\n        cls, f, quotechar, is_quoting, encoding=None, newline=None\n    ):\n        \"\"\"\n        Build object for skipping passed number of lines.\n\n        Parameters\n        ----------\n        f : file-like object\n            File handle that should be used for offset movement.\n        quotechar : bytes\n            Indicate quote in a file.\n        is_quoting : bool\n            Whether or not to consider quotes.\n     
   encoding : str, optional\n            Encoding of `f`.\n        newline : bytes, optional\n            Byte or sequence of bytes indicating line endings.\n\n        Returns\n        -------\n        object\n            skipper object.\n        \"\"\"\n\n        def skipper(n):\n            if n == 0 or n is None:\n                return 0\n            else:\n                return cls._read_rows(\n                    f,\n                    quotechar=quotechar,\n                    is_quoting=is_quoting,\n                    nrows=n,\n                    encoding=encoding,\n                    newline=newline,\n                )[1]\n\n        return skipper\n\n    @classmethod\n    def _define_header_size(\n        cls,\n        header: Union[int, Sequence[int], str, None] = \"infer\",\n        names: Optional[Sequence] = lib.no_default,\n    ) -> int:\n        \"\"\"\n        Define the number of rows that are used by header.\n\n        Parameters\n        ----------\n        header : int, list of int or str, default: \"infer\"\n            Original `header` parameter of `read_csv` function.\n        names :  array-like, optional\n            Original names parameter of `read_csv` function.\n\n        Returns\n        -------\n        header_size : int\n            The number of rows that are used by header.\n        \"\"\"\n        header_size = 0\n        if header == \"infer\" and names in [lib.no_default, None]:\n            header_size += 1\n        elif isinstance(header, int):\n            header_size += header + 1\n        elif hasattr(header, \"__iter__\") and not isinstance(header, str):\n            header_size += max(header) + 1\n\n        return header_size\n\n    @classmethod\n    def _define_metadata(\n        cls,\n        df: pandas.DataFrame,\n        column_names: ColumnNamesTypes,\n    ) -> Tuple[list, int]:\n        \"\"\"\n        Define partitioning metadata.\n\n        Parameters\n        ----------\n        df : pandas.DataFrame\n       
     The DataFrame to split.\n        column_names : ColumnNamesTypes\n            Column names of df.\n\n        Returns\n        -------\n        column_widths : list\n            Column width to use during new frame creation (number of\n            columns for each partition).\n        num_splits : int\n            The maximum number of splits to separate the DataFrame into.\n        \"\"\"\n        # This is the number of splits for the columns\n        num_splits = min(len(column_names) or 1, NPartitions.get())\n        min_block_size = MinColumnPartitionSize.get()\n        column_chunksize = compute_chunksize(df.shape[1], num_splits, min_block_size)\n        if column_chunksize > len(column_names):\n            column_widths = [len(column_names)]\n            # This prevents us from unnecessarily serializing a bunch of empty\n            # objects.\n            num_splits = 1\n        else:\n            # split columns into chunks with maximal size column_chunksize, for example\n            # if num_splits == 4, len(column_names) == 80 and column_chunksize == 32,\n            # column_widths will be [32, 32, 16, 0]\n            column_widths = [\n                (\n                    column_chunksize\n                    if len(column_names) > (column_chunksize * (i + 1))\n                    else (\n                        0\n                        if len(column_names) < (column_chunksize * i)\n                        else len(column_names) - (column_chunksize * i)\n                    )\n                )\n                for i in range(num_splits)\n            ]\n\n        return column_widths, num_splits\n\n    _parse_func = None\n\n    @classmethod\n    def preprocess_func(cls):  # noqa: RT01\n        \"\"\"Prepare a function for transmission to remote workers.\"\"\"\n        if cls._parse_func is None:\n            cls._parse_func = cls.put(cls.parse)\n        return cls._parse_func\n\n    @classmethod\n    def _launch_tasks(\n        cls, splits: 
list, *partition_args, **partition_kwargs\n    ) -> Tuple[list, list, list]:\n        \"\"\"\n        Launch tasks to read partitions.\n\n        Parameters\n        ----------\n        splits : list\n            List of tuples with partitions data, which defines\n            parser task (start/end read bytes and etc.).\n        *partition_args : tuple\n            Positional arguments to be passed to the parser function.\n        **partition_kwargs : dict\n            `kwargs` that should be passed to the parser function.\n\n        Returns\n        -------\n        partition_ids : list\n            array with references to the partitions data.\n        index_ids : list\n            array with references to the partitions index objects.\n        dtypes_ids : list\n            array with references to the partitions dtypes objects.\n        \"\"\"\n        partition_ids = [None] * len(splits)\n        index_ids = [None] * len(splits)\n        dtypes_ids = [None] * len(splits)\n        # this is done mostly for performance; see PR#5678 for details\n        func = cls.preprocess_func()\n        for idx, (start, end) in enumerate(splits):\n            partition_kwargs.update({\"start\": start, \"end\": end})\n            *partition_ids[idx], index_ids[idx], dtypes_ids[idx] = cls.deploy(\n                func=func,\n                f_args=partition_args,\n                f_kwargs=partition_kwargs,\n                num_returns=partition_kwargs.get(\"num_splits\") + 2,\n            )\n        return partition_ids, index_ids, dtypes_ids\n\n    @classmethod\n    def check_parameters_support(\n        cls,\n        filepath_or_buffer,\n        read_kwargs: dict,\n        skiprows_md: Union[Sequence, callable, int],\n        header_size: int,\n    ) -> Tuple[bool, Optional[str]]:\n        \"\"\"\n        Check support of only general parameters of `read_*` function.\n\n        Parameters\n        ----------\n        filepath_or_buffer : str, path object or file-like object\n 
           `filepath_or_buffer` parameter of `read_*` function.\n        read_kwargs : dict\n            Parameters of `read_*` function.\n        skiprows_md : int, array or callable\n            `skiprows` parameter modified for easier handling by Modin.\n        header_size : int\n            Number of rows that are used by header.\n\n        Returns\n        -------\n        bool\n            Whether passed parameters are supported or not.\n        Optional[str]\n            `None` if parameters are supported, otherwise an error\n            message describing why parameters are not supported.\n        \"\"\"\n        skiprows = read_kwargs.get(\"skiprows\")\n        if isinstance(filepath_or_buffer, str):\n            if not cls.file_exists(\n                filepath_or_buffer, read_kwargs.get(\"storage_options\")\n            ):\n                return (False, cls._file_not_found_msg(filepath_or_buffer))\n        elif not cls.pathlib_or_pypath(filepath_or_buffer):\n            return (False, cls.BUFFER_UNSUPPORTED_MSG)\n\n        if read_kwargs[\"chunksize\"] is not None:\n            return (False, \"`chunksize` parameter is not supported\")\n\n        if read_kwargs.get(\"iterator\"):\n            return (False, \"`iterator==True` parameter is not supported\")\n\n        if read_kwargs.get(\"dialect\") is not None:\n            return (False, \"`dialect` parameter is not supported\")\n\n        if read_kwargs[\"lineterminator\"] is not None:\n            return (False, \"`lineterminator` parameter is not supported\")\n\n        if read_kwargs[\"escapechar\"] is not None:\n            return (False, \"`escapechar` parameter is not supported\")\n\n        if read_kwargs.get(\"skipfooter\"):\n            if read_kwargs.get(\"nrows\") or read_kwargs.get(\"engine\") == \"c\":\n                return (False, \"Exception is raised by pandas itself\")\n\n        skiprows_supported = True\n        if is_list_like(skiprows_md) and skiprows_md[0] < header_size:\n      
      skiprows_supported = False\n        elif callable(skiprows):\n            # check if `skiprows` callable gives True for any of header indices\n            is_intersection = any(\n                cls._get_skip_mask(pandas.RangeIndex(header_size), skiprows)\n            )\n            if is_intersection:\n                skiprows_supported = False\n\n        if not skiprows_supported:\n            return (\n                False,\n                \"Values of `header` and `skiprows` parameters have intersections; \"\n                + \"this case is unsupported by Modin\",\n            )\n\n        return (True, None)\n\n    @classmethod\n    @_inherit_docstrings(pandas.io.parsers.base_parser.ParserBase._validate_usecols_arg)\n    def _validate_usecols_arg(cls, usecols):\n        msg = (\n            \"'usecols' must either be list-like of all strings, all unicode, \"\n            + \"all integers or a callable.\"\n        )\n        if usecols is not None:\n            if callable(usecols):\n                return usecols, None\n\n            if not is_list_like(usecols):\n                raise ValueError(msg)\n\n            usecols_dtype = lib.infer_dtype(usecols, skipna=False)\n\n            if usecols_dtype not in (\"empty\", \"integer\", \"string\"):\n                raise ValueError(msg)\n\n            usecols = set(usecols)\n\n            return usecols, usecols_dtype\n        return usecols, None\n\n    @classmethod\n    def _manage_skiprows_parameter(\n        cls,\n        skiprows: Union[int, Sequence[int], Callable, None] = None,\n        header_size: int = 0,\n    ) -> Tuple[Union[int, Sequence, Callable], bool, int]:\n        \"\"\"\n        Manage `skiprows` parameter of read_csv and read_fwf functions.\n\n        Change `skiprows` parameter in the way Modin could more optimally\n        process it. 
`csv_dispatcher` and `fwf_dispatcher` have two mechanisms of rows skipping:\n\n        1) During file partitioning (setting of file limits that should be read\n        by each partition) exact rows can be excluded from partitioning scope,\n        thus they won't be read at all and can be considered as skipped. This is\n        the most effective way of rows skipping (since it doesn't require any\n        actual data reading and postprocessing), but in this case `skiprows`\n        parameter can be an integer only. When it is possible, Modin always uses\n        this approach by setting of `skiprows_partitioning` return value.\n\n        2) Rows for skipping can be dropped after full dataset import. This is\n        a more expensive way since it requires extra IO work and postprocessing\n        afterwards, but `skiprows` parameter can be of any non-integer type\n        supported by any pandas read function. These rows are\n        specified by setting of `skiprows_md` return value.\n\n        In some cases, if `skiprows` is uniformly distributed array (e.g. [1,2,3]),\n        `skiprows` can be \"squashed\" and represented as integer to make a fastpath.\n        If there is a gap between the first row for skipping and the last line of\n        the header (that will be skipped too), then assign to read this gap first\n        (assign the first partition to read these rows by setting of `pre_reading`\n        return value). See `Examples` section for details.\n\n        Parameters\n        ----------\n        skiprows : int, array or callable, optional\n            Original `skiprows` parameter of any pandas read function.\n        header_size : int, default: 0\n            Number of rows that are used by header.\n\n        Returns\n        -------\n        skiprows_md : int, array or callable\n            Updated skiprows parameter. If `skiprows` is an array, this\n            array will be sorted. 
Also parameter will be aligned to\n            actual data in the `query_compiler` (which, for example,\n            doesn't contain header rows)\n        pre_reading : int\n            The number of rows that should be read before data file\n            splitting for further reading (the number of rows for\n            the first partition).\n        skiprows_partitioning : int\n            The number of rows that should be skipped virtually (skipped during\n            data file partitioning).\n\n        Examples\n        --------\n        Let's consider case when `header`=\"infer\" and `skiprows`=[3,4,5]. In\n        this specific case fastpath can be done since `skiprows` is uniformly\n        distributed array, so we can \"squash\" it to integer and set\n        `skiprows_partitioning`=3. But if no additional action will be done,\n        these three rows will be skipped right after header line, that corresponds\n        to `skiprows`=[1,2,3]. Now, to avoid this discrepancy, we need to assign\n        the first partition to read data between header line and the first\n        row for skipping by setting of `pre_reading` parameter, so setting\n        `pre_reading`=2. 
During data file partitioning, these lines will be assigned\n        for reading for the first partition, and then file position will be set at\n        the beginning of rows that should be skipped by `skiprows_partitioning`.\n        After skipping of these rows, the rest data will be divided between the\n        rest of partitions, see rows assignment below:\n\n        0 - header line (skip during partitioning)\n        1 - pre_reading (assign to read by the first partition)\n        2 - pre_reading (assign to read by the first partition)\n        3 - skiprows_partitioning (skip during partitioning)\n        4 - skiprows_partitioning (skip during partitioning)\n        5 - skiprows_partitioning (skip during partitioning)\n        6 - data to partition (divide between the rest of partitions)\n        7 - data to partition (divide between the rest of partitions)\n        \"\"\"\n        pre_reading = skiprows_partitioning = skiprows_md = 0\n        if isinstance(skiprows, int):\n            skiprows_partitioning = skiprows\n        elif is_list_like(skiprows) and len(skiprows) > 0:\n            skiprows_md = np.sort(skiprows)\n            if np.all(np.diff(skiprows_md) == 1):\n                # `skiprows` is uniformly distributed array.\n                pre_reading = (\n                    skiprows_md[0] - header_size if skiprows_md[0] > header_size else 0\n                )\n                skiprows_partitioning = len(skiprows_md)\n                skiprows_md = 0\n            elif skiprows_md[0] > header_size:\n                skiprows_md = skiprows_md - header_size\n        elif callable(skiprows):\n\n            def skiprows_func(x):\n                return skiprows(x + header_size)\n\n            skiprows_md = skiprows_func\n\n        return skiprows_md, pre_reading, skiprows_partitioning\n\n    @classmethod\n    def _define_index(\n        cls,\n        index_ids: list,\n        index_name: str,\n    ) -> Tuple[IndexColType, list]:\n        \"\"\"\n        
Compute the resulting DataFrame index and index lengths for each of partitions.\n\n        Parameters\n        ----------\n        index_ids : list\n            Array with references to the partitions index objects.\n        index_name : str\n            Name that should be assigned to the index if `index_col`\n            is not provided.\n\n        Returns\n        -------\n        new_index : IndexColType\n            Index that should be passed to the new_frame constructor.\n        row_lengths : list\n            Partitions rows lengths.\n        \"\"\"\n        index_objs = cls.materialize(index_ids)\n        if len(index_objs) == 0 or isinstance(index_objs[0], int):\n            row_lengths = index_objs\n            new_index = pandas.RangeIndex(sum(index_objs))\n        else:\n            row_lengths = [len(o) for o in index_objs]\n            new_index = index_objs[0].append(index_objs[1:])\n            new_index.name = index_name\n\n        return new_index, row_lengths\n\n    @classmethod\n    def _get_new_qc(\n        cls,\n        partition_ids: list,\n        index_ids: list,\n        dtypes_ids: list,\n        index_col: IndexColType,\n        index_name: str,\n        column_widths: list,\n        column_names: ColumnNamesTypes,\n        skiprows_md: Union[Sequence, callable, None] = None,\n        header_size: int = None,\n        **kwargs,\n    ):\n        \"\"\"\n        Get new query compiler from data received from workers.\n\n        Parameters\n        ----------\n        partition_ids : list\n            Array with references to the partitions data.\n        index_ids : list\n            Array with references to the partitions index objects.\n        dtypes_ids : list\n            Array with references to the partitions dtypes objects.\n        index_col : IndexColType\n            `index_col` parameter of `read_csv` function.\n        index_name : str\n            Name that should be assigned to the index if `index_col`\n            is not 
provided.\n        column_widths : list\n            Number of columns in each partition.\n        column_names : ColumnNamesTypes\n            Array with columns names.\n        skiprows_md : array-like or callable, optional\n            Specifies rows to skip.\n        header_size : int, default: 0\n            Number of rows, that occupied by header.\n        **kwargs : dict\n            Parameters of `read_csv` function needed for postprocessing.\n\n        Returns\n        -------\n        new_query_compiler : BaseQueryCompiler\n            New query compiler, created from `new_frame`.\n        \"\"\"\n        partition_ids = cls.build_partition(\n            partition_ids, [None] * len(index_ids), column_widths\n        )\n\n        new_frame = cls.frame_cls(\n            partition_ids,\n            lambda: cls._define_index(index_ids, index_name),\n            column_names,\n            None,\n            column_widths,\n            dtypes=lambda: cls.get_dtypes(dtypes_ids, column_names),\n        )\n        new_query_compiler = cls.query_compiler_cls(new_frame)\n        skipfooter = kwargs.get(\"skipfooter\", None)\n        if skipfooter:\n            new_query_compiler = new_query_compiler.drop(\n                new_query_compiler.index[-skipfooter:]\n            )\n        if skiprows_md is not None:\n            # skip rows that passed as array or callable\n            nrows = kwargs.get(\"nrows\", None)\n            index_range = pandas.RangeIndex(len(new_query_compiler.index))\n            if is_list_like(skiprows_md):\n                new_query_compiler = new_query_compiler.take_2d_positional(\n                    index=index_range.delete(skiprows_md)\n                )\n            elif callable(skiprows_md):\n                skip_mask = cls._get_skip_mask(index_range, skiprows_md)\n                if not isinstance(skip_mask, np.ndarray):\n                    skip_mask = skip_mask.to_numpy(\"bool\")\n                view_idx = 
index_range[~skip_mask]\n                new_query_compiler = new_query_compiler.take_2d_positional(\n                    index=view_idx\n                )\n            else:\n                raise TypeError(\n                    f\"Not acceptable type of `skiprows` parameter: {type(skiprows_md)}\"\n                )\n\n            if not isinstance(new_query_compiler.index, pandas.MultiIndex):\n                new_query_compiler = new_query_compiler.reset_index(drop=True)\n\n            if nrows:\n                new_query_compiler = new_query_compiler.take_2d_positional(\n                    pandas.RangeIndex(len(new_query_compiler.index))[:nrows]\n                )\n        if index_col is None or index_col is False:\n            new_query_compiler._modin_frame.synchronize_labels(axis=0)\n\n        return new_query_compiler\n\n    @classmethod\n    def _read(cls, filepath_or_buffer, **kwargs):\n        \"\"\"\n        Read data from `filepath_or_buffer` according to `kwargs` parameters.\n\n        Used in `read_csv` and `read_fwf` Modin implementations.\n\n        Parameters\n        ----------\n        filepath_or_buffer : str, path object or file-like object\n            `filepath_or_buffer` parameter of read functions.\n        **kwargs : dict\n            Parameters of read functions.\n\n        Returns\n        -------\n        new_query_compiler : BaseQueryCompiler\n            Query compiler with imported data for further processing.\n        \"\"\"\n        filepath_or_buffer = stringify_path(filepath_or_buffer)\n        filepath_or_buffer_md = (\n            cls.get_path(filepath_or_buffer)\n            if isinstance(filepath_or_buffer, str)\n            else cls.get_path_or_buffer(filepath_or_buffer)\n        )\n        compression_infered = cls.infer_compression(\n            filepath_or_buffer, kwargs[\"compression\"]\n        )\n        # Getting frequently used kwargs;\n        # They should be defined in higher level\n        names = 
kwargs[\"names\"]\n        index_col = kwargs[\"index_col\"]\n        encoding = kwargs[\"encoding\"]\n        skiprows = kwargs[\"skiprows\"]\n        header = kwargs[\"header\"]\n        # Define header size for further skipping (Header can be skipped because header\n        # information will be obtained further from empty_df, so no need to handle it\n        # by workers)\n        header_size = cls._define_header_size(\n            header,\n            names,\n        )\n        (\n            skiprows_md,\n            pre_reading,\n            skiprows_partitioning,\n        ) = cls._manage_skiprows_parameter(skiprows, header_size)\n        should_handle_skiprows = skiprows_md is not None and not isinstance(\n            skiprows_md, int\n        )\n\n        (use_modin_impl, fallback_reason) = cls.check_parameters_support(\n            filepath_or_buffer_md,\n            kwargs,\n            skiprows_md,\n            header_size,\n        )\n        if not use_modin_impl:\n            return cls.single_worker_read(\n                filepath_or_buffer,\n                kwargs,\n                reason=fallback_reason,\n            )\n\n        is_quoting = kwargs[\"quoting\"] != QUOTE_NONE\n        usecols = kwargs[\"usecols\"]\n        use_inferred_column_names = cls._uses_inferred_column_names(\n            names, skiprows, kwargs[\"skipfooter\"], usecols\n        )\n\n        # Computing metadata simultaneously with skipping rows allows us to not\n        # do extra work and improve performance for certain cases, as otherwise,\n        # it would require double re-reading of skipped rows in order to retrieve metadata.\n        can_compute_metadata_while_skipping_rows = (\n            # basic supported case: isinstance(skiprows, int) without any additional params\n            isinstance(skiprows, int)\n            and (usecols is None or skiprows is None)\n            and pre_reading == 0\n        )\n        get_metadata_kw = dict(kwargs, nrows=1, 
skipfooter=0, index_col=index_col)\n        if get_metadata_kw.get(\"engine\", None) == \"pyarrow\":\n            # pyarrow engine doesn't support `nrows` option;\n            # https://github.com/pandas-dev/pandas/issues/38872 can be used to track pyarrow engine features\n            get_metadata_kw[\"engine\"] = \"c\"\n        if not can_compute_metadata_while_skipping_rows:\n            pd_df_metadata = cls.read_callback(\n                filepath_or_buffer_md,\n                **get_metadata_kw,\n            )\n            column_names = pd_df_metadata.columns\n            column_widths, num_splits = cls._define_metadata(\n                pd_df_metadata, column_names\n            )\n            get_metadata_kw = None\n        else:\n            get_metadata_kw = dict(get_metadata_kw, skiprows=None)\n            # `memory_map` doesn't work with file-like object so we can't use it here.\n            # We can definitely skip it without violating the reading logic\n            # since this parameter is intended to optimize reading.\n            # For reading a couple of lines, this is not essential.\n            get_metadata_kw.pop(\"memory_map\", None)\n            # These parameters are already used when opening file `f`,\n            # they do not need to be used again.\n            get_metadata_kw.pop(\"storage_options\", None)\n            get_metadata_kw.pop(\"compression\", None)\n\n        with OpenFile(\n            filepath_or_buffer_md,\n            \"rb\",\n            compression_infered,\n            **(kwargs.get(\"storage_options\", None) or {}),\n        ) as f:\n            old_pos = f.tell()\n            fio = io.TextIOWrapper(f, encoding=encoding, newline=\"\")\n            newline, quotechar = cls.compute_newline(\n                fio, encoding, kwargs.get(\"quotechar\", '\"')\n            )\n            f.seek(old_pos)\n\n            splits, pd_df_metadata_temp = cls.partitioned_file(\n                f,\n                
num_partitions=NPartitions.get(),\n                nrows=kwargs[\"nrows\"] if not should_handle_skiprows else None,\n                skiprows=skiprows_partitioning,\n                quotechar=quotechar,\n                is_quoting=is_quoting,\n                encoding=encoding,\n                newline=newline,\n                header_size=header_size,\n                pre_reading=pre_reading,\n                get_metadata_kw=get_metadata_kw,\n            )\n            if can_compute_metadata_while_skipping_rows:\n                pd_df_metadata = pd_df_metadata_temp\n\n        # compute dtypes if possible\n        common_dtypes = None\n        if kwargs[\"dtype\"] is None:\n            most_common_dtype = (object,)\n            common_dtypes = {}\n            for col, dtype in pd_df_metadata.dtypes.to_dict().items():\n                if dtype in most_common_dtype:\n                    common_dtypes[col] = dtype\n        column_names = pd_df_metadata.columns\n        column_widths, num_splits = cls._define_metadata(pd_df_metadata, column_names)\n        # kwargs that will be passed to the workers\n        partition_kwargs = dict(\n            kwargs,\n            header_size=0 if use_inferred_column_names else header_size,\n            names=column_names if use_inferred_column_names else names,\n            header=\"infer\" if use_inferred_column_names else header,\n            skipfooter=0,\n            skiprows=None,\n            nrows=None,\n            compression=compression_infered,\n            common_dtypes=common_dtypes,\n        )\n        # this is done mostly for performance; see PR#5678 for details\n        filepath_or_buffer_md_ref = cls.put(filepath_or_buffer_md)\n        kwargs_ref = cls.put(partition_kwargs)\n        partition_ids, index_ids, dtypes_ids = cls._launch_tasks(\n            splits,\n            filepath_or_buffer_md_ref,\n            kwargs_ref,\n            num_splits=num_splits,\n        )\n\n        new_query_compiler = 
cls._get_new_qc(\n            partition_ids=partition_ids,\n            index_ids=index_ids,\n            dtypes_ids=dtypes_ids,\n            index_col=index_col,\n            index_name=pd_df_metadata.index.name,\n            column_widths=column_widths,\n            column_names=column_names,\n            skiprows_md=skiprows_md if should_handle_skiprows else None,\n            header_size=header_size,\n            skipfooter=kwargs[\"skipfooter\"],\n            parse_dates=kwargs[\"parse_dates\"],\n            nrows=kwargs[\"nrows\"] if should_handle_skiprows else None,\n        )\n        return new_query_compiler\n\n    @classmethod\n    def _get_skip_mask(cls, rows_index: pandas.Index, skiprows: Callable):\n        \"\"\"\n        Get mask of skipped by callable `skiprows` rows.\n\n        Parameters\n        ----------\n        rows_index : pandas.Index\n            Rows index to get mask for.\n        skiprows : Callable\n            Callable to check whether row index should be skipped.\n\n        Returns\n        -------\n        pandas.Index\n        \"\"\"\n        try:\n            # direct `skiprows` call is more efficient than using of\n            # map method, but in some cases it can work incorrectly, e.g.\n            # when `skiprows` contains `in` operator\n            mask = skiprows(rows_index)\n            assert is_list_like(mask)\n        except (ValueError, TypeError, AssertionError):\n            # ValueError can be raised if `skiprows` callable contains membership operator\n            # TypeError is raised if `skiprows` callable contains bitwise operator\n            # AssertionError is raised if unexpected behavior was detected\n            mask = rows_index.map(skiprows)\n\n        return mask\n\n    @staticmethod\n    def _uses_inferred_column_names(names, skiprows, skipfooter, usecols):\n        \"\"\"\n        Tell whether need to use inferred column names in workers or not.\n\n        1) ``False`` is returned in 2 cases and means 
next:\n            1.a) `names` parameter was provided from the API layer. In this case parameter\n            `names` must be provided as `names` parameter for ``read_csv`` in the workers.\n            1.b) `names` parameter wasn't provided from the API layer. In this case column names\n            inference must happen in each partition.\n        2) ``True`` is returned in case when inferred column names from pre-reading stage must be\n            provided as `names` parameter for ``read_csv`` in the workers.\n\n        In case `names` was provided, the other parameters aren't checked. Otherwise, inferred column\n        names should be used in a case of not full data reading which is defined by `skipfooter` parameter,\n        when need to skip lines at the bottom of file or by `skiprows` parameter, when need to skip lines at\n        the top of file (but if `usecols` was provided, column names inference must happen in the workers).\n\n        Parameters\n        ----------\n        names : array-like\n            List of column names to use.\n        skiprows : list-like, int or callable\n            Line numbers to skip (0-indexed) or number of lines to skip (int) at\n            the start of the file. 
If callable, the callable function will be\n            evaluated against the row indices, returning ``True`` if the row should\n            be skipped and ``False`` otherwise.\n        skipfooter : int\n            Number of lines at bottom of the file to skip.\n        usecols : list-like or callable\n            Subset of the columns.\n\n        Returns\n        -------\n        bool\n            Whether to use inferred column names in ``read_csv`` of the workers or not.\n        \"\"\"\n        if names not in [None, lib.no_default]:\n            return False\n        if skipfooter != 0:\n            return True\n        if isinstance(skiprows, int) and skiprows == 0:\n            return False\n        if is_list_like(skiprows):\n            return usecols is None\n        return skiprows is not None\n"
  },
  {
    "path": "modin/core/io/text/utils.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Contains utility functions for dispatchers.\"\"\"\n\nimport io\n\n\nclass CustomNewlineIterator:\n    r\"\"\"\n    Used to iterate through files in binary mode line by line where newline != b'\\n'.\n\n    Parameters\n    ----------\n    _file : file-like object\n        File-like object to iterate over.\n    newline : bytes\n        Byte or sequence of bytes indicating line endings.\n    \"\"\"\n\n    def __init__(self, _file, newline):\n        self.file = _file\n        self.newline = newline\n        self.bytes_read = self.chunk_size = 0\n\n    def __iter__(self):\n        \"\"\"\n        Iterate over lines.\n\n        Yields\n        ------\n        bytes\n            Data from file.\n        \"\"\"\n        buffer_size = io.DEFAULT_BUFFER_SIZE\n        chunk = self.file.read(buffer_size)\n        self.chunk_size = 0\n        while chunk:\n            self.bytes_read = 0\n            self.chunk_size = len(chunk)\n            # split remove newline bytes from line\n            lines = chunk.split(self.newline)\n            for line in lines[:-1]:\n                self.bytes_read += len(line) + len(self.newline)\n                yield line\n            
chunk = self.file.read(buffer_size)\n            if lines[-1]:\n                # last line can be read without newline bytes\n                chunk = lines[-1] + chunk\n\n    def seek(self):\n        \"\"\"Change the stream position to where the last returned line ends.\"\"\"\n        self.file.seek(self.bytes_read - self.chunk_size, 1)\n"
  },
  {
    "path": "modin/core/storage_formats/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Modin's functionality related to storage formats supported.\"\"\"\n\nfrom .base import BaseQueryCompiler\nfrom .pandas import PandasQueryCompiler\n\n__all__ = [\"BaseQueryCompiler\", \"PandasQueryCompiler\"]\n"
  },
  {
    "path": "modin/core/storage_formats/base/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"The module represents the base query compiler that defines the common query compiler API.\"\"\"\n\nfrom .query_compiler import BaseQueryCompiler\n\n__all__ = [\"BaseQueryCompiler\"]\n"
  },
  {
    "path": "modin/core/storage_formats/base/doc_utils.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module contains decorators for documentation of the query compiler methods.\"\"\"\n\nfrom functools import partial\n\nfrom modin.utils import align_indents, append_to_docstring, format_string\n\n_one_column_warning = \"\"\"\n.. warning::\n    This method is supported only by one-column query compilers.\n\"\"\"\n\n_deprecation_warning = \"\"\"\n.. 
warning::\n    This method duplicates logic of ``{0}`` and will be removed soon.\n\"\"\"\n\n_refer_to_note = \"\"\"\nNotes\n-----\nPlease refer to ``modin.pandas.{0}`` for more information\nabout parameters and output format.\n\"\"\"\n\nadd_one_column_warning = append_to_docstring(_one_column_warning)\n\n\ndef add_deprecation_warning(replacement_method):\n    \"\"\"\n    Build decorator which appends deprecation warning to the function's docstring.\n\n    Appended warning indicates that the current method duplicates functionality of\n    some other method and so is slated to be removed in the future.\n\n    Parameters\n    ----------\n    replacement_method : str\n        Name of the method to use instead of deprecated.\n\n    Returns\n    -------\n    callable\n    \"\"\"\n    message = _deprecation_warning.format(replacement_method)\n    return append_to_docstring(message)\n\n\ndef add_refer_to(method):\n    \"\"\"\n    Build decorator which appends link to the high-level equivalent method to the function's docstring.\n\n    Parameters\n    ----------\n    method : str\n        Method name in ``modin.pandas`` module to refer to.\n\n    Returns\n    -------\n    callable\n    \"\"\"\n    # FIXME: this would break numpydoc if there already is a `Notes` section\n    note = _refer_to_note.format(method)\n    return append_to_docstring(note)\n\n\ndef doc_qc_method(\n    template,\n    params=None,\n    refer_to=None,\n    refer_to_module_name=None,\n    one_column_method=False,\n    **kwargs,\n):\n    \"\"\"\n    Build decorator which adds docstring for query compiler method.\n\n    Parameters\n    ----------\n    template : str\n        Method docstring in the NumPy docstyle format. Must contain {params}\n        placeholder.\n    params : str, optional\n        Method parameters in the NumPy docstyle format to substitute\n        in the `template`. 
`params` string should not include the \"Parameters\"\n        header.\n    refer_to : str, optional\n        Method name in `refer_to_module_name` module to refer to for more information\n        about parameters and output format.\n    refer_to_module_name : str, optional\n    one_column_method : bool, default: False\n        Whether to append note that this method is for one-column\n        query compilers only.\n    **kwargs : dict\n        Values to substitute in the `template`.\n\n    Returns\n    -------\n    callable\n    \"\"\"\n    params_template = \"\"\"\n\n        Parameters\n        ----------\n        {params}\n        \"\"\"\n\n    params = format_string(params_template, params=params) if params else \"\"\n    substituted = format_string(template, params=params, refer_to=refer_to, **kwargs)\n    if refer_to_module_name:\n        refer_to = f\"{refer_to_module_name}.{refer_to}\"\n\n    def decorator(func):\n        func.__doc__ = substituted\n        appendix = \"\"\n        if refer_to:\n            appendix += _refer_to_note.format(refer_to)\n        if one_column_method:\n            appendix += _one_column_warning\n        if appendix:\n            func = append_to_docstring(appendix)(func)\n        return func\n\n    return decorator\n\n\ndef doc_binary_method(operation, sign, self_on_right=False, op_type=\"arithmetic\"):\n    \"\"\"\n    Build decorator which adds docstring for binary method.\n\n    Parameters\n    ----------\n    operation : str\n        Name of the binary operation.\n    sign : str\n        Sign which represents specified binary operation.\n    self_on_right : bool, default: False\n        Whether `self` is the right operand.\n    op_type : {\"arithmetic\", \"logical\", \"comparison\"}, default: \"arithmetic\"\n        Type of the binary operation.\n\n    Returns\n    -------\n    callable\n    \"\"\"\n    template = \"\"\"\n    Perform element-wise {operation} (``{verbose}``).\n\n    If axes are not equal, perform frames 
alignment first.\n\n    Parameters\n    ----------\n    other : BaseQueryCompiler, scalar or array-like\n        Other operand of the binary operation.\n    broadcast : bool, default: False\n        If `other` is a one-column query compiler, indicates whether it is a Series or not.\n        Frames and Series have to be processed differently, however we can't distinguish them\n        at the query compiler level, so this parameter is a hint that is passed from a high-level API.\n    {extra_params}**kwargs : dict\n        Serves the compatibility purpose. Does not affect the result.\n\n    Returns\n    -------\n    BaseQueryCompiler\n        Result of binary operation.\n    \"\"\"\n\n    extra_params = {\n        \"logical\": \"\"\"\n        level : int or label\n            In case of MultiIndex match index values on the passed level.\n        axis : {{0, 1}}\n            Axis to match indices along for 1D `other` (list or QueryCompiler that represents Series).\n            0 is for index, when 1 is for columns.\n        \"\"\",\n        \"arithmetic\": \"\"\"\n        level : int or label\n            In case of MultiIndex match index values on the passed level.\n        axis : {{0, 1}}\n            Axis to match indices along for 1D `other` (list or QueryCompiler that represents Series).\n            0 is for index, when 1 is for columns.\n        fill_value : float or None\n            Value to fill missing elements during frame alignment.\n        \"\"\",\n        \"series_comparison\": \"\"\"\n        level : int or label\n            In case of MultiIndex match index values on the passed level.\n        fill_value : float or None\n            Value to fill missing elements during frame alignment.\n        axis : {{0, 1}}\n            Unused. 
Parameter needed for compatibility with DataFrame.\n        \"\"\",\n    }\n\n    verbose_substitution = (\n        f\"other {sign} self\" if self_on_right else f\"self {sign} other\"\n    )\n    params_substitution = extra_params.get(op_type, \"\")\n    return doc_qc_method(\n        template,\n        extra_params=params_substitution,\n        operation=operation,\n        verbose=verbose_substitution,\n    )\n\n\ndef doc_reduce_agg(method, refer_to, params=None, extra_params=None):\n    \"\"\"\n    Build decorator which adds docstring for the reduce method.\n\n    Parameters\n    ----------\n    method : str\n        The result of the method.\n    refer_to : str\n        Method name in ``modin.pandas.DataFrame`` module to refer to for\n        more information about parameters and output format.\n    params : str, optional\n        Method parameters in the NumPy docstyle format to substitute\n        to the docstring template.\n    extra_params : sequence of str, optional\n        Method parameter names to append to the docstring template. 
Parameter\n        type and description will be grabbed from ``extra_params_map`` (Please\n        refer to the source code of this function to explore the map).\n\n    Returns\n    -------\n    callable\n    \"\"\"\n    template = \"\"\"\n        Get the {method} for each column or row.\n        {params}\n        Returns\n        -------\n        BaseQueryCompiler\n            One-column QueryCompiler with index labels of the specified axis,\n            where each row contains the {method} for the corresponding\n            row or column.\n        \"\"\"\n\n    if params is None:\n        params = \"\"\"\n        axis : {{0, 1}}\n        numeric_only : bool, optional\"\"\"\n\n    extra_params_map = {\n        \"skipna\": \"\"\"\n        skipna : bool, default: True\"\"\",\n        \"min_count\": \"\"\"\n        min_count : int\"\"\",\n        \"ddof\": \"\"\"\n        ddof : int\"\"\",\n        \"*args\": \"\"\"\n        *args : iterable\n            Serves the compatibility purpose. Does not affect the result.\"\"\",\n        \"**kwargs\": \"\"\"\n        **kwargs : dict\n            Serves the compatibility purpose. Does not affect the result.\"\"\",\n    }\n\n    params += \"\".join(\n        [\n            align_indents(\n                source=params, target=extra_params_map.get(param, f\"\\n{param} : object\")\n            )\n            for param in (extra_params or [])\n        ]\n    )\n    return doc_qc_method(\n        template,\n        params=params,\n        method=method,\n        refer_to=f\"DataFrame.{refer_to}\",\n    )\n\n\ndoc_cum_agg = partial(\n    doc_qc_method,\n    template=\"\"\"\n    Get cumulative {method} for every row or column.\n\n    Parameters\n    ----------\n    fold_axis : {{0, 1}}\n    skipna : bool\n    **kwargs : dict\n        Serves the compatibility purpose. 
Does not affect the result.\n\n    Returns\n    -------\n    BaseQueryCompiler\n        QueryCompiler of the same shape as `self`, where each element is the {method}\n        of all the previous values in this row or column.\n    \"\"\",\n    refer_to_module_name=\"DataFrame\",\n)\n\ndoc_resample = partial(\n    doc_qc_method,\n    template=\"\"\"\n    Resample time-series data and apply aggregation on it.\n\n    Group data into intervals by time-series row/column with\n    a specified frequency and {action}.\n\n    Parameters\n    ----------\n    resample_kwargs : dict\n        Resample parameters as expected by ``modin.pandas.DataFrame.resample`` signature.\n    {extra_params}\n    Returns\n    -------\n    BaseQueryCompiler\n        New QueryCompiler containing the result of resample aggregation built by the\n        following rules:\n\n        {build_rules}\n    \"\"\",\n    refer_to_module_name=\"resample.Resampler\",\n)\n\n\ndef doc_resample_reduce(result, refer_to, params=None, compatibility_params=True):\n    \"\"\"\n    Build decorator which adds docstring for the resample reduce method.\n\n    Parameters\n    ----------\n    result : str\n        The result of the method.\n    refer_to : str\n        Method name in ``modin.pandas.resample.Resampler`` module to refer to for\n        more information about parameters and output format.\n    params : str, optional\n        Method parameters in the NumPy docstyle format to substitute\n        to the docstring template.\n    compatibility_params : bool, default: True\n        Whether method takes `*args` and `**kwargs` that do not affect\n        the result.\n\n    Returns\n    -------\n    callable\n    \"\"\"\n    action = f\"compute {result} for each group\"\n\n    params_substitution = (\n        (\n            \"\"\"\n        *args : iterable\n            Serves the compatibility purpose. Does not affect the result.\n        **kwargs : dict\n            Serves the compatibility purpose. 
Does not affect the result.\n        \"\"\"\n        )\n        if compatibility_params\n        else \"\"\n    )\n\n    if params:\n        params_substitution = format_string(\n            \"{params}\\n{params_substitution}\",\n            params=params,\n            params_substitution=params_substitution,\n        )\n\n    build_rules = f\"\"\"\n            - Labels on the specified axis are the group names (time-stamps)\n            - Labels on the opposite of specified axis are preserved.\n            - Each element of QueryCompiler is the {result} for the\n              corresponding group and column/row.\"\"\"\n    return doc_resample(\n        action=action,\n        extra_params=params_substitution,\n        build_rules=build_rules,\n        refer_to=refer_to,\n    )\n\n\ndef doc_resample_agg(action, output, refer_to, params=None):\n    \"\"\"\n    Build decorator which adds docstring for the resample aggregation method.\n\n    Parameters\n    ----------\n    action : str\n        What method does with the resampled data.\n    output : str\n        What is the content of column names in the result.\n    refer_to : str\n        Method name in ``modin.pandas.resample.Resampler`` module to refer to for\n        more information about parameters and output format.\n    params : str, optional\n        Method parameters in the NumPy docstyle format to substitute\n        to the docstring template.\n\n    Returns\n    -------\n    callable\n    \"\"\"\n    action = f\"{action} for each group over the specified axis\"\n\n    params_substitution = \"\"\"\n        *args : iterable\n            Positional arguments to pass to the aggregation function.\n        **kwargs : dict\n            Keyword arguments to pass to the aggregation function.\n        \"\"\"\n\n    if params:\n        params_substitution = format_string(\n            \"{params}\\n{params_substitution}\",\n            params=params,\n            params_substitution=params_substitution,\n        )\n\n 
   build_rules = f\"\"\"\n            - Labels on the specified axis are the group names (time-stamps)\n            - Labels on the opposite of specified axis are a MultiIndex, where first level\n              contains preserved labels of this axis and the second level is the {output}.\n            - Each element of QueryCompiler is the result of corresponding function for the\n              corresponding group and column/row.\"\"\"\n    return doc_resample(\n        action=action,\n        extra_params=params_substitution,\n        build_rules=build_rules,\n        refer_to=refer_to,\n    )\n\n\ndef doc_resample_fillna(method, refer_to, params=None, overwrite_template_params=False):\n    \"\"\"\n    Build decorator which adds docstring for the resample fillna query compiler method.\n\n    Parameters\n    ----------\n    method : str\n        Fillna method name.\n    refer_to : str\n        Method name in ``modin.pandas.resample.Resampler`` module to refer to for\n        more information about parameters and output format.\n    params : str, optional\n        Method parameters in the NumPy docstyle format to substitute\n        to the docstring template.\n    overwrite_template_params : bool, default: False\n        If `params` is specified indicates whether to overwrite method parameters in\n        the docstring template or append them at the end.\n\n    Returns\n    -------\n    callable\n    \"\"\"\n    action = f\"fill missing values in each group independently using {method} method\"\n    params_substitution = \"limit : int\\n\"\n\n    if params:\n        params_substitution = (\n            params\n            if overwrite_template_params\n            else format_string(\n                \"{params}\\n{params_substitution}\",\n                params=params,\n                params_substitution=params_substitution,\n            )\n        )\n\n    build_rules = \"- QueryCompiler contains unsampled data with missing values filled.\"\n\n    return 
doc_resample(\n        action=action,\n        extra_params=params_substitution,\n        build_rules=build_rules,\n        refer_to=refer_to,\n    )\n\n\ndoc_dt = partial(\n    doc_qc_method,\n    template=\"\"\"\n    Get {prop} for each {dt_type} value.\n    {params}\n    Returns\n    -------\n    BaseQueryCompiler\n        New QueryCompiler with the same shape as `self`, where each element is\n        {prop} for the corresponding {dt_type} value.\n    \"\"\",\n    one_column_method=True,\n    refer_to_module_name=\"Series.dt\",\n)\n\ndoc_dt_timestamp = partial(doc_dt, dt_type=\"datetime\")\ndoc_dt_interval = partial(doc_dt, dt_type=\"interval\")\ndoc_dt_period = partial(doc_dt, dt_type=\"period\")\n\ndoc_dt_round = partial(\n    doc_qc_method,\n    template=\"\"\"\n    Perform {refer_to} operation on the underlying time-series data to the specified `freq`.\n\n    Parameters\n    ----------\n    freq : str\n    ambiguous : {{\"raise\", \"infer\", \"NaT\"}} or bool mask, default: \"raise\"\n    nonexistent : {{\"raise\", \"shift_forward\", \"shift_backward\", \"NaT\"}} or timedelta, default: \"raise\"\n\n    Returns\n    -------\n    BaseQueryCompiler\n        New QueryCompiler with performed {refer_to} operation on every element.\n    \"\"\",\n    one_column_method=True,\n    refer_to_module_name=\"Series.dt\",\n)\n\ndoc_str_method = partial(\n    doc_qc_method,\n    template=\"\"\"\n    Apply \"{refer_to}\" function to each string value in QueryCompiler.\n    {params}\n    Returns\n    -------\n    BaseQueryCompiler\n        New QueryCompiler containing the result of execution of the \"{refer_to}\" function\n        against each string element.\n    \"\"\",\n    one_column_method=True,\n    refer_to_module_name=\"Series.str\",\n)\n\n\ndef doc_window_method(\n    window_cls_name,\n    result,\n    refer_to,\n    action=None,\n    win_type=\"rolling window\",\n    params=None,\n    build_rules=\"aggregation\",\n):\n    \"\"\"\n    Build decorator which adds 
docstring for a window method.\n\n    Parameters\n    ----------\n    window_cls_name : str\n        The Window class the method is on.\n    result : str\n        The result of the method.\n    refer_to : str\n        Method name in ``modin.pandas.window.Window`` module to refer to\n        for more information about parameters and output format.\n    action : str, optional\n        What method does with the created window.\n    win_type : str, default: \"rolling window\"\n        Type of window that the method creates.\n    params : str, optional\n        Method parameters in the NumPy docstyle format to substitute\n        to the docstring template.\n    build_rules : str, default: \"aggregation\"\n        Description of the data output format.\n\n    Returns\n    -------\n    callable\n    \"\"\"\n    template = \"\"\"\n        Create {win_type} and {action} for each window over the given axis.\n\n        Parameters\n        ----------\n        fold_axis : {{0, 1}}\n        {window_args_name} : list\n            Rolling windows arguments with the same signature as ``modin.pandas.DataFrame.rolling``.\n        {extra_params}\n        Returns\n        -------\n        BaseQueryCompiler\n            New QueryCompiler containing {result} for each window, built by the following\n            rules:\n\n            {build_rules}\n        \"\"\"\n    doc_build_rules = {\n        \"aggregation\": f\"\"\"\n            - Output QueryCompiler has the same shape and axes labels as the source.\n            - Each element is the {result} for the corresponding window.\"\"\",\n        \"udf_aggregation\": \"\"\"\n            - Labels on the specified axis are preserved.\n            - Labels on the opposite of specified axis are MultiIndex, where first level\n              contains preserved labels of this axis and the second level has the function names.\n            - Each element of QueryCompiler is the result of corresponding function for the\n              corresponding 
window and column/row.\"\"\",\n    }\n    if action is None:\n        action = f\"compute {result}\"\n    if win_type == \"rolling window\":\n        window_args_name = \"rolling_kwargs\"\n    elif win_type == \"expanding window\":\n        window_args_name = \"expanding_args\"\n    else:\n        window_args_name = \"window_kwargs\"\n\n    # We need that `params` value ended with new line to have\n    # an empty line between \"parameters\" and \"return\" sections\n    if params and params[-1] != \"\\n\":\n        params += \"\\n\"\n\n    if params is None:\n        params = \"\"\n\n    return doc_qc_method(\n        template,\n        result=result,\n        action=action,\n        win_type=win_type,\n        extra_params=params,\n        build_rules=doc_build_rules.get(build_rules, build_rules),\n        refer_to=f\"{window_cls_name}.{refer_to}\",\n        window_args_name=window_args_name,\n    )\n\n\ndef doc_groupby_method(result, refer_to, action=None):\n    \"\"\"\n    Build decorator which adds docstring for the groupby reduce method.\n\n    Parameters\n    ----------\n    result : str\n        The result of reduce.\n    refer_to : str\n        Method name in ``modin.pandas.groupby`` module to refer to\n        for more information about parameters and output format.\n    action : str, optional\n        What method does with groups.\n\n    Returns\n    -------\n    callable\n    \"\"\"\n    template = \"\"\"\n    Group QueryCompiler data and {action} for every group.\n\n    Parameters\n    ----------\n    by : BaseQueryCompiler, column or index label, Grouper or list of such\n        Object that determine groups.\n    axis : {{0, 1}}\n        Axis to group and apply aggregation function along.\n        0 is for index, when 1 is for columns.\n    groupby_kwargs : dict\n        GroupBy parameters as expected by ``modin.pandas.DataFrame.groupby`` signature.\n    agg_args : list-like\n        Positional arguments to pass to the `agg_func`.\n    agg_kwargs : 
dict\n        Key arguments to pass to the `agg_func`.\n    drop : bool, default: False\n        If `by` is a QueryCompiler indicates whether or not by-data came\n        from the `self`.\n\n    Returns\n    -------\n    BaseQueryCompiler\n        QueryCompiler containing the result of groupby reduce built by the\n        following rules:\n\n        - Labels on the opposite of specified axis are preserved.\n        - If groupby_args[\"as_index\"] is True then labels on the specified axis\n          are the group names, otherwise labels would be default: 0, 1 ... n.\n        - If groupby_args[\"as_index\"] is False, then first N columns/rows of the frame\n          contain group names, where N is the columns/rows to group on.\n        - Each element of QueryCompiler is the {result} for the\n          corresponding group and column/row.\n\n    .. warning::\n        `map_args` and `reduce_args` parameters are deprecated. They're leaked here from\n        ``PandasQueryCompiler.groupby_*``, pandas storage format implements groupby via TreeReduce\n        approach, but for other storage formats these parameters make no sense, and so they'll be removed in the future.\n    \"\"\"\n    if action is None:\n        action = f\"compute {result}\"\n\n    return doc_qc_method(\n        template, result=result, action=action, refer_to=f\"GroupBy.{refer_to}\"\n    )\n"
  },
  {
    "path": "modin/core/storage_formats/base/query_compiler.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"\nModule contains class ``BaseQueryCompiler``.\n\n``BaseQueryCompiler`` is a parent abstract class for any other query compiler class.\n\"\"\"\n\nfrom __future__ import annotations\n\nimport abc\nimport warnings\nfrom enum import IntEnum\nfrom functools import cached_property\nfrom types import MappingProxyType\nfrom typing import TYPE_CHECKING, Any, Hashable, List, Literal, Optional, Union\n\nimport numpy as np\nimport pandas\nimport pandas.core.resample\nfrom pandas._typing import DtypeBackend, IndexLabel, Suffixes\nfrom pandas.core.dtypes.common import is_number, is_scalar\n\nfrom modin.config.envvars import Backend, Execution\nfrom modin.core.dataframe.algebra.default2pandas import (\n    BinaryDefault,\n    CatDefault,\n    DataFrameDefault,\n    DateTimeDefault,\n    ExpandingDefault,\n    GroupByDefault,\n    ListDefault,\n    ResampleDefault,\n    RollingDefault,\n    SeriesDefault,\n    SeriesGroupByDefault,\n    StrDefault,\n    StructDefault,\n)\nfrom modin.core.dataframe.base.interchange.dataframe_protocol.dataframe import (\n    ProtocolDataframe,\n)\nfrom modin.error_message import ErrorMessage\nfrom modin.logging import ClassLogger\nfrom 
modin.logging.config import LogLevel\nfrom modin.logging.logger_decorator import disable_logging\nfrom modin.utils import MODIN_UNNAMED_SERIES_LABEL, try_cast_to_pandas\n\nfrom . import doc_utils\n\nif TYPE_CHECKING:\n    from typing_extensions import Self\n\n    # TODO: should be ModinDataframe\n    # https://github.com/modin-project/modin/issues/7244\n    from modin.core.dataframe.pandas.dataframe.dataframe import PandasDataframe\n    from modin.pandas import DataFrame, Series\n    from modin.pandas.base import BasePandasDataset\n\n\ndef _get_axis(axis):\n    \"\"\"\n    Build index labels getter of the specified axis.\n\n    Parameters\n    ----------\n    axis : {0, 1}\n        Axis to get labels from.\n\n    Returns\n    -------\n    callable(BaseQueryCompiler) -> pandas.Index\n    \"\"\"\n\n    def axis_getter(self: \"BaseQueryCompiler\") -> pandas.Index:\n        self._maybe_warn_on_default(message=f\"DataFrame.get_axis({axis})\")\n        return self.to_pandas().axes[axis]\n\n    return axis_getter\n\n\ndef _set_axis(axis):\n    \"\"\"\n    Build index labels setter of the specified axis.\n\n    Parameters\n    ----------\n    axis : {0, 1}\n        Axis to set labels on.\n\n    Returns\n    -------\n    callable(BaseQueryCompiler)\n    \"\"\"\n\n    def axis_setter(self, labels):\n        new_qc = DataFrameDefault.register(pandas.DataFrame.set_axis)(\n            self, axis=axis, labels=labels\n        )\n        self.__dict__.update(new_qc.__dict__)\n\n    return axis_setter\n\n\nclass QCCoercionCost(IntEnum):  # noqa: PR01\n    \"\"\"\n    Coercion costs between different Query Compiler backends.\n\n    Coercion costs between query compilers can be expressed\n    as integers in the range 0 to 1000, where 1000 is\n    considered impossible. 
Since coercion costs can be a\n    function of many variables ( dataset size, partitioning,\n    network throughput, and query time ) we define a set range\n    of cost values to simplify comparisons between two query\n    compilers / engines in a unified way.\n\n    COST_ZERO means there is no cost associated, or that the query compilers\n    are the same.\n\n    COST_IMPOSSIBLE means the coercion is effectively impossible, which can\n    occur if the target system is unable to store the data as a result\n    of the coercion. Currently this does not prevent coercion.\n    \"\"\"\n\n    COST_ZERO = 0\n    COST_LOW = 250\n    COST_MEDIUM = 500\n    COST_HIGH = 750\n    COST_IMPOSSIBLE = 1000\n\n    @classmethod\n    def validate_coersion_cost(cls, cost: QCCoercionCost):\n        \"\"\"\n        Validate that the coercion cost is within range.\n\n        Parameters\n        ----------\n        cost : QCCoercionCost\n        \"\"\"\n        if int(cost) < int(QCCoercionCost.COST_ZERO) or int(cost) > int(\n            QCCoercionCost.COST_IMPOSSIBLE\n        ):\n            raise ValueError(\"Query compiler coercsion cost out of range\")\n\n\n# FIXME: many of the BaseQueryCompiler methods are hiding actual arguments\n# by using *args and **kwargs. They should be spread into actual parameters.\n# Currently actual arguments are placed in the methods docstrings, but since they're\n# not presented in the function's signature it makes linter to raise `PR02: unknown parameters`\n# warning. 
For now, they're silenced by using `noqa` (Modin issue #3108).\nclass BaseQueryCompiler(\n    ClassLogger, abc.ABC, modin_layer=\"QUERY-COMPILER\", log_level=LogLevel.DEBUG\n):\n    \"\"\"\n    Abstract class that handles the queries to Modin dataframes.\n\n    This class defines common query compilers API, most of the methods\n    are already implemented and defaulting to pandas.\n\n    Attributes\n    ----------\n    lazy_row_labels : bool, default False\n        True if the backend defers computations of the row labels (`df.index` for a frame).\n        Used by the frontend to avoid unnecessary execution or defer error validation.\n    lazy_row_count : bool, default False\n        True if the backend defers computations of the number of rows (`len(df.index)`).\n        Used by the frontend to avoid unnecessary execution or defer error validation.\n    lazy_column_types : bool, default False\n        True if the backend defers computations of the column types (`df.dtypes`).\n        Used by the frontend to avoid unnecessary execution or defer error validation.\n    lazy_column_labels : bool, default False\n        True if the backend defers computations of the column labels (`df.columns`).\n        Used by the frontend to avoid unnecessary execution or defer error validation.\n    lazy_column_count : bool, default False\n        True if the backend defers computations of the number of columns (`len(df.columns)`).\n        Used by the frontend to avoid unnecessary execution or defer error validation.\n    _shape_hint : {\"row\", \"column\", None}, default: None\n        Shape hint for frames known to be a column or a row, otherwise None.\n\n    Notes\n    -----\n    See the Abstract Methods and Fields section immediately below this\n    for a list of requirements for subclassing this object.\n    \"\"\"\n\n    # four variables can handle reasonably complex automatic engine-switching\n    # behavior, though the operation overhead (both initial and per-row)\n    # 
values may vary by engine.\n    _MAX_SIZE_THIS_ENGINE_CAN_HANDLE: int = 1\n    _OPERATION_INITIALIZATION_OVERHEAD: int = 0\n    _OPERATION_PER_ROW_OVERHEAD: int = 0\n    _TRANSFER_THRESHOLD: int = 0\n\n    _modin_frame: PandasDataframe\n    _shape_hint: Optional[str]\n    _should_warn_on_default_to_pandas: bool = True\n\n    @classmethod\n    def _maybe_warn_on_default(cls, *, message: str = \"\", reason: str = \"\") -> None:\n        \"\"\"\n        If this class is configured to warn on default to pandas, warn.\n\n        Parameters\n        ----------\n        message : str, default: \"\"\n            Method that is defaulting to pandas.\n        reason : str, default: \"\"\n            Reason for default.\n        \"\"\"\n        if cls._should_warn_on_default_to_pandas:\n            ErrorMessage.default_to_pandas(message=message, reason=reason)\n\n    @disable_logging\n    def get_backend(self) -> str:\n        \"\"\"\n        Get the backend for this query compiler.\n\n        Returns\n        -------\n        str\n            The backend for this query compiler.\n        \"\"\"\n        return Backend.get_backend_for_execution(\n            Execution(\n                engine=self.engine,\n                storage_format=self.storage_format,\n            )\n        )\n\n    @property\n    @abc.abstractmethod\n    def storage_format(self) -> str:\n        \"\"\"\n        The storage format for this query compiler.\n\n        Returns\n        -------\n        str\n            The storage format.\n        \"\"\"\n        pass\n\n    @property\n    @abc.abstractmethod\n    def engine(self) -> str:\n        \"\"\"\n        The engine for this query compiler.\n\n        Returns\n        -------\n        str\n            The engine.\n        \"\"\"\n        pass\n\n    def __wrap_in_qc(self, obj):\n        \"\"\"\n        Wrap `obj` in query compiler.\n\n        Parameters\n        ----------\n        obj : any\n            Object to wrap.\n\n        Returns\n        
-------\n        BaseQueryCompiler\n            Query compiler wrapping the object.\n        \"\"\"\n        if isinstance(obj, pandas.Series):\n            if obj.name is None:\n                obj.name = MODIN_UNNAMED_SERIES_LABEL\n            obj = obj.to_frame()\n        if isinstance(obj, pandas.DataFrame):\n            return self.from_pandas(obj, type(self._modin_frame))\n        else:\n            return obj\n\n    def default_to_pandas(self, pandas_op, *args, **kwargs) -> Self:\n        \"\"\"\n        Do fallback to pandas for the passed function.\n\n        Parameters\n        ----------\n        pandas_op : callable(pandas.DataFrame) -> object\n            Function to apply to the casted to pandas frame.\n        *args : iterable\n            Positional arguments to pass to `pandas_op`.\n        **kwargs : dict\n            Key-value arguments to pass to `pandas_op`.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            The result of the `pandas_op`, converted back to ``BaseQueryCompiler``.\n        \"\"\"\n        op_name = getattr(pandas_op, \"__name__\", str(pandas_op))\n        self._maybe_warn_on_default(message=op_name)\n        args = try_cast_to_pandas(args)\n        kwargs = try_cast_to_pandas(kwargs)\n\n        with warnings.catch_warnings():\n            warnings.filterwarnings(\"ignore\", category=FutureWarning)\n            result = pandas_op(try_cast_to_pandas(self), *args, **kwargs)\n        if isinstance(result, (tuple, list)):\n            if \"Series.tolist\" in pandas_op.__name__:\n                # fast path: no need to iterate over the result from `tolist` function\n                return result\n            return [self.__wrap_in_qc(obj) for obj in result]\n        return self.__wrap_in_qc(result)\n\n    @disable_logging\n    def move_to_cost(\n        self,\n        other_qc_type: type,\n        api_cls_name: Optional[str],\n        operation: str,\n        arguments: MappingProxyType[str, Any],\n    ) -> 
Optional[int]:\n        \"\"\"\n        Return the coercion costs of this qc to other_qc type.\n\n        This is called for forced casting and opportunistic switching\n        decision points. Values returned must be within the acceptable\n        range of QCCoercionCost\n\n        The question is: What are the transfer costs associated with\n        moving this data to the other_qc_type?\n\n        Parameters\n        ----------\n        other_qc_type : QueryCompiler Class\n            The query compiler class to which we should return the cost of switching.\n        api_cls_name : Optional[str]\n            The name of the class performing the operation which can be used as a\n            consideration for the costing analysis. `None` means the function does not belong to a class.\n        operation : str\n            The operation being performed which can be used as a consideration\n            for the costing analysis.\n        arguments : MappingProxyType[str, Any]\n            The arguments to the operation.\n\n        Returns\n        -------\n        Optional[int]\n            Cost of migrating the data from this qc to the other_qc or\n            None if the cost cannot be determined.\n        \"\"\"\n        if isinstance(self, other_qc_type):\n            return QCCoercionCost.COST_ZERO\n        if self.__class__._transfer_threshold() <= 0:\n            return QCCoercionCost.COST_ZERO\n        cost = int(\n            (\n                QCCoercionCost.COST_IMPOSSIBLE\n                * self._max_shape()[0]\n                / self.__class__._transfer_threshold()\n            )\n        )\n        if cost > QCCoercionCost.COST_IMPOSSIBLE:\n            return QCCoercionCost.COST_IMPOSSIBLE\n        return cost\n\n    @classmethod\n    def _stay_cost_rows(\n        cls, rows: int, per_row_overhead: int, max_size: int, op_init_overhead: int\n    ) -> int:\n        \"\"\"\n        Get the cost of staying on this query compiler for an operation.\n\n        
Parameters\n        ----------\n        rows : int\n            The number of input rows.\n        per_row_overhead : int\n            Per-row cost of this operation.\n        max_size : int\n            Max rows for this query compiler.\n        op_init_overhead : int\n            Overhead cost of this operation.\n\n        Returns\n        -------\n        int\n            Cost of staying on this query compiler.\n        \"\"\"\n        if rows > max_size:\n            return QCCoercionCost.COST_IMPOSSIBLE\n        cost_all_rows = rows * per_row_overhead\n        normalized_cost_all_rows = (\n            cost_all_rows / max_size * QCCoercionCost.COST_IMPOSSIBLE\n        )\n        total_cost = normalized_cost_all_rows + op_init_overhead\n        if total_cost > QCCoercionCost.COST_IMPOSSIBLE:\n            return QCCoercionCost.COST_IMPOSSIBLE\n        return int(total_cost)\n\n    @disable_logging\n    def stay_cost(\n        self,\n        api_cls_name: Optional[str],\n        operation: str,\n        arguments: MappingProxyType[str, Any],\n    ) -> Optional[int]:\n        \"\"\"\n        Return the \"opportunity cost\" of not moving the data.\n\n        This is called for opportunistic decision points where we\n        have a single data frame which may be moved to another engine.\n        This can often be the inverse of the move_to_cost, but it can\n        be independently calculated and different. 
For instance, the\n        move_to_cost may include the cost of network transmission to\n        the other engine, whereas the cost returned by 'stay_cost'\n        may be simply the cost of running the operation locally.\n\n        The question is: What is the cost of running this operation on\n        the current dataframe?\n\n        Values returned must be within the acceptable range of\n        QCCoercionCost.\n\n        Parameters\n        ----------\n        api_cls_name : Optional[str]\n            The class name performing the operation which can be used as a\n            consideration for the costing analysis. `None` means the function is\n            not associated with a class.\n        operation : str\n            The operation being performed which can be used as a consideration\n            for the costing analysis.\n        arguments : MappingProxyType[str, Any]\n            The arguments to the operation.\n\n        Returns\n        -------\n        Optional[int]\n            Cost of doing this operation on the current backend.\n        \"\"\"\n        return self._stay_cost_rows(\n            self._max_shape()[0],\n            self._OPERATION_PER_ROW_OVERHEAD,\n            self.__class__._engine_max_size(),\n            self._OPERATION_INITIALIZATION_OVERHEAD,\n        )\n\n    @disable_logging\n    @classmethod\n    def move_to_me_cost(\n        cls,\n        other_qc: BaseQueryCompiler,\n        api_cls_name: Optional[str],\n        operation: str,\n        arguments: MappingProxyType[str, Any],\n    ) -> Optional[int]:\n        \"\"\"\n        Return the execution and hidden coercion costs from other_qc.\n\n        This can be implemented as a class method version of stay_cost, though\n        since this class is not yet instantiated it may have a different\n        implementation. 
It may also include hidden transport or serialization\n        costs.\n\n        Values returned must be within the acceptable range of QCCoercionCost.\n\n        The question is: What is the cost of executing this operation if it\n        were to move to this query compiler?\n\n        Parameters\n        ----------\n        other_qc : BaseQueryCompiler\n            The query compiler from which we should return the cost of switching.\n        api_cls_name : Optional[str]\n            The class name performing the operation which can be used as a\n            consideration for the costing analysis. `None` means the function\n            is not associated with a class.\n        operation : str\n            The operation being performed which can be used as a consideration\n            for the costing analysis.\n        arguments : MappingProxyType[str, Any]\n            The arguments to the operation.\n\n        Returns\n        -------\n        Optional[int]\n            Cost of migrating the data from other_qc to this qc or\n            None if the cost cannot be determined.\n        \"\"\"\n        row_count = other_qc._max_shape()[0]\n\n        return cls._stay_cost_rows(\n            row_count,\n            cls._OPERATION_PER_ROW_OVERHEAD,\n            cls._engine_max_size(),\n            cls._OPERATION_INITIALIZATION_OVERHEAD,\n        )\n\n    @classmethod\n    def _engine_max_size(cls) -> int:\n        \"\"\"Maximum number of rows this engine can handle.\"\"\"\n        return cls._MAX_SIZE_THIS_ENGINE_CAN_HANDLE\n\n    @classmethod\n    def _transfer_threshold(cls) -> int:\n        \"\"\"Maximum number of rows this backend can handle before transferring data to another backend.\"\"\"\n        return cls._TRANSFER_THRESHOLD\n\n    @disable_logging\n    @classmethod\n    def max_cost(cls) -> int:\n        \"\"\"\n        Return the max cost allowed by this engine.\n\n        Returns\n        -------\n        int\n            Max cost allowed for migrating the 
data to this qc.\n        \"\"\"\n        return QCCoercionCost.COST_IMPOSSIBLE\n\n    # Abstract Methods and Fields: Must implement in children classes\n    # In some cases, you may be able to use the same implementation for\n    # some of these abstract methods, but for the sake of generality they are\n    # treated differently.\n\n    lazy_row_labels = False\n    lazy_row_count = False\n    lazy_column_types = False\n    lazy_column_labels = False\n    lazy_column_count = False\n\n    def _max_shape(self) -> tuple[int, int]:\n        \"\"\"\n        Return the maximum dimensions of the frame.\n\n        For lazily evaluated engines the shape of the dataset may be expensive to\n        determine (see lazy_shape), but the maximum shape can be calculated\n        inexpensively.\n\n        Returns\n        -------\n        Tuple\n            Maximum shape of the dataframe (height, width).\n        \"\"\"\n        return self.get_axis_len(axis=0), self.get_axis_len(axis=1)\n\n    @property\n    def lazy_shape(self):\n        \"\"\"\n        Whether either of the underlying dataframe's dimensions (row count/column count) is computed lazily.\n\n        If True, the frontend should avoid length/shape checks as much as possible.\n\n        Returns\n        -------\n        bool\n        \"\"\"\n        return self.lazy_row_count or self.lazy_column_count\n\n    _shape_hint = None\n\n    # Metadata modification abstract methods\n    def add_prefix(self, prefix, axis=1):\n        \"\"\"\n        Add string prefix to the index labels along specified axis.\n\n        Parameters\n        ----------\n        prefix : str\n            The string to add before each label.\n        axis : {0, 1}, default: 1\n            Axis to add prefix along. 
0 is for index and 1 is for columns.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            New query compiler with updated labels.\n        \"\"\"\n        return DataFrameDefault.register(pandas.DataFrame.add_prefix)(\n            self, prefix=prefix, axis=axis\n        )\n\n    def add_suffix(self, suffix, axis=1):\n        \"\"\"\n        Add string suffix to the index labels along specified axis.\n\n        Parameters\n        ----------\n        suffix : str\n            The string to add after each label.\n        axis : {0, 1}, default: 1\n            Axis to add suffix along. 0 is for index and 1 is for columns.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            New query compiler with updated labels.\n        \"\"\"\n        return DataFrameDefault.register(pandas.DataFrame.add_suffix)(\n            self, suffix=suffix, axis=axis\n        )\n\n    # END Metadata modification abstract methods\n\n    # Abstract copy\n\n    def copy(self):\n        \"\"\"\n        Make a copy of this object.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            Copy of self.\n\n        Notes\n        -----\n        For copy, we don't want a situation where we modify the metadata of the\n        copies if we end up modifying something here. We copy all of the metadata\n        to prevent that.\n        \"\"\"\n        return DataFrameDefault.register(pandas.DataFrame.copy)(self)\n\n    # END Abstract copy\n\n    # Abstract join and append helper functions\n\n    def concat(self, axis, other, **kwargs):  # noqa: PR02\n        \"\"\"\n        Concatenate `self` with passed query compilers along specified axis.\n\n        Parameters\n        ----------\n        axis : {0, 1}\n            Axis to concatenate along. 
0 is for index and 1 is for columns.\n        other : BaseQueryCompiler or list of such\n            Objects to concatenate with `self`.\n        join : {'outer', 'inner', 'right', 'left'}, default: 'outer'\n            Type of join that will be used if indices on the other axis are different.\n            (note: if specified, has to be passed as ``join=value``).\n        ignore_index : bool, default: False\n            If True, do not use the index values along the concatenation axis.\n            The resulting axis will be labeled 0, …, n - 1.\n            (note: if specified, has to be passed as ``ignore_index=value``).\n        sort : bool, default: False\n            Whether or not to sort non-concatenation axis.\n            (note: if specified, has to be passed as ``sort=value``).\n        **kwargs : dict\n            Serves the compatibility purpose. Does not affect the result.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            Concatenated objects.\n        \"\"\"\n        concat_join = [\"inner\", \"outer\"]\n\n        def concat(df, axis, other, **kwargs):\n            kwargs.pop(\"join_axes\", None)\n            ignore_index = kwargs.get(\"ignore_index\", False)\n            if kwargs.get(\"join\", \"outer\") in concat_join:\n                if not isinstance(other, list):\n                    other = [other]\n                other = [df] + other\n                result = pandas.concat(other, axis=axis, **kwargs)\n            else:\n                if isinstance(other, (list, np.ndarray)) and len(other) == 1:\n                    other = other[0]\n                ignore_index = kwargs.pop(\"ignore_index\", None)\n                kwargs[\"how\"] = kwargs.pop(\"join\", None)\n                if (\n                    isinstance(other, (pandas.DataFrame, pandas.Series))\n                    or len(other) <= 1\n                ):\n                    kwargs[\"rsuffix\"] = \"r_\"\n                result = df.join(other, **kwargs)\n   
         if ignore_index:\n                if axis == 0:\n                    result = result.reset_index(drop=True)\n                else:\n                    result.columns = pandas.RangeIndex(len(result.columns))\n            return result\n\n        return DataFrameDefault.register(concat)(self, axis=axis, other=other, **kwargs)\n\n    # END Abstract join and append helper functions\n\n    # Data Management Methods\n    @abc.abstractmethod\n    def free(self):\n        \"\"\"Trigger a cleanup of this object.\"\"\"\n        pass\n\n    @abc.abstractmethod\n    def finalize(self):\n        \"\"\"Finalize constructing the dataframe calling all deferred functions which were used to build it.\"\"\"\n        pass\n\n    @abc.abstractmethod\n    def execute(self):\n        \"\"\"Wait for all computations to complete without materializing data.\"\"\"\n        pass\n\n    def support_materialization_in_worker_process(self) -> bool:\n        \"\"\"\n        Whether it's possible to call function `to_pandas` during the pickling process, at the moment of recreating the object.\n\n        Returns\n        -------\n        bool\n        \"\"\"\n        return self._modin_frame.support_materialization_in_worker_process()\n\n    # END Data Management Methods\n\n    # Data Movement Methods\n    def move_to(self, target_backend: str) -> Union[BaseQueryCompiler, Any]:\n        \"\"\"\n        Move this query compiler to the specified backend.\n\n        Parameters\n        ----------\n        target_backend : str\n            The backend to move to.\n\n        Returns\n        -------\n        BaseQueryCompiler or Any\n            The new query compiler with the source data, or a sentinel `NotImplemented`\n            value if transfer is not implemented.\n        \"\"\"\n        return NotImplemented\n\n    @classmethod\n    def move_from(cls, source_qc: BaseQueryCompiler) -> Union[BaseQueryCompiler, Any]:\n        \"\"\"\n        Move the source query compiler to the current 
backend.\n\n        Parameters\n        ----------\n        source_qc : BaseQueryCompiler\n            The source query compiler to move data from.\n\n        Returns\n        -------\n        BaseQueryCompiler or Any\n            A new query compiler with the source data, or a sentinel `NotImplemented`\n            value if transfer is not implemented.\n        \"\"\"\n        return NotImplemented\n\n    # END Data Movement Methods\n\n    # To/From Pandas\n    @abc.abstractmethod\n    def to_pandas(self):\n        \"\"\"\n        Convert underlying query compilers data to ``pandas.DataFrame``.\n\n        Returns\n        -------\n        pandas.DataFrame\n            The QueryCompiler converted to pandas.\n        \"\"\"\n        pass\n\n    @classmethod\n    @abc.abstractmethod\n    def from_pandas(cls, df, data_cls):\n        \"\"\"\n        Build QueryCompiler from pandas DataFrame.\n\n        Parameters\n        ----------\n        df : pandas.DataFrame\n            The pandas DataFrame to convert from.\n        data_cls : type\n            :py:class:`~modin.core.dataframe.pandas.dataframe.dataframe.PandasDataframe` class\n            (or its descendant) to convert to.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            QueryCompiler containing data from the pandas DataFrame.\n        \"\"\"\n        pass\n\n    # END To/From Pandas\n\n    # From Arrow\n    @classmethod\n    @abc.abstractmethod\n    def from_arrow(cls, at, data_cls):\n        \"\"\"\n        Build QueryCompiler from Arrow Table.\n\n        Parameters\n        ----------\n        at : Arrow Table\n            The Arrow Table to convert from.\n        data_cls : type\n            :py:class:`~modin.core.dataframe.pandas.dataframe.dataframe.PandasDataframe` class\n            (or its descendant) to convert to.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            QueryCompiler containing data from the pandas DataFrame.\n        \"\"\"\n        
pass\n\n    # END From Arrow\n\n    # To NumPy\n\n    def to_numpy(self, **kwargs):  # noqa: PR02\n        \"\"\"\n        Convert underlying query compilers data to NumPy array.\n\n        Parameters\n        ----------\n        dtype : dtype\n            The dtype of the resulted array.\n        copy : bool\n            Whether to ensure that the returned value is not a view on another array.\n        na_value : object\n            The value to replace missing values with.\n        **kwargs : dict\n            Serves the compatibility purpose. Does not affect the result.\n\n        Returns\n        -------\n        np.ndarray\n            The QueryCompiler converted to NumPy array.\n        \"\"\"\n        return DataFrameDefault.register(pandas.DataFrame.to_numpy)(self, **kwargs)\n\n    # END To NumPy\n\n    def do_array_ufunc_implementation(\n        self,\n        frame: BasePandasDataset,\n        ufunc: np.ufunc,\n        method: str,\n        *inputs: Any,\n        **kwargs: Any,\n    ) -> Union[\"DataFrame\", \"Series\", Any]:\n        \"\"\"\n        Apply the provided NumPy ufunc to the underlying data.\n\n        This method is called by the ``__array_ufunc__`` dispatcher on BasePandasDataset.\n\n        Unlike other query compiler methods, this function directly operates on the input DataFrame/Series\n        to allow for easier argument processing. The default implementation defaults to pandas, but\n        a query compiler sub-class may override this method to provide a distributed implementation.\n\n        See NumPy docs: https://numpy.org/doc/stable/user/basics.subclassing.html#array-ufunc-for-ufuncs\n\n        Parameters\n        ----------\n        frame : BasePandasDataset\n            The DataFrame or Series on which the ufunc was called. 
Its query compiler must match ``self``.\n\n        ufunc : np.ufunc\n            The function to apply.\n\n        method : str\n            The name of the function to apply.\n\n        *inputs : Any\n            Positional arguments to pass to ``ufunc``.\n\n        **kwargs : Any\n            Keyword arguments to pass to ``ufunc``.\n\n        Returns\n        -------\n        DataFrame, Series, or Any\n            The result of applying the ufunc to ``frame``.\n        \"\"\"\n        assert (\n            self is frame._query_compiler\n        ), \"array ufunc called with mismatched query compiler and input frame\"\n        # we can't use the regular default_to_pandas() method because self is one of the\n        # `inputs` to __array_ufunc__, and pandas has some checks on the identity of the\n        # inputs [1]. The usual default to pandas will call _to_pandas() on the inputs\n        # as well as on self, but that gives inputs[0] a different identity from self.\n        #\n        # [1] https://github.com/pandas-dev/pandas/blob/2c4c072ade78b96a9eb05097a5fcf4347a3768f3/pandas/_libs/ops_dispatch.pyx#L99-L109\n        self._maybe_warn_on_default(message=\"__array_ufunc__\")\n        pandas_self = frame._to_pandas()\n        pandas_result = pandas_self.__array_ufunc__(\n            ufunc,\n            method,\n            *(\n                pandas_self if each_input is frame else try_cast_to_pandas(each_input)\n                for each_input in inputs\n            ),\n            **try_cast_to_pandas(kwargs),\n        )\n        if isinstance(pandas_result, pandas.DataFrame):\n            from modin.pandas import DataFrame\n\n            return DataFrame(pandas_result)\n        elif isinstance(pandas_result, pandas.Series):\n            from modin.pandas import Series\n\n            return Series(pandas_result)\n        # ufuncs are required to be one-to-one mappings, so this branch should never be hit\n        return pandas_result  # pragma: no cover\n\n    def 
do_array_function_implementation(\n        self,\n        frame: BasePandasDataset,\n        func: callable,\n        types: tuple,\n        args: tuple,\n        kwargs: dict,\n    ) -> Union[\"DataFrame\", \"Series\", Any]:\n        \"\"\"\n        Apply the provided NumPy array function to the underlying data.\n\n        This method is called by the ``__array_function__`` dispatcher on BasePandasDataset.\n\n        Unlike other query compiler methods, this function directly operates on the input DataFrame/Series\n        to allow for easier argument processing. The default implementation converts the frame to a NumPy array, but\n        a query compiler sub-class may override this method to provide a distributed implementation.\n\n        See NumPy docs: https://numpy.org/neps/nep-0018-array-function-protocol.html#nep18\n\n        Parameters\n        ----------\n        frame : BasePandasDataset\n            The DataFrame or Series on which the array function was called. Its query compiler must match ``self``.\n        func : callable\n            The NumPy func to apply.\n        types : tuple\n            The types of the args.\n        args : tuple\n            The args to the func.\n        kwargs : dict\n            Additional keyword arguments.\n\n        Returns\n        -------\n        DataFrame | Series | Any\n            The result of applying the function to this dataset. 
By default, it will return\n            a NumPy array.\n        \"\"\"\n        from modin.pandas.base import BasePandasDataset\n\n        assert (\n            self is frame._query_compiler\n        ), \"__array_function__ called with mismatched query compiler and input frame\"\n        # Replace each modin type with numpy ndarray, since we convert modin frames to np ndarrays.\n        new_types = (\n            np.ndarray if issubclass(tpe, BasePandasDataset) else tpe for tpe in types\n        )\n        return frame.__array__().__array_function__(func, new_types, args, kwargs)\n\n    # Dataframe exchange protocol\n\n    @abc.abstractmethod\n    def to_interchange_dataframe(\n        self, nan_as_null: bool = False, allow_copy: bool = True\n    ) -> ProtocolDataframe:\n        \"\"\"\n        Get a DataFrame exchange protocol object representing data of the Modin DataFrame.\n\n        See more about the protocol in https://data-apis.org/dataframe-protocol/latest/index.html.\n\n        Parameters\n        ----------\n        nan_as_null : bool, default: False\n            A keyword intended for the consumer to tell the producer\n            to overwrite null values in the data with ``NaN`` (or ``NaT``).\n            This currently has no effect; once support for nullable extension\n            dtypes is added, this value should be propagated to columns.\n        allow_copy : bool, default: True\n            A keyword that defines whether or not the library is allowed\n            to make a copy of the data. For example, copying data would be necessary\n            if a library supports strided buffers, given that this protocol\n            specifies contiguous buffers. 
Currently, if the flag is set to ``False``\n            and a copy is needed, a ``RuntimeError`` will be raised.\n\n        Returns\n        -------\n        ProtocolDataframe\n            A dataframe object following the DataFrame protocol specification.\n        \"\"\"\n        pass\n\n    @classmethod\n    @abc.abstractmethod\n    def from_interchange_dataframe(cls, df: ProtocolDataframe, data_cls):\n        \"\"\"\n        Build QueryCompiler from a DataFrame object supporting the dataframe exchange protocol `__dataframe__()`.\n\n        Parameters\n        ----------\n        df : ProtocolDataframe\n            The DataFrame object supporting the dataframe exchange protocol.\n        data_cls : type\n            :py:class:`~modin.core.dataframe.pandas.dataframe.dataframe.PandasDataframe` class\n            (or its descendant) to convert to.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            QueryCompiler containing data from the DataFrame.\n        \"\"\"\n        pass\n\n    # END Dataframe exchange protocol\n\n    def to_list(self):\n        \"\"\"\n        Return a list of the values.\n\n        These are each a scalar type, which is a Python scalar (for str, int, float) or a pandas scalar (for Timestamp/Timedelta/Interval/Period).\n\n        Returns\n        -------\n        list\n        \"\"\"\n        return SeriesDefault.register(pandas.Series.to_list)(self)\n\n    @doc_utils.add_refer_to(\"DataFrame.to_dict\")\n    def dataframe_to_dict(self, orient=\"dict\", into=dict, index=True):  # noqa: PR01\n        \"\"\"\n        Convert the DataFrame to a dictionary.\n\n        Returns\n        -------\n        dict or `into` instance\n        \"\"\"\n        return self.to_pandas().to_dict(orient, into, index)\n\n    @doc_utils.add_refer_to(\"Series.to_dict\")\n    def series_to_dict(self, into=dict):  # noqa: PR01\n        \"\"\"\n        Convert the Series to a dictionary.\n\n        Returns\n        -------\n        dict or `into` 
instance\n        \"\"\"\n        return SeriesDefault.register(pandas.Series.to_dict)(self, into)\n\n    # Abstract inter-data operations (e.g. add, sub)\n    # These operations require two DataFrames and will change the shape of the\n    # data if the index objects don't match. An outer join + op is performed,\n    # such that columns/rows that don't have an index on the other DataFrame\n    # result in NaN values.\n\n    @doc_utils.add_refer_to(\"DataFrame.align\")\n    def align(self, other, **kwargs):\n        \"\"\"\n        Align two objects on their axes with the specified join method.\n\n        Join method is specified for each axis Index.\n\n        Parameters\n        ----------\n        other : BaseQueryCompiler\n        **kwargs : dict\n            Other arguments for aligning.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            Aligned `self`.\n        BaseQueryCompiler\n            Aligned `other`.\n        \"\"\"\n        return DataFrameDefault.register(pandas.DataFrame.align)(\n            self, other=other, **kwargs\n        )\n\n    @doc_utils.doc_binary_method(operation=\"addition\", sign=\"+\")\n    def add(self, other, **kwargs):  # noqa: PR02\n        return BinaryDefault.register(pandas.DataFrame.add)(self, other=other, **kwargs)\n\n    @doc_utils.add_refer_to(\"DataFrame.combine\")\n    def combine(self, other, **kwargs):  # noqa: PR02\n        \"\"\"\n        Perform column-wise combine with another QueryCompiler with passed `func`.\n\n        If axes are not equal, perform frames alignment first.\n\n        Parameters\n        ----------\n        other : BaseQueryCompiler\n            Left operand of the binary operation.\n        func : callable(pandas.Series, pandas.Series) -> pandas.Series\n            Function that takes two ``pandas.Series`` with aligned axes\n            and returns one ``pandas.Series`` as resulting combination.\n        fill_value : float or None\n            Value to fill missing values 
with after frame alignment occurred.\n        overwrite : bool\n            If True, columns in `self` that do not exist in `other`\n            will be overwritten with NaNs.\n        **kwargs : dict\n            Serves the compatibility purpose. Does not affect the result.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            Result of combine.\n        \"\"\"\n        return BinaryDefault.register(pandas.DataFrame.combine)(\n            self, other=other, **kwargs\n        )\n\n    @doc_utils.add_refer_to(\"DataFrame.combine_first\")\n    def combine_first(self, other, **kwargs):  # noqa: PR02\n        \"\"\"\n        Fill null elements of `self` with value in the same location in `other`.\n\n        If axes are not equal, perform frames alignment first.\n\n        Parameters\n        ----------\n        other : BaseQueryCompiler\n            Provided frame to use to fill null values from.\n        **kwargs : dict\n            Serves the compatibility purpose. Does not affect the result.\n\n        Returns\n        -------\n        BaseQueryCompiler\n        \"\"\"\n        return BinaryDefault.register(pandas.DataFrame.combine_first)(\n            self, other=other, **kwargs\n        )\n\n    @doc_utils.doc_binary_method(operation=\"equality comparison\", sign=\"==\")\n    def eq(self, other, **kwargs):  # noqa: PR02\n        return BinaryDefault.register(pandas.DataFrame.eq)(self, other=other, **kwargs)\n\n    @doc_utils.doc_binary_method(\n        operation=\"equality comparison\", sign=\"==\", op_type=\"series_comparison\"\n    )\n    def series_eq(self, other, **kwargs):  # noqa: PR02\n        return BinaryDefault.register(pandas.Series.eq)(\n            self,\n            other=other,\n            squeeze_self=True,\n            squeeze_other=kwargs.pop(\"squeeze_other\", False),\n            **kwargs,\n        )\n\n    @doc_utils.add_refer_to(\"DataFrame.equals\")\n    def equals(self, other):  # noqa: PR01, RT01\n        return 
BinaryDefault.register(pandas.DataFrame.equals)(self, other=other)\n\n    @doc_utils.doc_binary_method(operation=\"integer division\", sign=\"//\")\n    def floordiv(self, other, **kwargs):  # noqa: PR02\n        return BinaryDefault.register(pandas.DataFrame.floordiv)(\n            self, other=other, **kwargs\n        )\n\n    @doc_utils.add_refer_to(\"Series.divmod\")\n    def divmod(self, other, **kwargs):\n        \"\"\"\n        Return Integer division and modulo of `self` and `other`, element-wise (binary operator divmod).\n\n        Equivalent to divmod(`self`, `other`), but with support to substitute a fill_value for missing data in either one of the inputs.\n\n        Parameters\n        ----------\n        other : BaseQueryCompiler or scalar value\n        **kwargs : dict\n            Other arguments for division.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            Compiler representing Series with divisor part of division.\n        BaseQueryCompiler\n            Compiler representing Series with modulo part of division.\n        \"\"\"\n        return SeriesDefault.register(pandas.Series.divmod)(self, other=other, **kwargs)\n\n    @doc_utils.doc_binary_method(\n        operation=\"greater than or equal comparison\", sign=\">=\", op_type=\"comparison\"\n    )\n    def ge(self, other, **kwargs):  # noqa: PR02\n        return BinaryDefault.register(pandas.DataFrame.ge)(self, other=other, **kwargs)\n\n    @doc_utils.doc_binary_method(\n        operation=\"greater than or equal comparison\",\n        sign=\">=\",\n        op_type=\"series_comparison\",\n    )\n    def series_ge(self, other, **kwargs):  # noqa: PR02\n        return BinaryDefault.register(pandas.Series.ge)(\n            self,\n            other=other,\n            squeeze_self=True,\n            squeeze_other=kwargs.pop(\"squeeze_other\", False),\n            **kwargs,\n        )\n\n    @doc_utils.doc_binary_method(\n        operation=\"greater than comparison\", 
sign=\">\", op_type=\"comparison\"\n    )\n    def gt(self, other, **kwargs):  # noqa: PR02\n        return BinaryDefault.register(pandas.DataFrame.gt)(self, other=other, **kwargs)\n\n    @doc_utils.doc_binary_method(\n        operation=\"greater than comparison\", sign=\">\", op_type=\"series_comparison\"\n    )\n    def series_gt(self, other, **kwargs):  # noqa: PR02\n        return BinaryDefault.register(pandas.Series.gt)(\n            self,\n            other=other,\n            squeeze_self=True,\n            squeeze_other=kwargs.pop(\"squeeze_other\", False),\n            **kwargs,\n        )\n\n    @doc_utils.doc_binary_method(\n        operation=\"less than or equal comparison\", sign=\"<=\", op_type=\"comparison\"\n    )\n    def le(self, other, **kwargs):  # noqa: PR02\n        return BinaryDefault.register(pandas.DataFrame.le)(self, other=other, **kwargs)\n\n    @doc_utils.doc_binary_method(\n        operation=\"less than or equal comparison\",\n        sign=\"<=\",\n        op_type=\"series_comparison\",\n    )\n    def series_le(self, other, **kwargs):  # noqa: PR02\n        return BinaryDefault.register(pandas.Series.le)(\n            self,\n            other=other,\n            squeeze_self=True,\n            squeeze_other=kwargs.pop(\"squeeze_other\", False),\n            **kwargs,\n        )\n\n    @doc_utils.doc_binary_method(\n        operation=\"less than comparison\", sign=\"<\", op_type=\"comparison\"\n    )\n    def lt(self, other, **kwargs):  # noqa: PR02\n        return BinaryDefault.register(pandas.DataFrame.lt)(self, other=other, **kwargs)\n\n    @doc_utils.doc_binary_method(\n        operation=\"less than\", sign=\"<\", op_type=\"series_comparison\"\n    )\n    def series_lt(self, other, **kwargs):  # noqa: PR02\n        return BinaryDefault.register(pandas.Series.lt)(\n            self,\n            other=other,\n            squeeze_self=True,\n            squeeze_other=kwargs.pop(\"squeeze_other\", False),\n            **kwargs,\n      
  )\n\n    @doc_utils.doc_binary_method(operation=\"modulo\", sign=\"%\")\n    def mod(self, other, **kwargs):  # noqa: PR02\n        return BinaryDefault.register(pandas.DataFrame.mod)(self, other=other, **kwargs)\n\n    @doc_utils.doc_binary_method(operation=\"multiplication\", sign=\"*\")\n    def mul(self, other, **kwargs):  # noqa: PR02\n        return BinaryDefault.register(pandas.DataFrame.mul)(self, other=other, **kwargs)\n\n    @doc_utils.doc_binary_method(\n        operation=\"multiplication\", sign=\"*\", self_on_right=True\n    )\n    def rmul(self, other, **kwargs):  # noqa: PR02\n        return BinaryDefault.register(pandas.DataFrame.rmul)(\n            self, other=other, **kwargs\n        )\n\n    @doc_utils.add_refer_to(\"DataFrame.corr\")\n    def corr(self, **kwargs):  # noqa: PR02\n        \"\"\"\n        Compute pairwise correlation of columns, excluding NA/null values.\n\n        Parameters\n        ----------\n        method : {'pearson', 'kendall', 'spearman'} or callable(pandas.Series, pandas.Series) -> pandas.Series\n            Correlation method.\n        min_periods : int\n            Minimum number of observations required per pair of columns\n            to have a valid result. If fewer than `min_periods` non-NA values\n            are present the result will be NA.\n        **kwargs : dict\n            Serves the compatibility purpose. 
Does not affect the result.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            Correlation matrix.\n        \"\"\"\n        return DataFrameDefault.register(pandas.DataFrame.corr)(self, **kwargs)\n\n    @doc_utils.add_refer_to(\"Series.corr\")\n    def series_corr(self, **kwargs):  # noqa: PR01\n        \"\"\"\n        Compute correlation with `other` Series, excluding missing values.\n\n        The two `Series` objects are not required to be the same length and will be\n        aligned internally before the correlation function is applied.\n\n        Returns\n        -------\n        float\n            Correlation with other.\n        \"\"\"\n        return SeriesDefault.register(pandas.Series.corr)(self, **kwargs)\n\n    @doc_utils.add_refer_to(\"DataFrame.corrwith\")\n    def corrwith(self, **kwargs):  # noqa: PR01\n        \"\"\"\n        Compute pairwise correlation.\n\n        Returns\n        -------\n        BaseQueryCompiler\n        \"\"\"\n        return DataFrameDefault.register(pandas.DataFrame.corrwith)(self, **kwargs)\n\n    @doc_utils.add_refer_to(\"DataFrame.cov\")\n    def cov(self, **kwargs):  # noqa: PR02\n        \"\"\"\n        Compute pairwise covariance of columns, excluding NA/null values.\n\n        Parameters\n        ----------\n        min_periods : int\n        **kwargs : dict\n            Serves the compatibility purpose. 
Does not affect the result.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            Covariance matrix.\n        \"\"\"\n        return DataFrameDefault.register(pandas.DataFrame.cov)(self, **kwargs)\n\n    def dot(self, other, **kwargs):  # noqa: PR02\n        \"\"\"\n        Compute the matrix multiplication of `self` and `other`.\n\n        Parameters\n        ----------\n        other : BaseQueryCompiler or NumPy array\n            The other query compiler or NumPy array to matrix multiply with `self`.\n        squeeze_self : boolean\n            If `self` is a one-column query compiler, indicates whether it represents Series object.\n        squeeze_other : boolean\n            If `other` is a one-column query compiler, indicates whether it represents Series object.\n        **kwargs : dict\n            Serves the compatibility purpose. Does not affect the result.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            A new query compiler that contains result of the matrix multiply.\n        \"\"\"\n        if kwargs.get(\"squeeze_self\", False):\n            applyier = pandas.Series.dot\n        else:\n            applyier = pandas.DataFrame.dot\n        return BinaryDefault.register(applyier)(self, other=other, **kwargs)\n\n    @doc_utils.doc_binary_method(\n        operation=\"not equal comparison\", sign=\"!=\", op_type=\"comparison\"\n    )\n    def ne(self, other, **kwargs):  # noqa: PR02\n        return BinaryDefault.register(pandas.DataFrame.ne)(self, other=other, **kwargs)\n\n    @doc_utils.doc_binary_method(\n        operation=\"not equal comparison\", sign=\"!=\", op_type=\"series_comparison\"\n    )\n    def series_ne(self, other, **kwargs):  # noqa: PR02\n        return BinaryDefault.register(pandas.Series.ne)(\n            self,\n            other=other,\n            squeeze_self=True,\n            squeeze_other=kwargs.pop(\"squeeze_other\", False),\n            **kwargs,\n        )\n\n    
@doc_utils.doc_binary_method(operation=\"exponential power\", sign=\"**\")\n    def pow(self, other, **kwargs):  # noqa: PR02\n        return BinaryDefault.register(pandas.DataFrame.pow)(self, other=other, **kwargs)\n\n    @doc_utils.doc_binary_method(operation=\"addition\", sign=\"+\", self_on_right=True)\n    def radd(self, other, **kwargs):  # noqa: PR02\n        return BinaryDefault.register(pandas.DataFrame.radd)(\n            self, other=other, **kwargs\n        )\n\n    @doc_utils.add_refer_to(\"Series.rdivmod\")\n    def rdivmod(self, other, **kwargs):\n        \"\"\"\n        Return Integer division and modulo of `self` and `other`, element-wise (binary operator rdivmod).\n\n        Equivalent to `other` divmod `self`, but with support to substitute a fill_value for missing data in either one of the inputs.\n\n        Parameters\n        ----------\n        other : BaseQueryCompiler or scalar value\n        **kwargs : dict\n            Other arguments for division.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            Compiler representing Series with divisor part of division.\n        BaseQueryCompiler\n            Compiler representing Series with modulo part of division.\n        \"\"\"\n        return SeriesDefault.register(pandas.Series.rdivmod)(\n            self, other=other, **kwargs\n        )\n\n    @doc_utils.doc_binary_method(\n        operation=\"integer division\", sign=\"//\", self_on_right=True\n    )\n    def rfloordiv(self, other, **kwargs):  # noqa: PR02\n        return BinaryDefault.register(pandas.DataFrame.rfloordiv)(\n            self, other=other, **kwargs\n        )\n\n    @doc_utils.doc_binary_method(operation=\"modulo\", sign=\"%\", self_on_right=True)\n    def rmod(self, other, **kwargs):  # noqa: PR02\n        return BinaryDefault.register(pandas.DataFrame.rmod)(\n            self, other=other, **kwargs\n        )\n\n    @doc_utils.doc_binary_method(\n        operation=\"exponential power\", sign=\"**\", 
self_on_right=True\n    )\n    def rpow(self, other, **kwargs):  # noqa: PR02\n        return BinaryDefault.register(pandas.DataFrame.rpow)(\n            self, other=other, **kwargs\n        )\n\n    @doc_utils.doc_binary_method(operation=\"subtraction\", sign=\"-\", self_on_right=True)\n    def rsub(self, other, **kwargs):  # noqa: PR02\n        return BinaryDefault.register(pandas.DataFrame.rsub)(\n            self, other=other, **kwargs\n        )\n\n    @doc_utils.doc_binary_method(operation=\"division\", sign=\"/\", self_on_right=True)\n    def rtruediv(self, other, **kwargs):  # noqa: PR02\n        return BinaryDefault.register(pandas.DataFrame.rtruediv)(\n            self, other=other, **kwargs\n        )\n\n    @doc_utils.doc_binary_method(operation=\"subtraction\", sign=\"-\")\n    def sub(self, other, **kwargs):  # noqa: PR02\n        return BinaryDefault.register(pandas.DataFrame.sub)(self, other=other, **kwargs)\n\n    @doc_utils.doc_binary_method(operation=\"division\", sign=\"/\")\n    def truediv(self, other, **kwargs):  # noqa: PR02\n        return BinaryDefault.register(pandas.DataFrame.truediv)(\n            self, other=other, **kwargs\n        )\n\n    @doc_utils.doc_binary_method(operation=\"conjunction\", sign=\"&\", op_type=\"logical\")\n    def __and__(self, other, **kwargs):  # noqa: PR02\n        return BinaryDefault.register(pandas.DataFrame.__and__)(\n            self, other=other, **kwargs\n        )\n\n    @doc_utils.doc_binary_method(operation=\"disjunction\", sign=\"|\", op_type=\"logical\")\n    def __or__(self, other, **kwargs):  # noqa: PR02\n        return BinaryDefault.register(pandas.DataFrame.__or__)(\n            self, other=other, **kwargs\n        )\n\n    @doc_utils.doc_binary_method(\n        operation=\"conjunction\", sign=\"&\", op_type=\"logical\", self_on_right=True\n    )\n    def __rand__(self, other, **kwargs):  # noqa: PR02\n        return BinaryDefault.register(pandas.DataFrame.__rand__)(\n            self, 
other=other, **kwargs\n        )\n\n    @doc_utils.doc_binary_method(\n        operation=\"disjunction\", sign=\"|\", op_type=\"logical\", self_on_right=True\n    )\n    def __ror__(self, other, **kwargs):  # noqa: PR02\n        return BinaryDefault.register(pandas.DataFrame.__ror__)(\n            self, other=other, **kwargs\n        )\n\n    @doc_utils.doc_binary_method(\n        operation=\"exclusive or\", sign=\"^\", op_type=\"logical\", self_on_right=True\n    )\n    def __rxor__(self, other, **kwargs):  # noqa: PR02\n        return BinaryDefault.register(pandas.DataFrame.__rxor__)(\n            self, other=other, **kwargs\n        )\n\n    @doc_utils.doc_binary_method(operation=\"exclusive or\", sign=\"^\", op_type=\"logical\")\n    def __xor__(self, other, **kwargs):  # noqa: PR02\n        return BinaryDefault.register(pandas.DataFrame.__xor__)(\n            self, other=other, **kwargs\n        )\n\n    # FIXME: query compiler shouldn't care about differences between Frame and Series.\n    # We should combine `df_update` and `series_update` into one method (Modin issue #3101).\n    @doc_utils.add_refer_to(\"DataFrame.update\")\n    def df_update(self, other, **kwargs):  # noqa: PR02\n        \"\"\"\n        Update values of `self` using non-NA values of `other` at the corresponding positions.\n\n        If axes are not equal, perform frames alignment first.\n\n        Parameters\n        ----------\n        other : BaseQueryCompiler\n            Frame to grab replacement values from.\n        join : {\"left\"}\n            Specify type of join to align frames if axes are not equal\n            (note: currently only one type of join is implemented).\n        overwrite : bool\n            Whether to overwrite every corresponding value of self, or only if it's NAN.\n        filter_func : callable(pandas.Series, pandas.Series) -> numpy.ndarray<bool>\n            Function that takes column of the self and return bool mask for values, that\n            should be 
overwritten in the self frame.\n        errors : {\"raise\", \"ignore\"}\n            If \"raise\", will raise a ``ValueError`` if `self` and `other` both contain\n            non-NA data in the same place.\n        **kwargs : dict\n            Serves the compatibility purpose. Does not affect the result.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            New QueryCompiler with updated values.\n        \"\"\"\n        return BinaryDefault.register(pandas.DataFrame.update, inplace=True)(\n            self, other=other, **kwargs\n        )\n\n    @doc_utils.add_refer_to(\"Series.update\")\n    def series_update(self, other, **kwargs):  # noqa: PR02\n        \"\"\"\n        Update values of `self` using values of `other` at the corresponding indices.\n\n        Parameters\n        ----------\n        other : BaseQueryCompiler\n            One-column query compiler with updated values.\n        **kwargs : dict\n            Serves the compatibility purpose. Does not affect the result.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            New QueryCompiler with updated values.\n        \"\"\"\n        return BinaryDefault.register(pandas.Series.update, inplace=True)(\n            self,\n            other=other,\n            squeeze_self=True,\n            squeeze_other=True,\n            **kwargs,\n        )\n\n    @doc_utils.add_refer_to(\"DataFrame.asfreq\")\n    def asfreq(self, **kwargs):  # noqa: PR01\n        \"\"\"\n        Convert time series to specified frequency.\n\n        Returns the original data conformed to a new index with the specified frequency.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            New QueryCompiler reindexed to the specified frequency.\n        \"\"\"\n        return DataFrameDefault.register(pandas.DataFrame.asfreq)(self, **kwargs)\n\n    @doc_utils.add_refer_to(\"DataFrame.clip\")\n    def clip(self, lower, upper, **kwargs):  # noqa: PR02\n        \"\"\"\n        
Trim values at input threshold.\n\n        Parameters\n        ----------\n        lower : float or list-like\n        upper : float or list-like\n        axis : {0, 1}\n        **kwargs : dict\n            Serves the compatibility purpose. Does not affect the result.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            QueryCompiler with values limited by the specified thresholds.\n        \"\"\"\n        if isinstance(lower, BaseQueryCompiler):\n            lower = lower.to_pandas().squeeze(1)\n        if isinstance(upper, BaseQueryCompiler):\n            upper = upper.to_pandas().squeeze(1)\n        return DataFrameDefault.register(pandas.DataFrame.clip)(\n            self, lower=lower, upper=upper, **kwargs\n        )\n\n    @doc_utils.add_refer_to(\"DataFrame.where\")\n    def where(self, cond, other, **kwargs):  # noqa: PR02\n        \"\"\"\n        Update values of `self` using values from `other` at positions where `cond` is False.\n\n        Parameters\n        ----------\n        cond : BaseQueryCompiler\n            Boolean mask. True - keep the self value, False - replace by `other` value.\n        other : BaseQueryCompiler or pandas.Series\n            Object to grab replacement values from.\n        axis : {0, 1}\n            Axis to align frames along if axes of self, `cond` and `other` are not equal.\n            0 is for index, when 1 is for columns.\n        level : int or label, optional\n            Level of MultiIndex to align frames along if axes of self, `cond`\n            and `other` are not equal. Currently `level` parameter is not implemented,\n            so only None value is acceptable.\n        **kwargs : dict\n            Serves the compatibility purpose. 
Does not affect the result.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            QueryCompiler with updated data.\n        \"\"\"\n        return DataFrameDefault.register(pandas.DataFrame.where)(\n            self, cond=cond, other=other, **kwargs\n        )\n\n    @doc_utils.add_refer_to(\"DataFrame.merge\")\n    def merge(self, right, **kwargs):  # noqa: PR02\n        \"\"\"\n        Merge QueryCompiler objects using a database-style join.\n\n        Parameters\n        ----------\n        right : BaseQueryCompiler\n            QueryCompiler of the right frame to merge with.\n        how : {\"left\", \"right\", \"outer\", \"inner\", \"cross\"}\n        on : label or list of such\n        left_on : label or list of such\n        right_on : label or list of such\n        left_index : bool\n        right_index : bool\n        sort : bool\n        suffixes : list-like\n        copy : bool\n        indicator : bool or str\n        validate : str\n        **kwargs : dict\n            Serves the compatibility purpose. 
Does not affect the result.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            QueryCompiler that contains result of the merge.\n        \"\"\"\n        return DataFrameDefault.register(pandas.DataFrame.merge)(\n            self, right=right, **kwargs\n        )\n\n    @doc_utils.add_refer_to(\"merge_ordered\")\n    def merge_ordered(self, right, **kwargs):  # noqa: PR01\n        \"\"\"\n        Perform a merge for ordered data with optional filling/interpolation.\n\n        Returns\n        -------\n        BaseQueryCompiler\n        \"\"\"\n        return DataFrameDefault.register(pandas.merge_ordered)(self, right, **kwargs)\n\n    def _get_column_as_pandas_series(self, key):\n        \"\"\"\n        Get column data by label as pandas.Series.\n\n        Parameters\n        ----------\n        key : Any\n            Column label.\n\n        Returns\n        -------\n        pandas.Series\n        \"\"\"\n        result = self.getitem_array([key]).to_pandas().squeeze(axis=1)\n        if not isinstance(result, pandas.Series):\n            raise RuntimeError(\n                f\"Expected getting column {key} to give \"\n                + f\"pandas.Series, but instead got {type(result)}\"\n            )\n        return result\n\n    def merge_asof(\n        self,\n        right: \"BaseQueryCompiler\",\n        left_on: Optional[IndexLabel] = None,\n        right_on: Optional[IndexLabel] = None,\n        left_index: bool = False,\n        right_index: bool = False,\n        left_by=None,\n        right_by=None,\n        suffixes: Suffixes = (\"_x\", \"_y\"),\n        tolerance=None,\n        allow_exact_matches: bool = True,\n        direction: str = \"backward\",\n    ):  # noqa: GL08\n        self._maybe_warn_on_default(message=\"`merge_asof`\")\n        # Pandas fallbacks for tricky cases:\n        if (\n            # No idea how this works or why it does what it does; and in fact\n            # there's a Pandas bug suggesting it's wrong:\n  
          # https://github.com/pandas-dev/pandas/issues/33463\n            (left_index and right_on is not None)\n            # This is the case where by is a list of columns. If we're copying lots\n            # of columns out of Pandas, maybe not worth trying our path, it's not\n            # clear it's any better:\n            or not (left_by is None or is_scalar(left_by))\n            or not (right_by is None or is_scalar(right_by))\n            # The implementation below assumes that the right index is unique\n            # because it uses merge_asof to map each position in the merged\n            # index to the label of the one right row that should be merged\n            # at that row position.\n            or not right.index.is_unique\n        ):\n            return self.default_to_pandas(\n                pandas.merge_asof,\n                right,\n                left_on=left_on,\n                right_on=right_on,\n                left_index=left_index,\n                right_index=right_index,\n                left_by=left_by,\n                right_by=right_by,\n                suffixes=suffixes,\n                tolerance=tolerance,\n                allow_exact_matches=allow_exact_matches,\n                direction=direction,\n            )\n\n        if left_on is None:\n            left_column = self.index\n        else:\n            left_column = self._get_column_as_pandas_series(left_on)\n\n        if right_on is None:\n            right_column = right.index\n        else:\n            right_column = right._get_column_as_pandas_series(right_on)\n\n        left_pandas_limited = {\"on\": left_column}\n        right_pandas_limited = {\"on\": right_column, \"right_labels\": right.index}\n        extra_kwargs = {}  # extra arguments to Pandas merge_asof\n\n        if left_by is not None or right_by is not None:\n            extra_kwargs[\"by\"] = \"by\"\n            left_pandas_limited[\"by\"] = self._get_column_as_pandas_series(left_by)\n            
right_pandas_limited[\"by\"] = right._get_column_as_pandas_series(right_by)\n\n        # 1. Construct Pandas DataFrames with just the 'on' and optional 'by'\n        # columns, and the index as another column.\n        left_pandas_limited = pandas.DataFrame(left_pandas_limited, index=self.index)\n        right_pandas_limited = pandas.DataFrame(right_pandas_limited)\n\n        # 2. Use Pandas' merge_asof to figure out how to map labels on left to\n        # labels on the right.\n        merged = pandas.merge_asof(\n            left_pandas_limited,\n            right_pandas_limited,\n            on=\"on\",\n            direction=direction,\n            allow_exact_matches=allow_exact_matches,\n            tolerance=tolerance,\n            **extra_kwargs,\n        )\n        # Now merged[\"right_labels\"] shows which labels from right map to left's index.\n\n        # 3. Re-index right using the merged[\"right_labels\"]; at this point right\n        # should be same length and (semantically) same order as left:\n        right_subset = right.reindex(\n            axis=0, labels=pandas.Index(merged[\"right_labels\"])\n        )\n        if not right_index:\n            right_subset = right_subset.drop(columns=[right_on])\n        if right_by is not None and left_by == right_by:\n            right_subset = right_subset.drop(columns=[right_by])\n        right_subset.index = self.index\n\n        # 4. Merge left and the new shrunken right:\n        result = self.merge(\n            right_subset,\n            left_index=True,\n            right_index=True,\n            suffixes=suffixes,\n            how=\"left\",\n        )\n\n        # 5. 
Clean up to match Pandas output:\n        if left_on is not None and right_index:\n            result = result.insert(\n                # In theory this could use get_indexer_for(), but that causes an error:\n                list(result.columns).index(left_on + suffixes[0]),\n                left_on,\n                result.getitem_array([left_on + suffixes[0]]),\n            )\n        if not left_index and not right_index:\n            result = result.reset_index(drop=True)\n\n        return result\n\n    @doc_utils.add_refer_to(\"DataFrame.join\")\n    def join(self, right, **kwargs):  # noqa: PR02\n        \"\"\"\n        Join columns of another QueryCompiler.\n\n        Parameters\n        ----------\n        right : BaseQueryCompiler\n            QueryCompiler of the right frame to join with.\n        on : label or list of such\n        how : {\"left\", \"right\", \"outer\", \"inner\"}\n        lsuffix : str\n        rsuffix : str\n        sort : bool\n        **kwargs : dict\n            Serves the compatibility purpose. Does not affect the result.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            QueryCompiler that contains result of the join.\n        \"\"\"\n        return DataFrameDefault.register(pandas.DataFrame.join)(self, right, **kwargs)\n\n    # END Abstract inter-data operations\n\n    # Abstract Transpose\n    def transpose(self, *args, **kwargs):  # noqa: PR02\n        \"\"\"\n        Transpose this QueryCompiler.\n\n        Parameters\n        ----------\n        copy : bool\n            Whether to copy the data after transposing.\n        *args : iterable\n            Serves the compatibility purpose. Does not affect the result.\n        **kwargs : dict\n            Serves the compatibility purpose. 
Does not affect the result.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            Transposed new QueryCompiler.\n        \"\"\"\n        return DataFrameDefault.register(pandas.DataFrame.transpose)(\n            self, *args, **kwargs\n        )\n\n    def columnarize(self):\n        \"\"\"\n        Transpose this QueryCompiler if it has a single row but multiple columns.\n\n        This method should be called for QueryCompilers representing a Series object,\n        i.e. ``self.is_series_like()`` should be True.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            Transposed new QueryCompiler or self.\n        \"\"\"\n        if self._shape_hint == \"column\":\n            return self\n\n        result = self\n        if len(self.columns) != 1 or (\n            len(self.index) == 1 and self.index[0] == MODIN_UNNAMED_SERIES_LABEL\n        ):\n            result = self.transpose()\n        result._shape_hint = \"column\"\n        return result\n\n    def is_series_like(self):\n        \"\"\"\n        Check whether this QueryCompiler can represent ``modin.pandas.Series`` object.\n\n        Returns\n        -------\n        bool\n            Return True if QueryCompiler has a single column or row, False otherwise.\n        \"\"\"\n        return len(self.columns) == 1 or len(self.index) == 1\n\n    # END Abstract Transpose\n\n    # Abstract reindex/reset_index (may shuffle data)\n    @doc_utils.add_refer_to(\"DataFrame.reindex\")\n    def reindex(self, axis, labels, **kwargs):  # noqa: PR02\n        \"\"\"\n        Align QueryCompiler data with a new index along specified axis.\n\n        Parameters\n        ----------\n        axis : {0, 1}\n            Axis to align labels along. 
0 is for index, 1 is for columns.\n        labels : list-like\n            Index-labels to align with.\n        method : {None, \"backfill\"/\"bfill\", \"pad\"/\"ffill\", \"nearest\"}\n            Method to use for filling holes in reindexed frame.\n        fill_value : scalar\n            Value to use for missing values in the resulted frame.\n        limit : int\n        tolerance : int\n        **kwargs : dict\n            Serves the compatibility purpose. Does not affect the result.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            QueryCompiler with aligned axis.\n        \"\"\"\n        return DataFrameDefault.register(pandas.DataFrame.reindex)(\n            self, axis=axis, labels=labels, **kwargs\n        )\n\n    @doc_utils.add_refer_to(\"DataFrame.reset_index\")\n    def reset_index(self, **kwargs):  # noqa: PR02\n        \"\"\"\n        Reset the index, or a level of it.\n\n        Parameters\n        ----------\n        drop : bool\n            Whether to drop the reset index or insert it at the beginning of the frame.\n        level : int or label, optional\n            Level to remove from index. Removes all levels by default.\n        col_level : int or label\n            If the columns have multiple levels, determines which level the labels\n            are inserted into.\n        col_fill : label\n            If the columns have multiple levels, determines how the other levels\n            are named.\n        **kwargs : dict\n            Serves the compatibility purpose. 
Does not affect the result.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            QueryCompiler with reset index.\n        \"\"\"\n        return DataFrameDefault.register(pandas.DataFrame.reset_index)(self, **kwargs)\n\n    def set_index_from_columns(\n        self, keys: List[Hashable], drop: bool = True, append: bool = False\n    ):\n        \"\"\"\n        Create new row labels from a list of columns.\n\n        Parameters\n        ----------\n        keys : list of hashable\n            The list of column names that will become the new index.\n        drop : bool, default: True\n            Whether or not to drop the columns provided in the `keys` argument.\n        append : bool, default: False\n            Whether or not to add the columns in `keys` as new levels appended to the\n            existing index.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            A new QueryCompiler with updated index.\n        \"\"\"\n        return DataFrameDefault.register(pandas.DataFrame.set_index)(\n            self, keys=keys, drop=drop, append=append\n        )\n\n    # END Abstract reindex/reset_index\n\n    # Full Reduce operations\n    #\n    # These operations result in a reduced dimensionality of data.\n    # Currently, this means a Pandas Series will be returned, but in the future\n    # we will implement a Distributed Series, and this will be returned\n    # instead.\n\n    def is_monotonic_increasing(self):\n        \"\"\"\n        Return boolean if values in the object are monotonically increasing.\n\n        Returns\n        -------\n        bool\n        \"\"\"\n        return SeriesDefault.register(pandas.Series.is_monotonic_increasing)(self)\n\n    def is_monotonic_decreasing(self):\n        \"\"\"\n        Return boolean if values in the object are monotonically decreasing.\n\n        Returns\n        -------\n        bool\n        \"\"\"\n        return 
SeriesDefault.register(pandas.Series.is_monotonic_decreasing)(self)\n\n    @doc_utils.doc_reduce_agg(\n        method=\"number of non-NaN values\", refer_to=\"count\", extra_params=[\"**kwargs\"]\n    )\n    def count(self, **kwargs):  # noqa: PR02\n        return DataFrameDefault.register(pandas.DataFrame.count)(self, **kwargs)\n\n    @doc_utils.doc_reduce_agg(\n        method=\"maximum value\", refer_to=\"max\", extra_params=[\"skipna\", \"**kwargs\"]\n    )\n    def max(self, **kwargs):  # noqa: PR02\n        return DataFrameDefault.register(pandas.DataFrame.max)(self, **kwargs)\n\n    @doc_utils.doc_reduce_agg(\n        method=\"mean value\", refer_to=\"mean\", extra_params=[\"skipna\", \"**kwargs\"]\n    )\n    def mean(self, **kwargs):  # noqa: PR02\n        return DataFrameDefault.register(pandas.DataFrame.mean)(self, **kwargs)\n\n    @doc_utils.doc_reduce_agg(\n        method=\"minimum value\", refer_to=\"min\", extra_params=[\"skipna\", \"**kwargs\"]\n    )\n    def min(self, **kwargs):  # noqa: PR02\n        return DataFrameDefault.register(pandas.DataFrame.min)(self, **kwargs)\n\n    @doc_utils.doc_reduce_agg(\n        method=\"production\",\n        refer_to=\"prod\",\n        extra_params=[\"**kwargs\"],\n        params=\"axis : {0, 1}\",\n    )\n    def prod(self, **kwargs):  # noqa: PR02\n        return DataFrameDefault.register(pandas.DataFrame.prod)(self, **kwargs)\n\n    @doc_utils.doc_reduce_agg(\n        method=\"sum\",\n        refer_to=\"sum\",\n        extra_params=[\"**kwargs\"],\n        params=\"axis : {0, 1}\",\n    )\n    def sum(self, **kwargs):  # noqa: PR02\n        return DataFrameDefault.register(pandas.DataFrame.sum)(self, **kwargs)\n\n    @doc_utils.add_refer_to(\"DataFrame.mask\")\n    def mask(self, cond, other, **kwargs):  # noqa: PR01\n        \"\"\"\n        Replace values where the condition `cond` is True.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            New QueryCompiler with elements replaced 
with ones from `other` where `cond` is True.\n        \"\"\"\n        return DataFrameDefault.register(pandas.DataFrame.mask)(\n            self, cond, other, **kwargs\n        )\n\n    @doc_utils.add_refer_to(\"DataFrame.pct_change\")\n    def pct_change(self, **kwargs):  # noqa: PR01\n        \"\"\"\n        Percentage change between the current and a prior element.\n\n        Returns\n        -------\n        BaseQueryCompiler\n        \"\"\"\n        return DataFrameDefault.register(pandas.DataFrame.pct_change)(self, **kwargs)\n\n    @doc_utils.add_refer_to(\"to_datetime\")\n    def to_datetime(self, *args, **kwargs):\n        \"\"\"\n        Convert columns of the QueryCompiler to the datetime dtype.\n\n        Parameters\n        ----------\n        *args : iterable\n        **kwargs : dict\n\n        Returns\n        -------\n        BaseQueryCompiler\n            QueryCompiler with all columns converted to datetime dtype.\n        \"\"\"\n        return SeriesDefault.register(pandas.to_datetime)(self, *args, **kwargs)\n\n    # END Abstract full Reduce operations\n\n    # Abstract map partitions operations\n    # These operations are operations that apply a function to every partition.\n    def abs(self):\n        \"\"\"\n        Get absolute numeric value of each element.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            QueryCompiler with absolute numeric value of each element.\n        \"\"\"\n        return DataFrameDefault.register(pandas.DataFrame.abs)(self)\n\n    def map(self, func, *args, **kwargs):\n        \"\"\"\n        Apply passed function elementwise.\n\n        Parameters\n        ----------\n        func : callable(scalar) -> scalar\n            Function to apply to each element of the QueryCompiler.\n        *args : iterable\n        **kwargs : dict\n\n        Returns\n        -------\n        BaseQueryCompiler\n            Transformed QueryCompiler.\n        \"\"\"\n        return 
DataFrameDefault.register(pandas.DataFrame.map)(\n            self, func, *args, **kwargs\n        )\n\n    # FIXME: `**kwargs` which follows `numpy.conj` signature was inherited\n    # from ``PandasQueryCompiler``, we should get rid of this dependency.\n    # (Modin issue #3108)\n    def conj(self, **kwargs):\n        \"\"\"\n        Get the complex conjugate for every element of self.\n\n        Parameters\n        ----------\n        **kwargs : dict\n\n        Returns\n        -------\n        BaseQueryCompiler\n            QueryCompiler with conjugate applied element-wise.\n\n        Notes\n        -----\n        Please refer to ``numpy.conj`` for parameters description.\n        \"\"\"\n\n        def conj(df, *args, **kwargs):\n            return pandas.DataFrame(np.conj(df))\n\n        return DataFrameDefault.register(conj)(self, **kwargs)\n\n    @doc_utils.add_refer_to(\"DataFrame.interpolate\")\n    def interpolate(self, **kwargs):  # noqa: PR01\n        \"\"\"\n        Fill NaN values using an interpolation method.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            Returns the same object type as the caller, interpolated at some or all NaN values.\n        \"\"\"\n        return DataFrameDefault.register(pandas.DataFrame.interpolate)(self, **kwargs)\n\n    # FIXME:\n    #   1. This function takes Modin Series and DataFrames via `values` parameter,\n    #      we should avoid leaking of the high-level objects to the query compiler level.\n    #      (Modin issue #3106)\n    #   2. 
Spread **kwargs into actual arguments (Modin issue #3108).\n    def isin(self, values, ignore_indices=False, **kwargs):  # noqa: PR02\n        \"\"\"\n        Check for each element of `self` whether it's contained in passed `values`.\n\n        Parameters\n        ----------\n        values : list-like, modin.pandas.Series, modin.pandas.DataFrame or dict\n            Values to check elements of self in.\n        ignore_indices : bool, default: False\n            Whether to execute ``isin()`` only on an intersection of indices.\n        **kwargs : dict\n            Serves the compatibility purpose. Does not affect the result.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            Boolean mask for self of whether an element at the corresponding\n            position is contained in `values`.\n        \"\"\"\n        if isinstance(values, type(self)) and ignore_indices:\n            # Pandas logic is that it ignores indexing if 'values' is a 1D object\n            values = values.to_pandas().squeeze(axis=1)\n        if self._shape_hint == \"column\":\n            return SeriesDefault.register(pandas.Series.isin)(self, values, **kwargs)\n        else:\n            return DataFrameDefault.register(pandas.DataFrame.isin)(\n                self, values, **kwargs\n            )\n\n    def isna(self):\n        \"\"\"\n        Check for each element of self whether it's NaN.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            Boolean mask for self of whether an element at the corresponding\n            position is NaN.\n        \"\"\"\n        return DataFrameDefault.register(pandas.DataFrame.isna)(self)\n\n    # FIXME: this method is not supposed to take any parameters (Modin issue #3108).\n    def negative(self, **kwargs):\n        \"\"\"\n        Change the sign for every value of self.\n\n        Parameters\n        ----------\n        **kwargs : dict\n            Serves the compatibility purpose. 
Does not affect the result.\n\n        Returns\n        -------\n        BaseQueryCompiler\n\n        Notes\n        -----\n        Be aware, that all QueryCompiler values have to be numeric.\n        \"\"\"\n        return DataFrameDefault.register(pandas.DataFrame.__neg__)(self, **kwargs)\n\n    def notna(self):\n        \"\"\"\n        Check for each element of `self` whether it's existing (non-missing) value.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            Boolean mask for `self` of whether an element at the corresponding\n            position is not NaN.\n        \"\"\"\n        return DataFrameDefault.register(pandas.DataFrame.notna)(self)\n\n    @doc_utils.add_refer_to(\"DataFrame.round\")\n    def round(self, **kwargs):  # noqa: PR02\n        \"\"\"\n        Round every numeric value up to specified number of decimals.\n\n        Parameters\n        ----------\n        decimals : int or list-like\n            Number of decimals to round each column to.\n        **kwargs : dict\n            Serves the compatibility purpose. Does not affect the result.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            QueryCompiler with rounded values.\n        \"\"\"\n        return DataFrameDefault.register(pandas.DataFrame.round)(self, **kwargs)\n\n    # FIXME:\n    #   1. high-level objects leaks to the query compiler (Modin issue #3106).\n    #   2. remove `inplace` parameter.\n    @doc_utils.add_refer_to(\"DataFrame.replace\")\n    def replace(self, **kwargs):  # noqa: PR02\n        \"\"\"\n        Replace values given in `to_replace` by `value`.\n\n        Parameters\n        ----------\n        to_replace : scalar, list-like, regex, modin.pandas.Series, or None\n        value : scalar, list-like, regex or dict\n        inplace : {False}\n            This parameter serves the compatibility purpose. 
Always has to be False.\n        limit : int or None\n        regex : bool or same types as `to_replace`\n        method : {\"pad\", \"ffill\", \"bfill\", None}\n        **kwargs : dict\n            Serves the compatibility purpose. Does not affect the result.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            QueryCompiler with all `to_replace` values replaced by `value`.\n        \"\"\"\n        return DataFrameDefault.register(pandas.DataFrame.replace)(self, **kwargs)\n\n    @doc_utils.add_refer_to(\"Series.argsort\")\n    def argsort(self, **kwargs):  # noqa: PR02\n        \"\"\"\n        Return the integer indices that would sort the Series values.\n\n        Override ndarray.argsort. Argsorts the value, omitting NA/null values,\n        and places the result in the same locations as the non-NA values.\n\n        Parameters\n        ----------\n        axis : {0 or 'index'}\n            Unused. Parameter needed for compatibility with DataFrame.\n        kind : {'mergesort', 'quicksort', 'heapsort', 'stable'}, default 'quicksort'\n            Choice of sorting algorithm. See :func:`numpy.sort` for more\n            information. 
'mergesort' and 'stable' are the only stable algorithms.\n        order : None\n            Has no effect but is accepted for compatibility with NumPy.\n        **kwargs : dict\n            Serves compatibility purposes.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            One-column QueryCompiler with positions of values within the\n            sort order with -1 indicating nan values.\n        \"\"\"\n        return SeriesDefault.register(pandas.Series.argsort)(self, **kwargs)\n\n    @doc_utils.add_one_column_warning\n    # FIXME: adding refer-to note will create two instances of the \"Notes\" section,\n    # this breaks numpydoc style rules and also crashes the doc-style checker script.\n    # For now manually added the refer-to message.\n    # @doc_utils.add_refer_to(\"Series.view\")\n    def series_view(self, **kwargs):  # noqa: PR02\n        \"\"\"\n        Reinterpret underlying data with new dtype.\n\n        Parameters\n        ----------\n        dtype : dtype\n            Data type to reinterpret underlying data with.\n        **kwargs : dict\n            Serves the compatibility purpose. 
Does not affect the result.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            New QueryCompiler of the same data in memory, with reinterpreted values.\n\n        Notes\n        -----\n            - Be aware, that if this method do fallback to pandas, then newly created\n              QueryCompiler will be the copy of the original data.\n            - Please refer to ``modin.pandas.Series.view`` for more information\n              about parameters and output format.\n        \"\"\"\n        return SeriesDefault.register(pandas.Series.view)(self, **kwargs)\n\n    @doc_utils.add_one_column_warning\n    @doc_utils.add_refer_to(\"to_numeric\")\n    def to_numeric(self, *args, **kwargs):  # noqa: PR02\n        \"\"\"\n        Convert underlying data to numeric dtype.\n\n        Parameters\n        ----------\n        errors : {\"ignore\", \"raise\", \"coerce\"}\n        downcast : {\"integer\", \"signed\", \"unsigned\", \"float\", None}\n        *args : iterable\n            Serves the compatibility purpose. Does not affect the result.\n        **kwargs : dict\n            Serves the compatibility purpose. Does not affect the result.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            New QueryCompiler with converted to numeric values.\n        \"\"\"\n        return SeriesDefault.register(pandas.to_numeric)(self, *args, **kwargs)\n\n    @doc_utils.add_one_column_warning\n    @doc_utils.add_refer_to(\"to_timedelta\")\n    def to_timedelta(self, unit=\"ns\", errors=\"raise\"):  # noqa: PR02\n        \"\"\"\n        Convert argument to timedelta.\n\n        Parameters\n        ----------\n        unit : str, default: \"ns\"\n            Denotes the unit of the arg for numeric arg. 
Defaults to \"ns\".\n        errors : {\"ignore\", \"raise\", \"coerce\"}, default: \"raise\"\n\n        Returns\n        -------\n        BaseQueryCompiler\n            New QueryCompiler with converted to timedelta values.\n        \"\"\"\n        return SeriesDefault.register(pandas.to_timedelta)(\n            self, unit=unit, errors=errors\n        )\n\n    # 'qc.unique()' uses most of the arguments from 'df.drop_duplicates()', so referring to this method\n    @doc_utils.add_refer_to(\"DataFrame.drop_duplicates\")\n    def unique(self, keep=\"first\", ignore_index=True, subset=None):\n        \"\"\"\n        Get unique rows of `self`.\n\n        Parameters\n        ----------\n        keep : {\"first\", \"last\", False}, default: \"first\"\n            Which duplicates to keep.\n        ignore_index : bool, default: True\n            If ``True``, the resulting axis will be labeled ``0, 1, …, n - 1``.\n        subset : list, optional\n            Only consider certain columns for identifying duplicates, if `None`, use all of the columns.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            New QueryCompiler with unique values.\n        \"\"\"\n        if subset is not None:\n            mask = self.getitem_column_array(subset, ignore_order=True)\n        else:\n            mask = self\n        without_duplicates = self.getitem_array(mask.duplicated(keep=keep).invert())\n        if ignore_index:\n            without_duplicates = without_duplicates.reset_index(drop=True)\n        return without_duplicates\n\n    @doc_utils.add_one_column_warning\n    @doc_utils.add_refer_to(\"Series.searchsorted\")\n    def searchsorted(self, **kwargs):  # noqa: PR02\n        \"\"\"\n        Find positions in a sorted `self` where `value` should be inserted to maintain order.\n\n        Parameters\n        ----------\n        value : list-like\n        side : {\"left\", \"right\"}\n        sorter : list-like, optional\n        **kwargs : dict\n            
Serves the compatibility purpose. Does not affect the result.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            One-column QueryCompiler which contains indices to insert.\n        \"\"\"\n        return SeriesDefault.register(pandas.Series.searchsorted)(self, **kwargs)\n\n    # END Abstract map partitions operations\n\n    @doc_utils.add_refer_to(\"DataFrame.stack\")\n    def stack(self, level, dropna, sort):\n        \"\"\"\n        Stack the prescribed level(s) from columns to index.\n\n        Parameters\n        ----------\n        level : int or label\n        dropna : bool\n        sort : bool\n\n        Returns\n        -------\n        BaseQueryCompiler\n        \"\"\"\n        return DataFrameDefault.register(pandas.DataFrame.stack)(\n            self,\n            level=level,\n            dropna=dropna,\n            sort=sort,\n        )\n\n    # Abstract map partitions across select indices\n    def astype(self, col_dtypes, errors: str = \"raise\"):  # noqa: PR02\n        \"\"\"\n        Convert columns dtypes to given dtypes.\n\n        Parameters\n        ----------\n        col_dtypes : dict or str\n            Map for column names and new dtypes.\n        errors : {'raise', 'ignore'}, default: 'raise'\n            Control raising of exceptions on invalid data for provided dtype.\n            - raise : allow exceptions to be raised\n            - ignore : suppress exceptions. On error return original object.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            New QueryCompiler with updated dtypes.\n        \"\"\"\n        return DataFrameDefault.register(pandas.DataFrame.astype)(\n            self, dtype=col_dtypes, errors=errors\n        )\n\n    def infer_objects(self):\n        \"\"\"\n        Attempt to infer better dtypes for object columns.\n\n        Attempts soft conversion of object-dtyped columns, leaving non-object\n        and unconvertible columns unchanged. 
The inference rules are the same\n        as during normal Series/DataFrame construction.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            New query compiler with updated dtypes.\n        \"\"\"\n        return DataFrameDefault.register(pandas.DataFrame.infer_objects)(self)\n\n    def convert_dtypes(\n        self,\n        infer_objects: bool = True,\n        convert_string: bool = True,\n        convert_integer: bool = True,\n        convert_boolean: bool = True,\n        convert_floating: bool = True,\n        dtype_backend: DtypeBackend = \"numpy_nullable\",\n    ):\n        \"\"\"\n        Convert columns to best possible dtypes using dtypes supporting ``pd.NA``.\n\n        Parameters\n        ----------\n        infer_objects : bool, default: True\n            Whether object dtypes should be converted to the best possible types.\n        convert_string : bool, default: True\n            Whether object dtypes should be converted to ``pd.StringDtype()``.\n        convert_integer : bool, default: True\n            Whether, if possible, conversion should be done to integer extension types.\n        convert_boolean : bool, default: True\n            Whether object dtypes should be converted to ``pd.BooleanDtype()``.\n        convert_floating : bool, default: True\n            Whether, if possible, conversion can be done to floating extension types.\n            If `convert_integer` is also True, preference will be given to integer dtypes\n            if the floats can be faithfully cast to integers.\n        dtype_backend : {\"numpy_nullable\", \"pyarrow\"}, default: \"numpy_nullable\"\n            Which dtype_backend to use, e.g. 
whether a DataFrame should use nullable\n            dtypes for all dtypes that have a nullable\n            implementation when \"numpy_nullable\" is set, PyArrow is used for all\n            dtypes if \"pyarrow\" is set.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            New QueryCompiler with updated dtypes.\n        \"\"\"\n        return DataFrameDefault.register(pandas.DataFrame.convert_dtypes)(\n            self,\n            infer_objects=infer_objects,\n            convert_string=convert_string,\n            convert_integer=convert_integer,\n            convert_boolean=convert_boolean,\n            convert_floating=convert_floating,\n            dtype_backend=dtype_backend,\n        )\n\n    @property\n    def dtypes(self):\n        \"\"\"\n        Get columns dtypes.\n\n        Returns\n        -------\n        pandas.Series\n            Series with dtypes of each column.\n        \"\"\"\n        return self.to_pandas().dtypes\n\n    # END Abstract map partitions across select indices\n\n    # Abstract column/row partitions reduce operations\n    #\n    # These operations result in a reduced dimensionality of data.\n    # Currently, this means a Pandas Series will be returned, but in the future\n    # we will implement a Distributed Series, and this will be returned\n    # instead.\n\n    # FIXME: we're handling level parameter at front-end, it shouldn't\n    # propagate to the query compiler (Modin issue #3102)\n    @doc_utils.add_refer_to(\"DataFrame.all\")\n    def all(self, **kwargs):  # noqa: PR02\n        \"\"\"\n        Return whether all the elements are true, potentially over an axis.\n\n        Parameters\n        ----------\n        axis : {0, 1}, optional\n        bool_only : bool, optional\n        skipna : bool\n        level : int or label\n        **kwargs : dict\n            Serves the compatibility purpose. 
Does not affect the result.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            If axis was specified return one-column QueryCompiler with index labels\n            of the specified axis, where each row contains boolean of whether all elements\n            at the corresponding row or column are True. Otherwise return QueryCompiler\n            with a single bool of whether all elements are True.\n        \"\"\"\n        return DataFrameDefault.register(pandas.DataFrame.all)(self, **kwargs)\n\n    @doc_utils.add_refer_to(\"DataFrame.any\")\n    def any(self, **kwargs):  # noqa: PR02\n        \"\"\"\n        Return whether any element is true, potentially over an axis.\n\n        Parameters\n        ----------\n        axis : {0, 1}, optional\n        bool_only : bool, optional\n        skipna : bool\n        level : int or label\n        **kwargs : dict\n            Serves the compatibility purpose. Does not affect the result.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            If axis was specified return one-column QueryCompiler with index labels\n            of the specified axis, where each row contains boolean of whether any element\n            at the corresponding row or column is True. 
Otherwise return QueryCompiler\n            with a single bool of whether any element is True.\n        \"\"\"\n        return DataFrameDefault.register(pandas.DataFrame.any)(self, **kwargs)\n\n    def first_valid_index(self):\n        \"\"\"\n        Return index label of first non-NaN/NULL value.\n\n        Returns\n        -------\n        scalar\n        \"\"\"\n        return (\n            DataFrameDefault.register(pandas.DataFrame.first_valid_index)(self)\n            .to_pandas()\n            .squeeze()\n        )\n\n    @doc_utils.add_refer_to(\"DataFrame.idxmax\")\n    def idxmax(self, **kwargs):  # noqa: PR02\n        \"\"\"\n        Get position of the first occurrence of the maximum for each row or column.\n\n        Parameters\n        ----------\n        axis : {0, 1}\n        skipna : bool\n        **kwargs : dict\n            Serves the compatibility purpose. Does not affect the result.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            One-column QueryCompiler with index labels of the specified axis,\n            where each row contains position of the maximum element for the\n            corresponding row or column.\n        \"\"\"\n        return DataFrameDefault.register(pandas.DataFrame.idxmax)(self, **kwargs)\n\n    @doc_utils.add_refer_to(\"DataFrame.idxmin\")\n    def idxmin(self, **kwargs):  # noqa: PR02\n        \"\"\"\n        Get position of the first occurrence of the minimum for each row or column.\n\n        Parameters\n        ----------\n        axis : {0, 1}\n        skipna : bool\n        **kwargs : dict\n            Serves the compatibility purpose. 
Does not affect the result.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            One-column QueryCompiler with index labels of the specified axis,\n            where each row contains position of the minimum element for the\n            corresponding row or column.\n        \"\"\"\n        return DataFrameDefault.register(pandas.DataFrame.idxmin)(self, **kwargs)\n\n    def last_valid_index(self):\n        \"\"\"\n        Return index label of last non-NaN/NULL value.\n\n        Returns\n        -------\n        scalar\n        \"\"\"\n        return (\n            DataFrameDefault.register(pandas.DataFrame.last_valid_index)(self)\n            .to_pandas()\n            .squeeze()\n        )\n\n    @doc_utils.doc_reduce_agg(\n        method=\"median value\", refer_to=\"median\", extra_params=[\"skipna\", \"**kwargs\"]\n    )\n    def median(self, **kwargs):  # noqa: PR02\n        return DataFrameDefault.register(pandas.DataFrame.median)(self, **kwargs)\n\n    @doc_utils.add_refer_to(\"DataFrame.memory_usage\")\n    def memory_usage(self, **kwargs):  # noqa: PR02\n        \"\"\"\n        Return the memory usage of each column in bytes.\n\n        Parameters\n        ----------\n        index : bool\n        deep : bool\n        **kwargs : dict\n            Serves the compatibility purpose. 
Does not affect the result.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            One-column QueryCompiler with index labels of `self`, where each row\n            contains the memory usage for the corresponding column.\n        \"\"\"\n        return DataFrameDefault.register(pandas.DataFrame.memory_usage)(self, **kwargs)\n\n    @doc_utils.add_refer_to(\"DataFrame.sizeof\")\n    def sizeof(self):\n        \"\"\"\n        Compute the total memory usage for `self`.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            Result that holds either a value or Series of values.\n        \"\"\"\n        return DataFrameDefault.register(pandas.DataFrame.__sizeof__)(self)\n\n    @doc_utils.doc_reduce_agg(\n        method=\"number of unique values\",\n        refer_to=\"nunique\",\n        params=\"\"\"\n        axis : {0, 1}\n        dropna : bool\"\"\",\n        extra_params=[\"**kwargs\"],\n    )\n    def nunique(self, **kwargs):  # noqa: PR02\n        return DataFrameDefault.register(pandas.DataFrame.nunique)(self, **kwargs)\n\n    @doc_utils.doc_reduce_agg(\n        method=\"value at the given quantile\",\n        refer_to=\"quantile\",\n        params=\"\"\"\n        q : float\n        axis : {0, 1}\n        numeric_only : bool\n        interpolation : {\"linear\", \"lower\", \"higher\", \"midpoint\", \"nearest\"}\"\"\",\n        extra_params=[\"**kwargs\"],\n    )\n    def quantile_for_single_value(self, **kwargs):  # noqa: PR02\n        return DataFrameDefault.register(pandas.DataFrame.quantile)(self, **kwargs)\n\n    @doc_utils.doc_reduce_agg(\n        method=\"unbiased skew\", refer_to=\"skew\", extra_params=[\"skipna\", \"**kwargs\"]\n    )\n    def skew(self, **kwargs):  # noqa: PR02\n        return DataFrameDefault.register(pandas.DataFrame.skew)(self, **kwargs)\n\n    @doc_utils.doc_reduce_agg(\n        method=\"standard deviation of the mean\",\n        refer_to=\"sem\",\n        extra_params=[\"skipna\", \"ddof\", 
\"**kwargs\"],\n    )\n    def sem(self, **kwargs):  # noqa: PR02\n        return DataFrameDefault.register(pandas.DataFrame.sem)(self, **kwargs)\n\n    @doc_utils.doc_reduce_agg(\n        method=\"standard deviation\",\n        refer_to=\"std\",\n        extra_params=[\"skipna\", \"ddof\", \"**kwargs\"],\n    )\n    def std(self, **kwargs):  # noqa: PR02\n        return DataFrameDefault.register(pandas.DataFrame.std)(self, **kwargs)\n\n    @doc_utils.doc_reduce_agg(\n        method=\"variance\", refer_to=\"var\", extra_params=[\"skipna\", \"ddof\", \"**kwargs\"]\n    )\n    def var(self, **kwargs):  # noqa: PR02\n        return DataFrameDefault.register(pandas.DataFrame.var)(self, **kwargs)\n\n    # END Abstract column/row partitions reduce operations\n\n    @doc_utils.add_refer_to(\"DataFrame.describe\")\n    def describe(self, percentiles: np.ndarray):\n        \"\"\"\n        Generate descriptive statistics.\n\n        Parameters\n        ----------\n        percentiles : list-like\n\n        Returns\n        -------\n        BaseQueryCompiler\n            QueryCompiler object containing the descriptive statistics\n            of the underlying data.\n        \"\"\"\n        return DataFrameDefault.register(pandas.DataFrame.describe)(\n            self,\n            percentiles=percentiles,\n            include=\"all\",\n        )\n\n    # Map across rows/columns\n    # These operations require some global knowledge of the full column/row\n    # that is being operated on. 
This means that we have to put all of that\n    # data in the same place.\n\n    @doc_utils.doc_cum_agg(method=\"sum\", refer_to=\"cumsum\")\n    def cumsum(self, fold_axis, **kwargs):  # noqa: PR02\n        return DataFrameDefault.register(pandas.DataFrame.cumsum)(self, **kwargs)\n\n    @doc_utils.doc_cum_agg(method=\"maximum\", refer_to=\"cummax\")\n    def cummax(self, fold_axis, **kwargs):  # noqa: PR02\n        return DataFrameDefault.register(pandas.DataFrame.cummax)(self, **kwargs)\n\n    @doc_utils.doc_cum_agg(method=\"minimum\", refer_to=\"cummin\")\n    def cummin(self, fold_axis, **kwargs):  # noqa: PR02\n        return DataFrameDefault.register(pandas.DataFrame.cummin)(self, **kwargs)\n\n    @doc_utils.doc_cum_agg(method=\"product\", refer_to=\"cumprod\")\n    def cumprod(self, fold_axis, **kwargs):  # noqa: PR02\n        return DataFrameDefault.register(pandas.DataFrame.cumprod)(self, **kwargs)\n\n    @doc_utils.add_refer_to(\"DataFrame.diff\")\n    def diff(self, **kwargs):  # noqa: PR02\n        \"\"\"\n        First discrete difference of element.\n\n        Parameters\n        ----------\n        periods : int\n        **kwargs : dict\n            Serves the compatibility purpose. Does not affect the result.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            QueryCompiler of the same shape as `self`, where each element is the difference\n            between the corresponding value and the previous value in this row or column.\n        \"\"\"\n        return DataFrameDefault.register(pandas.DataFrame.diff)(self, **kwargs)\n\n    @doc_utils.add_refer_to(\"DataFrame.dropna\")\n    def dropna(self, **kwargs):  # noqa: PR02\n        \"\"\"\n        Remove missing values.\n\n        Parameters\n        ----------\n        axis : {0, 1}\n        how : {\"any\", \"all\"}\n        thresh : int, optional\n        subset : list of labels\n        **kwargs : dict\n            Serves the compatibility purpose. 
Does not affect the result.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            New QueryCompiler with null values dropped along given axis.\n        \"\"\"\n        return DataFrameDefault.register(pandas.DataFrame.dropna)(self, **kwargs)\n\n    @doc_utils.add_refer_to(\"DataFrame.duplicated\")\n    def duplicated(self, **kwargs):\n        \"\"\"\n        Return boolean Series denoting duplicate rows.\n\n        Parameters\n        ----------\n        **kwargs : dict\n            Additional keyword arguments to be passed in to `pandas.DataFrame.duplicated`.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            New QueryCompiler containing boolean Series denoting duplicate rows.\n        \"\"\"\n        return DataFrameDefault.register(pandas.DataFrame.duplicated)(self, **kwargs)\n\n    @doc_utils.add_refer_to(\"DataFrame.nlargest\")\n    def nlargest(self, n=5, columns=None, keep=\"first\"):\n        \"\"\"\n        Return the first `n` rows ordered by `columns` in descending order.\n\n        Parameters\n        ----------\n        n : int, default: 5\n        columns : list of labels, optional\n            Column labels to order by.\n            (note: this parameter can be omitted only for a single-column query compilers\n            representing Series object, otherwise `columns` has to be specified).\n        keep : {\"first\", \"last\", \"all\"}, default: \"first\"\n\n        Returns\n        -------\n        BaseQueryCompiler\n        \"\"\"\n        if columns is None:\n            return SeriesDefault.register(pandas.Series.nlargest)(self, n=n, keep=keep)\n        else:\n            return DataFrameDefault.register(pandas.DataFrame.nlargest)(\n                self, n=n, columns=columns, keep=keep\n            )\n\n    @doc_utils.add_refer_to(\"DataFrame.nsmallest\")\n    def nsmallest(self, n=5, columns=None, keep=\"first\"):\n        \"\"\"\n        Return the first `n` rows ordered by `columns` in ascending 
order.\n\n        Parameters\n        ----------\n        n : int, default: 5\n        columns : list of labels, optional\n            Column labels to order by.\n            (note: this parameter can be omitted only for a single-column query compilers\n            representing Series object, otherwise `columns` has to be specified).\n        keep : {\"first\", \"last\", \"all\"}, default: \"first\"\n\n        Returns\n        -------\n        BaseQueryCompiler\n        \"\"\"\n        if columns is None:\n            return SeriesDefault.register(pandas.Series.nsmallest)(self, n=n, keep=keep)\n        else:\n            return DataFrameDefault.register(pandas.DataFrame.nsmallest)(\n                self, n=n, columns=columns, keep=keep\n            )\n\n    @doc_utils.add_refer_to(\"DataFrame.query\")\n    def rowwise_query(self, expr, **kwargs):\n        \"\"\"\n        Query columns of the QueryCompiler with a boolean expression row-wise.\n\n        Parameters\n        ----------\n        expr : str\n        **kwargs : dict\n\n        Returns\n        -------\n        BaseQueryCompiler\n            New QueryCompiler containing the rows where the boolean expression is satisfied.\n        \"\"\"\n        raise NotImplementedError(\n            \"Row-wise queries execution is not implemented for the selected backend.\"\n        )\n\n    @doc_utils.add_refer_to(\"DataFrame.eval\")\n    def eval(self, expr, **kwargs):\n        \"\"\"\n        Evaluate string expression on QueryCompiler columns.\n\n        Parameters\n        ----------\n        expr : str\n        **kwargs : dict\n\n        Returns\n        -------\n        BaseQueryCompiler\n            QueryCompiler containing the result of evaluation.\n        \"\"\"\n        return DataFrameDefault.register(pandas.DataFrame.eval)(\n            self, expr=expr, **kwargs\n        )\n\n    @doc_utils.add_refer_to(\"DataFrame.mode\")\n    def mode(self, **kwargs):  # noqa: PR02\n        \"\"\"\n        Get the modes 
for every column or row.\n\n        Parameters\n        ----------\n        axis : {0, 1}\n        numeric_only : bool\n        dropna : bool\n        **kwargs : dict\n            Serves the compatibility purpose. Does not affect the result.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            New QueryCompiler with modes calculated along given axis.\n        \"\"\"\n        return DataFrameDefault.register(pandas.DataFrame.mode)(self, **kwargs)\n\n    @doc_utils.add_refer_to(\"DataFrame.fillna\")\n    def fillna(self, **kwargs):  # noqa: PR02\n        \"\"\"\n        Replace NaN values using provided method.\n\n        Parameters\n        ----------\n        value : scalar or dict\n        method : {\"backfill\", \"bfill\", \"pad\", \"ffill\", None}\n        axis : {0, 1}\n        inplace : {False}\n            This parameter serves the compatibility purpose. Always has to be False.\n        limit : int, optional\n        downcast : dict, optional\n        **kwargs : dict\n            Serves the compatibility purpose. 
Does not affect the result.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            New QueryCompiler with all null values filled.\n        \"\"\"\n        squeeze_self = kwargs.pop(\"squeeze_self\", False)\n        squeeze_value = kwargs.pop(\"squeeze_value\", False)\n\n        def fillna(df, value, **kwargs):\n            if squeeze_self:\n                df = df.squeeze(axis=1)\n            if squeeze_value:\n                value = value.squeeze(axis=1)\n            return df.fillna(value, **kwargs)\n\n        return DataFrameDefault.register(fillna)(self, **kwargs)\n\n    @doc_utils.add_refer_to(\"DataFrame.rank\")\n    def rank(self, **kwargs):  # noqa: PR02\n        \"\"\"\n        Compute numerical rank along the specified axis.\n\n        By default, equal values are assigned a rank that is the average of the ranks\n        of those values, this behavior can be changed via `method` parameter.\n\n        Parameters\n        ----------\n        axis : {0, 1}\n        method : {\"average\", \"min\", \"max\", \"first\", \"dense\"}\n        numeric_only : bool\n        na_option : {\"keep\", \"top\", \"bottom\"}\n        ascending : bool\n        pct : bool\n        **kwargs : dict\n            Serves the compatibility purpose. 
Does not affect the result.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            QueryCompiler of the same shape as `self`, where each element is the\n            numerical rank of the corresponding value along row or column.\n        \"\"\"\n        return DataFrameDefault.register(pandas.DataFrame.rank)(self, **kwargs)\n\n    @doc_utils.add_refer_to(\"DataFrame.sort_index\")\n    def sort_index(self, **kwargs):  # noqa: PR02\n        \"\"\"\n        Sort data by index or column labels.\n\n        Parameters\n        ----------\n        axis : {0, 1}\n        level : int, label or list of such\n        ascending : bool\n        inplace : bool\n        kind : {\"quicksort\", \"mergesort\", \"heapsort\"}\n        na_position : {\"first\", \"last\"}\n        sort_remaining : bool\n        ignore_index : bool\n        key : callable(pandas.Index) -> pandas.Index, optional\n        **kwargs : dict\n            Serves the compatibility purpose. Does not affect the result.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            New QueryCompiler containing the data sorted by columns or indices.\n        \"\"\"\n        return DataFrameDefault.register(pandas.DataFrame.sort_index)(self, **kwargs)\n\n    @doc_utils.add_refer_to(\"DataFrame.melt\")\n    def melt(self, *args, **kwargs):  # noqa: PR02\n        \"\"\"\n        Unpivot QueryCompiler data from wide to long format.\n\n        Parameters\n        ----------\n        id_vars : list of labels, optional\n        value_vars : list of labels, optional\n        var_name : label\n        value_name : label\n        col_level : int or label\n        ignore_index : bool\n        *args : iterable\n            Serves the compatibility purpose. Does not affect the result.\n        **kwargs : dict\n            Serves the compatibility purpose. 
Does not affect the result.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            New QueryCompiler with unpivoted data.\n        \"\"\"\n        return DataFrameDefault.register(pandas.DataFrame.melt)(self, *args, **kwargs)\n\n    @doc_utils.add_refer_to(\"DataFrame.sort_values\")\n    def sort_columns_by_row_values(self, rows, ascending=True, **kwargs):  # noqa: PR02\n        \"\"\"\n        Reorder the columns based on the lexicographic order of the given rows.\n\n        Parameters\n        ----------\n        rows : label or list of labels\n            The row or rows to sort by.\n        ascending : bool, default: True\n            Sort in ascending order (True) or descending order (False).\n        kind : {\"quicksort\", \"mergesort\", \"heapsort\"}\n        na_position : {\"first\", \"last\"}\n        ignore_index : bool\n        key : callable(pandas.Index) -> pandas.Index, optional\n        **kwargs : dict\n            Serves the compatibility purpose. Does not affect the result.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            New QueryCompiler that contains result of the sort.\n        \"\"\"\n        return DataFrameDefault.register(pandas.DataFrame.sort_values)(\n            self, by=rows, axis=1, ascending=ascending, **kwargs\n        )\n\n    @doc_utils.add_refer_to(\"DataFrame.sort_values\")\n    def sort_rows_by_column_values(\n        self, columns, ascending=True, **kwargs\n    ):  # noqa: PR02\n        \"\"\"\n        Reorder the rows based on the lexicographic order of the given columns.\n\n        Parameters\n        ----------\n        columns : label or list of labels\n            The column or columns to sort by.\n        ascending : bool, default: True\n            Sort in ascending order (True) or descending order (False).\n        kind : {\"quicksort\", \"mergesort\", \"heapsort\"}\n        na_position : {\"first\", \"last\"}\n        ignore_index : bool\n        key : 
callable(pandas.Index) -> pandas.Index, optional\n        **kwargs : dict\n            Serves the compatibility purpose. Does not affect the result.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            New QueryCompiler that contains result of the sort.\n        \"\"\"\n        # Avoid index/column name collisions by renaming and restoring after sorting\n        index_renaming = None\n        if is_scalar(columns):\n            columns = [columns]\n        if any(name in columns for name in self.index.names):\n            index_renaming = self.index.names\n            self.index = self.index.set_names([None] * len(self.index.names))\n        new_query_compiler = DataFrameDefault.register(pandas.DataFrame.sort_values)(\n            self, by=columns, axis=0, ascending=ascending, **kwargs\n        )\n        if index_renaming is not None:\n            new_query_compiler.index = new_query_compiler.index.set_names(\n                index_renaming\n            )\n        return new_query_compiler\n\n    # END Abstract map across rows/columns\n\n    # Map across rows/columns\n    # These operations require some global knowledge of the full column/row\n    # that is being operated on. 
This means that we have to put all of that\n    # data in the same place.\n    @doc_utils.doc_reduce_agg(\n        method=\"value at the given quantile\",\n        refer_to=\"quantile\",\n        params=\"\"\"\n        q : list-like\n        axis : {0, 1}\n        numeric_only : bool\n        interpolation : {\"linear\", \"lower\", \"higher\", \"midpoint\", \"nearest\"}\"\"\",\n        extra_params=[\"**kwargs\"],\n    )\n    def quantile_for_list_of_values(self, **kwargs):  # noqa: PR02\n        return DataFrameDefault.register(pandas.DataFrame.quantile)(self, **kwargs)\n\n    # END Abstract map across rows/columns\n\n    # Abstract __getitem__ methods\n    def getitem_array(self, key):\n        \"\"\"\n        Mask QueryCompiler with `key`.\n\n        Parameters\n        ----------\n        key : BaseQueryCompiler, np.ndarray or list of column labels\n            Boolean mask represented by QueryCompiler or ``np.ndarray`` of the same\n            shape as `self`, or enumerable of columns to pick.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            New masked QueryCompiler.\n        \"\"\"\n        if isinstance(key, type(self)):\n            key = key.to_pandas().squeeze(axis=1)\n\n        def getitem_array(df, key):\n            return df[key]\n\n        return DataFrameDefault.register(getitem_array)(self, key)\n\n    def getitem_column_array(self, key, numeric=False, ignore_order=False):\n        \"\"\"\n        Get column data for target labels.\n\n        Parameters\n        ----------\n        key : list-like\n            Target labels by which to retrieve data.\n        numeric : bool, default: False\n            Whether or not the key passed in represents the numeric index\n            or the named index.\n        ignore_order : bool, default: False\n            Allow returning columns in an arbitrary order for the sake of performance.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            New QueryCompiler 
that contains specified columns.\n        \"\"\"\n\n        def get_column(df, key):\n            if numeric:\n                return df.iloc[:, key]\n            else:\n                return df[key]\n\n        return DataFrameDefault.register(get_column)(self, key=key)\n\n    def getitem_row_array(self, key):\n        \"\"\"\n        Get row data for target indices.\n\n        Parameters\n        ----------\n        key : list-like\n            Numeric indices of the rows to pick.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            New QueryCompiler that contains specified rows.\n        \"\"\"\n\n        def get_row(df, key):\n            return df.iloc[key]\n\n        return DataFrameDefault.register(get_row)(self, key=key)\n\n    def lookup(self, row_labels, col_labels):  # noqa: PR01, RT01, D200\n        \"\"\"\n        Label-based \"fancy indexing\" function for ``DataFrame``.\n        \"\"\"\n        return self.default_to_pandas(pandas.DataFrame.lookup, row_labels, col_labels)\n\n    # END Abstract __getitem__ methods\n\n    # Abstract insert\n    # This method changes the shape of the resulting data. 
In Pandas, this\n    # operation is always inplace, but this object is immutable, so we just\n    # return a new one from here and let the front end handle the inplace\n    # update.\n    def insert(self, loc, column, value):\n        \"\"\"\n        Insert new column.\n\n        Parameters\n        ----------\n        loc : int\n            Insertion position.\n        column : label\n            Label of the new column.\n        value : One-column BaseQueryCompiler, 1D array or scalar\n            Data to fill new column with.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            QueryCompiler with new column inserted.\n        \"\"\"\n\n        def inserter(df, loc, column, value):\n            if isinstance(value, pandas.DataFrame):\n                value = value.squeeze(axis=1)\n            df.insert(loc, column, value)\n            return df\n\n        return DataFrameDefault.register(inserter, inplace=True)(\n            self, loc=loc, column=column, value=value\n        )\n\n    # END Abstract insert\n\n    # __setitem__ methods\n    def setitem_bool(self, row_loc, col_loc, item):\n        \"\"\"\n        Set an item to the given location based on `row_loc` and `col_loc`.\n\n        Parameters\n        ----------\n        row_loc : BaseQueryCompiler\n            Query Compiler holding a Series of booleans.\n        col_loc : label\n            Column label in `self`.\n        item : scalar\n            An item to be set.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            New QueryCompiler with the inserted item.\n\n        Notes\n        -----\n        Currently, this method is only used to set a scalar to the given location.\n        \"\"\"\n\n        def _set_item(df, row_loc, col_loc, item):\n            df.loc[row_loc.squeeze(axis=1), col_loc] = item\n            return df\n\n        return DataFrameDefault.register(_set_item)(\n            self, row_loc=row_loc, col_loc=col_loc, item=item\n        )\n\n    
# END __setitem__ methods\n\n    # Abstract drop\n    def drop(self, index=None, columns=None, errors: str = \"raise\"):\n        \"\"\"\n        Drop specified rows or columns.\n\n        Parameters\n        ----------\n        index : list of labels, optional\n            Labels of rows to drop.\n        columns : list of labels, optional\n            Labels of columns to drop.\n        errors : str, default: \"raise\"\n            If 'ignore', suppress error and only existing labels are dropped.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            New QueryCompiler with removed data.\n        \"\"\"\n        if index is None and columns is None:\n            return self\n        else:\n            return DataFrameDefault.register(pandas.DataFrame.drop)(\n                self, index=index, columns=columns, errors=errors\n            )\n\n    # END drop\n\n    # UDF (apply and agg) methods\n    # There is a wide range of behaviors that are supported, so a lot of the\n    # logic can get a bit convoluted.\n    def apply(self, func, axis, raw=False, result_type=None, *args, **kwargs):\n        \"\"\"\n        Apply passed function across given axis.\n\n        Parameters\n        ----------\n        func : callable(pandas.Series) -> scalar, str, list or dict of such\n            The function to apply to each column or row.\n        axis : {0, 1}\n            Target axis to apply the function along.\n            0 is for index, 1 is for columns.\n        raw : bool, default: False\n            Whether to pass a high-level Series object (False) or a raw representation\n            of the data (True).\n        result_type : {\"expand\", \"reduce\", \"broadcast\", None}, default: None\n            Determines how to treat list-like return type of the `func` (works only if\n            a single function was passed):\n\n            - \"expand\": expand list-like result into columns.\n            - \"reduce\": keep result into a single cell (opposite 
of \"expand\").\n            - \"broadcast\": broadcast result to original data shape (overwrite the existing column/row with the function result).\n            - None: use \"expand\" strategy if Series is returned, \"reduce\" otherwise.\n        *args : iterable\n            Positional arguments to pass to `func`.\n        **kwargs : dict\n            Keyword arguments to pass to `func`.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            QueryCompiler that contains the results of execution and is built by\n            the following rules:\n\n            - Index of the specified axis contains: the names of the passed functions if multiple\n              functions are passed, otherwise: indices of the `func` result if \"expand\" strategy\n              is used, indices of the original frame if \"broadcast\" strategy is used, a single\n              label `MODIN_UNNAMED_SERIES_LABEL` if \"reduce\" strategy is used.\n            - Labels of the opposite axis are preserved.\n            - Each element is the result of execution of `func` against\n              corresponding row/column.\n        \"\"\"\n        return DataFrameDefault.register(pandas.DataFrame.apply)(\n            self,\n            func=func,\n            axis=axis,\n            raw=raw,\n            result_type=result_type,\n            *args,\n            **kwargs,\n        )\n\n    def apply_on_series(self, func, *args, **kwargs):\n        \"\"\"\n        Apply passed function on underlying Series.\n\n        Parameters\n        ----------\n        func : callable(pandas.Series) -> scalar, str, list or dict of such\n            The function to apply to each row.\n        *args : iterable\n            Positional arguments to pass to `func`.\n        **kwargs : dict\n            Keyword arguments to pass to `func`.\n\n        Returns\n        -------\n        BaseQueryCompiler\n        \"\"\"\n        assert self.is_series_like()\n\n        return 
SeriesDefault.register(pandas.Series.apply)(\n            self,\n            func=func,\n            *args,\n            **kwargs,\n        )\n\n    def explode(self, column):\n        \"\"\"\n        Explode the given columns.\n\n        Parameters\n        ----------\n        column : Union[Hashable, Sequence[Hashable]]\n            The columns to explode.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            QueryCompiler that contains the results of execution. For each row\n            in the input QueryCompiler, if the selected columns each contain M\n            items, there will be M rows created by exploding the columns.\n        \"\"\"\n        return DataFrameDefault.register(pandas.DataFrame.explode)(self, column)\n\n    # END UDF\n\n    # Manual Partitioning methods (e.g. merge, groupby)\n    # These methods require some sort of manual partitioning due to their\n    # nature. They require certain data to exist on the same partition, and\n    # after the shuffle, there should be only a local map required.\n\n    # FIXME: `map_args` and `reduce_args` leaked there from `PandasQueryCompiler.groupby_*`,\n    # pandas storage format implements groupby via TreeReduce approach, but for other storage formats these\n    # parameters make no sense, they shouldn't be present in a base class.\n\n    @doc_utils.doc_groupby_method(\n        action=\"count non-null values\",\n        result=\"number of non-null values\",\n        refer_to=\"count\",\n    )\n    def groupby_count(\n        self,\n        by,\n        axis,\n        groupby_kwargs,\n        agg_args,\n        agg_kwargs,\n        drop=False,\n    ):\n        return GroupByDefault.register(pandas.core.groupby.DataFrameGroupBy.count)(\n            self,\n            by=by,\n            axis=axis,\n            groupby_kwargs=groupby_kwargs,\n            agg_args=agg_args,\n            agg_kwargs=agg_kwargs,\n            drop=drop,\n        )\n\n    @doc_utils.doc_groupby_method(\n      
  action=\"check whether any element is True\",\n        result=\"boolean of whether there is any element which is True\",\n        refer_to=\"any\",\n    )\n    def groupby_any(\n        self,\n        by,\n        axis,\n        groupby_kwargs,\n        agg_args,\n        agg_kwargs,\n        drop=False,\n    ):\n        return GroupByDefault.register(pandas.core.groupby.DataFrameGroupBy.any)(\n            self,\n            by=by,\n            axis=axis,\n            groupby_kwargs=groupby_kwargs,\n            agg_args=agg_args,\n            agg_kwargs=agg_kwargs,\n            drop=drop,\n        )\n\n    @doc_utils.doc_groupby_method(\n        action=\"get the index of the minimum value\",\n        result=\"index of minimum value\",\n        refer_to=\"idxmin\",\n    )\n    def groupby_idxmin(\n        self, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop=False\n    ):\n        return GroupByDefault.register(pandas.core.groupby.DataFrameGroupBy.idxmin)(\n            self,\n            by=by,\n            axis=axis,\n            groupby_kwargs=groupby_kwargs,\n            agg_args=agg_args,\n            agg_kwargs=agg_kwargs,\n            drop=drop,\n        )\n\n    @doc_utils.doc_groupby_method(\n        action=\"get the index of the maximum value\",\n        result=\"index of maximum value\",\n        refer_to=\"idxmax\",\n    )\n    def groupby_idxmax(\n        self, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop=False\n    ):\n        return GroupByDefault.register(pandas.core.groupby.DataFrameGroupBy.idxmax)(\n            self,\n            by=by,\n            axis=axis,\n            groupby_kwargs=groupby_kwargs,\n            agg_args=agg_args,\n            agg_kwargs=agg_kwargs,\n            drop=drop,\n        )\n\n    @doc_utils.doc_groupby_method(\n        action=\"get the minimum value\", result=\"minimum value\", refer_to=\"min\"\n    )\n    def groupby_min(\n        self,\n        by,\n        axis,\n        groupby_kwargs,\n        
agg_args,\n        agg_kwargs,\n        drop=False,\n    ):\n        return GroupByDefault.register(pandas.core.groupby.DataFrameGroupBy.min)(\n            self,\n            by=by,\n            axis=axis,\n            groupby_kwargs=groupby_kwargs,\n            agg_args=agg_args,\n            agg_kwargs=agg_kwargs,\n            drop=drop,\n        )\n\n    @doc_utils.doc_groupby_method(result=\"product\", refer_to=\"prod\")\n    def groupby_prod(\n        self,\n        by,\n        axis,\n        groupby_kwargs,\n        agg_args,\n        agg_kwargs,\n        drop=False,\n    ):\n        return GroupByDefault.register(pandas.core.groupby.DataFrameGroupBy.prod)(\n            self,\n            by=by,\n            axis=axis,\n            groupby_kwargs=groupby_kwargs,\n            agg_args=agg_args,\n            agg_kwargs=agg_kwargs,\n            drop=drop,\n        )\n\n    @doc_utils.doc_groupby_method(\n        action=\"get the maximum value\", result=\"maximum value\", refer_to=\"max\"\n    )\n    def groupby_max(\n        self,\n        by,\n        axis,\n        groupby_kwargs,\n        agg_args,\n        agg_kwargs,\n        drop=False,\n    ):\n        return GroupByDefault.register(pandas.core.groupby.DataFrameGroupBy.max)(\n            self,\n            by=by,\n            axis=axis,\n            groupby_kwargs=groupby_kwargs,\n            agg_args=agg_args,\n            agg_kwargs=agg_kwargs,\n            drop=drop,\n        )\n\n    @doc_utils.doc_groupby_method(\n        action=\"check whether all elements are True\",\n        result=\"boolean of whether all elements are True\",\n        refer_to=\"all\",\n    )\n    def groupby_all(\n        self,\n        by,\n        axis,\n        groupby_kwargs,\n        agg_args,\n        agg_kwargs,\n        drop=False,\n    ):\n        return GroupByDefault.register(pandas.core.groupby.DataFrameGroupBy.all)(\n            self,\n            by=by,\n            axis=axis,\n            
groupby_kwargs=groupby_kwargs,\n            agg_args=agg_args,\n            agg_kwargs=agg_kwargs,\n            drop=drop,\n        )\n\n    @doc_utils.doc_groupby_method(result=\"sum\", refer_to=\"sum\")\n    def groupby_sum(\n        self,\n        by,\n        axis,\n        groupby_kwargs,\n        agg_args,\n        agg_kwargs,\n        drop=False,\n    ):\n        return GroupByDefault.register(pandas.core.groupby.DataFrameGroupBy.sum)(\n            self,\n            by=by,\n            axis=axis,\n            groupby_kwargs=groupby_kwargs,\n            agg_args=agg_args,\n            agg_kwargs=agg_kwargs,\n            drop=drop,\n        )\n\n    @doc_utils.doc_groupby_method(\n        action=\"get the number of elements\",\n        result=\"number of elements\",\n        refer_to=\"size\",\n    )\n    def groupby_size(\n        self,\n        by,\n        axis,\n        groupby_kwargs,\n        agg_args,\n        agg_kwargs,\n        drop=False,\n    ):\n        result = GroupByDefault.register(pandas.core.groupby.DataFrameGroupBy.size)(\n            self,\n            by=by,\n            axis=axis,\n            groupby_kwargs=groupby_kwargs,\n            agg_args=agg_args,\n            agg_kwargs=agg_kwargs,\n            drop=drop,\n            method=\"size\",\n        )\n        if not groupby_kwargs.get(\"as_index\", False):\n            # Renaming 'MODIN_UNNAMED_SERIES_LABEL' to a proper name\n            result.columns = result.columns[:-1].append(pandas.Index([\"size\"]))\n        return result\n\n    @doc_utils.add_refer_to(\"GroupBy.rolling\")\n    def groupby_rolling(\n        self,\n        by,\n        agg_func,\n        axis,\n        groupby_kwargs,\n        rolling_kwargs,\n        agg_args,\n        agg_kwargs,\n        drop=False,\n    ):\n        \"\"\"\n        Group QueryCompiler data and apply passed aggregation function to a rolling window in each group.\n\n        Parameters\n        ----------\n        by : BaseQueryCompiler, 
column or index label, Grouper or list of such\n            Object that determine groups.\n        agg_func : str, dict or callable(Series | DataFrame) -> scalar | Series | DataFrame\n            Function to apply to the GroupBy object.\n        axis : {0, 1}\n            Axis to group and apply aggregation function along.\n            0 is for index, when 1 is for columns.\n        groupby_kwargs : dict\n            GroupBy parameters as expected by ``modin.pandas.DataFrame.groupby`` signature.\n        rolling_kwargs : dict\n            Parameters to build a rolling window as expected by ``modin.pandas.window.RollingGroupby`` signature.\n        agg_args : list-like\n            Positional arguments to pass to the `agg_func`.\n        agg_kwargs : dict\n            Key arguments to pass to the `agg_func`.\n        drop : bool, default: False\n            If `by` is a QueryCompiler indicates whether or not by-data came\n            from the `self`.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            QueryCompiler containing the result of groupby aggregation.\n        \"\"\"\n        if isinstance(agg_func, str):\n            str_func = agg_func\n\n            def agg_func(window, *args, **kwargs):\n                return getattr(window, str_func)(*args, **kwargs)\n\n        else:\n            assert callable(agg_func)\n        return self.groupby_agg(\n            by=by,\n            agg_func=lambda grp, *args, **kwargs: agg_func(\n                grp.rolling(**rolling_kwargs), *args, **kwargs\n            ),\n            axis=axis,\n            groupby_kwargs=groupby_kwargs,\n            agg_args=agg_args,\n            agg_kwargs=agg_kwargs,\n            how=\"direct\",\n            drop=drop,\n        )\n\n    @doc_utils.add_refer_to(\"GroupBy.aggregate\")\n    def groupby_agg(\n        self,\n        by,\n        agg_func,\n        axis,\n        groupby_kwargs,\n        agg_args,\n        agg_kwargs,\n        how=\"axis_wise\",\n        
drop=False,\n        series_groupby=False,\n    ):\n        \"\"\"\n        Group QueryCompiler data and apply passed aggregation function.\n\n        Parameters\n        ----------\n        by : BaseQueryCompiler, column or index label, Grouper or list of such\n            Object that determine groups.\n        agg_func : str, dict or callable(Series | DataFrame) -> scalar | Series | DataFrame\n            Function to apply to the GroupBy object.\n        axis : {0, 1}\n            Axis to group and apply aggregation function along.\n            0 is for index, when 1 is for columns.\n        groupby_kwargs : dict\n            GroupBy parameters as expected by ``modin.pandas.DataFrame.groupby`` signature.\n        agg_args : list-like\n            Positional arguments to pass to the `agg_func`.\n        agg_kwargs : dict\n            Key arguments to pass to the `agg_func`.\n        how : {'axis_wise', 'group_wise', 'transform'}, default: 'axis_wise'\n            How to apply passed `agg_func`:\n                - 'axis_wise': apply the function against each row/column.\n                - 'group_wise': apply the function against every group.\n                - 'transform': apply the function against every group and broadcast\n                  the result to the original Query Compiler shape.\n        drop : bool, default: False\n            If `by` is a QueryCompiler indicates whether or not by-data came\n            from the `self`.\n        series_groupby : bool, default: False\n            Whether we should treat `self` as Series when performing groupby.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            QueryCompiler containing the result of groupby aggregation.\n        \"\"\"\n        if isinstance(by, type(self)) and len(by.columns) == 1:\n            by = by.columns[0] if drop else by.to_pandas().squeeze()\n        # converting QC 'by' to a list of column labels only if this 'by' comes from the self (if drop is True)\n        elif 
drop and isinstance(by, type(self)):\n            by = list(by.columns)\n\n        defaulter = SeriesGroupByDefault if series_groupby else GroupByDefault\n        return defaulter.register(defaulter.get_aggregation_method(how))(\n            self,\n            by=by,\n            agg_func=agg_func,\n            axis=axis,\n            groupby_kwargs=groupby_kwargs,\n            agg_args=agg_args,\n            agg_kwargs=agg_kwargs,\n            drop=drop,\n        )\n\n    @doc_utils.doc_groupby_method(\n        action=\"compute the mean value\", result=\"mean value\", refer_to=\"mean\"\n    )\n    def groupby_mean(\n        self,\n        by,\n        axis,\n        groupby_kwargs,\n        agg_args,\n        agg_kwargs,\n        drop=False,\n    ):\n        return self.groupby_agg(\n            by=by,\n            agg_func=\"mean\",\n            axis=axis,\n            groupby_kwargs=groupby_kwargs,\n            agg_args=agg_args,\n            agg_kwargs=agg_kwargs,\n            drop=drop,\n        )\n\n    @doc_utils.doc_groupby_method(\n        action=\"compute unbiased skew\", result=\"unbiased skew\", refer_to=\"skew\"\n    )\n    def groupby_skew(\n        self,\n        by,\n        axis,\n        groupby_kwargs,\n        agg_args,\n        agg_kwargs,\n        drop=False,\n    ):\n        if axis == 1:\n            # To avoid `ValueError: Operation skew does not support axis=1` due to the\n            # difference in the behavior of `groupby(...).skew(axis=1)` and\n            # `groupby(...).agg(\"skew\", axis=1)`.\n            return GroupByDefault.register(pandas.core.groupby.DataFrameGroupBy.skew)(\n                self,\n                by=by,\n                axis=axis,\n                groupby_kwargs=groupby_kwargs,\n                agg_args=agg_args,\n                agg_kwargs=agg_kwargs,\n                drop=drop,\n            )\n        return self.groupby_agg(\n            by=by,\n            agg_func=\"skew\",\n            axis=axis,\n        
    groupby_kwargs=groupby_kwargs,\n            agg_args=agg_args,\n            agg_kwargs=agg_kwargs,\n            drop=drop,\n        )\n\n    @doc_utils.doc_groupby_method(\n        action=\"compute cumulative count\",\n        result=\"count of all the previous values\",\n        refer_to=\"cumcount\",\n    )\n    def groupby_cumcount(\n        self,\n        by,\n        axis,\n        groupby_kwargs,\n        agg_args,\n        agg_kwargs,\n        drop=False,\n    ):\n        return self.groupby_agg(\n            by=by,\n            agg_func=\"cumcount\",\n            axis=axis,\n            groupby_kwargs=groupby_kwargs,\n            agg_args=agg_args,\n            agg_kwargs=agg_kwargs,\n            drop=drop,\n        )\n\n    @doc_utils.doc_groupby_method(\n        action=\"compute cumulative sum\",\n        result=\"sum of all the previous values\",\n        refer_to=\"cumsum\",\n    )\n    def groupby_cumsum(\n        self,\n        by,\n        axis,\n        groupby_kwargs,\n        agg_args,\n        agg_kwargs,\n        drop=False,\n    ):\n        return self.groupby_agg(\n            by=by,\n            agg_func=\"cumsum\",\n            axis=axis,\n            groupby_kwargs=groupby_kwargs,\n            agg_args=agg_args,\n            agg_kwargs=agg_kwargs,\n            drop=drop,\n        )\n\n    @doc_utils.doc_groupby_method(\n        action=\"get cumulative maximum\",\n        result=\"maximum of all the previous values\",\n        refer_to=\"cummax\",\n    )\n    def groupby_cummax(\n        self,\n        by,\n        axis,\n        groupby_kwargs,\n        agg_args,\n        agg_kwargs,\n        drop=False,\n    ):\n        return self.groupby_agg(\n            by=by,\n            agg_func=\"cummax\",\n            axis=axis,\n            groupby_kwargs=groupby_kwargs,\n            agg_args=agg_args,\n            agg_kwargs=agg_kwargs,\n            drop=drop,\n        )\n\n    @doc_utils.doc_groupby_method(\n        action=\"get cumulative 
minimum\",\n        result=\"minimum of all the previous values\",\n        refer_to=\"cummin\",\n    )\n    def groupby_cummin(\n        self,\n        by,\n        axis,\n        groupby_kwargs,\n        agg_args,\n        agg_kwargs,\n        drop=False,\n    ):\n        return self.groupby_agg(\n            by=by,\n            agg_func=\"cummin\",\n            axis=axis,\n            groupby_kwargs=groupby_kwargs,\n            agg_args=agg_args,\n            agg_kwargs=agg_kwargs,\n            drop=drop,\n        )\n\n    @doc_utils.doc_groupby_method(\n        action=\"get cumulative production\",\n        result=\"production of all the previous values\",\n        refer_to=\"cumprod\",\n    )\n    def groupby_cumprod(\n        self,\n        by,\n        axis,\n        groupby_kwargs,\n        agg_args,\n        agg_kwargs,\n        drop=False,\n    ):\n        return self.groupby_agg(\n            by=by,\n            agg_func=\"cumprod\",\n            axis=axis,\n            groupby_kwargs=groupby_kwargs,\n            agg_args=agg_args,\n            agg_kwargs=agg_kwargs,\n            drop=drop,\n        )\n\n    @doc_utils.doc_groupby_method(\n        action=\"compute standard deviation\", result=\"standard deviation\", refer_to=\"std\"\n    )\n    def groupby_std(\n        self,\n        by,\n        axis,\n        groupby_kwargs,\n        agg_args,\n        agg_kwargs,\n        drop=False,\n    ):\n        return self.groupby_agg(\n            by=by,\n            agg_func=\"std\",\n            axis=axis,\n            groupby_kwargs=groupby_kwargs,\n            agg_args=agg_args,\n            agg_kwargs=agg_kwargs,\n            drop=drop,\n        )\n\n    @doc_utils.doc_groupby_method(\n        action=\"compute standard error\", result=\"standard error\", refer_to=\"sem\"\n    )\n    def groupby_sem(\n        self,\n        by,\n        axis,\n        groupby_kwargs,\n        agg_args,\n        agg_kwargs,\n        drop=False,\n    ):\n        return 
self.groupby_agg(\n            by=by,\n            agg_func=\"sem\",\n            axis=axis,\n            groupby_kwargs=groupby_kwargs,\n            agg_args=agg_args,\n            agg_kwargs=agg_kwargs,\n            drop=drop,\n        )\n\n    @doc_utils.doc_groupby_method(\n        action=\"compute numerical rank\", result=\"numerical rank\", refer_to=\"rank\"\n    )\n    def groupby_rank(\n        self,\n        by,\n        axis,\n        groupby_kwargs,\n        agg_args,\n        agg_kwargs,\n        drop=False,\n    ):\n        return self.groupby_agg(\n            by=by,\n            agg_func=\"rank\",\n            axis=axis,\n            groupby_kwargs=groupby_kwargs,\n            agg_args=agg_args,\n            agg_kwargs=agg_kwargs,\n            drop=drop,\n        )\n\n    @doc_utils.doc_groupby_method(\n        action=\"compute variance\", result=\"variance\", refer_to=\"var\"\n    )\n    def groupby_var(\n        self,\n        by,\n        axis,\n        groupby_kwargs,\n        agg_args,\n        agg_kwargs,\n        drop=False,\n    ):\n        return self.groupby_agg(\n            by=by,\n            agg_func=\"var\",\n            axis=axis,\n            groupby_kwargs=groupby_kwargs,\n            agg_args=agg_args,\n            agg_kwargs=agg_kwargs,\n            drop=drop,\n        )\n\n    @doc_utils.doc_groupby_method(\n        action=\"compute correlation\", result=\"correlation\", refer_to=\"corr\"\n    )\n    def groupby_corr(\n        self,\n        by,\n        axis,\n        groupby_kwargs,\n        agg_args,\n        agg_kwargs,\n        drop=False,\n    ):\n        return self.groupby_agg(\n            by=by,\n            agg_func=\"corr\",\n            axis=axis,\n            groupby_kwargs=groupby_kwargs,\n            agg_args=agg_args,\n            agg_kwargs=agg_kwargs,\n            drop=drop,\n        )\n\n    @doc_utils.doc_groupby_method(\n        action=\"compute covariance\", result=\"covariance\", refer_to=\"cov\"\n    )\n  
  def groupby_cov(\n        self,\n        by,\n        axis,\n        groupby_kwargs,\n        agg_args,\n        agg_kwargs,\n        drop=False,\n    ):\n        return self.groupby_agg(\n            by=by,\n            agg_func=\"cov\",\n            axis=axis,\n            groupby_kwargs=groupby_kwargs,\n            agg_args=agg_args,\n            agg_kwargs=agg_kwargs,\n            drop=drop,\n        )\n\n    @doc_utils.doc_groupby_method(\n        action=\"get the number of unique values\",\n        result=\"number of unique values\",\n        refer_to=\"nunique\",\n    )\n    def groupby_nunique(\n        self,\n        by,\n        axis,\n        groupby_kwargs,\n        agg_args,\n        agg_kwargs,\n        drop=False,\n    ):\n        return self.groupby_agg(\n            by=by,\n            agg_func=\"nunique\",\n            axis=axis,\n            groupby_kwargs=groupby_kwargs,\n            agg_args=agg_args,\n            agg_kwargs=agg_kwargs,\n            drop=drop,\n        )\n\n    @doc_utils.doc_groupby_method(\n        action=\"get the median value\", result=\"median value\", refer_to=\"median\"\n    )\n    def groupby_median(\n        self,\n        by,\n        axis,\n        groupby_kwargs,\n        agg_args,\n        agg_kwargs,\n        drop=False,\n    ):\n        return self.groupby_agg(\n            by=by,\n            agg_func=\"median\",\n            axis=axis,\n            groupby_kwargs=groupby_kwargs,\n            agg_args=agg_args,\n            agg_kwargs=agg_kwargs,\n            drop=drop,\n        )\n\n    @doc_utils.doc_groupby_method(\n        action=\"compute specified quantile\",\n        result=\"quantile value\",\n        refer_to=\"quantile\",\n    )\n    def groupby_quantile(\n        self,\n        by,\n        axis,\n        groupby_kwargs,\n        agg_args,\n        agg_kwargs,\n        drop=False,\n    ):\n        return self.groupby_agg(\n            by=by,\n            agg_func=\"quantile\",\n            
axis=axis,\n            groupby_kwargs=groupby_kwargs,\n            agg_args=agg_args,\n            agg_kwargs=agg_kwargs,\n            drop=drop,\n        )\n\n    @doc_utils.doc_groupby_method(\n        action=\"fill NaN values\",\n        result=\"`fill_value` if it was NaN, original value otherwise\",\n        refer_to=\"fillna\",\n    )\n    def groupby_fillna(\n        self,\n        by,\n        axis,\n        groupby_kwargs,\n        agg_args,\n        agg_kwargs,\n        drop=False,\n    ):\n        return self.groupby_agg(\n            by=by,\n            agg_func=\"fillna\",\n            axis=axis,\n            groupby_kwargs=groupby_kwargs,\n            agg_args=agg_args,\n            agg_kwargs=agg_kwargs,\n            drop=drop,\n        )\n\n    def groupby_diff(\n        self, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop=False\n    ):  # noqa: GL08\n        return self.groupby_agg(\n            by=by,\n            agg_func=\"diff\",\n            axis=axis,\n            groupby_kwargs=groupby_kwargs,\n            agg_args=agg_args,\n            agg_kwargs=agg_kwargs,\n            drop=drop,\n        )\n\n    def groupby_pct_change(\n        self, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop=False\n    ):  # noqa: GL08\n        return self.groupby_agg(\n            by=by,\n            agg_func=\"pct_change\",\n            axis=axis,\n            groupby_kwargs=groupby_kwargs,\n            agg_args=agg_args,\n            agg_kwargs=agg_kwargs,\n            drop=drop,\n        )\n\n    @doc_utils.doc_groupby_method(\n        action=\"get data types\", result=\"data type\", refer_to=\"dtypes\"\n    )\n    def groupby_dtypes(\n        self,\n        by,\n        axis,\n        groupby_kwargs,\n        agg_args,\n        agg_kwargs,\n        drop=False,\n    ):\n        return self.groupby_agg(\n            by=by,\n            agg_func=\"dtypes\",\n            axis=axis,\n            groupby_kwargs=groupby_kwargs,\n            
agg_args=agg_args,\n            agg_kwargs=agg_kwargs,\n            drop=drop,\n        )\n\n    @doc_utils.doc_groupby_method(\n        action=\"construct DataFrame from group with provided name\",\n        result=\"DataFrame for given group\",\n        refer_to=\"get_group\",\n    )\n    def groupby_get_group(\n        self,\n        by,\n        axis,\n        groupby_kwargs,\n        agg_args,\n        agg_kwargs,\n        drop=False,\n    ):\n        return self.groupby_agg(\n            by=by,\n            agg_func=\"get_group\",\n            axis=axis,\n            groupby_kwargs=groupby_kwargs,\n            agg_args=agg_args,\n            agg_kwargs=agg_kwargs,\n            drop=drop,\n        )\n\n    @doc_utils.doc_groupby_method(\n        action=\"shift data with the specified settings\",\n        result=\"shifted value\",\n        refer_to=\"shift\",\n    )\n    def groupby_shift(\n        self,\n        by,\n        axis,\n        groupby_kwargs,\n        agg_args,\n        agg_kwargs,\n        drop=False,\n    ):\n        return self.groupby_agg(\n            by=by,\n            agg_func=\"shift\",\n            axis=axis,\n            groupby_kwargs=groupby_kwargs,\n            agg_args=agg_args,\n            agg_kwargs=agg_kwargs,\n            drop=drop,\n        )\n\n    @doc_utils.doc_groupby_method(\n        action=\"get first value in group\",\n        result=\"first value\",\n        refer_to=\"first\",\n    )\n    def groupby_first(\n        self,\n        by,\n        axis,\n        groupby_kwargs,\n        agg_args,\n        agg_kwargs,\n        drop=False,\n    ):\n        return self.groupby_agg(\n            by=by,\n            agg_func=\"first\",\n            axis=axis,\n            groupby_kwargs=groupby_kwargs,\n            agg_args=agg_args,\n            agg_kwargs=agg_kwargs,\n            drop=drop,\n        )\n\n    @doc_utils.doc_groupby_method(\n        action=\"get last value in group\",\n        result=\"last value\",\n        
refer_to=\"last\",\n    )\n    def groupby_last(\n        self,\n        by,\n        axis,\n        groupby_kwargs,\n        agg_args,\n        agg_kwargs,\n        drop=False,\n    ):\n        return self.groupby_agg(\n            by=by,\n            agg_func=\"last\",\n            axis=axis,\n            groupby_kwargs=groupby_kwargs,\n            agg_args=agg_args,\n            agg_kwargs=agg_kwargs,\n            drop=drop,\n        )\n\n    @doc_utils.doc_groupby_method(\n        action=\"get first n values of a group\",\n        result=\"first n values of a group\",\n        refer_to=\"head\",\n    )\n    def groupby_head(\n        self,\n        by,\n        axis,\n        groupby_kwargs,\n        agg_args,\n        agg_kwargs,\n        drop=False,\n    ):\n        return self.groupby_agg(\n            by=by,\n            agg_func=\"head\",\n            axis=axis,\n            groupby_kwargs=groupby_kwargs,\n            agg_args=agg_args,\n            agg_kwargs=agg_kwargs,\n            drop=drop,\n        )\n\n    @doc_utils.doc_groupby_method(\n        action=\"get last n values in group\",\n        result=\"last n values\",\n        refer_to=\"tail\",\n    )\n    def groupby_tail(\n        self,\n        by,\n        axis,\n        groupby_kwargs,\n        agg_args,\n        agg_kwargs,\n        drop=False,\n    ):\n        return self.groupby_agg(\n            by=by,\n            agg_func=\"tail\",\n            axis=axis,\n            groupby_kwargs=groupby_kwargs,\n            agg_args=agg_args,\n            agg_kwargs=agg_kwargs,\n            drop=drop,\n        )\n\n    @doc_utils.doc_groupby_method(\n        action=\"get nth value in group\",\n        result=\"nth value\",\n        refer_to=\"nth\",\n    )\n    def groupby_nth(\n        self,\n        by,\n        axis,\n        groupby_kwargs,\n        agg_args,\n        agg_kwargs,\n        drop=False,\n    ):\n        return self.groupby_agg(\n            by=by,\n            agg_func=\"nth\",\n    
        axis=axis,\n            groupby_kwargs=groupby_kwargs,\n            agg_args=agg_args,\n            agg_kwargs=agg_kwargs,\n            drop=drop,\n        )\n\n    @doc_utils.doc_groupby_method(\n        action=\"get group number of each value\",\n        result=\"group number of each value\",\n        refer_to=\"ngroup\",\n    )\n    def groupby_ngroup(\n        self,\n        by,\n        axis,\n        groupby_kwargs,\n        agg_args,\n        agg_kwargs,\n        drop=False,\n    ):\n        return self.groupby_agg(\n            by=by,\n            agg_func=\"ngroup\",\n            axis=axis,\n            groupby_kwargs=groupby_kwargs,\n            agg_args=agg_args,\n            agg_kwargs=agg_kwargs,\n            drop=drop,\n        )\n\n    @doc_utils.doc_groupby_method(\n        action=\"get n largest values in group\",\n        result=\"n largest values\",\n        refer_to=\"nlargest\",\n    )\n    def groupby_nlargest(\n        self,\n        by,\n        axis,\n        groupby_kwargs,\n        agg_args,\n        agg_kwargs,\n        drop=False,\n    ):\n        return self.groupby_agg(\n            by=by,\n            agg_func=\"nlargest\",\n            axis=axis,\n            groupby_kwargs=groupby_kwargs,\n            agg_args=agg_args,\n            agg_kwargs=agg_kwargs,\n            drop=drop,\n            series_groupby=True,\n        )\n\n    @doc_utils.doc_groupby_method(\n        action=\"get n nsmallest values in group\",\n        result=\"n nsmallest values\",\n        refer_to=\"nsmallest\",\n    )\n    def groupby_nsmallest(\n        self,\n        by,\n        axis,\n        groupby_kwargs,\n        agg_args,\n        agg_kwargs,\n        drop=False,\n    ):\n        return self.groupby_agg(\n            by=by,\n            agg_func=\"nsmallest\",\n            axis=axis,\n            groupby_kwargs=groupby_kwargs,\n            agg_args=agg_args,\n            agg_kwargs=agg_kwargs,\n            drop=drop,\n            
series_groupby=True,\n        )\n\n    @doc_utils.doc_groupby_method(\n        action=\"get unique values in group\",\n        result=\"unique values\",\n        refer_to=\"unique\",\n    )\n    def groupby_unique(\n        self,\n        by,\n        axis,\n        groupby_kwargs,\n        agg_args,\n        agg_kwargs,\n        drop=False,\n    ):\n        return self.groupby_agg(\n            by=by,\n            agg_func=\"unique\",\n            axis=axis,\n            groupby_kwargs=groupby_kwargs,\n            agg_args=agg_args,\n            agg_kwargs=agg_kwargs,\n            drop=drop,\n            series_groupby=True,\n        )\n\n    def groupby_ohlc(\n        self,\n        by,\n        axis,\n        groupby_kwargs,\n        agg_args,\n        agg_kwargs,\n        is_df,\n    ):  # noqa: GL08\n        if not is_df:\n            return self.groupby_agg(\n                by=by,\n                agg_func=\"ohlc\",\n                axis=axis,\n                groupby_kwargs=groupby_kwargs,\n                agg_args=agg_args,\n                agg_kwargs=agg_kwargs,\n                series_groupby=True,\n            )\n        return GroupByDefault.register(pandas.core.groupby.DataFrameGroupBy.ohlc)(\n            self,\n            by=by,\n            axis=axis,\n            groupby_kwargs=groupby_kwargs,\n            agg_args=agg_args,\n            agg_kwargs=agg_kwargs,\n            drop=True,\n        )\n\n    # END Manual Partitioning methods\n\n    @doc_utils.add_refer_to(\"DataFrame.unstack\")\n    def unstack(self, level, fill_value):\n        \"\"\"\n        Pivot a level of the (necessarily hierarchical) index labels.\n\n        Parameters\n        ----------\n        level : int or label\n        fill_value : scalar or dict\n\n        Returns\n        -------\n        BaseQueryCompiler\n        \"\"\"\n        return DataFrameDefault.register(pandas.DataFrame.unstack)(\n            self, level=level, fill_value=fill_value\n        )\n\n    
@doc_utils.add_refer_to(\"wide_to_long\")\n    def wide_to_long(self, **kwargs):  # noqa: PR01\n        \"\"\"\n        Unpivot a DataFrame from wide to long format.\n\n        Returns\n        -------\n        BaseQueryCompiler\n        \"\"\"\n        return DataFrameDefault.register(pandas.wide_to_long)(self, **kwargs)\n\n    @doc_utils.add_refer_to(\"DataFrame.pivot\")\n    def pivot(self, index, columns, values):\n        \"\"\"\n        Produce pivot table based on column values.\n\n        Parameters\n        ----------\n        index : label or list of such, pandas.Index, optional\n        columns : label or list of such\n        values : label or list of such, optional\n\n        Returns\n        -------\n        BaseQueryCompiler\n            New QueryCompiler containing pivot table.\n        \"\"\"\n        return DataFrameDefault.register(pandas.DataFrame.pivot)(\n            self, index=index, columns=columns, values=values\n        )\n\n    @doc_utils.add_refer_to(\"DataFrame.pivot_table\")\n    def pivot_table(\n        self,\n        index,\n        values,\n        columns,\n        aggfunc,\n        fill_value,\n        margins,\n        dropna,\n        margins_name,\n        observed,\n        sort,\n    ):\n        \"\"\"\n        Create a spreadsheet-style pivot table from underlying data.\n\n        Parameters\n        ----------\n        index : label, pandas.Grouper, array or list of such\n        values : label, optional\n        columns : column, pandas.Grouper, array or list of such\n        aggfunc : callable(pandas.Series) -> scalar, dict of list of such\n        fill_value : scalar, optional\n        margins : bool\n        dropna : bool\n        margins_name : str\n        observed : bool\n        sort : bool\n\n        Returns\n        -------\n        BaseQueryCompiler\n        \"\"\"\n        return DataFrameDefault.register(pandas.DataFrame.pivot_table)(\n            self,\n            index=index,\n            values=values,\n   
         columns=columns,\n            aggfunc=aggfunc,\n            fill_value=fill_value,\n            margins=margins,\n            dropna=dropna,\n            margins_name=margins_name,\n            observed=observed,\n            sort=sort,\n        )\n\n    @doc_utils.add_refer_to(\"get_dummies\")\n    def get_dummies(self, columns, **kwargs):  # noqa: PR02\n        \"\"\"\n        Convert categorical variables to dummy variables for certain columns.\n\n        Parameters\n        ----------\n        columns : label or list of such\n            Columns to convert.\n        prefix : str or list of such\n        prefix_sep : str\n        dummy_na : bool\n        drop_first : bool\n        dtype : dtype\n        **kwargs : dict\n            Serves the compatibility purpose. Does not affect the result.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            New QueryCompiler with categorical variables converted to dummy.\n        \"\"\"\n\n        def get_dummies(df, columns, **kwargs):\n            return pandas.get_dummies(df, columns=columns, **kwargs)\n\n        return DataFrameDefault.register(get_dummies)(self, columns=columns, **kwargs)\n\n    @doc_utils.add_one_column_warning\n    @doc_utils.add_refer_to(\"Series.repeat\")\n    def repeat(self, repeats):\n        \"\"\"\n        Repeat each element of one-column QueryCompiler given number of times.\n\n        Parameters\n        ----------\n        repeats : int or array of ints\n            The number of repetitions for each element. This should be a\n            non-negative integer. 
Repeating 0 times will return an empty\n            QueryCompiler.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            New QueryCompiler with repeated elements.\n        \"\"\"\n        return SeriesDefault.register(pandas.Series.repeat)(self, repeats=repeats)\n\n    @doc_utils.add_refer_to(\"cut\")\n    def cut(\n        self,\n        bins,\n        **kwargs,\n    ):\n        \"\"\"\n        Bin values into discrete intervals.\n\n        Parameters\n        ----------\n        bins : int, array of ints, or IntervalIndex\n            The criteria to bin by.\n        **kwargs : dict\n            The keyword arguments to pass through.\n\n        Returns\n        -------\n        BaseQueryCompiler or np.ndarray or list[np.ndarray]\n            Returns the result of pd.cut.\n        \"\"\"\n\n        def squeeze_and_cut(df, *args, **kwargs):\n            # We need this function to ensure we squeeze our internal\n            # representation (a dataframe) to a Series.\n            series = df.squeeze(axis=1)\n            return pandas.cut(series, *args, **kwargs)\n\n        # We use `default_to_pandas` here since the type and number of\n        # results can change depending on the input arguments.\n        return self.default_to_pandas(squeeze_and_cut, bins, **kwargs)\n\n    # Indexing\n\n    index = property(_get_axis(0), _set_axis(0))\n    columns = property(_get_axis(1), _set_axis(1))\n\n    def get_axis(self, axis):\n        \"\"\"\n        Return index labels of the specified axis.\n\n        Parameters\n        ----------\n        axis : {0, 1}\n            Axis to return labels on.\n            0 is for index, when 1 is for columns.\n\n        Returns\n        -------\n        pandas.Index\n        \"\"\"\n        return self.index if axis == 0 else self.columns\n\n    def get_axis_len(self, axis: Literal[0, 1]) -> int:\n        \"\"\"\n        Return the length of the specified axis.\n\n        A query compiler may choose to override 
this method if it has a more efficient way\n        of computing the length of an axis without materializing it.\n\n        Parameters\n        ----------\n        axis : {0, 1}\n            Axis to compute the length of.\n\n        Returns\n        -------\n        int\n        \"\"\"\n        return len(self.get_axis(axis))\n\n    def take_2d_labels(\n        self,\n        index,\n        columns,\n    ):\n        \"\"\"\n        Take the given labels.\n\n        Parameters\n        ----------\n        index : slice, scalar, list-like, or BaseQueryCompiler\n            Labels of rows to grab.\n        columns : slice, scalar, list-like, or BaseQueryCompiler\n            Labels of columns to grab.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            Subset of this QueryCompiler.\n        \"\"\"\n        row_lookup, col_lookup = self.get_positions_from_labels(index, columns)\n        if isinstance(row_lookup, slice):\n            ErrorMessage.catch_bugs_and_request_email(\n                failure_condition=row_lookup != slice(None),\n                extra_log=f\"Only None-slices are acceptable as a slice argument in masking, got: {row_lookup}\",\n            )\n            row_lookup = None\n        if isinstance(col_lookup, slice):\n            ErrorMessage.catch_bugs_and_request_email(\n                failure_condition=col_lookup != slice(None),\n                extra_log=f\"Only None-slices are acceptable as a slice argument in masking, got: {col_lookup}\",\n            )\n            col_lookup = None\n        return self.take_2d_positional(row_lookup, col_lookup)\n\n    def get_positions_from_labels(self, row_loc, col_loc):\n        \"\"\"\n        Compute index and column positions from their respective locators.\n\n        Inputs to this method are arguments that the pandas user could pass to loc.\n        This function will compute the corresponding index and column positions\n        that the user could equivalently pass to iloc.\n\n     
   Parameters\n        ----------\n        row_loc : scalar, slice, list, array or tuple\n            Row locator.\n        col_loc : scalar, slice, list, array or tuple\n            Columns locator.\n\n        Returns\n        -------\n        row_lookup : slice(None) if full axis grab, pandas.RangeIndex if repetition is detected, numpy.ndarray otherwise\n            List of index labels.\n        col_lookup : slice(None) if full axis grab, pandas.RangeIndex if repetition is detected, numpy.ndarray otherwise\n            List of columns labels.\n\n        Notes\n        -----\n        Usage of `slice(None)` as a resulting lookup is a hack to pass information about\n        full-axis grab without computing actual indices that triggers lazy computations.\n        Ideally, this API should get rid of using slices as indexers and either use a\n        common ``Indexer`` object or range and ``np.ndarray`` only.\n        \"\"\"\n        from modin.pandas.indexing import (\n            boolean_mask_to_numeric,\n            is_boolean_array,\n            is_list_like,\n            is_range_like,\n        )\n\n        lookups = []\n        for axis, axis_loc in enumerate((row_loc, col_loc)):\n            if is_scalar(axis_loc):\n                axis_loc = np.array([axis_loc])\n            if isinstance(axis_loc, pandas.RangeIndex):\n                axis_lookup = axis_loc\n            elif isinstance(axis_loc, slice) or is_range_like(axis_loc):\n                if isinstance(axis_loc, slice) and axis_loc == slice(None):\n                    axis_lookup = axis_loc\n                else:\n                    axis_labels = self.get_axis(axis)\n                    # `slice_indexer` returns a fully-defined numeric slice for a non-fully-defined labels-based slice\n                    # RangeIndex and range use a semi-open interval, while\n                    # slice_indexer uses a closed interval. 
Subtract 1 step from the\n                    # end of the interval to get the equivalent closed interval.\n                    if axis_loc.stop is None or not is_number(axis_loc.stop):\n                        slice_stop = axis_loc.stop\n                    else:\n                        slice_stop = axis_loc.stop - (\n                            0 if axis_loc.step is None else axis_loc.step\n                        )\n                    axis_lookup = axis_labels.slice_indexer(\n                        axis_loc.start,\n                        slice_stop,\n                        axis_loc.step,\n                    )\n                    # Converting negative indices to their actual positions:\n                    axis_lookup = pandas.RangeIndex(\n                        start=(\n                            axis_lookup.start\n                            if axis_lookup.start >= 0\n                            else axis_lookup.start + len(axis_labels)\n                        ),\n                        stop=(\n                            axis_lookup.stop\n                            if axis_lookup.stop >= 0\n                            else axis_lookup.stop + len(axis_labels)\n                        ),\n                        step=axis_lookup.step,\n                    )\n            elif self.has_multiindex(axis):\n                # `Index.get_locs` raises an IndexError by itself if missing labels were provided,\n                # we don't have to do missing-check for the received `axis_lookup`.\n                if isinstance(axis_loc, pandas.MultiIndex):\n                    axis_lookup = self.get_axis(axis).get_indexer_for(axis_loc)\n                else:\n                    axis_lookup = self.get_axis(axis).get_locs(axis_loc)\n            elif is_boolean_array(axis_loc):\n                axis_lookup = boolean_mask_to_numeric(axis_loc)\n            else:\n                axis_labels = self.get_axis(axis)\n                if is_list_like(axis_loc) and not 
isinstance(\n                    axis_loc, (np.ndarray, pandas.Index)\n                ):\n                    # `Index.get_indexer_for` works much faster with numpy arrays than with python lists,\n                    # so although we lose some time here on converting to numpy, `Index.get_indexer_for`\n                    # speedup covers the loss that we gain here.\n                    axis_loc = np.array(axis_loc, dtype=axis_labels.dtype)\n                axis_lookup = axis_labels.get_indexer_for(axis_loc)\n                # `Index.get_indexer_for` sets -1 value for missing labels, we have to verify whether\n                # there are any -1 in the received indexer to raise a KeyError here.\n                missing_mask = axis_lookup == -1\n                if missing_mask.any():\n                    missing_labels = (\n                        axis_loc[missing_mask]\n                        if is_list_like(axis_loc)\n                        # If `axis_loc` is not a list-like then we can't select certain\n                        # labels that are missing and so printing the whole indexer\n                        else axis_loc\n                    )\n                    raise KeyError(missing_labels)\n\n            if isinstance(axis_lookup, pandas.Index) and not is_range_like(axis_lookup):\n                axis_lookup = axis_lookup.values\n\n            lookups.append(axis_lookup)\n        return lookups\n\n    def take_2d_positional(self, index=None, columns=None):\n        \"\"\"\n        Index QueryCompiler with passed keys.\n\n        Parameters\n        ----------\n        index : list-like of ints, optional\n            Positional indices of rows to grab.\n        columns : list-like of ints, optional\n            Positional indices of columns to grab.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            New masked QueryCompiler.\n        \"\"\"\n        index = slice(None) if index is None else index\n        columns = slice(None) if 
columns is None else columns\n\n        def applyer(df):\n            return df.iloc[index, columns]\n\n        return DataFrameDefault.register(applyer)(self)\n\n    def insert_item(self, axis, loc, value, how=\"inner\", replace=False):\n        \"\"\"\n        Insert rows/columns defined by `value` at the specified position.\n\n        If frames are not aligned along specified axis, perform frames alignment first.\n\n        Parameters\n        ----------\n        axis : {0, 1}\n            Axis to insert along. 0 means insert rows, when 1 means insert columns.\n        loc : int\n            Position to insert `value`.\n        value : BaseQueryCompiler\n            Rows/columns to insert.\n        how : {\"inner\", \"outer\", \"left\", \"right\"}, default: \"inner\"\n            Type of join that will be used if frames are not aligned.\n        replace : bool, default: False\n            Whether to insert item after column/row at `loc-th` position or to replace\n            it by `value`.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            New QueryCompiler with inserted values.\n        \"\"\"\n        assert isinstance(value, type(self))\n\n        def mask(idx):\n            if len(idx) == len(self.get_axis(axis)):\n                return self\n            return (\n                self.getitem_column_array(idx, numeric=True)\n                if axis\n                else self.getitem_row_array(idx)\n            )\n\n        if 0 <= loc < len(self.get_axis(axis)):\n            first_mask = mask(list(range(loc)))\n            second_mask_loc = loc + 1 if replace else loc\n            second_mask = mask(list(range(second_mask_loc, len(self.get_axis(axis)))))\n            return first_mask.concat(axis, [value, second_mask], join=how, sort=False)\n        else:\n            return self.concat(axis, [value], join=how, sort=False)\n\n    def setitem(self, axis, key, value):\n        \"\"\"\n        Set the row/column defined by `key` to the 
`value` provided.\n\n        Parameters\n        ----------\n        axis : {0, 1}\n            Axis to set `value` along. 0 means set row, 1 means set column.\n        key : label\n            Row/column label to set `value` in.\n        value : BaseQueryCompiler, list-like or scalar\n            Define new row/column value.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            New QueryCompiler with updated `key` value.\n        \"\"\"\n\n        def setitem(df, axis, key, value):\n            if is_scalar(key) and isinstance(value, pandas.DataFrame):\n                value = value.squeeze()\n            if not axis:\n                df[key] = value\n            else:\n                df.loc[key] = value\n            return df\n\n        return DataFrameDefault.register(setitem)(self, axis=axis, key=key, value=value)\n\n    def write_items(\n        self, row_numeric_index, col_numeric_index, item, need_columns_reindex=True\n    ):\n        \"\"\"\n        Update QueryCompiler elements at the specified positions by passed values.\n\n        In contrast to ``setitem`` this method allows to do 2D assignments.\n\n        Parameters\n        ----------\n        row_numeric_index : list of ints\n            Row positions to write value.\n        col_numeric_index : list of ints\n            Column positions to write value.\n        item : Any\n            Values to write. 
If not a scalar will be broadcasted according to\n            `row_numeric_index` and `col_numeric_index`.\n        need_columns_reindex : bool, default: True\n            In the case of assigning columns to a dataframe (broadcasting is\n            part of the flow), reindexing is not needed.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            New QueryCompiler with updated values.\n        \"\"\"\n        # We have to keep this import away from the module level to avoid circular import\n        from modin.pandas.utils import broadcast_item, is_scalar\n\n        if not isinstance(row_numeric_index, slice):\n            row_numeric_index = list(row_numeric_index)\n        if not isinstance(col_numeric_index, slice):\n            col_numeric_index = list(col_numeric_index)\n\n        def write_items(df, broadcasted_items):\n            if isinstance(df.iloc[row_numeric_index, col_numeric_index], pandas.Series):\n                broadcasted_items = broadcasted_items.squeeze()\n            df.iloc[row_numeric_index, col_numeric_index] = broadcasted_items\n            return df\n\n        if not is_scalar(item):\n            broadcasted_item, _, _, _ = broadcast_item(\n                self,\n                row_numeric_index,\n                col_numeric_index,\n                item,\n                need_columns_reindex=need_columns_reindex,\n                sort_lookups_and_item=False,\n            )\n        else:\n            broadcasted_item = item\n\n        return DataFrameDefault.register(write_items)(\n            self, broadcasted_items=broadcasted_item\n        )\n\n    # END Abstract methods for QueryCompiler\n\n    @cached_property\n    def __constructor__(self) -> type[Self]:\n        \"\"\"\n        Get query compiler constructor.\n\n        By default, constructor method will invoke an init.\n\n        Returns\n        -------\n        callable\n        \"\"\"\n        return type(self)\n\n    # __delitem__\n    # This will 
change the shape of the resulting data.\n    def delitem(self, key):\n        \"\"\"\n        Drop `key` column.\n\n        Parameters\n        ----------\n        key : label\n            Column name to drop.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            New QueryCompiler without `key` column.\n        \"\"\"\n        return self.drop(columns=[key])\n\n    # END __delitem__\n\n    def has_multiindex(self, axis=0):\n        \"\"\"\n        Check if specified axis is indexed by MultiIndex.\n\n        Parameters\n        ----------\n        axis : {0, 1}, default: 0\n            The axis to check (0 - index, 1 - columns).\n\n        Returns\n        -------\n        bool\n            True if index at specified axis is MultiIndex and False otherwise.\n        \"\"\"\n        if axis == 0:\n            return isinstance(self.index, pandas.MultiIndex)\n        assert axis == 1\n        return isinstance(self.columns, pandas.MultiIndex)\n\n    @property\n    def frame_has_materialized_dtypes(self) -> bool:\n        \"\"\"\n        Check if the underlying dataframe has materialized dtypes.\n\n        Returns\n        -------\n        bool\n        \"\"\"\n        return self._modin_frame.has_materialized_dtypes\n\n    @property\n    def frame_has_materialized_columns(self) -> bool:\n        \"\"\"\n        Check if the underlying dataframe has materialized columns.\n\n        Returns\n        -------\n        bool\n        \"\"\"\n        return self._modin_frame.has_materialized_columns\n\n    @property\n    def frame_has_materialized_index(self) -> bool:\n        \"\"\"\n        Check if the underlying dataframe has materialized index.\n\n        Returns\n        -------\n        bool\n        \"\"\"\n        return self._modin_frame.has_materialized_index\n\n    def set_frame_dtypes_cache(self, dtypes):\n        \"\"\"\n        Set dtypes cache for the underlying dataframe.\n\n        Parameters\n        ----------\n        dtypes : 
pandas.Series, ModinDtypes, callable or None\n        \"\"\"\n        self._modin_frame.set_dtypes_cache(dtypes)\n\n    def set_frame_index_cache(self, index):\n        \"\"\"\n        Set index cache for underlying dataframe.\n\n        Parameters\n        ----------\n        index : sequence, callable or None\n        \"\"\"\n        self._modin_frame.set_index_cache(index)\n\n    def set_frame_columns_cache(self, index):\n        \"\"\"\n        Set columns cache for underlying dataframe.\n\n        Parameters\n        ----------\n        index : sequence, callable or None\n        \"\"\"\n        self._modin_frame.set_columns_cache(index)\n\n    @property\n    def frame_has_index_cache(self):\n        \"\"\"\n        Check if the index cache exists for underlying dataframe.\n\n        Returns\n        -------\n        bool\n        \"\"\"\n        return self._modin_frame.has_index_cache\n\n    @property\n    def frame_has_columns_cache(self):\n        \"\"\"\n        Check if the columns cache exists for underlying dataframe.\n\n        Returns\n        -------\n        bool\n        \"\"\"\n        return self._modin_frame.has_columns_cache\n\n    @property\n    def frame_has_dtypes_cache(self) -> bool:\n        \"\"\"\n        Check if the dtypes cache exists for the underlying dataframe.\n\n        Returns\n        -------\n        bool\n        \"\"\"\n        return self._modin_frame.has_dtypes_cache\n\n    def get_index_name(self, axis=0):\n        \"\"\"\n        Get index name of specified axis.\n\n        Parameters\n        ----------\n        axis : {0, 1}, default: 0\n            Axis to get index name on.\n\n        Returns\n        -------\n        hashable\n            Index name, None for MultiIndex.\n        \"\"\"\n        return self.get_axis(axis).name\n\n    def set_index_name(self, name, axis=0):\n        \"\"\"\n        Set index name for the specified axis.\n\n        Parameters\n        ----------\n        name : hashable\n            
New index name.\n        axis : {0, 1}, default: 0\n            Axis to set name along.\n        \"\"\"\n        self.get_axis(axis).name = name\n\n    def get_index_names(self, axis=0):\n        \"\"\"\n        Get index names of specified axis.\n\n        Parameters\n        ----------\n        axis : {0, 1}, default: 0\n            Axis to get index names on.\n\n        Returns\n        -------\n        list\n            Index names.\n        \"\"\"\n        return self.get_axis(axis).names\n\n    def set_index_names(self, names, axis=0):\n        \"\"\"\n        Set index names for the specified axis.\n\n        Parameters\n        ----------\n        names : list\n            New index names.\n        axis : {0, 1}, default: 0\n            Axis to set names along.\n        \"\"\"\n        self.get_axis(axis).names = names\n\n    def get_dtypes_set(self):\n        \"\"\"\n        Get a set of dtypes that are in this query compiler.\n\n        Returns\n        -------\n        set\n        \"\"\"\n        return set(self.dtypes.values)\n\n    # DateTime methods\n    def between_time(self, **kwargs):  # noqa: PR01\n        \"\"\"\n        Select values between particular times of the day (e.g., 9:00-9:30 AM).\n\n        By setting start_time to be later than end_time, you can get the times that are not between the two times.\n\n        Returns\n        -------\n        BaseQueryCompiler\n        \"\"\"\n        return DataFrameDefault.register(pandas.DataFrame.between_time)(self, **kwargs)\n\n    def shift(\n        self,\n        periods,\n        freq,\n        axis,\n        fill_value,\n    ):  # noqa: GL08\n        return DataFrameDefault.register(pandas.DataFrame.shift)(\n            self, periods, freq, axis, fill_value\n        )\n\n    def tz_convert(\n        self,\n        tz,\n        axis=0,\n        level=None,\n        copy=True,\n    ):\n        \"\"\"\n        Convert tz-aware axis to target time zone.\n\n        Parameters\n        ----------\n  
      tz : str or tzinfo object or None\n            Target time zone. Passing None will convert to UTC\n            and remove the timezone information.\n        axis : int, default: 0\n            The axis to localize.\n        level : int, str, default: None\n            If axis is a MultiIndex, convert a specific level. Otherwise must be None.\n        copy : bool, default: True\n            Also make a copy of the underlying data.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            A new query compiler with the converted axis.\n        \"\"\"\n        if level is not None:\n            new_labels = (\n                pandas.Series(index=self.get_axis(axis))\n                .tz_convert(tz, level=level)\n                .index\n            )\n        else:\n            new_labels = self.get_axis(axis).tz_convert(tz)\n        obj = self.copy() if copy else self\n        if axis == 0:\n            obj.index = new_labels\n        else:\n            obj.columns = new_labels\n        return obj\n\n    def tz_localize(\n        self, tz, axis=0, level=None, copy=True, ambiguous=\"raise\", nonexistent=\"raise\"\n    ):\n        \"\"\"\n        Localize tz-naive index of a Series or DataFrame to target time zone.\n\n        Parameters\n        ----------\n        tz : tzstr or tzinfo or None\n            Time zone to localize. Passing None will remove the time zone\n            information and preserve local time.\n        axis : int, default: 0\n            The axis to localize.\n        level : int, str, default: None\n            If axis is a MultiIndex, localize a specific level. 
Otherwise must be None.\n        copy : bool, default: True\n            Also make a copy of the underlying data.\n        ambiguous : str, bool-ndarray, NaT, default: \"raise\"\n            Behaviour on ambiguous times.\n        nonexistent : str, default: \"raise\"\n            What to do with nonexistent times.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            A new query compiler with the localized axis.\n        \"\"\"\n        new_labels = (\n            pandas.Series(index=self.get_axis(axis))\n            .tz_localize(\n                tz,\n                axis=axis,\n                level=level,\n                copy=False,\n                ambiguous=ambiguous,\n                nonexistent=nonexistent,\n            )\n            .index\n        )\n        obj = self.copy() if copy else self\n        if axis == 0:\n            obj.index = new_labels\n        else:\n            obj.columns = new_labels\n        return obj\n\n    @doc_utils.doc_dt_round(refer_to=\"ceil\")\n    def dt_ceil(self, freq, ambiguous=\"raise\", nonexistent=\"raise\"):\n        return DateTimeDefault.register(pandas.Series.dt.ceil)(\n            self, freq, ambiguous, nonexistent\n        )\n\n    @doc_utils.add_one_column_warning\n    @doc_utils.add_refer_to(\"Series.dt.components\")\n    def dt_components(self):\n        \"\"\"\n        Spread each date-time value into its components (days, hours, minutes...).\n\n        Returns\n        -------\n        BaseQueryCompiler\n        \"\"\"\n        return DateTimeDefault.register(pandas.Series.dt.components)(self)\n\n    @doc_utils.doc_dt_timestamp(\n        prop=\"the date without timezone information\", refer_to=\"date\"\n    )\n    def dt_date(self):\n        return DateTimeDefault.register(pandas.Series.dt.date)(self)\n\n    @doc_utils.doc_dt_timestamp(prop=\"day component\", refer_to=\"day\")\n    def dt_day(self):\n        return DateTimeDefault.register(pandas.Series.dt.day)(self)\n\n    
@doc_utils.doc_dt_timestamp(\n        prop=\"day name\", refer_to=\"day_name\", params=\"locale : str, optional\"\n    )\n    def dt_day_name(self, locale=None):\n        return DateTimeDefault.register(pandas.Series.dt.day_name)(self, locale)\n\n    @doc_utils.doc_dt_timestamp(prop=\"integer day of week\", refer_to=\"dayofweek\")\n    # FIXME: `dt_dayofweek` is an alias for `dt_weekday`, one of them should\n    # be removed (Modin issue #3107).\n    def dt_dayofweek(self):\n        return DateTimeDefault.register(pandas.Series.dt.dayofweek)(self)\n\n    @doc_utils.doc_dt_timestamp(prop=\"day of year\", refer_to=\"dayofyear\")\n    def dt_dayofyear(self):\n        return DateTimeDefault.register(pandas.Series.dt.dayofyear)(self)\n\n    @doc_utils.doc_dt_interval(prop=\"days\", refer_to=\"days\")\n    def dt_days(self):\n        return DateTimeDefault.register(pandas.Series.dt.days)(self)\n\n    @doc_utils.doc_dt_timestamp(\n        prop=\"number of days in month\", refer_to=\"days_in_month\"\n    )\n    # FIXME: `dt_days_in_month` is an alias for `dt_daysinmonth`, one of them should\n    # be removed (Modin issue #3107).\n    def dt_days_in_month(self):\n        return DateTimeDefault.register(pandas.Series.dt.days_in_month)(self)\n\n    @doc_utils.doc_dt_timestamp(prop=\"number of days in month\", refer_to=\"daysinmonth\")\n    def dt_daysinmonth(self):\n        return DateTimeDefault.register(pandas.Series.dt.daysinmonth)(self)\n\n    @doc_utils.doc_dt_period(prop=\"the timestamp of end time\", refer_to=\"end_time\")\n    def dt_end_time(self):\n        return DateTimeDefault.register(pandas.Series.dt.end_time)(self)\n\n    @doc_utils.doc_dt_round(refer_to=\"floor\")\n    def dt_floor(self, freq, ambiguous=\"raise\", nonexistent=\"raise\"):\n        return DateTimeDefault.register(pandas.Series.dt.floor)(\n            self, freq, ambiguous, nonexistent\n        )\n\n    @doc_utils.add_one_column_warning\n    @doc_utils.add_refer_to(\"Series.dt.freq\")\n    def 
dt_freq(self):\n        \"\"\"\n        Get the time frequency of the underlying time-series data.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            QueryCompiler containing a single value, the frequency of the data.\n        \"\"\"\n        return DateTimeDefault.register(pandas.Series.dt.freq)(self)\n\n    @doc_utils.add_refer_to(\"Series.dt.unit\")\n    def dt_unit(self):  # noqa: RT01\n        return DateTimeDefault.register(pandas.Series.dt.unit)(self)\n\n    @doc_utils.add_refer_to(\"Series.dt.as_unit\")\n    def dt_as_unit(self, *args, **kwargs):  # noqa: PR01, RT01\n        return DateTimeDefault.register(pandas.Series.dt.as_unit)(self, *args, **kwargs)\n\n    @doc_utils.doc_dt_timestamp(\n        prop=\"Calculate year, week, and day according to the ISO 8601 standard.\",\n        refer_to=\"isocalendar\",\n    )\n    def dt_isocalendar(self):\n        return DateTimeDefault.register(pandas.Series.dt.isocalendar)(self)\n\n    @doc_utils.doc_dt_timestamp(prop=\"hour\", refer_to=\"hour\")\n    def dt_hour(self):\n        return DateTimeDefault.register(pandas.Series.dt.hour)(self)\n\n    @doc_utils.doc_dt_timestamp(\n        prop=\"the boolean of whether corresponding year is leap\",\n        refer_to=\"is_leap_year\",\n    )\n    def dt_is_leap_year(self):\n        return DateTimeDefault.register(pandas.Series.dt.is_leap_year)(self)\n\n    @doc_utils.doc_dt_timestamp(\n        prop=\"the boolean of whether the date is the last day of the month\",\n        refer_to=\"is_month_end\",\n    )\n    def dt_is_month_end(self):\n        return DateTimeDefault.register(pandas.Series.dt.is_month_end)(self)\n\n    @doc_utils.doc_dt_timestamp(\n        prop=\"the boolean of whether the date is the first day of the month\",\n        refer_to=\"is_month_start\",\n    )\n    def dt_is_month_start(self):\n        return DateTimeDefault.register(pandas.Series.dt.is_month_start)(self)\n\n    @doc_utils.doc_dt_timestamp(\n        prop=\"the boolean of 
whether the date is the last day of the quarter\",\n        refer_to=\"is_quarter_end\",\n    )\n    def dt_is_quarter_end(self):\n        return DateTimeDefault.register(pandas.Series.dt.is_quarter_end)(self)\n\n    @doc_utils.doc_dt_timestamp(\n        prop=\"the boolean of whether the date is the first day of the quarter\",\n        refer_to=\"is_quarter_start\",\n    )\n    def dt_is_quarter_start(self):\n        return DateTimeDefault.register(pandas.Series.dt.is_quarter_start)(self)\n\n    @doc_utils.doc_dt_timestamp(\n        prop=\"the boolean of whether the date is the last day of the year\",\n        refer_to=\"is_year_end\",\n    )\n    def dt_is_year_end(self):\n        return DateTimeDefault.register(pandas.Series.dt.is_year_end)(self)\n\n    @doc_utils.doc_dt_timestamp(\n        prop=\"the boolean of whether the date is the first day of the year\",\n        refer_to=\"is_year_start\",\n    )\n    def dt_is_year_start(self):\n        return DateTimeDefault.register(pandas.Series.dt.is_year_start)(self)\n\n    @doc_utils.doc_dt_timestamp(prop=\"microseconds component\", refer_to=\"microsecond\")\n    def dt_microsecond(self):\n        return DateTimeDefault.register(pandas.Series.dt.microsecond)(self)\n\n    @doc_utils.doc_dt_interval(prop=\"microseconds component\", refer_to=\"microseconds\")\n    def dt_microseconds(self):\n        return DateTimeDefault.register(pandas.Series.dt.microseconds)(self)\n\n    @doc_utils.doc_dt_timestamp(prop=\"minute component\", refer_to=\"minute\")\n    def dt_minute(self):\n        return DateTimeDefault.register(pandas.Series.dt.minute)(self)\n\n    @doc_utils.doc_dt_timestamp(prop=\"month component\", refer_to=\"month\")\n    def dt_month(self):\n        return DateTimeDefault.register(pandas.Series.dt.month)(self)\n\n    @doc_utils.doc_dt_timestamp(\n        prop=\"the month name\", refer_to=\"month name\", params=\"locale : str, optional\"\n    )\n    def dt_month_name(self, locale=None):\n        return 
DateTimeDefault.register(pandas.Series.dt.month_name)(self, locale)\n\n    @doc_utils.doc_dt_timestamp(prop=\"nanoseconds component\", refer_to=\"nanosecond\")\n    def dt_nanosecond(self):\n        return DateTimeDefault.register(pandas.Series.dt.nanosecond)(self)\n\n    @doc_utils.doc_dt_interval(prop=\"nanoseconds component\", refer_to=\"nanoseconds\")\n    def dt_nanoseconds(self):\n        return DateTimeDefault.register(pandas.Series.dt.nanoseconds)(self)\n\n    @doc_utils.add_one_column_warning\n    @doc_utils.add_refer_to(\"Series.dt.normalize\")\n    def dt_normalize(self):\n        \"\"\"\n        Set the time component of each date-time value to midnight.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            New QueryCompiler containing date-time values with midnight time.\n        \"\"\"\n        return DateTimeDefault.register(pandas.Series.dt.normalize)(self)\n\n    @doc_utils.doc_dt_timestamp(prop=\"quarter component\", refer_to=\"quarter\")\n    def dt_quarter(self):\n        return DateTimeDefault.register(pandas.Series.dt.quarter)(self)\n\n    @doc_utils.doc_dt_period(prop=\"the fiscal year\", refer_to=\"qyear\")\n    def dt_qyear(self):\n        return DateTimeDefault.register(pandas.Series.dt.qyear)(self)\n\n    @doc_utils.doc_dt_round(refer_to=\"round\")\n    def dt_round(self, freq, ambiguous=\"raise\", nonexistent=\"raise\"):\n        return DateTimeDefault.register(pandas.Series.dt.round)(\n            self, freq, ambiguous, nonexistent\n        )\n\n    @doc_utils.doc_dt_timestamp(prop=\"seconds component\", refer_to=\"second\")\n    def dt_second(self):\n        return DateTimeDefault.register(pandas.Series.dt.second)(self)\n\n    @doc_utils.doc_dt_interval(prop=\"seconds component\", refer_to=\"seconds\")\n    def dt_seconds(self):\n        return DateTimeDefault.register(pandas.Series.dt.seconds)(self)\n\n    @doc_utils.doc_dt_period(prop=\"the timestamp of start time\", refer_to=\"start_time\")\n    def 
dt_start_time(self):\n        return DateTimeDefault.register(pandas.Series.dt.start_time)(self)\n\n    @doc_utils.add_refer_to(\"Series.dt.strftime\")\n    def dt_strftime(self, date_format):\n        \"\"\"\n        Format underlying date-time data using specified format.\n\n        Parameters\n        ----------\n        date_format : str\n\n        Returns\n        -------\n        BaseQueryCompiler\n            New QueryCompiler containing formatted date-time values.\n        \"\"\"\n        return DateTimeDefault.register(pandas.Series.dt.strftime)(self, date_format)\n\n    @doc_utils.doc_dt_timestamp(prop=\"time component\", refer_to=\"time\")\n    def dt_time(self):\n        return DateTimeDefault.register(pandas.Series.dt.time)(self)\n\n    @doc_utils.doc_dt_timestamp(\n        prop=\"time component with timezone information\", refer_to=\"timetz\"\n    )\n    def dt_timetz(self):\n        return DateTimeDefault.register(pandas.Series.dt.timetz)(self)\n\n    @doc_utils.add_refer_to(\"Series.dt.asfreq\")\n    def dt_asfreq(self, freq=None, how: str = \"E\"):\n        \"\"\"\n        Convert the PeriodArray to the specified frequency `freq`.\n\n        Equivalent to applying pandas.Period.asfreq() with the given arguments to each Period in this PeriodArray.\n\n        Parameters\n        ----------\n        freq : str, optional\n            A frequency.\n        how : str {'E', 'S'}, default: 'E'\n            Whether the elements should be aligned to the end or start within a period.\n            * 'E', \"END\", or \"FINISH\" for end,\n            * 'S', \"START\", or \"BEGIN\" for start.\n            January 31st (\"END\") vs. 
January 1st (\"START\") for example.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            New QueryCompiler containing period data.\n        \"\"\"\n        return DateTimeDefault.register(pandas.Series.dt.asfreq)(self, freq, how)\n\n    @doc_utils.add_one_column_warning\n    @doc_utils.add_refer_to(\"Series.dt.to_period\")\n    def dt_to_period(self, freq=None):\n        \"\"\"\n        Convert underlying data to the period at a particular frequency.\n\n        Parameters\n        ----------\n        freq : str, optional\n\n        Returns\n        -------\n        BaseQueryCompiler\n            New QueryCompiler containing period data.\n        \"\"\"\n        return DateTimeDefault.register(pandas.Series.dt.to_period)(self, freq)\n\n    @doc_utils.add_one_column_warning\n    @doc_utils.add_refer_to(\"Series.dt.to_pydatetime\")\n    def dt_to_pydatetime(self):\n        \"\"\"\n        Convert underlying data to array of python native ``datetime``.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            New QueryCompiler containing 1D array of ``datetime`` objects.\n        \"\"\"\n        return DateTimeDefault.register(pandas.Series.dt.to_pydatetime)(self)\n\n    # FIXME: there are no references to this method, we should either remove it\n    # or add a call reference at the DataFrame level (Modin issue #3103).\n    @doc_utils.add_one_column_warning\n    @doc_utils.add_refer_to(\"Series.dt.to_pytimedelta\")\n    def dt_to_pytimedelta(self):\n        \"\"\"\n        Convert underlying data to array of python native ``datetime.timedelta``.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            New QueryCompiler containing 1D array of ``datetime.timedelta``.\n        \"\"\"\n        return DateTimeDefault.register(pandas.Series.dt.to_pytimedelta)(self)\n\n    @doc_utils.doc_dt_period(\n        prop=\"the timestamp representation\", refer_to=\"to_timestamp\"\n    )\n    def dt_to_timestamp(self):\n      
  return DateTimeDefault.register(pandas.Series.dt.to_timestamp)(self)\n\n    @doc_utils.doc_dt_interval(prop=\"duration in seconds\", refer_to=\"total_seconds\")\n    def dt_total_seconds(self):\n        return DateTimeDefault.register(pandas.Series.dt.total_seconds)(self)\n\n    @doc_utils.add_one_column_warning\n    @doc_utils.add_refer_to(\"Series.dt.tz\")\n    def dt_tz(self):\n        \"\"\"\n        Get the time-zone of the underlying time-series data.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            QueryCompiler containing a single value, time-zone of the data.\n        \"\"\"\n        return DateTimeDefault.register(pandas.Series.dt.tz)(self)\n\n    @doc_utils.add_one_column_warning\n    @doc_utils.add_refer_to(\"Series.dt.tz_convert\")\n    def dt_tz_convert(self, tz):\n        \"\"\"\n        Convert time-series data to the specified time zone.\n\n        Parameters\n        ----------\n        tz : str, pytz.timezone\n\n        Returns\n        -------\n        BaseQueryCompiler\n            New QueryCompiler containing values with converted time zone.\n        \"\"\"\n        return DateTimeDefault.register(pandas.Series.dt.tz_convert)(self, tz)\n\n    @doc_utils.add_one_column_warning\n    @doc_utils.add_refer_to(\"Series.dt.tz_localize\")\n    def dt_tz_localize(self, tz, ambiguous=\"raise\", nonexistent=\"raise\"):\n        \"\"\"\n        Localize tz-naive to tz-aware.\n\n        Parameters\n        ----------\n        tz : str, pytz.timezone, optional\n        ambiguous : {\"raise\", \"infer\", \"NaT\"} or bool mask, default: \"raise\"\n        nonexistent : {\"raise\", \"shift_forward\", \"shift_backward\", \"NaT\"} or pandas.timedelta, default: \"raise\"\n\n        Returns\n        -------\n        BaseQueryCompiler\n            New QueryCompiler containing values with localized time zone.\n        \"\"\"\n        return DateTimeDefault.register(pandas.Series.dt.tz_localize)(\n            self, tz, ambiguous, 
nonexistent\n        )\n\n    @doc_utils.doc_dt_timestamp(prop=\"integer day of week\", refer_to=\"weekday\")\n    def dt_weekday(self):\n        return DateTimeDefault.register(pandas.Series.dt.weekday)(self)\n\n    @doc_utils.doc_dt_timestamp(prop=\"year component\", refer_to=\"year\")\n    def dt_year(self):\n        return DateTimeDefault.register(pandas.Series.dt.year)(self)\n\n    # End of DateTime methods\n\n    def first(self, offset: pandas.DateOffset):\n        \"\"\"\n        Select initial periods of time series data based on a date offset.\n\n        When having a query compiler with dates as index, this function can\n        select the first few rows based on a date offset.\n\n        Parameters\n        ----------\n        offset : pandas.DateOffset\n            The offset length of the data to select.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            New compiler containing the selected data.\n        \"\"\"\n        return DataFrameDefault.register(pandas.DataFrame.first)(self, offset)\n\n    def last(self, offset: pandas.DateOffset):\n        \"\"\"\n        Select final periods of time series data based on a date offset.\n\n        For a query compiler with a sorted DatetimeIndex, this function\n        selects the last few rows based on a date offset.\n\n        Parameters\n        ----------\n        offset : pandas.DateOffset\n            The offset length of the data to select.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            New compiler containing the selected data.\n        \"\"\"\n        return DataFrameDefault.register(pandas.DataFrame.last)(self, offset)\n\n    # Resample methods\n\n    # FIXME:\n    #   1. Query Compiler shouldn't care about differences between Series and DataFrame\n    #      so `resample_agg_df` and `resample_agg_ser` should be combined (Modin issue #3104).\n    #   2. 
In DataFrame API `Resampler.aggregate` is an alias for `Resampler.apply`\n    #      we should remove one of these methods: `resample_agg_*` or `resample_app_*` (Modin issue #3107).\n    @doc_utils.doc_resample_agg(\n        action=\"apply passed aggregation function\",\n        params=\"func : str, dict, callable(pandas.Series) -> scalar, or list of such\",\n        output=\"function names\",\n        refer_to=\"agg\",\n    )\n    def resample_agg_df(self, resample_kwargs, func, *args, **kwargs):\n        return ResampleDefault.register(pandas.core.resample.Resampler.aggregate)(\n            self, resample_kwargs, func, *args, **kwargs\n        )\n\n    @doc_utils.add_deprecation_warning(replacement_method=\"resample_agg_df\")\n    @doc_utils.doc_resample_agg(\n        action=\"apply passed aggregation function in a one-column query compiler\",\n        params=\"func : str, dict, callable(pandas.Series) -> scalar, or list of such\",\n        output=\"function names\",\n        refer_to=\"agg\",\n    )\n    def resample_agg_ser(self, resample_kwargs, func, *args, **kwargs):\n        return ResampleDefault.register(\n            pandas.core.resample.Resampler.aggregate, squeeze_self=True\n        )(self, resample_kwargs, func, *args, **kwargs)\n\n    @doc_utils.add_deprecation_warning(replacement_method=\"resample_agg_df\")\n    @doc_utils.doc_resample_agg(\n        action=\"apply passed aggregation function\",\n        params=\"func : str, dict, callable(pandas.Series) -> scalar, or list of such\",\n        output=\"function names\",\n        refer_to=\"apply\",\n    )\n    def resample_app_df(self, resample_kwargs, func, *args, **kwargs):\n        return ResampleDefault.register(pandas.core.resample.Resampler.apply)(\n            self, resample_kwargs, func, *args, **kwargs\n        )\n\n    @doc_utils.add_deprecation_warning(replacement_method=\"resample_agg_df\")\n    @doc_utils.doc_resample_agg(\n        action=\"apply passed aggregation function in a 
one-column query compiler\",\n        params=\"func : str, dict, callable(pandas.Series) -> scalar, or list of such\",\n        output=\"function names\",\n        refer_to=\"apply\",\n    )\n    def resample_app_ser(self, resample_kwargs, func, *args, **kwargs):\n        return ResampleDefault.register(\n            pandas.core.resample.Resampler.apply, squeeze_self=True\n        )(self, resample_kwargs, func, *args, **kwargs)\n\n    def resample_asfreq(self, resample_kwargs, fill_value):\n        \"\"\"\n        Resample time-series data and get the values at the new frequency.\n\n        Group data into intervals by time-series row/column with\n        a specified frequency and get values at the new frequency.\n\n        Parameters\n        ----------\n        resample_kwargs : dict\n            Resample parameters as expected by ``modin.pandas.DataFrame.resample`` signature.\n        fill_value : scalar\n\n        Returns\n        -------\n        BaseQueryCompiler\n            New QueryCompiler containing values at the specified frequency.\n        \"\"\"\n        return ResampleDefault.register(pandas.core.resample.Resampler.asfreq)(\n            self, resample_kwargs, fill_value\n        )\n\n    @doc_utils.doc_resample_fillna(method=\"back-fill\", refer_to=\"bfill\")\n    def resample_bfill(self, resample_kwargs, limit):\n        return ResampleDefault.register(pandas.core.resample.Resampler.bfill)(\n            self, resample_kwargs, limit\n        )\n\n    @doc_utils.doc_resample_reduce(\n        result=\"number of non-NA values\", refer_to=\"count\", compatibility_params=False\n    )\n    def resample_count(self, resample_kwargs):\n        return ResampleDefault.register(pandas.core.resample.Resampler.count)(\n            self, resample_kwargs\n        )\n\n    @doc_utils.doc_resample_fillna(method=\"forward-fill\", refer_to=\"ffill\")\n    def resample_ffill(self, resample_kwargs, limit):\n        return 
ResampleDefault.register(pandas.core.resample.Resampler.ffill)(\n            self, resample_kwargs, limit\n        )\n\n    # FIXME: we should combine all resample fillna methods into `resample_fillna`\n    # (Modin issue #3107)\n    @doc_utils.doc_resample_fillna(\n        method=\"specified\", refer_to=\"fillna\", params=\"method : str\"\n    )\n    def resample_fillna(self, resample_kwargs, method, limit):\n        return ResampleDefault.register(pandas.core.resample.Resampler.fillna)(\n            self, resample_kwargs, method, limit\n        )\n\n    @doc_utils.doc_resample_reduce(result=\"first element\", refer_to=\"first\")\n    def resample_first(self, resample_kwargs, *args, **kwargs):\n        return ResampleDefault.register(pandas.core.resample.Resampler.first)(\n            self, resample_kwargs, *args, **kwargs\n        )\n\n    # FIXME: This function takes Modin DataFrame via `obj` parameter,\n    # we should avoid leaking of the high-level objects to the query compiler level.\n    # (Modin issue #3106)\n    def resample_get_group(self, resample_kwargs, name, obj):\n        \"\"\"\n        Resample time-series data and get the specified group.\n\n        Group data into intervals by time-series row/column with\n        a specified frequency and get the values of the specified group.\n\n        Parameters\n        ----------\n        resample_kwargs : dict\n            Resample parameters as expected by ``modin.pandas.DataFrame.resample`` signature.\n        name : object\n        obj : modin.pandas.DataFrame, optional\n\n        Returns\n        -------\n        BaseQueryCompiler\n            New QueryCompiler containing the values from the specified group.\n        \"\"\"\n        return ResampleDefault.register(pandas.core.resample.Resampler.get_group)(\n            self, resample_kwargs, name, obj\n        )\n\n    @doc_utils.doc_resample_fillna(\n        method=\"specified interpolation\",\n        refer_to=\"interpolate\",\n        
params=\"\"\"\n        method : str\n        axis : {0, 1}\n        limit : int\n        inplace : {False}\n            This parameter serves the compatibility purpose. Always has to be False.\n        limit_direction : {\"forward\", \"backward\", \"both\"}\n        limit_area : {None, \"inside\", \"outside\"}\n        downcast : str, optional\n        **kwargs : dict\n        \"\"\",\n        overwrite_template_params=True,\n    )\n    def resample_interpolate(\n        self,\n        resample_kwargs,\n        method,\n        axis,\n        limit,\n        inplace,\n        limit_direction,\n        limit_area,\n        downcast,\n        **kwargs,\n    ):\n        return ResampleDefault.register(pandas.core.resample.Resampler.interpolate)(\n            self,\n            resample_kwargs,\n            method,\n            axis=axis,\n            limit=limit,\n            inplace=inplace,\n            limit_direction=limit_direction,\n            limit_area=limit_area,\n            downcast=downcast,\n            **kwargs,\n        )\n\n    @doc_utils.doc_resample_reduce(result=\"last element\", refer_to=\"last\")\n    def resample_last(self, resample_kwargs, *args, **kwargs):\n        return ResampleDefault.register(pandas.core.resample.Resampler.last)(\n            self, resample_kwargs, *args, **kwargs\n        )\n\n    @doc_utils.doc_resample_reduce(result=\"maximum value\", refer_to=\"max\")\n    def resample_max(self, resample_kwargs, *args, **kwargs):\n        return ResampleDefault.register(pandas.core.resample.Resampler.max)(\n            self, resample_kwargs, *args, **kwargs\n        )\n\n    @doc_utils.doc_resample_reduce(result=\"mean value\", refer_to=\"mean\")\n    def resample_mean(self, resample_kwargs, *args, **kwargs):\n        return ResampleDefault.register(pandas.core.resample.Resampler.mean)(\n            self, resample_kwargs, *args, **kwargs\n        )\n\n    @doc_utils.doc_resample_reduce(result=\"median value\", refer_to=\"median\")\n    
def resample_median(self, resample_kwargs, *args, **kwargs):\n        return ResampleDefault.register(pandas.core.resample.Resampler.median)(\n            self, resample_kwargs, *args, **kwargs\n        )\n\n    @doc_utils.doc_resample_reduce(result=\"minimum value\", refer_to=\"min\")\n    def resample_min(self, resample_kwargs, *args, **kwargs):\n        return ResampleDefault.register(pandas.core.resample.Resampler.min)(\n            self, resample_kwargs, *args, **kwargs\n        )\n\n    @doc_utils.doc_resample_fillna(method=\"'nearest'\", refer_to=\"nearest\")\n    def resample_nearest(self, resample_kwargs, limit):\n        return ResampleDefault.register(pandas.core.resample.Resampler.nearest)(\n            self, resample_kwargs, limit\n        )\n\n    @doc_utils.doc_resample_reduce(result=\"number of unique values\", refer_to=\"nunique\")\n    def resample_nunique(self, resample_kwargs, *args, **kwargs):\n        return ResampleDefault.register(pandas.core.resample.Resampler.nunique)(\n            self, resample_kwargs, *args, **kwargs\n        )\n\n    # FIXME: Query Compiler shouldn't care about differences between Series and DataFrame\n    # so `resample_ohlc_df` and `resample_ohlc_ser` should be combined (Modin issue #3104).\n    @doc_utils.doc_resample_agg(\n        action=\"compute open, high, low and close values\",\n        output=\"labels of columns containing computed values\",\n        refer_to=\"ohlc\",\n    )\n    def resample_ohlc_df(self, resample_kwargs, *args, **kwargs):\n        return ResampleDefault.register(pandas.core.resample.Resampler.ohlc)(\n            self, resample_kwargs, *args, **kwargs\n        )\n\n    @doc_utils.doc_resample_agg(\n        action=\"compute open, high, low and close values\",\n        output=\"labels of columns containing computed values\",\n        refer_to=\"ohlc\",\n    )\n    def resample_ohlc_ser(self, resample_kwargs, *args, **kwargs):\n        return ResampleDefault.register(\n            
pandas.core.resample.Resampler.ohlc, squeeze_self=True\n        )(self, resample_kwargs, *args, **kwargs)\n\n    # FIXME: This method requires us to build a high-level resampler object,\n    # which we shouldn't do at the query compiler. We need to move it to the front end.\n    # (Modin issue #3105)\n    @doc_utils.add_refer_to(\"Resampler.pipe\")\n    def resample_pipe(self, resample_kwargs, func, *args, **kwargs):\n        \"\"\"\n        Resample time-series data and apply aggregation on it.\n\n        Group data into intervals by time-series row/column with\n        a specified frequency, build equivalent ``pandas.Resampler`` object\n        and apply passed function to it.\n\n        Parameters\n        ----------\n        resample_kwargs : dict\n            Resample parameters as expected by ``modin.pandas.DataFrame.resample`` signature.\n        func : callable(pandas.Resampler) -> object or tuple(callable, str)\n        *args : iterable\n            Positional arguments to pass to function.\n        **kwargs : dict\n            Keyword arguments to pass to function.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            New QueryCompiler containing the result of passed function.\n        \"\"\"\n        return ResampleDefault.register(pandas.core.resample.Resampler.pipe)(\n            self, resample_kwargs, func, *args, **kwargs\n        )\n\n    @doc_utils.doc_resample_reduce(\n        result=\"product\",\n        params=\"min_count : int\",\n        refer_to=\"prod\",\n    )\n    def resample_prod(self, resample_kwargs, min_count, *args, **kwargs):\n        return ResampleDefault.register(pandas.core.resample.Resampler.prod)(\n            self, resample_kwargs, min_count, *args, **kwargs\n        )\n\n    @doc_utils.doc_resample_reduce(\n        result=\"quantile\", params=\"q : float\", refer_to=\"quantile\"\n    )\n    def resample_quantile(self, resample_kwargs, q, *args, **kwargs):\n        return 
ResampleDefault.register(pandas.core.resample.Resampler.quantile)(\n            self, resample_kwargs, q, *args, **kwargs\n        )\n\n    @doc_utils.doc_resample_reduce(\n        result=\"standard error of the mean\",\n        refer_to=\"sem\",\n    )\n    def resample_sem(self, resample_kwargs, *args, **kwargs):\n        return ResampleDefault.register(pandas.core.resample.Resampler.sem)(\n            self, resample_kwargs, *args, **kwargs\n        )\n\n    @doc_utils.doc_resample_reduce(\n        result=\"number of elements in a group\", refer_to=\"size\"\n    )\n    def resample_size(self, resample_kwargs, *args, **kwargs):\n        return ResampleDefault.register(pandas.core.resample.Resampler.size)(\n            self, resample_kwargs, *args, **kwargs\n        )\n\n    @doc_utils.doc_resample_reduce(\n        result=\"standard deviation\", params=\"ddof : int\", refer_to=\"std\"\n    )\n    def resample_std(self, resample_kwargs, ddof, *args, **kwargs):\n        return ResampleDefault.register(pandas.core.resample.Resampler.std)(\n            self, resample_kwargs, ddof, *args, **kwargs\n        )\n\n    @doc_utils.doc_resample_reduce(\n        result=\"sum\",\n        params=\"min_count : int\",\n        refer_to=\"sum\",\n    )\n    def resample_sum(self, resample_kwargs, min_count, *args, **kwargs):\n        return ResampleDefault.register(pandas.core.resample.Resampler.sum)(\n            self, resample_kwargs, min_count, *args, **kwargs\n        )\n\n    def resample_transform(self, resample_kwargs, arg, *args, **kwargs):\n        \"\"\"\n        Resample time-series data and apply aggregation on it.\n\n        Group data into intervals by time-series row/column with\n        a specified frequency and call passed function on each group.\n        In contrast to ``resample_app_df``, the function is applied to the whole group\n        instead of a single axis.\n\n        Parameters\n        ----------\n        resample_kwargs : dict\n            Resample parameters 
as expected by ``modin.pandas.DataFrame.resample`` signature.\n        arg : callable(pandas.DataFrame) -> pandas.Series\n        *args : iterable\n            Positional arguments to pass to function.\n        **kwargs : dict\n            Keyword arguments to pass to function.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            New QueryCompiler containing the result of passed function.\n        \"\"\"\n        return ResampleDefault.register(pandas.core.resample.Resampler.transform)(\n            self, resample_kwargs, arg, *args, **kwargs\n        )\n\n    @doc_utils.doc_resample_reduce(\n        result=\"variance\", params=\"ddof : int\", refer_to=\"var\"\n    )\n    def resample_var(self, resample_kwargs, ddof, *args, **kwargs):\n        return ResampleDefault.register(pandas.core.resample.Resampler.var)(\n            self, resample_kwargs, ddof, *args, **kwargs\n        )\n\n    # End of Resample methods\n\n    # Str methods\n\n    @doc_utils.doc_str_method(refer_to=\"capitalize\", params=\"\")\n    def str_capitalize(self):\n        return StrDefault.register(pandas.Series.str.capitalize)(self)\n\n    @doc_utils.doc_str_method(\n        refer_to=\"center\",\n        params=\"\"\"\n        width : int\n        fillchar : str, default: ' '\"\"\",\n    )\n    def str_center(self, width, fillchar=\" \"):\n        return StrDefault.register(pandas.Series.str.center)(self, width, fillchar)\n\n    @doc_utils.doc_str_method(\n        refer_to=\"contains\",\n        params=\"\"\"\n        pat : str\n        case : bool, default: True\n        flags : int, default: 0\n        na : object, default: None\n        regex : bool, default: True\"\"\",\n    )\n    def str_contains(self, pat, case=True, flags=0, na=None, regex=True):\n        return StrDefault.register(pandas.Series.str.contains)(\n            self, pat, case, flags, na, regex\n        )\n\n    @doc_utils.doc_str_method(\n        refer_to=\"count\",\n        params=\"\"\"\n        pat 
: str\n        flags : int, default: 0\"\"\",\n    )\n    def str_count(self, pat, flags=0):\n        return StrDefault.register(pandas.Series.str.count)(self, pat, flags)\n\n    @doc_utils.doc_str_method(\n        refer_to=\"endswith\",\n        params=\"\"\"\n        pat : str\n        na : object, default: None\"\"\",\n    )\n    def str_endswith(self, pat, na=None):\n        return StrDefault.register(pandas.Series.str.endswith)(self, pat, na)\n\n    @doc_utils.doc_str_method(\n        refer_to=\"find\",\n        params=\"\"\"\n        sub : str\n        start : int, default: 0\n        end : int, optional\"\"\",\n    )\n    def str_find(self, sub, start=0, end=None):\n        return StrDefault.register(pandas.Series.str.find)(self, sub, start, end)\n\n    @doc_utils.doc_str_method(\n        refer_to=\"findall\",\n        params=\"\"\"\n        pat : str\n        flags : int, default: 0\"\"\",\n    )\n    def str_findall(self, pat, flags=0):\n        return StrDefault.register(pandas.Series.str.findall)(self, pat, flags)\n\n    @doc_utils.doc_str_method(\n        refer_to=\"fullmatch\",\n        params=\"\"\"\n        pat : str\n        case : bool, default: True\n        flags : int, default: 0\n        na : object, default: None\"\"\",\n    )\n    def str_fullmatch(self, pat, case=True, flags=0, na=None):\n        return StrDefault.register(pandas.Series.str.fullmatch)(\n            self, pat, case, flags, na\n        )\n\n    @doc_utils.doc_str_method(refer_to=\"get\", params=\"i : int\")\n    def str_get(self, i):\n        return StrDefault.register(pandas.Series.str.get)(self, i)\n\n    @doc_utils.doc_str_method(refer_to=\"get_dummies\", params=\"sep : str\")\n    def str_get_dummies(self, sep):\n        return StrDefault.register(pandas.Series.str.get_dummies)(self, sep)\n\n    @doc_utils.doc_str_method(\n        refer_to=\"index\",\n        params=\"\"\"\n        sub : str\n        start : int, default: 0\n        end : int, optional\"\"\",\n    )\n    
def str_index(self, sub, start=0, end=None):\n        return StrDefault.register(pandas.Series.str.index)(self, sub, start, end)\n\n    @doc_utils.doc_str_method(refer_to=\"isalnum\", params=\"\")\n    def str_isalnum(self):\n        return StrDefault.register(pandas.Series.str.isalnum)(self)\n\n    @doc_utils.doc_str_method(refer_to=\"isalpha\", params=\"\")\n    def str_isalpha(self):\n        return StrDefault.register(pandas.Series.str.isalpha)(self)\n\n    @doc_utils.doc_str_method(refer_to=\"isdecimal\", params=\"\")\n    def str_isdecimal(self):\n        return StrDefault.register(pandas.Series.str.isdecimal)(self)\n\n    @doc_utils.doc_str_method(refer_to=\"isdigit\", params=\"\")\n    def str_isdigit(self):\n        return StrDefault.register(pandas.Series.str.isdigit)(self)\n\n    @doc_utils.doc_str_method(refer_to=\"islower\", params=\"\")\n    def str_islower(self):\n        return StrDefault.register(pandas.Series.str.islower)(self)\n\n    @doc_utils.doc_str_method(refer_to=\"isnumeric\", params=\"\")\n    def str_isnumeric(self):\n        return StrDefault.register(pandas.Series.str.isnumeric)(self)\n\n    @doc_utils.doc_str_method(refer_to=\"isspace\", params=\"\")\n    def str_isspace(self):\n        return StrDefault.register(pandas.Series.str.isspace)(self)\n\n    @doc_utils.doc_str_method(refer_to=\"istitle\", params=\"\")\n    def str_istitle(self):\n        return StrDefault.register(pandas.Series.str.istitle)(self)\n\n    @doc_utils.doc_str_method(refer_to=\"isupper\", params=\"\")\n    def str_isupper(self):\n        return StrDefault.register(pandas.Series.str.isupper)(self)\n\n    @doc_utils.doc_str_method(refer_to=\"join\", params=\"sep : str\")\n    def str_join(self, sep):\n        return StrDefault.register(pandas.Series.str.join)(self, sep)\n\n    @doc_utils.doc_str_method(refer_to=\"len\", params=\"\")\n    def str_len(self):\n        return StrDefault.register(pandas.Series.str.len)(self)\n\n    @doc_utils.doc_str_method(\n        
refer_to=\"ljust\",\n        params=\"\"\"\n        width : int\n        fillchar : str, default: ' '\"\"\",\n    )\n    def str_ljust(self, width, fillchar=\" \"):\n        return StrDefault.register(pandas.Series.str.ljust)(self, width, fillchar)\n\n    @doc_utils.doc_str_method(refer_to=\"lower\", params=\"\")\n    def str_lower(self):\n        return StrDefault.register(pandas.Series.str.lower)(self)\n\n    @doc_utils.doc_str_method(refer_to=\"lstrip\", params=\"to_strip : str, optional\")\n    def str_lstrip(self, to_strip=None):\n        return StrDefault.register(pandas.Series.str.lstrip)(self, to_strip)\n\n    @doc_utils.doc_str_method(\n        refer_to=\"match\",\n        params=\"\"\"\n        pat : str\n        case : bool, default: True\n        flags : int, default: 0\n        na : object, default: None\"\"\",\n    )\n    def str_match(self, pat, case=True, flags=0, na=None):\n        return StrDefault.register(pandas.Series.str.match)(self, pat, case, flags, na)\n\n    @doc_utils.doc_str_method(\n        refer_to=\"extract\",\n        params=\"\"\"\n        pat : str\n        flags : int, default: 0\n        expand : bool, default: True\"\"\",\n    )\n    def str_extract(self, pat, flags=0, expand=True):\n        return StrDefault.register(pandas.Series.str.extract)(self, pat, flags, expand)\n\n    @doc_utils.doc_str_method(\n        refer_to=\"extractall\",\n        params=\"\"\"\n        pat : str\n        flags : int, default: 0\"\"\",\n    )\n    def str_extractall(self, pat, flags=0):\n        return StrDefault.register(pandas.Series.str.extractall)(self, pat, flags)\n\n    @doc_utils.doc_str_method(\n        refer_to=\"normalize\", params=\"form : {'NFC', 'NFKC', 'NFD', 'NFKD'}\"\n    )\n    def str_normalize(self, form):\n        return StrDefault.register(pandas.Series.str.normalize)(self, form)\n\n    @doc_utils.doc_str_method(\n        refer_to=\"pad\",\n        params=\"\"\"\n        width : int\n        side : {'left', 'right', 'both'}, 
default: 'left'\n        fillchar : str, default: ' '\"\"\",\n    )\n    def str_pad(self, width, side=\"left\", fillchar=\" \"):\n        return StrDefault.register(pandas.Series.str.pad)(self, width, side, fillchar)\n\n    @doc_utils.doc_str_method(\n        refer_to=\"partition\",\n        params=\"\"\"\n        sep : str, default: ' '\n        expand : bool, default: True\"\"\",\n    )\n    def str_partition(self, sep=\" \", expand=True):\n        return StrDefault.register(pandas.Series.str.partition)(self, sep, expand)\n\n    @doc_utils.doc_str_method(refer_to=\"removeprefix\", params=\"prefix : str\")\n    def str_removeprefix(self, prefix):\n        return StrDefault.register(pandas.Series.str.removeprefix)(self, prefix)\n\n    @doc_utils.doc_str_method(refer_to=\"removesuffix\", params=\"suffix : str\")\n    def str_removesuffix(self, suffix):\n        return StrDefault.register(pandas.Series.str.removesuffix)(self, suffix)\n\n    @doc_utils.doc_str_method(refer_to=\"repeat\", params=\"repeats : int\")\n    def str_repeat(self, repeats):\n        return StrDefault.register(pandas.Series.str.repeat)(self, repeats)\n\n    @doc_utils.doc_str_method(\n        refer_to=\"replace\",\n        params=\"\"\"\n        pat : str\n        repl : str or callable\n        n : int, default: -1\n        case : bool, optional\n        flags : int, default: 0\n        regex : bool, default: None\"\"\",\n    )\n    def str_replace(self, pat, repl, n=-1, case=None, flags=0, regex=None):\n        return StrDefault.register(pandas.Series.str.replace)(\n            self, pat, repl, n, case, flags, regex\n        )\n\n    @doc_utils.doc_str_method(\n        refer_to=\"rfind\",\n        params=\"\"\"\n        sub : str\n        start : int, default: 0\n        end : int, optional\"\"\",\n    )\n    def str_rfind(self, sub, start=0, end=None):\n        return StrDefault.register(pandas.Series.str.rfind)(self, sub, start, end)\n\n    @doc_utils.doc_str_method(\n        
refer_to=\"rindex\",\n        params=\"\"\"\n        sub : str\n        start : int, default: 0\n        end : int, optional\"\"\",\n    )\n    def str_rindex(self, sub, start=0, end=None):\n        return StrDefault.register(pandas.Series.str.rindex)(self, sub, start, end)\n\n    @doc_utils.doc_str_method(\n        refer_to=\"rjust\",\n        params=\"\"\"\n        width : int\n        fillchar : str, default: ' '\"\"\",\n    )\n    def str_rjust(self, width, fillchar=\" \"):\n        return StrDefault.register(pandas.Series.str.rjust)(self, width, fillchar)\n\n    @doc_utils.doc_str_method(\n        refer_to=\"rpartition\",\n        params=\"\"\"\n        sep : str, default: ' '\n        expand : bool, default: True\"\"\",\n    )\n    def str_rpartition(self, sep=\" \", expand=True):\n        return StrDefault.register(pandas.Series.str.rpartition)(self, sep, expand)\n\n    @doc_utils.doc_str_method(\n        refer_to=\"rsplit\",\n        params=\"\"\"\n        pat : str, optional\n        n : int, default: -1\n        expand : bool, default: False\"\"\",\n    )\n    def str_rsplit(self, pat=None, *, n=-1, expand=False):\n        return StrDefault.register(pandas.Series.str.rsplit)(\n            self, pat, n=n, expand=expand\n        )\n\n    @doc_utils.doc_str_method(refer_to=\"rstrip\", params=\"to_strip : str, optional\")\n    def str_rstrip(self, to_strip=None):\n        return StrDefault.register(pandas.Series.str.rstrip)(self, to_strip)\n\n    @doc_utils.doc_str_method(\n        refer_to=\"slice\",\n        params=\"\"\"\n        start : int, optional\n        stop : int, optional\n        step : int, optional\"\"\",\n    )\n    def str_slice(self, start=None, stop=None, step=None):\n        return StrDefault.register(pandas.Series.str.slice)(self, start, stop, step)\n\n    @doc_utils.doc_str_method(\n        refer_to=\"slice_replace\",\n        params=\"\"\"\n        start : int, optional\n        stop : int, optional\n        repl : str or callable, 
optional\"\"\",\n    )\n    def str_slice_replace(self, start=None, stop=None, repl=None):\n        return StrDefault.register(pandas.Series.str.slice_replace)(\n            self, start, stop, repl\n        )\n\n    @doc_utils.doc_str_method(\n        refer_to=\"split\",\n        params=\"\"\"\n        pat : str, optional\n        n : int, default: -1\n        expand : bool, default: False\n        regex : bool, default: None\"\"\",\n    )\n    def str_split(self, pat=None, *, n=-1, expand=False, regex=None):\n        return StrDefault.register(pandas.Series.str.split)(\n            self, pat, n=n, expand=expand, regex=regex\n        )\n\n    @doc_utils.doc_str_method(\n        refer_to=\"startswith\",\n        params=\"\"\"\n        pat : str\n        na : object, default: None\"\"\",\n    )\n    def str_startswith(self, pat, na=None):\n        return StrDefault.register(pandas.Series.str.startswith)(self, pat, na)\n\n    @doc_utils.doc_str_method(refer_to=\"strip\", params=\"to_strip : str, optional\")\n    def str_strip(self, to_strip=None):\n        return StrDefault.register(pandas.Series.str.strip)(self, to_strip)\n\n    @doc_utils.doc_str_method(refer_to=\"swapcase\", params=\"\")\n    def str_swapcase(self):\n        return StrDefault.register(pandas.Series.str.swapcase)(self)\n\n    @doc_utils.doc_str_method(refer_to=\"title\", params=\"\")\n    def str_title(self):\n        return StrDefault.register(pandas.Series.str.title)(self)\n\n    @doc_utils.doc_str_method(refer_to=\"translate\", params=\"table : dict\")\n    def str_translate(self, table):\n        return StrDefault.register(pandas.Series.str.translate)(self, table)\n\n    @doc_utils.doc_str_method(refer_to=\"upper\", params=\"\")\n    def str_upper(self):\n        return StrDefault.register(pandas.Series.str.upper)(self)\n\n    @doc_utils.doc_str_method(\n        refer_to=\"wrap\",\n        params=\"\"\"\n        width : int\n        **kwargs : dict\"\"\",\n    )\n    def str_wrap(self, width, 
**kwargs):\n        return StrDefault.register(pandas.Series.str.wrap)(self, width, **kwargs)\n\n    @doc_utils.doc_str_method(refer_to=\"zfill\", params=\"width : int\")\n    def str_zfill(self, width):\n        return StrDefault.register(pandas.Series.str.zfill)(self, width)\n\n    @doc_utils.doc_str_method(refer_to=\"__getitem__\", params=\"key : object\")\n    def str___getitem__(self, key):\n        return StrDefault.register(pandas.Series.str.__getitem__)(self, key)\n\n    @doc_utils.doc_str_method(\n        refer_to=\"encode\",\n        params=\"\"\"\n            encoding : str,\n            errors : str, default = 'strict'\"\"\",\n    )\n    def str_encode(self, encoding, errors):\n        return StrDefault.register(pandas.Series.str.encode)(self, encoding, errors)\n\n    @doc_utils.doc_str_method(\n        refer_to=\"decode\",\n        params=\"\"\"\n                encoding : str,\n                errors : str, default = 'strict'\n                dtype : str or dtype, optional\"\"\",\n    )\n    def str_decode(self, encoding, errors, dtype):\n        return StrDefault.register(pandas.Series.str.decode)(\n            self, encoding, errors, dtype\n        )\n\n    @doc_utils.doc_str_method(\n        refer_to=\"cat\",\n        params=\"\"\"\n            others : Series, Index, DataFrame, np.ndarray or list-like,\n            sep : str, default: '',\n            na_rep : str or None, default: None,\n            join : {'left', 'right', 'outer', 'inner'}, default: 'left'\"\"\",\n    )\n    def str_cat(self, others, sep=None, na_rep=None, join=\"left\"):\n        return StrDefault.register(pandas.Series.str.cat)(\n            self, others, sep, na_rep, join\n        )\n\n    @doc_utils.doc_str_method(\n        refer_to=\"casefold\",\n        params=\"\",\n    )\n    def str_casefold(self):\n        return StrDefault.register(pandas.Series.str.casefold)(self)\n\n    # End of Str methods\n\n    # Rolling methods\n\n    # FIXME: most of the rolling/window methods 
take *args and **kwargs parameters\n    # which are only needed for the compatibility with numpy, this behavior is inherited\n    # from the API level, we should get rid of it (Modin issue #3108).\n\n    @doc_utils.doc_window_method(\n        window_cls_name=\"Rolling\",\n        result=\"the result of passed functions\",\n        action=\"apply specified functions\",\n        refer_to=\"aggregate\",\n        params=\"\"\"\n        func : str, dict, callable(pandas.Series) -> scalar, or list of such\n        *args : iterable\n        **kwargs : dict\"\"\",\n        build_rules=\"udf_aggregation\",\n    )\n    def rolling_aggregate(self, fold_axis, rolling_kwargs, func, *args, **kwargs):\n        return RollingDefault.register(pandas.core.window.rolling.Rolling.aggregate)(\n            self, rolling_kwargs, func, *args, **kwargs\n        )\n\n    # FIXME: at the query compiler method `rolling_apply` is an alias for `rolling_aggregate`,\n    # one of these should be removed (Modin issue #3107).\n    @doc_utils.add_deprecation_warning(replacement_method=\"rolling_aggregate\")\n    @doc_utils.doc_window_method(\n        window_cls_name=\"Rolling\",\n        result=\"the result of passed function\",\n        action=\"apply specified function\",\n        refer_to=\"apply\",\n        params=\"\"\"\n        func : callable(pandas.Series) -> scalar\n        raw : bool, default: False\n        engine : None, default: None\n            This parameter serves the compatibility purpose. Always has to be None.\n        engine_kwargs : None, default: None\n            This parameter serves the compatibility purpose. 
Always has to be None.\n        args : tuple, optional\n        kwargs : dict, optional\"\"\",\n        build_rules=\"udf_aggregation\",\n    )\n    def rolling_apply(\n        self,\n        fold_axis,\n        rolling_kwargs,\n        func,\n        raw=False,\n        engine=None,\n        engine_kwargs=None,\n        args=None,\n        kwargs=None,\n    ):\n        return RollingDefault.register(pandas.core.window.rolling.Rolling.apply)(\n            self, rolling_kwargs, func, raw, engine, engine_kwargs, args, kwargs\n        )\n\n    @doc_utils.doc_window_method(\n        window_cls_name=\"Rolling\",\n        result=\"correlation\",\n        refer_to=\"corr\",\n        params=\"\"\"\n        other : modin.pandas.Series, modin.pandas.DataFrame, list-like, optional\n        pairwise : bool, optional\n        *args : iterable\n        **kwargs : dict\"\"\",\n    )\n    def rolling_corr(\n        self, fold_axis, rolling_kwargs, other=None, pairwise=None, *args, **kwargs\n    ):\n        return RollingDefault.register(pandas.core.window.rolling.Rolling.corr)(\n            self, rolling_kwargs, other, pairwise, *args, **kwargs\n        )\n\n    @doc_utils.doc_window_method(\n        window_cls_name=\"Rolling\", result=\"number of non-NA values\", refer_to=\"count\"\n    )\n    def rolling_count(self, fold_axis, rolling_kwargs):\n        return RollingDefault.register(pandas.core.window.rolling.Rolling.count)(\n            self, rolling_kwargs\n        )\n\n    @doc_utils.doc_window_method(\n        window_cls_name=\"Rolling\",\n        result=\"covariance\",\n        refer_to=\"cov\",\n        params=\"\"\"\n        other : modin.pandas.Series, modin.pandas.DataFrame, list-like, optional\n        pairwise : bool, optional\n        ddof : int, default:  1\n        **kwargs : dict\"\"\",\n    )\n    def rolling_cov(\n        self, fold_axis, rolling_kwargs, other=None, pairwise=None, ddof=1, **kwargs\n    ):\n        return 
RollingDefault.register(pandas.core.window.rolling.Rolling.cov)(\n            self, rolling_kwargs, other, pairwise, ddof, **kwargs\n        )\n\n    @doc_utils.doc_window_method(\n        window_cls_name=\"Rolling\",\n        result=\"unbiased kurtosis\",\n        refer_to=\"kurt\",\n        params=\"**kwargs : dict\",\n    )\n    def rolling_kurt(self, fold_axis, rolling_kwargs, **kwargs):\n        return RollingDefault.register(pandas.core.window.rolling.Rolling.kurt)(\n            self, rolling_kwargs, **kwargs\n        )\n\n    @doc_utils.doc_window_method(\n        window_cls_name=\"Rolling\",\n        result=\"maximum value\",\n        refer_to=\"max\",\n        params=\"\"\"\n        *args : iterable\n        **kwargs : dict\"\"\",\n    )\n    def rolling_max(self, fold_axis, rolling_kwargs, *args, **kwargs):\n        return RollingDefault.register(pandas.core.window.rolling.Rolling.max)(\n            self, rolling_kwargs, *args, **kwargs\n        )\n\n    @doc_utils.doc_window_method(\n        window_cls_name=\"Rolling\",\n        result=\"mean value\",\n        refer_to=\"mean\",\n        params=\"\"\"\n        *args : iterable\n        **kwargs : dict\"\"\",\n    )\n    def rolling_mean(self, fold_axis, rolling_kwargs, *args, **kwargs):\n        return RollingDefault.register(pandas.core.window.rolling.Rolling.mean)(\n            self, rolling_kwargs, *args, **kwargs\n        )\n\n    @doc_utils.doc_window_method(\n        window_cls_name=\"Rolling\",\n        result=\"median value\",\n        refer_to=\"median\",\n        params=\"**kwargs : dict\",\n    )\n    def rolling_median(self, fold_axis, rolling_kwargs, **kwargs):\n        return RollingDefault.register(pandas.core.window.rolling.Rolling.median)(\n            self, rolling_kwargs, **kwargs\n        )\n\n    @doc_utils.doc_window_method(\n        window_cls_name=\"Rolling\",\n        result=\"minimum value\",\n        refer_to=\"min\",\n        params=\"\"\"\n        *args : iterable\n        
**kwargs : dict\"\"\",\n    )\n    def rolling_min(self, fold_axis, rolling_kwargs, *args, **kwargs):\n        return RollingDefault.register(pandas.core.window.rolling.Rolling.min)(\n            self, rolling_kwargs, *args, **kwargs\n        )\n\n    @doc_utils.doc_window_method(\n        window_cls_name=\"Rolling\",\n        result=\"quantile\",\n        refer_to=\"quantile\",\n        params=\"\"\"\n        quantile : float\n        interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}, default: 'linear'\n        **kwargs : dict\"\"\",\n    )\n    def rolling_quantile(\n        self, fold_axis, rolling_kwargs, quantile, interpolation=\"linear\", **kwargs\n    ):\n        return RollingDefault.register(pandas.core.window.rolling.Rolling.quantile)(\n            self, rolling_kwargs, quantile, interpolation, **kwargs\n        )\n\n    @doc_utils.doc_window_method(\n        window_cls_name=\"Rolling\",\n        result=\"unbiased skewness\",\n        refer_to=\"skew\",\n        params=\"**kwargs : dict\",\n    )\n    def rolling_skew(self, fold_axis, rolling_kwargs, **kwargs):\n        return RollingDefault.register(pandas.core.window.rolling.Rolling.skew)(\n            self, rolling_kwargs, **kwargs\n        )\n\n    @doc_utils.doc_window_method(\n        window_cls_name=\"Rolling\",\n        result=\"standard deviation\",\n        refer_to=\"std\",\n        params=\"\"\"\n        ddof : int, default: 1\n        *args : iterable\n        **kwargs : dict\"\"\",\n    )\n    def rolling_std(self, fold_axis, rolling_kwargs, ddof=1, *args, **kwargs):\n        return RollingDefault.register(pandas.core.window.rolling.Rolling.std)(\n            self, rolling_kwargs, ddof, *args, **kwargs\n        )\n\n    @doc_utils.doc_window_method(\n        window_cls_name=\"Rolling\",\n        result=\"sum\",\n        refer_to=\"sum\",\n        params=\"\"\"\n        *args : iterable\n        **kwargs : dict\"\"\",\n    )\n    def rolling_sum(self, fold_axis, 
rolling_kwargs, *args, **kwargs):\n        return RollingDefault.register(pandas.core.window.rolling.Rolling.sum)(\n            self, rolling_kwargs, *args, **kwargs\n        )\n\n    @doc_utils.doc_window_method(\n        window_cls_name=\"Rolling\",\n        result=\"sem\",\n        refer_to=\"sem\",\n        params=\"\"\"\n        *args : iterable\n        **kwargs : dict\"\"\",\n    )\n    def rolling_sem(self, fold_axis, rolling_kwargs, *args, **kwargs):\n        return RollingDefault.register(pandas.core.window.rolling.Rolling.sem)(\n            self, rolling_kwargs, *args, **kwargs\n        )\n\n    @doc_utils.doc_window_method(\n        window_cls_name=\"Rolling\",\n        result=\"variance\",\n        refer_to=\"var\",\n        params=\"\"\"\n        ddof : int, default: 1\n        *args : iterable\n        **kwargs : dict\"\"\",\n    )\n    def rolling_var(self, fold_axis, rolling_kwargs, ddof=1, *args, **kwargs):\n        return RollingDefault.register(pandas.core.window.rolling.Rolling.var)(\n            self, rolling_kwargs, ddof, *args, **kwargs\n        )\n\n    @doc_utils.doc_window_method(\n        window_cls_name=\"Rolling\",\n        result=\"rank\",\n        refer_to=\"rank\",\n        params=\"\"\"\n        method : {'average', 'min', 'max'}, default: 'average'\n        ascending : bool, default: True\n        pct : bool, default: False\n        numeric_only : bool, default: False\n        *args : iterable\n        **kwargs : dict\"\"\",\n    )\n    def rolling_rank(\n        self,\n        fold_axis,\n        rolling_kwargs,\n        method=\"average\",\n        ascending=True,\n        pct=False,\n        numeric_only=False,\n        *args,\n        **kwargs,\n    ):\n        return RollingDefault.register(pandas.core.window.rolling.Rolling.rank)(\n            self,\n            rolling_kwargs,\n            method=method,\n            ascending=ascending,\n            pct=pct,\n            numeric_only=numeric_only,\n            *args,\n     
       **kwargs,\n        )\n\n    # End of Rolling methods\n\n    # Begin Expanding methods\n\n    @doc_utils.doc_window_method(\n        window_cls_name=\"Expanding\",\n        result=\"the result of passed functions\",\n        action=\"apply specified functions\",\n        refer_to=\"aggregate\",\n        win_type=\"expanding window\",\n        params=\"\"\"\n        func : str, dict, callable(pandas.Series) -> scalar, or list of such\n        *args : iterable\n        **kwargs : dict\"\"\",\n        build_rules=\"udf_aggregation\",\n    )\n    def expanding_aggregate(self, fold_axis, expanding_args, func, *args, **kwargs):\n        return ExpandingDefault.register(\n            pandas.core.window.expanding.Expanding.aggregate\n        )(self, expanding_args, func, *args, **kwargs)\n\n    @doc_utils.doc_window_method(\n        window_cls_name=\"Expanding\",\n        result=\"sum\",\n        refer_to=\"sum\",\n        win_type=\"expanding window\",\n        params=\"\"\"\n        *args : iterable\n        **kwargs : dict\"\"\",\n    )\n    def expanding_sum(self, fold_axis, expanding_args, *args, **kwargs):\n        return ExpandingDefault.register(pandas.core.window.expanding.Expanding.sum)(\n            self, expanding_args, *args, **kwargs\n        )\n\n    @doc_utils.doc_window_method(\n        window_cls_name=\"Expanding\",\n        result=\"minimum value\",\n        refer_to=\"min\",\n        win_type=\"expanding window\",\n        params=\"\"\"\n        *args : iterable\n        **kwargs : dict\"\"\",\n    )\n    def expanding_min(self, fold_axis, expanding_args, *args, **kwargs):\n        return ExpandingDefault.register(pandas.core.window.expanding.Expanding.min)(\n            self, expanding_args, *args, **kwargs\n        )\n\n    @doc_utils.doc_window_method(\n        window_cls_name=\"Expanding\",\n        result=\"maximum value\",\n        refer_to=\"max\",\n        win_type=\"expanding window\",\n        params=\"\"\"\n        *args : iterable\n    
    **kwargs : dict\"\"\",\n    )\n    def expanding_max(self, fold_axis, expanding_args, *args, **kwargs):\n        return ExpandingDefault.register(pandas.core.window.expanding.Expanding.max)(\n            self, expanding_args, *args, **kwargs\n        )\n\n    @doc_utils.doc_window_method(\n        window_cls_name=\"Expanding\",\n        result=\"mean value\",\n        refer_to=\"mean\",\n        win_type=\"expanding window\",\n        params=\"\"\"\n        *args : iterable\n        **kwargs : dict\"\"\",\n    )\n    def expanding_mean(self, fold_axis, expanding_args, *args, **kwargs):\n        return ExpandingDefault.register(pandas.core.window.expanding.Expanding.mean)(\n            self, expanding_args, *args, **kwargs\n        )\n\n    @doc_utils.doc_window_method(\n        window_cls_name=\"Expanding\",\n        result=\"median\",\n        refer_to=\"median\",\n        win_type=\"expanding window\",\n        params=\"\"\"\n        numeric_only : bool, default: False\n        engine : Optional[str], default: None\n        engine_kwargs : Optional[dict], default: None\n        **kwargs : dict\"\"\",\n    )\n    def expanding_median(\n        self,\n        fold_axis,\n        expanding_args,\n        numeric_only=False,\n        engine=None,\n        engine_kwargs=None,\n        **kwargs,\n    ):\n        return ExpandingDefault.register(pandas.core.window.expanding.Expanding.median)(\n            self,\n            expanding_args,\n            numeric_only=numeric_only,\n            engine=engine,\n            engine_kwargs=engine_kwargs,\n            **kwargs,\n        )\n\n    @doc_utils.doc_window_method(\n        window_cls_name=\"Expanding\",\n        result=\"variance\",\n        refer_to=\"var\",\n        win_type=\"expanding window\",\n        params=\"\"\"\n        ddof : int, default: 1\n        *args : iterable\n        **kwargs : dict\"\"\",\n    )\n    def expanding_var(self, fold_axis, expanding_args, ddof=1, *args, **kwargs):\n        return 
ExpandingDefault.register(pandas.core.window.expanding.Expanding.var)(\n            self, expanding_args, ddof=ddof, *args, **kwargs\n        )\n\n    @doc_utils.doc_window_method(\n        window_cls_name=\"Expanding\",\n        result=\"standard deviation\",\n        refer_to=\"std\",\n        win_type=\"expanding window\",\n        params=\"\"\"\n        ddof : int, default: 1\n        *args : iterable\n        **kwargs : dict\"\"\",\n    )\n    def expanding_std(self, fold_axis, expanding_args, ddof=1, *args, **kwargs):\n        return ExpandingDefault.register(pandas.core.window.expanding.Expanding.std)(\n            self, expanding_args, ddof=ddof, *args, **kwargs\n        )\n\n    @doc_utils.doc_window_method(\n        window_cls_name=\"Expanding\",\n        result=\"correlation\",\n        refer_to=\"corr\",\n        win_type=\"expanding window\",\n        params=\"\"\"\n        squeeze_self : bool\n        squeeze_other : bool\n        other : pandas.Series or pandas.DataFrame, default: None\n        pairwise : bool | None, default: None\n        ddof : int, default: 1\n        numeric_only : bool, default: False\n        **kwargs : dict\"\"\",\n    )\n    def expanding_corr(\n        self,\n        fold_axis,\n        expanding_args,\n        squeeze_self,\n        squeeze_other,\n        other=None,\n        pairwise=None,\n        ddof=1,\n        numeric_only=False,\n        **kwargs,\n    ):\n        other_for_default = (\n            other\n            if other is None\n            else (\n                other.to_pandas().squeeze(axis=1)\n                if squeeze_other\n                else other.to_pandas()\n            )\n        )\n        return ExpandingDefault.register(\n            pandas.core.window.expanding.Expanding.corr,\n            squeeze_self=squeeze_self,\n        )(\n            self,\n            expanding_args,\n            other=other_for_default,\n            pairwise=pairwise,\n            ddof=ddof,\n            
numeric_only=numeric_only,\n            **kwargs,\n        )\n\n    @doc_utils.doc_window_method(\n        window_cls_name=\"Expanding\",\n        result=\"sample covariance\",\n        refer_to=\"cov\",\n        win_type=\"expanding window\",\n        params=\"\"\"\n        squeeze_self : bool\n        squeeze_other : bool\n        other : pandas.Series or pandas.DataFrame, default: None\n        pairwise : bool | None, default: None\n        ddof : int, default: 1\n        numeric_only : bool, default: False\n        **kwargs : dict\"\"\",\n    )\n    def expanding_cov(\n        self,\n        fold_axis,\n        expanding_args,\n        squeeze_self,\n        squeeze_other,\n        other=None,\n        pairwise=None,\n        ddof=1,\n        numeric_only=False,\n        **kwargs,\n    ):\n        other_for_default = (\n            other\n            if other is None\n            else (\n                other.to_pandas().squeeze(axis=1)\n                if squeeze_other\n                else other.to_pandas()\n            )\n        )\n        return ExpandingDefault.register(\n            pandas.core.window.expanding.Expanding.cov,\n            squeeze_self=squeeze_self,\n        )(\n            self,\n            expanding_args,\n            other=other_for_default,\n            pairwise=pairwise,\n            ddof=ddof,\n            numeric_only=numeric_only,\n            **kwargs,\n        )\n\n    @doc_utils.doc_window_method(\n        window_cls_name=\"Expanding\",\n        result=\"standard deviation\",\n        refer_to=\"std\",\n        win_type=\"expanding window\",\n        params=\"\"\"\n        ddof : int, default: 1\n        *args : iterable\n        **kwargs : dict\"\"\",\n    )\n    def expanding_count(self, fold_axis, expanding_args, ddof=1, *args, **kwargs):\n        return ExpandingDefault.register(pandas.core.window.expanding.Expanding.count)(\n            self, expanding_args, *args, **kwargs\n        )\n\n    
@doc_utils.doc_window_method(\n        window_cls_name=\"Expanding\",\n        result=\"quantile\",\n        refer_to=\"quantile\",\n        win_type=\"expanding window\",\n        params=\"\"\"\n        quantile : float\n        interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}, default: 'linear'\n        **kwargs : dict\"\"\",\n    )\n    def expanding_quantile(\n        self, fold_axis, expanding_args, quantile, interpolation, **kwargs\n    ):\n        return ExpandingDefault.register(\n            pandas.core.window.expanding.Expanding.quantile\n        )(self, expanding_args, quantile, interpolation, **kwargs)\n\n    @doc_utils.doc_window_method(\n        window_cls_name=\"Expanding\",\n        result=\"unbiased standard error mean\",\n        refer_to=\"std\",\n        win_type=\"expanding window\",\n        params=\"\"\"\n        ddof : int, default: 1\n        numeric_only : bool, default: False\n        *args : iterable\n        **kwargs : dict\"\"\",\n    )\n    def expanding_sem(\n        self, fold_axis, expanding_args, ddof=1, numeric_only=False, *args, **kwargs\n    ):\n        return ExpandingDefault.register(pandas.core.window.expanding.Expanding.sem)(\n            self, expanding_args, ddof=ddof, numeric_only=numeric_only, *args, **kwargs\n        )\n\n    @doc_utils.doc_window_method(\n        window_cls_name=\"Expanding\",\n        result=\"unbiased skewness\",\n        refer_to=\"skew\",\n        win_type=\"expanding window\",\n        params=\"\"\"\n        numeric_only : bool, default: False\n        **kwargs : dict\"\"\",\n    )\n    def expanding_skew(self, fold_axis, expanding_args, numeric_only=False, **kwargs):\n        return ExpandingDefault.register(pandas.core.window.expanding.Expanding.skew)(\n            self, expanding_args, numeric_only=numeric_only, **kwargs\n        )\n\n    @doc_utils.doc_window_method(\n        window_cls_name=\"Expanding\",\n        result=\"Fisher’s definition of kurtosis without bias\",\n 
       refer_to=\"kurt\",\n        win_type=\"expanding window\",\n        params=\"\"\"\n        numeric_only : bool, default: False\n        **kwargs : dict\"\"\",\n    )\n    def expanding_kurt(self, fold_axis, expanding_args, numeric_only=False, **kwargs):\n        return ExpandingDefault.register(pandas.core.window.expanding.Expanding.kurt)(\n            self, expanding_args, numeric_only=numeric_only, **kwargs\n        )\n\n    @doc_utils.doc_window_method(\n        window_cls_name=\"Expanding\",\n        result=\"rank\",\n        refer_to=\"rank\",\n        win_type=\"expanding window\",\n        params=\"\"\"\n        method : {'average', 'min', 'max'}, default: 'average'\n        ascending : bool, default: True\n        pct : bool, default: False\n        numeric_only : bool, default: False\n        *args : iterable\n        **kwargs : dict\"\"\",\n    )\n    def expanding_rank(\n        self,\n        fold_axis,\n        expanding_args,\n        method=\"average\",\n        ascending=True,\n        pct=False,\n        numeric_only=False,\n        *args,\n        **kwargs,\n    ):\n        return ExpandingDefault.register(pandas.core.window.expanding.Expanding.rank)(\n            self,\n            expanding_args,\n            method=method,\n            ascending=ascending,\n            pct=pct,\n            numeric_only=numeric_only,\n            *args,\n            **kwargs,\n        )\n\n    # End of Expanding methods\n\n    # Window methods\n\n    @doc_utils.doc_window_method(\n        window_cls_name=\"Rolling\",\n        win_type=\"window of the specified type\",\n        result=\"mean\",\n        refer_to=\"mean\",\n        params=\"\"\"\n        *args : iterable\n        **kwargs : dict\"\"\",\n    )\n    def window_mean(self, fold_axis, window_kwargs, *args, **kwargs):\n        return RollingDefault.register(pandas.core.window.Window.mean)(\n            self, window_kwargs, *args, **kwargs\n        )\n\n    @doc_utils.doc_window_method(\n        
window_cls_name=\"Rolling\",\n        win_type=\"window of the specified type\",\n        result=\"standard deviation\",\n        refer_to=\"std\",\n        params=\"\"\"\n        ddof : int, default: 1\n        *args : iterable\n        **kwargs : dict\"\"\",\n    )\n    def window_std(self, fold_axis, window_kwargs, ddof=1, *args, **kwargs):\n        return RollingDefault.register(pandas.core.window.Window.std)(\n            self, window_kwargs, ddof, *args, **kwargs\n        )\n\n    @doc_utils.doc_window_method(\n        window_cls_name=\"Rolling\",\n        win_type=\"window of the specified type\",\n        result=\"sum\",\n        refer_to=\"sum\",\n        params=\"\"\"\n        *args : iterable\n        **kwargs : dict\"\"\",\n    )\n    def window_sum(self, fold_axis, window_kwargs, *args, **kwargs):\n        return RollingDefault.register(pandas.core.window.Window.sum)(\n            self, window_kwargs, *args, **kwargs\n        )\n\n    @doc_utils.doc_window_method(\n        window_cls_name=\"Rolling\",\n        win_type=\"window of the specified type\",\n        result=\"variance\",\n        refer_to=\"var\",\n        params=\"\"\"\n        ddof : int, default: 1\n        *args : iterable\n        **kwargs : dict\"\"\",\n    )\n    def window_var(self, fold_axis, window_kwargs, ddof=1, *args, **kwargs):\n        return RollingDefault.register(pandas.core.window.Window.var)(\n            self, window_kwargs, ddof, *args, **kwargs\n        )\n\n    # End of Window methods\n\n    # Categories methods\n\n    @doc_utils.add_one_column_warning\n    @doc_utils.add_refer_to(\"Series.cat.codes\")\n    def cat_codes(self):\n        \"\"\"\n        Convert underlying categories data into its codes.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            New QueryCompiler containing the integer codes of the underlying\n            categories.\n        \"\"\"\n        return CatDefault.register(pandas.Series.cat.codes)(self)\n\n    # End of 
Categories methods\n\n    # List accessor's methods\n\n    @doc_utils.add_one_column_warning\n    @doc_utils.add_refer_to(\"Series.list.flatten\")\n    def list_flatten(self):\n        \"\"\"\n        Flatten list values.\n\n        Returns\n        -------\n        BaseQueryCompiler\n        \"\"\"\n        return ListDefault.register(pandas.Series.list.flatten)(self)\n\n    @doc_utils.add_one_column_warning\n    @doc_utils.add_refer_to(\"Series.list.len\")\n    def list_len(self):\n        \"\"\"\n        Return the length of each list in the Series.\n\n        Returns\n        -------\n        BaseQueryCompiler\n        \"\"\"\n        return ListDefault.register(pandas.Series.list.len)(self)\n\n    @doc_utils.add_one_column_warning\n    @doc_utils.add_refer_to(\"Series.list.__getitem__\")\n    def list__getitem__(self, key):  # noqa: PR01\n        \"\"\"\n        Index or slice lists in the Series.\n\n        Returns\n        -------\n        BaseQueryCompiler\n        \"\"\"\n        return ListDefault.register(pandas.Series.list.__getitem__)(self, key=key)\n\n    # End of List accessor's methods\n\n    # Struct accessor's methods\n\n    @doc_utils.add_one_column_warning\n    @doc_utils.add_refer_to(\"Series.struct.dtypes\")\n    def struct_dtypes(self):\n        \"\"\"\n        Return the dtype object of each child field of the struct.\n\n        Returns\n        -------\n        BaseQueryCompiler\n        \"\"\"\n        return StructDefault.register(pandas.Series.struct.dtypes)(self)\n\n    @doc_utils.add_one_column_warning\n    @doc_utils.add_refer_to(\"Series.struct.field\")\n    def struct_field(self, name_or_index):  # noqa: PR01\n        \"\"\"\n        Extract a child field of a struct as a Series.\n\n        Returns\n        -------\n        BaseQueryCompiler\n        \"\"\"\n        return StructDefault.register(pandas.Series.struct.field)(\n            self, name_or_index=name_or_index\n        )\n\n    @doc_utils.add_one_column_warning\n    
@doc_utils.add_refer_to(\"Series.struct.explode\")\n    def struct_explode(self):\n        \"\"\"\n        Extract all child fields of a struct as a DataFrame.\n\n        Returns\n        -------\n        BaseQueryCompiler\n        \"\"\"\n        return StructDefault.register(pandas.Series.struct.explode)(self)\n\n    # End of Struct accessor's methods\n\n    # DataFrame methods\n\n    def invert(self):\n        \"\"\"\n        Apply bitwise inversion for each element of the QueryCompiler.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            New QueryCompiler containing bitwise inversion for each value.\n        \"\"\"\n        return DataFrameDefault.register(pandas.DataFrame.__invert__)(self)\n\n    @doc_utils.doc_reduce_agg(\n        method=\"unbiased kurtosis\", refer_to=\"kurt\", extra_params=[\"skipna\", \"**kwargs\"]\n    )\n    def kurt(self, axis, numeric_only=False, skipna=True, **kwargs):\n        return DataFrameDefault.register(pandas.DataFrame.kurt)(\n            self, axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs\n        )\n\n    sum_min_count = sum\n    prod_min_count = prod\n\n    @doc_utils.add_refer_to(\"DataFrame.compare\")\n    def compare(self, other, align_axis, keep_shape, keep_equal, result_names):\n        \"\"\"\n        Compare data of two QueryCompilers and highlight the difference.\n\n        Parameters\n        ----------\n        other : BaseQueryCompiler\n            Query compiler to compare with. 
Have to be the same shape and the same\n            labeling as `self`.\n        align_axis : {0, 1}\n        keep_shape : bool\n        keep_equal : bool\n        result_names : tuple\n\n        Returns\n        -------\n        BaseQueryCompiler\n            New QueryCompiler containing the differences between `self` and passed\n            query compiler.\n        \"\"\"\n        return DataFrameDefault.register(pandas.DataFrame.compare)(\n            self,\n            other=other,\n            align_axis=align_axis,\n            keep_shape=keep_shape,\n            keep_equal=keep_equal,\n            result_names=result_names,\n        )\n\n    @doc_utils.add_refer_to(\"Series.case_when\")\n    def case_when(self, caselist):  # noqa: PR01, RT01, D200\n        \"\"\"\n        Replace values where the conditions are True.\n        \"\"\"\n        # A workaround for https://github.com/modin-project/modin/issues/7041\n        qc_type = type(self)\n        caselist = [\n            tuple(\n                data.to_pandas().squeeze(axis=1) if isinstance(data, qc_type) else data\n                for data in case_tuple\n            )\n            for case_tuple in caselist\n        ]\n        return SeriesDefault.register(pandas.Series.case_when)(self, caselist=caselist)\n\n    def get_pandas_backend(self) -> Optional[str]:\n        \"\"\"\n        Get backend stored in `_modin_frame`.\n\n        Returns\n        -------\n        str | None\n            Backend name.\n        \"\"\"\n        return self._modin_frame._pandas_backend\n\n    def repartition(self, axis=None):\n        \"\"\"\n        Repartitioning QueryCompiler objects to get ideal partitions inside.\n\n        Allows to improve performance where the query compiler can't improve\n        yet by doing implicit repartitioning.\n\n        Parameters\n        ----------\n        axis : {0, 1, None}, optional\n            The axis along which the repartitioning occurs.\n            `None` is used for 
repartitioning along both axes.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            The repartitioned BaseQueryCompiler.\n        \"\"\"\n        axes = [0, 1] if axis is None else [axis]\n\n        new_query_compiler = self\n        for _ax in axes:\n            new_query_compiler = new_query_compiler.__constructor__(\n                new_query_compiler._modin_frame.apply_full_axis(\n                    _ax,\n                    lambda df: df,\n                    new_index=self._modin_frame.copy_index_cache(copy_lengths=_ax == 1),\n                    new_columns=self._modin_frame.copy_columns_cache(\n                        copy_lengths=_ax == 0\n                    ),\n                    dtypes=self._modin_frame.copy_dtypes_cache(),\n                    keep_partitioning=False,\n                    sync_labels=False,\n                )\n            )\n        return new_query_compiler\n\n    # End of DataFrame methods\n"
  },
  {
    "path": "modin/core/storage_formats/base/query_compiler_calculator.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"\nModule contains ``BackendCostCalculator`` class.\n\n``BackendCostCalculator`` is used to determine the casting cost\nbetween a set of different backends. It aggregates the cost across\nall query compilers to determine the best query compiler to use.\n\"\"\"\n\nimport random\nfrom types import MappingProxyType\nfrom typing import Any, Optional\n\nfrom modin.config import Backend, BackendJoinConsiderAllBackends\nfrom modin.core.storage_formats.base.query_compiler import (\n    BaseQueryCompiler,\n    QCCoercionCost,\n)\nfrom modin.logging import get_logger\nfrom modin.logging.metrics import emit_metric\n\n\ndef all_switchable_backends() -> list[str]:\n    \"\"\"\n    Return a list of all currently active backends that are candidates for switching.\n\n    Returns\n    -------\n    list\n        A list of valid backends.\n    \"\"\"\n    return list(\n        filter(\n            # Disable automatically switching to these engines for now, because\n            # 1) _get_prepared_factory_for_backend() currently calls\n            # _initialize_engine(), which starts up the ray/dask/unidist\n            #  processes\n            # 2) we can't decide to switch to 
unidist in the middle of execution.\n            lambda backend: backend not in (\"Ray\", \"Unidist\", \"Dask\"),\n            Backend.get_active_backends(),\n        )\n    )\n\n\nclass AggregatedBackendData:\n    \"\"\"\n    Contains information on Backends considered for computation.\n\n    Parameters\n    ----------\n    backend : str\n        String representing the backend name.\n    qc_cls : type[QueryCompiler]\n        The query compiler sub-class for this backend.\n    \"\"\"\n\n    def __init__(self, backend: str, qc_cls: type[BaseQueryCompiler]):\n        self.backend = backend\n        self.qc_cls = qc_cls\n        self.cost = 0\n        self.max_cost = qc_cls.max_cost()\n\n\nclass BackendCostCalculator:\n    \"\"\"\n    Calculate which Backend should be used for an operation.\n\n    Given a set of QueryCompilers containing various data, determine\n    which query compiler's backend would minimize the cost of casting\n    or coercion. Use the aggregate sum of coercion to determine overall\n    cost.\n\n    Parameters\n    ----------\n    operation_arguments : MappingProxyType[str, Any]\n        Mapping from operation argument names to their values.\n    api_cls_name : str or None\n        Representing the class name of the function being called.\n    operation : str representing the operation being performed\n    query_compilers : list of query compiler arguments\n    preop_switch : bool\n        True if the operation is a pre-operation switch point.\n    \"\"\"\n\n    def __init__(\n        self,\n        *,\n        operation_arguments: MappingProxyType[str, Any],\n        api_cls_name: Optional[str],\n        operation: str,\n        query_compilers: list[BaseQueryCompiler],\n        preop_switch: bool,\n    ):\n        from modin.core.execution.dispatching.factories.dispatcher import (\n            FactoryDispatcher,\n        )\n\n        self._qc_list: list[BaseQueryCompiler] = []\n        self._result_backend = None\n        self._api_cls_name = 
api_cls_name\n        self._op = operation\n        self._operation_arguments = operation_arguments\n        self._backend_data = {}\n        self._qc_list = query_compilers[:]\n        for query_compiler in query_compilers:\n            # If a QC's backend was not configured as active, we need to create an entry for it here.\n            backend = query_compiler.get_backend()\n            if backend not in self._backend_data:\n                self._backend_data[backend] = AggregatedBackendData(\n                    backend,\n                    FactoryDispatcher._get_prepared_factory_for_backend(\n                        backend=backend\n                    ).io_cls.query_compiler_cls,\n                )\n        if preop_switch and BackendJoinConsiderAllBackends.get():\n            # Initialize backend data for any backends not found among query compiler arguments.\n            # Because we default to the first query compiler's backend if no cost information is available,\n            # this initialization must occur after iterating over query compiler arguments to ensure\n            # correct ordering in dictionary arguments.\n            for backend in all_switchable_backends():\n                if backend not in self._backend_data:\n                    self._backend_data[backend] = AggregatedBackendData(\n                        backend,\n                        FactoryDispatcher._get_prepared_factory_for_backend(\n                            backend=backend\n                        ).io_cls.query_compiler_cls,\n                    )\n\n    def calculate(self) -> str:\n        \"\"\"\n        Calculate which query compiler we should cast to.\n\n        Switching calculation is performed as follows:\n        - For every registered query compiler in qc_list, with backend `backend_from`, compute\n          `self_cost = qc_from.stay_cost(...)` and add it to the total cost for `backend_from`.\n          - For every valid target `backend_to`, compute 
`qc_from.move_to_cost(qc_cls_to, ...)`. If it\n            returns None, instead compute `qc_cls_to.move_to_me_cost(qc_from, ...)`. Add the result\n            to the cost for `backend_to`.\n        At a high level, the cost for choosing a particular backend is the sum of\n            (all stay costs for data already on that backend)\n            + (cost of moving all other query compilers to this backend)\n\n        If the operation is a registered pre-operation switch point, then the list of target backends\n        is ALL active backends. Otherwise, only backends found among the arguments are considered.\n        Post-operation switch points are not yet supported.\n\n        If the arguments contain no query compilers for a particular backend, then there are no stay\n        costs. In this scenario, we expect the move_to cost for this backend to outweigh the corresponding\n        stay costs for each query compiler's original backend.\n\n        If no argument QCs have cost information for each other (that is, move_to_cost and move_to_me_cost\n        returns None), then we attempt to move all data to the backend of the first QC.\n\n        We considered a few alternative algorithms for switching calculation:\n\n        1. Instead of considering all active backends, consider only backends found among input QCs.\n        This was used in the calculator's original implementation, as we figured transfer cost to\n        unrelated backends would outweigh any possible gains in computation speed. However, certain\n        pathological cases that significantly changed the size of input or output data (e.g. 
cross join)\n        would create situations where transferring data after the computation became prohibitively\n        expensive, so we chose to allow switching to unrelated backends.\n        Additionally, the original implementation had a bug where stay_cost was only computed for the\n        _first_ query compiler of each backend, thus under-reporting the cost of computation for any\n        backend with multiple QCs present. In practice this very rarely affected the chosen result.\n        2. Compute stay/move costs only once for each backend pair, but force QCs to consider other\n        arguments when calculating.\n        This approach is the most robust and accurate for cases like cross join, where a product of\n        transfer costs between backends is more reflective of cost than size. This approach requires\n        more work in the query compiler, as each QC must be aware of when multiple QC arguments are\n        passed and adjust the cost computation accordingly. It is also unclear how often this would\n        make a meaningful difference compared to the summation approach.\n\n        Returns\n        -------\n        str\n            A string representing a backend.\n\n        Raises\n        ------\n        ValueError\n            Raises ValueError when the reported transfer cost for every backend exceeds its maximum cost.\n        \"\"\"\n        if self._result_backend is not None:\n            return self._result_backend\n        if len(self._qc_list) == 1:\n            return self._qc_list[0].get_backend()\n        if len(self._qc_list) == 0:\n            raise ValueError(\"No query compilers registered\")\n        # See docstring for explanation of switching decision algorithm.\n        for qc_from in self._qc_list:\n            # Add self cost for the current query compiler\n            self_cost = qc_from.stay_cost(\n                self._api_cls_name, self._op, self._operation_arguments\n            )\n            backend_from = 
qc_from.get_backend()\n            if self_cost is not None:\n                self._add_cost_data(backend_from, self_cost)\n\n            for backend_to, agg_data_to in self._backend_data.items():\n                if backend_to == backend_from:\n                    continue\n                qc_cls_to = agg_data_to.qc_cls\n                cost = qc_from.move_to_cost(\n                    qc_cls_to,\n                    self._api_cls_name,\n                    self._op,\n                    self._operation_arguments,\n                )\n                if cost is not None:\n                    self._add_cost_data(backend_to, cost)\n                else:\n                    # We have some information asymmetry in query compilers,\n                    # qc_from does not know about qc_to types so we instead\n                    # ask the same question but of qc_to.\n                    cost = qc_cls_to.move_to_me_cost(\n                        qc_from,\n                        self._api_cls_name,\n                        self._op,\n                        self._operation_arguments,\n                    )\n                    if cost is not None:\n                        self._add_cost_data(backend_to, cost)\n\n        self._result_backend = None\n\n        def get_min_cost_backend(skip_exceeds_max_cost=True) -> str:\n            result = None\n            min_value = None\n            for k, v in self._backend_data.items():\n                if skip_exceeds_max_cost and v.cost > v.max_cost:\n                    continue\n                if min_value is None or min_value > v.cost:\n                    min_value = v.cost\n                    result = k\n            return result\n\n        # Get the best backend, skipping backends where we may exceed\n        # the total cost\n        self._result_backend = get_min_cost_backend(skip_exceeds_max_cost=True)\n\n        # If we still do not have a backend, pick the best backend while\n        # ignoring max_cost\n        if 
self._result_backend is None:\n            self._result_backend = get_min_cost_backend(skip_exceeds_max_cost=False)\n\n        # This should not happen\n        if self._result_backend is None:\n            raise ValueError(\"No backends are available to calculate costs.\")\n\n        if len(self._backend_data) > 1:\n            get_logger().info(\n                f\"BackendCostCalculator results for {'pd' if self._api_cls_name is None else self._api_cls_name}.{self._op}: {self._calc_result_log(self._result_backend)}\"\n            )\n            # Does not need to be secure, should not use system entropy\n            metrics_group = \"%04x\" % random.randrange(16**4)\n            for qc in self._qc_list:\n                max_shape = qc._max_shape()\n                backend = qc.get_backend()\n                emit_metric(\n                    f\"hybrid.merge.candidate.{backend}.group.{metrics_group}.rows\",\n                    max_shape[0],\n                )\n                emit_metric(\n                    f\"hybrid.merge.candidate.{backend}.group.{metrics_group}.cols\",\n                    max_shape[1],\n                )\n            for k, v in self._backend_data.items():\n                emit_metric(\n                    f\"hybrid.merge.candidate.{k}.group.{metrics_group}.cost\", v.cost\n                )\n            emit_metric(\n                f\"hybrid.merge.decision.{self._result_backend}.group.{metrics_group}\",\n                1,\n            )\n\n        return self._result_backend\n\n    def _add_cost_data(self, backend, cost):\n        \"\"\"\n        Add the cost data to the calculator.\n\n        Parameters\n        ----------\n        backend : str\n            String representing the backend for this engine.\n        cost : dict\n            Dictionary of query compiler classes to costs.\n        \"\"\"\n        # We can assume that if we call this method, backend\n        # exists in the backend_data map\n        
QCCoercionCost.validate_coersion_cost(cost)\n        self._backend_data[backend].cost += cost\n\n    def _calc_result_log(self, selected_backend: str) -> str:\n        \"\"\"\n        Create a string summary of the backend costs.\n\n        The format is\n            [*|][backend name]:[cost]/[max_cost],...\n        where '*' indicates this was the selected backend\n        and [cost]/[max_cost] represents the aggregated\n        cost of moving to that backend over the maximum\n        cost allowed on that backend.\n\n        Parameters\n        ----------\n        selected_backend : str\n            String representing the backend selected by\n            the calculator.\n\n        Returns\n        -------\n        str\n            String representation of calculator state.\n        \"\"\"\n        return \", \".join(\n            f\"{'*'+k if k is selected_backend else k}:{v.cost}/{v.max_cost}\"\n            for k, v in self._backend_data.items()\n        )\n"
  },
  {
    "path": "modin/core/storage_formats/pandas/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"The module represents the query compiler level for the pandas storage format.\"\"\"\n\nfrom .query_compiler import PandasQueryCompiler\n\n__all__ = [\"PandasQueryCompiler\"]\n"
  },
  {
    "path": "modin/core/storage_formats/pandas/aggregations.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Contains implementations for aggregation functions.\"\"\"\n\nfrom __future__ import annotations\n\nfrom enum import Enum\nfrom typing import TYPE_CHECKING, Callable, Tuple\n\nimport numpy as np\nimport pandas\nfrom pandas.core.dtypes.common import is_numeric_dtype\n\nif TYPE_CHECKING:\n    from .query_compiler import PandasQueryCompiler\n\nfrom modin.utils import MODIN_UNNAMED_SERIES_LABEL\n\n\nclass CorrCovBuilder:\n    \"\"\"Responsible for building pandas query compiler's methods computing correlation and covariance matrices.\"\"\"\n\n    class Method(Enum):\n        \"\"\"Enum specifying what method to use (either CORR for correlation or COV for covariance).\"\"\"\n\n        CORR = 1\n        COV = 2\n\n    @classmethod\n    def build_corr_method(\n        cls,\n    ) -> Callable[[PandasQueryCompiler, str, int, bool], PandasQueryCompiler]:\n        \"\"\"\n        Build a query compiler method computing the correlation matrix.\n\n        Returns\n        -------\n        callable(qc: PandasQueryCompiler, method: str, min_periods: int, numeric_only: bool) -> PandasQueryCompiler\n            A callable matching the ``BaseQueryCompiler.corr`` signature and 
computing the correlation matrix.\n        \"\"\"\n\n        def corr_method(\n            qc: PandasQueryCompiler,\n            method: str,\n            min_periods: int = 1,\n            numeric_only: bool = True,\n        ) -> PandasQueryCompiler:\n            # Further implementation is designed for the default pandas backend (numpy)\n            if method != \"pearson\" or qc.get_pandas_backend() == \"pyarrow\":\n                return super(type(qc), qc).corr(\n                    method=method, min_periods=min_periods, numeric_only=numeric_only\n                )\n\n            if not numeric_only and qc.frame_has_materialized_columns:\n                new_index, new_columns = (\n                    qc._modin_frame.copy_columns_cache(),\n                    qc._modin_frame.copy_columns_cache(),\n                )\n                new_dtypes = pandas.Series(\n                    np.repeat(pandas.api.types.pandas_dtype(\"float\"), len(new_columns)),\n                    index=new_columns,\n                )\n            elif numeric_only and qc.frame_has_materialized_dtypes:\n                old_dtypes = qc.dtypes\n\n                new_columns = old_dtypes[old_dtypes.map(is_numeric_dtype)].index\n                new_index = new_columns.copy()\n                new_dtypes = pandas.Series(\n                    np.repeat(pandas.api.types.pandas_dtype(\"float\"), len(new_columns)),\n                    index=new_columns,\n                )\n            else:\n                new_index, new_columns, new_dtypes = None, None, None\n\n            map, reduce = cls._build_map_reduce_methods(\n                min_periods, method=cls.Method.CORR, numeric_only=numeric_only\n            )\n\n            reduced = qc._modin_frame.apply_full_axis(axis=1, func=map)\n            # The 'reduced' dataset has the shape either (num_cols, num_cols + 3) for a non-NaN case\n            # or (num_cols, num_cols * 4) for a NaN case, so it's acceptable to call `.combine_and_apply()`\n  
          # here as the number of cols is usually quite small\n            result = reduced.combine_and_apply(\n                func=reduce,\n                new_index=new_index,\n                new_columns=new_columns,\n                new_dtypes=new_dtypes,\n            )\n            return qc.__constructor__(result)\n\n        return corr_method\n\n    @classmethod\n    def build_cov_method(\n        cls,\n    ) -> Callable[[PandasQueryCompiler, int, int], PandasQueryCompiler]:\n        \"\"\"\n        Build a query compiler method computing the covariance matrix.\n\n        Returns\n        -------\n        callable(qc: PandasQueryCompiler, min_periods: int, ddof: int) -> PandasQueryCompiler\n            A callable matching the ``BaseQueryCompiler.cov`` signature and computing the covariance matrix.\n        \"\"\"\n        raise NotImplementedError(\"Computing covariance is not yet implemented.\")\n\n    @classmethod\n    def _build_map_reduce_methods(\n        cls, min_periods: int, method: Method, numeric_only: bool\n    ) -> Tuple[\n        Callable[[pandas.DataFrame], pandas.DataFrame],\n        Callable[[pandas.DataFrame], pandas.DataFrame],\n    ]:\n        \"\"\"\n        Build MapReduce kernels for the specified corr/cov method.\n\n        Parameters\n        ----------\n        min_periods : int\n            The parameter to pass to the reduce method.\n        method : CorrCovBuilder.Method\n            Whether the kernels compute correlation or covariance.\n        numeric_only : bool\n            Whether to only include numeric types.\n\n        Returns\n        -------\n        Tuple[Callable(pandas.DataFrame) -> pandas.DataFrame, Callable(pandas.DataFrame) -> pandas.DataFrame]\n            A tuple holding the Map (at the first position) and the Reduce (at the second position) kernels\n            computing correlation/covariance matrix.\n        \"\"\"\n        if method == cls.Method.COV:\n            raise NotImplementedError(\"Computing 
covariance is not yet implemented.\")\n\n        return lambda df: _CorrCovKernels.map(\n            df, numeric_only\n        ), lambda df: _CorrCovKernels.reduce(df, min_periods, method)\n\n\nclass _CorrCovKernels:\n    \"\"\"Holds kernel functions computing correlation/covariance matrices in a MapReduce manner.\"\"\"\n\n    @classmethod\n    def map(cls, df: pandas.DataFrame, numeric_only: bool) -> pandas.DataFrame:\n        \"\"\"\n        Perform the Map phase to compute the corr/cov matrix.\n\n        In this kernel we compute all the required components to compute\n        the correlation matrix at the reduce phase, the required components are:\n            1. Matrix holding sums of pairwise multiplications between all columns\n               defined as ``M[col1, col2] = sum(col1[i] * col2[i] for i in range(col_len))``\n            2. Sum for each column (special case if there are NaN values)\n            3. Sum of squares for each column (special case if there are NaN values)\n            4. 
Number of values in each column (special case if there are NaN values)\n\n        Parameters\n        ----------\n        df : pandas.DataFrame\n            Partition to compute the aggregations for.\n        numeric_only : bool\n            Whether to only include numeric types.\n\n        Returns\n        -------\n        pandas.DataFrame\n            A MultiIndex columned DataFrame holding the described aggregation results for this\n            specific partition under the following keys: ``[\"mul\", \"sum\", \"pow2_sum\", \"count\"]``\n        \"\"\"\n        if numeric_only:\n            df = df.select_dtypes(include=\"number\")\n        # It's more convenient to use a NumPy array here as it appears to perform\n        # much faster in for-loops which this kernel function has plenty of\n        raw_df = df.values.T\n        try:\n            nan_mask = np.isnan(raw_df)\n        except TypeError as e:\n            # Pandas raises ValueError on unsupported types, so casting\n            # the exception to a proper type\n            raise ValueError(\"Unsupported types with 'numeric_only=False'\") from e\n\n        has_nans = nan_mask.sum() != 0\n\n        if has_nans:\n            if not raw_df.flags.writeable:\n                # making a copy if the buffer is read-only\n                raw_df = raw_df.copy()\n            # Replacing all NaNs with zeros so we can use much\n            # faster `np.sum()` instead of slow `np.nansum()`\n            np.putmask(raw_df, nan_mask, values=0)\n\n        cols = df.columns\n        # Here we compute a sum of pairwise multiplications between all columns\n        # result:\n        #   col1: [sum(col1 * col2), sum(col1 * col3), ... sum(col1 * colN)]\n        #   col2: [sum(col2 * col3), sum(col2 * col4), ... 
sum(col2 * colN)]\n        #   ...\n        sum_of_pairwise_mul = pandas.DataFrame(\n            np.dot(raw_df, raw_df.T), index=cols, columns=cols, copy=False\n        )\n\n        if has_nans:\n            sums, sums_of_squares, count = cls._compute_nan_aggs(raw_df, cols, nan_mask)\n        else:\n            sums, sums_of_squares, count = cls._compute_non_nan_aggs(df)\n\n        aggregations = pandas.concat(\n            [sum_of_pairwise_mul, sums, sums_of_squares, count],\n            copy=False,\n            axis=1,\n            keys=[\"mul\", \"sum\", \"pow2_sum\", \"count\"],\n        )\n\n        return aggregations\n\n    @staticmethod\n    def _compute_non_nan_aggs(\n        df: pandas.DataFrame,\n    ) -> Tuple[pandas.Series, pandas.Series, pandas.Series]:\n        \"\"\"\n        Compute sums, sums of square and the number of observations for a partition assuming there are no NaN values in it.\n\n        Parameters\n        ----------\n        df : pandas.DataFrame\n            Partition to compute the aggregations for.\n\n        Returns\n        -------\n        Tuple[sums: pandas.Series, sums_of_squares: pandas.Series, count: pandas.Series]\n            A tuple storing Series where each of them holds the result for\n            one of the described aggregations.\n        \"\"\"\n        sums = df.sum().rename(MODIN_UNNAMED_SERIES_LABEL)\n        sums_of_squares = (df**2).sum().rename(MODIN_UNNAMED_SERIES_LABEL)\n        count = pandas.Series(\n            np.repeat(len(df), len(df.columns)), index=df.columns, copy=False\n        ).rename(MODIN_UNNAMED_SERIES_LABEL)\n        return sums, sums_of_squares, count\n\n    @staticmethod\n    def _compute_nan_aggs(\n        raw_df: np.ndarray, cols: pandas.Index, nan_mask: np.ndarray\n    ) -> Tuple[pandas.DataFrame, pandas.DataFrame, pandas.DataFrame]:\n        \"\"\"\n        Compute sums, sums of square and the number of observations for a partition assuming there are NaN values in it.\n\n        
Parameters\n        ----------\n        raw_df : np.ndarray\n            Raw values of the partition to compute the aggregations for.\n        cols : pandas.Index\n            Columns of the partition.\n        nan_mask : np.ndarray[bool]\n            Boolean mask showing positions of NaN values in the `raw_df`.\n\n        Returns\n        -------\n        Tuple[sums: pandas.DataFrame, sums_of_squares: pandas.DataFrame, count: pandas.DataFrame]\n            A tuple storing DataFrames where each of them holds the result for\n            one of the described aggregations.\n        \"\"\"\n        # Unfortunately, in case of NaN values we are forced to compute multiple sums/square sums/counts\n        # for each column because we have to exclude values at positions of NaN values in each other\n        # column individually.\n        # Imagine we have a dataframe like this:\n        #   col1: 1, 2  , 3  , 4\n        #   col2: 2, NaN, 3  , 4\n        #   col3: 4, 5  , NaN, 7\n        # In this case we would need to compute 2 different sums/square sums/count for 'col1':\n        #   - The first one excluding the values at the NaN positions of 'col2' (1 + 3 + 4)\n        #   - And the second one excluding the values at the NaN positions of 'col3' (1 + 2 + 4)\n        # and then also do the same for the rest of the columns. 
At the end this should form a matrix\n        # of pairwise sums/square sums/counts:\n        #   sums[col1, col2] = sum(col1[i] for i in non_NA_indices_of_col2)\n        #   sums[col2, col1] = sum(col2[i] for i in non_NA_indices_of_col1)\n        #   ...\n        # Note that sums[col1, col2] != sums[col2, col1]\n        sums = {}\n        sums_of_squares = {}\n        count = {}\n\n        # TODO: is it possible to get rid of this for-loop somehow?\n        for i, col in enumerate(cols):\n            # Here we're taking each column, resizing it to the original frame's shape to compute\n            # aggregations for each other column and then excluding values at those positions where\n            # other columns had NaN values by setting zeros using the validity mask:\n            #  col1: 1, 2  , 3  , 4   df[i].resize()  col1: 1, 2, 3, 4  putmask()  col1: 1, 2, 3, 4\n            #  col2: 2, NaN, 3  , 4   ------------->  col1: 1, 2, 3, 4  -------->  col1: 1, 0, 3, 4\n            #  col3: 4, 5  , NaN, 7                   col1: 1, 2, 3, 4             col1: 1, 2, 0, 4\n            # Note that 'NaN' values in this diagram are just for the sake of visibility, in reality\n            # they were already replaced by zeroes at the beginning of the 'map' phase.\n            col_vals = np.resize(raw_df[i], raw_df.shape)\n            np.putmask(col_vals, nan_mask, values=0)\n\n            sums[col] = pandas.Series(np.sum(col_vals, axis=1), index=cols, copy=False)\n            sums_of_squares[col] = pandas.Series(\n                np.sum(col_vals**2, axis=1), index=cols, copy=False\n            )\n            count[col] = pandas.Series(\n                nan_mask.shape[1] - np.count_nonzero(nan_mask | nan_mask[i], axis=1),\n                index=cols,\n                copy=False,\n            )\n\n        sums = pandas.concat(sums, axis=1, copy=False)\n        sums_of_squares = pandas.concat(sums_of_squares, axis=1, copy=False)\n        count = pandas.concat(count, axis=1, 
copy=False)\n\n        return sums, sums_of_squares, count\n\n    @classmethod\n    def reduce(\n        cls, df: pandas.DataFrame, min_periods: int, method: CorrCovBuilder.Method\n    ) -> pandas.DataFrame:\n        \"\"\"\n        Perform the Reduce phase to compute the corr/cov matrix.\n\n        Parameters\n        ----------\n        df : pandas.DataFrame\n            A dataframe holding aggregations computed for each partition\n            concatenated along the rows axis.\n        min_periods : int\n            Minimum number of observations required per pair of columns to have a valid result.\n        method : CorrCovBuilder.Method\n            Whether to build a correlation or a covariance matrix.\n\n        Returns\n        -------\n        pandas.DataFrame\n            Either correlation or covariance matrix.\n        \"\"\"\n        if method == CorrCovBuilder.Method.COV:\n            raise NotImplementedError(\"Computing covariance is not yet implemented.\")\n        # The `df` here accumulates the aggregation results retrieved from each row partition\n        # and combined together along the rows axis, so the `df` looks something like this:\n        #   mul  sums  pow2_sums\n        # a .    .     .\n        # b .    .     .            <--- part1 result\n        # c .    .     .\n        # ---------------------------\n        # a .    .     .\n        # b .    .     .            <--- part2 result\n        # c .    .     
.\n        # ---------------------------\n        # ...\n        # So to get the total result we have to group on the index and sum the values\n        total_agg = df.groupby(level=0).sum()\n        total_agg = cls._maybe_combine_nan_and_non_nan_aggs(total_agg)\n\n        sum_of_pairwise_mul = total_agg[\"mul\"]\n        sums = total_agg[\"sum\"]\n        sums_of_squares = total_agg[\"pow2_sum\"]\n        count = total_agg[\"count\"]\n\n        cols = sum_of_pairwise_mul.columns\n        # If there are NaNs in the original dataframe, then we have computed a matrix\n        # of sums/square sums/counts at the Map phase, meaning that we now have multiple\n        # columns in `sums`.\n        has_nans = len(sums.columns) > 1\n        if not has_nans:\n            # 'count' is the same for all columns in a non-NaN case, so converting\n            # it to scalar for faster binary operations\n            count = count.iloc[0, 0]\n            if count < min_periods:\n                # Fast-path for too small data\n                return pandas.DataFrame(index=cols, columns=cols, dtype=\"float\")\n\n            # Converting frame to a Series for more convenient handling\n            sums = sums.squeeze(axis=1)\n            sums_of_squares = sums_of_squares.squeeze(axis=1)\n\n        means = sums / count\n        std = np.sqrt(sums_of_squares - 2 * means * sums + count * (means**2))\n\n        # The 'has_nans' condition was moved out of the loop, so the loops themselves\n        # work faster as not being slowed by extra conditions in them\n        if has_nans:\n            return cls._build_corr_table_nan(\n                sum_of_pairwise_mul, means, sums, count, std, cols, min_periods\n            )\n        else:\n            # We've already processed the 'min_periods' parameter for a non-na case above,\n            # so don't need to pass it here\n            return cls._build_corr_table_non_nan(\n                sum_of_pairwise_mul, means, sums, count, std, cols\n      
      )\n\n    @staticmethod\n    def _maybe_combine_nan_and_non_nan_aggs(\n        total_agg: pandas.DataFrame,\n    ) -> pandas.DataFrame:\n        \"\"\"\n        Pair the aggregation results of partitions having and not having NaN values if needed.\n\n        Parameters\n        ----------\n        total_agg : pandas.DataFrame\n            A dataframe holding aggregations computed for each partition\n            concatenated along the rows axis.\n\n        Returns\n        -------\n        pandas.DataFrame\n            DataFrame with aligned results.\n        \"\"\"\n        # Here we try to align the results between partitions that had and didn't have NaNs.\n        # At the result of the Map phase, partitions with and without NaNs would produce\n        # different results:\n        #   - Partitions with NaNs produce a matrix of pairwise sums/square sums/counts\n        #   - And parts without NaNs produce regular one-column sums/square sums/counts\n        #\n        # As the result, `total_agg` will be something like this:\n        #    mul  | sum   pow2_sum  count | sum          pow2_sum     count\n        #    a  b | a  b  a  b      a  b  | __reduced__  __reduced__  __reduced__\n        # a  .  . | .  .  .  .      .  .  | .            .            .\n        # b  .  . | .  .  .  .      .  .  | .            .            
.\n        # --------|-----------------------|----------------------------------------\n        #           ^-- these are results   ^-- and these are results for\n        #           for partitions that     partitions that didn't have NaNs\n        #           had NaNs\n        # So, to get an actual total result of these aggregations, we have to additionally\n        # sum the results from non-NaN and NaN partitions.\n        #\n        # Here we sample the 'sum' columns to check whether we had mixed NaNs and\n        # non-NaNs partitions, if it's not the case we can skip the described step:\n        nsums = total_agg.columns.get_locs([\"sum\"])\n        if not (\n            len(nsums) > 1 and (\"sum\", MODIN_UNNAMED_SERIES_LABEL) in total_agg.columns\n        ):\n            return total_agg\n\n        cols = total_agg.columns\n\n        # Finding column positions for aggregational columns\n        all_agg_idxs = np.where(\n            cols.get_loc(\"sum\") | cols.get_loc(\"pow2_sum\") | cols.get_loc(\"count\")\n        )[0]\n        # Finding column positions for aggregational columns that store\n        # results of non-NaN partitions\n        non_na_agg_idxs = cols.get_indexer_for(\n            pandas.Index(\n                [\n                    (\"sum\", MODIN_UNNAMED_SERIES_LABEL),\n                    (\"pow2_sum\", MODIN_UNNAMED_SERIES_LABEL),\n                    (\"count\", MODIN_UNNAMED_SERIES_LABEL),\n                ]\n            )\n        )\n        # Finding column positions for aggregational columns that store\n        # results of NaN partitions by deducting non-NaN indices from all indices\n        na_agg_idxs = np.setdiff1d(all_agg_idxs, non_na_agg_idxs, assume_unique=True)\n\n        # Using `.values` here so we can ignore the indices (it's really hard\n        # to arrange them for pandas to properly perform the summation)\n        parts_with_nans = total_agg.values[:, na_agg_idxs]\n        parts_without_nans = (\n            
total_agg.values[:, non_na_agg_idxs]\n            # Before doing the summation we have to align the shapes\n            # Imagine that we have 'parts_with_nans' like:\n            #    sum   pow2_sum  count\n            #    a  b  a  b      a  b\n            # a  1  2  3  4      5  6\n            # b  1  2  3  4      5  6\n            #\n            # And the 'parts_without_nans' like:\n            #    sum  pow2_sum  count\n            # a  1    3         5\n            # b  2    4         6\n            #\n            # Here we want to sum them in an order so the digit matches (1 + 1), (2 + 2), ...\n            # For that we first have to repeat the values in 'parts_without_nans':\n            #  parts_without_nans.repeat(parts_with_nans.shape[0]):\n            #    sum  pow2_sum  count\n            # a  1    3         5\n            # b  1    3         5\n            # a  2    4         6\n            # b  2    4         6\n            #\n            # And then reshape it using the \"Fortran\" order:\n            #  parts_without_nans.reshape(parts_with_nans.shape, order=\"F\"):\n            #    sum   pow2_sum  count\n            #    a  b  a  b      a  b\n            # a  1  2  3  4      5  6\n            # b  1  2  3  4      5  6\n            # After that the shapes & orders are aligned and we can perform the summation\n            .repeat(repeats=len(parts_with_nans), axis=0).reshape(\n                parts_with_nans.shape, order=\"F\"\n            )\n        )\n        replace_values = parts_with_nans + parts_without_nans\n\n        if not total_agg.values.flags.writeable:\n            # making a copy if the buffer is read-only as\n            # we will need to modify `total_agg` inplace\n            total_agg = total_agg.copy()\n        total_agg.values[:, na_agg_idxs] = replace_values\n\n        return total_agg\n\n    @staticmethod\n    def _build_corr_table_nan(\n        sum_of_pairwise_mul: pandas.DataFrame,\n        means: pandas.DataFrame,\n        
sums: pandas.DataFrame,\n        count: pandas.DataFrame,\n        std: pandas.DataFrame,\n        cols: pandas.Index,\n        min_periods: int,\n    ) -> pandas.DataFrame:\n        \"\"\"\n        Build correlation matrix for a DataFrame that had NaN values in it.\n\n        Parameters\n        ----------\n        sum_of_pairwise_mul : pandas.DataFrame\n        means : pandas.DataFrame\n        sums : pandas.DataFrame\n        count : pandas.DataFrame\n        std : pandas.DataFrame\n        cols : pandas.Index\n        min_periods : int\n\n        Returns\n        -------\n        pandas.DataFrame\n            Correlation matrix.\n        \"\"\"\n        res = pandas.DataFrame(index=cols, columns=cols, dtype=\"float\")\n        nan_mask = count < min_periods\n\n        for col in cols:\n            top = (\n                sum_of_pairwise_mul.loc[col]\n                - sums.loc[col] * means[col]\n                - means.loc[col] * sums[col]\n                + count.loc[col] * means.loc[col] * means[col]\n            )\n            down = std.loc[col] * std[col]\n            res.loc[col, :] = top / down\n\n        res[nan_mask] = np.nan\n\n        return res\n\n    @staticmethod\n    def _build_corr_table_non_nan(\n        sum_of_pairwise_mul: pandas.DataFrame,\n        means: pandas.Series,\n        sums: pandas.Series,\n        count: int,\n        std: pandas.Series,\n        cols: pandas.Index,\n    ) -> pandas.DataFrame:\n        \"\"\"\n        Build correlation matrix for a DataFrame that didn't have NaN values in it.\n\n        Parameters\n        ----------\n        sum_of_pairwise_mul : pandas.DataFrame\n        means : pandas.Series\n        sums : pandas.Series\n        count : int\n        std : pandas.Series\n        cols : pandas.Index\n\n        Returns\n        -------\n        pandas.DataFrame\n            Correlation matrix.\n        \"\"\"\n        res = pandas.DataFrame(index=cols, columns=cols, dtype=\"float\")\n\n        for col in cols:\n 
           top = (\n                sum_of_pairwise_mul.loc[col]\n                - sums.loc[col] * means\n                - means.loc[col] * sums\n                + count * means.loc[col] * means\n            )\n            down = std.loc[col] * std\n            res.loc[col, :] = top / down\n\n        return res\n"
  },
  {
    "path": "modin/core/storage_formats/pandas/groupby.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Contains implementations for GroupbyReduce functions.\"\"\"\n\nimport numpy as np\nimport pandas\nfrom pandas.core.dtypes.cast import find_common_type\n\nfrom modin.config import RangePartitioning\nfrom modin.core.dataframe.algebra import GroupByReduce\nfrom modin.error_message import ErrorMessage\nfrom modin.utils import hashable\n\n\nclass GroupbyReduceImpl:\n    \"\"\"Provide TreeReduce implementations for certain groupby aggregations.\"\"\"\n\n    @classmethod\n    def get_impl(cls, agg_name):\n        \"\"\"\n        Get TreeReduce implementations for the specified `agg_name`.\n\n        Parameters\n        ----------\n        agg_name : hashable\n\n        Returns\n        -------\n        (map_fn: Union[callable, str], reduce_fn: Union[callable, str], default2pandas_fn: callable)\n        \"\"\"\n        try:\n            return cls._groupby_reduce_impls[agg_name]\n        except KeyError:\n            raise KeyError(f\"Have no implementation for {agg_name}.\")\n\n    @classmethod\n    def has_impl_for(cls, agg_func):\n        \"\"\"\n        Check whether the class has TreeReduce implementation for the specified `agg_func`.\n\n        Parameters\n     
   ----------\n        agg_func : hashable or dict\n\n        Returns\n        -------\n        bool\n        \"\"\"\n        if hashable(agg_func):\n            return agg_func in cls._groupby_reduce_impls\n        if not isinstance(agg_func, dict):\n            return False\n\n        # We have to keep this import away from the module level to avoid circular import\n        from modin.pandas.utils import walk_aggregation_dict\n\n        for _, func, _, _ in walk_aggregation_dict(agg_func):\n            if func not in cls._groupby_reduce_impls:\n                return False\n\n        return True\n\n    @classmethod\n    def build_qc_method(cls, agg_name, finalizer_fn=None):\n        \"\"\"\n        Build a TreeReduce implemented query compiler method for the specified groupby aggregation.\n\n        Parameters\n        ----------\n        agg_name : hashable\n        finalizer_fn : callable(pandas.DataFrame) -> pandas.DataFrame, default: None\n            A callable to execute at the end a groupby kernel against groupby result.\n\n        Returns\n        -------\n        callable\n            Function that takes query compiler and executes GroupBy aggregation\n            with TreeReduce algorithm.\n        \"\"\"\n        map_fn, reduce_fn, d2p_fn = cls.get_impl(agg_name)\n        map_reduce_method = GroupByReduce.register(\n            map_fn, reduce_fn, default_to_pandas_func=d2p_fn, finalizer_fn=finalizer_fn\n        )\n\n        def method(query_compiler, *args, **kwargs):\n            if RangePartitioning.get():\n                try:\n                    if finalizer_fn is not None:\n                        raise NotImplementedError(\n                            \"Range-partitioning groupby is not implemented yet when a finalizing function is specified.\"\n                        )\n                    return query_compiler._groupby_shuffle(\n                        *args, agg_func=agg_name, **kwargs\n                    )\n                except 
NotImplementedError as e:\n                    ErrorMessage.warn(\n                        f\"Can't use range-partitioning groupby implementation because of: {e}\"\n                        + \"\\nFalling back to a TreeReduce implementation.\"\n                    )\n            return map_reduce_method(query_compiler, *args, **kwargs)\n\n        return method\n\n    @staticmethod\n    def _build_skew_impl():\n        \"\"\"\n        Build TreeReduce implementation for 'skew' groupby aggregation.\n\n        Returns\n        -------\n        (map_fn: callable, reduce_fn: callable, default2pandas_fn: callable)\n        \"\"\"\n\n        def skew_map(dfgb, *args, **kwargs):\n            if dfgb._selection is not None:\n                data_to_agg = dfgb._selected_obj\n            else:\n                cols_to_agg = dfgb.obj.columns.difference(dfgb.exclusions)\n                data_to_agg = dfgb.obj[cols_to_agg]\n\n            df_pow2 = data_to_agg**2\n            df_pow3 = data_to_agg**3\n\n            return pandas.concat(\n                [\n                    dfgb.count(*args, **kwargs),\n                    dfgb.sum(*args, **kwargs),\n                    df_pow2.groupby(dfgb.grouper).sum(*args, **kwargs),\n                    df_pow3.groupby(dfgb.grouper).sum(*args, **kwargs),\n                ],\n                copy=False,\n                axis=1,\n                keys=[\"count\", \"sum\", \"pow2_sum\", \"pow3_sum\"],\n                names=[GroupByReduce.ID_LEVEL_NAME],\n            )\n\n        def skew_reduce(dfgb, *args, **kwargs):\n            df = dfgb.sum(*args, **kwargs)\n            if df.empty:\n                return df.droplevel(GroupByReduce.ID_LEVEL_NAME, axis=1)\n\n            count = df[\"count\"]\n            s = df[\"sum\"]\n            s2 = df[\"pow2_sum\"]\n            s3 = df[\"pow3_sum\"]\n\n            # mean = sum(x) / count\n            m = s / count\n\n            # m2 = sum( (x - m)^ 2) = sum(x^2 - 2*x*m + m^2)\n            m2 = s2 - 
2 * m * s + count * (m**2)\n\n            # m3 = sum( (x - m)^ 3) = sum(x^3 - 3*x^2*m + 3*x*m^2 - m^3)\n            m3 = s3 - 3 * m * s2 + 3 * s * (m**2) - count * (m**3)\n\n            # The equation for the 'skew' was taken directly from pandas:\n            # https://github.com/pandas-dev/pandas/blob/8dab54d6573f7186ff0c3b6364d5e4dd635ff3e7/pandas/core/nanops.py#L1226\n            with np.errstate(invalid=\"ignore\", divide=\"ignore\"):\n                skew_res = (count * (count - 1) ** 0.5 / (count - 2)) * (m3 / m2**1.5)\n\n            # Setting dummy values for invalid results in accordance with pandas\n            skew_res[m2 == 0] = 0\n            skew_res[count < 3] = np.nan\n            return skew_res\n\n        GroupByReduce.register_implementation(skew_map, skew_reduce)\n        return (\n            skew_map,\n            skew_reduce,\n            lambda grp, *args, **kwargs: grp.skew(*args, **kwargs),\n        )\n\n    @staticmethod\n    def _build_mean_impl():\n        \"\"\"\n        Build TreeReduce implementation for 'mean' groupby aggregation.\n\n        Returns\n        -------\n        (map_fn: callable, reduce_fn: callable, default2pandas_fn: callable)\n        \"\"\"\n\n        def mean_map(dfgb, **kwargs):\n            return pandas.concat(\n                [dfgb.sum(**kwargs), dfgb.count()],\n                axis=1,\n                copy=False,\n                keys=[\"sum\", \"count\"],\n                names=[GroupByReduce.ID_LEVEL_NAME],\n            )\n\n        def mean_reduce(dfgb, **kwargs):\n            \"\"\"\n            Compute mean value in each group using sums/counts values within reduce phase.\n\n            Parameters\n            ----------\n            dfgb : pandas.DataFrameGroupBy\n                GroupBy object for column-partition.\n            **kwargs : dict\n                Additional keyword parameters to be passed in ``pandas.DataFrameGroupBy.sum``.\n\n            Returns\n            -------\n            
pandas.DataFrame\n                A pandas Dataframe with mean values in each column of each group.\n            \"\"\"\n            sums_counts_df = dfgb.sum(**kwargs)\n            if sums_counts_df.empty:\n                return sums_counts_df.droplevel(GroupByReduce.ID_LEVEL_NAME, axis=1)\n\n            sum_df = sums_counts_df[\"sum\"]\n            count_df = sums_counts_df[\"count\"]\n\n            return sum_df / count_df\n\n        GroupByReduce.register_implementation(mean_map, mean_reduce)\n\n        return (\n            mean_map,\n            mean_reduce,\n            lambda grp, *args, **kwargs: grp.mean(*args, **kwargs),\n        )\n\n\nGroupbyReduceImpl._groupby_reduce_impls = {\n    \"all\": (\"all\", \"all\", lambda grp, *args, **kwargs: grp.all(*args, **kwargs)),\n    \"any\": (\"any\", \"any\", lambda grp, *args, **kwargs: grp.any(*args, **kwargs)),\n    \"count\": (\"count\", \"sum\", lambda grp, *args, **kwargs: grp.count(*args, **kwargs)),\n    \"max\": (\"max\", \"max\", lambda grp, *args, **kwargs: grp.max(*args, **kwargs)),\n    \"mean\": GroupbyReduceImpl._build_mean_impl(),\n    \"min\": (\"min\", \"min\", lambda grp, *args, **kwargs: grp.min(*args, **kwargs)),\n    \"prod\": (\"prod\", \"prod\", lambda grp, *args, **kwargs: grp.prod(*args, **kwargs)),\n    \"size\": (\"size\", \"sum\", lambda grp, *args, **kwargs: grp.size(*args, **kwargs)),\n    \"skew\": GroupbyReduceImpl._build_skew_impl(),\n    \"sum\": (\"sum\", \"sum\", lambda grp, *args, **kwargs: grp.sum(*args, **kwargs)),\n}\n\n\nclass PivotTableImpl:\n    \"\"\"Provide MapReduce, Range-Partitioning and Full-Column implementations for 'pivot_table()'.\"\"\"\n\n    @classmethod\n    def map_reduce_impl(\n        cls, qc, unique_keys, drop_column_level, pivot_kwargs\n    ):  # noqa: PR01\n        \"\"\"Compute 'pivot_table()' using MapReduce implementation.\"\"\"\n        if pivot_kwargs[\"margins\"]:\n            raise NotImplementedError(\n                \"MapReduce 'pivot_table' 
implementation doesn't support 'margins=True' parameter\"\n            )\n\n        index, columns, values = (\n            pivot_kwargs[\"index\"],\n            pivot_kwargs[\"columns\"],\n            pivot_kwargs[\"values\"],\n        )\n        aggfunc = pivot_kwargs[\"aggfunc\"]\n\n        if not GroupbyReduceImpl.has_impl_for(aggfunc):\n            raise NotImplementedError(\n                \"MapReduce 'pivot_table' implementation only supports 'aggfuncs' that are implemented in 'GroupbyReduceImpl'\"\n            )\n\n        if len(set(index).intersection(columns)) > 0:\n            raise NotImplementedError(\n                \"MapReduce 'pivot_table' implementation doesn't support intersections of 'index' and 'columns'\"\n            )\n\n        to_group, keys_columns = cls._separate_data_from_grouper(\n            qc, values, unique_keys\n        )\n        to_unstack = columns if index else None\n\n        result = GroupbyReduceImpl.build_qc_method(\n            aggfunc,\n            finalizer_fn=lambda df: cls._pivot_table_from_groupby(\n                df,\n                pivot_kwargs[\"dropna\"],\n                drop_column_level,\n                to_unstack,\n                pivot_kwargs[\"fill_value\"],\n            ),\n        )(\n            to_group,\n            by=keys_columns,\n            axis=0,\n            groupby_kwargs={\n                \"observed\": pivot_kwargs[\"observed\"],\n                \"sort\": pivot_kwargs[\"sort\"],\n            },\n            agg_args=(),\n            agg_kwargs={},\n            drop=True,\n        )\n\n        if to_unstack is None:\n            result = result.transpose()\n        return result\n\n    @classmethod\n    def full_axis_impl(\n        cls, qc, unique_keys, drop_column_level, pivot_kwargs\n    ):  # noqa: PR01\n        \"\"\"Compute 'pivot_table()' using full-column-axis implementation.\"\"\"\n        index, columns, values = (\n            pivot_kwargs[\"index\"],\n            
pivot_kwargs[\"columns\"],\n            pivot_kwargs[\"values\"],\n        )\n\n        to_group, keys_columns = cls._separate_data_from_grouper(\n            qc, values, unique_keys\n        )\n\n        def applyier(df, other):  # pragma: no cover\n            \"\"\"\n            Build pivot table for a single partition.\n\n            Parameters\n            ----------\n            df : pandas.DataFrame\n                Partition of the self frame.\n            other : pandas.DataFrame\n                Broadcasted partition that contains `value` columns\n                of the self frame.\n\n            Returns\n            -------\n            pandas.DataFrame\n                Pivot table for this particular partition.\n            \"\"\"\n            concated = pandas.concat([df, other], axis=1, copy=False)\n            # to reduce peak memory consumption\n            del df, other\n            result = pandas.pivot_table(\n                concated,\n                **pivot_kwargs,\n            )\n            # to reduce peak memory consumption\n            del concated\n            # if only one value is specified, removing level that maps\n            # columns from `values` to the actual values\n            if drop_column_level is not None:\n                result = result.droplevel(drop_column_level, axis=1)\n\n            # in that case Pandas transposes the result of `pivot_table`,\n            # transposing it back to be consistent with column axis values along\n            # different partitions\n            if len(index) == 0 and len(columns) > 0:\n                common_type = find_common_type(result.dtypes.tolist())\n                # TODO: remove find_common_type+astype after pandas fix the following issue\n                # transpose loses dtypes: https://github.com/pandas-dev/pandas/issues/43337\n                result = result.transpose().astype(common_type, copy=False)\n\n            return result\n\n        result = qc.__constructor__(\n       
     to_group._modin_frame.broadcast_apply_full_axis(\n                axis=0, func=applyier, other=keys_columns._modin_frame\n            )\n        )\n\n        # transposing the result again, to be consistent with Pandas result\n        if len(index) == 0 and len(columns) > 0:\n            result = result.transpose()\n\n        return result\n\n    @classmethod\n    def range_partition_impl(\n        cls, qc, unique_keys, drop_column_level, pivot_kwargs\n    ):  # noqa: PR01\n        \"\"\"Compute 'pivot_table()' using Range-Partitioning implementation.\"\"\"\n        if pivot_kwargs[\"margins\"]:\n            raise NotImplementedError(\n                \"Range-partitioning 'pivot_table' implementation doesn't support 'margins=True' parameter\"\n            )\n\n        index, columns, values = (\n            pivot_kwargs[\"index\"],\n            pivot_kwargs[\"columns\"],\n            pivot_kwargs[\"values\"],\n        )\n\n        if len(set(index).intersection(columns)) > 0:\n            raise NotImplementedError(\n                \"Range-partitioning 'pivot_table' implementation doesn't support intersections of 'index' and 'columns'\"\n            )\n\n        if values is not None:\n            to_take = list(np.unique(list(index) + list(columns) + list(values)))\n            qc = qc.getitem_column_array(to_take, ignore_order=True)\n\n        to_unstack = columns if index else None\n\n        groupby_result = qc._groupby_shuffle(\n            by=list(unique_keys),\n            agg_func=pivot_kwargs[\"aggfunc\"],\n            axis=0,\n            groupby_kwargs={\n                \"observed\": pivot_kwargs[\"observed\"],\n                \"sort\": pivot_kwargs[\"sort\"],\n            },\n            agg_args=(),\n            agg_kwargs={},\n            drop=True,\n        )\n\n        # the length of 'groupby_result' is typically really small here,\n        # so it's okay to call full-column function\n        result = 
groupby_result._modin_frame.apply_full_axis(\n            axis=0,\n            func=lambda df: cls._pivot_table_from_groupby(\n                df,\n                pivot_kwargs[\"dropna\"],\n                drop_column_level,\n                to_unstack,\n                pivot_kwargs[\"fill_value\"],\n                # FIXME: Range-partitioning impl has a problem with the resulting order in case of multiple grouping keys,\n                # so passing 'sort=True' explicitly in this case\n                # https://github.com/modin-project/modin/issues/6875\n                sort=pivot_kwargs[\"sort\"] if len(unique_keys) > 1 else False,\n            ),\n        )\n\n        if to_unstack is None:\n            result = result.transpose()\n\n        return qc.__constructor__(result)\n\n    @staticmethod\n    def _pivot_table_from_groupby(\n        df, dropna, drop_column_level, to_unstack, fill_value, sort=False\n    ):\n        \"\"\"\n        Convert group by aggregation result to a pivot table.\n\n        Parameters\n        ----------\n        df : pandas.DataFrame\n            Group by aggregation result.\n        dropna : bool\n            Whether to drop NaN columns.\n        drop_column_level : int or None\n            An extra columns level to drop.\n        to_unstack : list of labels or None\n            Group by keys to pass to ``.unstack()``. 
Represents `columns` parameter\n            for ``.pivot_table()``.\n        fill_value : bool\n            Fill value for NaN values.\n        sort : bool, default: False\n            Whether to sort the result along index.\n\n        Returns\n        -------\n        pandas.DataFrame\n        \"\"\"\n        if df.index.nlevels > 1 and to_unstack is not None:\n            df = df.unstack(level=to_unstack)\n        if drop_column_level is not None:\n            df = df.droplevel(drop_column_level, axis=1)\n        if dropna:\n            df = df.dropna(axis=1, how=\"all\")\n        if fill_value is not None:\n            df = df.fillna(fill_value, downcast=\"infer\")\n        if sort:\n            df = df.sort_index(axis=0)\n        return df\n\n    @staticmethod\n    def _separate_data_from_grouper(qc, values, unique_keys):\n        \"\"\"\n        Split `qc` for key columns to group by and values to aggregate.\n\n        Parameters\n        ----------\n        qc : PandasQueryCompiler\n        values : list of labels or None\n            List of columns to aggregate. ``None`` means all columns except 'unique_keys'.\n        unique_keys : list of labels\n            List of key columns to group by.\n\n        Returns\n        -------\n        to_aggregate : PandasQueryCompiler\n        keys_to_group : PandasQueryCompiler\n        \"\"\"\n        if values is None:\n            to_aggregate = qc.drop(columns=unique_keys)\n        else:\n            to_aggregate = qc.getitem_column_array(np.unique(values), ignore_order=True)\n\n        keys_to_group = qc.getitem_column_array(unique_keys, ignore_order=True)\n\n        return to_aggregate, keys_to_group\n"
  },
  {
    "path": "modin/core/storage_formats/pandas/merge.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Contains implementations for Merge/Join.\"\"\"\n\nfrom __future__ import annotations\n\nfrom typing import TYPE_CHECKING, Optional\n\nimport pandas\nfrom pandas.core.dtypes.common import is_list_like\nfrom pandas.errors import MergeError\n\nfrom modin.config import MinRowPartitionSize, NPartitions\nfrom modin.core.dataframe.base.dataframe.utils import join_columns\nfrom modin.core.dataframe.pandas.metadata import ModinDtypes\n\nfrom .utils import merge_partitioning\n\nif TYPE_CHECKING:\n    from modin.core.storage_formats.pandas.query_compiler import PandasQueryCompiler\n\n\n# TODO: add methods for 'join' here\nclass MergeImpl:\n    \"\"\"Provide implementations for merge/join.\"\"\"\n\n    @classmethod\n    def range_partitioning_merge(cls, left, right, kwargs):\n        \"\"\"\n        Execute merge using range-partitioning implementation.\n\n        Parameters\n        ----------\n        left : PandasQueryCompiler\n        right : PandasQueryCompiler\n        kwargs : dict\n            Keyword arguments for ``pandas.merge()`` function.\n\n        Returns\n        -------\n        PandasQueryCompiler\n        \"\"\"\n        if (\n            
kwargs.get(\"left_index\", False)\n            or kwargs.get(\"right_index\", False)\n            or kwargs.get(\"left_on\", None) is not None\n            or kwargs.get(\"left_on\", None) is not None\n            or kwargs.get(\"how\", \"left\") not in (\"left\", \"inner\")\n        ):\n            raise NotImplementedError(\n                f\"The passed parameters are not yet supported by range-partitioning merge: {kwargs=}\"\n            )\n\n        on = kwargs.get(\"on\", None)\n        if on is not None and not isinstance(on, list):\n            on = [on]\n        if on is None or len(on) > 1:\n            raise NotImplementedError(\n                f\"Merging on multiple columns is not yet supported by range-partitioning merge: {on=}\"\n            )\n\n        if any(col not in left.columns or col not in right.columns for col in on):\n            raise NotImplementedError(\n                \"Merging on an index level is not yet supported by range-partitioning merge.\"\n            )\n\n        def func(left, right):\n            return left.merge(right, **kwargs)\n\n        new_columns, new_dtypes = cls._compute_result_metadata(\n            left,\n            right,\n            on,\n            left_on=None,\n            right_on=None,\n            suffixes=kwargs.get(\"suffixes\", (\"_x\", \"_y\")),\n        )\n\n        return left.__constructor__(\n            left._modin_frame._apply_func_to_range_partitioning_broadcast(\n                right._modin_frame,\n                func=func,\n                key=on,\n                new_columns=new_columns,\n                new_dtypes=new_dtypes,\n            )\n            # pandas resets the index of the result unless we were merging on an index level,\n            # the current implementation only supports merging on column names, so dropping\n            # the index unconditionally\n        ).reset_index(drop=True)\n\n    @classmethod\n    def row_axis_merge(\n        cls, left: PandasQueryCompiler, 
right: PandasQueryCompiler, kwargs: dict\n    ) -> PandasQueryCompiler:\n        \"\"\"\n        Execute merge using row-axis implementation.\n\n        Parameters\n        ----------\n        left : PandasQueryCompiler\n        right : PandasQueryCompiler\n        kwargs : dict\n            Keyword arguments for ``pandas.merge()`` function.\n\n        Returns\n        -------\n        PandasQueryCompiler\n        \"\"\"\n        how = kwargs.get(\"how\", \"inner\")\n        on = kwargs.get(\"on\", None)\n        left_on = kwargs.get(\"left_on\", None)\n        right_on = kwargs.get(\"right_on\", None)\n        left_index = kwargs.get(\"left_index\", False)\n        right_index = kwargs.get(\"right_index\", False)\n        sort = kwargs.get(\"sort\", False)\n\n        if (\n            (\n                how in [\"left\", \"inner\"]\n                or (how == \"right\" and right._modin_frame._partitions.size != 0)\n            )\n            and left_index is False\n            and right_index is False\n        ):\n            kwargs[\"sort\"] = False\n\n            reverted = False\n            if how == \"right\":\n                left, right = right, left\n                reverted = True\n\n            def should_keep_index(\n                left: PandasQueryCompiler,\n                right: PandasQueryCompiler,\n            ) -> bool:\n                keep_index = False\n                if left_on is not None and right_on is not None:\n                    keep_index = any(\n                        o in left.index.names\n                        and o in right_on\n                        and o in right.index.names\n                        for o in left_on\n                    )\n                elif on is not None:\n                    keep_index = any(\n                        o in left.index.names and o in right.index.names for o in on\n                    )\n                return keep_index\n\n            def map_func(\n                left, right, 
kwargs=kwargs\n            ) -> pandas.DataFrame:  # pragma: no cover\n                if reverted:\n                    df = pandas.merge(right, left, **kwargs)\n                else:\n                    df = pandas.merge(left, right, **kwargs)\n                return df\n\n            # Want to ensure that these are python lists\n            if left_on is not None and right_on is not None:\n                left_on = list(left_on) if is_list_like(left_on) else [left_on]\n                right_on = list(right_on) if is_list_like(right_on) else [right_on]\n            elif on is not None:\n                on = list(on) if is_list_like(on) else [on]\n\n            right_to_broadcast = right._modin_frame.combine()\n            new_columns, new_dtypes = cls._compute_result_metadata(\n                *((left, right) if not reverted else (right, left)),\n                on,\n                left_on,\n                right_on,\n                kwargs.get(\"suffixes\", (\"_x\", \"_y\")),\n            )\n\n            # We rebalance when the ratio of the number of existing partitions to\n            # the ideal number of partitions is smaller than this threshold. 
The\n            # threshold is a heuristic that may need to be tuned for performance.\n            if (\n                left._modin_frame._partitions.shape[0] < 0.3 * NPartitions.get()\n                # to avoid empty partitions after repartition; can materialize index\n                and len(left._modin_frame)\n                > NPartitions.get() * MinRowPartitionSize.get()\n            ):\n                left = left.repartition(axis=0)\n\n            new_left = left.__constructor__(\n                left._modin_frame.broadcast_apply_full_axis(\n                    axis=1,\n                    func=map_func,\n                    other=right_to_broadcast,\n                    # We're going to explicitly change the shape across the 1-axis,\n                    # so we want for partitioning to adapt as well\n                    keep_partitioning=False,\n                    num_splits=merge_partitioning(\n                        left._modin_frame, right._modin_frame, axis=1\n                    ),\n                    new_columns=new_columns,\n                    sync_labels=False,\n                    dtypes=new_dtypes,\n                )\n            )\n\n            # Here we want to understand whether we're joining on a column or on an index level.\n            # It's cool if indexes are already materialized so we can easily check that, if not\n            # it's fine too, we can also decide that by columns, which tend to be already\n            # materialized quite often compared to the indexes.\n            keep_index = False\n            if left.frame_has_materialized_index:\n                keep_index = should_keep_index(left, right)\n            else:\n                # Have to trigger columns materialization. 
Hope they're already available at this point.\n                if left_on is not None and right_on is not None:\n                    keep_index = any(\n                        o not in right.columns\n                        and o in left_on\n                        and o not in left.columns\n                        for o in right_on\n                    )\n                elif on is not None:\n                    keep_index = any(\n                        o not in right.columns and o not in left.columns for o in on\n                    )\n\n            if sort:\n                if left_on is not None and right_on is not None:\n                    new_left = (\n                        new_left.sort_index(axis=0, level=left_on + right_on)\n                        if keep_index\n                        else new_left.sort_rows_by_column_values(left_on + right_on)\n                    )\n                elif on is not None:\n                    new_left = (\n                        new_left.sort_index(axis=0, level=on)\n                        if keep_index\n                        else new_left.sort_rows_by_column_values(on)\n                    )\n\n            return new_left if keep_index else new_left.reset_index(drop=True)\n        else:\n            return left.default_to_pandas(pandas.DataFrame.merge, right, **kwargs)\n\n    @classmethod\n    def _compute_result_metadata(\n        cls,\n        left: PandasQueryCompiler,\n        right: PandasQueryCompiler,\n        on,\n        left_on,\n        right_on,\n        suffixes,\n    ) -> tuple[Optional[pandas.Index], Optional[ModinDtypes]]:\n        \"\"\"\n        Compute columns and dtypes metadata for the result of merge if possible.\n\n        Parameters\n        ----------\n        left : PandasQueryCompiler\n        right : PandasQueryCompiler\n        on : label, list of labels or None\n            `on` argument that was passed to ``pandas.merge()``.\n        left_on : label, list of labels or None\n         
   `left_on` argument that was passed to ``pandas.merge()``.\n        right_on : label, list of labels or None\n            `right_on` argument that was passed to ``pandas.merge()``.\n        suffixes : list of strings\n            `suffixes` argument that was passed to ``pandas.merge()``.\n\n        Returns\n        -------\n        new_columns : pandas.Index or None\n            Columns for the result of merge. ``None`` if not enough metadata to compute.\n        new_dtypes : ModinDtypes or None\n            Dtypes for the result of merge. ``None`` if not enough metadata to compute.\n        \"\"\"\n        new_columns = None\n        new_dtypes = None\n\n        if not left.frame_has_materialized_columns:\n            return new_columns, new_dtypes\n\n        if left_on is None and right_on is None:\n            if on is None:\n                on = [c for c in left.columns if c in right.columns]\n            _left_on, _right_on = on, on\n        else:\n            if left_on is None or right_on is None:\n                raise MergeError(\n                    \"Must either pass only 'on' or 'left_on' and 'right_on', not combination of them.\"\n                )\n            _left_on, _right_on = left_on, right_on\n\n        try:\n            new_columns, left_renamer, right_renamer = join_columns(\n                left.columns,\n                right.columns,\n                _left_on,\n                _right_on,\n                suffixes,\n            )\n        except NotImplementedError:\n            # This happens when one of the keys to join is an index level. 
Pandas behaviour\n            # is really complicated in this case, so we're not computing resulted columns for now.\n            pass\n        else:\n            # renamers may contain columns from 'index', so trying to merge index and column dtypes here\n            right_index_dtypes = (\n                right.index.dtypes\n                if isinstance(right.index, pandas.MultiIndex)\n                else pandas.Series([right.index.dtype], index=[right.index.name])\n            )\n            right_dtypes = pandas.concat([right.dtypes, right_index_dtypes])[\n                right_renamer.keys()\n            ].rename(right_renamer)\n\n            left_index_dtypes = left._modin_frame._index_cache.maybe_get_dtypes()\n            left_dtypes = (\n                ModinDtypes.concat([left._modin_frame._dtypes, left_index_dtypes])\n                .lazy_get(left_renamer.keys())\n                .set_index(list(left_renamer.values()))\n            )\n            new_dtypes = ModinDtypes.concat([left_dtypes, right_dtypes])\n\n        return new_columns, new_dtypes\n"
  },
  {
    "path": "modin/core/storage_formats/pandas/native_query_compiler.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"\nModule contains ``NativeQueryCompiler`` class.\n\n``NativeQueryCompiler`` is responsible for compiling efficient DataFrame algebra\nqueries for small data and empty ``PandasDataFrame``.\n\"\"\"\n\nfrom typing import TYPE_CHECKING, Any, Optional, Union\n\nimport numpy as np\nimport pandas\nfrom pandas.core.dtypes.common import is_scalar\n\nfrom modin.config.envvars import (\n    NativePandasDeepCopy,\n    NativePandasMaxRows,\n    NativePandasTransferThreshold,\n)\nfrom modin.core.dataframe.base.interchange.dataframe_protocol.dataframe import (\n    ProtocolDataframe,\n)\nfrom modin.core.storage_formats.base.query_compiler import BaseQueryCompiler\nfrom modin.utils import _inherit_docstrings, try_cast_to_pandas\n\nif TYPE_CHECKING:\n    from modin.pandas import DataFrame, Series\n    from modin.pandas.base import BasePandasDataset\n\n_NO_REPARTITION_ON_NATIVE_EXECUTION_EXCEPTION_MESSAGE = (\n    \"Modin dataframes and series using native execution do not have partitions.\"\n)\n\n\ndef _get_axis(axis):\n    \"\"\"\n    Build index labels getter of the specified axis.\n\n    Parameters\n    ----------\n    axis : {0, 1}\n        Axis to get labels from. 
0 is for index and 1 is for column.\n\n    Returns\n    -------\n    callable(NativeQueryCompiler) -> pandas.Index\n    \"\"\"\n    if axis == 0:\n        return lambda self: self._modin_frame.index\n    else:\n        return lambda self: self._modin_frame.columns\n\n\ndef _set_axis(axis):\n    \"\"\"\n    Build index labels setter of the specified axis.\n\n    Parameters\n    ----------\n    axis : {0, 1}\n        Axis to set labels on. 0 is for index and 1 is for column.\n\n    Returns\n    -------\n    callable(NativeQueryCompiler)\n    \"\"\"\n    if axis == 0:\n\n        def set_axis(self, idx):\n            self._modin_frame.index = idx\n\n    else:\n\n        def set_axis(self, cols):\n            self._modin_frame.columns = cols\n\n    return set_axis\n\n\n@_inherit_docstrings(BaseQueryCompiler)\nclass NativeQueryCompiler(BaseQueryCompiler):\n    \"\"\"\n    Query compiler for executing operations with native pandas.\n\n    Parameters\n    ----------\n    pandas_frame : pandas.DataFrame\n        The pandas frame to query with the compiled queries.\n    \"\"\"\n\n    _OPERATION_INITIALIZATION_OVERHEAD = 0\n    _OPERATION_PER_ROW_OVERHEAD = 0\n\n    _modin_frame: pandas.DataFrame\n    _should_warn_on_default_to_pandas: bool = False\n\n    def __init__(self, pandas_frame):\n        if hasattr(pandas_frame, \"_to_pandas\"):\n            pandas_frame = pandas_frame._to_pandas()\n        if is_scalar(pandas_frame):\n            pandas_frame = pandas.DataFrame([pandas_frame])\n        elif isinstance(pandas_frame, pandas.DataFrame):\n            # For performance purposes, we create \"shallow\" copies when NativePandasDeepCopy\n            # is disabled (the default value). 
This may cause unexpected behavior if the\n            # parent native frame is mutated, but creates a very significant performance\n            # improvement on large data.\n            pandas_frame = pandas_frame.copy(deep=NativePandasDeepCopy.get())\n        else:\n            pandas_frame = pandas.DataFrame(pandas_frame)\n\n        self._modin_frame = pandas_frame\n\n    storage_format = property(\n        lambda self: \"Native\", doc=BaseQueryCompiler.storage_format.__doc__\n    )\n    engine = property(lambda self: \"Native\", doc=BaseQueryCompiler.engine.__doc__)\n\n    def execute(self):\n        pass\n\n    @property\n    def frame_has_materialized_dtypes(self) -> bool:\n        \"\"\"\n        Check if the underlying dataframe has materialized dtypes.\n\n        Returns\n        -------\n        bool\n        \"\"\"\n        return True\n\n    def set_frame_dtypes_cache(self, dtypes):\n        \"\"\"\n        Set dtypes cache for the underlying dataframe frame.\n\n        Parameters\n        ----------\n        dtypes : pandas.Series, ModinDtypes, callable or None\n\n        Notes\n        -----\n        This function is for consistency with other QCs,\n        dtypes should be assigned directly on the frame.\n        \"\"\"\n        pass\n\n    def set_frame_index_cache(self, index):\n        \"\"\"\n        Set index cache for underlying dataframe.\n\n        Parameters\n        ----------\n        index : sequence, callable or None\n\n        Notes\n        -----\n        This function is for consistency with other QCs,\n        index should be assigned directly on the frame.\n        \"\"\"\n        pass\n\n    @property\n    def frame_has_index_cache(self):\n        \"\"\"\n        Check if the index cache exists for underlying dataframe.\n\n        Returns\n        -------\n        bool\n        \"\"\"\n        return True\n\n    @property\n    def frame_has_dtypes_cache(self) -> bool:\n        \"\"\"\n        Check if the dtypes cache exists for 
the underlying dataframe.\n\n        Returns\n        -------\n        bool\n        \"\"\"\n        return True\n\n    def copy(self):\n        # If NativePandasDeepCopy is enabled, no need to perform an explicit copy here since the\n        # constructor will perform one anyway.\n        # If it is disabled, then we need to perform a deep copy.\n        if NativePandasDeepCopy.get():\n            return self.__constructor__(self._modin_frame)\n        else:\n            return self.__constructor__(self._modin_frame.copy(deep=True))\n\n    def to_pandas(self):\n        # For performance purposes, we create \"shallow\" copies when NativePandasDeepCopy\n        # is disabled (the default value). This may cause unexpected behavior if the\n        # parent native frame is mutated, but creates a very significant performance\n        # improvement on large data.\n        return self._modin_frame.copy(deep=NativePandasDeepCopy.get())\n\n    @classmethod\n    def from_pandas(cls, df, data_cls):\n        return cls(df)\n\n    @classmethod\n    def from_arrow(cls, at, data_cls):\n        return cls(at.to_pandas())\n\n    def free(self):\n        return\n\n    def finalize(self):\n        return\n\n    def move_to(self, target_backend: str) -> Union[BaseQueryCompiler, Any]:\n        return NotImplemented\n\n    @classmethod\n    def move_from(cls, source_qc: BaseQueryCompiler) -> Union[BaseQueryCompiler, Any]:\n        return NotImplemented\n\n    @classmethod\n    def _engine_max_size(cls):\n        # do not return the custom configuration for sub-classes\n        if cls == NativeQueryCompiler:\n            return NativePandasMaxRows.get()\n        return cls._MAX_SIZE_THIS_ENGINE_CAN_HANDLE\n\n    @classmethod\n    def _transfer_threshold(cls):\n        # do not return the custom configuration for sub-classes\n        if cls == NativeQueryCompiler:\n            return NativePandasTransferThreshold.get()\n        return cls._TRANSFER_THRESHOLD\n\n    def 
do_array_ufunc_implementation(\n        self,\n        frame: \"BasePandasDataset\",\n        ufunc: np.ufunc,\n        method: str,\n        *inputs: Any,\n        **kwargs: Any\n    ) -> Union[\"DataFrame\", \"Series\", Any]:\n        assert (\n            self is frame._query_compiler\n        ), \"array ufunc called with mismatched query compiler and input frame\"\n        pandas_frame = self._modin_frame\n        if not frame._is_dataframe:\n            pandas_frame = pandas_frame.iloc[:, 0]\n        pandas_result = pandas_frame.__array_ufunc__(\n            ufunc,\n            method,\n            *(\n                pandas_frame if each_input is frame else try_cast_to_pandas(each_input)\n                for each_input in inputs\n            ),\n            **try_cast_to_pandas(kwargs),\n        )\n        if isinstance(pandas_result, pandas.DataFrame):\n            from modin.pandas import DataFrame\n\n            return DataFrame(pandas_result)\n        elif isinstance(pandas_result, pandas.Series):\n            from modin.pandas import Series\n\n            return Series(pandas_result)\n        # ufuncs are required to be one-to-one mappings, so this branch should never be hit\n        return pandas_result  # pragma: no cover\n\n    # Dataframe interchange protocol\n    def to_interchange_dataframe(\n        self, nan_as_null: bool = False, allow_copy: bool = True\n    ):\n        return self._modin_frame.__dataframe__(\n            nan_as_null=nan_as_null, allow_copy=allow_copy\n        )\n\n    @classmethod\n    def from_interchange_dataframe(cls, df: ProtocolDataframe, data_cls):\n        return cls(pandas.api.interchange.from_dataframe(df))\n\n    # END Dataframe interchange protocol\n\n    def support_materialization_in_worker_process(self) -> bool:\n        \"\"\"\n        Whether it's possible to call function `to_pandas` during the pickling process, at the moment of recreating the object.\n\n        Returns\n        -------\n        bool\n        
\"\"\"\n        return False\n\n    def get_pandas_backend(self) -> Optional[str]:\n        \"\"\"\n        Get backend stored in `_modin_frame`.\n\n        Returns\n        -------\n        str | None\n            Backend name.\n        \"\"\"\n        return None\n\n    # NOTE that because this query compiler provides the index of its underlying\n    # pandas dataframe, updating the index affects this frame, and vice versa.\n    # Consequently, native execution does not suffer from the issue\n    # https://github.com/modin-project/modin/issues/1618\n    index: pandas.Index = property(_get_axis(0), _set_axis(0))\n    columns = property(_get_axis(1), _set_axis(1))\n\n    @_inherit_docstrings(BaseQueryCompiler.repartition)\n    def repartition(self, axis=None):\n        raise Exception(_NO_REPARTITION_ON_NATIVE_EXECUTION_EXCEPTION_MESSAGE)\n"
  },
  {
    "path": "modin/core/storage_formats/pandas/parsers.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\n\"\"\"\nModule houses Modin parser classes, that are used for data parsing on the workers.\n\nNotes\n-----\nData parsing mechanism differs depending on the data format type:\n\n* text format type (CSV, EXCEL, FWF, JSON):\n  File parsing begins from retrieving `start` and `end` parameters from `parse`\n  kwargs - these parameters define start and end bytes of data file, that should\n  be read in the concrete partition. Using this data and file handle got from\n  `fname`, binary data is read by python `read` function. 
Then resulting data is passed\n  into `pandas.read_*` function as `io.BytesIO` object to get corresponding\n  `pandas.DataFrame` (we need to do this because Modin partitions internally stores data\n  as `pandas.DataFrame`).\n\n* columnar store type (FEATHER, HDF, PARQUET):\n  In this case data chunk to be read is defined by columns names passed as `columns`\n  parameter as part of `parse` kwargs, so no additional action is needed and `fname`\n  and `kwargs` are just passed into `pandas.read_*` function (in some corner cases\n  `pyarrow.read_*` function can be used).\n\n* SQL type:\n  Chunking is incorporated in the `sql` parameter as part of query, so `parse`\n  parameters are passed into `pandas.read_sql` function without modification.\n\"\"\"\n\nimport contextlib\nimport json\nimport os\nimport warnings\nfrom io import BytesIO, IOBase, TextIOWrapper\nfrom typing import Any, NamedTuple\n\nimport fsspec\nimport numpy as np\nimport pandas\nfrom pandas.core.dtypes.cast import find_common_type\nfrom pandas.core.dtypes.concat import union_categoricals\nfrom pandas.io.common import infer_compression\nfrom pandas.util._decorators import doc\n\nfrom modin.config import MinColumnPartitionSize, MinRowPartitionSize\nfrom modin.core.io.file_dispatcher import OpenFile\nfrom modin.core.storage_formats.pandas.utils import split_result_of_axis_func_pandas\nfrom modin.db_conn import ModinDatabaseConnection\nfrom modin.error_message import ErrorMessage\nfrom modin.logging import ClassLogger\nfrom modin.logging.config import LogLevel\nfrom modin.utils import ModinAssumptionError\n\n_doc_pandas_parser_class = \"\"\"\nClass for handling {data_type} on the workers using pandas storage format.\n\nInherits common functions from `PandasParser` class.\n\"\"\"\n\n_doc_parse_func = \"\"\"\nParse data on the workers.\n\nParameters\n----------\n{parameters}\n**kwargs : dict\n    Keywords arguments to be used by `parse` function or\n    passed into `read_*` function.\n\nReturns\n-------\nlist\n 
   List with split parse results and it's metadata\n    (index, dtypes, etc.).\n\"\"\"\n\n_doc_parse_parameters_common = \"\"\"fname : str or path object\n    Name of the file or path to read.\"\"\"\n\n_doc_common_read_kwargs = \"\"\"common_read_kwargs : dict\n    Common keyword parameters for read functions.\n\"\"\"\n_doc_parse_parameters_common2 = \"\\n\".join(\n    (_doc_parse_parameters_common, _doc_common_read_kwargs)\n)\n\n\ndef _split_result_for_readers(axis, num_splits, df):  # pragma: no cover\n    \"\"\"\n    Split the read DataFrame into smaller DataFrames and handle all edge cases.\n\n    Parameters\n    ----------\n    axis : int\n        The axis to split across (0 - index, 1 - columns).\n    num_splits : int\n        The number of splits to create.\n    df : pandas.DataFrame\n        `pandas.DataFrame` to split.\n\n    Returns\n    -------\n    list\n        A list of pandas DataFrames.\n    \"\"\"\n    splits = split_result_of_axis_func_pandas(\n        axis,\n        num_splits,\n        df,\n        min_block_size=(\n            MinRowPartitionSize.get() if axis == 0 else MinColumnPartitionSize.get()\n        ),\n    )\n    if not isinstance(splits, list):\n        splits = [splits]\n    return splits\n\n\ndef find_common_type_cat(types):\n    \"\"\"\n    Find a common data type among the given dtypes.\n\n    Parameters\n    ----------\n    types : array-like\n        Array of dtypes.\n\n    Returns\n    -------\n    pandas.core.dtypes.dtypes.ExtensionDtype or\n    np.dtype or\n    None\n        `dtype` that is common for all passed `types`.\n    \"\"\"\n    if all(isinstance(t, pandas.CategoricalDtype) for t in types):\n        if all(t.ordered for t in types):\n            categories = np.sort(np.unique([c for t in types for c in t.categories]))\n            return pandas.CategoricalDtype(\n                categories,\n                ordered=True,\n            )\n        return union_categoricals(\n            [pandas.Categorical([], dtype=t) 
for t in types],\n            sort_categories=all(t.ordered for t in types),\n        ).dtype\n    else:\n        return find_common_type(list(types))\n\n\nclass PandasParser(ClassLogger, modin_layer=\"PARSER\", log_level=LogLevel.DEBUG):\n    \"\"\"Base class for parser classes with pandas storage format.\"\"\"\n\n    @staticmethod\n    @doc(_doc_parse_func, parameters=_doc_parse_parameters_common)\n    def generic_parse(fname, **kwargs):\n        warnings.filterwarnings(\"ignore\")\n        num_splits = kwargs.pop(\"num_splits\", None)\n        start = kwargs.pop(\"start\", None)\n        end = kwargs.pop(\"end\", None)\n        header_size = kwargs.pop(\"header_size\", 0)\n        common_dtypes = kwargs.pop(\"common_dtypes\", None)\n        encoding = kwargs.get(\"encoding\", None)\n        callback = kwargs.pop(\"callback\")\n        if start is None or end is None:\n            # This only happens when we are reading with only one worker (Default)\n            return callback(fname, **kwargs)\n\n        # pop \"compression\" from kwargs because bio is uncompressed\n        with OpenFile(\n            fname,\n            \"rb\",\n            kwargs.pop(\"compression\", \"infer\"),\n            **(kwargs.pop(\"storage_options\", None) or {}),\n        ) as bio:\n            header = b\"\"\n            # In this case we beware that first line can contain BOM, so\n            # adding this line to the `header` for reading and then skip it\n            if encoding and (\n                \"utf\" in encoding\n                and \"8\" not in encoding\n                or encoding == \"unicode_escape\"\n                or encoding.replace(\"-\", \"_\") == \"utf_8_sig\"\n            ):\n                # do not 'close' the wrapper - underlying buffer is managed by `bio` handle\n                fio = TextIOWrapper(bio, encoding=encoding, newline=\"\")\n                if header_size == 0:\n                    header = fio.readline().encode(encoding)\n                    
kwargs[\"skiprows\"] = 1\n                for _ in range(header_size):\n                    header += fio.readline().encode(encoding)\n            elif encoding is not None:\n                if header_size == 0:\n                    header = bio.readline()\n                    # `skiprows` can be only None here, so don't check it's type\n                    # and just set to 1\n                    kwargs[\"skiprows\"] = 1\n                for _ in range(header_size):\n                    header += bio.readline()\n            else:\n                for _ in range(header_size):\n                    header += bio.readline()\n\n            bio.seek(start)\n            to_read = header + bio.read(end - start)\n        if \"memory_map\" in kwargs:\n            kwargs = kwargs.copy()\n            del kwargs[\"memory_map\"]\n        if common_dtypes is not None:\n            kwargs[\"dtype\"] = common_dtypes\n        pandas_df = callback(BytesIO(to_read), **kwargs)\n        index = (\n            pandas_df.index\n            if not isinstance(pandas_df.index, pandas.RangeIndex)\n            else len(pandas_df)\n        )\n        return _split_result_for_readers(1, num_splits, pandas_df) + [\n            index,\n            pandas_df.dtypes,\n        ]\n\n    @classmethod\n    def get_dtypes(cls, dtypes_ids, columns):\n        \"\"\"\n        Get common for all partitions dtype for each of the columns.\n\n        Parameters\n        ----------\n        dtypes_ids : list\n            Array with references to the partitions dtypes objects.\n        columns : array-like or Index (1d)\n            The names of the columns in this variable will be used\n            for dtypes creation.\n\n        Returns\n        -------\n        frame_dtypes : pandas.Series, dtype or None\n            Resulting dtype or pandas.Series where column names are used as\n            index and types of columns are used as values for full resulting\n            frame.\n        \"\"\"\n        if 
len(dtypes_ids) == 0:\n            return None\n        # each element in `partitions_dtypes` is a Series, where column names are\n        # used as index and types of columns for different partitions are used as values\n        partitions_dtypes = cls.materialize(dtypes_ids)\n        if all([len(dtype) == 0 for dtype in partitions_dtypes]):\n            return None\n\n        combined_part_dtypes = pandas.concat(partitions_dtypes, axis=1)\n        frame_dtypes = combined_part_dtypes.iloc[:, 0]\n        frame_dtypes.name = None\n\n        if not combined_part_dtypes.eq(frame_dtypes, axis=0).all(axis=None):\n            ErrorMessage.mismatch_with_pandas(\n                operation=\"read_*\",\n                message=\"Data types of partitions are different! \"\n                + \"Please refer to the troubleshooting section of the Modin documentation \"\n                + \"to fix this issue\",\n            )\n\n            # concat all elements of `partitions_dtypes` and find common dtype\n            # for each of the column among all partitions\n            frame_dtypes = combined_part_dtypes.apply(\n                lambda row: find_common_type_cat(row.values),\n                axis=1,\n            ).squeeze(axis=0)\n\n        # Set the index for the dtypes to the column names\n        if isinstance(frame_dtypes, pandas.Series):\n            frame_dtypes.index = columns\n        else:\n            frame_dtypes = pandas.Series(frame_dtypes, index=columns)\n\n        return frame_dtypes\n\n    @classmethod\n    def single_worker_read(cls, fname, *args, reason: str, **kwargs):\n        \"\"\"\n        Perform reading by single worker (default-to-pandas implementation).\n\n        Parameters\n        ----------\n        fname : str, path object or file-like object\n            Name of the file or file-like object to read.\n        *args : tuple\n            Positional arguments to be passed into `read_*` function.\n        reason : str\n            Message 
describing the reason for falling back to pandas.\n        **kwargs : dict\n            Keywords arguments to be passed into `read_*` function.\n\n        Returns\n        -------\n        BaseQueryCompiler or\n        dict or\n        pandas.io.parsers.TextFileReader\n            Object with imported data (or with reference to data) for further\n            processing, object type depends on the child class `parse` function\n            result type.\n        \"\"\"\n        ErrorMessage.default_to_pandas(reason=reason)\n        # Use default args for everything\n        pandas_frame = cls.parse(fname, *args, **kwargs)\n        if isinstance(pandas_frame, pandas.io.parsers.TextFileReader):\n            pd_read = pandas_frame.read\n            pandas_frame.read = (\n                lambda *args, **kwargs: cls.query_compiler_cls.from_pandas(\n                    pd_read(*args, **kwargs), cls.frame_cls\n                )\n            )\n            return pandas_frame\n        elif isinstance(pandas_frame, dict):\n            return {\n                i: cls.query_compiler_cls.from_pandas(frame, cls.frame_cls)\n                for i, frame in pandas_frame.items()\n            }\n        return cls.query_compiler_cls.from_pandas(pandas_frame, cls.frame_cls)\n\n    @staticmethod\n    def get_types_mapper(dtype_backend):\n        \"\"\"\n        Get types mapper that would be used in read_parquet/read_feather.\n\n        Parameters\n        ----------\n        dtype_backend : {\"numpy_nullable\", \"pyarrow\", lib.no_default}\n\n        Returns\n        -------\n        dict\n        \"\"\"\n        to_pandas_kwargs = {}\n        if dtype_backend == \"numpy_nullable\":\n            from pandas.io._util import _arrow_dtype_mapping\n\n            mapping = _arrow_dtype_mapping()\n            to_pandas_kwargs[\"types_mapper\"] = mapping.get\n        elif dtype_backend == \"pyarrow\":\n            to_pandas_kwargs[\"types_mapper\"] = pandas.ArrowDtype\n        return 
to_pandas_kwargs\n\n    infer_compression = infer_compression\n\n\n@doc(_doc_pandas_parser_class, data_type=\"CSV files\")\nclass PandasCSVParser(PandasParser):\n    @staticmethod\n    @doc(_doc_parse_func, parameters=_doc_parse_parameters_common2)\n    def parse(fname, common_read_kwargs, **kwargs):\n        return PandasParser.generic_parse(\n            fname,\n            callback=PandasCSVParser.read_callback,\n            **common_read_kwargs,\n            **kwargs,\n        )\n\n    @staticmethod\n    def read_callback(*args, **kwargs):\n        \"\"\"\n        Parse data on each partition.\n\n        Parameters\n        ----------\n        *args : list\n            Positional arguments to be passed to the callback function.\n        **kwargs : dict\n            Keyword arguments to be passed to the callback function.\n\n        Returns\n        -------\n        pandas.DataFrame or pandas.io.parsers.TextParser\n            Function call result.\n        \"\"\"\n        return pandas.read_csv(*args, **kwargs)\n\n\n@doc(_doc_pandas_parser_class, data_type=\"tables with fixed-width formatted lines\")\nclass PandasFWFParser(PandasParser):\n    @staticmethod\n    @doc(_doc_parse_func, parameters=_doc_parse_parameters_common2)\n    def parse(fname, common_read_kwargs, **kwargs):\n        return PandasParser.generic_parse(\n            fname,\n            callback=PandasFWFParser.read_callback,\n            **common_read_kwargs,\n            **kwargs,\n        )\n\n    @staticmethod\n    def read_callback(*args, **kwargs):\n        \"\"\"\n        Parse data on each partition.\n\n        Parameters\n        ----------\n        *args : list\n            Positional arguments to be passed to the callback function.\n        **kwargs : dict\n            Keyword arguments to be passed to the callback function.\n\n        Returns\n        -------\n        pandas.DataFrame or pandas.io.parsers.TextFileReader\n            Function call result.\n        \"\"\"\n        
return pandas.read_fwf(*args, **kwargs)\n\n\n@doc(_doc_pandas_parser_class, data_type=\"excel files\")\nclass PandasExcelParser(PandasParser):\n    @classmethod\n    def get_sheet_data(cls, sheet, convert_float):\n        \"\"\"\n        Get raw data from the excel sheet.\n\n        Parameters\n        ----------\n        sheet : openpyxl.worksheet.worksheet.Worksheet\n            Sheet to get data from.\n        convert_float : bool\n            Whether to convert floats to ints or not.\n\n        Returns\n        -------\n        list\n            List with sheet data.\n        \"\"\"\n        return [\n            [cls._convert_cell(cell, convert_float) for cell in row]\n            for row in sheet.rows\n        ]\n\n    @classmethod\n    def _convert_cell(cls, cell, convert_float):\n        \"\"\"\n        Convert excel cell to value.\n\n        Parameters\n        ----------\n        cell : openpyxl.cell.cell.Cell\n            Excel cell to convert.\n        convert_float : bool\n            Whether to convert floats to ints or not.\n\n        Returns\n        -------\n        list\n            Value that was converted from the excel cell.\n        \"\"\"\n        if cell.is_date:\n            return cell.value\n        elif cell.data_type == \"e\":\n            return np.nan\n        elif cell.data_type == \"b\":\n            return bool(cell.value)\n        elif cell.value is None:\n            return \"\"\n        elif cell.data_type == \"n\":\n            if convert_float:\n                val = int(cell.value)\n                if val == cell.value:\n                    return val\n            else:\n                return float(cell.value)\n\n        return cell.value\n\n    @staticmethod\n    def need_rich_text_param():\n        \"\"\"\n        Determine whether a required `rich_text` parameter should be specified for the ``WorksheetReader`` constructor.\n\n        Returns\n        -------\n        bool\n        \"\"\"\n        import openpyxl\n        
from packaging import version\n\n        return version.parse(openpyxl.__version__) >= version.parse(\"3.1.0\")\n\n    @staticmethod\n    @doc(_doc_parse_func, parameters=_doc_parse_parameters_common)\n    def parse(fname, **kwargs):\n        num_splits = kwargs.pop(\"num_splits\", None)\n        start = kwargs.pop(\"start\", None)\n        end = kwargs.pop(\"end\", None)\n        excel_header = kwargs.get(\"_header\")\n        sheet_name = kwargs.get(\"sheet_name\", 0)\n        footer = b\"</sheetData></worksheet>\"\n\n        # Default to pandas case, where we are not splitting or partitioning\n        if start is None or end is None:\n            return pandas.read_excel(fname, **kwargs)\n\n        _skiprows = kwargs.pop(\"skiprows\")\n\n        import re\n        from zipfile import ZipFile\n\n        import openpyxl\n        from openpyxl.reader.excel import ExcelReader\n        from openpyxl.worksheet._reader import WorksheetReader\n        from openpyxl.worksheet.worksheet import Worksheet\n        from pandas.core.dtypes.common import is_list_like\n        from pandas.io.excel._util import fill_mi_header, maybe_convert_usecols\n        from pandas.io.parsers import TextParser\n\n        wb = openpyxl.load_workbook(filename=fname, read_only=True)\n        # Get shared strings\n        ex = ExcelReader(fname, read_only=True)\n        ex.read_manifest()\n        ex.read_strings()\n        # Convert string name 0 to string\n        if sheet_name == 0:\n            sheet_name = wb.sheetnames[sheet_name]\n        # get the worksheet to use with the worksheet reader\n        ws = Worksheet(wb)\n        # Read the raw data\n        with ZipFile(fname) as z:\n            with z.open(\"xl/worksheets/{}.xml\".format(sheet_name)) as file:\n                file.seek(start)\n                bytes_data = file.read(end - start)\n\n        def update_row_nums(match):\n            \"\"\"\n            Update the row numbers to start at 1.\n\n            Parameters\n           
 ----------\n            match : re.Match object\n                The match from the origin `re.sub` looking for row number tags.\n\n            Returns\n            -------\n            str\n                The updated string with new row numbers.\n\n            Notes\n            -----\n            This is needed because the parser we are using does not scale well if\n            the row numbers remain because empty rows are inserted for all \"missing\"\n            rows.\n            \"\"\"\n            b = match.group(0)\n            return re.sub(\n                rb\"\\d+\",\n                lambda c: str(int(c.group(0).decode(\"utf-8\")) - _skiprows).encode(\n                    \"utf-8\"\n                ),\n                b,\n            )\n\n        bytes_data = re.sub(rb'r=\"[A-Z]*\\d+\"', update_row_nums, bytes_data)\n        bytesio = BytesIO(excel_header + bytes_data + footer)\n        # Use openpyxl to read/parse sheet data\n        common_args = (ws, bytesio, ex.shared_strings, False)\n        if PandasExcelParser.need_rich_text_param():\n            reader = WorksheetReader(*common_args, rich_text=False)\n        else:\n            reader = WorksheetReader(*common_args)\n        # Attach cells to worksheet object\n        reader.bind_cells()\n        data = PandasExcelParser.get_sheet_data(ws, kwargs.pop(\"convert_float\", True))\n        usecols = maybe_convert_usecols(kwargs.pop(\"usecols\", None))\n        header = kwargs.pop(\"header\", 0)\n        index_col = kwargs.pop(\"index_col\", None)\n        # skiprows is handled externally\n        skiprows = None\n\n        # Handle header and create MultiIndex for columns if necessary\n        if is_list_like(header) and len(header) == 1:\n            header = header[0]\n        if header is not None and is_list_like(header):\n            control_row = [True] * len(data[0])\n\n            for row in header:\n                data[row], control_row = fill_mi_header(data[row], control_row)\n        # 
Handle MultiIndex for row Index if necessary\n        if is_list_like(index_col):\n            # Forward fill values for MultiIndex index.\n            if not is_list_like(header):\n                offset = 1 + header\n            else:\n                offset = 1 + max(header)\n\n            # Check if dataset is empty\n            if offset < len(data):\n                for col in index_col:\n                    last = data[offset][col]\n                    for row in range(offset + 1, len(data)):\n                        if data[row][col] == \"\" or data[row][col] is None:\n                            data[row][col] = last\n                        else:\n                            last = data[row][col]\n        parser = TextParser(\n            data,\n            header=header,\n            index_col=index_col,\n            has_index_names=is_list_like(header) and len(header) > 1,\n            skiprows=skiprows,\n            usecols=usecols,\n            skip_blank_lines=False,\n            **kwargs,\n        )\n        pandas_df = parser.read()\n        if (\n            len(pandas_df) > 1\n            and len(pandas_df.columns) != 0\n            and pandas_df.isnull().all().all()\n        ):\n            # Drop NaN rows at the end of the DataFrame\n            pandas_df = pandas.DataFrame(columns=pandas_df.columns)\n\n        # Since we know the number of rows that occur before this partition, we can\n        # correctly assign the index in cases of RangeIndex. 
If it is not a RangeIndex,\n        # the index is already correct because it came from the data.\n        if isinstance(pandas_df.index, pandas.RangeIndex):\n            pandas_df.index = pandas.RangeIndex(\n                start=_skiprows, stop=len(pandas_df.index) + _skiprows\n            )\n        # We return the length if it is a RangeIndex (common case) to reduce\n        # serialization cost.\n        if index_col is not None:\n            index = pandas_df.index\n        else:\n            # The lengths will become the RangeIndex\n            index = len(pandas_df)\n        return _split_result_for_readers(1, num_splits, pandas_df) + [\n            index,\n            pandas_df.dtypes,\n        ]\n\n\n@doc(_doc_pandas_parser_class, data_type=\"JSON files\")\nclass PandasJSONParser(PandasParser):\n    @staticmethod\n    @doc(_doc_parse_func, parameters=_doc_parse_parameters_common)\n    def parse(fname, **kwargs):\n        num_splits = kwargs.pop(\"num_splits\", None)\n        start = kwargs.pop(\"start\", None)\n        end = kwargs.pop(\"end\", None)\n        if start is not None and end is not None:\n            # pop \"compression\" from kwargs because bio is uncompressed\n            with OpenFile(\n                fname,\n                \"rb\",\n                kwargs.pop(\"compression\", \"infer\"),\n                **(kwargs.pop(\"storage_options\", None) or {}),\n            ) as bio:\n                bio.seek(start)\n                to_read = b\"\" + bio.read(end - start)\n            columns = kwargs.pop(\"columns\")\n            pandas_df = pandas.read_json(BytesIO(to_read), **kwargs)\n        else:\n            # This only happens when we are reading with only one worker (Default)\n            return pandas.read_json(fname, **kwargs)\n        if not pandas_df.columns.equals(columns):\n            raise ModinAssumptionError(\"Columns must be the same across all rows.\")\n        partition_columns = pandas_df.columns\n        return 
_split_result_for_readers(1, num_splits, pandas_df) + [\n            len(pandas_df),\n            pandas_df.dtypes,\n            partition_columns,\n        ]\n\n\nclass ParquetFileToRead(NamedTuple):\n    \"\"\"\n    Class to store path and row group information for parquet reads.\n\n    Parameters\n    ----------\n    path : str, path object or file-like object\n        Name of the file to read.\n    row_group_start : int\n        Row group to start read from.\n    row_group_end : int\n        Row group to stop read.\n    \"\"\"\n\n    path: Any\n    row_group_start: int\n    row_group_end: int\n\n\n@doc(_doc_pandas_parser_class, data_type=\"PARQUET data\")\nclass PandasParquetParser(PandasParser):\n    @staticmethod\n    def _read_row_group_chunk(\n        f, row_group_start, row_group_end, columns, filters, engine, to_pandas_kwargs\n    ):  # noqa: GL08\n        if engine == \"pyarrow\":\n            if filters is not None:\n                import pyarrow.dataset as ds\n                from pyarrow.parquet import filters_to_expression\n\n                parquet_format = ds.ParquetFileFormat()\n                fragment = parquet_format.make_fragment(\n                    f,\n                    row_groups=range(\n                        row_group_start,\n                        row_group_end,\n                    ),\n                )\n                dataset = ds.FileSystemDataset(\n                    [fragment],\n                    schema=fragment.physical_schema,\n                    format=parquet_format,\n                    filesystem=fragment.filesystem,\n                )\n\n                # This lower-level API doesn't have the ability to automatically handle pandas metadata\n                # The following code is based on\n                # https://github.com/apache/arrow/blob/f44e28fa03a64ae5b3d9352d21aee2cc84f9af6c/python/pyarrow/parquet/core.py#L2619-L2628\n\n                # if use_pandas_metadata, we need to include index columns in the\n     
           # column selection, to be able to restore those in the pandas DataFrame\n                metadata = dataset.schema.metadata or {}\n\n                if b\"pandas\" in metadata and columns is not None:\n                    index_columns = json.loads(metadata[b\"pandas\"].decode(\"utf8\"))[\n                        \"index_columns\"\n                    ]\n                    # In the pandas metadata, the index columns can either be string column names,\n                    # or a dictionary that describes a RangeIndex.\n                    # Here, we are finding the real data columns that need to be read to become part\n                    # of the pandas Index, so we can skip the RangeIndex.\n                    # Not only can a RangeIndex be trivially reconstructed later, but we actually\n                    # ignore partition-level range indices, because we want to have a single Modin\n                    # RangeIndex that spans all partitions.\n                    index_columns = [\n                        col for col in index_columns if not isinstance(col, dict)\n                    ]\n                    columns = list(columns) + list(set(index_columns) - set(columns))\n\n                return dataset.to_table(\n                    columns=columns,\n                    filter=filters_to_expression(filters),\n                ).to_pandas(**to_pandas_kwargs)\n            else:\n                from pyarrow.parquet import ParquetFile\n\n                return (\n                    ParquetFile(f)\n                    .read_row_groups(\n                        range(\n                            row_group_start,\n                            row_group_end,\n                        ),\n                        columns=columns,\n                        use_pandas_metadata=True,\n                    )\n                    .to_pandas(**to_pandas_kwargs)\n                )\n        elif engine == \"fastparquet\":\n            from fastparquet import 
ParquetFile\n\n            return ParquetFile(f)[row_group_start:row_group_end].to_pandas(\n                columns=columns,\n                filters=filters,\n                # Setting row_filter=True would perform filtering at the row level, which is more correct\n                # (in line with pyarrow)\n                # However, it doesn't work: https://github.com/dask/fastparquet/issues/873\n                # Also, this would create incompatibility with pandas\n            )\n        else:\n            # We shouldn't ever come to this case, so something went wrong\n            raise ValueError(\n                f\"engine must be one of 'pyarrow', 'fastparquet', got: {engine}\"\n            )\n\n    @staticmethod\n    @doc(\n        _doc_parse_func,\n        parameters=\"\"\"files_for_parser : list\n    List of files to be read.\nengine : str\n    Parquet library to use (either PyArrow or fastparquet).\n\"\"\",\n    )\n    def parse(files_for_parser, engine, **kwargs):\n        columns = kwargs.get(\"columns\", None)\n        filters = kwargs.get(\"filters\", None)\n        storage_options = kwargs.get(\"storage_options\", {})\n        chunks = []\n        # `single_worker_read` just passes in a string path or path-like object\n        if isinstance(files_for_parser, (str, os.PathLike)):\n            return pandas.read_parquet(files_for_parser, engine=engine, **kwargs)\n\n        to_pandas_kwargs = PandasParser.get_types_mapper(kwargs[\"dtype_backend\"])\n\n        for file_for_parser in files_for_parser:\n            if isinstance(file_for_parser.path, IOBase):\n                context = contextlib.nullcontext(file_for_parser.path)\n            else:\n                context = fsspec.open(file_for_parser.path, **storage_options)\n            with context as f:\n                chunk = PandasParquetParser._read_row_group_chunk(\n                    f,\n                    file_for_parser.row_group_start,\n                    file_for_parser.row_group_end,\n    
                columns,\n                    filters,\n                    engine,\n                    to_pandas_kwargs,\n                )\n            chunks.append(chunk)\n        df = pandas.concat(chunks)\n        return df, df.index, len(df)\n\n\n@doc(_doc_pandas_parser_class, data_type=\"HDF data\")\nclass PandasHDFParser(PandasParser):  # pragma: no cover\n    @staticmethod\n    @doc(\n        _doc_parse_func,\n        parameters=\"\"\"fname : str, path object, pandas.HDFStore or file-like object\n    Name of the file, path pandas.HDFStore or file-like object to read.\"\"\",\n    )\n    def parse(fname, **kwargs):\n        kwargs[\"key\"] = kwargs.pop(\"_key\", None)\n        num_splits = kwargs.pop(\"num_splits\", None)\n        if num_splits is None:\n            return pandas.read_hdf(fname, **kwargs)\n        df = pandas.read_hdf(fname, **kwargs)\n        # Append the length of the index here to build it externally\n        return _split_result_for_readers(0, num_splits, df) + [len(df.index), df.dtypes]\n\n\n@doc(_doc_pandas_parser_class, data_type=\"FEATHER files\")\nclass PandasFeatherParser(PandasParser):\n    @staticmethod\n    @doc(\n        _doc_parse_func,\n        parameters=\"\"\"fname : str, path object or file-like object\n    Name of the file, path or file-like object to read.\"\"\",\n    )\n    def parse(fname, **kwargs):\n        from pyarrow import feather\n\n        num_splits = kwargs.pop(\"num_splits\", None)\n        if num_splits is None:\n            return pandas.read_feather(fname, **kwargs)\n\n        to_pandas_kwargs = PandasParser.get_types_mapper(kwargs[\"dtype_backend\"])\n        del kwargs[\"dtype_backend\"]\n\n        with OpenFile(\n            fname,\n            **(kwargs.pop(\"storage_options\", None) or {}),\n        ) as file:\n            # The implementation is as close as possible to the one in pandas.\n            # For reference see `read_feather` in pandas/io/feather_format.py.\n            if not 
to_pandas_kwargs:\n                df = feather.read_feather(file, **kwargs)\n            else:\n                # `read_feather` doesn't accept `types_mapper` if pyarrow<11.0\n                pa_table = feather.read_table(file, **kwargs)\n                df = pa_table.to_pandas(**to_pandas_kwargs)\n        # Append the length of the index here to build it externally\n        return _split_result_for_readers(0, num_splits, df) + [len(df.index), df.dtypes]\n\n\n@doc(_doc_pandas_parser_class, data_type=\"SQL queries or tables\")\nclass PandasSQLParser(PandasParser):\n    @staticmethod\n    @doc(\n        _doc_parse_func,\n        parameters=\"\"\"sql : str or SQLAlchemy Selectable (select or text object)\n    SQL query to be executed or a table name.\ncon : SQLAlchemy connectable, str, or sqlite3 connection\n    Connection object to database.\nindex_col : str or list of str\n    Column(s) to set as index(MultiIndex).\nread_sql_engine : str\n    Underlying engine ('pandas' or 'connectorx') used for fetching query result.\"\"\",\n    )\n    def parse(sql, con, index_col, read_sql_engine, **kwargs):\n        enable_cx = False\n        if read_sql_engine == \"Connectorx\":\n            try:\n                import connectorx as cx\n\n                enable_cx = True\n            except ImportError:\n                warnings.warn(\n                    \"Switch to 'pandas.read_sql' since 'connectorx' is not installed, please run 'pip install connectorx'.\"\n                )\n\n        num_splits = kwargs.pop(\"num_splits\", None)\n        if isinstance(con, ModinDatabaseConnection):\n            con = con.get_string() if enable_cx else con.get_connection()\n\n        if num_splits is None:\n            if enable_cx:\n                return cx.read_sql(con, sql, index_col=index_col)\n            return pandas.read_sql(sql, con, index_col=index_col, **kwargs)\n\n        if enable_cx:\n            df = cx.read_sql(con, sql, index_col=index_col)\n        else:\n            df 
= pandas.read_sql(sql, con, index_col=index_col, **kwargs)\n        if index_col is None:\n            index = len(df)\n        else:\n            index = df.index\n        return _split_result_for_readers(1, num_splits, df) + [index, df.dtypes]\n"
  },
  {
    "path": "modin/core/storage_formats/pandas/query_compiler.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"\nModule contains ``PandasQueryCompiler`` class.\n\n``PandasQueryCompiler`` is responsible for compiling efficient DataFrame algebra\nqueries for the ``PandasDataframe``.\n\"\"\"\n\nfrom __future__ import annotations\n\nimport ast\nimport hashlib\nimport re\nimport warnings\nfrom collections.abc import Iterable\nfrom typing import TYPE_CHECKING, Any, Hashable, List, Literal, Optional, Union\n\nimport numpy as np\nimport pandas\nfrom pandas._libs import lib\nfrom pandas.api.types import is_scalar\nfrom pandas.core.apply import reconstruct_func\nfrom pandas.core.common import is_bool_indexer\nfrom pandas.core.dtypes.cast import find_common_type\nfrom pandas.core.dtypes.common import (\n    is_bool_dtype,\n    is_datetime64_any_dtype,\n    is_list_like,\n    is_numeric_dtype,\n)\nfrom pandas.core.groupby.base import transformation_kernels\nfrom pandas.core.indexes.api import ensure_index_from_sequences\nfrom pandas.core.indexing import check_bool_indexer\nfrom pandas.errors import DataError\n\nfrom modin.config import CpuCount, RangePartitioning\nfrom modin.core.dataframe.algebra import (\n    Binary,\n    Fold,\n    GroupByReduce,\n    Map,\n    Reduce,\n    
TreeReduce,\n)\nfrom modin.core.dataframe.algebra.default2pandas.groupby import (\n    GroupBy,\n    GroupByDefault,\n    SeriesGroupByDefault,\n)\nfrom modin.core.dataframe.base.interchange.dataframe_protocol.dataframe import (\n    ProtocolDataframe,\n)\nfrom modin.core.dataframe.pandas.metadata import (\n    DtypesDescriptor,\n    ModinDtypes,\n    ModinIndex,\n    extract_dtype,\n)\nfrom modin.core.storage_formats import BaseQueryCompiler\nfrom modin.error_message import ErrorMessage\nfrom modin.logging import get_logger\nfrom modin.utils import (\n    MODIN_UNNAMED_SERIES_LABEL,\n    _inherit_docstrings,\n    hashable,\n    try_cast_to_pandas,\n    wrap_udf_function,\n)\n\nfrom .aggregations import CorrCovBuilder\nfrom .groupby import GroupbyReduceImpl, PivotTableImpl\nfrom .merge import MergeImpl\nfrom .utils import get_group_names, merge_partitioning\n\nif TYPE_CHECKING:\n    from modin.core.dataframe.pandas.dataframe.dataframe import PandasDataframe\n\n\ndef _get_axis(axis):\n    \"\"\"\n    Build index labels getter of the specified axis.\n\n    Parameters\n    ----------\n    axis : {0, 1}\n        Axis to get labels from. 0 is for index and 1 is for column.\n\n    Returns\n    -------\n    callable(PandasQueryCompiler) -> pandas.Index\n    \"\"\"\n    if axis == 0:\n        return lambda self: self._modin_frame.index\n    else:\n        return lambda self: self._modin_frame.columns\n\n\ndef _set_axis(axis):\n    \"\"\"\n    Build index labels setter of the specified axis.\n\n    Parameters\n    ----------\n    axis : {0, 1}\n        Axis to set labels on. 
0 is for index and 1 is for column.\n\n    Returns\n    -------\n    callable(PandasQueryCompiler)\n    \"\"\"\n    if axis == 0:\n\n        def set_axis(self, idx):\n            self._modin_frame.index = idx\n\n    else:\n\n        def set_axis(self, cols):\n            self._modin_frame.columns = cols\n\n    return set_axis\n\n\ndef _str_map(func_name):\n    \"\"\"\n    Build function that calls specified string function on frames ``str`` accessor.\n\n    Parameters\n    ----------\n    func_name : str\n        String function name to execute on ``str`` accessor.\n\n    Returns\n    -------\n    callable(pandas.DataFrame, *args, **kwargs) -> pandas.DataFrame\n    \"\"\"\n\n    def str_op_builder(df, *args, **kwargs):\n        \"\"\"Apply specified function against `str` accessor of the passed frame.\"\"\"\n        str_s = df.squeeze(axis=1).str\n        res = getattr(pandas.Series.str, func_name)(str_s, *args, **kwargs)\n        if hasattr(res, \"to_frame\"):\n            res = res.to_frame()\n        return res\n\n    return str_op_builder\n\n\ndef _dt_prop_map(property_name):\n    \"\"\"\n    Build function that access specified property of the ``dt`` property of the passed frame.\n\n    Parameters\n    ----------\n    property_name : str\n        Date-time property name to access.\n\n    Returns\n    -------\n    callable(pandas.DataFrame, *args, **kwargs) -> pandas.DataFrame\n        Function to be applied in the partitions.\n\n    Notes\n    -----\n    This applies non-callable properties of ``Series.dt``.\n    \"\"\"\n\n    def dt_op_builder(df, *args, **kwargs):\n        \"\"\"Access specified date-time property of the passed frame.\"\"\"\n        squeezed_df = df.squeeze(axis=1)\n        if isinstance(squeezed_df, pandas.DataFrame) and len(squeezed_df.columns) == 0:\n            return squeezed_df\n        assert isinstance(squeezed_df, pandas.Series)\n        prop_val = getattr(squeezed_df.dt, property_name)\n        if isinstance(prop_val, 
pandas.Series):\n            return prop_val.to_frame()\n        elif isinstance(prop_val, pandas.DataFrame):\n            return prop_val\n        else:\n            return pandas.DataFrame([prop_val])\n\n    return dt_op_builder\n\n\ndef _dt_func_map(func_name):\n    \"\"\"\n    Build function that apply specified method against ``dt`` property of the passed frame.\n\n    Parameters\n    ----------\n    func_name : str\n        Date-time function name to apply.\n\n    Returns\n    -------\n    callable(pandas.DataFrame, *args, **kwargs) -> pandas.DataFrame\n        Function to be applied in the partitions.\n\n    Notes\n    -----\n    This applies callable methods of ``Series.dt``.\n    \"\"\"\n\n    def dt_op_builder(df, *args, **kwargs):\n        \"\"\"Apply specified function against ``dt`` accessor of the passed frame.\"\"\"\n        dt_s = df.squeeze(axis=1).dt\n        dt_func_result = getattr(pandas.Series.dt, func_name)(dt_s, *args, **kwargs)\n        # If we don't specify the dtype for the frame, the frame might get the\n        # wrong dtype, e.g. 
for to_pydatetime in https://github.com/modin-project/modin/issues/4436\n        return pandas.DataFrame(dt_func_result, dtype=dt_func_result.dtype)\n\n    return dt_op_builder\n\n\ndef copy_df_for_func(func, display_name: str = None):\n    \"\"\"\n    Build function that execute specified `func` against passed frame inplace.\n\n    Built function copies passed frame, applies `func` to the copy and returns\n    the modified frame.\n\n    Parameters\n    ----------\n    func : callable(pandas.DataFrame)\n        The function, usually updates a dataframe inplace.\n    display_name : str, optional\n        The function's name, which is displayed by progress bar.\n\n    Returns\n    -------\n    callable(pandas.DataFrame)\n        A callable function to be applied in the partitions.\n    \"\"\"\n\n    def caller(df, *args, **kwargs):\n        \"\"\"Apply specified function the passed frame inplace.\"\"\"\n        df = df.copy()\n        func(df, *args, **kwargs)\n        return df\n\n    if display_name is not None:\n        caller.__name__ = display_name\n    return caller\n\n\ndef _series_logical_binop(func):\n    \"\"\"\n    Build a callable function to pass to Binary.register for Series logical operators.\n\n    Parameters\n    ----------\n    func : callable\n        Binary operator method of pandas.Series to be applied.\n\n    Returns\n    -------\n    callable\n    \"\"\"\n    return lambda x, y, **kwargs: func(\n        x.squeeze(axis=1),\n        y.squeeze(axis=1) if kwargs.pop(\"squeeze_other\", False) else y,\n        **kwargs,\n    ).to_frame()\n\n\n@_inherit_docstrings(BaseQueryCompiler)\nclass PandasQueryCompiler(BaseQueryCompiler):\n    \"\"\"\n    Query compiler for the pandas storage format.\n\n    This class translates common query compiler API into the DataFrame Algebra\n    queries, that is supposed to be executed by :py:class:`~modin.core.dataframe.pandas.dataframe.dataframe.PandasDataframe`.\n\n    Parameters\n    ----------\n    modin_frame : 
PandasDataframe\n        Modin Frame to query with the compiled queries.\n    shape_hint : {\"row\", \"column\", None}, default: None\n        Shape hint for frames known to be a column or a row, otherwise None.\n    \"\"\"\n\n    _modin_frame: PandasDataframe\n    _shape_hint: Optional[str]\n\n    def __init__(self, modin_frame: PandasDataframe, shape_hint: Optional[str] = None):\n        self._modin_frame = modin_frame\n        self._shape_hint = shape_hint\n\n    storage_format = property(lambda self: self._modin_frame.storage_format)\n    engine = property(lambda self: self._modin_frame.engine)\n\n    @property\n    def lazy_row_labels(self):\n        \"\"\"\n        Whether the row labels are computed lazily.\n\n        Equivalent to `not self.frame_has_materialized_index`.\n\n        Returns\n        -------\n        bool\n        \"\"\"\n        return not self.frame_has_materialized_index\n\n    @property\n    def lazy_row_count(self):\n        \"\"\"\n        Whether the row count is computed lazily.\n\n        Equivalent to `not self.frame_has_materialized_index`.\n\n        Returns\n        -------\n        bool\n        \"\"\"\n        return not self.frame_has_materialized_index\n\n    @property\n    def lazy_column_types(self):\n        \"\"\"\n        Whether the dtypes are computed lazily.\n\n        Equivalent to `not self.frame_has_materialized_dtypes`.\n\n        Returns\n        -------\n        bool\n        \"\"\"\n        return not self.frame_has_materialized_dtypes\n\n    @property\n    def lazy_column_labels(self):\n        \"\"\"\n        Whether the column labels are computed lazily.\n\n        Equivalent to `not self.frame_has_materialized_columns`.\n\n        Returns\n        -------\n        bool\n        \"\"\"\n        return not self.frame_has_materialized_columns\n\n    @property\n    def lazy_column_count(self):\n        \"\"\"\n        Whether the column count is computed lazily.\n\n        Equivalent to `not 
self.frame_has_materialized_columns`.\n\n        Returns\n        -------\n        bool\n        \"\"\"\n        return not self.frame_has_materialized_columns\n\n    # The default implementation of stay_cost will cache some information\n    # which will violate some assumptions in test_internals. Since this class\n    # is only used for non-hybrid operations we simply return 0 here for now.\n    def stay_cost(self, api_cls_name, operation, arguments):\n        return 0\n\n    def finalize(self):\n        self._modin_frame.finalize()\n\n    def execute(self):\n        self.finalize()\n        self._modin_frame.wait_computations()\n\n    def to_pandas(self):\n        return self._modin_frame.to_pandas()\n\n    @classmethod\n    def from_pandas(cls, df, data_cls):\n        return cls(data_cls.from_pandas(df))\n\n    @classmethod\n    def from_arrow(cls, at, data_cls):\n        return cls(data_cls.from_arrow(at))\n\n    # Dataframe exchange protocol\n\n    def to_interchange_dataframe(\n        self, nan_as_null: bool = False, allow_copy: bool = True\n    ):\n        return self._modin_frame.__dataframe__(\n            nan_as_null=nan_as_null, allow_copy=allow_copy\n        )\n\n    @classmethod\n    def from_interchange_dataframe(cls, df: ProtocolDataframe, data_cls):\n        return cls(data_cls.from_interchange_dataframe(df))\n\n    # END Dataframe exchange protocol\n\n    index: pandas.Index = property(_get_axis(0), _set_axis(0))\n    columns: pandas.Index = property(_get_axis(1), _set_axis(1))\n\n    def get_axis_len(self, axis: Literal[0, 1]) -> int:\n        \"\"\"\n        Return the length of the specified axis.\n\n        Parameters\n        ----------\n        axis : {0, 1}\n            Axis to return labels on.\n\n        Returns\n        -------\n        int\n        \"\"\"\n        if axis == 0:\n            return len(self._modin_frame)\n        else:\n            return sum(self._modin_frame.column_widths)\n\n    @property\n    def dtypes(self) -> 
pandas.Series:\n        return self._modin_frame.dtypes\n\n    def get_dtypes_set(self):\n        return self._modin_frame.get_dtypes_set()\n\n    # END Index, columns, and dtypes objects\n\n    # Metadata modification methods\n    def add_prefix(self, prefix, axis=1):\n        if axis == 1:\n            return self.__constructor__(\n                self._modin_frame.rename(new_col_labels=lambda x: f\"{prefix}{x}\")\n            )\n        else:\n            return self.__constructor__(\n                self._modin_frame.rename(new_row_labels=lambda x: f\"{prefix}{x}\")\n            )\n\n    def add_suffix(self, suffix, axis=1):\n        if axis == 1:\n            return self.__constructor__(\n                self._modin_frame.rename(new_col_labels=lambda x: f\"{x}{suffix}\")\n            )\n        else:\n            return self.__constructor__(\n                self._modin_frame.rename(new_row_labels=lambda x: f\"{x}{suffix}\")\n            )\n\n    # END Metadata modification methods\n\n    # Copy\n    # For copy, we don't want a situation where we modify the metadata of the\n    # copies if we end up modifying something here. We copy all of the metadata\n    # to prevent that.\n    def copy(self):\n        return self.__constructor__(self._modin_frame.copy(), self._shape_hint)\n\n    # END Copy\n\n    # Append/Concat/Join (Not Merge)\n    # The append/concat/join operations should ideally never trigger remote\n    # compute. These operations should only ever be manipulations of the\n    # metadata of the resulting object. It should just be a simple matter of\n    # appending the other object's blocks and adding np.nan columns for the new\n    # columns, if needed. If new columns are added, some compute may be\n    # required, though it can be delayed.\n    #\n    # Currently this computation is not delayed, and it may make a copy of the\n    # DataFrame in memory. This can be problematic and should be fixed in the\n    # future. 
TODO (devin-petersohn): Delay reindexing\n\n    def concat(self, axis, other, **kwargs):\n        if not isinstance(other, list):\n            other = [other]\n        assert all(\n            isinstance(o, type(self)) for o in other\n        ), \"Different Manager objects are being used. This is not allowed\"\n        sort = kwargs.get(\"sort\", None)\n        if sort is None:\n            sort = False\n        join = kwargs.get(\"join\", \"outer\")\n        ignore_index = kwargs.get(\"ignore_index\", False)\n        other_modin_frame = [o._modin_frame for o in other]\n        new_modin_frame = self._modin_frame.concat(axis, other_modin_frame, join, sort)\n        result = self.__constructor__(new_modin_frame)\n        if ignore_index:\n            if axis == 0:\n                return result.reset_index(drop=True)\n            else:\n                result.columns = pandas.RangeIndex(len(result.columns))\n                return result\n        return result\n\n    # END Append/Concat/Join\n\n    # Data Management Methods\n    def free(self):\n        # TODO create a way to clean up this object.\n        return\n\n    # END Data Management Methods\n\n    # Data Movement Methods\n    def move_to(self, target_backend: str) -> Union[BaseQueryCompiler, Any]:\n        return NotImplemented\n\n    @classmethod\n    def move_from(cls, source_qc: BaseQueryCompiler) -> Union[BaseQueryCompiler, Any]:\n        return NotImplemented\n\n    # END Data Movement Methods\n\n    # To NumPy\n    def to_numpy(self, **kwargs):\n        return self._modin_frame.to_numpy(**kwargs)\n\n    # END To NumPy\n\n    # Binary operations (e.g. add, sub)\n    # These operations require two DataFrames and will change the shape of the\n    # data if the index objects don't match. 
An outer join + op is performed,\n    # such that columns/rows that don't have an index on the other DataFrame\n    # result in NaN values.\n\n    add = Binary.register(pandas.DataFrame.add, infer_dtypes=\"try_sample\")\n    # 'combine' and 'combine_first' are working with UDFs, so it's better not to sample them\n    combine = Binary.register(pandas.DataFrame.combine, infer_dtypes=\"common_cast\")\n    combine_first = Binary.register(\n        pandas.DataFrame.combine_first, infer_dtypes=\"common_cast\"\n    )\n    eq = Binary.register(pandas.DataFrame.eq, infer_dtypes=\"bool\")\n    equals = Binary.register(\n        lambda df, other: pandas.DataFrame([[df.equals(other)]]),\n        join_type=None,\n        labels=\"drop\",\n        infer_dtypes=\"bool\",\n    )\n    floordiv = Binary.register(pandas.DataFrame.floordiv, infer_dtypes=\"try_sample\")\n    ge = Binary.register(pandas.DataFrame.ge, infer_dtypes=\"bool\")\n    gt = Binary.register(pandas.DataFrame.gt, infer_dtypes=\"bool\")\n    le = Binary.register(pandas.DataFrame.le, infer_dtypes=\"bool\")\n    lt = Binary.register(pandas.DataFrame.lt, infer_dtypes=\"bool\")\n    mod = Binary.register(pandas.DataFrame.mod, infer_dtypes=\"try_sample\")\n    mul = Binary.register(pandas.DataFrame.mul, infer_dtypes=\"try_sample\")\n    rmul = Binary.register(pandas.DataFrame.rmul, infer_dtypes=\"try_sample\")\n    ne = Binary.register(pandas.DataFrame.ne, infer_dtypes=\"bool\")\n    pow = Binary.register(pandas.DataFrame.pow, infer_dtypes=\"try_sample\")\n    radd = Binary.register(pandas.DataFrame.radd, infer_dtypes=\"try_sample\")\n    rfloordiv = Binary.register(pandas.DataFrame.rfloordiv, infer_dtypes=\"try_sample\")\n    rmod = Binary.register(pandas.DataFrame.rmod, infer_dtypes=\"try_sample\")\n    rpow = Binary.register(pandas.DataFrame.rpow, infer_dtypes=\"try_sample\")\n    rsub = Binary.register(pandas.DataFrame.rsub, infer_dtypes=\"try_sample\")\n    rtruediv = Binary.register(pandas.DataFrame.rtruediv, 
infer_dtypes=\"try_sample\")\n    sub = Binary.register(pandas.DataFrame.sub, infer_dtypes=\"try_sample\")\n    truediv = Binary.register(pandas.DataFrame.truediv, infer_dtypes=\"try_sample\")\n    __and__ = Binary.register(pandas.DataFrame.__and__, infer_dtypes=\"bool\")\n    __or__ = Binary.register(pandas.DataFrame.__or__, infer_dtypes=\"bool\")\n    __rand__ = Binary.register(pandas.DataFrame.__rand__, infer_dtypes=\"bool\")\n    __ror__ = Binary.register(pandas.DataFrame.__ror__, infer_dtypes=\"bool\")\n    __rxor__ = Binary.register(pandas.DataFrame.__rxor__, infer_dtypes=\"bool\")\n    __xor__ = Binary.register(pandas.DataFrame.__xor__, infer_dtypes=\"bool\")\n    df_update = Binary.register(\n        copy_df_for_func(pandas.DataFrame.update, display_name=\"update\"),\n        join_type=\"left\",\n        sort=False,\n    )\n    series_update = Binary.register(\n        copy_df_for_func(\n            lambda x, y: pandas.Series.update(x.squeeze(axis=1), y.squeeze(axis=1)),\n            display_name=\"update\",\n        ),\n        join_type=\"left\",\n        sort=False,\n    )\n\n    # Series logical operators take an additional fill_value flag that dataframe does not\n    series_eq = Binary.register(\n        _series_logical_binop(pandas.Series.eq), infer_dtypes=\"bool\"\n    )\n    series_ge = Binary.register(\n        _series_logical_binop(pandas.Series.ge), infer_dtypes=\"bool\"\n    )\n    series_gt = Binary.register(\n        _series_logical_binop(pandas.Series.gt), infer_dtypes=\"bool\"\n    )\n    series_le = Binary.register(\n        _series_logical_binop(pandas.Series.le), infer_dtypes=\"bool\"\n    )\n    series_lt = Binary.register(\n        _series_logical_binop(pandas.Series.lt), infer_dtypes=\"bool\"\n    )\n    series_ne = Binary.register(\n        _series_logical_binop(pandas.Series.ne), infer_dtypes=\"bool\"\n    )\n\n    # Needed for numpy API\n    _logical_and = Binary.register(\n        lambda df, other, *args, **kwargs: 
pandas.DataFrame(\n            np.logical_and(df, other, *args, **kwargs)\n        ),\n        infer_dtypes=\"bool\",\n    )\n    _logical_or = Binary.register(\n        lambda df, other, *args, **kwargs: pandas.DataFrame(\n            np.logical_or(df, other, *args, **kwargs)\n        ),\n        infer_dtypes=\"bool\",\n    )\n    _logical_xor = Binary.register(\n        lambda df, other, *args, **kwargs: pandas.DataFrame(\n            np.logical_xor(df, other, *args, **kwargs)\n        ),\n        infer_dtypes=\"bool\",\n    )\n\n    def where(self, cond, other, **kwargs):\n        assert isinstance(\n            cond, type(self)\n        ), \"Must have the same QueryCompiler subclass to perform this operation\"\n        # it doesn't work if `other` is Series._query_compiler because\n        # `n_ary_op` performs columns copartition both for `cond` and `other`.\n        if isinstance(other, type(self)) and other._shape_hint is not None:\n            other = other.to_pandas()\n        if isinstance(other, type(self)):\n            # Make sure to set join_type=None so the `where` result always has\n            # the same row and column labels as `self`.\n            new_modin_frame = self._modin_frame.n_ary_op(\n                lambda df, cond, other: df.where(cond, other, **kwargs),\n                [\n                    cond._modin_frame,\n                    other._modin_frame,\n                ],\n                join_type=None,\n            )\n        # This will be a Series of scalars to be applied based on the condition\n        # dataframe.\n        else:\n\n            def where_builder_series(df, cond):\n                return df.where(cond, other, **kwargs)\n\n            new_modin_frame = self._modin_frame.n_ary_op(\n                where_builder_series, [cond._modin_frame], join_type=\"left\"\n            )\n        return self.__constructor__(new_modin_frame)\n\n    def merge(self, right, **kwargs):\n        if RangePartitioning.get():\n            
try:\n                return MergeImpl.range_partitioning_merge(self, right, kwargs)\n            except NotImplementedError as e:\n                message = (\n                    f\"Can't use range-partitioning merge implementation because of: {e}\"\n                    + \"\\nFalling back to a row-axis implementation.\"\n                )\n                get_logger().info(message)\n        return MergeImpl.row_axis_merge(self, right, kwargs)\n\n    def join(self, right: PandasQueryCompiler, **kwargs) -> PandasQueryCompiler:\n        on = kwargs.get(\"on\", None)\n        how = kwargs.get(\"how\", \"left\")\n        sort = kwargs.get(\"sort\", False)\n        left = self\n\n        if how in [\"left\", \"inner\"] or (\n            how == \"right\" and right._modin_frame._partitions.size != 0\n        ):\n            reverted = False\n            if how == \"right\":\n                left, right = right, left\n                reverted = True\n\n            def map_func(\n                left, right, kwargs=kwargs\n            ) -> pandas.DataFrame:  # pragma: no cover\n                if reverted:\n                    df = pandas.DataFrame.join(right, left, **kwargs)\n                else:\n                    df = pandas.DataFrame.join(left, right, **kwargs)\n                return df\n\n            right_to_broadcast = right._modin_frame.combine()\n            left = left.__constructor__(\n                left._modin_frame.broadcast_apply_full_axis(\n                    axis=1,\n                    func=map_func,\n                    # We're going to explicitly change the shape across the 1-axis,\n                    # so we want for partitioning to adapt as well\n                    keep_partitioning=False,\n                    num_splits=merge_partitioning(\n                        left._modin_frame, right._modin_frame, axis=1\n                    ),\n                    other=right_to_broadcast,\n                )\n            )\n            return 
left.sort_rows_by_column_values(on) if sort else left\n        else:\n            return left.default_to_pandas(pandas.DataFrame.join, right, **kwargs)\n\n    # END Inter-Data operations\n\n    # Reindex/reset_index (may shuffle data)\n    def reindex(self, axis, labels, **kwargs):\n        new_index, indexer = (self.index, None) if axis else self.index.reindex(labels)\n        new_columns, _ = self.columns.reindex(labels) if axis else (self.columns, None)\n        new_dtypes = None\n        if self.frame_has_materialized_dtypes and kwargs.get(\"method\", None) is None:\n            # For columns, defining types is easier because we don't have to calculate the common\n            # type, since the entire column is filled. A simple `reindex` covers our needs.\n            # For rows, we can avoid calculating common types if we know that no new strings of\n            # arbitrary type have been added (this information is in `indexer`).\n            dtype = pandas.Index([kwargs.get(\"fill_value\", np.nan)]).dtype\n            if axis == 0:\n                new_dtypes = self.dtypes.copy()\n                # \"-1\" means that the required labels are missing in the dataframe and the\n                # corresponding rows will be filled with \"fill_value\" that may change the column type.\n                if indexer is not None and -1 in indexer:\n                    for col, col_dtype in new_dtypes.items():\n                        new_dtypes[col] = find_common_type((col_dtype, dtype))\n            else:\n                new_dtypes = self.dtypes.reindex(labels, fill_value=dtype)\n        new_modin_frame = self._modin_frame.apply_full_axis(\n            axis,\n            lambda df: df.reindex(labels=labels, axis=axis, **kwargs),\n            new_index=new_index,\n            new_columns=new_columns,\n            dtypes=new_dtypes,\n        )\n        return self.__constructor__(new_modin_frame)\n\n    def reset_index(self, **kwargs) -> PandasQueryCompiler:\n        if 
self.lazy_row_labels:\n\n            def _reset(df, *axis_lengths, partition_idx):  # pragma: no cover\n                df = df.reset_index(**kwargs)\n\n                if isinstance(df.index, pandas.RangeIndex):\n                    # If the resulting index is a pure RangeIndex that means that\n                    # `.reset_index` actually dropped all of the levels of the\n                    # original index and so we have to recompute it manually for each partition\n                    start = sum(axis_lengths[:partition_idx])\n                    stop = sum(axis_lengths[: partition_idx + 1])\n\n                    df.index = pandas.RangeIndex(start, stop)\n                return df\n\n            new_columns = None\n            if kwargs[\"drop\"]:\n                dtypes = self._modin_frame.copy_dtypes_cache()\n                if self.frame_has_columns_cache:\n                    new_columns = self._modin_frame.copy_columns_cache(\n                        copy_lengths=True\n                    )\n            else:\n                # concat index dtypes with column dtypes\n                index_dtypes = self._modin_frame._index_cache.maybe_get_dtypes()\n                try:\n                    dtypes = ModinDtypes.concat(\n                        [\n                            index_dtypes,\n                            self._modin_frame._dtypes,\n                        ]\n                    )\n                except NotImplementedError:\n                    # may raise on duplicated names in materialized 'self.dtypes'\n                    dtypes = None\n                if (\n                    # can precompute new columns if we know columns and index names\n                    self.frame_has_materialized_columns\n                    and index_dtypes is not None\n                ):\n                    empty_index = (\n                        pandas.Index([0], name=index_dtypes.index[0])\n                        if len(index_dtypes) == 1\n                    
    else pandas.MultiIndex.from_arrays(\n                            [[i] for i in range(len(index_dtypes))],\n                            names=index_dtypes.index,\n                        )\n                    )\n                    new_columns = (\n                        pandas.DataFrame(columns=self.columns, index=empty_index)\n                        .reset_index(**kwargs)\n                        .columns\n                    )\n\n            return self.__constructor__(\n                self._modin_frame.apply_full_axis(\n                    axis=1,\n                    func=_reset,\n                    enumerate_partitions=True,\n                    new_columns=new_columns,\n                    dtypes=dtypes,\n                    sync_labels=False,\n                    pass_axis_lengths_to_partitions=True,\n                )\n            )\n\n        allow_duplicates = kwargs.pop(\"allow_duplicates\", lib.no_default)\n        names = kwargs.pop(\"names\", None)\n        if allow_duplicates not in (lib.no_default, False) or names is not None:\n            return self.default_to_pandas(\n                pandas.DataFrame.reset_index,\n                allow_duplicates=allow_duplicates,\n                names=names,\n                **kwargs,\n            )\n\n        drop = kwargs.get(\"drop\", False)\n        level = kwargs.get(\"level\", None)\n        new_index = None\n        if level is not None:\n            if not isinstance(level, (tuple, list)):\n                level = [level]\n            level = [self.index._get_level_number(lev) for lev in level]\n            uniq_sorted_level = sorted(set(level))\n            if len(uniq_sorted_level) < self.index.nlevels:\n                # We handle this by separately computing the index. 
We could just\n                # put the labels into the data and pull them back out, but that is\n                # expensive.\n                new_index = (\n                    self.index.droplevel(uniq_sorted_level)\n                    if len(level) < self.index.nlevels\n                    else pandas.RangeIndex(len(self.index))\n                )\n        elif not drop:\n            uniq_sorted_level = list(range(self.index.nlevels))\n\n        if not drop:\n            if len(uniq_sorted_level) < self.index.nlevels:\n                # These are the index levels that will remain after the reset_index\n                keep_levels = [\n                    i for i in range(self.index.nlevels) if i not in uniq_sorted_level\n                ]\n                new_copy = self.copy()\n                # Change the index to have only the levels that will be inserted\n                # into the data. We will replace the old levels later.\n                new_copy.index = self.index.droplevel(keep_levels)\n                new_copy.index.names = [\n                    (\n                        \"level_{}\".format(level_value)\n                        if new_copy.index.names[level_index] is None\n                        else new_copy.index.names[level_index]\n                    )\n                    for level_index, level_value in enumerate(uniq_sorted_level)\n                ]\n                new_modin_frame = new_copy._modin_frame.from_labels()\n                # Replace the levels that will remain as a part of the index.\n                new_modin_frame.index = new_index\n            else:\n                new_modin_frame = self._modin_frame.from_labels()\n            if isinstance(new_modin_frame.columns, pandas.MultiIndex):\n                # Fix col_level and col_fill in generated column names because from_labels works with assumption\n                # that col_level and col_fill are not specified but it expands tuples in level names.\n                
col_level = kwargs.get(\"col_level\", 0)\n                col_fill = kwargs.get(\"col_fill\", \"\")\n                if col_level != 0 or col_fill != \"\":\n                    # Modify generated column names if col_level and col_fil have values different from default.\n                    levels_names_list = [\n                        f\"level_{level_index}\" if level_name is None else level_name\n                        for level_index, level_name in enumerate(self.index.names)\n                    ]\n                    if col_fill is None:\n                        # Initialize col_fill if it is None.\n                        # This is some weird undocumented Pandas behavior to take first\n                        # element of the last column name.\n                        last_col_name = levels_names_list[uniq_sorted_level[-1]]\n                        last_col_name = (\n                            list(last_col_name)\n                            if isinstance(last_col_name, tuple)\n                            else [last_col_name]\n                        )\n                        if len(last_col_name) not in (1, self.columns.nlevels):\n                            raise ValueError(\n                                \"col_fill=None is incompatible \"\n                                + f\"with incomplete column name {last_col_name}\"\n                            )\n                        col_fill = last_col_name[0]\n                    columns_list = new_modin_frame.columns.tolist()\n                    for level_index, level_value in enumerate(uniq_sorted_level):\n                        level_name = levels_names_list[level_value]\n                        # Expand tuples into separate items and fill the rest with col_fill\n                        top_level = [col_fill] * col_level\n                        middle_level = (\n                            list(level_name)\n                            if isinstance(level_name, tuple)\n                            else 
[level_name]\n                        )\n                        bottom_level = [col_fill] * (\n                            self.columns.nlevels - (col_level + len(middle_level))\n                        )\n                        item = tuple(top_level + middle_level + bottom_level)\n                        if len(item) > self.columns.nlevels:\n                            raise ValueError(\n                                \"Item must have length equal to number of levels.\"\n                            )\n                        columns_list[level_index] = item\n                    new_modin_frame.columns = pandas.MultiIndex.from_tuples(\n                        columns_list, names=self.columns.names\n                    )\n            new_self = self.__constructor__(new_modin_frame)\n        else:\n            new_self = self.copy()\n            new_self.index = (\n                # Cheaper to compute row lengths than index\n                pandas.RangeIndex(sum(new_self._modin_frame.row_lengths))\n                if new_index is None\n                else new_index\n            )\n        return new_self\n\n    def set_index_from_columns(\n        self, keys: List[Hashable], drop: bool = True, append: bool = False\n    ):\n        new_modin_frame = self._modin_frame.to_labels(keys)\n        if append:\n            arrays = []\n            # Appending keeps the original order of the index levels, then appends the\n            # new index objects.\n            names = list(self.index.names)\n            if isinstance(self.index, pandas.MultiIndex):\n                for i in range(self.index.nlevels):\n                    arrays.append(self.index._get_level_values(i))\n            else:\n                arrays.append(self.index)\n\n            # Add the names in the correct order.\n            names.extend(new_modin_frame.index.names)\n            if isinstance(new_modin_frame.index, pandas.MultiIndex):\n                for i in 
range(new_modin_frame.index.nlevels):\n                    arrays.append(new_modin_frame.index._get_level_values(i))\n            else:\n                arrays.append(new_modin_frame.index)\n            new_modin_frame.index = ensure_index_from_sequences(arrays, names)\n        if not drop:\n            # The algebraic operator for this operation always drops the column, but we\n            # can copy the data in this object and just use the index from the result of\n            # the query compiler call.\n            result = self._modin_frame.copy()\n            result.index = new_modin_frame.index\n        else:\n            result = new_modin_frame\n        return self.__constructor__(result)\n\n    # END Reindex/reset_index\n\n    # Transpose\n    # For transpose, we aren't going to immediately copy everything. Since the\n    # actual transpose operation is very fast, we will just do it before any\n    # operation that gets called on the transposed data. See _prepare_method\n    # for how the transpose is applied.\n    #\n    # Our invariants assume that the blocks are transposed, but not the\n    # data inside. 
Sometimes we have to reverse this transposition of blocks\n    # for simplicity of implementation.\n\n    def transpose(self, *args, **kwargs) -> PandasQueryCompiler:\n        # Switch the index and columns and transpose the data within the blocks.\n        return self.__constructor__(self._modin_frame.transpose())\n\n    def is_series_like(self):\n        return len(self.columns) == 1 or len(self.index) == 1\n\n    # END Transpose\n\n    # TreeReduce operations\n    count = TreeReduce.register(pandas.DataFrame.count, pandas.DataFrame.sum)\n\n    def _dtypes_sum(dtypes: pandas.Series, *func_args, **func_kwargs):  # noqa: GL08\n        # The common type evaluation for `TreeReduce` operator may differ depending\n        # on the pandas function, so it's better to pass a evaluation function that\n        # should be defined for each Modin's function.\n        return find_common_type(dtypes.tolist())\n\n    sum = TreeReduce.register(pandas.DataFrame.sum, compute_dtypes=_dtypes_sum)\n    prod = TreeReduce.register(pandas.DataFrame.prod)\n    any = TreeReduce.register(pandas.DataFrame.any, pandas.DataFrame.any)\n    all = TreeReduce.register(pandas.DataFrame.all, pandas.DataFrame.all)\n    # memory_usage adds an extra column for index usage, but we don't want to distribute\n    # the index memory usage calculation.\n    _memory_usage_without_index = TreeReduce.register(\n        pandas.DataFrame.memory_usage,\n        lambda x, *args, **kwargs: pandas.DataFrame.sum(x),\n        axis=0,\n    )\n\n    def memory_usage(self, **kwargs):\n        index = kwargs.get(\"index\", True)\n        deep = kwargs.get(\"deep\", False)\n        usage_without_index = self._memory_usage_without_index(index=False, deep=deep)\n        return (\n            self.from_pandas(\n                pandas.DataFrame(\n                    [self.index.memory_usage()],\n                    columns=[\"Index\"],\n                    index=[MODIN_UNNAMED_SERIES_LABEL],\n                ),\n                
data_cls=type(self._modin_frame),\n            ).concat(axis=1, other=[usage_without_index])\n            if index\n            else usage_without_index\n        )\n\n    def max(self, axis, **kwargs):\n        def map_func(df, **kwargs):\n            return pandas.DataFrame.max(df, **kwargs)\n\n        def reduce_func(df, **kwargs):\n            if kwargs.get(\"numeric_only\", False):\n                kwargs = kwargs.copy()\n                kwargs[\"numeric_only\"] = False\n            return pandas.DataFrame.max(df, **kwargs)\n\n        return TreeReduce.register(map_func, reduce_func)(self, axis=axis, **kwargs)\n\n    def min(self, axis, **kwargs):\n        def map_func(df, **kwargs):\n            return pandas.DataFrame.min(df, **kwargs)\n\n        def reduce_func(df, **kwargs):\n            if kwargs.get(\"numeric_only\", False):\n                kwargs = kwargs.copy()\n                kwargs[\"numeric_only\"] = False\n            return pandas.DataFrame.min(df, **kwargs)\n\n        return TreeReduce.register(map_func, reduce_func)(self, axis=axis, **kwargs)\n\n    def mean(self, axis, **kwargs):\n        if kwargs.get(\"level\") is not None or axis is None:\n            return self.default_to_pandas(pandas.DataFrame.mean, axis=axis, **kwargs)\n\n        skipna = kwargs.get(\"skipna\", True)\n\n        # TODO-FIX: this function may work incorrectly with user-defined \"numeric\" values.\n        # Since `count(numeric_only=True)` discards all unknown \"numeric\" types, we can get incorrect\n        # divisor inside the reduce function.\n        def map_fn(df, numeric_only=False, **kwargs):\n            \"\"\"\n            Perform Map phase of the `mean`.\n\n            Compute sum and number of elements in a given partition.\n            \"\"\"\n            result = pandas.DataFrame(\n                {\n                    \"sum\": df.sum(axis=axis, skipna=skipna, numeric_only=numeric_only),\n                    \"count\": df.count(axis=axis, 
numeric_only=numeric_only),\n                }\n            )\n            return result if axis else result.T\n\n        def reduce_fn(df, **kwargs):\n            \"\"\"\n            Perform Reduce phase of the `mean`.\n\n            Compute sum for all the the partitions and divide it to\n            the total number of elements.\n            \"\"\"\n            sum_cols = df[\"sum\"] if axis else df.loc[\"sum\"]\n            count_cols = df[\"count\"] if axis else df.loc[\"count\"]\n\n            if not isinstance(sum_cols, pandas.Series):\n                # If we got `NaN` as the result of the sum in any axis partition,\n                # then we must consider the whole sum as `NaN`, so setting `skipna=False`\n                sum_cols = sum_cols.sum(axis=axis, skipna=False)\n                count_cols = count_cols.sum(axis=axis, skipna=False)\n            return sum_cols / count_cols\n\n        def compute_dtypes_fn(dtypes, axis, **kwargs):\n            \"\"\"\n            Compute the resulting Series dtype.\n\n            When computing along rows and there are numeric and boolean columns\n            Pandas returns `object`. 
In all other cases - `float64`.\n            \"\"\"\n            if (\n                axis == 1\n                and any(is_bool_dtype(t) for t in dtypes)\n                and any(is_numeric_dtype(t) for t in dtypes)\n            ):\n                return \"object\"\n            return \"float64\"\n\n        return TreeReduce.register(\n            map_fn,\n            reduce_fn,\n            compute_dtypes=compute_dtypes_fn,\n        )(self, axis=axis, **kwargs)\n\n    # END TreeReduce operations\n\n    # Reduce operations\n    idxmax = Reduce.register(pandas.DataFrame.idxmax)\n    idxmin = Reduce.register(pandas.DataFrame.idxmin)\n\n    def median(self, axis, **kwargs):\n        if axis is None:\n            return self.default_to_pandas(pandas.DataFrame.median, axis=axis, **kwargs)\n        return Reduce.register(pandas.DataFrame.median)(self, axis=axis, **kwargs)\n\n    def nunique(self, axis=0, dropna=True):\n        if not RangePartitioning.get():\n            return Reduce.register(pandas.DataFrame.nunique)(\n                self, axis=axis, dropna=dropna\n            )\n\n        unsupported_message = \"\"\n        if axis != 0:\n            unsupported_message += (\n                \"Range-partitioning 'nunique()' is only supported for 'axis=0'.\\n\"\n            )\n\n        if len(self.columns) > 1:\n            unsupported_message += \"Range-partitioning 'nunique()' is only supported for a signle-column dataframe.\\n\"\n\n        if len(unsupported_message) > 0:\n            message = (\n                f\"Can't use range-partitioning implementation for 'nunique' because:\\n{unsupported_message}\"\n                + \"Falling back to a full-axis reduce implementation.\"\n            )\n            get_logger().info(message)\n            ErrorMessage.warn(message)\n            return Reduce.register(pandas.DataFrame.nunique)(\n                self, axis=axis, dropna=dropna\n            )\n\n        # compute '.nunique()' for each row partitions\n       
 new_modin_frame = self._modin_frame._apply_func_to_range_partitioning(\n            key_columns=self.columns.tolist(),\n            func=lambda df: df.nunique(dropna=dropna).to_frame(),\n        )\n        # sum the results of each row part to get the final value\n        new_modin_frame = new_modin_frame.reduce(axis=0, function=lambda df: df.sum())\n        return self.__constructor__(new_modin_frame, shape_hint=\"column\")\n\n    def skew(self, axis, **kwargs):\n        if axis is None:\n            return self.default_to_pandas(pandas.DataFrame.skew, axis=axis, **kwargs)\n        return Reduce.register(pandas.DataFrame.skew)(self, axis=axis, **kwargs)\n\n    def kurt(self, axis, **kwargs):\n        if axis is None:\n            return self.default_to_pandas(pandas.DataFrame.kurt, axis=axis, **kwargs)\n        return Reduce.register(pandas.DataFrame.kurt)(self, axis=axis, **kwargs)\n\n    sem = Reduce.register(pandas.DataFrame.sem)\n    std = Reduce.register(pandas.DataFrame.std)\n    var = Reduce.register(pandas.DataFrame.var)\n    sum_min_count = Reduce.register(pandas.DataFrame.sum)\n    prod_min_count = Reduce.register(pandas.DataFrame.prod)\n    quantile_for_single_value = Reduce.register(pandas.DataFrame.quantile)\n\n    def to_datetime(self, *args, **kwargs):\n        if len(self.columns) == 1:\n            return Map.register(\n                # to_datetime has inplace side effects, see GH#3063\n                lambda df, *args, **kwargs: pandas.to_datetime(\n                    df.squeeze(axis=1), *args, **kwargs\n                ).to_frame(),\n                shape_hint=\"column\",\n            )(self, *args, **kwargs)\n        else:\n            return Reduce.register(pandas.to_datetime, axis=1, shape_hint=\"column\")(\n                self, *args, **kwargs\n            )\n\n    # END Reduce operations\n\n    def _resample_func(\n        self,\n        resample_kwargs,\n        func_name,\n        new_columns=None,\n        df_op=None,\n        
allow_range_impl=True,\n        *args,\n        **kwargs,\n    ):\n        \"\"\"\n        Resample underlying time-series data and apply aggregation on it.\n\n        Parameters\n        ----------\n        resample_kwargs : dict\n            Resample parameters in the format of ``modin.pandas.DataFrame.resample`` signature.\n        func_name : str\n            Aggregation function name to apply on resampler object.\n        new_columns : list of labels, optional\n            Actual column labels of the resulted frame, supposed to be a hint for the\n            Modin frame. If not specified will be computed automaticly.\n        df_op : callable(pandas.DataFrame) -> [pandas.DataFrame, pandas.Series], optional\n            Preprocessor function to apply to the passed frame before resampling.\n        allow_range_impl : bool, default: True\n            Whether to use range-partitioning if ``RangePartitioning.get() is True``.\n        *args : args\n            Arguments to pass to the aggregation function.\n        **kwargs : kwargs\n            Arguments to pass to the aggregation function.\n\n        Returns\n        -------\n        PandasQueryCompiler\n            New QueryCompiler containing the result of resample aggregation.\n        \"\"\"\n        from modin.core.dataframe.pandas.dataframe.utils import ShuffleResample\n\n        def map_func(df, resample_kwargs=resample_kwargs):  # pragma: no cover\n            \"\"\"Resample time-series data of the passed frame and apply aggregation function on it.\"\"\"\n            if len(df) == 0:\n                if resample_kwargs[\"on\"] is not None:\n                    df = df.set_index(resample_kwargs[\"on\"])\n                return df\n            if \"bin_bounds\" in df.attrs:\n                timestamps = df.attrs[\"bin_bounds\"]\n                if isinstance(df.index, pandas.MultiIndex):\n                    level_to_keep = resample_kwargs[\"level\"]\n                    if isinstance(level_to_keep, int):\n  
                      to_drop = [\n                            lvl\n                            for lvl in range(df.index.nlevels)\n                            if lvl != level_to_keep\n                        ]\n                    else:\n                        to_drop = [\n                            lvl for lvl in df.index.names if lvl != level_to_keep\n                        ]\n                    df.index = df.index.droplevel(to_drop)\n                    resample_kwargs = resample_kwargs.copy()\n                    resample_kwargs[\"level\"] = None\n                filler = pandas.DataFrame(\n                    np.nan, index=pandas.Index(timestamps), columns=df.columns\n                )\n                df = pandas.concat([df, filler], copy=False)\n            if df_op is not None:\n                df = df_op(df)\n            resampled_val = df.resample(**resample_kwargs)\n            op = getattr(pandas.core.resample.Resampler, func_name)\n            if callable(op):\n                try:\n                    # This will happen with Arrow buffer read-only errors. 
We don't want to copy\n                    # all the time, so this will try to fast-path the code first.\n                    val = op(resampled_val, *args, **kwargs)\n                except ValueError:\n                    resampled_val = df.copy().resample(**resample_kwargs)\n                    val = op(resampled_val, *args, **kwargs)\n            else:\n                val = getattr(resampled_val, func_name)\n\n            if isinstance(val, pandas.Series):\n                return val.to_frame()\n            else:\n                return val\n\n        if resample_kwargs[\"on\"] is None:\n            level = [\n                0 if resample_kwargs[\"level\"] is None else resample_kwargs[\"level\"]\n            ]\n            key_columns = []\n        else:\n            level = None\n            key_columns = [resample_kwargs[\"on\"]]\n\n        if (\n            not allow_range_impl\n            or resample_kwargs[\"axis\"] not in (0, \"index\")\n            or not RangePartitioning.get()\n        ):\n            new_modin_frame = self._modin_frame.apply_full_axis(\n                axis=0, func=map_func, new_columns=new_columns\n            )\n        else:\n            new_modin_frame = self._modin_frame._apply_func_to_range_partitioning(\n                key_columns=key_columns,\n                level=level,\n                func=map_func,\n                shuffle_func_cls=ShuffleResample,\n                resample_kwargs=resample_kwargs,\n            )\n        return self.__constructor__(new_modin_frame)\n\n    def resample_get_group(self, resample_kwargs, name, obj):\n        return self._resample_func(\n            resample_kwargs, \"get_group\", name=name, allow_range_impl=False, obj=obj\n        )\n\n    def resample_app_ser(self, resample_kwargs, func, *args, **kwargs):\n        return self._resample_func(\n            resample_kwargs,\n            \"apply\",\n            df_op=lambda df: df.squeeze(axis=1),\n            func=func,\n            
*args,\n            **kwargs,\n        )\n\n    def resample_app_df(self, resample_kwargs, func, *args, **kwargs):\n        return self._resample_func(resample_kwargs, \"apply\", func=func, *args, **kwargs)\n\n    def resample_agg_ser(self, resample_kwargs, func, *args, **kwargs):\n        return self._resample_func(\n            resample_kwargs,\n            \"aggregate\",\n            df_op=lambda df: df.squeeze(axis=1),\n            func=func,\n            *args,\n            **kwargs,\n        )\n\n    def resample_agg_df(self, resample_kwargs, func, *args, **kwargs):\n        return self._resample_func(\n            resample_kwargs, \"aggregate\", func=func, *args, **kwargs\n        )\n\n    def resample_transform(self, resample_kwargs, arg, *args, **kwargs):\n        return self._resample_func(\n            resample_kwargs,\n            \"transform\",\n            arg=arg,\n            allow_range_impl=False,\n            *args,\n            **kwargs,\n        )\n\n    def resample_pipe(self, resample_kwargs, func, *args, **kwargs):\n        return self._resample_func(resample_kwargs, \"pipe\", func=func, *args, **kwargs)\n\n    def resample_ffill(self, resample_kwargs, limit):\n        return self._resample_func(\n            resample_kwargs, \"ffill\", limit=limit, allow_range_impl=False\n        )\n\n    def resample_bfill(self, resample_kwargs, limit):\n        return self._resample_func(\n            resample_kwargs, \"bfill\", limit=limit, allow_range_impl=False\n        )\n\n    def resample_nearest(self, resample_kwargs, limit):\n        return self._resample_func(\n            resample_kwargs, \"nearest\", limit=limit, allow_range_impl=False\n        )\n\n    def resample_fillna(self, resample_kwargs, method, limit):\n        return self._resample_func(\n            resample_kwargs,\n            \"fillna\",\n            method=method,\n            limit=limit,\n            allow_range_impl=method is None,\n        )\n\n    def resample_asfreq(self, 
resample_kwargs, fill_value):\n        return self._resample_func(resample_kwargs, \"asfreq\", fill_value=fill_value)\n\n    def resample_interpolate(\n        self,\n        resample_kwargs,\n        method,\n        axis,\n        limit,\n        inplace,\n        limit_direction,\n        limit_area,\n        downcast,\n        **kwargs,\n    ):\n        return self._resample_func(\n            resample_kwargs,\n            \"interpolate\",\n            axis=axis,\n            limit=limit,\n            inplace=inplace,\n            limit_direction=limit_direction,\n            limit_area=limit_area,\n            downcast=downcast,\n            allow_range_impl=False,\n            **kwargs,\n        )\n\n    def resample_count(self, resample_kwargs):\n        return self._resample_func(resample_kwargs, \"count\")\n\n    def resample_nunique(self, resample_kwargs, *args, **kwargs):\n        return self._resample_func(resample_kwargs, \"nunique\", *args, **kwargs)\n\n    def resample_first(self, resample_kwargs, *args, **kwargs):\n        return self._resample_func(\n            resample_kwargs, \"first\", allow_range_impl=False, *args, **kwargs\n        )\n\n    def resample_last(self, resample_kwargs, *args, **kwargs):\n        return self._resample_func(\n            resample_kwargs, \"last\", allow_range_impl=False, *args, **kwargs\n        )\n\n    def resample_max(self, resample_kwargs, *args, **kwargs):\n        return self._resample_func(resample_kwargs, \"max\", *args, **kwargs)\n\n    def resample_mean(self, resample_kwargs, *args, **kwargs):\n        return self._resample_func(resample_kwargs, \"mean\", *args, **kwargs)\n\n    def resample_median(self, resample_kwargs, *args, **kwargs):\n        return self._resample_func(resample_kwargs, \"median\", *args, **kwargs)\n\n    def resample_min(self, resample_kwargs, *args, **kwargs):\n        return self._resample_func(resample_kwargs, \"min\", *args, **kwargs)\n\n    def resample_ohlc_ser(self, 
resample_kwargs, *args, **kwargs):\n        return self._resample_func(\n            resample_kwargs,\n            \"ohlc\",\n            df_op=lambda df: df.squeeze(axis=1),\n            *args,\n            **kwargs,\n        )\n\n    def resample_ohlc_df(self, resample_kwargs, *args, **kwargs):\n        return self._resample_func(resample_kwargs, \"ohlc\", *args, **kwargs)\n\n    def resample_prod(self, resample_kwargs, min_count, *args, **kwargs):\n        return self._resample_func(\n            resample_kwargs,\n            \"prod\",\n            min_count=min_count,\n            *args,\n            **kwargs,\n        )\n\n    def resample_size(self, resample_kwargs):\n        return self._resample_func(\n            resample_kwargs,\n            \"size\",\n            new_columns=[MODIN_UNNAMED_SERIES_LABEL],\n            allow_range_impl=False,\n        )\n\n    def resample_sem(self, resample_kwargs, *args, **kwargs):\n        return self._resample_func(resample_kwargs, \"sem\", *args, **kwargs)\n\n    def resample_std(self, resample_kwargs, ddof, *args, **kwargs):\n        return self._resample_func(resample_kwargs, \"std\", ddof=ddof, *args, **kwargs)\n\n    def resample_sum(self, resample_kwargs, min_count, *args, **kwargs):\n        return self._resample_func(\n            resample_kwargs,\n            \"sum\",\n            min_count=min_count,\n            *args,\n            **kwargs,\n        )\n\n    def resample_var(self, resample_kwargs, ddof, *args, **kwargs):\n        return self._resample_func(resample_kwargs, \"var\", ddof=ddof, *args, **kwargs)\n\n    def resample_quantile(self, resample_kwargs, q, **kwargs):\n        return self._resample_func(resample_kwargs, \"quantile\", q=q, **kwargs)\n\n    def expanding_aggregate(self, axis, expanding_args, func, *args, **kwargs):\n        new_modin_frame = self._modin_frame.apply_full_axis(\n            axis,\n            lambda df: pandas.DataFrame(\n                
df.expanding(*expanding_args).aggregate(func=func, *args, **kwargs)\n            ),\n            new_index=self.index,\n        )\n        return self.__constructor__(new_modin_frame)\n\n    expanding_sum = Fold.register(\n        lambda df, expanding_args, *args, **kwargs: pandas.DataFrame(\n            df.expanding(*expanding_args).sum(*args, **kwargs)\n        ),\n        shape_preserved=True,\n    )\n\n    expanding_min = Fold.register(\n        lambda df, expanding_args, *args, **kwargs: pandas.DataFrame(\n            df.expanding(*expanding_args).min(*args, **kwargs)\n        ),\n        shape_preserved=True,\n    )\n\n    expanding_max = Fold.register(\n        lambda df, expanding_args, *args, **kwargs: pandas.DataFrame(\n            df.expanding(*expanding_args).max(*args, **kwargs)\n        ),\n        shape_preserved=True,\n    )\n\n    expanding_mean = Fold.register(\n        lambda df, expanding_args, *args, **kwargs: pandas.DataFrame(\n            df.expanding(*expanding_args).mean(*args, **kwargs)\n        ),\n        shape_preserved=True,\n    )\n\n    expanding_median = Fold.register(\n        lambda df, expanding_args, *args, **kwargs: pandas.DataFrame(\n            df.expanding(*expanding_args).median(*args, **kwargs)\n        ),\n        shape_preserved=True,\n    )\n\n    expanding_var = Fold.register(\n        lambda df, expanding_args, *args, **kwargs: pandas.DataFrame(\n            df.expanding(*expanding_args).var(*args, **kwargs)\n        ),\n        shape_preserved=True,\n    )\n\n    expanding_std = Fold.register(\n        lambda df, expanding_args, *args, **kwargs: pandas.DataFrame(\n            df.expanding(*expanding_args).std(*args, **kwargs)\n        ),\n        shape_preserved=True,\n    )\n\n    expanding_count = Fold.register(\n        lambda df, expanding_args, *args, **kwargs: pandas.DataFrame(\n            df.expanding(*expanding_args).count(*args, **kwargs)\n        ),\n        shape_preserved=True,\n    )\n\n    def 
expanding_cov(\n        self,\n        fold_axis,\n        expanding_args,\n        squeeze_self,\n        squeeze_other,\n        other=None,\n        pairwise=None,\n        ddof=1,\n        numeric_only=False,\n        **kwargs,\n    ):\n        other_for_pandas = (\n            other\n            if other is None\n            else (\n                other.to_pandas().squeeze(axis=1)\n                if squeeze_other\n                else other.to_pandas()\n            )\n        )\n        if len(self.columns) > 1:\n            # computing covariance for each column requires having the other columns,\n            # so we can't parallelize this as a full-column operation\n            return self.default_to_pandas(\n                lambda df: pandas.DataFrame.expanding(df, *expanding_args).cov(\n                    other=other_for_pandas,\n                    pairwise=pairwise,\n                    ddof=ddof,\n                    numeric_only=numeric_only,\n                    **kwargs,\n                )\n            )\n        return Fold.register(\n            lambda df, expanding_args, *args, **kwargs: pandas.DataFrame(\n                (df.squeeze(axis=1) if squeeze_self else df)\n                .expanding(*expanding_args)\n                .cov(*args, **kwargs)\n            ),\n            shape_preserved=True,\n        )(\n            self,\n            fold_axis,\n            expanding_args,\n            other=other_for_pandas,\n            pairwise=pairwise,\n            ddof=ddof,\n            numeric_only=numeric_only,\n            **kwargs,\n        )\n\n    def expanding_corr(\n        self,\n        fold_axis,\n        expanding_args,\n        squeeze_self,\n        squeeze_other,\n        other=None,\n        pairwise=None,\n        ddof=1,\n        numeric_only=False,\n        **kwargs,\n    ):\n        other_for_pandas = (\n            other\n            if other is None\n            else (\n                other.to_pandas().squeeze(axis=1)\n     
           if squeeze_other\n                else other.to_pandas()\n            )\n        )\n        if len(self.columns) > 1:\n            # computing correlation for each column requires having the other columns,\n            # so we can't parallelize this as a full-column operation\n            return self.default_to_pandas(\n                lambda df: pandas.DataFrame.expanding(df, *expanding_args).corr(\n                    other=other_for_pandas,\n                    pairwise=pairwise,\n                    ddof=ddof,\n                    numeric_only=numeric_only,\n                    **kwargs,\n                )\n            )\n        return Fold.register(\n            lambda df, expanding_args, *args, **kwargs: pandas.DataFrame(\n                (df.squeeze(axis=1) if squeeze_self else df)\n                .expanding(*expanding_args)\n                .corr(*args, **kwargs)\n            ),\n            shape_preserved=True,\n        )(\n            self,\n            fold_axis,\n            expanding_args,\n            other=other_for_pandas,\n            pairwise=pairwise,\n            ddof=ddof,\n            numeric_only=numeric_only,\n            **kwargs,\n        )\n\n    expanding_quantile = Fold.register(\n        lambda df, expanding_args, *args, **kwargs: pandas.DataFrame(\n            df.expanding(*expanding_args).quantile(*args, **kwargs)\n        ),\n        shape_preserved=True,\n    )\n\n    expanding_sem = Fold.register(\n        lambda df, expanding_args, *args, **kwargs: pandas.DataFrame(\n            df.expanding(*expanding_args).sem(*args, **kwargs)\n        ),\n        shape_preserved=True,\n    )\n\n    expanding_kurt = Fold.register(\n        lambda df, expanding_args, *args, **kwargs: pandas.DataFrame(\n            df.expanding(*expanding_args).kurt(*args, **kwargs)\n        ),\n        shape_preserved=True,\n    )\n\n    expanding_skew = Fold.register(\n        lambda df, expanding_args, *args, **kwargs: pandas.DataFrame(\n         
   df.expanding(*expanding_args).skew(*args, **kwargs)\n        ),\n        shape_preserved=True,\n    )\n\n    expanding_rank = Fold.register(\n        lambda df, expanding_args, *args, **kwargs: pandas.DataFrame(\n            df.expanding(*expanding_args).rank(*args, **kwargs)\n        ),\n        shape_preserved=True,\n    )\n\n    window_mean = Fold.register(\n        lambda df, rolling_kwargs, *args, **kwargs: pandas.DataFrame(\n            df.rolling(**rolling_kwargs).mean(*args, **kwargs)\n        ),\n        shape_preserved=True,\n    )\n    window_sum = Fold.register(\n        lambda df, rolling_kwargs, *args, **kwargs: pandas.DataFrame(\n            df.rolling(**rolling_kwargs).sum(*args, **kwargs)\n        ),\n        shape_preserved=True,\n    )\n    window_var = Fold.register(\n        lambda df, rolling_kwargs, ddof, *args, **kwargs: pandas.DataFrame(\n            df.rolling(**rolling_kwargs).var(ddof=ddof, *args, **kwargs)\n        ),\n        shape_preserved=True,\n    )\n    window_std = Fold.register(\n        lambda df, rolling_kwargs, ddof, *args, **kwargs: pandas.DataFrame(\n            df.rolling(**rolling_kwargs).std(ddof=ddof, *args, **kwargs)\n        ),\n        shape_preserved=True,\n    )\n    rolling_count = Fold.register(\n        lambda df, rolling_kwargs: pandas.DataFrame(\n            df.rolling(**rolling_kwargs).count()\n        ),\n        shape_preserved=True,\n    )\n    rolling_sum = Fold.register(\n        lambda df, rolling_kwargs, *args, **kwargs: pandas.DataFrame(\n            df.rolling(**rolling_kwargs).sum(*args, **kwargs)\n        ),\n        shape_preserved=True,\n    )\n    rolling_sem = Fold.register(\n        lambda df, rolling_kwargs, *args, **kwargs: pandas.DataFrame(\n            df.rolling(**rolling_kwargs).sem(*args, **kwargs)\n        ),\n        shape_preserved=True,\n    )\n    rolling_mean = Fold.register(\n        lambda df, rolling_kwargs, *args, **kwargs: pandas.DataFrame(\n            
df.rolling(**rolling_kwargs).mean(*args, **kwargs)\n        ),\n        shape_preserved=True,\n    )\n    rolling_median = Fold.register(\n        lambda df, rolling_kwargs, **kwargs: pandas.DataFrame(\n            df.rolling(**rolling_kwargs).median(**kwargs)\n        ),\n        shape_preserved=True,\n    )\n    rolling_var = Fold.register(\n        lambda df, rolling_kwargs, ddof, *args, **kwargs: pandas.DataFrame(\n            df.rolling(**rolling_kwargs).var(ddof=ddof, *args, **kwargs)\n        ),\n        shape_preserved=True,\n    )\n    rolling_std = Fold.register(\n        lambda df, rolling_kwargs, ddof, *args, **kwargs: pandas.DataFrame(\n            df.rolling(**rolling_kwargs).std(ddof=ddof, *args, **kwargs)\n        ),\n        shape_preserved=True,\n    )\n    rolling_min = Fold.register(\n        lambda df, rolling_kwargs, *args, **kwargs: pandas.DataFrame(\n            df.rolling(**rolling_kwargs).min(*args, **kwargs)\n        ),\n        shape_preserved=True,\n    )\n    rolling_max = Fold.register(\n        lambda df, rolling_kwargs, *args, **kwargs: pandas.DataFrame(\n            df.rolling(**rolling_kwargs).max(*args, **kwargs)\n        ),\n        shape_preserved=True,\n    )\n    rolling_skew = Fold.register(\n        lambda df, rolling_kwargs, **kwargs: pandas.DataFrame(\n            df.rolling(**rolling_kwargs).skew(**kwargs)\n        ),\n        shape_preserved=True,\n    )\n    rolling_kurt = Fold.register(\n        lambda df, rolling_kwargs, **kwargs: pandas.DataFrame(\n            df.rolling(**rolling_kwargs).kurt(**kwargs)\n        ),\n        shape_preserved=True,\n    )\n    rolling_apply = Fold.register(\n        lambda df, rolling_kwargs, func, raw, engine, engine_kwargs, args, kwargs: pandas.DataFrame(\n            df.rolling(**rolling_kwargs).apply(\n                func=func,\n                raw=raw,\n                engine=engine,\n                engine_kwargs=engine_kwargs,\n                args=args,\n                
kwargs=kwargs,\n            ),\n        ),\n        shape_preserved=True,\n    )\n    rolling_quantile = Fold.register(\n        lambda df, rolling_kwargs, q, interpolation, **kwargs: pandas.DataFrame(\n            df.rolling(**rolling_kwargs).quantile(\n                q=q, interpolation=interpolation, **kwargs\n            ),\n        ),\n        shape_preserved=True,\n    )\n    rolling_rank = Fold.register(\n        lambda df, rolling_kwargs, method, ascending, pct, numeric_only, **kwargs: pandas.DataFrame(\n            df.rolling(**rolling_kwargs).rank(\n                method=method,\n                ascending=ascending,\n                pct=pct,\n                numeric_only=numeric_only,\n                **kwargs,\n            ),\n        ),\n        shape_preserved=True,\n    )\n\n    def rolling_corr(self, axis, rolling_kwargs, other, pairwise, *args, **kwargs):\n        if len(self.columns) > 1:\n            return self.default_to_pandas(\n                lambda df: pandas.DataFrame.rolling(df, **rolling_kwargs).corr(\n                    other=other, pairwise=pairwise, *args, **kwargs\n                )\n            )\n        else:\n            return Fold.register(\n                lambda df: pandas.DataFrame(\n                    df.rolling(**rolling_kwargs).corr(\n                        other=other, pairwise=pairwise, *args, **kwargs\n                    )\n                ),\n                shape_preserved=True,\n            )(self, axis)\n\n    def rolling_cov(self, axis, rolling_kwargs, other, pairwise, ddof, **kwargs):\n        if len(self.columns) > 1:\n            return self.default_to_pandas(\n                lambda df: pandas.DataFrame.rolling(df, **rolling_kwargs).cov(\n                    other=other, pairwise=pairwise, ddof=ddof, **kwargs\n                )\n            )\n        else:\n            return Fold.register(\n                lambda df: pandas.DataFrame(\n                    df.rolling(**rolling_kwargs).cov(\n               
         other=other, pairwise=pairwise, ddof=ddof, **kwargs\n                    )\n                ),\n                shape_preserved=True,\n            )(self, axis)\n\n    def rolling_aggregate(self, axis, rolling_kwargs, func, *args, **kwargs):\n        new_modin_frame = self._modin_frame.apply_full_axis(\n            axis,\n            lambda df: pandas.DataFrame(\n                df.rolling(**rolling_kwargs).aggregate(func=func, *args, **kwargs)\n            ),\n            new_index=self.index,\n        )\n        return self.__constructor__(new_modin_frame)\n\n    def unstack(self, level, fill_value):\n        if not isinstance(self.index, pandas.MultiIndex) or (\n            isinstance(self.index, pandas.MultiIndex)\n            and is_list_like(level)\n            and len(level) == self.index.nlevels\n        ):\n            axis = 1\n            new_columns = [MODIN_UNNAMED_SERIES_LABEL]\n            need_reindex = True\n        else:\n            axis = 0\n            new_columns = None\n            need_reindex = False\n\n        def map_func(df):  # pragma: no cover\n            return pandas.DataFrame(df.unstack(level=level, fill_value=fill_value))\n\n        def is_tree_like_or_1d(calc_index, valid_index):\n            \"\"\"\n            Check whether specified index is a single dimensional or built in a tree manner.\n\n            Parameters\n            ----------\n            calc_index : pandas.Index\n                Frame index to check.\n            valid_index : pandas.Index\n                Frame index on the opposite from `calc_index` axis.\n\n            Returns\n            -------\n            bool\n                True if `calc_index` is not MultiIndex or MultiIndex and built in a tree manner.\n                False otherwise.\n            \"\"\"\n            if not isinstance(calc_index, pandas.MultiIndex):\n                return True\n            actual_len = 1\n            for lvl in calc_index.levels:\n                actual_len 
*= len(lvl)\n            return len(self.index) * len(self.columns) == actual_len * len(valid_index)\n\n        is_tree_like_or_1d_index = is_tree_like_or_1d(self.index, self.columns)\n        is_tree_like_or_1d_cols = is_tree_like_or_1d(self.columns, self.index)\n\n        is_all_multi_list = False\n        if (\n            isinstance(self.index, pandas.MultiIndex)\n            and isinstance(self.columns, pandas.MultiIndex)\n            and is_list_like(level)\n            and len(level) == self.index.nlevels\n            and is_tree_like_or_1d_index\n            and is_tree_like_or_1d_cols\n        ):\n            is_all_multi_list = True\n            real_cols_bkp = self.columns\n            obj = self.copy()\n            obj.columns = np.arange(len(obj.columns))\n        else:\n            obj = self\n\n        new_modin_frame = obj._modin_frame.apply_full_axis(\n            axis, map_func, new_columns=new_columns\n        )\n        result = self.__constructor__(new_modin_frame)\n\n        def compute_index(index, columns, consider_index=True, consider_columns=True):\n            \"\"\"\n            Compute new index for the unstacked frame.\n\n            Parameters\n            ----------\n            index : pandas.Index\n                Index of the original frame.\n            columns : pandas.Index\n                Columns of the original frame.\n            consider_index : bool, default: True\n                Whether original index contains duplicated values.\n                If True all duplicates will be dropped.\n            consider_columns : bool, default: True\n                Whether original columns contains duplicated values.\n                If True all duplicates will be dropped.\n\n            Returns\n            -------\n            pandas.Index\n                New index to use in the unstacked frame.\n            \"\"\"\n\n            def get_unique_level_values(index):\n                return [\n                    
index.get_level_values(lvl).unique()\n                    for lvl in np.arange(index.nlevels)\n                ]\n\n            new_index = (\n                get_unique_level_values(index)\n                if consider_index\n                else index if isinstance(index, list) else [index]\n            )\n\n            new_columns = (\n                get_unique_level_values(columns) if consider_columns else [columns]\n            )\n            return pandas.MultiIndex.from_product([*new_columns, *new_index])\n\n        if is_all_multi_list and is_tree_like_or_1d_index and is_tree_like_or_1d_cols:\n            result = result.sort_index()\n            index_level_values = [lvl for lvl in obj.index.levels]\n\n            result.index = compute_index(\n                index_level_values, real_cols_bkp, consider_index=False\n            )\n            return result\n\n        if need_reindex:\n            if is_tree_like_or_1d_index and is_tree_like_or_1d_cols:\n                is_recompute_index = isinstance(self.index, pandas.MultiIndex)\n                is_recompute_columns = not is_recompute_index and isinstance(\n                    self.columns, pandas.MultiIndex\n                )\n                new_index = compute_index(\n                    self.index, self.columns, is_recompute_index, is_recompute_columns\n                )\n            elif is_tree_like_or_1d_index != is_tree_like_or_1d_cols:\n                if isinstance(self.columns, pandas.MultiIndex) or not isinstance(\n                    self.index, pandas.MultiIndex\n                ):\n                    return result\n                else:\n                    index = (\n                        self.index.sortlevel()[0]\n                        if is_tree_like_or_1d_index\n                        and not is_tree_like_or_1d_cols\n                        and isinstance(self.index, pandas.MultiIndex)\n                        else self.index\n                    )\n                    index = 
pandas.MultiIndex.from_tuples(\n                        list(index) * len(self.columns)\n                    )\n                    columns = self.columns.repeat(len(self.index))\n                    index_levels = [\n                        index.get_level_values(i) for i in range(index.nlevels)\n                    ]\n                    new_index = pandas.MultiIndex.from_arrays(\n                        [columns] + index_levels,\n                        names=self.columns.names + self.index.names,\n                    )\n            else:\n                return result\n            result = result.reindex(0, new_index)\n        return result\n\n    def stack(self, level, dropna, sort):\n        if not isinstance(self.columns, pandas.MultiIndex) or (\n            isinstance(self.columns, pandas.MultiIndex)\n            and is_list_like(level)\n            and len(level) == self.columns.nlevels\n        ):\n            new_columns = [MODIN_UNNAMED_SERIES_LABEL]\n        else:\n            new_columns = None\n\n        new_modin_frame = self._modin_frame.apply_full_axis(\n            1,\n            lambda df: pandas.DataFrame(\n                df.stack(level=level, dropna=dropna, sort=sort)\n            ),\n            new_columns=new_columns,\n        )\n        return self.__constructor__(new_modin_frame)\n\n    # Map partitions operations\n    # These operations are operations that apply a function to every partition.\n    def isin(self, values, ignore_indices=False):\n        shape_hint = self._shape_hint\n        if isinstance(values, type(self)):\n            # HACK: if we don't cast to pandas, then the execution engine will try to\n            # propagate the distributed Series to workers and most likely would have\n            # some performance problems.\n            # TODO: A better way of doing so could be passing this `values` as a query compiler\n            # and broadcast accordingly.\n            values = values.to_pandas()\n            if 
ignore_indices:\n                # Pandas logic is that it ignores indexing if 'values' is a 1D object\n                values = values.squeeze(axis=1)\n\n        def isin_func(df, values):\n            if shape_hint == \"column\":\n                df = df.squeeze(axis=1)\n            res = df.isin(values)\n            if isinstance(res, pandas.Series):\n                res = res.to_frame(\n                    MODIN_UNNAMED_SERIES_LABEL if res.name is None else res.name\n                )\n            return res\n\n        return Map.register(isin_func, shape_hint=shape_hint, dtypes=np.bool_)(\n            self, values\n        )\n\n    abs = Map.register(pandas.DataFrame.abs, dtypes=\"copy\")\n    map = Map.register(pandas.DataFrame.map)\n    conj = Map.register(lambda df, *args, **kwargs: pandas.DataFrame(np.conj(df)))\n\n    def convert_dtypes(\n        self,\n        infer_objects: bool = True,\n        convert_string: bool = True,\n        convert_integer: bool = True,\n        convert_boolean: bool = True,\n        convert_floating: bool = True,\n        dtype_backend: str = \"numpy_nullable\",\n    ):\n        result = Fold.register(pandas.DataFrame.convert_dtypes, shape_preserved=True)(\n            self,\n            infer_objects=infer_objects,\n            convert_string=convert_string,\n            convert_integer=convert_integer,\n            convert_boolean=convert_boolean,\n            convert_floating=convert_floating,\n            dtype_backend=dtype_backend,\n        )\n        # TODO: `numpy_nullable` should be handled similarly\n        if dtype_backend == \"pyarrow\":\n            result._modin_frame._pandas_backend = \"pyarrow\"\n        return result\n\n    invert = Map.register(pandas.DataFrame.__invert__, dtypes=\"copy\")\n    isna = Map.register(pandas.DataFrame.isna, dtypes=np.bool_)\n    # TODO: better way to distinguish methods for NumPy API?\n    _isfinite = Map.register(\n        lambda df, *args, **kwargs: 
pandas.DataFrame(np.isfinite(df, *args, **kwargs)),\n        dtypes=np.bool_,\n    )\n    _isinf = Map.register(  # Needed for numpy API\n        lambda df, *args, **kwargs: pandas.DataFrame(np.isinf(df, *args, **kwargs)),\n        dtypes=np.bool_,\n    )\n    _isnat = Map.register(  # Needed for numpy API\n        lambda df, *args, **kwargs: pandas.DataFrame(np.isnat(df, *args, **kwargs)),\n        dtypes=np.bool_,\n    )\n    _isneginf = Map.register(  # Needed for numpy API\n        lambda df, *args, **kwargs: pandas.DataFrame(np.isneginf(df, *args, **kwargs)),\n        dtypes=np.bool_,\n    )\n    _isposinf = Map.register(  # Needed for numpy API\n        lambda df, *args, **kwargs: pandas.DataFrame(np.isposinf(df, *args, **kwargs)),\n        dtypes=np.bool_,\n    )\n    _iscomplex = Map.register(  # Needed for numpy API\n        lambda df, *args, **kwargs: pandas.DataFrame(np.iscomplex(df, *args, **kwargs)),\n        dtypes=np.bool_,\n    )\n    _isreal = Map.register(  # Needed for numpy API\n        lambda df, *args, **kwargs: pandas.DataFrame(np.isreal(df, *args, **kwargs)),\n        dtypes=np.bool_,\n    )\n    _logical_not = Map.register(np.logical_not, dtypes=np.bool_)  # Needed for numpy API\n    _tanh = Map.register(\n        lambda df, *args, **kwargs: pandas.DataFrame(np.tanh(df, *args, **kwargs))\n    )  # Needed for numpy API\n    _sqrt = Map.register(\n        lambda df, *args, **kwargs: pandas.DataFrame(np.sqrt(df, *args, **kwargs))\n    )  # Needed for numpy API\n    _exp = Map.register(\n        lambda df, *args, **kwargs: pandas.DataFrame(np.exp(df, *args, **kwargs))\n    )  # Needed for numpy API\n    negative = Map.register(pandas.DataFrame.__neg__)\n    notna = Map.register(pandas.DataFrame.notna, dtypes=np.bool_)\n    round = Map.register(pandas.DataFrame.round)\n    replace = Map.register(pandas.DataFrame.replace)\n    series_view = Map.register(\n        lambda df, *args, **kwargs: pandas.DataFrame(\n            
df.squeeze(axis=1).view(*args, **kwargs)\n        )\n    )\n    to_numeric = Map.register(\n        lambda df, *args, **kwargs: pandas.DataFrame(\n            pandas.to_numeric(df.squeeze(axis=1), *args, **kwargs)\n        )\n    )\n    to_timedelta = Map.register(\n        lambda s, *args, **kwargs: pandas.to_timedelta(\n            s.squeeze(axis=1), *args, **kwargs\n        ).to_frame(),\n        dtypes=\"timedelta64[ns]\",\n    )\n\n    # END Map partitions operations\n\n    # String map partitions operations\n\n    str_capitalize = Map.register(_str_map(\"capitalize\"), dtypes=\"copy\")\n    str_center = Map.register(_str_map(\"center\"), dtypes=\"copy\")\n    str_contains = Map.register(_str_map(\"contains\"), dtypes=np.bool_)\n    str_count = Map.register(_str_map(\"count\"), dtypes=int)\n    str_endswith = Map.register(_str_map(\"endswith\"), dtypes=np.bool_)\n    str_find = Map.register(_str_map(\"find\"), dtypes=np.int64)\n    str_findall = Map.register(_str_map(\"findall\"), dtypes=\"copy\")\n    str_get = Map.register(_str_map(\"get\"), dtypes=\"copy\")\n    str_index = Map.register(_str_map(\"index\"), dtypes=np.int64)\n    str_isalnum = Map.register(_str_map(\"isalnum\"), dtypes=np.bool_)\n    str_isalpha = Map.register(_str_map(\"isalpha\"), dtypes=np.bool_)\n    str_isdecimal = Map.register(_str_map(\"isdecimal\"), dtypes=np.bool_)\n    str_isdigit = Map.register(_str_map(\"isdigit\"), dtypes=np.bool_)\n    str_islower = Map.register(_str_map(\"islower\"), dtypes=np.bool_)\n    str_isnumeric = Map.register(_str_map(\"isnumeric\"), dtypes=np.bool_)\n    str_isspace = Map.register(_str_map(\"isspace\"), dtypes=np.bool_)\n    str_istitle = Map.register(_str_map(\"istitle\"), dtypes=np.bool_)\n    str_isupper = Map.register(_str_map(\"isupper\"), dtypes=np.bool_)\n    str_join = Map.register(_str_map(\"join\"), dtypes=\"copy\")\n    str_len = Map.register(_str_map(\"len\"), dtypes=int)\n    str_ljust = Map.register(_str_map(\"ljust\"), 
dtypes=\"copy\")\n    str_lower = Map.register(_str_map(\"lower\"), dtypes=\"copy\")\n    str_lstrip = Map.register(_str_map(\"lstrip\"), dtypes=\"copy\")\n    str_match = Map.register(_str_map(\"match\"), dtypes=\"copy\")\n    str_normalize = Map.register(_str_map(\"normalize\"), dtypes=\"copy\")\n    str_pad = Map.register(_str_map(\"pad\"), dtypes=\"copy\")\n    _str_partition = Map.register(_str_map(\"partition\"), dtypes=\"copy\")\n\n    def str_partition(self, sep=\" \", expand=True):\n        # For `expand`, need an operator that can create more columns than before\n        if expand:\n            return super().str_partition(sep=sep, expand=expand)\n        return self._str_partition(sep=sep, expand=False)\n\n    str_repeat = Map.register(_str_map(\"repeat\"), dtypes=\"copy\")\n    _str_extract = Map.register(_str_map(\"extract\"), dtypes=\"copy\")\n\n    def str_extract(self, pat, flags, expand):\n        regex = re.compile(pat, flags=flags)\n        # need an operator that can create more columns than before\n        if expand and regex.groups == 1:\n            qc = self._str_extract(pat, flags=flags, expand=expand)\n            qc.columns = get_group_names(regex)\n        else:\n            qc = super().str_extract(pat, flags=flags, expand=expand)\n        return qc\n\n    str_replace = Map.register(_str_map(\"replace\"), dtypes=\"copy\", shape_hint=\"column\")\n    str_rfind = Map.register(_str_map(\"rfind\"), dtypes=np.int64, shape_hint=\"column\")\n    str_rindex = Map.register(_str_map(\"rindex\"), dtypes=np.int64, shape_hint=\"column\")\n    str_rjust = Map.register(_str_map(\"rjust\"), dtypes=\"copy\", shape_hint=\"column\")\n    _str_rpartition = Map.register(\n        _str_map(\"rpartition\"), dtypes=\"copy\", shape_hint=\"column\"\n    )\n\n    def str_rpartition(self, sep=\" \", expand=True):\n        if expand:\n            # For `expand`, need an operator that can create more columns than before\n            return 
super().str_rpartition(sep=sep, expand=expand)\n        return self._str_rpartition(sep=sep, expand=False)\n\n    _str_rsplit = Map.register(_str_map(\"rsplit\"), dtypes=\"copy\", shape_hint=\"column\")\n\n    def str_rsplit(self, pat=None, n=-1, expand=False):\n        if expand:\n            # For `expand`, need an operator that can create more columns than before\n            return super().str_rsplit(pat=pat, n=n, expand=expand)\n        return self._str_rsplit(pat=pat, n=n, expand=False)\n\n    str_rstrip = Map.register(_str_map(\"rstrip\"), dtypes=\"copy\", shape_hint=\"column\")\n    str_slice = Map.register(_str_map(\"slice\"), dtypes=\"copy\", shape_hint=\"column\")\n    str_slice_replace = Map.register(\n        _str_map(\"slice_replace\"), dtypes=\"copy\", shape_hint=\"column\"\n    )\n    _str_split = Map.register(_str_map(\"split\"), dtypes=\"copy\", shape_hint=\"column\")\n\n    def str_split(self, pat=None, n=-1, expand=False, regex=None):\n        if expand:\n            # For `expand`, need an operator that can create more columns than before\n            return super().str_split(pat=pat, n=n, expand=expand, regex=regex)\n        return self._str_split(pat=pat, n=n, expand=False, regex=regex)\n\n    str_startswith = Map.register(\n        _str_map(\"startswith\"), dtypes=np.bool_, shape_hint=\"column\"\n    )\n    str_strip = Map.register(_str_map(\"strip\"), dtypes=\"copy\", shape_hint=\"column\")\n    str_swapcase = Map.register(\n        _str_map(\"swapcase\"), dtypes=\"copy\", shape_hint=\"column\"\n    )\n    str_title = Map.register(_str_map(\"title\"), dtypes=\"copy\", shape_hint=\"column\")\n    str_translate = Map.register(\n        _str_map(\"translate\"), dtypes=\"copy\", shape_hint=\"column\"\n    )\n    str_upper = Map.register(_str_map(\"upper\"), dtypes=\"copy\", shape_hint=\"column\")\n    str_wrap = Map.register(_str_map(\"wrap\"), dtypes=\"copy\", shape_hint=\"column\")\n    str_zfill = Map.register(_str_map(\"zfill\"), 
dtypes=\"copy\", shape_hint=\"column\")\n    str___getitem__ = Map.register(\n        _str_map(\"__getitem__\"), dtypes=\"copy\", shape_hint=\"column\"\n    )\n\n    # END String map partitions operations\n\n    def unique(self, keep=\"first\", ignore_index=True, subset=None):\n        # kernels with 'pandas.Series.unique()' work faster\n        can_use_unique_kernel = (\n            subset is None\n            and ignore_index\n            and len(self.columns) == 1\n            and keep is not False\n        )\n\n        if not can_use_unique_kernel and not RangePartitioning.get():\n            return super().unique(keep=keep, ignore_index=ignore_index, subset=subset)\n\n        if RangePartitioning.get():\n            new_modin_frame = self._modin_frame._apply_func_to_range_partitioning(\n                key_columns=self.columns.tolist() if subset is None else subset,\n                func=(\n                    (\n                        lambda df: pandas.DataFrame(\n                            df.squeeze(axis=1).unique(), columns=[\"__reduced__\"]\n                        )\n                    )\n                    if can_use_unique_kernel\n                    else (\n                        lambda df: df.drop_duplicates(\n                            keep=keep, ignore_index=ignore_index, subset=subset\n                        )\n                    )\n                ),\n                preserve_columns=True,\n            )\n        else:\n            # return self.to_pandas().squeeze(axis=1).unique() works faster\n            # but returns pandas type instead of query compiler\n            # TODO: https://github.com/modin-project/modin/issues/7182\n            new_modin_frame = self._modin_frame.apply_full_axis(\n                0,\n                lambda x: x.squeeze(axis=1).unique(),\n                new_columns=self.columns,\n            )\n        return self.__constructor__(new_modin_frame, shape_hint=self._shape_hint)\n\n    def searchsorted(self, 
**kwargs):\n        def searchsorted(df):\n            \"\"\"Apply `searchsorted` function to a single partition.\"\"\"\n            result = df.squeeze(axis=1).searchsorted(**kwargs)\n            if not is_list_like(result):\n                result = [result]\n            return pandas.DataFrame(result)\n\n        return self.default_to_pandas(searchsorted)\n\n    # Dt map partitions operations\n\n    dt_date = Map.register(_dt_prop_map(\"date\"), dtypes=np.object_)\n    dt_time = Map.register(_dt_prop_map(\"time\"), dtypes=np.object_)\n    dt_timetz = Map.register(_dt_prop_map(\"timetz\"), dtypes=np.object_)\n    dt_year = Map.register(_dt_prop_map(\"year\"), dtypes=np.int32)\n    dt_month = Map.register(_dt_prop_map(\"month\"), dtypes=np.int32)\n    dt_day = Map.register(_dt_prop_map(\"day\"), dtypes=np.int32)\n    dt_hour = Map.register(_dt_prop_map(\"hour\"), dtypes=np.int64)\n    dt_minute = Map.register(_dt_prop_map(\"minute\"), dtypes=np.int64)\n    dt_second = Map.register(_dt_prop_map(\"second\"), dtypes=np.int64)\n    dt_microsecond = Map.register(_dt_prop_map(\"microsecond\"), dtypes=np.int64)\n    dt_nanosecond = Map.register(_dt_prop_map(\"nanosecond\"), dtypes=np.int64)\n    dt_dayofweek = Map.register(_dt_prop_map(\"dayofweek\"), dtypes=np.int64)\n    dt_weekday = Map.register(_dt_prop_map(\"weekday\"), dtypes=np.int64)\n    dt_dayofyear = Map.register(_dt_prop_map(\"dayofyear\"), dtypes=np.int64)\n    dt_quarter = Map.register(_dt_prop_map(\"quarter\"), dtypes=np.int64)\n    dt_is_month_start = Map.register(_dt_prop_map(\"is_month_start\"), dtypes=np.bool_)\n    dt_is_month_end = Map.register(_dt_prop_map(\"is_month_end\"), dtypes=np.bool_)\n    dt_is_quarter_start = Map.register(\n        _dt_prop_map(\"is_quarter_start\"), dtypes=np.bool_\n    )\n    dt_is_quarter_end = Map.register(_dt_prop_map(\"is_quarter_end\"), dtypes=np.bool_)\n    dt_is_year_start = Map.register(_dt_prop_map(\"is_year_start\"), dtypes=np.bool_)\n    dt_is_year_end = 
Map.register(_dt_prop_map(\"is_year_end\"), dtypes=np.bool_)\n    dt_is_leap_year = Map.register(_dt_prop_map(\"is_leap_year\"), dtypes=np.bool_)\n    dt_daysinmonth = Map.register(_dt_prop_map(\"daysinmonth\"), dtypes=np.int64)\n    dt_days_in_month = Map.register(_dt_prop_map(\"days_in_month\"), dtypes=np.int64)\n    dt_asfreq = Map.register(_dt_func_map(\"asfreq\"))\n    dt_to_period = Map.register(_dt_func_map(\"to_period\"))\n    dt_to_pydatetime = Map.register(_dt_func_map(\"to_pydatetime\"), dtypes=np.object_)\n    dt_tz_localize = Map.register(_dt_func_map(\"tz_localize\"))\n    dt_tz_convert = Map.register(_dt_func_map(\"tz_convert\"))\n    dt_normalize = Map.register(_dt_func_map(\"normalize\"))\n    dt_strftime = Map.register(_dt_func_map(\"strftime\"), dtypes=np.object_)\n    dt_round = Map.register(_dt_func_map(\"round\"))\n    dt_floor = Map.register(_dt_func_map(\"floor\"))\n    dt_ceil = Map.register(_dt_func_map(\"ceil\"))\n    dt_month_name = Map.register(_dt_func_map(\"month_name\"), dtypes=np.object_)\n    dt_day_name = Map.register(_dt_func_map(\"day_name\"), dtypes=np.object_)\n    dt_to_pytimedelta = Map.register(_dt_func_map(\"to_pytimedelta\"), dtypes=np.object_)\n    dt_total_seconds = Map.register(_dt_func_map(\"total_seconds\"), dtypes=np.float64)\n    dt_seconds = Map.register(_dt_prop_map(\"seconds\"), dtypes=np.int64)\n    dt_days = Map.register(_dt_prop_map(\"days\"), dtypes=np.int64)\n    dt_microseconds = Map.register(_dt_prop_map(\"microseconds\"), dtypes=np.int64)\n    dt_nanoseconds = Map.register(_dt_prop_map(\"nanoseconds\"), dtypes=np.int64)\n    dt_qyear = Map.register(_dt_prop_map(\"qyear\"), dtypes=np.int64)\n    dt_start_time = Map.register(_dt_prop_map(\"start_time\"))\n    dt_end_time = Map.register(_dt_prop_map(\"end_time\"))\n    dt_to_timestamp = Map.register(_dt_func_map(\"to_timestamp\"))\n\n    # END Dt map partitions operations\n\n    def astype(self, col_dtypes, errors: str = \"raise\"):\n        # `errors` 
parameter needs to be part of the function signature because\n        # other query compilers may not take care of error handling at the API\n        # layer. This query compiler assumes there won't be any errors due to\n        # invalid type keys.\n        return self.__constructor__(\n            self._modin_frame.astype(col_dtypes, errors=errors),\n            shape_hint=self._shape_hint,\n        )\n\n    def infer_objects(self):\n        return self.__constructor__(self._modin_frame.infer_objects())\n\n    # Column/Row partitions reduce operations\n\n    def first_valid_index(self):\n        def first_valid_index_builder(df):\n            \"\"\"Get the position of the first valid index in a single partition.\"\"\"\n            return df.set_axis(pandas.RangeIndex(len(df.index)), axis=\"index\").apply(\n                lambda df: df.first_valid_index()\n            )\n\n        # We get the minimum from each column, then take the min of that to get\n        # first_valid_index. The `to_pandas()` here is just for a single value and\n        # `squeeze` will convert it to a scalar.\n        first_result = (\n            self.__constructor__(self._modin_frame.reduce(0, first_valid_index_builder))\n            .min(axis=1)\n            .to_pandas()\n            .squeeze()\n        )\n        return self.index[first_result]\n\n    def last_valid_index(self):\n        def last_valid_index_builder(df):\n            \"\"\"Get the position of the last valid index in a single partition.\"\"\"\n            return df.set_axis(pandas.RangeIndex(len(df.index)), axis=\"index\").apply(\n                lambda df: df.last_valid_index()\n            )\n\n        # We get the maximum from each column, then take the max of that to get\n        # last_valid_index. 
The `to_pandas()` here is just for a single value and\n        # `squeeze` will convert it to a scalar.\n        first_result = (\n            self.__constructor__(self._modin_frame.reduce(0, last_valid_index_builder))\n            .max(axis=1)\n            .to_pandas()\n            .squeeze()\n        )\n        return self.index[first_result]\n\n    # END Column/Row partitions reduce operations\n\n    def describe(self, percentiles: np.ndarray):\n        # Use pandas to calculate the correct columns\n        empty_df = (\n            pandas.DataFrame(columns=self.columns)\n            .astype(self.dtypes)\n            .describe(percentiles, include=\"all\")\n        )\n        new_index = empty_df.index\n\n        def describe_builder(df, internal_indices=[]):  # pragma: no cover\n            \"\"\"Apply `describe` function to the subset of columns in a single partition.\"\"\"\n            # The index of the resulting dataframe is the same amongst all partitions\n            # when dealing with the same data type. However, if we work with columns\n            # that contain strings, we can get extra values in our result index such as\n            # 'unique', 'top', and 'freq'. 
Since we call describe() on each partition,\n            # we can have cases where certain partitions do not contain any of the\n            # object string data leading to an index mismatch between partitions.\n            # Thus, we must reindex each partition with the global new_index.\n            return (\n                df.iloc[:, internal_indices]\n                .describe(percentiles=percentiles, include=\"all\")\n                .reindex(new_index)\n            )\n\n        return self.__constructor__(\n            self._modin_frame.apply_full_axis_select_indices(\n                0,\n                describe_builder,\n                empty_df.columns,\n                new_index=new_index,\n                new_columns=empty_df.columns,\n            )\n        )\n\n    # END Column/Row partitions reduce operations over select indices\n\n    # Map across rows/columns\n    # These operations require some global knowledge of the full column/row\n    # that is being operated on. This means that we have to put all of that\n    # data in the same place.\n\n    cummax = Fold.register(pandas.DataFrame.cummax, shape_preserved=True)\n    cummin = Fold.register(pandas.DataFrame.cummin, shape_preserved=True)\n    cumsum = Fold.register(pandas.DataFrame.cumsum, shape_preserved=True)\n    cumprod = Fold.register(pandas.DataFrame.cumprod, shape_preserved=True)\n    _diff = Fold.register(pandas.DataFrame.diff, shape_preserved=True)\n\n    def diff(self, axis, periods):\n        return self._diff(fold_axis=axis, axis=axis, periods=periods)\n\n    def clip(self, lower, upper, **kwargs):\n        if isinstance(lower, BaseQueryCompiler):\n            lower = lower.to_pandas().squeeze(1)\n        if isinstance(upper, BaseQueryCompiler):\n            upper = upper.to_pandas().squeeze(1)\n        kwargs[\"upper\"] = upper\n        kwargs[\"lower\"] = lower\n        axis = kwargs.get(\"axis\", 0)\n        if is_list_like(lower) or is_list_like(upper):\n            
new_modin_frame = self._modin_frame.fold(\n                axis, lambda df: df.clip(**kwargs), shape_preserved=True\n            )\n        else:\n            new_modin_frame = self._modin_frame.map(lambda df: df.clip(**kwargs))\n        return self.__constructor__(new_modin_frame)\n\n    corr = CorrCovBuilder.build_corr_method()\n\n    def cov(self, min_periods=None, ddof=1):\n        if self.get_pandas_backend() == \"pyarrow\":\n            return super().cov(min_periods=min_periods, ddof=ddof)\n        # _nancorr uses numpy, which is incompatible with pandas dataframes on pyarrow\n        return self._nancorr(min_periods=min_periods, cov=True, ddof=ddof)\n\n    def _nancorr(self, min_periods=1, cov=False, ddof=1):\n        \"\"\"\n        Compute either pairwise covariance or pairwise correlation of columns.\n\n        This function considers NA/null values the same way pandas does.\n\n        Parameters\n        ----------\n        min_periods : int, default: 1\n            Minimum number of observations required per pair of columns\n            to have a valid result.\n        cov : boolean, default: False\n            Either covariance or correlation should be computed.\n        ddof : int, default: 1\n            Means Delta Degrees of Freedom. 
The divisor used in calculations.\n\n        Returns\n        -------\n        PandasQueryCompiler\n            The covariance or correlation matrix.\n\n        Notes\n        -----\n        This method is only used to compute covariance at the moment.\n        \"\"\"\n        other = self.to_numpy()\n        try:\n            other_mask = self._isfinite().to_numpy()\n        except TypeError as err:\n            # Pandas raises ValueError on unsupported types, so casting\n            # the exception to a proper type\n            raise ValueError(\"Unsupported types with 'numeric_only=False'\") from err\n        n_cols = other.shape[1]\n\n        if min_periods is None:\n            min_periods = 1\n\n        def map_func(df):  # pragma: no cover\n            \"\"\"Compute covariance or correlation matrix for the passed frame.\"\"\"\n            df = df.to_numpy()\n            n_rows = df.shape[0]\n            df_mask = np.isfinite(df)\n\n            result = np.empty((n_rows, n_cols), dtype=np.float64)\n\n            for i in range(n_rows):\n                df_ith_row = df[i]\n                df_ith_mask = df_mask[i]\n\n                for j in range(n_cols):\n                    other_jth_col = other[:, j]\n\n                    valid = df_ith_mask & other_mask[:, j]\n\n                    vx = df_ith_row[valid]\n                    vy = other_jth_col[valid]\n\n                    nobs = len(vx)\n\n                    if nobs < min_periods:\n                        result[i, j] = np.nan\n                    else:\n                        vx = vx - vx.mean()\n                        vy = vy - vy.mean()\n                        sumxy = (vx * vy).sum()\n                        sumxx = (vx * vx).sum()\n                        sumyy = (vy * vy).sum()\n\n                        denom = (nobs - ddof) if cov else np.sqrt(sumxx * sumyy)\n                        if denom != 0:\n                            result[i, j] = sumxy / denom\n                        else:\n        
                    result[i, j] = np.nan\n\n            return pandas.DataFrame(result)\n\n        columns = self.columns\n        index = columns.copy()\n        transponed_self = self.transpose()\n        new_modin_frame = transponed_self._modin_frame.apply_full_axis(\n            1, map_func, new_index=index, new_columns=columns\n        )\n        return transponed_self.__constructor__(new_modin_frame)\n\n    def dot(self, other, squeeze_self=None, squeeze_other=None):\n        if isinstance(other, PandasQueryCompiler):\n            other = (\n                other.to_pandas().squeeze(axis=1)\n                if squeeze_other\n                else other.to_pandas()\n            )\n\n        num_cols = other.shape[1] if len(other.shape) > 1 else 1\n        if len(self.columns) == 1:\n            new_index = (\n                [MODIN_UNNAMED_SERIES_LABEL]\n                if (len(self.index) == 1 or squeeze_self) and num_cols == 1\n                else None\n            )\n            new_columns = (\n                [MODIN_UNNAMED_SERIES_LABEL] if squeeze_self and num_cols == 1 else None\n            )\n            axis = 0\n        else:\n            new_index = self.index\n            new_columns = [MODIN_UNNAMED_SERIES_LABEL] if num_cols == 1 else None\n            axis = 1\n\n        # If either new index or new columns are supposed to be a single-dimensional,\n        # then we use a special labeling for them. 
Besides setting the new labels as\n        # a metadata to the resulted frame, we also want to set them inside the kernel,\n        # so actual partitions would be labeled accordingly (there's a 'sync_label'\n        # parameter that can do the same, but doing it manually is faster)\n        align_index = isinstance(new_index, list) and new_index == [\n            MODIN_UNNAMED_SERIES_LABEL\n        ]\n        align_columns = new_columns == [MODIN_UNNAMED_SERIES_LABEL]\n\n        def map_func(df, other=other, squeeze_self=squeeze_self):  # pragma: no cover\n            \"\"\"Compute matrix multiplication of the passed frames.\"\"\"\n            result = df.squeeze(axis=1).dot(other) if squeeze_self else df.dot(other)\n\n            if is_list_like(result):\n                res = pandas.DataFrame(result)\n            else:\n                res = pandas.DataFrame([result])\n\n            # manual aligning with external index to avoid `sync_labels` overhead\n            if align_columns:\n                res.columns = [MODIN_UNNAMED_SERIES_LABEL]\n            if align_index:\n                res.index = [MODIN_UNNAMED_SERIES_LABEL]\n            return res\n\n        new_modin_frame = self._modin_frame.apply_full_axis(\n            axis,\n            map_func,\n            new_index=new_index,\n            new_columns=new_columns,\n            sync_labels=False,\n        )\n        return self.__constructor__(new_modin_frame)\n\n    def _nsort(self, n, columns=None, keep=\"first\", sort_type=\"nsmallest\"):\n        \"\"\"\n        Return first N rows of the data sorted in the specified order.\n\n        Parameters\n        ----------\n        n : int\n            Number of rows to return.\n        columns : list of labels, optional\n            Column labels to sort data by.\n        keep : {\"first\", \"last\", \"all\"}, default: \"first\"\n            How to pick first rows in case of duplicated values:\n            - \"first\": prioritize first occurrence.\n        
    - \"last\": prioritize last occurrence.\n            - \"all\": do not drop any duplicates, even if it means selecting more than `n` rows.\n        sort_type : {\"nsmallest\", \"nlargest\"}, default: \"nsmallest\"\n            \"nsmallest\" means sort in ascending order, \"nlargest\" means\n            sort in descending order.\n\n        Returns\n        -------\n        PandasQueryCompiler\n            New QueryCompiler containing the first N rows of the data\n            sorted in the given order.\n        \"\"\"\n\n        def map_func(df, n=n, keep=keep, columns=columns):  # pragma: no cover\n            \"\"\"Return first `N` rows of the sorted data for a single partition.\"\"\"\n            if columns is None:\n                return pandas.DataFrame(\n                    getattr(pandas.Series, sort_type)(\n                        df.squeeze(axis=1), n=n, keep=keep\n                    )\n                )\n            return getattr(pandas.DataFrame, sort_type)(\n                df, n=n, columns=columns, keep=keep\n            )\n\n        if columns is None:\n            new_columns = [MODIN_UNNAMED_SERIES_LABEL]\n        else:\n            new_columns = self.columns\n\n        new_modin_frame = self._modin_frame.apply_full_axis(\n            axis=0, func=map_func, new_columns=new_columns\n        )\n        return self.__constructor__(new_modin_frame)\n\n    def nsmallest(self, *args, **kwargs):\n        return self._nsort(sort_type=\"nsmallest\", *args, **kwargs)\n\n    def nlargest(self, *args, **kwargs):\n        return self._nsort(sort_type=\"nlargest\", *args, **kwargs)\n\n    def eval(self, expr, **kwargs):\n        # Make a copy of columns and eval on the copy to determine if result type is\n        # series or not\n        empty_eval = (\n            pandas.DataFrame(columns=self.columns)\n            .astype(self.dtypes)\n            .eval(expr, inplace=False, **kwargs)\n        )\n        if isinstance(empty_eval, pandas.Series):\n           
 new_columns = (\n                [empty_eval.name]\n                if empty_eval.name is not None\n                else [MODIN_UNNAMED_SERIES_LABEL]\n            )\n        else:\n            new_columns = empty_eval.columns\n        new_modin_frame = self._modin_frame.apply_full_axis(\n            1,\n            lambda df: pandas.DataFrame(df.eval(expr, inplace=False, **kwargs)),\n            new_index=self.index,\n            new_columns=new_columns,\n        )\n        return self.__constructor__(new_modin_frame)\n\n    def mode(self, **kwargs):\n        axis = kwargs.get(\"axis\", 0)\n\n        def mode_builder(df):  # pragma: no cover\n            \"\"\"Compute modes for a single partition.\"\"\"\n            result = pandas.DataFrame(df.mode(**kwargs))\n            # We return a dataframe with the same shape as the input to ensure\n            # that all the partitions will be the same shape\n            if axis == 0 and len(df) != len(result):\n                # Pad rows\n                result = result.reindex(index=pandas.RangeIndex(len(df.index)))\n            elif axis == 1 and len(df.columns) != len(result.columns):\n                # Pad columns\n                result = result.reindex(columns=pandas.RangeIndex(len(df.columns)))\n            return pandas.DataFrame(result)\n\n        if axis == 0:\n            new_index = pandas.RangeIndex(len(self.index))\n            new_columns = self.columns\n        else:\n            new_index = self.index\n            new_columns = pandas.RangeIndex(len(self.columns))\n        new_modin_frame = self._modin_frame.apply_full_axis(\n            axis, mode_builder, new_index=new_index, new_columns=new_columns\n        )\n        return self.__constructor__(new_modin_frame).dropna(axis=axis, how=\"all\")\n\n    def fillna(self, **kwargs):\n        squeeze_self = kwargs.pop(\"squeeze_self\", False)\n        squeeze_value = kwargs.pop(\"squeeze_value\", False)\n        axis = kwargs.get(\"axis\", 0)\n        value = 
kwargs.pop(\"value\")\n        method = kwargs.get(\"method\", None)\n        limit = kwargs.get(\"limit\", None)\n        full_axis = method is not None or limit is not None\n        new_dtypes = None\n        if isinstance(value, BaseQueryCompiler):\n            # This code assumes that the operation occurs with the same query compiler\n            assert isinstance(value, PandasQueryCompiler)\n            if squeeze_self:\n                # Self is a Series type object\n                if full_axis:\n                    value = value.to_pandas().squeeze(axis=1)\n\n                    def fillna_builder(series):  # pragma: no cover\n                        # `limit` parameter works only on `Series` type, so we have to squeeze both objects to get\n                        # correct behavior.\n                        return series.squeeze(axis=1).fillna(value=value, **kwargs)\n\n                    new_modin_frame = self._modin_frame.apply_full_axis(\n                        0, fillna_builder\n                    )\n                else:\n\n                    def fillna_builder(df, value_arg):\n                        if isinstance(value_arg, pandas.DataFrame):\n                            value_arg = value_arg.squeeze(axis=1)\n                        res = df.squeeze(axis=1).fillna(value=value_arg, **kwargs)\n                        return pandas.DataFrame(res)\n\n                    new_modin_frame = self._modin_frame.n_ary_op(\n                        fillna_builder,\n                        [value._modin_frame],\n                        join_type=\"left\",\n                        copartition_along_columns=False,\n                    )\n\n                return self.__constructor__(new_modin_frame)\n            else:\n                # Self is a DataFrame type object\n                if squeeze_value:\n                    # Value is Series type object\n                    value = value.to_pandas().squeeze(axis=1)\n\n                    def fillna(df):\n         
               return df.fillna(value=value, **kwargs)\n\n                    # Continue to end of this function\n\n                else:\n                    # Value is a DataFrame type object\n                    def fillna_builder(df, right):\n                        return df.fillna(value=right, **kwargs)\n\n                    new_modin_frame = self._modin_frame.broadcast_apply(\n                        0, fillna_builder, value._modin_frame\n                    )\n                    return self.__constructor__(new_modin_frame)\n\n        elif isinstance(value, dict):\n            if squeeze_self:\n                # For Series dict works along the index.\n                def fillna(df):\n                    return pandas.DataFrame(\n                        df.squeeze(axis=1).fillna(value=value, **kwargs)\n                    )\n\n            else:\n                # For DataFrames dict works along columns, all columns have to be present.\n                def fillna(df):\n                    func_dict = {\n                        col: val for (col, val) in value.items() if col in df.columns\n                    }\n                    return df.fillna(value=func_dict, **kwargs)\n\n                if self.frame_has_materialized_dtypes:\n                    dtypes = self.dtypes\n                    value_dtypes = pandas.DataFrame(\n                        {k: [v] for (k, v) in value.items()}\n                    ).dtypes\n                    if all(\n                        find_common_type([dtypes[col], dtype]) == dtypes[col]\n                        for (col, dtype) in value_dtypes.items()\n                        if col in dtypes\n                    ):\n                        new_dtypes = dtypes\n\n        else:\n            if self.frame_has_materialized_dtypes:\n                dtype = pandas.Series(value).dtype\n                if all(find_common_type([t, dtype]) == t for t in self.dtypes):\n                    new_dtypes = self.dtypes\n\n            def 
fillna(df):\n                return df.fillna(value=value, **kwargs)\n\n        if full_axis:\n            new_modin_frame = self._modin_frame.fold(axis, fillna, shape_preserved=True)\n        else:\n            new_modin_frame = self._modin_frame.map(fillna, dtypes=new_dtypes)\n        return self.__constructor__(new_modin_frame)\n\n    def quantile_for_list_of_values(self, **kwargs):\n        axis = kwargs.get(\"axis\", 0)\n        q = kwargs.get(\"q\")\n        numeric_only = kwargs.get(\"numeric_only\", True)\n        assert isinstance(q, (pandas.Series, np.ndarray, pandas.Index, list, tuple))\n\n        if numeric_only:\n            new_columns = self._modin_frame.numeric_columns()\n        else:\n            new_columns = [\n                col\n                for col, dtype in zip(self.columns, self.dtypes)\n                if (is_numeric_dtype(dtype) or lib.is_np_dtype(dtype, \"mM\"))\n            ]\n        if axis == 1:\n            query_compiler = self.getitem_column_array(new_columns)\n            new_columns = self.index\n        else:\n            query_compiler = self\n\n        def quantile_builder(df, **kwargs):\n            result = df.quantile(**kwargs)\n            return result.T if kwargs.get(\"axis\", 0) == 1 else result\n\n        # This took a long time to debug, so here is the rundown of why this is needed.\n        # Previously, we were operating on select indices, but that was broken. We were\n        # not correctly setting the columns/index. Because of how we compute `to_pandas`\n        # and because of the static nature of the index for `axis=1` it is easier to\n        # just handle this as the transpose (see `quantile_builder` above for the\n        # transpose within the partition) than it is to completely rework other\n        # internal methods. 
Basically we are returning the transpose of the object for\n        # correctness and cleanliness of the code.\n        if axis == 1:\n            q_index = new_columns\n            new_columns = pandas.Index(q)\n        else:\n            q_index = pandas.Index(q)\n        new_modin_frame = query_compiler._modin_frame.apply_full_axis(\n            axis,\n            lambda df: quantile_builder(df, **kwargs),\n            new_index=q_index,\n            new_columns=new_columns,\n            dtypes=np.float64,\n        )\n        result = self.__constructor__(new_modin_frame)\n        return result.transpose() if axis == 1 else result\n\n    def rank(self, **kwargs):\n        axis = kwargs.get(\"axis\", 0)\n        numeric_only = True if axis else kwargs.get(\"numeric_only\", False)\n        new_modin_frame = self._modin_frame.apply_full_axis(\n            axis,\n            lambda df: df.rank(**kwargs),\n            new_index=self._modin_frame.copy_index_cache(copy_lengths=True),\n            new_columns=(\n                self._modin_frame.copy_columns_cache(copy_lengths=True)\n                if not numeric_only\n                else None\n            ),\n            dtypes=np.float64,\n            sync_labels=False,\n        )\n        return self.__constructor__(new_modin_frame)\n\n    def sort_index(self, **kwargs):\n        axis = kwargs.pop(\"axis\", 0)\n        level = kwargs.pop(\"level\", None)\n        sort_remaining = kwargs.pop(\"sort_remaining\", True)\n        kwargs[\"inplace\"] = False\n\n        if level is not None or self.has_multiindex(axis=axis):\n            return self.default_to_pandas(\n                pandas.DataFrame.sort_index,\n                axis=axis,\n                level=level,\n                sort_remaining=sort_remaining,\n                **kwargs,\n            )\n\n        # sort_index can have ascending be None and behaves as if it is False.\n        # sort_values cannot have ascending be None. 
Thus, the following logic is to\n        # convert the ascending argument to one that works with sort_values\n        ascending = kwargs.pop(\"ascending\", True)\n        if ascending is None:\n            ascending = False\n        kwargs[\"ascending\"] = ascending\n        if axis:\n            new_columns = self.columns.to_frame().sort_index(**kwargs).index\n            new_index = self.index\n        else:\n            new_index = self.index.to_frame().sort_index(**kwargs).index\n            new_columns = self.columns\n        new_modin_frame = self._modin_frame.apply_full_axis(\n            axis,\n            lambda df: df.sort_index(\n                axis=axis, level=level, sort_remaining=sort_remaining, **kwargs\n            ),\n            new_index,\n            new_columns,\n            dtypes=\"copy\" if axis == 0 else None,\n        )\n        return self.__constructor__(new_modin_frame)\n\n    def melt(\n        self,\n        id_vars=None,\n        value_vars=None,\n        var_name=None,\n        value_name=\"value\",\n        col_level=None,\n        ignore_index=True,\n    ):\n        ErrorMessage.mismatch_with_pandas(\n            operation=\"melt\", message=\"Order of rows could be different from pandas\"\n        )\n\n        if var_name is None:\n            var_name = \"variable\"\n\n        def _convert_to_list(x):\n            \"\"\"Convert passed object to a list.\"\"\"\n            if is_list_like(x):\n                x = [*x]\n            elif x is not None:\n                x = [x]\n            else:\n                x = []\n            return x\n\n        id_vars, value_vars = map(_convert_to_list, [id_vars, value_vars])\n\n        if len(value_vars) == 0:\n            value_vars = self.columns.drop(id_vars)\n\n        if len(id_vars) != 0:\n            to_broadcast = self.getitem_column_array(id_vars)._modin_frame\n        else:\n            to_broadcast = None\n\n        def applyier(df, internal_indices, other=[], 
internal_other_indices=[]):\n            \"\"\"\n            Apply `melt` function to a single partition.\n\n            Parameters\n            ----------\n            df : pandas.DataFrame\n                Partition of the self frame.\n            internal_indices : list of ints\n                Positional indices of columns in this particular partition which\n                represent `value_vars` columns in the source frame.\n            other : pandas.DataFrame\n                Broadcasted partition which contains `id_vars` columns of the\n                source frame.\n            internal_other_indices : list of ints\n                Positional indices of columns in `other` partition which\n                represent `id_vars` columns in the source frame.\n\n            Returns\n            -------\n            pandas.DataFrame\n                The result of the `melt` function for this particular partition.\n            \"\"\"\n            if len(other):\n                other = pandas.concat(other, axis=1)\n                columns_to_add = other.columns.difference(df.columns)\n                df = pandas.concat([df, other[columns_to_add]], axis=1)\n            return df.melt(\n                id_vars=id_vars,\n                value_vars=df.columns[internal_indices],\n                var_name=var_name,\n                value_name=value_name,\n                col_level=col_level,\n            )\n\n        # we are not able to calculate correct indices here, so making it `dummy_index`\n        inconsistent_frame = self._modin_frame.broadcast_apply_select_indices(\n            axis=0,\n            apply_indices=value_vars,\n            func=applyier,\n            other=to_broadcast,\n            new_index=[\"dummy_index\"] * len(id_vars),\n            new_columns=[\"dummy_index\"] * len(id_vars),\n        )\n        # after applying `melt` for selected indices we will get partitions like this:\n        #     id_vars   vars   
value\n        #  0      foo   col3       1 |  0      foo   col5       a    so stacking it into\n        #  1      fiz   col3       2 |  1      fiz   col5       b    `new_parts` to get\n        #  2      bar   col3       3 |  2      bar   col5       c    correct answer\n        #  3      zoo   col3       4 |  3      zoo   col5       d\n        new_parts = np.array(\n            [np.array([x]) for x in np.concatenate(inconsistent_frame._partitions.T)]\n        )\n        new_index = pandas.RangeIndex(len(self.index) * len(value_vars))\n        new_modin_frame = self._modin_frame.__constructor__(\n            new_parts,\n            index=new_index,\n            columns=id_vars + [var_name, value_name],\n        )\n        result = self.__constructor__(new_modin_frame)\n        # this assignment is needed to propagate correct indices into partitions\n        result.index = new_index\n        return result\n\n    # END Map across rows/columns\n\n    # __getitem__ methods\n    __getitem_bool = Binary.register(\n        lambda df, r: df[[r]] if is_scalar(r) else df[r],\n        join_type=\"left\",\n        labels=\"drop\",\n    )\n\n    # __setitem__ methods\n    def setitem_bool(self, row_loc: PandasQueryCompiler, col_loc, item):\n        def _set_item(df, row_loc):  # pragma: no cover\n            df = df.copy()\n            df.loc[row_loc.squeeze(axis=1), col_loc] = item\n            return df\n\n        if self.frame_has_materialized_dtypes and is_scalar(item):\n            new_dtypes = self.dtypes.copy()\n            old_dtypes = new_dtypes[col_loc]\n            item_type = extract_dtype(item)\n            if isinstance(old_dtypes, pandas.Series):\n                new_dtypes[col_loc] = [\n                    find_common_type([dtype, item_type]) for dtype in old_dtypes.values\n                ]\n            else:\n                new_dtypes[col_loc] = find_common_type([old_dtypes, item_type])\n        else:\n            new_dtypes = None\n\n        new_modin_frame = 
self._modin_frame.broadcast_apply_full_axis(\n            axis=1,\n            func=_set_item,\n            other=row_loc._modin_frame,\n            new_index=self._modin_frame.copy_index_cache(copy_lengths=True),\n            new_columns=self._modin_frame.copy_columns_cache(),\n            keep_partitioning=False,\n            dtypes=new_dtypes,\n        )\n        return self.__constructor__(new_modin_frame)\n\n    # END __setitem__ methods\n\n    def __validate_bool_indexer(self, indexer):\n        if len(indexer) != len(self.index):\n            raise ValueError(\n                f\"Item wrong length {len(indexer)} instead of {len(self.index)}.\"\n            )\n        if isinstance(indexer, pandas.Series) and not indexer.equals(self.index):\n            warnings.warn(\n                \"Boolean Series key will be reindexed to match DataFrame index.\",\n                PendingDeprecationWarning,\n                stacklevel=4,\n            )\n\n    def getitem_array(self, key):\n        if isinstance(key, type(self)):\n            # here we check for a subset of bool indexers only to simplify the code;\n            # there could (potentially) be more of those, but we assume the most frequent\n            # ones are just of bool dtype\n            if len(key.dtypes) == 1 and is_bool_dtype(key.dtypes.iloc[0]):\n                self.__validate_bool_indexer(key.index)\n                return self.__getitem_bool(key, broadcast=True, dtypes=\"copy\")\n\n            key = key.to_pandas().squeeze(axis=1)\n\n        if is_bool_indexer(key):\n            self.__validate_bool_indexer(key)\n            key = check_bool_indexer(self.index, key)\n            # We convert to a RangeIndex because getitem_row_array is expecting a list\n            # of indices, and RangeIndex will give us the exact indices of each boolean\n            # requested.\n            key = pandas.RangeIndex(len(self.index))[key]\n            if len(key):\n                return 
self.getitem_row_array(key)\n            else:\n                return self.from_pandas(\n                    pandas.DataFrame(columns=self.columns), type(self._modin_frame)\n                )\n        else:\n            if any(k not in self.columns for k in key):\n                raise KeyError(\n                    \"{} not index\".format(\n                        str([k for k in key if k not in self.columns]).replace(\",\", \"\")\n                    )\n                )\n            return self.getitem_column_array(key)\n\n    def getitem_column_array(\n        self, key, numeric=False, ignore_order=False\n    ) -> PandasQueryCompiler:\n        shape_hint = \"column\" if len(key) == 1 else None\n        if numeric:\n            if ignore_order and is_list_like(key):\n                key = np.sort(key)\n            new_modin_frame = self._modin_frame.take_2d_labels_or_positional(\n                col_positions=key\n            )\n        else:\n            if ignore_order and is_list_like(key):\n                key_set = frozenset(key)\n                key = [col for col in self.columns if col in key_set]\n            new_modin_frame = self._modin_frame.take_2d_labels_or_positional(\n                col_labels=key\n            )\n        return self.__constructor__(new_modin_frame, shape_hint=shape_hint)\n\n    def getitem_row_array(self, key):\n        return self.__constructor__(\n            self._modin_frame.take_2d_labels_or_positional(row_positions=key)\n        )\n\n    def setitem(self, axis, key, value):\n        # Default to pandas for empty frames to avoid complex partitioning issues\n        if axis == 0 and not self.lazy_row_count and self.get_axis_len(0) == 0:\n\n            def do_setitem(df: pandas.DataFrame, key, value) -> pandas.DataFrame:\n                df[key] = value\n                return df\n\n            return self.default_to_pandas(do_setitem, key=key, value=value)\n\n        if axis == 0:\n            value = 
self._wrap_column_data(value)\n        return self._setitem(axis=axis, key=key, value=value, how=None)\n\n    def _setitem(self, axis, key, value, how=\"inner\"):\n        \"\"\"\n        Set the row/column defined by `key` to the `value` provided.\n\n        In contrast with `setitem`, with this function you can specify how\n        to handle non-aligned `self` and `value`.\n\n        Parameters\n        ----------\n        axis : {0, 1}\n            Axis to set `value` along. 0 means set column, 1 means set row.\n        key : scalar\n            Row/column label to set `value` in.\n        value : PandasQueryCompiler (1xN), list-like or scalar\n            Define new row/column value.\n        how : {\"inner\", \"outer\", \"left\", \"right\", None}, default: \"inner\"\n            Type of join to perform if specified axis of `self` and `value` are not\n            equal. If `how` is `None`, reindex `value` with `self` labels without joining.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            New QueryCompiler with updated `key` value.\n        \"\"\"\n\n        def setitem_builder(df, internal_indices=[]):  # pragma: no cover\n            \"\"\"\n            Set the row/column to the `value` in a single partition.\n\n            Parameters\n            ----------\n            df : pandas.DataFrame\n                Partition of the self frame.\n            internal_indices : list of ints\n                Positional indices of rows/columns in this particular partition\n                which represents `key` in the source frame.\n\n            Returns\n            -------\n            pandas.DataFrame\n                Partition data with updated values.\n            \"\"\"\n            df = df.copy()\n            if len(internal_indices) == 1:\n                if axis == 0:\n                    df[df.columns[internal_indices[0]]] = value\n                else:\n                    df.iloc[internal_indices[0]] = value\n            else:\n     
           if axis == 0:\n                    df[df.columns[internal_indices]] = value\n                else:\n                    df.iloc[internal_indices] = value\n            return df\n\n        if isinstance(value, type(self)):\n            value.columns = [key]\n            if axis == 1:\n                value = value.transpose()\n            idx = self.get_axis(axis ^ 1).get_indexer_for([key])[0]\n            return self.insert_item(axis ^ 1, idx, value, how, replace=True)\n\n        if axis == 0:\n            value_dtype = extract_dtype(value)\n\n            old_columns = self.columns.difference(pandas.Index([key]))\n            old_dtypes = ModinDtypes(self._modin_frame._dtypes).lazy_get(old_columns)\n            new_dtypes = ModinDtypes.concat(\n                [\n                    old_dtypes,\n                    DtypesDescriptor({key: value_dtype}, cols_with_unknown_dtypes=[]),\n                ]\n                # get dtypes in a proper order\n            ).lazy_get(self.columns)\n        else:\n            # TODO: apply 'find_common_dtype' to the value's dtype and old column dtypes\n            new_dtypes = None\n\n        # TODO: rework by passing list-like values to `apply_select_indices`\n        # as an item to distribute\n        if is_list_like(value):\n            new_modin_frame = self._modin_frame.apply_full_axis_select_indices(\n                axis,\n                setitem_builder,\n                [key],\n                new_index=self.index,\n                new_columns=self.columns,\n                keep_remaining=True,\n                new_dtypes=new_dtypes,\n            )\n        else:\n            new_modin_frame = self._modin_frame.apply_select_indices(\n                axis,\n                setitem_builder,\n                [key],\n                new_index=self.index,\n                new_columns=self.columns,\n                new_dtypes=new_dtypes,\n                keep_remaining=True,\n            )\n        return 
self.__constructor__(new_modin_frame)\n\n    # END __getitem__ methods\n\n    # Drop/Dropna\n    # This will change the shape of the resulting data.\n    def dropna(self, **kwargs):\n        is_column_wise = kwargs.get(\"axis\", 0) == 1\n        no_thresh_passed = kwargs.get(\"thresh\", lib.no_default) in (\n            lib.no_default,\n            None,\n        )\n        # The map reduce approach works well for frames with few columnar partitions\n        processable_amount_of_partitions = (\n            self._modin_frame.num_parts < CpuCount.get() * 32\n        )\n\n        if is_column_wise and no_thresh_passed and processable_amount_of_partitions:\n            how = kwargs.get(\"how\", \"any\")\n            subset = kwargs.get(\"subset\")\n            how = \"any\" if how in (lib.no_default, None) else how\n            condition = lambda df: getattr(df, how)()  # noqa: E731 (lambda assignment)\n\n            def mapper(df: pandas.DataFrame):\n                \"\"\"Compute a mask indicating whether there are all/any NaN values in each column.\"\"\"\n                if subset is not None:\n                    subset_mask = condition(\n                        df.loc[df.index.intersection(subset)].isna()\n                    )\n                    # we have to keep other columns so setting their mask\n                    # values with `False`\n                    mask = pandas.Series(\n                        np.zeros(df.shape[1], dtype=bool), index=df.columns\n                    )\n                    mask.update(subset_mask)\n                else:\n                    mask = condition(df.isna())\n                # for proper partitioning at the 'reduce' phase each partition has to\n                # represent a one-row frame rather than a one-column frame, so calling `.T` here\n                return mask.to_frame().T\n\n            masks = self._modin_frame.apply_full_axis(\n                func=mapper, axis=1, keep_partitioning=True\n            )\n\n        
    def reduce(df: pandas.DataFrame, mask: pandas.DataFrame):\n                \"\"\"Drop columns from `df` that satisfy the NaN `mask`.\"\"\"\n                # `mask` here consists of several rows each representing the masks result\n                # for a certain row partition:\n                #     col1  col2   col3\n                # 0   True  True  False                         col1     True\n                # 1  False  True  False  ---> mask.any() --->   col2     True\n                # 2   True  True  False                         col3    False\n                # in order to get the proper 1D mask we have to reduce the partition's\n                # results by applying the condition one more time\n                to_take_mask = ~condition(mask)\n\n                to_take = []\n                for col, value in to_take_mask.items():\n                    if value and col in df:\n                        to_take.append(col)\n\n                return df[to_take]\n\n            result = self._modin_frame.broadcast_apply(\n                # 'masks' have identical partitioning as we specified 'keep_partitioning=True' before,\n                # this means that we can safely skip the 'co-partitioning' stage\n                axis=1,\n                func=reduce,\n                other=masks,\n                copartition=False,\n                labels=\"drop\",\n            )\n            return self.__constructor__(result, shape_hint=self._shape_hint)\n\n        return self.__constructor__(\n            self._modin_frame.filter(\n                kwargs.get(\"axis\", 0) ^ 1,\n                lambda df: pandas.DataFrame.dropna(df, **kwargs),\n            ),\n            shape_hint=self._shape_hint,\n        )\n\n    def drop(\n        self, index=None, columns=None, errors: str = \"raise\"\n    ) -> PandasQueryCompiler:\n        # `errors` parameter needs to be part of the function signature because\n        # other query compilers may not take care of error handling 
at the API\n        # layer. This query compiler assumes there won't be any errors due to\n        # invalid keys.\n        if index is not None:\n            index = np.sort(self.index.get_indexer_for(self.index.difference(index)))\n        if columns is not None:\n            columns = np.sort(\n                self.columns.get_indexer_for(self.columns.difference(columns))\n            )\n        new_modin_frame = self._modin_frame.take_2d_labels_or_positional(\n            row_positions=index, col_positions=columns\n        )\n        return self.__constructor__(new_modin_frame)\n\n    # END Drop/Dropna\n\n    def duplicated(self, **kwargs):\n        def _compute_hash(df):\n            result = df.apply(\n                lambda s: hashlib.new(\"md5\", str(tuple(s)).encode()).hexdigest(), axis=1\n            )\n            if isinstance(result, pandas.Series):\n                result = result.to_frame(\n                    result.name\n                    if result.name is not None\n                    else MODIN_UNNAMED_SERIES_LABEL\n                )\n            return result\n\n        def _compute_duplicated(df):  # pragma: no cover\n            result = df.duplicated(**kwargs)\n            if isinstance(result, pandas.Series):\n                result = result.to_frame(\n                    result.name\n                    if result.name is not None\n                    else MODIN_UNNAMED_SERIES_LABEL\n                )\n            return result\n\n        if self._modin_frame._partitions.shape[1] > 1:\n            # if the number of columns (or column partitions) we are checking for duplicates is larger than 1,\n            # we must first hash them to generate a single value that can be compared across rows.\n            hashed_modin_frame = self._modin_frame.reduce(\n                axis=1,\n                function=_compute_hash,\n                dtypes=pandas.api.types.pandas_dtype(\"O\"),\n            )\n        else:\n            hashed_modin_frame = 
self._modin_frame\n        new_modin_frame = hashed_modin_frame.apply_full_axis(\n            axis=0,\n            func=_compute_duplicated,\n            new_index=self._modin_frame.copy_index_cache(),\n            new_columns=[MODIN_UNNAMED_SERIES_LABEL],\n            dtypes=np.bool_,\n            keep_partitioning=True,\n        )\n        return self.__constructor__(new_modin_frame, shape_hint=\"column\")\n\n    # Insert\n    # This method changes the shape of the resulting data. In Pandas, this\n    # operation is always inplace, but this object is immutable, so we just\n    # return a new one from here and let the front end handle the inplace\n    # update.\n    def insert(self, loc, column, value):\n        value = self._wrap_column_data(value)\n        if isinstance(value, type(self)):\n            value.columns = [column]\n            return self.insert_item(axis=1, loc=loc, value=value, how=None)\n\n        def insert(df, internal_indices=[]):  # pragma: no cover\n            \"\"\"\n            Insert new column to the partition.\n\n            Parameters\n            ----------\n            df : pandas.DataFrame\n                Partition of the self frame.\n            internal_indices : list of ints\n                Positional index of the column in this particular partition\n                to insert new column after.\n            \"\"\"\n            internal_idx = int(internal_indices[0])\n            df.insert(internal_idx, column, value)\n            return df\n\n        value_dtype = extract_dtype(value)\n        new_columns = self.columns.insert(loc, column)\n        new_dtypes = ModinDtypes.concat(\n            [\n                self._modin_frame._dtypes,\n                DtypesDescriptor({column: value_dtype}, cols_with_unknown_dtypes=[]),\n            ]\n        ).lazy_get(\n            new_columns\n        )  # get dtypes in a proper order\n\n        # TODO: rework by passing list-like values to `apply_select_indices`\n        # as an item 
to distribute\n        new_modin_frame = self._modin_frame.apply_full_axis_select_indices(\n            0,\n            insert,\n            numeric_indices=[loc],\n            keep_remaining=True,\n            new_index=self.index,\n            new_columns=new_columns,\n            new_dtypes=new_dtypes,\n        )\n        return self.__constructor__(new_modin_frame)\n\n    def _wrap_column_data(self, data):\n        \"\"\"\n        If the data is list-like, create a single column query compiler.\n\n        Parameters\n        ----------\n        data : any\n\n        Returns\n        -------\n        data or PandasQueryCompiler\n        \"\"\"\n        if is_list_like(data):\n            return self.from_pandas(\n                pandas.DataFrame(pandas.Series(data, index=self.index)),\n                data_cls=type(self._modin_frame),\n            )\n        return data\n\n    # END Insert\n\n    def explode(self, column):\n        return self.__constructor__(\n            self._modin_frame.explode(1, lambda df: df.explode(column))\n        )\n\n    # UDF (apply and agg) methods\n    # There is a wide range of behaviors that are supported, so a lot of the\n    # logic can get a bit convoluted.\n    def apply(self, func, axis, *args, **kwargs):\n        # if any of args contain modin object, we should\n        # convert it to pandas\n        args = try_cast_to_pandas(args)\n        kwargs = try_cast_to_pandas(kwargs)\n        _, func, _, _ = reconstruct_func(func, **kwargs)\n        if isinstance(func, dict):\n            return self._dict_func(func, axis, *args, **kwargs)\n        elif is_list_like(func):\n            return self._list_like_func(func, axis, *args, **kwargs)\n        else:\n            return self._callable_func(func, axis, *args, **kwargs)\n\n    def apply_on_series(self, func, *args, **kwargs):\n        args = try_cast_to_pandas(args)\n        kwargs = try_cast_to_pandas(kwargs)\n\n        assert self.is_series_like()\n\n        # We use 
apply_full_axis here instead of map since the latter assumes that the\n        # shape of the DataFrame does not change. However, it is possible for functions\n        # applied to Series objects to end up creating DataFrames. It is possible that\n        # using apply_full_axis is much less performant compared to using a variant of\n        # map.\n        return self.__constructor__(\n            self._modin_frame.apply_full_axis(\n                1, lambda df: df.squeeze(axis=1).apply(func, *args, **kwargs)\n            )\n        )\n\n    def _dict_func(self, func, axis, *args, **kwargs):\n        \"\"\"\n        Apply passed functions to the specified rows/columns.\n\n        Parameters\n        ----------\n        func : dict(label) -> [callable, str]\n            Dictionary that maps axis labels to the function to apply against them.\n        axis : {0, 1}\n            Target axis to apply functions along. 0 means apply to columns,\n            1 means apply to rows.\n        *args : args\n            Arguments to pass to the specified functions.\n        **kwargs : kwargs\n            Arguments to pass to the specified functions.\n\n        Returns\n        -------\n        PandasQueryCompiler\n            New QueryCompiler containing the results of passed functions.\n        \"\"\"\n        if \"axis\" not in kwargs:\n            kwargs[\"axis\"] = axis\n\n        func = {k: wrap_udf_function(v) if callable(v) else v for k, v in func.items()}\n\n        def dict_apply_builder(df, internal_indices=[]):  # pragma: no cover\n            # Sometimes `apply` can return a `Series`, but we require that internally\n            # all objects are `DataFrame`s.\n            # It looks like it doesn't need to use `internal_indices` option internally\n            # for the case since `apply` use labels from dictionary keys in `func` variable.\n            return pandas.DataFrame(df.apply(func, *args, **kwargs))\n\n        labels = list(func.keys())\n        return 
self.__constructor__(\n            self._modin_frame.apply_full_axis_select_indices(\n                axis,\n                dict_apply_builder,\n                labels,\n                new_index=labels if axis == 1 else None,\n                new_columns=labels if axis == 0 else None,\n                keep_remaining=False,\n            )\n        )\n\n    def _list_like_func(self, func, axis, *args, **kwargs):\n        \"\"\"\n        Apply passed functions to each row/column.\n\n        Parameters\n        ----------\n        func : list of callable\n            List of functions to apply against each row/column.\n        axis : {0, 1}\n            Target axis to apply functions along. 0 means apply to columns,\n            1 means apply to rows.\n        *args : args\n            Arguments to pass to the specified functions.\n        **kwargs : kwargs\n            Arguments to pass to the specified functions.\n\n        Returns\n        -------\n        PandasQueryCompiler\n            New QueryCompiler containing the results of passed functions.\n        \"\"\"\n        # When the function is list-like, the function names become the index/columns\n        new_index = (\n            [f if isinstance(f, str) else f.__name__ for f in func]\n            if axis == 0\n            else self.index\n        )\n        new_columns = (\n            [f if isinstance(f, str) else f.__name__ for f in func]\n            if axis == 1\n            else self.columns\n        )\n        func = [wrap_udf_function(f) if callable(f) else f for f in func]\n        new_modin_frame = self._modin_frame.apply_full_axis(\n            axis,\n            lambda df: pandas.DataFrame(df.apply(func, axis, *args, **kwargs)),\n            new_index=new_index,\n            new_columns=new_columns,\n        )\n        return self.__constructor__(new_modin_frame)\n\n    def rowwise_query(self, expr, **kwargs):\n        \"\"\"\n        Query the columns of a ``PandasQueryCompiler`` with a boolean 
row-wise expression.\n\n        Basically, in row-wise expressions we only allow column names, constants\n        and other variables captured using the '@' symbol. No function/method\n        can be called inside such expressions.\n\n        Parameters\n        ----------\n        expr : str\n            Row-wise boolean expression.\n        **kwargs : dict\n            Arguments to pass to the ``pandas.DataFrame.query()``.\n\n        Returns\n        -------\n        PandasQueryCompiler\n\n        Raises\n        ------\n        NotImplementedError\n            In case the passed expression cannot be executed row-wise.\n        \"\"\"\n        # Walk through the AST and verify it doesn't contain any nodes that\n        # prevent us from executing the query row-wise (we're basically\n        # looking for 'ast.Call')\n        nodes = ast.parse(expr.replace(\"@\", \"\")).body\n        is_row_wise_query = True\n\n        while nodes:\n            node = nodes.pop()\n            if isinstance(node, ast.Expr):\n                node = getattr(node, \"value\", node)\n\n            if isinstance(node, ast.UnaryOp):\n                nodes.append(node.operand)\n            elif isinstance(node, ast.BinOp):\n                nodes.extend([node.left, node.right])\n            elif isinstance(node, ast.BoolOp):\n                nodes.extend(node.values)\n            elif isinstance(node, ast.Compare):\n                nodes.extend([node.left] + node.comparators)\n            elif isinstance(node, (ast.Name, ast.Constant)):\n                pass\n            else:\n                # if we end up here then the expression is no longer simple\n                # enough to run it row-wise, so exiting\n                is_row_wise_query = False\n                break\n\n        if not is_row_wise_query:\n            raise NotImplementedError(\"A non row-wise query was passed.\")\n\n        def query_builder(df, **modin_internal_kwargs):\n            return df.query(expr, 
inplace=False, **kwargs, **modin_internal_kwargs)\n\n        return self.__constructor__(self._modin_frame.filter(1, query_builder))\n\n    def _callable_func(self, func, axis, *args, **kwargs):\n        \"\"\"\n        Apply passed function to each row/column.\n\n        Parameters\n        ----------\n        func : callable or str\n            Function to apply.\n        axis : {0, 1}\n            Target axis to apply function along. 0 means apply to columns,\n            1 means apply to rows.\n        *args : args\n            Arguments to pass to the specified function.\n        **kwargs : kwargs\n            Arguments to pass to the specified function.\n\n        Returns\n        -------\n        PandasQueryCompiler\n            New QueryCompiler containing the results of passed function\n            for each row/column.\n        \"\"\"\n        if callable(func):\n            func = wrap_udf_function(func)\n\n        new_modin_frame = self._modin_frame.apply_full_axis(\n            axis, lambda df: df.apply(func, axis=axis, *args, **kwargs)\n        )\n        return self.__constructor__(new_modin_frame)\n\n    # END UDF\n\n    # Manual Partitioning methods (e.g. merge, groupby)\n    # These methods require some sort of manual partitioning due to their\n    # nature. They require certain data to exist on the same partition, and\n    # after the shuffle, there should be only a local map required.\n\n    def _groupby_separate_by(self, by, drop):\n        \"\"\"\n        Separate internal and external groupers in `by` argument of groupby.\n\n        Parameters\n        ----------\n        by : BaseQueryCompiler, column or index label, Grouper or list\n        drop : bool\n            Indicates whether or not by data came from self frame.\n            True, by data came from self. 
False, external by data.\n\n        Returns\n        -------\n        external_by : list of BaseQueryCompiler and arrays\n            Values to group by.\n        internal_by : list of str\n            List of column names from `self` to group by.\n        by_positions : list of ints\n            Specifies the order of grouping by `internal_by` and `external_by` columns.\n            Each element in `by_positions` specifies an index from either `external_by` or `internal_by`.\n            Indices for `external_by` are non-negative and start from 0. Indices for `internal_by` are negative\n            and start from -1 (so in order to convert them to valid indices one should do ``-idx - 1``)\n            '''\n            by_positions = [0, -1, 1, -2, 2, 3]\n            internal_by = [\"col1\", \"col2\"]\n            external_by = [sr1, sr2, sr3, sr4]\n\n            df.groupby([sr1, \"col1\", sr2, \"col2\", sr3, sr4])\n            '''.\n        \"\"\"\n        if isinstance(by, type(self)):\n            if drop:\n                internal_by = by.columns.tolist()\n                external_by = []\n                by_positions = [-i - 1 for i in range(len(internal_by))]\n            else:\n                internal_by = []\n                external_by = [by]\n                by_positions = [i for i in range(len(external_by[0].columns))]\n        else:\n            if not isinstance(by, list):\n                by = [by] if by is not None else []\n            internal_by = []\n            external_by = []\n            external_by_counter = 0\n            by_positions = []\n            for o in by:\n                if isinstance(o, pandas.Grouper) and o.key in self.columns:\n                    internal_by.append(o.key)\n                    by_positions.append(-len(internal_by))\n                elif hashable(o) and o in self.columns:\n                    internal_by.append(o)\n                    by_positions.append(-len(internal_by))\n                else:\n                 
   external_by.append(o)\n                    for _ in range(len(o.columns) if isinstance(o, type(self)) else 1):\n                        by_positions.append(external_by_counter)\n                        external_by_counter += 1\n        return external_by, internal_by, by_positions\n\n    groupby_all = GroupbyReduceImpl.build_qc_method(\"all\")\n    groupby_any = GroupbyReduceImpl.build_qc_method(\"any\")\n    groupby_count = GroupbyReduceImpl.build_qc_method(\"count\")\n    groupby_max = GroupbyReduceImpl.build_qc_method(\"max\")\n    groupby_min = GroupbyReduceImpl.build_qc_method(\"min\")\n    groupby_prod = GroupbyReduceImpl.build_qc_method(\"prod\")\n    groupby_sum = GroupbyReduceImpl.build_qc_method(\"sum\")\n    groupby_skew = GroupbyReduceImpl.build_qc_method(\"skew\")\n\n    def groupby_nth(\n        self,\n        by,\n        axis,\n        groupby_kwargs,\n        agg_args,\n        agg_kwargs,\n        drop=False,\n    ):\n        result = super().groupby_nth(\n            by, axis, groupby_kwargs, agg_args, agg_kwargs, drop\n        )\n        if not groupby_kwargs.get(\"as_index\", True):\n            # pandas keeps order of columns intact, follow suit\n            return result.getitem_column_array(self.columns)\n        return result\n\n    def groupby_mean(self, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop=False):\n        if RangePartitioning.get():\n            try:\n                return self._groupby_shuffle(\n                    by=by,\n                    agg_func=\"mean\",\n                    axis=axis,\n                    groupby_kwargs=groupby_kwargs,\n                    agg_args=agg_args,\n                    agg_kwargs=agg_kwargs,\n                    drop=drop,\n                )\n            except NotImplementedError as e:\n                ErrorMessage.warn(\n                    f\"Can't use range-partitioning groupby implementation because of: {e}\"\n                    + \"\\nFalling back to a TreeReduce 
implementation.\"\n                )\n\n        _, internal_by, _ = self._groupby_separate_by(by, drop)\n\n        numeric_only = agg_kwargs.get(\"numeric_only\", False)\n        datetime_cols = (\n            {\n                col: dtype\n                for col, dtype in zip(self.dtypes.index, self.dtypes)\n                if is_datetime64_any_dtype(dtype) and col not in internal_by\n            }\n            if not numeric_only\n            else dict()\n        )\n\n        if len(datetime_cols) > 0:\n            datetime_qc = self.getitem_array(datetime_cols)\n            if datetime_qc.isna().any().any(axis=1).to_pandas().squeeze():\n                return super().groupby_mean(\n                    by=by,\n                    axis=axis,\n                    groupby_kwargs=groupby_kwargs,\n                    agg_args=agg_args,\n                    agg_kwargs=agg_kwargs,\n                    drop=drop,\n                )\n\n        qc_with_converted_datetime_cols = (\n            self.astype({col: \"int64\" for col in datetime_cols.keys()})\n            if len(datetime_cols) > 0\n            else self\n        )\n\n        result = GroupbyReduceImpl.build_qc_method(\"mean\")(\n            query_compiler=qc_with_converted_datetime_cols,\n            by=by,\n            axis=axis,\n            groupby_kwargs=groupby_kwargs,\n            agg_args=agg_args,\n            agg_kwargs=agg_kwargs,\n            drop=drop,\n        )\n\n        if len(datetime_cols) > 0:\n            result = result.astype({col: dtype for col, dtype in datetime_cols.items()})\n        return result\n\n    def groupby_size(\n        self,\n        by,\n        axis,\n        groupby_kwargs,\n        agg_args,\n        agg_kwargs,\n        drop=False,\n    ):\n        if RangePartitioning.get():\n            try:\n                return self._groupby_shuffle(\n                    by=by,\n                    agg_func=\"size\",\n                    axis=axis,\n                    
groupby_kwargs=groupby_kwargs,\n                    agg_args=agg_args,\n                    agg_kwargs=agg_kwargs,\n                    drop=drop,\n                )\n            except NotImplementedError as e:\n                ErrorMessage.warn(\n                    f\"Can't use range-partitioning groupby implementation because of: {e}\"\n                    + \"\\nFalling back to a TreeReduce implementation.\"\n                )\n\n        result = self._groupby_dict_reduce(\n            by=by,\n            axis=axis,\n            agg_func={self.columns[0]: [(\"__size_col__\", \"size\")]},\n            agg_args=agg_args,\n            agg_kwargs=agg_kwargs,\n            groupby_kwargs=groupby_kwargs,\n            drop=drop,\n            method=\"size\",\n            default_to_pandas_func=lambda grp: grp.size(),\n        )\n        if groupby_kwargs.get(\"as_index\", True):\n            result.columns = [MODIN_UNNAMED_SERIES_LABEL]\n        elif isinstance(result.columns, pandas.MultiIndex):\n            # Dropping one extra-level which was added because of renaming aggregation\n            result.columns = (\n                result.columns[:-1].droplevel(-1).append(pandas.Index([\"size\"]))\n            )\n        return result\n\n    def _groupby_dict_reduce(\n        self,\n        by,\n        agg_func,\n        axis,\n        groupby_kwargs,\n        agg_args,\n        agg_kwargs,\n        drop=False,\n        **kwargs,\n    ):\n        \"\"\"\n        Group underlying data and apply aggregation functions to each group of the specified column/row.\n\n        This method is responsible for performing dictionary groupby aggregation for such functions,\n        that can be implemented via TreeReduce approach.\n\n        Parameters\n        ----------\n        by : PandasQueryCompiler, column or index label, Grouper or list of such\n            Object that determines groups.\n        agg_func : dict(label) -> str\n            Dictionary that maps row/column labels 
to the function names.\n            **Note:** specified functions have to be supported by ``modin.core.dataframe.algebra.GroupByReduce``.\n            Supported functions are listed in the ``modin.core.dataframe.algebra.GroupByReduce.groupby_reduce_functions``\n            dictionary.\n        axis : {0, 1}\n            Axis to group and apply aggregation function along.\n            0 is for index, when 1 is for columns.\n        groupby_kwargs : dict\n            GroupBy parameters in the format of ``modin.pandas.DataFrame.groupby`` signature.\n        agg_args : list-like\n            Serves the compatibility purpose. Does not affect the result.\n        agg_kwargs : dict\n            Arguments to pass to the aggregation functions.\n        drop : bool, default: False\n            If `by` is a QueryCompiler indicates whether or not by-data came\n            from the `self`.\n        **kwargs : dict\n            Additional parameters to pass to the ``modin.core.dataframe.algebra.GroupByReduce.register``.\n\n        Returns\n        -------\n        PandasQueryCompiler\n            New QueryCompiler containing the result of groupby dictionary aggregation.\n        \"\"\"\n        map_dict = {}\n        reduce_dict = {}\n        kwargs.setdefault(\n            \"default_to_pandas_func\",\n            lambda grp, *args, **kwargs: grp.agg(agg_func, *args, **kwargs),\n        )\n\n        rename_columns = any(\n            not isinstance(fn, str) and isinstance(fn, Iterable)\n            for fn in agg_func.values()\n        )\n        for col, col_funcs in agg_func.items():\n            if not rename_columns:\n                map_dict[col], reduce_dict[col], _ = GroupbyReduceImpl.get_impl(\n                    col_funcs\n                )\n                continue\n\n            if isinstance(col_funcs, str):\n                col_funcs = [col_funcs]\n\n            map_fns = []\n            for i, fn in enumerate(col_funcs):\n                if not isinstance(fn, str) 
and isinstance(fn, Iterable):\n                    new_col_name, func = fn\n                elif isinstance(fn, str):\n                    new_col_name, func = fn, fn\n                else:\n                    raise TypeError\n\n                map_fn, reduce_fn, _ = GroupbyReduceImpl.get_impl(func)\n\n                map_fns.append((new_col_name, map_fn))\n                reduced_col_name = (\n                    (*col, new_col_name)\n                    if isinstance(col, tuple)\n                    else (col, new_col_name)\n                )\n                reduce_dict[reduced_col_name] = reduce_fn\n            map_dict[col] = map_fns\n        return GroupByReduce.register(map_dict, reduce_dict, **kwargs)(\n            query_compiler=self,\n            by=by,\n            axis=axis,\n            groupby_kwargs=groupby_kwargs,\n            agg_args=agg_args,\n            agg_kwargs=agg_kwargs,\n            drop=drop,\n        )\n\n    def groupby_dtypes(\n        self,\n        by,\n        axis,\n        groupby_kwargs,\n        agg_args,\n        agg_kwargs,\n        drop=False,\n    ):\n        return self.groupby_agg(\n            by=by,\n            axis=axis,\n            agg_func=lambda df: df.dtypes,\n            how=\"group_wise\",\n            agg_args=agg_args,\n            agg_kwargs=agg_kwargs,\n            groupby_kwargs=groupby_kwargs,\n            drop=drop,\n        )\n\n    @_inherit_docstrings(BaseQueryCompiler.groupby_agg)\n    def _groupby_shuffle(\n        self,\n        by,\n        agg_func,\n        axis,\n        groupby_kwargs,\n        agg_args,\n        agg_kwargs,\n        drop=False,\n        how=\"axis_wise\",\n        series_groupby=False,\n    ):\n        # Defaulting to pandas in case of an empty frame as we can't process it properly.\n        # Higher API level won't pass empty data here unless the frame has delayed\n        # computations. 
FIXME: We apparently lose some laziness here (due to index access)\n        # because of the inability to process empty groupby natively.\n        if len(self.columns) == 0 or len(self._modin_frame) == 0:\n            return super().groupby_agg(\n                by, agg_func, axis, groupby_kwargs, agg_args, agg_kwargs, how, drop\n            )\n\n        grouping_on_level = groupby_kwargs.get(\"level\") is not None\n        if any(\n            isinstance(obj, pandas.Grouper)\n            for obj in (by if isinstance(by, list) else [by])\n        ):\n            raise NotImplementedError(\n                \"Grouping on a pandas.Grouper with range-partitioning groupby is not yet supported: \"\n                + \"https://github.com/modin-project/modin/issues/5926\"\n            )\n\n        if grouping_on_level:\n            external_by, internal_by, by_positions = [], [], []\n        else:\n            external_by, internal_by, by_positions = self._groupby_separate_by(by, drop)\n\n        all_external_are_qcs = all(isinstance(obj, type(self)) for obj in external_by)\n        if not all_external_are_qcs:\n            raise NotImplementedError(\n                \"Grouping on an external grouper with range-partitioning groupby is only supported with Series'es: \"\n                + \"https://github.com/modin-project/modin/issues/5926\"\n            )\n\n        is_transform = how == \"transform\" or GroupBy.is_transformation_kernel(agg_func)\n        if is_transform:\n            # https://github.com/modin-project/modin/issues/5924\n            ErrorMessage.mismatch_with_pandas(\n                operation=\"range-partitioning groupby\",\n                message=\"the order of rows may be shuffled for the result\",\n            )\n\n        # This check materializes dtypes for 'by' columns\n        if not is_transform and groupby_kwargs.get(\"observed\", False) in (\n            False,\n            lib.no_default,\n        ):\n            # The following 'dtypes' check 
materializes dtypes for 'by' columns\n            internal_dtypes = pandas.Series()\n            external_dtypes = pandas.Series()\n            if len(internal_by) > 0:\n                internal_dtypes = (\n                    self._modin_frame._dtypes.lazy_get(internal_by).get()\n                    if isinstance(self._modin_frame._dtypes, ModinDtypes)\n                    else self.dtypes[internal_by]\n                )\n            if len(external_by) > 0:\n                dtypes_list = []\n                for obj in external_by:\n                    if not isinstance(obj, type(self)):\n                        # we're only interested in categorical dtypes here, which can only\n                        # appear in modin objects\n                        continue\n                    dtypes_list.append(obj.dtypes)\n                external_dtypes = pandas.concat(dtypes_list)\n\n            by_dtypes = pandas.concat([internal_dtypes, external_dtypes])\n            add_missing_cats = any(\n                isinstance(dtype, pandas.CategoricalDtype) for dtype in by_dtypes\n            )\n        else:\n            add_missing_cats = False\n\n        if add_missing_cats and not groupby_kwargs.get(\"as_index\", True):\n            raise NotImplementedError(\n                \"Range-partitioning groupby is not implemented for grouping on categorical columns with \"\n                + \"the following set of parameters {'as_index': False, 'observed': False}. Change either 'as_index' \"\n                + \"or 'observed' to True and try again. 
\"\n                + \"https://github.com/modin-project/modin/issues/5926\"\n            )\n\n        if isinstance(agg_func, dict):\n            assert (\n                how == \"axis_wise\"\n            ), f\"Only 'axis_wise' aggregation is supported with dictionary functions, got: {how}\"\n\n            subset = internal_by + list(agg_func.keys())\n            # extracting unique values; no we can't use np.unique here as it would\n            # convert a list of tuples to a 2D matrix and so mess up the result\n            subset = list(dict.fromkeys(subset))\n            obj = self.getitem_column_array(subset)\n        else:\n            obj = self\n\n        agg_method = (\n            SeriesGroupByDefault if series_groupby else GroupByDefault\n        ).get_aggregation_method(how)\n        original_agg_func = agg_func\n\n        def agg_func(grp, *args, **kwargs):\n            result = agg_method(grp, original_agg_func, *args, **kwargs)\n\n            # Convert Series to DataFrame\n            if result.ndim == 1:\n                result = result.to_frame(\n                    MODIN_UNNAMED_SERIES_LABEL if result.name is None else result.name\n                )\n\n            return result\n\n        result = obj._modin_frame.groupby(\n            axis=axis,\n            internal_by=internal_by,\n            external_by=[\n                obj._modin_frame if isinstance(obj, type(self)) else obj\n                for obj in external_by\n            ],\n            by_positions=by_positions,\n            series_groupby=series_groupby,\n            operator=lambda grp: agg_func(grp, *agg_args, **agg_kwargs),\n            # UDFs passed to '.apply()' are allowed to produce results with arbitrary shapes,\n            # that's why we have to align the partition's shapes/labeling across different\n            # row partitions\n            align_result_columns=how == \"group_wise\",\n            add_missing_cats=add_missing_cats,\n            **groupby_kwargs,\n       
 )\n        result_qc: PandasQueryCompiler = self.__constructor__(result)\n\n        if not is_transform and not groupby_kwargs.get(\"as_index\", True):\n            return result_qc.reset_index(drop=True)\n\n        return result_qc\n\n    def groupby_corr(\n        self,\n        by,\n        axis,\n        groupby_kwargs,\n        agg_args,\n        agg_kwargs,\n        drop=False,\n    ):\n        ErrorMessage.default_to_pandas(\"`GroupBy.corr`\")\n        # TODO(https://github.com/modin-project/modin/issues/1323) implement this.\n        # Right now, using this class's groupby_agg method, even with how=\"group_wise\",\n        # produces a result with the wrong index, so default to pandas by using the\n        # super class's groupby_agg method.\n        return super().groupby_agg(\n            by=by,\n            agg_func=\"corr\",\n            axis=axis,\n            groupby_kwargs=groupby_kwargs,\n            agg_args=agg_args,\n            agg_kwargs=agg_kwargs,\n            drop=drop,\n        )\n\n    def groupby_cov(\n        self,\n        by,\n        axis,\n        groupby_kwargs,\n        agg_args,\n        agg_kwargs,\n        drop=False,\n    ):\n        ErrorMessage.default_to_pandas(\"`GroupBy.cov`\")\n        # TODO(https://github.com/modin-project/modin/issues/1322) implement this.\n        # Right now, using this class's groupby_agg method, even with how=\"group_wise\",\n        # produces a result with the wrong index, so default to pandas by using the\n        # super class's groupby_agg method.\n        return super().groupby_agg(\n            by=by,\n            agg_func=\"cov\",\n            axis=axis,\n            groupby_kwargs=groupby_kwargs,\n            agg_args=agg_args,\n            agg_kwargs=agg_kwargs,\n            drop=drop,\n        )\n\n    def groupby_rolling(\n        self,\n        by,\n        agg_func,\n        axis,\n        groupby_kwargs,\n        rolling_kwargs,\n        agg_args,\n        agg_kwargs,\n        
drop=False,\n    ):\n        # 'corr' and 'cov' require knowledge about the whole row axis (all columns have\n        # to be available in the same partitions), this requirement is not being satisfied\n        # in the current groupby implementation\n        unsupported_groupby = (\n            agg_func in (\"corr\", \"cov\") or rolling_kwargs.get(\"on\") is not None\n        )\n\n        if isinstance(agg_func, str):\n            str_func = agg_func\n\n            def agg_func(window, *args, **kwargs):\n                return getattr(window, str_func)(*args, **kwargs)\n\n        else:\n            assert callable(agg_func)\n\n        kwargs = {\n            \"by\": by,\n            \"agg_func\": lambda grp, *args, **kwargs: agg_func(\n                grp.rolling(**rolling_kwargs), *args, **kwargs\n            ),\n            \"axis\": axis,\n            \"groupby_kwargs\": groupby_kwargs,\n            \"agg_args\": agg_args,\n            \"agg_kwargs\": agg_kwargs,\n            \"how\": \"direct\",\n            \"drop\": drop,\n        }\n\n        if unsupported_groupby:\n            return super(PandasQueryCompiler, self).groupby_agg(**kwargs)\n\n        try:\n            return self._groupby_shuffle(**kwargs)\n        except NotImplementedError as e:\n            get_logger().info(\n                f\"Can't use range-partitioning groupby implementation because of: {e}\"\n                + \"\\nFalling back to a full-axis implementation.\"\n            )\n            return self.groupby_agg(**kwargs)\n\n    def groupby_agg(\n        self,\n        by,\n        agg_func,\n        axis,\n        groupby_kwargs,\n        agg_args,\n        agg_kwargs,\n        how=\"axis_wise\",\n        drop=False,\n        series_groupby=False,\n    ):\n        # Defaulting to pandas in case of an empty frame as we can't process it properly.\n        # Higher API level won't pass empty data here unless the frame has delayed\n        # computations. 
So we apparently lose some laziness here (due to index access)\n        # because of the inability to process empty groupby natively.\n        if len(self.columns) == 0 or len(self._modin_frame) == 0:\n            return super().groupby_agg(\n                by, agg_func, axis, groupby_kwargs, agg_args, agg_kwargs, how, drop\n            )\n\n        # 'group_wise' means 'groupby.apply()'. We're certain that range-partitioning groupby\n        # always works better for '.apply()', so we're using it regardless of the 'RangePartitioning'\n        # value\n        if how == \"group_wise\" or RangePartitioning.get():\n            try:\n                return self._groupby_shuffle(\n                    by=by,\n                    agg_func=agg_func,\n                    axis=axis,\n                    groupby_kwargs=groupby_kwargs,\n                    agg_args=agg_args,\n                    agg_kwargs=agg_kwargs,\n                    drop=drop,\n                    how=how,\n                    series_groupby=series_groupby,\n                )\n            except NotImplementedError as e:\n                # if a user wants to use range-partitioning groupby explicitly, then we should print a visible\n                # warning to them on a failure, otherwise we're only logging it\n                message = (\n                    f\"Can't use range-partitioning groupby implementation because of: {e}\"\n                    + \"\\nFalling back to a full-axis implementation.\"\n                )\n                get_logger().info(message)\n                if RangePartitioning.get():\n                    ErrorMessage.warn(message)\n\n        if isinstance(agg_func, dict) and GroupbyReduceImpl.has_impl_for(agg_func):\n            return self._groupby_dict_reduce(\n                by, agg_func, axis, groupby_kwargs, agg_args, agg_kwargs, drop\n            )\n\n        is_transform_method = how == \"transform\" or (\n            isinstance(agg_func, str) and agg_func in 
transformation_kernels\n        )\n\n        original_agg_func = agg_func\n\n        if isinstance(agg_func, dict):\n            assert (\n                how == \"axis_wise\"\n            ), f\"Only 'axis_wise' aggregation is supported with dictionary functions, got: {how}\"\n        else:\n            agg_method = (\n                SeriesGroupByDefault if series_groupby else GroupByDefault\n            ).get_aggregation_method(how)\n\n            def agg_func(grp, *args, **kwargs):\n                return agg_method(grp, original_agg_func, *args, **kwargs)\n\n        # since we're going to modify `groupby_kwargs` dict in a `groupby_agg_builder`,\n        # we want to copy it to not propagate these changes into source dict, in case\n        # of unsuccessful end of function\n        groupby_kwargs = groupby_kwargs.copy()\n\n        as_index = groupby_kwargs.get(\"as_index\", True)\n        external_by, internal_by, _ = self._groupby_separate_by(by, drop)\n        internal_qc = (\n            [self.getitem_column_array(internal_by)] if len(internal_by) else []\n        )\n        by = internal_qc + external_by\n\n        broadcastable_by = [o._modin_frame for o in by if isinstance(o, type(self))]\n        not_broadcastable_by = [o for o in by if not isinstance(o, type(self))]\n\n        def groupby_agg_builder(df, by=None, drop=False, partition_idx=None):\n            \"\"\"\n            Compute groupby aggregation for a single partition.\n\n            Parameters\n            ----------\n            df : pandas.DataFrame\n                Partition of the self frame.\n            by : pandas.DataFrame, optional\n                Broadcasted partition which contains `by` columns.\n            drop : bool, default: False\n                Indicates whether `by` partition came from the `self` frame.\n            partition_idx : int, optional\n                Positional partition index along groupby axis.\n\n            Returns\n            -------\n            
pandas.DataFrame\n                DataFrame containing the result of groupby aggregation\n                for this particular partition.\n            \"\"\"\n            # Set `as_index` to True to track the metadata of the grouping object\n            # It is used to make sure that between phases we are constructing the\n            # right index and placing columns in the correct order.\n            groupby_kwargs[\"as_index\"] = True\n\n            # We have to filter func-dict BEFORE inserting broadcasted 'by' columns\n            # to avoid multiple aggregation results for 'by' cols in case they're\n            # present in the func-dict:\n            partition_agg_func = GroupByReduce.get_callable(agg_func, df)\n\n            internal_by_cols = pandas.Index([])\n            missed_by_cols = pandas.Index([])\n\n            if by is not None:\n                internal_by_df = by[internal_by]\n\n                if isinstance(internal_by_df, pandas.Series):\n                    internal_by_df = internal_by_df.to_frame()\n\n                missed_by_cols = internal_by_df.columns.difference(df.columns)\n                if len(missed_by_cols) > 0:\n                    df = pandas.concat(\n                        [df, internal_by_df[missed_by_cols]],\n                        axis=1,\n                        copy=False,\n                    )\n\n                internal_by_cols = internal_by_df.columns\n\n                external_by = by.columns.difference(internal_by).unique()\n                external_by_df = by[external_by].squeeze(axis=1)\n\n                if isinstance(external_by_df, pandas.DataFrame):\n                    external_by_cols = [o for _, o in external_by_df.items()]\n                else:\n                    external_by_cols = [external_by_df]\n\n                by = internal_by_cols.tolist() + external_by_cols\n\n            else:\n                by = []\n\n            by += not_broadcastable_by\n            level = 
groupby_kwargs.get(\"level\", None)\n            if level is not None and not by:\n                by = None\n                by_length = len(level) if is_list_like(level) else 1\n            else:\n                by_length = len(by)\n\n            def compute_groupby(df, drop=False, partition_idx=0):\n                \"\"\"Compute groupby aggregation for a single partition.\"\"\"\n                target_df = df.squeeze(axis=1) if series_groupby else df\n                grouped_df = target_df.groupby(by=by, axis=axis, **groupby_kwargs)\n                try:\n                    result = partition_agg_func(grouped_df, *agg_args, **agg_kwargs)\n                except DataError:\n                    # This happens when the partition is filled with non-numeric data and a\n                    # numeric operation is done. We need to build the index here to avoid\n                    # issues with extracting the index.\n                    result = pandas.DataFrame(index=grouped_df.size().index)\n                if isinstance(result, pandas.Series):\n                    result = result.to_frame(\n                        result.name\n                        if result.name is not None\n                        else MODIN_UNNAMED_SERIES_LABEL\n                    )\n\n                selection = agg_func.keys() if isinstance(agg_func, dict) else None\n                if selection is None:\n                    # Some pandas built-in aggregation functions aggregate 'by' columns\n                    # (for example 'apply', 'dtypes', maybe more...). Since we make sure\n                    # that all of the 'by' columns are presented in every partition by\n                    # inserting the missed ones, we will end up with all of the 'by'\n                    # columns being aggregated in every partition. 
To avoid duplications\n                    # in the result we drop all of the 'by' columns that were inserted\n                    # in this partition AFTER handling 'as_index' parameter. The order\n                    # is important for proper naming-conflicts handling.\n                    misaggregated_cols = missed_by_cols.intersection(result.columns)\n                else:\n                    misaggregated_cols = []\n\n                if not as_index:\n                    GroupBy.handle_as_index_for_dataframe(\n                        result,\n                        internal_by_cols,\n                        by_cols_dtypes=df[internal_by_cols].dtypes.values,\n                        by_length=by_length,\n                        selection=selection,\n                        partition_idx=partition_idx,\n                        drop=drop,\n                        inplace=True,\n                        method=\"transform\" if is_transform_method else None,\n                    )\n                else:\n                    new_index_names = tuple(\n                        (\n                            None\n                            if isinstance(name, str)\n                            and name.startswith(MODIN_UNNAMED_SERIES_LABEL)\n                            else name\n                        )\n                        for name in result.index.names\n                    )\n                    result.index.names = new_index_names\n\n                if len(misaggregated_cols) > 0:\n                    result.drop(columns=misaggregated_cols, inplace=True)\n\n                return result\n\n            try:\n                return compute_groupby(df, drop, partition_idx)\n            except (ValueError, KeyError):\n                # This will happen with Arrow buffer read-only errors. 
We don't want to copy\n                # all the time, so this will try to fast-path the code first.\n                return compute_groupby(df.copy(), drop, partition_idx)\n\n        if isinstance(original_agg_func, dict):\n            apply_indices = list(agg_func.keys())\n        elif isinstance(original_agg_func, list):\n            apply_indices = self.columns.difference(internal_by).tolist()\n        else:\n            apply_indices = None\n\n        if (\n            # For now handling only simple cases, where 'by' columns are described by a single query compiler\n            agg_kwargs.get(\"as_index\", True)\n            and len(not_broadcastable_by) == 0\n            and len(broadcastable_by) == 1\n            and broadcastable_by[0].has_materialized_dtypes\n        ):\n            new_index = ModinIndex(\n                # actual value will be assigned on a parent update\n                value=None,\n                axis=0,\n                dtypes=broadcastable_by[0].dtypes,\n            )\n        else:\n            new_index = None\n\n        new_modin_frame = self._modin_frame.broadcast_apply_full_axis(\n            axis=axis,\n            func=lambda df, by=None, partition_idx=None: groupby_agg_builder(\n                df, by, drop, partition_idx\n            ),\n            other=broadcastable_by,\n            new_index=new_index,\n            apply_indices=apply_indices,\n            enumerate_partitions=True,\n        )\n        result = self.__constructor__(new_modin_frame)\n\n        # that means that exception in `compute_groupby` was raised\n        # in every partition, so we also should raise it\n        if (\n            len(result.columns) == 0\n            and len(self.columns) != 0\n            and agg_kwargs.get(\"numeric_only\", False)\n        ):\n            raise TypeError(\"No numeric types to aggregate.\")\n\n        return result\n\n    # END Manual Partitioning methods\n\n    def pivot(self, index, columns, values):\n        
from pandas.core.reshape.pivot import _convert_by\n\n        def __convert_by(by):\n            \"\"\"Convert passed value to a list.\"\"\"\n            if isinstance(by, pandas.Index):\n                by = list(by)\n            by = _convert_by(by)\n            if (\n                len(by) > 0\n                and (not is_list_like(by[0]) or isinstance(by[0], tuple))\n                and not all([key in self.columns for key in by])\n            ):\n                by = [by]\n            return by\n\n        index, columns, values = map(__convert_by, [index, columns, values])\n        is_custom_index = (\n            len(index) == 1\n            and is_list_like(index[0])\n            and not isinstance(index[0], tuple)\n        )\n\n        if is_custom_index or len(index) == 0:\n            to_reindex = columns\n        else:\n            to_reindex = index + columns\n\n        if len(values) != 0:\n            obj = self.getitem_column_array(to_reindex + values)\n        else:\n            obj = self\n\n        if is_custom_index:\n            obj.index = index\n\n        reindexed = self.__constructor__(\n            obj._modin_frame.apply_full_axis(\n                1,\n                lambda df: df.set_index(to_reindex, append=(len(to_reindex) == 1)),\n                new_columns=obj.columns.drop(to_reindex),\n            )\n        )\n\n        unstacked = reindexed.unstack(level=columns, fill_value=None)\n        if len(reindexed.columns) == 1 and unstacked.columns.nlevels > 1:\n            unstacked.columns = unstacked.columns.droplevel(0)\n\n        return unstacked\n\n    def pivot_table(\n        self,\n        index,\n        values,\n        columns,\n        aggfunc,\n        fill_value,\n        margins,\n        dropna,\n        margins_name,\n        observed,\n        sort,\n    ):\n        ErrorMessage.mismatch_with_pandas(\n            operation=\"pivot_table\",\n            message=\"Order of columns could be different from pandas\",\n       
 )\n\n        from pandas.core.reshape.pivot import _convert_by\n\n        def __convert_by(by):\n            \"\"\"Convert passed value to a list.\"\"\"\n            if isinstance(by, pandas.Index):\n                return list(by)\n            return _convert_by(by)\n\n        is_1d_values = values is not None and not is_list_like(values)\n        index, columns = map(__convert_by, [index, columns])\n\n        if len(index) + len(columns) == 0:\n            raise ValueError(\"No group keys passed!\")\n\n        if is_1d_values and len(index) > 0 and len(columns) > 0:\n            drop_column_level = 1 if isinstance(aggfunc, list) else 0\n        else:\n            drop_column_level = None\n\n        # if the value is 'None' it will be converted to an empty list (no columns to aggregate),\n        # which is invalid for 'values', as 'None' means aggregate ALL columns instead\n        if values is not None:\n            values = __convert_by(values)\n\n        # using 'pandas.unique' instead of 'numpy' as it guarantees to not change the original order\n        unique_keys = pandas.Series(index + columns).unique()\n\n        kwargs = {\n            \"qc\": self,\n            \"unique_keys\": unique_keys,\n            \"drop_column_level\": drop_column_level,\n            \"pivot_kwargs\": {\n                \"index\": index,\n                \"values\": values,\n                \"columns\": columns,\n                \"aggfunc\": aggfunc,\n                \"fill_value\": fill_value,\n                \"margins\": margins,\n                \"dropna\": dropna,\n                \"margins_name\": margins_name,\n                \"observed\": observed,\n                \"sort\": sort,\n            },\n        }\n\n        try:\n            return PivotTableImpl.map_reduce_impl(**kwargs)\n        except NotImplementedError as e:\n            message = (\n                f\"Can't use MapReduce 'pivot_table' implementation because of: {e}\"\n                + \"\\nFalling back 
to a range-partitioning implementation.\"\n            )\n            get_logger().info(message)\n\n        try:\n            return PivotTableImpl.range_partition_impl(**kwargs)\n        except NotImplementedError as e:\n            message = (\n                f\"Can't use range-partitioning 'pivot_table' implementation because of: {e}\"\n                + \"\\nFalling back to a full-axis implementation.\"\n            )\n            get_logger().info(message)\n\n        return PivotTableImpl.full_axis_impl(**kwargs)\n\n    # Get_dummies\n    def get_dummies(self, columns, **kwargs):\n        # `columns` as None does not mean all columns, by default it means only\n        # non-numeric columns.\n        if columns is None:\n            columns = [c for c in self.columns if not is_numeric_dtype(self.dtypes[c])]\n            # If we aren't computing any dummies, there is no need for any\n            # remote compute.\n            if len(columns) == 0:\n                return self.copy()\n        elif not is_list_like(columns):\n            columns = [columns]\n\n        def map_fn(df):  # pragma: no cover\n            cols_to_encode = df.columns.intersection(columns)\n            return pandas.get_dummies(df, columns=cols_to_encode, **kwargs)\n\n        # In some cases, we are mapping across all of the data. 
It is more\n        # efficient if we are mapping over all of the data to do it this way\n        # than it would be to reuse the code for specific columns.\n        if len(columns) == len(self.columns):\n            new_modin_frame = self._modin_frame.apply_full_axis(\n                0, map_fn, new_index=self.index, dtypes=bool\n            )\n            untouched_frame = None\n        else:\n            new_modin_frame = self._modin_frame.take_2d_labels_or_positional(\n                col_labels=columns\n            ).apply_full_axis(0, map_fn, new_index=self.index, dtypes=bool)\n            untouched_frame = self.drop(columns=columns)\n        # If we mapped over all the data we are done. If not, we need to\n        # prepend the `new_modin_frame` with the raw data from the columns that were\n        # not selected.\n        if len(columns) != len(self.columns):\n            new_modin_frame = untouched_frame._modin_frame.concat(\n                1, [new_modin_frame], how=\"left\", sort=False\n            )\n        return self.__constructor__(new_modin_frame)\n\n    # END Get_dummies\n\n    # Indexing\n    def take_2d_positional(self, index=None, columns=None):\n        return self.__constructor__(\n            self._modin_frame.take_2d_labels_or_positional(\n                row_positions=index, col_positions=columns\n            )\n        )\n\n    def write_items(\n        self, row_numeric_index, col_numeric_index, item, need_columns_reindex=True\n    ):\n        # We have to keep this import away from the module level to avoid circular import\n        from modin.pandas.utils import broadcast_item, is_scalar\n\n        def iloc_mut(partition, row_internal_indices, col_internal_indices, item):\n            \"\"\"\n            Write `value` in a specified location in a single partition.\n\n            Parameters\n            ----------\n            partition : pandas.DataFrame\n                Partition of the self frame.\n            row_internal_indices : 
list of ints\n                Positional indices of rows in this particular partition\n                to write `item` to.\n            col_internal_indices : list of ints\n                Positional indices of columns in this particular partition\n                to write `item` to.\n            item : 2D-array\n                Value to write.\n\n            Returns\n            -------\n            pandas.DataFrame\n                Partition data with updated values.\n            \"\"\"\n            partition = partition.copy()\n            try:\n                partition.iloc[row_internal_indices, col_internal_indices] = item\n            except ValueError:\n                # `copy` is needed to avoid \"ValueError: buffer source array is read-only\" for `item`\n                # because the item may be converted to the type that is in the dataframe.\n                # TODO: in the future we will need to convert to the correct type manually according\n                # to the following warning. Example: \"FutureWarning: Setting an item of incompatible\n                # dtype is deprecated and will raise in a future error of pandas. 
Value '[1.38629436]'\n                # has dtype incompatible with int64, please explicitly cast to a compatible dtype first.\"\n                partition.iloc[row_internal_indices, col_internal_indices] = item.copy()\n            return partition\n\n        if not is_scalar(item):\n            (\n                broadcasted_item,\n                broadcasted_dtypes,\n                row_numeric_index,\n                col_numeric_index,\n            ) = broadcast_item(\n                self,\n                row_numeric_index,\n                col_numeric_index,\n                item,\n                need_columns_reindex=need_columns_reindex,\n            )\n        else:\n            broadcasted_item, broadcasted_dtypes = item, pandas.Series(\n                [extract_dtype(item)] * len(col_numeric_index)\n            )\n\n        new_dtypes = None\n        if (\n            # compute dtypes only if assigning entire columns\n            isinstance(row_numeric_index, slice)\n            and row_numeric_index == slice(None)\n            and self.frame_has_materialized_dtypes\n        ):\n            new_dtypes = self.dtypes.copy()\n            new_dtypes.iloc[col_numeric_index] = broadcasted_dtypes.values\n\n        new_modin_frame = self._modin_frame.apply_select_indices(\n            axis=None,\n            func=iloc_mut,\n            row_labels=row_numeric_index,\n            col_labels=col_numeric_index,\n            new_index=self.index,\n            new_columns=self.columns,\n            new_dtypes=new_dtypes,\n            keep_remaining=True,\n            item_to_distribute=broadcasted_item,\n        )\n        return self.__constructor__(new_modin_frame)\n\n    def sort_rows_by_column_values(self, columns, ascending=True, **kwargs):\n        new_modin_frame = self._modin_frame.sort_by(\n            0, columns, ascending=ascending, **kwargs\n        )\n        return self.__constructor__(new_modin_frame)\n\n    def sort_columns_by_row_values(self, rows, 
ascending=True, **kwargs):\n        if not is_list_like(rows):\n            rows = [rows]\n        ErrorMessage.default_to_pandas(\"sort_values\")\n        broadcast_value_list = [\n            self.getitem_row_array([row]).to_pandas() for row in rows\n        ]\n        index_builder = list(zip(broadcast_value_list, rows))\n        broadcast_values = pandas.concat(\n            [row for row, idx in index_builder], copy=False\n        )\n        broadcast_values.columns = self.columns\n        new_columns = broadcast_values.sort_values(\n            by=rows, axis=1, ascending=ascending, **kwargs\n        ).columns\n        return self.reindex(axis=1, labels=new_columns)\n\n    # Cat operations\n    def cat_codes(self):\n        def func(df: pandas.DataFrame) -> pandas.DataFrame:\n            ser = df.iloc[:, 0]\n            return ser.cat.codes.to_frame(name=MODIN_UNNAMED_SERIES_LABEL)\n\n        res = self._modin_frame.map(func=func, new_columns=[MODIN_UNNAMED_SERIES_LABEL])\n        return self.__constructor__(res, shape_hint=\"column\")\n\n    # END Cat operations\n\n    def compare(self, other, **kwargs):\n        return self.__constructor__(\n            self._modin_frame.broadcast_apply_full_axis(\n                0,\n                lambda left, right: pandas.DataFrame.compare(\n                    left, other=right, **kwargs\n                ),\n                other._modin_frame,\n            )\n        )\n\n    def case_when(self, caselist):\n        qc_type = type(self)\n        caselist = [\n            tuple(\n                data._modin_frame if isinstance(data, qc_type) else data\n                for data in case_tuple\n            )\n            for case_tuple in caselist\n        ]\n        return self.__constructor__(\n            self._modin_frame.case_when(caselist),\n            shape_hint=self._shape_hint,\n        )\n"
  },
  {
    "path": "modin/core/storage_formats/pandas/query_compiler_caster.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"\nModule contains ``QueryCompilerCaster`` class.\n\n``QueryCompilerCaster`` is used for automatically casting query compiler\narguments to the type of the current query compiler for query compiler class functions.\nThis ensures compatibility between different query compiler classes.\n\"\"\"\n\nimport functools\nimport inspect\nimport random\nfrom abc import ABC, abstractmethod\nfrom collections import defaultdict, namedtuple\nfrom types import FunctionType, MappingProxyType, MethodType\nfrom typing import Any, Callable, Dict, Optional, Tuple, TypeVar, Union, ValuesView\n\nimport pandas\nfrom pandas.core.indexes.frozen import FrozenList\nfrom typing_extensions import Self\n\nfrom modin.config import AutoSwitchBackend, Backend, BackendMergeCastInPlace\nfrom modin.config import context as config_context\nfrom modin.core.storage_formats.base.query_compiler import (\n    BaseQueryCompiler,\n    QCCoercionCost,\n)\nfrom modin.core.storage_formats.base.query_compiler_calculator import (\n    BackendCostCalculator,\n    all_switchable_backends,\n)\nfrom modin.error_message import ErrorMessage\nfrom modin.logging import disable_logging, get_logger\nfrom 
modin.logging.metrics import emit_metric\nfrom modin.utils import _inherit_docstrings, sentinel\n\nFn = TypeVar(\"Fn\", bound=Any)\n\n# Constant for the default class name when class_of_wrapped_fn is None\n# (represents functions in the modin.pandas module)\nMODIN_PANDAS_MODULE_NAME = \"modin.pandas\"\n\n\ndef _normalize_class_name(class_of_wrapped_fn: Optional[str]) -> str:\n    \"\"\"\n    Normalize class name for logging and operation tracking.\n\n    Parameters\n    ----------\n    class_of_wrapped_fn : Optional[str]\n        The name of the class that the function belongs to. `None` for functions\n        in the modin.pandas module.\n\n    Returns\n    -------\n    str\n        The normalized class name. Returns \"modin.pandas\" if input is None.\n    \"\"\"\n    return (\n        class_of_wrapped_fn\n        if class_of_wrapped_fn is not None\n        else MODIN_PANDAS_MODULE_NAME\n    )\n\n\n# This type describes a defaultdict that maps backend name (or `None` for\n# method implementation and not bound to any one extension) to the dictionary of\n# extensions for that backend. 
The keys of the inner dictionary are the names of\n# the extensions, and the values are the extensions themselves.\nEXTENSION_DICT_TYPE = defaultdict[Optional[str], dict[str, Any]]\n\n\n_NON_EXTENDABLE_ATTRIBUTES = {\n    # we use these attributes to implement casting and backend dispatching, so\n    # we can't allow extensions to override them.\n    \"__getattribute__\",\n    \"__setattr__\",\n    \"__delattr__\",\n    \"__getattr__\",\n    \"_getattribute__from_extension_impl\",\n    \"_getattr__from_extension_impl\",\n    \"get_backend\",\n    \"move_to\",\n    \"set_backend\",\n    \"_get_extension\",\n    \"_query_compiler\",\n    \"_get_query_compiler\",\n    \"_copy_into\",\n    \"_update_inplace\",\n    \"is_backend_pinned\",\n    \"_set_backend_pinned\",\n    \"pin_backend\",\n    \"unpin_backend\",\n    \"__dict__\",\n}\n\n\n# Do not look up these attributes when searching for extensions. We use them\n# to implement the extension lookup itself.\nEXTENSION_NO_LOOKUP = {\n    \"_get_extension\",\n    \"_query_compiler\",\n    \"get_backend\",\n    \"_getattribute__from_extension_impl\",\n    \"_getattr__from_extension_impl\",\n    \"_get_query_compiler\",\n    \"set_backend\",\n    \"_pinned\",\n    \"is_backend_pinned\",\n    \"_set_backend_pinned\",\n    \"pin_backend\",\n    \"unpin_backend\",\n    \"_update_inplace\",\n}\n\n\nBackendAndClassName = namedtuple(\"BackendAndClassName\", [\"backend\", \"class_name\"])\n\n_AUTO_SWITCH_CLASS = defaultdict[BackendAndClassName, set[str]]\n\n_CLASS_AND_BACKEND_TO_POST_OP_SWITCH_METHODS: _AUTO_SWITCH_CLASS = _AUTO_SWITCH_CLASS(\n    set\n)\n\n_CLASS_AND_BACKEND_TO_PRE_OP_SWITCH_METHODS: _AUTO_SWITCH_CLASS = _AUTO_SWITCH_CLASS(\n    set\n)\n\n\ndef _get_empty_qc_for_default_backend() -> BaseQueryCompiler:\n    \"\"\"\n    Get an empty query compiler for the default backend.\n\n    Returns\n    -------\n    BaseQueryCompiler\n        An empty query compiler for the default backend.\n    \"\"\"\n    from 
modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher\n\n    return FactoryDispatcher.get_factory().io_cls.from_pandas(pandas.DataFrame())\n\n\n_BACKEND_TO_EMPTY_QC: defaultdict[str, BaseQueryCompiler] = defaultdict(\n    _get_empty_qc_for_default_backend\n)\n\n\nclass QueryCompilerCaster(ABC):\n    \"\"\"Cast all query compiler arguments of the member function to current query compiler.\"\"\"\n\n    @classmethod\n    def __init_subclass__(\n        cls,\n        **kwargs: Dict,\n    ) -> None:\n        \"\"\"\n        Apply type casting to all children of ``QueryCompilerCaster``.\n\n        This method is called automatically when a class inherits from\n        ``QueryCompilerCaster``. It ensures that all member functions within the\n        subclass have their arguments automatically casted to the current query\n        compiler type.\n\n        Parameters\n        ----------\n        **kwargs : Additional keyword arguments\n        \"\"\"\n        super().__init_subclass__(**kwargs)\n        apply_argument_cast_to_class(cls)\n\n    @abstractmethod\n    def _get_query_compiler(self) -> Optional[BaseQueryCompiler]:\n        \"\"\"\n        Get the query compiler storing data for this object.\n\n        Returns\n        -------\n        Optional[BaseQueryCompiler]\n            The query compiler storing data for this object, if it exists.\n            Otherwise, None.\n        \"\"\"\n        pass\n\n    @abstractmethod\n    def is_backend_pinned(self) -> bool:\n        \"\"\"\n        Get whether this object's data is pinned to a particular backend.\n\n        Returns\n        -------\n        bool\n            True if the data is pinned.\n        \"\"\"\n        pass\n\n    @abstractmethod\n    def _set_backend_pinned(self, pinned: bool, inplace: bool) -> Optional[Self]:\n        \"\"\"\n        Update whether this object's data is pinned to a particular backend.\n\n        Parameters\n        ----------\n        pinned : bool\n           
 Whether the data is pinned.\n\n        inplace : bool, default: False\n            Whether to update the object in place.\n\n        Returns\n        -------\n        Optional[Self]\n            The object with the new pin state, if `inplace` is False. Otherwise, None.\n        \"\"\"\n        pass\n\n    def pin_backend(self, inplace: bool = False) -> Optional[Self]:\n        \"\"\"\n        Pin the object's underlying data, preventing Modin from automatically moving it to another backend.\n\n        Parameters\n        ----------\n        inplace : bool, default: False\n            Whether to update the object in place.\n\n        Returns\n        -------\n        Optional[Self]\n            The newly-pinned object, if `inplace` is False. Otherwise, None.\n        \"\"\"\n        return self._set_backend_pinned(True, inplace)\n\n    def unpin_backend(self, inplace: bool = False) -> Optional[Self]:\n        \"\"\"\n        Unpin the object's underlying data, allowing Modin to automatically move it to another backend.\n\n        Parameters\n        ----------\n        inplace : bool, default: False\n            Whether to update the object in place.\n\n        Returns\n        -------\n        Optional[Self]\n            The newly-unpinned object, if `inplace` is False. Otherwise, None.\n        \"\"\"\n        return self._set_backend_pinned(False, inplace)\n\n    @abstractmethod\n    def get_backend(self) -> str:\n        \"\"\"\n        Get the backend of this object.\n\n        Returns\n        -------\n        str\n            The backend of this object. 
The backend name must be title-cased.\n        \"\"\"\n        pass\n\n    @abstractmethod\n    def set_backend(\n        self,\n        backend: str,\n        inplace: bool = False,\n        *,\n        switch_operation: Optional[str] = None,\n    ) -> Optional[Self]:\n        \"\"\"\n        Set the backend of this object.\n\n        Parameters\n        ----------\n        backend : str\n            The new backend.\n\n        inplace : bool, default: False\n            Whether to update the object in place.\n\n        switch_operation : Optional[str], default: None\n            The name of the operation that triggered the set_backend call.\n            Internal argument used for displaying progress bar information.\n\n        Returns\n        -------\n        Optional[Self]\n            The object with the new backend, if `inplace` is False. Otherwise, None.\n        \"\"\"\n        pass\n\n    @_inherit_docstrings(set_backend)\n    def move_to(\n        self,\n        backend: str,\n        inplace: bool = False,\n        *,\n        switch_operation: Optional[str] = None,\n    ) -> Optional[Self]:\n        return self.set_backend(\n            backend=backend, inplace=inplace, switch_operation=switch_operation\n        )\n\n    @abstractmethod\n    def _copy_into(self, other: Self) -> None:\n        \"\"\"\n        Copy the data from this object into another object of the same type.\n\n        Parameters\n        ----------\n        other : Self\n            The object to copy data into.\n        \"\"\"\n        pass\n\n    @disable_logging\n    def _get_extension(self, name: str, extensions: EXTENSION_DICT_TYPE) -> Any:\n        \"\"\"\n        Get an extension with the given name from the given set of extensions.\n\n        Parameters\n        ----------\n        name : str\n            The name of the extension.\n        extensions : EXTENSION_DICT_TYPE\n            The set of extensions.\n\n        Returns\n        -------\n        Any\n            The 
extension with the given name, or `sentinel` if the extension is not found.\n        \"\"\"\n        if self._get_query_compiler() is not None:\n            extensions_for_backend = extensions[self.get_backend()]\n            if name in extensions_for_backend:\n                return extensions_for_backend[name]\n            if name in extensions[None]:\n                return extensions[None][name]\n        return sentinel\n\n    @disable_logging\n    def _getattribute__from_extension_impl(\n        self, item: str, extensions: EXTENSION_DICT_TYPE\n    ):\n        \"\"\"\n        __getattribute__() an extension with the given name from the given set of extensions.\n\n        Implement __getattribute__() for extensions. Python calls\n        __getattribute__() every time you access an attribute of an object.\n\n        Parameters\n        ----------\n        item : str\n            The name of the attribute to get.\n        extensions : EXTENSION_DICT_TYPE\n            The set of extensions.\n\n        Returns\n        -------\n        Any\n            The attribute from the extension, or `sentinel` if the attribute is\n            not found.\n        \"\"\"\n        # An extension property is only accessible if the backend supports it.\n        extension = self._get_extension(item, extensions)\n        if (\n            extension is not sentinel\n            # We should implement callable extensions by wrapping them in\n            # methods that dispatch to the correct backend. We should get the\n            # wrapped method with the usual object.__getattribute__() method\n            # lookup rather than by getting a particular extension when we call\n            # __getattribute__(). 
For example, if we've extended sort_values(),\n            # then __getattribute__('sort_values') should return a wrapper that\n            # calls the correct extension once it's invoked.\n            and not callable(extension)\n        ):\n            return (\n                extension.__get__(self) if hasattr(extension, \"__get__\") else extension\n            )\n        return sentinel\n\n    @disable_logging\n    def _getattr__from_extension_impl(\n        self,\n        key: str,\n        default_behavior_attributes: set[str],\n        extensions: EXTENSION_DICT_TYPE,\n    ) -> Any:\n        \"\"\"\n        Implement __getattr__, which the python interpreter falls back to if __getattribute__ raises AttributeError.\n\n        We override this method to make sure we try to get the extension\n        attribute for `key`, even if this class has a different\n        attribute for `key`.\n\n        Parameters\n        ----------\n        key : str\n            Attribute name.\n        default_behavior_attributes : set[str]\n            The set of attributes for which we should follow the default\n            __getattr__ behavior and not try to get the extension.\n        extensions : EXTENSION_DICT_TYPE\n            The set of extensions.\n\n        Returns\n        -------\n        The value of the attribute.\n        \"\"\"\n        if key not in default_behavior_attributes:\n            # If this class has an extension for `key`, but __getattribute__()\n            # for the extension raises an AttributeError, we end up in this\n            # method, which should try getting the extension again (and\n            # probably raise the AttributeError that\n            # _getattribute__from_extension_impl() originally raised), rather\n            # than falling back to object.__getattribute__().\n            extensions_result = self._getattribute__from_extension_impl(key, extensions)\n            # If extensions_result is not `sentinel`, __getattribute__() 
should have\n            # returned it first.\n            ErrorMessage.catch_bugs_and_request_email(\n                failure_condition=extensions_result is not sentinel,\n                extra_log=(\n                    \"This object should return extensions via \"\n                    + \"__getattribute__ rather than __getattr__\"\n                ),\n            )\n        return object.__getattribute__(self, key)\n\n\ndef visit_nested_args(arguments, fn: callable):\n    \"\"\"\n    Visit each argument recursively, calling fn on each one.\n\n    Parameters\n    ----------\n    arguments : tuple or dict\n    fn : Callable to apply to matching arguments\n\n    Returns\n    -------\n    tuple or dict\n        Returns args and kwargs with all query compilers casted to current_qc.\n    \"\"\"\n    if isinstance(arguments, pandas.NamedAgg):\n        # NamedAgg needs special treatment because it's an immutable subclass\n        # of tuple that can't be constructed from another tuple.\n        return pandas.NamedAgg(\n            column=fn(arguments.column), aggfunc=fn(arguments.aggfunc)\n        )\n    immutable_types = (FrozenList, tuple, ValuesView)\n    if isinstance(arguments, immutable_types):\n        args_type = type(arguments)\n        return (\n            # ValuesView, which we might get from dict.values(), is immutable,\n            # but not constructable, so we convert it to a tuple. 
Otherwise,\n            # we return an object of the same type as the input.\n            tuple\n            if issubclass(args_type, ValuesView)\n            else args_type\n        )(visit_nested_args(list(arguments), fn))\n    types_to_recursively_visit = (list, dict, *immutable_types)\n    if isinstance(\n        arguments,\n        list,\n    ):\n        for i in range(len(arguments)):\n            if isinstance(arguments[i], types_to_recursively_visit):\n                visit_nested_args(arguments[i], fn)\n            else:\n                arguments[i] = fn(arguments[i])\n    elif isinstance(arguments, dict):\n        for key in arguments:\n            if isinstance(arguments[key], types_to_recursively_visit):\n                visit_nested_args(arguments[key], fn)\n            else:\n                arguments[key] = fn(arguments[key])\n    return arguments\n\n\ndef _assert_casting_functions_wrap_same_implementation(\n    m1: callable, m2: callable\n) -> None:\n    \"\"\"\n    Assert that two casting wrappers wrap the same implementation.\n\n    Parameters\n    ----------\n    m1 : callable\n        The first casting wrapper.\n    m2 : callable\n        The second casting wrapper.\n\n    Raises\n    ------\n    AssertionError\n        If the two casting wrappers wrap different implementations.\n    \"\"\"\n    assert (\n        # For cases like (m1=Series.agg, m2=Series.aggregate), where Series\n        # defines its own method and aliases it, the two wrapped methods\n        # are the same.\n        m2._wrapped_method_for_casting is m1._wrapped_method_for_casting\n        # For cases like (m1=Series.kurt, m2=Series.kurtosis), where Series\n        # inherits both kurt and kurtosis from BasePandasDataset but does\n        # not define its own implementation of either,\n        # Series.kurt._wrapped_method_for_casting points to\n        # BasePandasDataset.kurt, which is not the same as\n        # BasePandasDataset.kurtosis. 
In that case, we need to go one level\n        # deeper to compare the wrapped methods of the two aliases of\n        # BasePandasDataset.\n        or m2._wrapped_method_for_casting._wrapped_method_for_casting\n        is m1._wrapped_method_for_casting._wrapped_method_for_casting\n    )\n\n\ndef apply_argument_cast_to_class(klass: type) -> type:\n    \"\"\"\n    Apply argument casting to all functions in a class.\n\n    Parameters\n    ----------\n    klass : type\n        The class to apply argument casting to.\n\n    Returns\n    -------\n    type\n        The class with argument casting applied to all functions.\n    \"\"\"\n    all_attrs = dict(inspect.getmembers(klass))\n    # This is required because inspect converts class methods to member functions\n    current_class_attrs = vars(klass)\n    for key in current_class_attrs:\n        all_attrs[key] = current_class_attrs[key]\n\n    for attr_name, attr_value in all_attrs.items():\n        if attr_name in _NON_EXTENDABLE_ATTRIBUTES or not isinstance(\n            attr_value, (FunctionType, classmethod, staticmethod)\n        ):\n            continue\n\n        implementation_function = (\n            attr_value.__func__\n            if isinstance(attr_value, (classmethod, staticmethod))\n            else attr_value\n        )\n        if attr_name not in klass._extensions[None]:\n            # Register the original implementation as the default\n            # extension. 
We fall back to this implementation if the\n            # object's backend does not have an implementation for this\n            # method.\n            klass._extensions[None][attr_name] = implementation_function\n\n        casting_implementation = wrap_function_in_argument_caster(\n            klass=klass,\n            f=implementation_function,\n            wrapping_function_type=(\n                classmethod\n                if isinstance(attr_value, classmethod)\n                else (\n                    staticmethod if isinstance(attr_value, staticmethod) else MethodType\n                )\n            ),\n            extensions=klass._extensions,\n            name=attr_name,\n        )\n        wrapped = (\n            classmethod(casting_implementation)\n            if isinstance(attr_value, classmethod)\n            else (\n                staticmethod(casting_implementation)\n                if isinstance(attr_value, staticmethod)\n                else casting_implementation\n            )\n        )\n        if attr_name not in klass.__dict__:\n            # If this class's method comes from a superclass (i.e.\n            # it's not in klass.__dict__), mark it so that\n            # modin.utils._inherit_docstrings knows that the method\n            # must get its docstrings from its superclass.\n            wrapped._wrapped_superclass_method = attr_value\n        setattr(klass, attr_name, wrapped)\n\n    return klass\n\n\ndef _maybe_switch_backend_pre_op(\n    function_name: str,\n    input_qc: BaseQueryCompiler,\n    class_of_wrapped_fn: Optional[str],\n    arguments: MappingProxyType[str, Any],\n) -> tuple[str, Callable[[Any], Any]]:\n    \"\"\"\n    Possibly switch backend before a function.\n\n    Parameters\n    ----------\n    function_name : str\n        The name of the function.\n    input_qc : BaseQueryCompiler\n        The input query compiler.\n    class_of_wrapped_fn : Optional[str]\n        The name of the class that the function belongs 
to. `None` for functions\n        in the modin.pandas module.\n    arguments : MappingProxyType[str, Any]\n        Mapping from operation argument names to their values.\n\n    Returns\n    -------\n    Tuple[str, callable]\n        A tuple of the new backend and a function that casts all castable arguments\n        to the new query compiler type.\n    \"\"\"\n    input_backend = input_qc.get_backend()\n    if (\n        function_name\n        in _CLASS_AND_BACKEND_TO_PRE_OP_SWITCH_METHODS[\n            BackendAndClassName(\n                backend=input_qc.get_backend(), class_name=class_of_wrapped_fn\n            )\n        ]\n    ):\n        result_backend = _get_backend_for_auto_switch(\n            input_qc=input_qc,\n            class_of_wrapped_fn=class_of_wrapped_fn,\n            function_name=function_name,\n            arguments=arguments,\n        )\n    else:\n        result_backend = input_backend\n\n    def cast_to_qc(arg: Any) -> Any:\n        if not (\n            isinstance(arg, QueryCompilerCaster)\n            and arg._get_query_compiler() is not None\n            and arg.get_backend() != result_backend\n        ):\n            return arg\n        arg.set_backend(\n            result_backend,\n            inplace=True,\n            switch_operation=f\"{_normalize_class_name(class_of_wrapped_fn)}.{function_name}\",\n        )\n        return arg\n\n    return result_backend, cast_to_qc\n\n\ndef _maybe_switch_backend_post_op(\n    result: Any,\n    function_name: str,\n    qc_list: list[BaseQueryCompiler],\n    starting_backend: str,\n    class_of_wrapped_fn: Optional[str],\n    pin_backend: bool,\n    arguments: MappingProxyType[str, Any],\n) -> Any:\n    \"\"\"\n    Possibly switch the backend of the result of a function.\n\n    Use cost-based optimization to determine whether to switch the backend of the\n    result of a function. 
If the function returned a QueryCompilerCaster and the\n    cost of switching is less than the cost of staying on the current backend,\n    we switch. If there are multiple backends we can switch to, we choose the\n    one that minimizes cost_to_move - cost_to_stay.\n\n    Parameters\n    ----------\n    result : Any\n        The result of the function.\n    function_name : str\n        The name of the function.\n    qc_list : list[BaseQueryCompiler]\n        The list of query compilers that were arguments to the function.\n    starting_backend : str\n        The backend used to run the function.\n    class_of_wrapped_fn : Optional[str]\n        The name of the class that the function belongs to. `None` for functions\n        in the modin.pandas module.\n    pin_backend : bool\n        Whether the result should have its backend pinned, and therefore not moved.\n    arguments : MappingProxyType[str, Any]\n        Mapping from operation argument names to their values.\n\n    Returns\n    -------\n    Any\n        The result of the function, possibly with its backend switched.\n    \"\"\"\n    # If any input QC was pinned, then the output should be as well.\n    if pin_backend:\n        if isinstance(result, QueryCompilerCaster):\n            result.pin_backend(inplace=True)\n        return result\n    if (\n        # only apply post-operation switch to nullary and unary methods\n        len(qc_list) in (0, 1)\n        and function_name\n        in _CLASS_AND_BACKEND_TO_POST_OP_SWITCH_METHODS[\n            BackendAndClassName(\n                backend=(\n                    qc_list[0].get_backend() if len(qc_list) == 1 else starting_backend\n                ),\n                class_name=class_of_wrapped_fn,\n            )\n        ]\n        # if the operation did not return a query compiler, we can't switch the\n        # backend of the result.\n        and isinstance(result, QueryCompilerCaster)\n        and (input_qc := result._get_query_compiler()) is not None\n  
  ):\n        return result.move_to(\n            _get_backend_for_auto_switch(\n                input_qc=input_qc,\n                class_of_wrapped_fn=class_of_wrapped_fn,\n                function_name=function_name,\n                arguments=arguments,\n            ),\n            switch_operation=f\"{_normalize_class_name(class_of_wrapped_fn)}.{function_name}\",\n        )\n    return result\n\n\ndef _get_backend_for_auto_switch(\n    input_qc: BaseQueryCompiler,\n    class_of_wrapped_fn: str,\n    function_name: str,\n    arguments: MappingProxyType[str, Any],\n) -> str:\n    \"\"\"\n    Get the best backend to switch to.\n\n    Use cost-based optimization to determine whether to switch the backend of the\n    arguments to a function. If the cost of switching is less than the cost of\n    staying on the current backend, we switch. If there are multiple backends we\n    can switch to, we choose the one that minimizes cost_to_move - cost_to_stay.\n\n    Parameters\n    ----------\n    input_qc : BaseQueryCompiler\n        The query compiler representing the starting backend.\n    class_of_wrapped_fn : Optional[str]\n        The name of the class that the function belongs to. 
`None` for functions\n        in the modin.pandas module.\n    function_name : str\n        The name of the function.\n    arguments : MappingProxyType[str, Any]\n        Mapping from operation argument names to their values.\n\n    Returns\n    -------\n    str\n        The name of the best backend to switch to.\n    \"\"\"\n    # TODO(https://github.com/modin-project/modin/issues/7503): Make costing\n    # methods take backend instead of query compiler type so that we don't\n    # have to use the dispatcher to figure out the appropriate type for each\n    # backend.\n    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher\n\n    # Does not need to be secure, should not use system entropy\n    metrics_group = \"%04x\" % random.randrange(16**4)\n    starting_backend = input_qc.get_backend()\n\n    min_move_stay_delta = None\n    best_backend = starting_backend\n\n    stay_cost = input_qc.stay_cost(\n        api_cls_name=class_of_wrapped_fn,\n        operation=function_name,\n        arguments=arguments,\n    )\n    data_max_shape = input_qc._max_shape()\n    emit_metric(\n        f\"hybrid.auto.api.{class_of_wrapped_fn}.{function_name}.group.{metrics_group}\",\n        1,\n    )\n    emit_metric(\n        f\"hybrid.auto.current.{starting_backend}.group.{metrics_group}.stay_cost\",\n        stay_cost,\n    )\n    emit_metric(\n        f\"hybrid.auto.current.{starting_backend}.group.{metrics_group}.rows\",\n        data_max_shape[0],\n    )\n    emit_metric(\n        f\"hybrid.auto.current.{starting_backend}.group.{metrics_group}.cols\",\n        data_max_shape[1],\n    )\n    for backend in all_switchable_backends():\n        if backend == starting_backend:\n            continue\n        move_to_class = FactoryDispatcher._get_prepared_factory_for_backend(\n            backend=backend\n        ).io_cls.query_compiler_cls\n        move_to_cost = input_qc.move_to_cost(\n            move_to_class,\n            
api_cls_name=class_of_wrapped_fn,\n            operation=function_name,\n            arguments=arguments,\n        )\n        other_execute_cost = move_to_class.move_to_me_cost(\n            input_qc,\n            api_cls_name=class_of_wrapped_fn,\n            operation=function_name,\n            arguments=arguments,\n        )\n        if (\n            move_to_cost is not None\n            and stay_cost is not None\n            and other_execute_cost is not None\n        ):\n            if stay_cost >= QCCoercionCost.COST_IMPOSSIBLE:\n                # We cannot execute the workload on the current engine\n                # disregard the move_to_cost and just consider whether\n                # the other engine can execute the workload\n                move_stay_delta = other_execute_cost - stay_cost\n            else:\n                # We can execute this workload if we need to, consider\n                # move_to_cost/transfer time in our decision\n                move_stay_delta = (move_to_cost + other_execute_cost) - stay_cost\n            if move_stay_delta < 0 and (\n                min_move_stay_delta is None or move_stay_delta < min_move_stay_delta\n            ):\n                min_move_stay_delta = move_stay_delta\n                best_backend = backend\n            emit_metric(\n                f\"hybrid.auto.candidate.{backend}.group.{metrics_group}.move_to_cost\",\n                move_to_cost,\n            )\n            emit_metric(\n                f\"hybrid.auto.candidate.{backend}.group.{metrics_group}.other_execute_cost\",\n                other_execute_cost,\n            )\n            emit_metric(\n                f\"hybrid.auto.candidate.{backend}.group.{metrics_group}.delta\",\n                move_stay_delta,\n            )\n\n            get_logger().info(\n                f\"After {_normalize_class_name(class_of_wrapped_fn)} function {function_name}, \"\n                + f\"considered moving to backend {backend} with \"\n             
   + f\"(transfer_cost {move_to_cost} + other_execution_cost {other_execute_cost}) \"\n                + f\", stay_cost {stay_cost}, and move-stay delta \"\n                + f\"{move_stay_delta}\"\n            )\n\n    if best_backend == starting_backend:\n        emit_metric(f\"hybrid.auto.decision.{best_backend}.group.{metrics_group}\", 0)\n        get_logger().info(\n            f\"Chose not to switch backends after operation {function_name}\"\n        )\n    else:\n        emit_metric(f\"hybrid.auto.decision.{best_backend}.group.{metrics_group}\", 1)\n        get_logger().info(f\"Chose to move to backend {best_backend}\")\n    return best_backend\n\n\ndef _get_extension_for_method(\n    name: str,\n    extensions: EXTENSION_DICT_TYPE,\n    backend: str,\n    args: tuple,\n    wrapping_function_type: Optional[\n        Union[type[classmethod], type[staticmethod], type[MethodType]]\n    ],\n) -> callable:\n    \"\"\"\n    Get the extension implementation for a method.\n\n    Parameters\n    ----------\n    name : str\n        The name of the method.\n    extensions : EXTENSION_DICT_TYPE\n        The extension dictionary for the modin-API-level object (e.g. class\n        DataFrame or module modin.pandas) that the method belongs to.\n    backend : str\n        The backend to use for this method call.\n    args : tuple\n        The arguments to the method.\n    wrapping_function_type : Union[type[classmethod], type[staticmethod], type[MethodType]]\n        The type of the original function that `f` implements.\n        - `None` means we are wrapping a free function, e.g. 
pd.concat()\n        - `classmethod` means we are wrapping a classmethod.\n        - `staticmethod` means we are wrapping a staticmethod.\n        - `MethodType` means we are wrapping a regular method of a class.\n\n    Returns\n    -------\n    callable\n        The implementation of the method for the given backend.\n    \"\"\"\n    if name in extensions[backend]:\n        f_to_apply = extensions[backend][name]\n    else:\n        if name not in extensions[None]:\n            raise AttributeError(\n                (\n                    # When python invokes a method on an object, it passes the object as\n                    # the first positional argument.\n                    (\n                        f\"{(type(args[0]).__name__)} object\"\n                        if wrapping_function_type is MethodType\n                        else \"module 'modin.pandas'\"\n                    )\n                    + f\" has no attribute {name}\"\n                )\n            )\n        f_to_apply = extensions[None][name]\n    return f_to_apply\n\n\ndef wrap_function_in_argument_caster(\n    klass: Optional[type],\n    f: callable,\n    name: str,\n    wrapping_function_type: Optional[\n        Union[type[classmethod], type[staticmethod], type[MethodType]]\n    ],\n    extensions: EXTENSION_DICT_TYPE,\n) -> callable:\n    \"\"\"\n    Wrap a function so that it casts all castable arguments to a consistent query compiler, and uses the correct extension implementation for methods.\n\n    Also propagates pin behavior across operations.\n\n    Parameters\n    ----------\n    klass : Optional[type]\n        Class of the function being wrapped.\n    f : callable\n        The function to wrap.\n    name : str\n        The name of the function.\n    wrapping_function_type : Optional[Union[type[classmethod], type[staticmethod], type[MethodType]]\n        The type of the original function that `f` implements.\n        - `None` means we are wrapping a free function, e.g. 
pd.concat()\n        - `classmethod` means we are wrapping a classmethod.\n        - `staticmethod` means we are wrapping a staticmethod.\n        - `MethodType` means we are wrapping a regular method of a class.\n    extensions : EXTENSION_DICT_TYPE\n        The class of the function we are wrapping. This should be None if\n        and only if `wrapping_function_type` is None.\n\n    Returns\n    -------\n    callable\n        The wrapped function.\n    \"\"\"\n\n    @functools.wraps(f)\n    def f_with_argument_casting(*args: Tuple, **kwargs: Dict) -> Any:\n        \"\"\"\n        Add casting for query compiler arguments.\n\n        Parameters\n        ----------\n        *args : tuple\n            The function arguments.\n        **kwargs : dict\n            The function keyword arguments.\n\n        Returns\n        -------\n        Any\n        \"\"\"\n        if wrapping_function_type in (classmethod, staticmethod):\n            # TODO: currently we don't support any kind of casting or extension\n            # for classmethod or staticmethod.\n            return f(*args, **kwargs)\n\n        # f() may make in-place updates to some of its arguments. If we cast\n        # an argument and then f() updates it in place, the updates will not\n        # be reflected in the original object. As a fix, we keep track of all\n        # the in-place updates that f() makes, and once f() is finished, we\n        # copy the updates back into the original objects. 
The query compiler\n        # interface is mostly immutable (the only exceptions being the mutable\n        # index and column properties), so to check for an in-place update, we\n        # check whether an input's query compiler has changed its identity.\n        InplaceUpdateTracker = namedtuple(\n            \"InplaceUpdateTracker\",\n            [\"input_castable\", \"original_query_compiler\", \"new_castable\"],\n        )\n        inplace_update_trackers: list[InplaceUpdateTracker] = []\n        # The function name and class name of the function are passed to the calculator as strings\n        class_of_wrapped_fn = klass.__name__ if klass is not None else None\n\n        input_query_compilers: list[BaseQueryCompiler] = []\n\n        pin_target_backend = None\n\n        input_backends: set[str] = set()\n\n        def register_query_compilers(arg):\n            nonlocal pin_target_backend\n            if (\n                isinstance(arg, QueryCompilerCaster)\n                and (qc := arg._get_query_compiler()) is not None\n            ):\n                arg_backend = arg.get_backend()\n                input_backends.add(arg_backend)\n                if pin_target_backend is not None:\n                    if arg.is_backend_pinned() and arg_backend != pin_target_backend:\n                        raise ValueError(\n                            f\"Cannot combine arguments that are pinned to conflicting backends ({pin_target_backend}, {arg_backend})\"\n                        )\n                elif arg.is_backend_pinned():\n                    pin_target_backend = arg_backend\n                input_query_compilers.append(qc)\n            elif isinstance(arg, BaseQueryCompiler):\n                # We might get query compiler arguments in __init__()\n                input_query_compilers.append(arg)\n            return arg\n\n        visit_nested_args(args, register_query_compilers)\n        visit_nested_args(kwargs, register_query_compilers)\n\n        # Before 
determining any automatic switches, we perform the following checks:\n        # 1. If the global AutoSwitchBackend configuration variable is set to False, do not switch.\n        # 2. If there's only one query compiler and it's pinned, do not switch.\n        # 3. If there are multiple query compilers, and at least one is pinned to a particular\n        #    backend, then switch to that backend.\n        # 4. If there are multiple query compilers, at least two of which are pinned to distinct\n        #    backends, raise a ValueError.\n\n        if len(input_query_compilers) == 0:\n            input_backend = Backend.get()\n            # For nullary functions, we need to create a dummy query compiler\n            # to calculate the cost of switching backends. We should only\n            # create the dummy query compiler once per backend.\n            input_qc_for_pre_op_switch = _BACKEND_TO_EMPTY_QC[input_backend]\n        else:\n            input_qc_for_pre_op_switch = input_query_compilers[0]\n            input_backend = input_qc_for_pre_op_switch.get_backend()\n\n        # Skip the casting code if there are < 2 input backends and either\n        # auto-switching is disabled or the inputs are pinned to the input\n        # backend.\n        if len(input_backends) < 2 and (\n            not AutoSwitchBackend.get() or pin_target_backend is not None\n        ):\n            f_to_apply = _get_extension_for_method(\n                name=name,\n                extensions=extensions,\n                backend=(\n                    pin_target_backend\n                    if pin_target_backend is not None\n                    else input_backend\n                ),\n                args=args,\n                wrapping_function_type=wrapping_function_type,\n            )\n            result = f_to_apply(*args, **kwargs)\n            if (\n                isinstance(result, QueryCompilerCaster)\n                and pin_target_backend is not None\n            ):\n             
   result._set_backend_pinned(True, inplace=True)\n            return result\n\n        # Bind the arguments using the function implementation for the input\n        # backend. TODO(https://github.com/modin-project/modin/issues/7525):\n        # Ideally every implementation would have the same signature.\n        bound_arguments = inspect.signature(\n            _get_extension_for_method(\n                name=name,\n                extensions=extensions,\n                backend=input_backend,\n                args=args,\n                wrapping_function_type=wrapping_function_type,\n            ),\n        ).bind(*args, **kwargs)\n        bound_arguments.apply_defaults()\n        args_dict = MappingProxyType(bound_arguments.arguments)\n\n        if len(input_query_compilers) < 2:\n            # No need to check should_pin_result() again, since we have already done so above.\n            result_backend, cast_to_qc = _maybe_switch_backend_pre_op(\n                name,\n                input_qc=input_qc_for_pre_op_switch,\n                class_of_wrapped_fn=class_of_wrapped_fn,\n                arguments=args_dict,\n            )\n        else:\n            preop_switch = (\n                name\n                in _CLASS_AND_BACKEND_TO_PRE_OP_SWITCH_METHODS[\n                    BackendAndClassName(\n                        backend=input_backend,\n                        class_name=class_of_wrapped_fn,\n                    )\n                ]\n            )\n            calculator: BackendCostCalculator = BackendCostCalculator(\n                operation_arguments=args_dict,\n                api_cls_name=class_of_wrapped_fn,\n                operation=name,\n                query_compilers=input_query_compilers,\n                preop_switch=preop_switch,\n            )\n\n            if pin_target_backend is None:\n                result_backend = calculator.calculate()\n            else:\n                result_backend = pin_target_backend\n\n            def 
cast_to_qc(arg):\n                if not (\n                    isinstance(arg, QueryCompilerCaster)\n                    and arg._get_query_compiler() is not None\n                    and arg.get_backend() != result_backend\n                ):\n                    return arg\n                if BackendMergeCastInPlace.get():\n                    arg.set_backend(\n                        result_backend,\n                        switch_operation=f\"{_normalize_class_name(class_of_wrapped_fn)}.{name}\",\n                        inplace=True,\n                    )\n                    assert arg.get_backend() == result_backend\n                    cast = arg\n                else:\n                    cast = arg.set_backend(\n                        result_backend,\n                        switch_operation=f\"{_normalize_class_name(class_of_wrapped_fn)}.{name}\",\n                        inplace=False,\n                    )\n                inplace_update_trackers.append(\n                    InplaceUpdateTracker(\n                        input_castable=arg,\n                        original_query_compiler=cast._get_query_compiler(),\n                        new_castable=cast,\n                    )\n                )\n                return cast\n\n        args = visit_nested_args(args, cast_to_qc)\n        kwargs = visit_nested_args(kwargs, cast_to_qc)\n\n        # `result_backend` may be different from `input_backend`, so we have to\n        # look up the correct implementation based on `result_backend`.\n        f_to_apply = _get_extension_for_method(\n            name=name,\n            extensions=extensions,\n            backend=result_backend,\n            args=args,\n            wrapping_function_type=wrapping_function_type,\n        )\n\n        # We have to set the global Backend correctly for I/O methods like\n        # read_json() to use the correct backend.\n        with config_context(Backend=result_backend):\n            result = f_to_apply(*args, 
**kwargs)\n        for (\n            original_castable,\n            original_qc,\n            new_castable,\n        ) in inplace_update_trackers:\n            new_qc = new_castable._get_query_compiler()\n            if BackendMergeCastInPlace.get() or original_qc is not new_qc:\n                new_castable._copy_into(original_castable)\n\n        return _maybe_switch_backend_post_op(\n            result,\n            function_name=name,\n            qc_list=input_query_compilers,\n            starting_backend=result_backend,\n            class_of_wrapped_fn=class_of_wrapped_fn,\n            pin_backend=pin_target_backend is not None,\n            arguments=args_dict,\n        )\n\n    f_with_argument_casting._wrapped_method_for_casting = f\n    return f_with_argument_casting\n\n\n_GENERAL_EXTENSIONS: EXTENSION_DICT_TYPE = defaultdict(dict)\n\n\ndef wrap_free_function_in_argument_caster(name: str) -> callable:\n    \"\"\"\n    Get a wrapper for a free function that casts all castable arguments to a consistent query compiler.\n\n    Parameters\n    ----------\n    name : str\n        The name of the function.\n\n    Returns\n    -------\n    callable\n        A wrapper for a free function that casts all castable arguments to a consistent query compiler.\n    \"\"\"\n\n    def wrapper(f):\n        if name not in _GENERAL_EXTENSIONS[None]:\n            _GENERAL_EXTENSIONS[None][name] = f\n\n        return wrap_function_in_argument_caster(\n            klass=None,\n            f=f,\n            wrapping_function_type=None,\n            extensions=_GENERAL_EXTENSIONS,\n            name=name,\n        )\n\n    return wrapper\n\n\ndef register_function_for_post_op_switch(\n    class_name: Optional[str], backend: str, method: str\n) -> None:\n    \"\"\"\n    Register a function for post-operation backend switch.\n\n    Parameters\n    ----------\n    class_name : Optional[str]\n        The name of the class that the function belongs to. 
`None` for functions\n        in the modin.pandas module.\n    backend : str\n        Only consider switching when the starting backend is this one.\n    method : str\n        The name of the method to register.\n    \"\"\"\n    _CLASS_AND_BACKEND_TO_POST_OP_SWITCH_METHODS[\n        BackendAndClassName(backend=backend, class_name=class_name)\n    ].add(method)\n\n\ndef register_function_for_pre_op_switch(\n    class_name: Optional[str], backend: str, method: str\n) -> None:\n    \"\"\"\n    Register a function for pre-operation backend switch.\n\n    Parameters\n    ----------\n    class_name : Optional[str]\n        The name of the class that the function belongs to. `None` for functions\n        in the modin.pandas module.\n    backend : str\n        Only consider switching when the starting backend is this one.\n    method : str\n        The name of the method to register.\n    \"\"\"\n    _CLASS_AND_BACKEND_TO_PRE_OP_SWITCH_METHODS[\n        BackendAndClassName(backend=backend, class_name=class_name)\n    ].add(method)\n"
  },
  {
    "path": "modin/core/storage_formats/pandas/utils.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Contains utility functions for frame partitioning.\"\"\"\n\nfrom __future__ import annotations\n\nimport re\nfrom math import ceil\nfrom typing import Generator, Hashable, List, Optional\n\nimport numpy as np\nimport pandas\n\nfrom modin.config import MinColumnPartitionSize, MinRowPartitionSize, NPartitions\n\n\ndef compute_chunksize(axis_len: int, num_splits: int, min_block_size: int) -> int:\n    \"\"\"\n    Compute the number of elements (rows/columns) to include in each partition.\n\n    Chunksize is defined the same for both axes.\n\n    Parameters\n    ----------\n    axis_len : int\n        Element count in an axis.\n    num_splits : int\n        The number of splits.\n    min_block_size : int\n        Minimum number of rows/columns in a single split.\n\n    Returns\n    -------\n    int\n        Integer number of rows/columns to split the DataFrame will be returned.\n    \"\"\"\n    if not isinstance(min_block_size, int) or min_block_size <= 0:\n        raise ValueError(\n            f\"'min_block_size' should be int > 0, passed: {min_block_size=}\"\n        )\n\n    chunksize = axis_len // num_splits\n    if axis_len % num_splits:\n        chunksize 
+= 1\n    # chunksize shouldn't be less than `min_block_size` to avoid a\n    # large amount of small partitions.\n    return max(chunksize, min_block_size)\n\n\ndef split_result_of_axis_func_pandas(\n    axis: int,\n    num_splits: int,\n    result: pandas.DataFrame,\n    min_block_size: int,\n    length_list: Optional[list] = None,\n) -> list[pandas.DataFrame]:\n    \"\"\"\n    Split pandas DataFrame evenly based on the provided number of splits.\n\n    Parameters\n    ----------\n    axis : {0, 1}\n        Axis to split across. 0 means index axis when 1 means column axis.\n    num_splits : int\n        Number of splits to separate the DataFrame into.\n        This parameter is ignored if `length_list` is specified.\n    result : pandas.DataFrame\n        DataFrame to split.\n    min_block_size : int\n        Minimum number of rows/columns in a single split.\n    length_list : list of ints, optional\n        List of slice lengths to split DataFrame into. This is used to\n        return the DataFrame to its original partitioning schema.\n\n    Returns\n    -------\n    list of pandas.DataFrames\n        Splitted dataframe represented by list of frames.\n    \"\"\"\n    return list(\n        generate_result_of_axis_func_pandas(\n            axis, num_splits, result, min_block_size, length_list\n        )\n    )\n\n\ndef generate_result_of_axis_func_pandas(\n    axis: int,\n    num_splits: int,\n    result: pandas.DataFrame,\n    min_block_size: int,\n    length_list: Optional[list] = None,\n) -> Generator:\n    \"\"\"\n    Generate pandas DataFrame evenly based on the provided number of splits.\n\n    Parameters\n    ----------\n    axis : {0, 1}\n        Axis to split across. 
0 means index axis when 1 means column axis.\n    num_splits : int\n        Number of splits to separate the DataFrame into.\n        This parameter is ignored if `length_list` is specified.\n    result : pandas.DataFrame\n        DataFrame to split.\n    min_block_size : int\n        Minimum number of rows/columns in a single split.\n    length_list : list of ints, optional\n        List of slice lengths to split DataFrame into. This is used to\n        return the DataFrame to its original partitioning schema.\n\n    Yields\n    ------\n    Generator\n        Generates 'num_splits' dataframes as a result of axis function.\n    \"\"\"\n    if num_splits == 1:\n        yield result\n    else:\n        if length_list is None:\n            length_list = get_length_list(\n                result.shape[axis], num_splits, min_block_size\n            )\n        # Inserting the first \"zero\" to properly compute cumsum indexing slices\n        length_list = np.insert(length_list, obj=0, values=[0])\n        sums = np.cumsum(length_list)\n        axis = 0 if isinstance(result, pandas.Series) else axis\n\n        for i in range(len(sums) - 1):\n            # We do this to restore block partitioning\n            if axis == 0:\n                chunk = result.iloc[sums[i] : sums[i + 1]]\n            else:\n                chunk = result.iloc[:, sums[i] : sums[i + 1]]\n\n            # Sliced MultiIndex still stores all encoded values of the original index, explicitly\n            # asking it to drop unused values in order to save memory.\n            if isinstance(chunk.axes[axis], pandas.MultiIndex):\n                chunk = chunk.set_axis(\n                    chunk.axes[axis].remove_unused_levels(), axis=axis, copy=False\n                )\n            yield chunk\n\n\ndef get_length_list(axis_len: int, num_splits: int, min_block_size: int) -> list:\n    \"\"\"\n    Compute partitions lengths along the axis with the specified number of splits.\n\n    Parameters\n    
----------\n    axis_len : int\n        Element count in an axis.\n    num_splits : int\n        Number of splits along the axis.\n    min_block_size : int\n        Minimum number of rows/columns in a single split.\n\n    Returns\n    -------\n    list of ints\n        List of integer lengths of partitions.\n    \"\"\"\n    chunksize = compute_chunksize(axis_len, num_splits, min_block_size)\n    return [\n        (\n            chunksize\n            if (i + 1) * chunksize <= axis_len\n            else max(0, axis_len - i * chunksize)\n        )\n        for i in range(num_splits)\n    ]\n\n\ndef length_fn_pandas(df):\n    \"\"\"\n    Compute number of rows of passed `pandas.DataFrame`.\n\n    Parameters\n    ----------\n    df : pandas.DataFrame\n\n    Returns\n    -------\n    int\n    \"\"\"\n    assert isinstance(df, pandas.DataFrame)\n    return len(df) if len(df) > 0 else 0\n\n\ndef width_fn_pandas(df):\n    \"\"\"\n    Compute number of columns of passed `pandas.DataFrame`.\n\n    Parameters\n    ----------\n    df : pandas.DataFrame\n\n    Returns\n    -------\n    int\n    \"\"\"\n    assert isinstance(df, pandas.DataFrame)\n    return len(df.columns) if len(df.columns) > 0 else 0\n\n\ndef get_group_names(regex: \"re.Pattern\") -> \"List[Hashable]\":\n    \"\"\"\n    Get named groups from compiled regex.\n\n    Unnamed groups are numbered.\n\n    Parameters\n    ----------\n    regex : compiled regex\n\n    Returns\n    -------\n    list of column labels\n    \"\"\"\n    names = {v: k for k, v in regex.groupindex.items()}\n    return [names.get(1 + i, i) for i in range(regex.groups)]\n\n\ndef merge_partitioning(left, right, axis=1):\n    \"\"\"\n    Get the number of splits across the `axis` for the two dataframes being concatenated.\n\n    Parameters\n    ----------\n    left : PandasDataframe\n    right : PandasDataframe\n    axis : int, default: 1\n\n    Returns\n    -------\n    int\n    \"\"\"\n    lshape = left._row_lengths_cache if axis == 0 else 
left._column_widths_cache\n    rshape = right._row_lengths_cache if axis == 0 else right._column_widths_cache\n\n    if lshape is not None and rshape is not None:\n        res_shape = sum(lshape) + sum(rshape)\n        chunk_size = compute_chunksize(\n            axis_len=res_shape,\n            num_splits=NPartitions.get(),\n            min_block_size=(\n                MinRowPartitionSize.get() if axis == 0 else MinColumnPartitionSize.get()\n            ),\n        )\n        return ceil(res_shape / chunk_size)\n    else:\n        lsplits = left._partitions.shape[axis]\n        rsplits = right._partitions.shape[axis]\n        return min(lsplits + rsplits, NPartitions.get())\n"
  },
  {
    "path": "modin/db_conn.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"\nModule houses `ModinDatabaseConnection` class.\n\n`ModinDatabaseConnection` lets a single process make its own connection to a\ndatabase to read from it. Whereas it's possible in pandas to pass an open\nconnection directly to `read_sql`, the open connection is not pickleable\nin Modin, so each worker must open its own connection.\n`ModinDatabaseConnection` saves the arguments that would normally be used to\nmake a db connection. 
It can make and provide a connection whenever the Modin\ndriver or a worker wants one.\n\"\"\"\n\nfrom typing import Any, Dict, Optional, Sequence\n\n_PSYCOPG_LIB_NAME = \"psycopg2\"\n_SQLALCHEMY_LIB_NAME = \"sqlalchemy\"\n\n\nclass UnsupportedDatabaseException(Exception):\n    \"\"\"Modin can't create a particular kind of database connection.\"\"\"\n\n    pass\n\n\nclass ModinDatabaseConnection:\n    \"\"\"\n    Creates a SQL database connection.\n\n    Parameters\n    ----------\n    lib : str\n        The library for the SQL connection.\n    *args : iterable\n        Positional arguments to pass when creating the connection.\n    **kwargs : dict\n        Keyword arguments to pass when creating the connection.\n    \"\"\"\n\n    lib: str\n    args: Sequence\n    kwargs: Dict\n    _dialect_is_microsoft_sql_cache: Optional[bool]\n\n    def __init__(self, lib: str, *args: Any, **kwargs: Any) -> None:\n        lib = lib.lower()\n        if lib not in (_PSYCOPG_LIB_NAME, _SQLALCHEMY_LIB_NAME):\n            raise UnsupportedDatabaseException(f\"Unsupported database library {lib}\")\n        self.lib = lib\n        self.args = args\n        self.kwargs = kwargs\n        self._dialect_is_microsoft_sql_cache = None\n\n    def _dialect_is_microsoft_sql(self) -> bool:\n        \"\"\"\n        Tell whether this connection requires Microsoft SQL dialect.\n\n        If this is a sqlalchemy connection, create an engine from args and\n        kwargs. If that engine's driver is pymssql or pyodbc, this\n        connection requires Microsoft SQL. 
Otherwise, it doesn't.\n\n        Returns\n        -------\n        bool\n        \"\"\"\n        if self._dialect_is_microsoft_sql_cache is None:\n            self._dialect_is_microsoft_sql_cache = False\n            if self.lib == _SQLALCHEMY_LIB_NAME:\n                from sqlalchemy import create_engine\n\n                self._dialect_is_microsoft_sql_cache = create_engine(\n                    *self.args, **self.kwargs\n                ).driver in (\"pymssql\", \"pyodbc\")\n\n        return self._dialect_is_microsoft_sql_cache\n\n    def get_connection(self) -> Any:\n        \"\"\"\n        Make the database connection and get it.\n\n        For psycopg2, pass all arguments to psycopg2.connect() and return the\n        result of psycopg2.connect(). For sqlalchemy, pass all arguments to\n        sqlalchemy.create_engine() and return the result of calling connect()\n        on the engine.\n\n        Returns\n        -------\n        Any\n            The open database connection.\n        \"\"\"\n        if self.lib == _PSYCOPG_LIB_NAME:\n            import psycopg2\n\n            return psycopg2.connect(*self.args, **self.kwargs)\n        if self.lib == _SQLALCHEMY_LIB_NAME:\n            from sqlalchemy import create_engine\n\n            return create_engine(*self.args, **self.kwargs).connect()\n\n        raise UnsupportedDatabaseException(\"Unsupported database library\")\n\n    def get_string(self) -> str:\n        \"\"\"\n        Get input connection string.\n\n        Returns\n        -------\n        str\n        \"\"\"\n        return self.args[0]\n\n    def column_names_query(self, query: str) -> str:\n        \"\"\"\n        Get a query that gives the names of columns that `query` would produce.\n\n        Parameters\n        ----------\n        query : str\n            The SQL query to check.\n\n        Returns\n        -------\n        str\n        \"\"\"\n        # This query looks odd, but it works in both PostgreSQL and Microsoft\n        # SQL, 
which doesn't let you use a \"limit\" clause to select 0 rows.\n        return f\"SELECT * FROM ({query}) AS _MODIN_COUNT_QUERY WHERE 1 = 0\"\n\n    def row_count_query(self, query: str) -> str:\n        \"\"\"\n        Get a query that gives the number of rows that `query` would produce.\n\n        Parameters\n        ----------\n        query : str\n            The SQL query to check.\n\n        Returns\n        -------\n        str\n        \"\"\"\n        return f\"SELECT COUNT(*) FROM ({query}) AS _MODIN_COUNT_QUERY\"\n\n    def partition_query(self, query: str, limit: int, offset: int) -> str:\n        \"\"\"\n        Get a query that partitions the original `query`.\n\n        Parameters\n        ----------\n        query : str\n            The SQL query to get a partition.\n        limit : int\n            The size of the partition.\n        offset : int\n            Where the partition begins.\n\n        Returns\n        -------\n        str\n        \"\"\"\n        return (\n            (\n                f\"SELECT * FROM ({query}) AS _MODIN_COUNT_QUERY ORDER BY(SELECT NULL)\"\n                + f\" OFFSET {offset} ROWS FETCH NEXT {limit} ROWS ONLY\"\n            )\n            if self._dialect_is_microsoft_sql()\n            else f\"SELECT * FROM ({query}) AS _MODIN_COUNT_QUERY LIMIT \"\n            + f\"{limit} OFFSET {offset}\"\n        )\n"
  },
  {
    "path": "modin/distributed/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"API to operate on distributed objects.\"\"\"\n"
  },
  {
    "path": "modin/distributed/dataframe/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"API to operate on distributed DataFrame objects.\"\"\"\n"
  },
  {
    "path": "modin/distributed/dataframe/pandas/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"API to operate on distributed pandas DataFrame objects.\"\"\"\n\nfrom .partitions import from_partitions, unwrap_partitions\n\n__all__ = [\"unwrap_partitions\", \"from_partitions\"]\n"
  },
  {
    "path": "modin/distributed/dataframe/pandas/partitions.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module houses API to operate on Modin DataFrame partitions that are pandas DataFrame(s).\"\"\"\n\nfrom typing import TYPE_CHECKING, Optional, Union\n\nimport numpy as np\nfrom pandas._typing import Axes\n\nfrom modin.core.storage_formats.pandas.query_compiler import PandasQueryCompiler\nfrom modin.pandas.dataframe import DataFrame, Series\n\nif TYPE_CHECKING:\n    from modin.core.execution.dask.implementations.pandas_on_dask.partitioning import (\n        PandasOnDaskDataframeColumnPartition,\n        PandasOnDaskDataframePartition,\n        PandasOnDaskDataframeRowPartition,\n    )\n    from modin.core.execution.ray.implementations.pandas_on_ray.partitioning import (\n        PandasOnRayDataframeColumnPartition,\n        PandasOnRayDataframePartition,\n        PandasOnRayDataframeRowPartition,\n    )\n    from modin.core.execution.unidist.implementations.pandas_on_unidist.partitioning import (\n        PandasOnUnidistDataframeColumnPartition,\n        PandasOnUnidistDataframePartition,\n        PandasOnUnidistDataframeRowPartition,\n    )\n\n    PartitionUnionType = Union[\n        PandasOnRayDataframePartition,\n        PandasOnDaskDataframePartition,\n     
   PandasOnUnidistDataframePartition,\n        PandasOnRayDataframeColumnPartition,\n        PandasOnRayDataframeRowPartition,\n        PandasOnDaskDataframeColumnPartition,\n        PandasOnDaskDataframeRowPartition,\n        PandasOnUnidistDataframeColumnPartition,\n        PandasOnUnidistDataframeRowPartition,\n    ]\nelse:\n    from typing import Any\n\n    PartitionUnionType = Any\n\n\ndef unwrap_partitions(\n    api_layer_object: Union[DataFrame, Series],\n    axis: Optional[int] = None,\n    get_ip: bool = False,\n) -> list:\n    \"\"\"\n    Unwrap partitions of the ``api_layer_object``.\n\n    Parameters\n    ----------\n    api_layer_object : DataFrame or Series\n        The API layer object.\n    axis : {None, 0, 1}, default: None\n        The axis to unwrap partitions for (0 - row partitions, 1 - column partitions).\n        If ``axis is None``, the partitions are unwrapped as they are currently stored.\n    get_ip : bool, default: False\n        Whether to get node ip address to each partition or not.\n\n    Returns\n    -------\n    list\n        A list of Ray.ObjectRef/Dask.Future to partitions of the ``api_layer_object``\n        if Ray/Dask is used as an engine.\n\n    Notes\n    -----\n    If ``get_ip=True``, a list of tuples of Ray.ObjectRef/Dask.Future to node ip addresses and\n    partitions of the ``api_layer_object``, respectively, is returned if Ray/Dask is used as an engine\n    (i.e. 
``[(Ray.ObjectRef/Dask.Future, Ray.ObjectRef/Dask.Future), ...]``).\n    \"\"\"\n    if not hasattr(api_layer_object, \"_query_compiler\"):\n        raise ValueError(\n            f\"Only API Layer objects may be passed in here, got {type(api_layer_object)} instead.\"\n        )\n\n    modin_frame = api_layer_object._query_compiler._modin_frame\n    modin_frame._propagate_index_objs(None)\n    if axis is None:\n\n        def _unwrap_partitions() -> list:\n            [p.drain_call_queue() for p in modin_frame._partitions.flatten()]\n\n            def get_block(partition: PartitionUnionType) -> np.ndarray:\n                if hasattr(partition, \"force_materialization\"):\n                    blocks = partition.force_materialization().list_of_blocks\n                else:\n                    blocks = partition.list_of_blocks\n                assert (\n                    len(blocks) == 1\n                ), f\"Implementation assumes that partition contains a single block, but {len(blocks)} received.\"\n                return blocks[0]\n\n            if get_ip:\n                return [\n                    [\n                        (partition.ip(materialize=False), get_block(partition))\n                        for partition in row\n                    ]\n                    for row in modin_frame._partitions\n                ]\n            else:\n                return [\n                    [get_block(partition) for partition in row]\n                    for row in modin_frame._partitions\n                ]\n\n        actual_engine = type(\n            api_layer_object._query_compiler._modin_frame._partitions[0][0]\n        ).__name__\n        if actual_engine in (\n            \"PandasOnRayDataframePartition\",\n            \"PandasOnDaskDataframePartition\",\n            \"PandasOnUnidistDataframePartition\",\n            \"PandasOnRayDataframeColumnPartition\",\n            \"PandasOnRayDataframeRowPartition\",\n            
\"PandasOnDaskDataframeColumnPartition\",\n            \"PandasOnDaskDataframeRowPartition\",\n            \"PandasOnUnidistDataframeColumnPartition\",\n            \"PandasOnUnidistDataframeRowPartition\",\n        ):\n            return _unwrap_partitions()\n        raise ValueError(\n            f\"Do not know how to unwrap '{actual_engine}' underlying partitions\"\n        )\n    else:\n        partitions = modin_frame._partition_mgr_cls.axis_partition(\n            modin_frame._partitions, axis ^ 1\n        )\n        return [\n            part.force_materialization(get_ip=get_ip).unwrap(\n                squeeze=True, get_ip=get_ip\n            )\n            for part in partitions\n        ]\n\n\ndef from_partitions(\n    partitions: list,\n    axis: Optional[int],\n    index: Optional[Axes] = None,\n    columns: Optional[Axes] = None,\n    row_lengths: Optional[list] = None,\n    column_widths: Optional[list] = None,\n) -> DataFrame:\n    \"\"\"\n    Create DataFrame from remote partitions.\n\n    Parameters\n    ----------\n    partitions : list\n        A list of Ray.ObjectRef/Dask.Future to partitions depending on the engine used.\n        Or a list of tuples of Ray.ObjectRef/Dask.Future to node ip addresses and partitions\n        depending on the engine used (i.e. ``[(Ray.ObjectRef/Dask.Future, Ray.ObjectRef/Dask.Future), ...]``).\n    axis : {None, 0 or 1}\n        The ``axis`` parameter is used to identify what are the partitions passed.\n        You have to set:\n\n        * ``axis=0`` if you want to create DataFrame from row partitions\n        * ``axis=1`` if you want to create DataFrame from column partitions\n        * ``axis=None`` if you want to create DataFrame from 2D list of partitions\n    index : sequence, optional\n        The index for the DataFrame. Is computed if not provided.\n    columns : sequence, optional\n        The columns for the DataFrame. 
Is computed if not provided.\n    row_lengths : list, optional\n        The length of each partition in the rows. The \"height\" of\n        each of the block partitions. Is computed if not provided.\n    column_widths : list, optional\n        The width of each partition in the columns. The \"width\" of\n        each of the block partitions. Is computed if not provided.\n\n    Returns\n    -------\n    modin.pandas.DataFrame\n        DataFrame instance created from remote partitions.\n\n    Notes\n    -----\n    Pass `index`, `columns`, `row_lengths` and `column_widths` to avoid triggering\n    extra computations of the metadata when creating a DataFrame.\n    \"\"\"\n    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher\n\n    factory = FactoryDispatcher.get_factory()\n    # TODO(https://github.com/modin-project/modin/issues/5127):\n    # Remove these assertions once the dependencies of this function all have types.\n    assert factory is not None\n    assert factory.io_cls is not None\n    assert factory.io_cls.frame_cls is not None\n    assert factory.io_cls.frame_cls._partition_mgr_cls is not None  # type: ignore[unreachable]\n    partition_class = factory.io_cls.frame_cls._partition_mgr_cls._partition_class\n    partition_frame_class = factory.io_cls.frame_cls\n    partition_mgr_class = factory.io_cls.frame_cls._partition_mgr_cls\n\n    # Since we store partitions of Modin DataFrame as a 2D NumPy array we need to place\n    # passed partitions to 2D NumPy array to pass it to internal Modin Frame class.\n    # `axis=None` - convert 2D list to 2D NumPy array\n    if axis is None:\n        if isinstance(partitions[0][0], tuple):\n            parts = np.array(\n                [\n                    [partition_class(partition, ip=ip) for ip, partition in row]\n                    for row in partitions\n                ]\n            )\n        else:\n            parts = np.array(\n                [\n                    
[partition_class(partition) for partition in row]\n                    for row in partitions\n                ]\n            )\n    # `axis=0` - place row partitions to 2D NumPy array so that each row of the array is one row partition.\n    elif axis == 0:\n        if isinstance(partitions[0], tuple):\n            parts = np.array(\n                [[partition_class(partition, ip=ip)] for ip, partition in partitions]\n            )\n        else:\n            parts = np.array([[partition_class(partition)] for partition in partitions])\n    # `axis=1` - place column partitions to 2D NumPy array so that each column of the array is one column partition.\n    elif axis == 1:\n        if isinstance(partitions[0], tuple):\n            parts = np.array(\n                [[partition_class(partition, ip=ip) for ip, partition in partitions]]\n            )\n        else:\n            parts = np.array([[partition_class(partition) for partition in partitions]])\n    else:\n        raise ValueError(\n            f\"Got unacceptable value of axis {axis}. Possible values are {0}, {1} or {None}.\"\n        )\n\n    labels_axis_to_sync = None\n    if index is None:\n        labels_axis_to_sync = 1\n        index, internal_indices = partition_mgr_class.get_indices(0, parts)\n        if row_lengths is None:\n            row_lengths = [len(idx) for idx in internal_indices]\n\n    if columns is None:\n        labels_axis_to_sync = 0 if labels_axis_to_sync is None else -1\n        columns, internal_indices = partition_mgr_class.get_indices(1, parts)\n        if column_widths is None:\n            column_widths = [len(idx) for idx in internal_indices]\n\n    frame = partition_frame_class(\n        parts,\n        index,\n        columns,\n        row_lengths=row_lengths,\n        column_widths=column_widths,\n    )\n\n    if labels_axis_to_sync != -1:\n        frame.synchronize_labels(axis=labels_axis_to_sync)\n\n    return DataFrame(query_compiler=PandasQueryCompiler(frame))\n"
  },
  {
    "path": "modin/error_message.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport warnings\nfrom typing import NoReturn, Optional, Set\n\nfrom modin.logging import get_logger\nfrom modin.utils import get_current_execution\n\n\nclass ErrorMessage(object):\n    # Only print full ``default to pandas`` warning one time.\n    printed_default_to_pandas = False\n    printed_warnings: Set[int] = set()  # Set of hashes of printed warnings\n\n    @classmethod\n    def not_implemented(cls, message: str = \"\") -> NoReturn:\n        if message == \"\":\n            message = \"This functionality is not yet available in Modin.\"\n        get_logger().info(f\"Modin Error: NotImplementedError: {message}\")\n        raise NotImplementedError(\n            f\"{message}\\n\"\n            + \"To request implementation, file an issue at \"\n            + \"https://github.com/modin-project/modin/issues or, if that's \"\n            + \"not possible, send an email to feature_requests@modin.org.\"\n        )\n\n    @classmethod\n    def single_warning(\n        cls, message: str, category: Optional[type[Warning]] = None\n    ) -> None:\n        # note that there should not be identical messages with different categories since\n        # only the message is used 
as the hash key.\n        message_hash = hash(message)\n        logger = get_logger()\n        if message_hash in cls.printed_warnings:\n            logger.debug(\n                f\"Modin Warning: Single Warning: {message} was raised and suppressed.\"\n            )\n            return\n\n        logger.debug(f\"Modin Warning: Single Warning: {message} was raised.\")\n        warnings.warn(message, category=category)\n        cls.printed_warnings.add(message_hash)\n\n    @classmethod\n    def default_to_pandas(cls, message: str = \"\", reason: str = \"\") -> None:\n        # TODO(https://github.com/modin-project/modin/issues/7429): Use\n        # frame-level engine config.\n\n        if message != \"\":\n            execution_str = get_current_execution()\n            message = (\n                f\"{message} is not currently supported by {execution_str}, \"\n                + \"defaulting to pandas implementation.\"\n            )\n        else:\n            message = \"Defaulting to pandas implementation.\"\n\n        if not cls.printed_default_to_pandas:\n            message = (\n                f\"{message}\\n\"\n                + \"Please refer to \"\n                + \"https://modin.readthedocs.io/en/stable/supported_apis/defaulting_to_pandas.html for explanation.\"\n            )\n            cls.printed_default_to_pandas = True\n        if reason:\n            message += f\"\\nReason: {reason}\"\n        get_logger().debug(f\"Modin Warning: Default to pandas: {message}\")\n        warnings.warn(message)\n\n    @classmethod\n    def catch_bugs_and_request_email(\n        cls, failure_condition: bool, extra_log: str = \"\"\n    ) -> None:\n        if failure_condition:\n            get_logger().info(f\"Modin Error: Internal Error: {extra_log}\")\n            raise Exception(\n                \"Internal Error. 
\"\n                + \"Please visit https://github.com/modin-project/modin/issues \"\n                + \"to file an issue with the traceback and the command that \"\n                + \"caused this error. If you can't file a GitHub issue, \"\n                + f\"please email bug_reports@modin.org.\\n{extra_log}\"\n            )\n\n    @classmethod\n    def non_verified_udf(cls) -> None:\n        get_logger().debug(\"Modin Warning: Non Verified UDF\")\n        warnings.warn(\n            \"User-defined function verification is still under development in Modin. \"\n            + \"The function provided is not verified.\"\n        )\n\n    @classmethod\n    def bad_type_for_numpy_op(cls, function_name: str, operand_type: type) -> None:\n        cls.single_warning(\n            f\"Modin NumPy only supports objects of modin.numpy.array types for {function_name}, not {operand_type}. Defaulting to NumPy.\"\n        )\n\n    @classmethod\n    def mismatch_with_pandas(cls, operation: str, message: str) -> None:\n        get_logger().debug(\n            f\"Modin Warning: {operation} mismatch with pandas: {message}\"\n        )\n        cls.single_warning(\n            f\"`{operation}` implementation has mismatches with pandas:\\n{message}.\"\n        )\n\n    @classmethod\n    def warn(cls, message: str) -> None:\n        warnings.warn(message)\n\n    @classmethod\n    def not_initialized(cls, engine: str, code: str) -> None:\n        get_logger().debug(f\"Modin Warning: Not Initialized: {engine}\")\n        warnings.warn(\n            f\"{engine} execution environment not yet initialized. Initializing...\\n\"\n            + \"To remove this warning, run the following python code before doing dataframe operations:\\n\"\n            + f\"{code}\"\n        )\n"
  },
  {
    "path": "modin/experimental/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n"
  },
  {
    "path": "modin/experimental/batch/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nfrom .pipeline import PandasQueryPipeline\n\n__all__ = [\n    \"PandasQueryPipeline\",\n]\n"
  },
  {
    "path": "modin/experimental/batch/pipeline.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module houses ``PandasQueryPipeline`` and ``PandasQuery`` classes, that implement a batch pipeline protocol for Modin Dataframes.\"\"\"\n\nfrom typing import Callable, Optional\n\nimport numpy as np\n\nimport modin.pandas as pd\nfrom modin.config import NPartitions\nfrom modin.core.execution.ray.implementations.pandas_on_ray.dataframe.dataframe import (\n    PandasOnRayDataframe,\n)\nfrom modin.core.storage_formats.pandas import PandasQueryCompiler\nfrom modin.error_message import ErrorMessage\nfrom modin.utils import get_current_execution\n\n\nclass PandasQuery(object):\n    \"\"\"\n    Internal representation of a single query in a pipeline.\n\n    This object represents a single function to be pipelined in a batch pipeline.\n\n    Parameters\n    ----------\n    func : Callable\n        The function to apply to the dataframe.\n    is_output : bool, default: False\n        Whether this query is an output query and should be passed both to the next query, and\n        directly to postprocessing.\n    repartition_after : bool, default: False\n        Whether to repartition after this query is computed. 
Currently, repartitioning is only\n        supported if there is 1 partition prior to repartitioning.\n    fan_out : bool, default: False\n        Whether to fan out this node. If True and only 1 partition is passed as input, the partition\n        is replicated `PandasQueryPipeline.num_partitions` (default: `NPartitions.get`) times, and\n        the function is called on each. The `reduce_fn` must also be specified.\n    pass_partition_id : bool, default: False\n        Whether to pass the numerical partition id to the query.\n    reduce_fn : Callable, default: None\n        The reduce function to apply if `fan_out` is set to True. This takes the\n        `PandasQueryPipeline.num_partitions` (default: `NPartitions.get`) partitions that result from\n        this query, and combines them into 1 partition.\n    output_id : int, default: None\n        An id to assign to this node if it is an output.\n\n    Notes\n    -----\n    `func` must be a function that is applied along an axis of the dataframe.\n\n    Use `pandas` for any module level functions inside `func` since it operates directly on\n    partitions.\n    \"\"\"\n\n    def __init__(\n        self,\n        func: Callable,\n        is_output: bool = False,\n        repartition_after: bool = False,\n        fan_out: bool = False,\n        pass_partition_id: bool = False,\n        reduce_fn: Optional[Callable] = None,\n        output_id: Optional[int] = None,\n    ):\n        self.func = func\n        self.is_output = is_output\n        self.repartition_after = repartition_after\n        self.fan_out = fan_out\n        self.pass_partition_id = pass_partition_id\n        self.reduce_fn = reduce_fn\n        self.output_id = output_id\n        # List of sub-queries to feed into this query, if this query is an output node.\n        self.operators = None\n\n\nclass PandasQueryPipeline(object):\n    \"\"\"\n    Internal representation of a query pipeline.\n\n    This object keeps track of the functions that 
compose to form a query pipeline.\n\n    Parameters\n    ----------\n    df : modin.pandas.DataFrame\n        The dataframe to perform this pipeline on.\n    num_partitions : int, optional\n        The number of partitions to maintain for the batched dataframe.\n        If not specified, the value is assumed equal to ``NPartitions.get()``.\n\n    Notes\n    -----\n    Only row-parallel pipelines are supported. All queries will be applied along the row axis.\n    \"\"\"\n\n    def __init__(self, df, num_partitions: Optional[int] = None):\n        if get_current_execution() != \"PandasOnRay\" or (\n            not isinstance(df._query_compiler._modin_frame, PandasOnRayDataframe)\n        ):  # pragma: no cover\n            ErrorMessage.not_implemented(\n                \"Batch Pipeline API is only implemented for `PandasOnRay` execution.\"\n            )\n        ErrorMessage.single_warning(\n            \"The Batch Pipeline API is an experimental feature and still under development in Modin.\"\n        )\n        self.df = df\n        self.num_partitions = num_partitions if num_partitions else NPartitions.get()\n        self.outputs = []  # List of output queries.\n        self.query_list = []  # List of all queries.\n        self.is_output_id_specified = (\n            False  # Flag to indicate that `output_id` has been specified for a node.\n        )\n\n    def update_df(self, df):\n        \"\"\"\n        Update the dataframe to perform this pipeline on.\n\n        Parameters\n        ----------\n        df : modin.pandas.DataFrame\n            The new dataframe to perform this pipeline on.\n        \"\"\"\n        if get_current_execution() != \"PandasOnRay\" or (\n            not isinstance(df._query_compiler._modin_frame, PandasOnRayDataframe)\n        ):  # pragma: no cover\n            ErrorMessage.not_implemented(\n                \"Batch Pipeline API is only implemented for `PandasOnRay` execution.\"\n            )\n        self.df = df\n\n    def 
add_query(\n        self,\n        func: Callable,\n        is_output: bool = False,\n        repartition_after: bool = False,\n        fan_out: bool = False,\n        pass_partition_id: bool = False,\n        reduce_fn: Optional[Callable] = None,\n        output_id: Optional[int] = None,\n    ):\n        \"\"\"\n        Add a query to the current pipeline.\n\n        Parameters\n        ----------\n        func : Callable\n            DataFrame query to perform.\n        is_output : bool, default: False\n            Whether this query should be designated as an output query. If `True`, the output of\n            this query is passed both to the next query and directly to postprocessing.\n        repartition_after : bool, default: False\n            Whether the dataframe should be repartitioned after this query. Currently,\n            repartitioning is only supported if there is 1 partition prior.\n        fan_out : bool, default: False\n            Whether to fan out this node. If True and only 1 partition is passed as input, the\n            partition is replicated `self.num_partitions` (default: `NPartitions.get`) times,\n            and the function is called on each. The `reduce_fn` must also be specified.\n        pass_partition_id : bool, default: False\n            Whether to pass the numerical partition id to the query.\n        reduce_fn : Callable, default: None\n            The reduce function to apply if `fan_out` is set to True. 
This takes the\n            `self.num_partitions` (default: `NPartitions.get`) partitions that result from this\n            query, and combines them into 1 partition.\n        output_id : int, default: None\n            An id to assign to this node if it is an output.\n\n        Notes\n        -----\n        Use `pandas` for any module level functions inside `func` since it operates directly on\n        partitions.\n        \"\"\"\n        if not is_output and output_id is not None:\n            raise ValueError(\"Output ID cannot be specified for non-output node.\")\n        if is_output:\n            if not self.is_output_id_specified and output_id is not None:\n                if len(self.outputs) != 0:\n                    raise ValueError(\"Output ID must be specified for all nodes.\")\n            if output_id is None and self.is_output_id_specified:\n                raise ValueError(\"Output ID must be specified for all nodes.\")\n        self.query_list.append(\n            PandasQuery(\n                func,\n                is_output,\n                repartition_after,\n                fan_out,\n                pass_partition_id,\n                reduce_fn,\n                output_id,\n            )\n        )\n        if is_output:\n            self.outputs.append(self.query_list[-1])\n            if output_id is not None:\n                self.is_output_id_specified = True\n            self.outputs[-1].operators = self.query_list[:-1]\n            self.query_list = []\n\n    def _complete_nodes(self, list_of_nodes, partitions):\n        \"\"\"\n        Run a sub-query end to end.\n\n        Parameters\n        ----------\n        list_of_nodes : list of PandasQuery\n            The functions that compose this query.\n        partitions : list of PandasOnRayDataframeVirtualPartition\n            The partitions that compose the dataframe that is input to this sub-query.\n\n        Returns\n        -------\n        list of 
PandasOnRayDataframeVirtualPartition\n            The partitions that result from computing the functions represented by `list_of_nodes`.\n        \"\"\"\n        for node in list_of_nodes:\n            if node.fan_out:\n                if len(partitions) > 1:\n                    ErrorMessage.not_implemented(\n                        \"Fan out is only supported with DataFrames with 1 partition.\"\n                    )\n                partitions[0] = partitions[0].force_materialization()\n                partition_list = partitions[0].list_of_block_partitions\n                partitions[0] = partitions[0].add_to_apply_calls(node.func, 0)\n                partitions[0].drain_call_queue(num_splits=1)\n                new_dfs = []\n                for i in range(1, self.num_partitions):\n                    new_dfs.append(\n                        type(partitions[0])(\n                            partition_list,\n                            full_axis=partitions[0].full_axis,\n                        ).add_to_apply_calls(node.func, i)\n                    )\n                    new_dfs[-1].drain_call_queue(num_splits=1)\n\n                def reducer(df):\n                    df_inputs = [df]\n                    for df in new_dfs:\n                        df_inputs.append(df.to_pandas())\n                    return node.reduce_fn(df_inputs)\n\n                partitions = [partitions[0].add_to_apply_calls(reducer)]\n            elif node.repartition_after:\n                if len(partitions) > 1:\n                    ErrorMessage.not_implemented(\n                        \"Dynamic repartitioning is currently only supported for DataFrames with 1 partition.\"\n                    )\n                partitions[0] = (\n                    partitions[0].add_to_apply_calls(node.func).force_materialization()\n                )\n                new_dfs = []\n\n                def mask_partition(df, i):  # pragma: no cover\n                    new_length = len(df.index) // 
self.num_partitions\n                    if i == self.num_partitions - 1:\n                        return df.iloc[i * new_length :]\n                    return df.iloc[i * new_length : (i + 1) * new_length]\n\n                for i in range(self.num_partitions):\n                    new_dfs.append(\n                        type(partitions[0])(\n                            partitions[0].list_of_block_partitions,\n                            full_axis=partitions[0].full_axis,\n                        ).add_to_apply_calls(mask_partition, i)\n                    )\n                partitions = new_dfs\n            else:\n                if node.pass_partition_id:\n                    partitions = [\n                        part.add_to_apply_calls(node.func, i)\n                        for i, part in enumerate(partitions)\n                    ]\n                else:\n                    partitions = [\n                        part.add_to_apply_calls(node.func) for part in partitions\n                    ]\n        return partitions\n\n    def compute_batch(\n        self,\n        postprocessor: Optional[Callable] = None,\n        pass_partition_id: Optional[bool] = False,\n        pass_output_id: Optional[bool] = False,\n    ):\n        \"\"\"\n        Run the completed pipeline + any postprocessing steps end to end.\n\n        Parameters\n        ----------\n        postprocessor : Callable, default: None\n            A postprocessing function to be applied to each output partition.\n            The order of arguments passed is `df` (the partition), `output_id`\n            (if `pass_output_id=True`), and `partition_id` (if `pass_partition_id=True`).\n        pass_partition_id : bool, default: False\n            Whether or not to pass the numerical partition id to the postprocessing function.\n        pass_output_id : bool, default: False\n            Whether or not to pass the output ID associated with output queries to the\n            postprocessing function.\n\n  
      Returns\n        -------\n        list or dict or DataFrame\n            If output ids are specified, a dictionary mapping output id to the resulting dataframe\n            is returned, otherwise, a list of the resulting dataframes is returned.\n        \"\"\"\n        if len(self.outputs) == 0:\n            ErrorMessage.single_warning(\n                \"No outputs to compute. Returning an empty list. Please specify outputs by calling `add_query` with `is_output=True`.\"\n            )\n            return []\n        if not self.is_output_id_specified and pass_output_id:\n            raise ValueError(\n                \"`pass_output_id` is set to True, but output ids have not been specified. \"\n                + \"To pass output ids, please specify them using the `output_id` kwarg with pipeline.add_query\"\n            )\n        if self.is_output_id_specified:\n            outs = {}\n        else:\n            outs = []\n        modin_frame = self.df._query_compiler._modin_frame\n        partitions = modin_frame._partition_mgr_cls.row_partitions(\n            modin_frame._partitions\n        )\n        for node in self.outputs:\n            partitions = self._complete_nodes(node.operators + [node], partitions)\n            for part in partitions:\n                part.drain_call_queue(num_splits=1)\n            if postprocessor:\n                output_partitions = []\n                for partition_id, partition in enumerate(partitions):\n                    args = []\n                    if pass_output_id:\n                        args.append(node.output_id)\n                    if pass_partition_id:\n                        args.append(partition_id)\n                    output_partitions.append(\n                        partition.add_to_apply_calls(postprocessor, *args)\n                    )\n            else:\n                output_partitions = [\n                    part.add_to_apply_calls(lambda df: df) for part in partitions\n                ]\n    
        [\n                part.drain_call_queue(num_splits=self.num_partitions)\n                for part in output_partitions\n            ]  # Ensures our result df is block partitioned.\n            if not self.is_output_id_specified:\n                outs.append(output_partitions)\n            else:\n                outs[node.output_id] = output_partitions\n        if self.is_output_id_specified:\n            final_results = {}\n            id_df_iter = outs.items()\n        else:\n            final_results = [None] * len(outs)\n            id_df_iter = enumerate(outs)\n\n        for id, df in id_df_iter:\n            partitions = []\n            for row_partition in df:\n                partitions.append(row_partition.list_of_block_partitions)\n            partitions = np.array(partitions)\n            partition_mgr_class = PandasOnRayDataframe._partition_mgr_cls\n            index, internal_rows = partition_mgr_class.get_indices(0, partitions)\n            columns, internal_cols = partition_mgr_class.get_indices(1, partitions)\n            result_modin_frame = PandasOnRayDataframe(\n                partitions,\n                index,\n                columns,\n                row_lengths=list(map(len, internal_rows)),\n                column_widths=list(map(len, internal_cols)),\n            )\n            query_compiler = PandasQueryCompiler(result_modin_frame)\n            result_df = pd.DataFrame(query_compiler=query_compiler)\n            final_results[id] = result_df\n\n        return final_results\n"
  },
  {
    "path": "modin/experimental/core/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Experimental Modin's core functionality.\"\"\"\n"
  },
  {
    "path": "modin/experimental/core/execution/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Experimental Modin's functionality related to execution engines supported.\"\"\"\n"
  },
  {
    "path": "modin/experimental/core/execution/dask/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Experimental Modin's functionality related to Dask execution engine.\"\"\"\n"
  },
  {
    "path": "modin/experimental/core/execution/dask/implementations/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Experimental functionality related to Dask execution engine and optimized for specific storage formats.\"\"\"\n"
  },
  {
    "path": "modin/experimental/core/execution/dask/implementations/pandas_on_dask/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Experimental functionality related to Dask execution engine and optimized for pandas storage format.\"\"\"\n"
  },
  {
    "path": "modin/experimental/core/execution/ray/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Experimental Modin's functionality related to Ray execution engine.\"\"\"\n"
  },
  {
    "path": "modin/experimental/core/execution/ray/implementations/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Experimental functionality related to Ray execution engine and optimized for specific storage formats.\"\"\"\n"
  },
  {
    "path": "modin/experimental/core/execution/unidist/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Experimental Modin's functionality related to unidist execution engine.\"\"\"\n"
  },
  {
    "path": "modin/experimental/core/execution/unidist/implementations/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Experimental Modin's functionality related to unidist execution engine and optimized for specific storage formats.\"\"\"\n"
  },
  {
    "path": "modin/experimental/core/execution/unidist/implementations/pandas_on_unidist/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Experimental functionality related to unidist execution engine and optimized for pandas storage format.\"\"\"\n"
  },
  {
    "path": "modin/experimental/core/io/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Experimental IO functions implementations.\"\"\"\n\nfrom .glob.glob_dispatcher import ExperimentalGlobDispatcher\nfrom .sql.sql_dispatcher import ExperimentalSQLDispatcher\nfrom .text.csv_glob_dispatcher import ExperimentalCSVGlobDispatcher\nfrom .text.custom_text_dispatcher import ExperimentalCustomTextDispatcher\n\n__all__ = [\n    \"ExperimentalCSVGlobDispatcher\",\n    \"ExperimentalSQLDispatcher\",\n    \"ExperimentalGlobDispatcher\",\n    \"ExperimentalCustomTextDispatcher\",\n]\n"
  },
  {
    "path": "modin/experimental/core/io/glob/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Experimental module that allows to work with various formats using glob syntax.\"\"\"\n"
  },
  {
    "path": "modin/experimental/core/io/glob/glob_dispatcher.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module houses ``ExperimentalGlobDispatcher`` class that is used to read/write files of different formats in parallel.\"\"\"\n\nimport glob\nimport warnings\n\nimport pandas\nfrom pandas.io.common import stringify_path\n\nfrom modin.config import NPartitions\nfrom modin.core.io.file_dispatcher import FileDispatcher\nfrom modin.core.storage_formats.pandas.query_compiler import PandasQueryCompiler\n\n\nclass ExperimentalGlobDispatcher(FileDispatcher):\n    \"\"\"Class implements reading/writing different formats, parallelizing by the number of files.\"\"\"\n\n    @classmethod\n    def _read(cls, **kwargs):\n        \"\"\"\n        Read data from `filepath_or_buffer` according to `kwargs` parameters.\n\n        Parameters\n        ----------\n        filepath_or_buffer : str, path object or file-like object\n            `filepath_or_buffer` parameter of `read_*` function.\n        **kwargs : dict\n            Parameters of `read_*` function.\n\n        Returns\n        -------\n        new_query_compiler : BaseQueryCompiler\n            Query compiler with imported data for further processing.\n\n        Notes\n        -----\n        The number of partitions is 
equal to the number of input files.\n        \"\"\"\n        if \"filepath_or_buffer\" in kwargs:\n            path_key = \"filepath_or_buffer\"\n        elif \"path\" in kwargs:\n            path_key = \"path\"\n        elif \"path_or_buf\" in kwargs:\n            path_key = \"path_or_buf\"\n        elif \"path_or_buffer\" in kwargs:\n            path_key = \"path_or_buffer\"\n        filepath_or_buffer = kwargs.pop(path_key)\n        filepath_or_buffer = stringify_path(filepath_or_buffer)\n        if not (isinstance(filepath_or_buffer, str) and \"*\" in filepath_or_buffer):\n            return cls.single_worker_read(\n                filepath_or_buffer,\n                single_worker_read=True,\n                reason=\"Buffers and single files are not supported\",\n                **kwargs,\n            )\n        filepath_or_buffer = sorted(glob.glob(filepath_or_buffer))\n\n        if len(filepath_or_buffer) == 0:\n            raise ValueError(\n                f\"There are no files matching the pattern: {filepath_or_buffer}\"\n            )\n\n        partition_ids = [None] * len(filepath_or_buffer)\n        lengths_ids = [None] * len(filepath_or_buffer)\n        widths_ids = [None] * len(filepath_or_buffer)\n\n        if len(filepath_or_buffer) != NPartitions.get():\n            # do we need to do a repartitioning?\n            warnings.warn(\"can be inefficient partitioning\")\n\n        for idx, file_name in enumerate(filepath_or_buffer):\n            *partition_ids[idx], lengths_ids[idx], widths_ids[idx] = cls.deploy(\n                func=cls.parse,\n                f_kwargs={\n                    \"fname\": file_name,\n                    **kwargs,\n                },\n                num_returns=3,\n            )\n        lengths = cls.materialize(lengths_ids)\n        widths = cls.materialize(widths_ids)\n\n        # while num_splits is 1, need only one value\n        partition_ids = cls.build_partition(partition_ids, lengths, [widths[0]])\n\n        
new_index, _ = cls.frame_cls._partition_mgr_cls.get_indices(0, partition_ids)\n        new_columns, _ = cls.frame_cls._partition_mgr_cls.get_indices(1, partition_ids)\n\n        return cls.query_compiler_cls(\n            cls.frame_cls(partition_ids, new_index, new_columns)\n        )\n\n    @classmethod\n    def write(cls, qc, **kwargs):\n        \"\"\"\n        When `*` is in the filename, all partitions are written to their own separate file.\n\n        The filenames are determined as follows:\n        - if `*` is in the filename, then it will be replaced by the ascending sequence 0, 1, 2, …\n        - if `*` is not in the filename, then the default implementation will be used.\n\n        Parameters\n        ----------\n        qc : BaseQueryCompiler\n            The query compiler of the Modin dataframe that we want\n            to run ``to_<format>_glob`` on.\n        **kwargs : dict\n            Parameters for ``pandas.to_<format>(**kwargs)``.\n        \"\"\"\n        if \"filepath_or_buffer\" in kwargs:\n            path_key = \"filepath_or_buffer\"\n        elif \"path\" in kwargs:\n            path_key = \"path\"\n        elif \"path_or_buf\" in kwargs:\n            path_key = \"path_or_buf\"\n        elif \"path_or_buffer\" in kwargs:\n            path_key = \"path_or_buffer\"\n        filepath_or_buffer = kwargs.pop(path_key)\n        filepath_or_buffer = stringify_path(filepath_or_buffer)\n        if not (\n            isinstance(filepath_or_buffer, str) and \"*\" in filepath_or_buffer\n        ) or not isinstance(qc, PandasQueryCompiler):\n            warnings.warn(\"Defaulting to Modin core implementation\")\n            cls.base_write(qc, filepath_or_buffer, **kwargs)\n            return\n\n        # Be careful, this is a kind of limitation, but at the time of the first implementation,\n        # getting a name in this way is quite convenient.\n        # We can use this attribute because the names of the BaseIO's methods match pandas API.\n        
write_func_name = cls.base_write.__name__\n\n        def func(df, **kw):  # pragma: no cover\n            idx = str(kw[\"partition_idx\"])\n            path = filepath_or_buffer.replace(\"*\", idx)\n            getattr(df, write_func_name)(path, **kwargs)\n            return pandas.DataFrame()\n\n        result = qc._modin_frame.apply_full_axis(\n            1, func, new_index=[], new_columns=[], enumerate_partitions=True\n        )\n        cls.materialize(\n            [part.list_of_blocks[0] for row in result._partitions for part in row]\n        )\n"
  },
  {
    "path": "modin/experimental/core/io/sql/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Experimental SQL format type IO functions implementations.\"\"\"\n"
  },
  {
    "path": "modin/experimental/core/io/sql/sql_dispatcher.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module houses `ExperimentalSQLDispatcher` class.\"\"\"\n\nimport warnings\n\nimport numpy as np\nimport pandas\n\nfrom modin.config import NPartitions\nfrom modin.core.io import SQLDispatcher\n\n\nclass ExperimentalSQLDispatcher(SQLDispatcher):\n    \"\"\"Class handles experimental utils for reading SQL queries or database tables.\"\"\"\n\n    __read_sql_with_offset = None\n\n    @classmethod\n    def preprocess_func(cls):  # noqa: RT01\n        \"\"\"Prepare a function for transmission to remote workers.\"\"\"\n        if cls.__read_sql_with_offset is None:\n            # sql deps are optional, so import only when needed\n            from modin.experimental.core.io.sql.utils import read_sql_with_offset\n\n            cls.__read_sql_with_offset = cls.put(read_sql_with_offset)\n        return cls.__read_sql_with_offset\n\n    @classmethod\n    def _read(\n        cls,\n        sql,\n        con,\n        index_col,\n        coerce_float,\n        params,\n        parse_dates,\n        columns,\n        chunksize,\n        dtype_backend,\n        dtype,\n        partition_column,\n        lower_bound,\n        upper_bound,\n        max_sessions,\n    ):  # 
noqa: PR01\n        \"\"\"\n        Read SQL query or database table into a DataFrame.\n\n        Documentation for parameters can be found at `modin.read_sql`.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            A new query compiler with imported data for further processing.\n        \"\"\"\n        # sql deps are optional, so import only when needed\n        from modin.experimental.core.io.sql.utils import get_query_info, is_distributed\n\n        if not is_distributed(partition_column, lower_bound, upper_bound):\n            message = \"Defaulting to Modin core implementation; \\\n                'partition_column', 'lower_bound', 'upper_bound' must be different from None\"\n            warnings.warn(message)\n            return cls.base_read(\n                sql,\n                con,\n                index_col,\n                coerce_float=coerce_float,\n                params=params,\n                parse_dates=parse_dates,\n                columns=columns,\n                chunksize=chunksize,\n                dtype_backend=dtype_backend,\n                dtype=dtype,\n            )\n        #  starts the distributed alternative\n        cols_names, query = get_query_info(sql, con, partition_column)\n        num_parts = min(NPartitions.get(), max_sessions if max_sessions else 1)\n        num_splits = min(len(cols_names), num_parts)\n        diff = (upper_bound - lower_bound) + 1\n        min_size = diff // num_parts\n        rest = diff % num_parts\n        partition_ids = []\n        index_ids = []\n        end = lower_bound - 1\n        func = cls.preprocess_func()\n        for part in range(num_parts):\n            if rest:\n                size = min_size + 1\n                rest -= 1\n            else:\n                size = min_size\n            start = end + 1\n            end = start + size - 1\n            partition_id = cls.deploy(\n                func,\n                f_args=(\n                    partition_column,\n 
                   start,\n                    end,\n                    num_splits,\n                    query,\n                    con,\n                    index_col,\n                    coerce_float,\n                    params,\n                    parse_dates,\n                    columns,\n                    chunksize,\n                    dtype_backend,\n                    dtype,\n                ),\n                num_returns=num_splits + 1,\n            )\n            partition_ids.append(\n                [cls.frame_partition_cls(obj) for obj in partition_id[:-1]]\n            )\n            index_ids.append(partition_id[-1])\n        new_index = pandas.RangeIndex(sum(cls.materialize(index_ids)))\n        new_query_compiler = cls.query_compiler_cls(\n            cls.frame_cls(np.array(partition_ids), new_index, cols_names)\n        )\n        new_query_compiler._modin_frame.synchronize_labels(axis=0)\n        return new_query_compiler\n"
  },
  {
    "path": "modin/experimental/core/io/sql/utils.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Utilities for experimental SQL format type IO functions implementations.\"\"\"\n\nimport pandas\nimport pandas._libs.lib as lib\nfrom sqlalchemy import MetaData, Table, create_engine, inspect, text\n\nfrom modin.core.storage_formats.pandas.parsers import _split_result_for_readers\n\n\ndef is_distributed(partition_column, lower_bound, upper_bound):\n    \"\"\"\n    Check if is possible to distribute a query with the given args.\n\n    Parameters\n    ----------\n    partition_column : str\n        Column name used for data partitioning between the workers.\n    lower_bound : int\n        The minimum value to be requested from the `partition_column`.\n    upper_bound : int\n        The maximum value to be requested from the `partition_column`.\n\n    Returns\n    -------\n    bool\n        Whether the given query is distributable or not.\n    \"\"\"\n    if (\n        (partition_column is not None)\n        and (lower_bound is not None)\n        and (upper_bound is not None)\n    ):\n        if upper_bound > lower_bound:\n            return True\n        raise InvalidArguments(\"upper_bound must be greater than lower_bound.\")\n    elif (partition_column is 
None) and (lower_bound is None) and (upper_bound is None):\n        return False\n    else:\n        raise InvalidArguments(\n            \"Invalid combination of partition_column, lower_bound, upper_bound.\"\n            + \"All these arguments should be passed (distributed) or none of them (standard pandas).\"\n        )\n\n\ndef is_table(engine, sql):\n    \"\"\"\n    Check if given `sql` parameter is a table name.\n\n    Parameters\n    ----------\n    engine : sqlalchemy.engine.base.Engine\n        SQLAlchemy connection engine.\n    sql : str\n        SQL query to be executed or a table name.\n\n    Returns\n    -------\n    bool\n        Whether `sql` is a table name or not.\n    \"\"\"\n    return inspect(engine).has_table(sql)\n\n\ndef get_table_metadata(engine, table):\n    \"\"\"\n    Extract all useful data from the given table.\n\n    Parameters\n    ----------\n    engine : sqlalchemy.engine.base.Engine\n        SQLAlchemy connection engine.\n    table : str\n        Table name.\n\n    Returns\n    -------\n    sqlalchemy.sql.schema.Table\n        Extracted metadata.\n    \"\"\"\n    metadata = MetaData()\n    metadata.reflect(bind=engine, only=[table])\n    table_metadata = Table(table, metadata, autoload=True)\n    return table_metadata\n\n\ndef get_table_columns(metadata):\n    \"\"\"\n    Extract columns names and python types from the `metadata`.\n\n    Parameters\n    ----------\n    metadata : sqlalchemy.sql.schema.Table\n        Table metadata.\n\n    Returns\n    -------\n    dict\n        Dictionary with columns names and python types.\n    \"\"\"\n    cols = dict()\n    for col in metadata.c:\n        name = str(col).rpartition(\".\")[2]\n        cols[name] = col.type.python_type.__name__\n    return cols\n\n\ndef build_query_from_table(name):\n    \"\"\"\n    Create a query from the given table name.\n\n    Parameters\n    ----------\n    name : str\n        Table name.\n\n    Returns\n    -------\n    str\n        Query string.\n    \"\"\"\n  
  return \"SELECT * FROM {0}\".format(name)\n\n\ndef check_query(query):\n    \"\"\"\n    Check query sanity.\n\n    Parameters\n    ----------\n    query : str\n        Query string.\n    \"\"\"\n    q = query.lower()\n    if \"select \" not in q:\n        raise InvalidQuery(\"SELECT word not found in the query: {0}\".format(query))\n    if \" from \" not in q:\n        raise InvalidQuery(\"FROM word not found in the query: {0}\".format(query))\n\n\ndef get_query_columns(engine, query):\n    \"\"\"\n    Extract columns names and python types from the `query`.\n\n    Parameters\n    ----------\n    engine : sqlalchemy.engine.base.Engine\n        SQLAlchemy connection engine.\n    query : str\n        SQL query.\n\n    Returns\n    -------\n    dict\n        Dictionary with columns names and python types.\n    \"\"\"\n    con = engine.connect()\n    result = con.execute(text(query))\n    cols_names = list(result.keys())\n    values = list(result.first())\n    cols = dict()\n    for i in range(len(cols_names)):\n        cols[cols_names[i]] = type(values[i]).__name__\n    return cols\n\n\ndef check_partition_column(partition_column, cols):\n    \"\"\"\n    Check `partition_column` existence and its type.\n\n    Parameters\n    ----------\n    partition_column : str\n        Column name used for data partitioning between the workers.\n    cols : dict\n        Dictionary with columns names and python types.\n    \"\"\"\n    for k, v in cols.items():\n        if k == partition_column:\n            if v == \"int\":\n                return\n            raise InvalidPartitionColumn(f\"partition_column must be int, and not {v}\")\n    raise InvalidPartitionColumn(\n        f\"partition_column {partition_column} not found in the query\"\n    )\n\n\ndef get_query_info(sql, con, partition_column):\n    \"\"\"\n    Compute metadata needed for query distribution.\n\n    Parameters\n    ----------\n    sql : str\n        SQL query to be executed or a table name.\n    con : 
SQLAlchemy connectable or str\n        Database connection or url string.\n    partition_column : str\n        Column name used for data partitioning between the workers.\n\n    Returns\n    -------\n    list\n        Columns names list.\n    str\n        Query string.\n    \"\"\"\n    engine = create_engine(con)\n    if is_table(engine, sql):\n        table_metadata = get_table_metadata(engine, sql)\n        query = build_query_from_table(sql)\n        cols = get_table_columns(table_metadata)\n    else:\n        check_query(sql)\n        query = sql.replace(\";\", \"\")\n        cols = get_query_columns(engine, query)\n    # TODO allow validation that takes into account edge cases of pandas e.g. \"[index]\"\n    # check_partition_column(partition_column, cols)\n    # TODO partition_column isn't used; we need to use it;\n    return list(cols.keys()), query\n\n\ndef query_put_bounders(query, partition_column, start, end):  # pragma: no cover\n    \"\"\"\n    Put partition boundaries into the query.\n\n    Parameters\n    ----------\n    query : str\n        SQL query string.\n    partition_column : str\n        Column name used for data partitioning between the workers.\n    start : int\n        Lowest value to request from the `partition_column`.\n    end : int\n        Highest value to request from the `partition_column`.\n\n    Returns\n    -------\n    str\n        Query string with boundaries.\n    \"\"\"\n    where = \" WHERE TMP_TABLE.{0} >= {1} AND TMP_TABLE.{0} <= {2}\".format(\n        partition_column, start, end\n    )\n    query_with_bounders = \"SELECT * FROM ({0}) AS TMP_TABLE {1}\".format(query, where)\n    return query_with_bounders\n\n\nclass InvalidArguments(Exception):\n    \"\"\"Exception that should be raised if invalid arguments combination was found.\"\"\"\n\n\nclass InvalidQuery(Exception):\n    \"\"\"Exception that should be raised if invalid query statement was found.\"\"\"\n\n\nclass InvalidPartitionColumn(Exception):\n    \"\"\"Exception 
that should be raised if `partition_column` doesn't satisfy predefined requirements.\"\"\"\n\n\ndef read_sql_with_offset(\n    partition_column,\n    start,\n    end,\n    num_splits,\n    sql,\n    con,\n    index_col=None,\n    coerce_float=True,\n    params=None,\n    parse_dates=None,\n    columns=None,\n    chunksize=None,\n    dtype_backend=lib.no_default,\n    dtype=None,\n):  # pragma: no cover\n    \"\"\"\n    Read a chunk of SQL query or table into a pandas DataFrame.\n\n    Parameters\n    ----------\n    partition_column : str\n        Column name used for data partitioning between the workers.\n    start : int\n        Lowest value to request from the `partition_column`.\n    end : int\n        Highest value to request from the `partition_column`.\n    num_splits : int\n        The number of partitions to split the column into.\n    sql : str or SQLAlchemy Selectable (select or text object)\n        SQL query to be executed or a table name.\n    con : SQLAlchemy connectable or str\n        Connection to database (sqlite3 connections are not supported).\n    index_col : str or list of str, optional\n        Column(s) to set as index(MultiIndex).\n    coerce_float : bool, default: True\n        Attempts to convert values of non-string, non-numeric objects\n        (like decimal.Decimal) to floating point, useful for SQL result sets.\n    params : list, tuple or dict, optional\n        List of parameters to pass to ``execute`` method. The syntax used\n        to pass parameters is database driver dependent. 
Check your\n        database driver documentation for which of the five syntax styles,\n        described in PEP 249's paramstyle, is supported.\n    parse_dates : list or dict, optional\n        The behavior is as follows:\n\n        - List of column names to parse as dates.\n        - Dict of `{column_name: format string}` where format string is\n          strftime compatible in case of parsing string times, or is one of\n          (D, s, ns, ms, us) in case of parsing integer timestamps.\n        - Dict of `{column_name: arg dict}`, where the arg dict corresponds\n          to the keyword arguments of ``pandas.to_datetime``.\n          Especially useful with databases without native Datetime support,\n          such as SQLite.\n    columns : list, optional\n        List of column names to select from SQL table (only used when reading a\n        table).\n    chunksize : int, optional\n        If specified, return an iterator where `chunksize` is the number of rows\n        to include in each chunk.\n    dtype_backend : {\"numpy_nullable\", \"pyarrow\"}, default: NumPy backed DataFrames\n        Which dtype_backend to use, e.g. whether a DataFrame should have NumPy arrays,\n        nullable dtypes are used for all dtypes that have a nullable implementation when\n        \"numpy_nullable\" is set, PyArrow is used for all dtypes if \"pyarrow\" is set.\n        The dtype_backends are still experimental.\n    dtype : Type name or dict of columns, optional\n        Data type for data or columns. E.g. np.float64 or {'a': np.float64, 'b': np.int32, 'c': 'Int64'}. 
The argument is ignored if a table is passed instead of a query.\n\n    Returns\n    -------\n    list\n        List with split read results and its metadata (index, dtypes, etc.).\n    \"\"\"\n    query_with_bounders = query_put_bounders(sql, partition_column, start, end)\n    pandas_df = pandas.read_sql(\n        query_with_bounders,\n        con,\n        index_col=index_col,\n        coerce_float=coerce_float,\n        params=params,\n        parse_dates=parse_dates,\n        columns=columns,\n        chunksize=chunksize,\n        dtype_backend=dtype_backend,\n        dtype=dtype,\n    )\n    index = len(pandas_df)\n    return _split_result_for_readers(1, num_splits, pandas_df) + [index]\n"
  },
  {
    "path": "modin/experimental/core/io/text/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Experimental text format type IO functions implementations.\"\"\"\n"
  },
  {
    "path": "modin/experimental/core/io/text/csv_glob_dispatcher.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module houses `ExperimentalCSVGlobDispatcher` class, that is used for reading multiple `.csv` files simultaneously.\"\"\"\n\nimport csv\nimport glob\nimport os\nimport warnings\nfrom contextlib import ExitStack\nfrom typing import List, Tuple\n\nimport fsspec\nimport pandas\nimport pandas._libs.lib as lib\nfrom pandas.io.common import is_fsspec_url, is_url, stringify_path\n\nfrom modin.config import NPartitions\nfrom modin.core.io.file_dispatcher import OpenFile\nfrom modin.core.io.text.csv_dispatcher import CSVDispatcher\n\n\nclass ExperimentalCSVGlobDispatcher(CSVDispatcher):\n    \"\"\"Class contains utils for reading multiple `.csv` files simultaneously.\"\"\"\n\n    @classmethod\n    def _read(cls, filepath_or_buffer, **kwargs):\n        \"\"\"\n        Read data from multiple `.csv` files passed with `filepath_or_buffer` simultaneously.\n\n        Parameters\n        ----------\n        filepath_or_buffer : str, path object or file-like object\n            `filepath_or_buffer` parameter of ``read_csv`` function.\n        **kwargs : dict\n            Parameters of ``read_csv`` function.\n\n        Returns\n        -------\n        new_query_compiler : 
BaseQueryCompiler\n            Query compiler with imported data for further processing.\n        \"\"\"\n        # Ensures that the file is a string file path. Otherwise, default to pandas.\n        filepath_or_buffer = cls.get_path_or_buffer(stringify_path(filepath_or_buffer))\n        if isinstance(filepath_or_buffer, str):\n            # os.altsep == None on Linux\n            is_folder = any(\n                filepath_or_buffer.endswith(sep) for sep in (os.sep, os.altsep) if sep\n            )\n            if \"*\" not in filepath_or_buffer and not is_folder:\n                warnings.warn(\n                    \"Shell-style wildcard '*' must be in the filename pattern in order to read multiple \"\n                    + f\"files at once. Did you forget it? Passed filename: '{filepath_or_buffer}'\"\n                )\n            if not cls.file_exists(filepath_or_buffer, kwargs.get(\"storage_options\")):\n                return cls.single_worker_read(\n                    filepath_or_buffer,\n                    reason=cls._file_not_found_msg(filepath_or_buffer),\n                    **kwargs,\n                )\n            filepath_or_buffer = cls.get_path(\n                filepath_or_buffer, kwargs.get(\"storage_options\")\n            )\n        elif not cls.pathlib_or_pypath(filepath_or_buffer):\n            return cls.single_worker_read(\n                filepath_or_buffer,\n                reason=cls.BUFFER_UNSUPPORTED_MSG,\n                **kwargs,\n            )\n\n        # We read multiple csv files when the file path is a list of absolute file paths. 
We assume that all of the files will be essentially replicas of the\n        # first file but with different data values.\n        glob_filepaths = filepath_or_buffer\n        filepath_or_buffer = filepath_or_buffer[0]\n\n        compression_type = cls.infer_compression(\n            filepath_or_buffer, kwargs.get(\"compression\")\n        )\n\n        chunksize = kwargs.get(\"chunksize\")\n        if chunksize is not None:\n            return cls.single_worker_read(\n                filepath_or_buffer,\n                reason=\"`chunksize` parameter is not supported\",\n                **kwargs,\n            )\n\n        skiprows = kwargs.get(\"skiprows\")\n        if skiprows is not None and not isinstance(skiprows, int):\n            return cls.single_worker_read(\n                filepath_or_buffer,\n                reason=\"Non-integer `skiprows` value not supported\",\n                **kwargs,\n            )\n\n        nrows = kwargs.pop(\"nrows\", None)\n        names = kwargs.get(\"names\", lib.no_default)\n        index_col = kwargs.get(\"index_col\", None)\n        usecols = kwargs.get(\"usecols\", None)\n        encoding = kwargs.get(\"encoding\", None)\n        if names in [lib.no_default, None]:\n            # For the sake of the empty df, we assume no `index_col` to get the correct\n            # column names before we build the index. 
Because we pass `names` in, this\n            # step has to happen without removing the `index_col` otherwise it will not\n            # be assigned correctly.\n            names = pandas.read_csv(\n                filepath_or_buffer,\n                **dict(kwargs, usecols=None, nrows=0, skipfooter=0, index_col=None),\n            ).columns\n        elif index_col is None and not usecols:\n            # When names is set to some list that is smaller than the number of columns\n            # in the file, the first columns are built as a hierarchical index.\n            empty_pd_df = pandas.read_csv(\n                filepath_or_buffer, nrows=0, encoding=encoding\n            )\n            num_cols = len(empty_pd_df.columns)\n            if num_cols > len(names):\n                index_col = list(range(num_cols - len(names)))\n                if len(index_col) == 1:\n                    index_col = index_col[0]\n                kwargs[\"index_col\"] = index_col\n        pd_df_metadata = pandas.read_csv(\n            filepath_or_buffer, **dict(kwargs, nrows=1, skipfooter=0)\n        )\n        column_names = pd_df_metadata.columns\n        skipfooter = kwargs.get(\"skipfooter\", None)\n        skiprows = kwargs.pop(\"skiprows\", None)\n        usecols_md = cls._validate_usecols_arg(usecols)\n        if usecols is not None and usecols_md[1] != \"integer\":\n            del kwargs[\"usecols\"]\n            all_cols = pandas.read_csv(\n                filepath_or_buffer,\n                **dict(kwargs, nrows=0, skipfooter=0),\n            ).columns\n            usecols = all_cols.get_indexer_for(list(usecols_md[0]))\n        parse_dates = kwargs.pop(\"parse_dates\", False)\n        partition_kwargs = dict(\n            kwargs,\n            header=None,\n            names=names,\n            skipfooter=0,\n            skiprows=None,\n            parse_dates=parse_dates,\n            usecols=usecols,\n        )\n        encoding = kwargs.get(\"encoding\", None)\n        
quotechar = kwargs.get(\"quotechar\", '\"').encode(\n            encoding if encoding is not None else \"UTF-8\"\n        )\n        is_quoting = kwargs.get(\"quoting\", \"\") != csv.QUOTE_NONE\n\n        with ExitStack() as stack:\n            files = [\n                stack.enter_context(\n                    OpenFile(\n                        fname,\n                        \"rb\",\n                        compression_type,\n                        **(kwargs.get(\"storage_options\", None) or {}),\n                    )\n                )\n                for fname in glob_filepaths\n            ]\n\n            # Skip the header since we already have the header information and skip the\n            # rows we are told to skip.\n            if isinstance(skiprows, int) or skiprows is None:\n                if skiprows is None:\n                    skiprows = 0\n                header = kwargs.get(\"header\", \"infer\")\n                if header == \"infer\" and kwargs.get(\"names\", lib.no_default) in [\n                    lib.no_default,\n                    None,\n                ]:\n                    skip_header = 1\n                elif isinstance(header, int):\n                    skip_header = header + 1\n                elif hasattr(header, \"__iter__\") and not isinstance(header, str):\n                    skip_header = max(header) + 1\n                else:\n                    skip_header = 0\n            if kwargs.get(\"encoding\", None) is not None:\n                partition_kwargs[\"skiprows\"] = 1\n            # Launch tasks to read partitions\n            column_widths, num_splits = cls._define_metadata(\n                pd_df_metadata, column_names\n            )\n\n            args = {\n                \"num_splits\": num_splits,\n                **partition_kwargs,\n            }\n\n            splits = cls.partitioned_file(\n                files,\n                glob_filepaths,\n                num_partitions=NPartitions.get(),\n         
       nrows=nrows,\n                skiprows=skiprows,\n                skip_header=skip_header,\n                quotechar=quotechar,\n                is_quoting=is_quoting,\n            )\n            partition_ids = [None] * len(splits)\n            index_ids = [None] * len(splits)\n            dtypes_ids = [None] * len(splits)\n            for idx, chunks in enumerate(splits):\n                args.update({\"chunks\": chunks})\n                *partition_ids[idx], index_ids[idx], dtypes_ids[idx] = cls.deploy(\n                    func=cls.parse,\n                    f_kwargs=args,\n                    num_returns=num_splits + 2,\n                )\n\n        # Compute the index based on a sum of the lengths of each partition (by default)\n        # or based on the column(s) that were requested.\n        if index_col is None:\n            row_lengths = cls.materialize(index_ids)\n            new_index = pandas.RangeIndex(sum(row_lengths))\n        else:\n            index_objs = cls.materialize(index_ids)\n            row_lengths = [len(o) for o in index_objs]\n            new_index = index_objs[0].append(index_objs[1:])\n            new_index.name = pd_df_metadata.index.name\n\n        partition_ids = cls.build_partition(partition_ids, row_lengths, column_widths)\n\n        # Compute dtypes by collecting and combining all of the partitions. The\n        # reported dtypes from differing rows can be different based on the inference in\n        # the limited data seen by each worker. We use pandas to compute the exact dtype\n        # over the whole column for each column. 
The index is set below.\n        dtypes = cls.get_dtypes(dtypes_ids, column_names)\n\n        new_frame = cls.frame_cls(\n            partition_ids,\n            new_index,\n            column_names,\n            row_lengths,\n            column_widths,\n            dtypes=dtypes,\n        )\n        new_query_compiler = cls.query_compiler_cls(new_frame)\n\n        if skipfooter:\n            new_query_compiler = new_query_compiler.drop(\n                new_query_compiler.index[-skipfooter:]\n            )\n        if kwargs.get(\"squeeze\", False) and len(new_query_compiler.columns) == 1:\n            return new_query_compiler[new_query_compiler.columns[0]]\n        if index_col is None:\n            new_query_compiler._modin_frame.synchronize_labels(axis=0)\n        return new_query_compiler\n\n    @classmethod\n    def file_exists(cls, file_path: str, storage_options=None) -> bool:\n        \"\"\"\n        Check if the `file_path` is valid.\n\n        Parameters\n        ----------\n        file_path : str\n            String representing a path.\n        storage_options : dict, optional\n            Keyword from `read_*` functions.\n\n        Returns\n        -------\n        bool\n            True if the path is valid.\n        \"\"\"\n        if is_url(file_path):\n            raise NotImplementedError(\"`read_csv_glob` does not support urllib paths.\")\n\n        if not is_fsspec_url(file_path):\n            return len(glob.glob(file_path)) > 0\n\n        try:\n            from botocore.exceptions import (\n                ConnectTimeoutError,\n                EndpointConnectionError,\n                NoCredentialsError,\n            )\n\n            credential_error_type = (\n                NoCredentialsError,\n                PermissionError,\n                EndpointConnectionError,\n                ConnectTimeoutError,\n            )\n        except ModuleNotFoundError:\n            credential_error_type = (PermissionError,)\n\n        if 
storage_options is not None:\n            new_storage_options = dict(storage_options)\n            new_storage_options.pop(\"anon\", None)\n        else:\n            new_storage_options = {}\n\n        fs, _ = fsspec.core.url_to_fs(file_path, **new_storage_options)\n        exists = False\n        try:\n            exists = fs.exists(file_path)\n        except credential_error_type:\n            fs, _ = fsspec.core.url_to_fs(file_path, anon=True, **new_storage_options)\n            exists = fs.exists(file_path)\n        return exists or len(fs.glob(file_path)) > 0\n\n    @classmethod\n    def get_path(cls, file_path: str, storage_options=None) -> list:\n        \"\"\"\n        Return the path of the file(s).\n\n        Parameters\n        ----------\n        file_path : str\n            String representing a path.\n        storage_options : dict, optional\n            Keyword from `read_*` functions.\n\n        Returns\n        -------\n        list\n            List of strings of absolute file paths.\n        \"\"\"\n        if not is_fsspec_url(file_path) and not is_url(file_path):\n            relative_paths = glob.glob(file_path)\n            abs_paths = [os.path.abspath(path) for path in relative_paths]\n            return abs_paths\n\n        try:\n            from botocore.exceptions import (\n                ConnectTimeoutError,\n                EndpointConnectionError,\n                NoCredentialsError,\n            )\n\n            credential_error_type = (\n                NoCredentialsError,\n                PermissionError,\n                EndpointConnectionError,\n                ConnectTimeoutError,\n            )\n        except ModuleNotFoundError:\n            credential_error_type = (PermissionError,)\n\n        def get_file_path(fs_handle) -> List[str]:\n            if \"*\" in file_path:\n                file_paths = fs_handle.glob(file_path)\n            else:\n                file_paths = [\n                    f\n                    for 
f in fs_handle.find(file_path)\n                    if not f.endswith(\"/\")  # exclude folder\n                ]\n            if len(file_paths) == 0 and not fs_handle.exists(file_path):\n                raise FileNotFoundError(f\"Path <{file_path}> isn't available.\")\n            fs_addresses = [fs_handle.unstrip_protocol(path) for path in file_paths]\n            return fs_addresses\n\n        if storage_options is not None:\n            new_storage_options = dict(storage_options)\n            new_storage_options.pop(\"anon\", None)\n        else:\n            new_storage_options = {}\n\n        fs, _ = fsspec.core.url_to_fs(file_path, **new_storage_options)\n        try:\n            return get_file_path(fs)\n        except credential_error_type:\n            fs, _ = fsspec.core.url_to_fs(file_path, anon=True, **new_storage_options)\n        return get_file_path(fs)\n\n    @classmethod\n    def partitioned_file(\n        cls,\n        files,\n        fnames: List[str],\n        num_partitions: int = None,\n        nrows: int = None,\n        skiprows: int = None,\n        skip_header: int = None,\n        quotechar: bytes = b'\"',\n        is_quoting: bool = True,\n    ) -> List[List[Tuple[str, int, int]]]:\n        \"\"\"\n        Compute chunk sizes in bytes for every partition.\n\n        Parameters\n        ----------\n        files : file or list of files\n            File(s) to be partitioned.\n        fnames : str or list of str\n            File name(s) to be partitioned.\n        num_partitions : int, optional\n            For what number of partitions split a file.\n            If not specified grabs the value from `modin.config.NPartitions.get()`.\n        nrows : int, optional\n            Number of rows of file to read.\n        skiprows : int, optional\n            Specifies rows to skip.\n        skip_header : int, optional\n            Specifies header rows to skip.\n        quotechar : bytes, default: b'\"'\n            Indicate quote in a 
file.\n        is_quoting : bool, default: True\n            Whether or not to consider quotes.\n\n        Returns\n        -------\n        list\n            List, where each element of the list is a list of tuples. The inner lists\n            of tuples contains the data file name of the chunk, chunk start offset, and\n            chunk end offsets for its corresponding file.\n\n        Notes\n        -----\n        The logic gets really complicated if we try to use the `TextFileDispatcher.partitioned_file`.\n        \"\"\"\n        if type(files) is not list:\n            files = [files]\n\n        if num_partitions is None:\n            num_partitions = NPartitions.get()\n\n        file_sizes = [cls.file_size(f) for f in files]\n        partition_size = max(\n            1, num_partitions, (nrows if nrows else sum(file_sizes)) // num_partitions\n        )\n\n        result = []\n        split_result = []\n        split_size = 0\n        read_rows_counter = 0\n        for f, fname, f_size in zip(files, fnames, file_sizes):\n            if skiprows or skip_header:\n                skip_amount = (skiprows if skiprows else 0) + (\n                    skip_header if skip_header else 0\n                )\n\n                # TODO(williamma12): Handle when skiprows > number of rows in file. 
Currently returns empty df.\n                outside_quotes, read_rows = cls._read_rows(\n                    f,\n                    nrows=skip_amount,\n                    quotechar=quotechar,\n                    is_quoting=is_quoting,\n                )\n                if skiprows:\n                    skiprows -= read_rows\n                    if skiprows > 0:\n                        # We have more rows to skip than the amount read in the file.\n                        continue\n\n            start = f.tell()\n\n            while f.tell() < f_size:\n                if split_size >= partition_size:\n                    # Create a new split when the split has reached partition_size.\n                    # This is mainly used when we are reading row-wise partitioned files.\n                    result.append(split_result)\n                    split_result = []\n                    split_size = 0\n\n                # We calculate the amount that we need to read based off of how much of the split we have already read.\n                read_size = partition_size - split_size\n\n                if nrows:\n                    if read_rows_counter >= nrows:\n                        # # Finish when we have read enough rows.\n                        if len(split_result) > 0:\n                            # Add last split into the result.\n                            result.append(split_result)\n                        return result\n                    elif read_rows_counter + read_size > nrows:\n                        # Ensure that we will not read more than nrows.\n                        read_size = nrows - read_rows_counter\n\n                    outside_quotes, read_rows = cls._read_rows(\n                        f,\n                        nrows=read_size,\n                        quotechar=quotechar,\n                        is_quoting=is_quoting,\n                    )\n                    split_size += read_rows\n                    read_rows_counter += 
read_rows\n                else:\n                    outside_quotes = cls.offset(\n                        f,\n                        offset_size=read_size,\n                        quotechar=quotechar,\n                        is_quoting=is_quoting,\n                    )\n\n                split_result.append((fname, start, f.tell()))\n                split_size += f.tell() - start\n                start = f.tell()\n\n                # Add outside_quotes.\n                if is_quoting and not outside_quotes:\n                    warnings.warn(\"File has mismatched quotes\")\n\n        # Add last split into the result.\n        if len(split_result) > 0:\n            result.append(split_result)\n\n        return result\n"
  },
  {
    "path": "modin/experimental/core/io/text/custom_text_dispatcher.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module houses `ExperimentalCustomTextDispatcher` class, that is used for reading custom text files.\"\"\"\n\nimport pandas\nfrom pandas.io.common import stringify_path\n\nfrom modin.config import NPartitions\nfrom modin.core.io.file_dispatcher import OpenFile\nfrom modin.core.io.text.text_file_dispatcher import TextFileDispatcher\n\n\nclass ExperimentalCustomTextDispatcher(TextFileDispatcher):\n    \"\"\"Class handles utils for reading custom text files.\"\"\"\n\n    @classmethod\n    def _read(cls, filepath_or_buffer, columns, custom_parser, **kwargs):\n        r\"\"\"\n        Read data from `filepath_or_buffer` according to the passed `read_custom_text` `kwargs` parameters.\n\n        Parameters\n        ----------\n        filepath_or_buffer : str, path object or file-like object\n            `filepath_or_buffer` parameter of `read_custom_text` function.\n        columns : list or callable(file-like object, \\*\\*kwargs -> list\n            Column names of list type or callable that create column names from opened file\n            and passed `kwargs`.\n        custom_parser : callable(file-like object, \\*\\*kwargs -> pandas.DataFrame\n            
Function that takes as input a part of the `filepath_or_buffer` file loaded into\n            memory in file-like object form.\n        **kwargs : dict\n            Parameters of `read_custom_text` function.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            Query compiler with imported data for further processing.\n        \"\"\"\n        filepath_or_buffer = stringify_path(filepath_or_buffer)\n        filepath_or_buffer_md = (\n            cls.get_path(filepath_or_buffer)\n            if isinstance(filepath_or_buffer, str)\n            else cls.get_path_or_buffer(filepath_or_buffer)\n        )\n        compression_infered = cls.infer_compression(\n            filepath_or_buffer, kwargs[\"compression\"]\n        )\n\n        with OpenFile(filepath_or_buffer_md, \"rb\", compression_infered) as f:\n            splits, _ = cls.partitioned_file(\n                f,\n                num_partitions=NPartitions.get(),\n                is_quoting=kwargs.pop(\"is_quoting\"),\n                nrows=kwargs[\"nrows\"],\n            )\n\n        if callable(columns):\n            with OpenFile(filepath_or_buffer_md, \"rb\", compression_infered) as f:\n                columns = columns(f, **kwargs)\n        if not isinstance(columns, pandas.Index):\n            columns = pandas.Index(columns)\n\n        empty_pd_df = pandas.DataFrame(columns=columns)\n        index_name = empty_pd_df.index.name\n        column_widths, num_splits = cls._define_metadata(empty_pd_df, columns)\n\n        # kwargs that will be passed to the workers\n        partition_kwargs = dict(\n            kwargs,\n            fname=filepath_or_buffer_md,\n            num_splits=num_splits,\n            nrows=None,\n            compression=compression_infered,\n        )\n\n        partition_ids, index_ids, dtypes_ids = cls._launch_tasks(\n            splits, callback=custom_parser, **partition_kwargs\n        )\n\n        new_query_compiler = cls._get_new_qc(\n            
partition_ids=partition_ids,\n            index_ids=index_ids,\n            dtypes_ids=dtypes_ids,\n            index_col=None,\n            index_name=index_name,\n            column_widths=column_widths,\n            column_names=columns,\n            nrows=kwargs[\"nrows\"],\n        )\n        return new_query_compiler\n"
  },
  {
    "path": "modin/experimental/core/storage_formats/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Experimental functionality related to storage formats supported.\"\"\"\n"
  },
  {
    "path": "modin/experimental/core/storage_formats/pandas/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"The module represents the query compiler level for the pandas storage format (experimental).\"\"\"\n"
  },
  {
    "path": "modin/experimental/core/storage_formats/pandas/parsers.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\n\"\"\"Module houses experimental Modin parser classes, that are used for data parsing on the workers.\"\"\"\n\nimport warnings\nfrom io import BytesIO\n\nimport pandas\nfrom pandas.util._decorators import doc\n\nfrom modin.core.io.file_dispatcher import OpenFile\nfrom modin.core.storage_formats.pandas.parsers import (\n    PandasCSVParser,\n    PandasParser,\n    _doc_pandas_parser_class,\n    _doc_parse_func,\n    _doc_parse_parameters_common,\n    _split_result_for_readers,\n)\n\n\n@doc(_doc_pandas_parser_class, data_type=\"multiple CSV files simultaneously\")\nclass ExperimentalPandasCSVGlobParser(PandasCSVParser):\n    @staticmethod\n    @doc(\n        _doc_parse_func,\n        parameters=\"\"\"chunks : list\n    List, where each element of the list is a list of tuples. 
The inner lists\n    of tuples contains the data file name of the chunk, chunk start offset, and\n    chunk end offsets for its corresponding file.\"\"\",\n    )\n    def parse(chunks, **kwargs):\n        warnings.filterwarnings(\"ignore\")\n        num_splits = kwargs.pop(\"num_splits\", None)\n        index_col = kwargs.get(\"index_col\", None)\n\n        # `single_worker_read` just pass filename via chunks; need check\n        if isinstance(chunks, str):\n            return pandas.read_csv(chunks, **kwargs)\n\n        # pop `compression` from kwargs because `bio` below is uncompressed\n        compression = kwargs.pop(\"compression\", \"infer\")\n        storage_options = kwargs.pop(\"storage_options\", None) or {}\n        pandas_dfs = []\n        for fname, start, end in chunks:\n            if start is not None and end is not None:\n                with OpenFile(fname, \"rb\", compression, **storage_options) as bio:\n                    if kwargs.get(\"encoding\", None) is not None:\n                        header = b\"\" + bio.readline()\n                    else:\n                        header = b\"\"\n                    bio.seek(start)\n                    to_read = header + bio.read(end - start)\n                pandas_dfs.append(pandas.read_csv(BytesIO(to_read), **kwargs))\n            else:\n                # This only happens when we are reading with only one worker (Default)\n                return pandas.read_csv(\n                    fname,\n                    compression=compression,\n                    storage_options=storage_options,\n                    **kwargs,\n                )\n\n        # Combine read in data.\n        if len(pandas_dfs) > 1:\n            pandas_df = pandas.concat(pandas_dfs)\n        elif len(pandas_dfs) > 0:\n            pandas_df = pandas_dfs[0]\n        else:\n            pandas_df = pandas.DataFrame()\n\n        # Set internal index.\n        if index_col is not None:\n            index = pandas_df.index\n        
else:\n            # The lengths will become the RangeIndex\n            index = len(pandas_df)\n        return _split_result_for_readers(1, num_splits, pandas_df) + [\n            index,\n            pandas_df.dtypes,\n        ]\n\n\n@doc(_doc_pandas_parser_class, data_type=\"pickled pandas objects\")\nclass ExperimentalPandasPickleParser(PandasParser):\n    @staticmethod\n    @doc(_doc_parse_func, parameters=_doc_parse_parameters_common)\n    def parse(fname, **kwargs):\n        warnings.filterwarnings(\"ignore\")\n        num_splits = 1\n        single_worker_read = kwargs.pop(\"single_worker_read\", None)\n        df = pandas.read_pickle(fname, **kwargs)\n        if single_worker_read:\n            return df\n        assert isinstance(\n            df, pandas.DataFrame\n        ), f\"Pickled obj type: [{type(df)}] in [{fname}]; works only with pandas.DataFrame\"\n\n        length = len(df)\n        width = len(df.columns)\n\n        return _split_result_for_readers(1, num_splits, df) + [length, width]\n\n\n@doc(_doc_pandas_parser_class, data_type=\"parquet files\")\nclass ExperimentalPandasParquetParser(PandasParser):\n    @staticmethod\n    @doc(_doc_parse_func, parameters=_doc_parse_parameters_common)\n    def parse(fname, **kwargs):\n        warnings.filterwarnings(\"ignore\")\n        num_splits = 1\n        single_worker_read = kwargs.pop(\"single_worker_read\", None)\n        df = pandas.read_parquet(fname, **kwargs)\n        if single_worker_read:\n            return df\n\n        length = len(df)\n        width = len(df.columns)\n\n        return _split_result_for_readers(1, num_splits, df) + [length, width]\n\n\n@doc(_doc_pandas_parser_class, data_type=\"json files\")\nclass ExperimentalPandasJsonParser(PandasParser):\n    @staticmethod\n    @doc(_doc_parse_func, parameters=_doc_parse_parameters_common)\n    def parse(fname, **kwargs):\n        warnings.filterwarnings(\"ignore\")\n        num_splits = 1\n        single_worker_read = 
kwargs.pop(\"single_worker_read\", None)\n        df = pandas.read_json(fname, **kwargs)\n        if single_worker_read:\n            return df\n\n        length = len(df)\n        width = len(df.columns)\n\n        return _split_result_for_readers(1, num_splits, df) + [length, width]\n\n\n@doc(_doc_pandas_parser_class, data_type=\"XML files\")\nclass ExperimentalPandasXmlParser(PandasParser):\n    @staticmethod\n    @doc(_doc_parse_func, parameters=_doc_parse_parameters_common)\n    def parse(fname, **kwargs):\n        warnings.filterwarnings(\"ignore\")\n        num_splits = 1\n        single_worker_read = kwargs.pop(\"single_worker_read\", None)\n        df = pandas.read_xml(fname, **kwargs)\n        if single_worker_read:\n            return df\n\n        length = len(df)\n        width = len(df.columns)\n\n        return _split_result_for_readers(1, num_splits, df) + [length, width]\n\n\n@doc(_doc_pandas_parser_class, data_type=\"custom text\")\nclass ExperimentalCustomTextParser(PandasParser):\n    @staticmethod\n    @doc(_doc_parse_func, parameters=_doc_parse_parameters_common)\n    def parse(fname, **kwargs):\n        return PandasParser.generic_parse(fname, **kwargs)\n"
  },
  {
    "path": "modin/experimental/fuzzydata/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module holds experimental fuzzydata specific functionality for Modin.\"\"\"\n"
  },
  {
    "path": "modin/experimental/pandas/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"\nThe main module through which interaction with the experimental API takes place.\n\nSee `Experimental API Reference` for details.\n\nNotes\n-----\n* Some of experimental APIs deviate from pandas in order to provide improved\n  performance.\n\n* Although the use of experimental storage formats and engines is available through the\n  `modin.pandas` module when defining environment variable `MODIN_EXPERIMENTAL=true`,\n  the use of experimental I/O functions is available only through the\n  `modin.experimental.pandas` module.\n\nExamples\n--------\n>>> import modin.experimental.pandas as pd\n>>> df = pd.read_csv_glob(\"data*.csv\")\n\"\"\"\n\nfrom modin.pandas import *  # noqa F401, F403\n\nfrom .io import (  # noqa F401\n    read_csv_glob,\n    read_custom_text,\n    read_json_glob,\n    read_parquet_glob,\n    read_pickle_glob,\n    read_sql,\n    read_xml_glob,\n)\n"
  },
  {
    "path": "modin/experimental/pandas/io.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Implement experimental I/O public API.\"\"\"\n\nfrom __future__ import annotations\n\nimport inspect\nimport pathlib\nimport pickle\nfrom typing import IO, AnyStr, Callable, Iterator, Literal, Optional, Union\n\nimport pandas\nimport pandas._libs.lib as lib\nfrom pandas._typing import CompressionOptions, DtypeArg, DtypeBackend, StorageOptions\n\nfrom modin.core.storage_formats import BaseQueryCompiler\nfrom modin.utils import expanduser_path_arg\n\nfrom . 
import DataFrame\n\n\ndef read_sql(\n    sql,\n    con,\n    index_col=None,\n    coerce_float=True,\n    params=None,\n    parse_dates=None,\n    columns=None,\n    chunksize=None,\n    dtype_backend=lib.no_default,\n    dtype=None,\n    partition_column: Optional[str] = None,\n    lower_bound: Optional[int] = None,\n    upper_bound: Optional[int] = None,\n    max_sessions: Optional[int] = None,\n) -> Union[DataFrame, Iterator[DataFrame]]:\n    \"\"\"\n    General documentation is available in `modin.pandas.read_sql`.\n\n    This experimental feature provides distributed reading from a sql file.\n    The function extended with `Spark-like parameters <https://spark.apache.org/docs/2.0.0/api/R/read.jdbc.html>`_\n    such as ``partition_column``, ``lower_bound`` and ``upper_bound``. With these\n    parameters, the user will be able to specify how to partition the imported data.\n\n    Parameters\n    ----------\n    sql : str or SQLAlchemy Selectable (select or text object)\n        SQL query to be executed or a table name.\n    con : SQLAlchemy connectable, str, or sqlite3 connection\n        Using SQLAlchemy makes it possible to use any DB supported by that\n        library. If a DBAPI2 object, only sqlite3 is supported. The user is responsible\n        for engine disposal and connection closure for the SQLAlchemy\n        connectable; str connections are closed automatically. See\n        `here <https://docs.sqlalchemy.org/en/13/core/connections.html>`_.\n    index_col : str or list of str, optional\n        Column(s) to set as index(MultiIndex).\n    coerce_float : bool, default: True\n        Attempts to convert values of non-string, non-numeric objects (like\n        decimal.Decimal) to floating point, useful for SQL result sets.\n    params : list, tuple or dict, optional\n        List of parameters to pass to execute method. The syntax used to pass\n        parameters is database driver dependent. 
Check your database driver\n        documentation for which of the five syntax styles, described in PEP 249's\n        paramstyle, is supported. E.g. for psycopg2, uses %(name)s so use params=\n        {'name' : 'value'}.\n    parse_dates : list or dict, optional\n        - List of column names to parse as dates.\n        - Dict of ``{column_name: format string}`` where format string is\n          strftime compatible in case of parsing string times, or is one of\n          (D, s, ns, ms, us) in case of parsing integer timestamps.\n        - Dict of ``{column_name: arg dict}``, where the arg dict corresponds\n          to the keyword arguments of :func:`pandas.to_datetime`.\n          Especially useful with databases without native Datetime support,\n          such as SQLite.\n    columns : list, optional\n        List of column names to select from SQL table (only used when reading\n        a table).\n    chunksize : int, optional\n        If specified, return an iterator where `chunksize` is the\n        number of rows to include in each chunk.\n    dtype_backend : {\"numpy_nullable\", \"pyarrow\"}, default: NumPy backed DataFrames\n        Which dtype_backend to use, e.g. whether a DataFrame should have NumPy arrays,\n        nullable dtypes are used for all dtypes that have a nullable implementation when\n        \"numpy_nullable\" is set, PyArrow is used for all dtypes if \"pyarrow\" is set.\n        The dtype_backends are still experimental.\n    dtype : Type name or dict of columns, optional\n        Data type for data or columns. E.g. np.float64 or {'a': np.float64, 'b': np.int32, 'c': 'Int64'}. 
The argument is ignored if a table is passed instead of a query.\n    partition_column : str, optional\n        Column used to share the data between the workers (MUST be an INTEGER column).\n    lower_bound : int, optional\n        The minimum value to be requested from the partition_column.\n    upper_bound : int, optional\n        The maximum value to be requested from the partition_column.\n    max_sessions : int, optional\n        The maximum number of simultaneous connections allowed to use.\n\n    Returns\n    -------\n    modin.DataFrame or Iterator[modin.DataFrame]\n    \"\"\"\n    _, _, _, kwargs = inspect.getargvalues(inspect.currentframe())\n\n    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher\n\n    result = FactoryDispatcher.read_sql_distributed(**kwargs)\n    if isinstance(result, BaseQueryCompiler):\n        return DataFrame(query_compiler=result)\n    return (DataFrame(query_compiler=qc) for qc in result)\n\n\n@expanduser_path_arg(\"filepath_or_buffer\")\ndef read_custom_text(\n    filepath_or_buffer,\n    columns,\n    custom_parser,\n    compression=\"infer\",\n    nrows: Optional[int] = None,\n    is_quoting=True,\n):\n    r\"\"\"\n    Load custom text data from file.\n\n    Parameters\n    ----------\n    filepath_or_buffer : str\n        File path where the custom text data will be loaded from.\n    columns : list or callable(file-like object, \\*\\*kwargs) -> list\n        Column names of list type or callable that creates column names from opened file\n        and passed `kwargs`.\n    custom_parser : callable(file-like object, \\*\\*kwargs) -> pandas.DataFrame\n        Function that takes as input a part of the `filepath_or_buffer` file loaded into\n        memory in file-like object form.\n    compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default: 'infer'\n        If 'infer' and 'path_or_url' is path-like, then detect compression from\n        the following extensions: '.gz', '.bz2', '.zip', 
or '.xz' (otherwise no\n        compression). If 'infer' and 'path_or_url' is not path-like, then use\n        None (= no decompression).\n    nrows : int, optional\n        Amount of rows to read.\n    is_quoting : bool, default: True\n        Whether or not to consider quotes.\n\n    Returns\n    -------\n    modin.DataFrame\n    \"\"\"\n    _, _, _, kwargs = inspect.getargvalues(inspect.currentframe())\n\n    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher\n\n    return DataFrame(query_compiler=FactoryDispatcher.read_custom_text(**kwargs))\n\n\n# CSV and table\ndef _make_parser_func(sep: str, funcname: str) -> Callable:\n    \"\"\"\n    Create a parser function from the given sep.\n\n    Parameters\n    ----------\n    sep : str\n        The separator default to use for the parser.\n    funcname : str\n        The name of the generated parser function.\n\n    Returns\n    -------\n    Callable\n    \"\"\"\n\n    def parser_func(\n        filepath_or_buffer: Union[str, pathlib.Path, IO[AnyStr]],\n        *,\n        sep=lib.no_default,\n        delimiter=None,\n        header=\"infer\",\n        names=lib.no_default,\n        index_col=None,\n        usecols=None,\n        dtype=None,\n        engine=None,\n        converters=None,\n        true_values=None,\n        false_values=None,\n        skipinitialspace=False,\n        skiprows=None,\n        skipfooter=0,\n        nrows=None,\n        na_values=None,\n        keep_default_na=True,\n        na_filter=True,\n        verbose=lib.no_default,\n        skip_blank_lines=True,\n        parse_dates=None,\n        infer_datetime_format=lib.no_default,\n        keep_date_col=lib.no_default,\n        date_parser=lib.no_default,\n        date_format=None,\n        dayfirst=False,\n        cache_dates=True,\n        iterator=False,\n        chunksize=None,\n        compression=\"infer\",\n        thousands=None,\n        decimal: str = \".\",\n        lineterminator=None,\n        
quotechar='\"',\n        quoting=0,\n        escapechar=None,\n        comment=None,\n        encoding=None,\n        encoding_errors=\"strict\",\n        dialect=None,\n        on_bad_lines=\"error\",\n        doublequote=True,\n        delim_whitespace=lib.no_default,\n        low_memory=True,\n        memory_map=False,\n        float_precision=None,\n        storage_options: StorageOptions = None,\n        dtype_backend=lib.no_default,\n    ) -> DataFrame:\n        # ISSUE #2408: parse parameter shared with pandas read_csv and read_table and update with provided args\n        _pd_read_csv_signature = {\n            val.name for val in inspect.signature(pandas.read_csv).parameters.values()\n        }\n        _, _, _, f_locals = inspect.getargvalues(inspect.currentframe())\n        if f_locals.get(\"sep\", sep) is False:\n            f_locals[\"sep\"] = \"\\t\"\n\n        kwargs = {k: v for k, v in f_locals.items() if k in _pd_read_csv_signature}\n        return _read(**kwargs)\n\n    parser_func.__doc__ = _read.__doc__\n    parser_func.__name__ = funcname\n    return expanduser_path_arg(\"filepath_or_buffer\")(parser_func)\n\n\ndef _read(**kwargs) -> DataFrame:\n    \"\"\"\n    General documentation is available in `modin.pandas.read_csv`.\n\n    This experimental feature provides parallel reading from multiple csv files which are\n    defined by glob pattern.\n\n    Parameters\n    ----------\n    **kwargs : dict\n        Keyword arguments in `modin.pandas.read_csv`.\n\n    Returns\n    -------\n    modin.DataFrame\n\n    Examples\n    --------\n    >>> import modin.experimental.pandas as pd\n    >>> df = pd.read_csv_glob(\"s3://dask-data/nyc-taxi/2015/yellow_tripdata_2015-1*\")\n    UserWarning: `read_*` implementation has mismatches with pandas:\n    Data types of partitions are different! Please refer to the troubleshooting section of the Modin documentation to fix this issue.\n            VendorID tpep_pickup_datetime  ... 
total_amount  congestion_surcharge\n    0             1.0  2020-10-01 00:09:08  ...         4.30                   0.0\n    1             1.0  2020-10-01 00:09:19  ...        13.30                   2.5\n    2             1.0  2020-10-01 00:30:00  ...        15.36                   2.5\n    3             2.0  2020-10-01 00:56:46  ...        -3.80                   0.0\n    4             2.0  2020-10-01 00:56:46  ...         3.80                   0.0\n    ...           ...                  ...  ...          ...                   ...\n    4652008       NaN  2020-12-31 23:44:35  ...        43.95                   2.5\n    4652009       NaN  2020-12-31 23:41:36  ...        20.17                   2.5\n    4652010       NaN  2020-12-31 23:01:17  ...        78.98                   0.0\n    4652011       NaN  2020-12-31 23:31:29  ...        39.50                   0.0\n    4652012       NaN  2020-12-31 23:12:48  ...        20.64                   0.0\n\n    [4652013 rows x 18 columns]\n    \"\"\"\n    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher\n\n    pd_obj = FactoryDispatcher.read_csv_glob(**kwargs)\n    # This happens when `read_csv` returns a TextFileReader object for iterating through\n    if isinstance(pd_obj, pandas.io.parsers.TextFileReader):\n        reader = pd_obj.read\n        pd_obj.read = lambda *args, **kwargs: DataFrame(\n            query_compiler=reader(*args, **kwargs)\n        )\n        return pd_obj\n\n    return DataFrame(query_compiler=pd_obj)\n\n\nread_csv_glob = _make_parser_func(sep=\",\", funcname=\"read_csv_glob\")\n\n\n@expanduser_path_arg(\"filepath_or_buffer\")\ndef read_pickle_glob(\n    filepath_or_buffer,\n    compression: Optional[str] = \"infer\",\n    storage_options: StorageOptions = None,\n):\n    \"\"\"\n    Load pickled pandas object from files.\n\n    This experimental feature provides parallel reading from multiple pickle files which are\n    defined by glob pattern. 
The files must contain parts of one dataframe, which can be\n    obtained, for example, by the `DataFrame.modin.to_pickle_glob` function.\n\n    Parameters\n    ----------\n    filepath_or_buffer : str, path object or file-like object\n        File path, URL, or buffer where the pickled object will be loaded from.\n        Accept URL. URL is not limited to S3 and GCS.\n    compression : {{'infer', 'gzip', 'bz2', 'zip', 'xz', None}}, default: 'infer'\n        If 'infer' and 'path_or_url' is path-like, then detect compression from\n        the following extensions: '.gz', '.bz2', '.zip', or '.xz' (otherwise no\n        compression). If 'infer' and 'path_or_url' is not path-like, then use\n        None (= no decompression).\n    storage_options : dict, optional\n        Extra options that make sense for a particular storage connection, e.g.\n        host, port, username, password, etc., if using a URL that will be parsed by\n        fsspec, e.g., starting \"s3://\", \"gcs://\". An error will be raised if providing\n        this argument with a non-fsspec URL. 
See the fsspec and backend storage\n        implementation docs for the set of allowed keys and values.\n\n    Returns\n    -------\n    unpickled : same type as object stored in file\n\n    Notes\n    -----\n    The number of partitions is equal to the number of input files.\n    \"\"\"\n    _, _, _, kwargs = inspect.getargvalues(inspect.currentframe())\n\n    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher\n\n    return DataFrame(query_compiler=FactoryDispatcher.read_pickle_glob(**kwargs))\n\n\n@expanduser_path_arg(\"filepath_or_buffer\")\ndef to_pickle_glob(\n    self,\n    filepath_or_buffer,\n    compression: CompressionOptions = \"infer\",\n    protocol: int = pickle.HIGHEST_PROTOCOL,\n    storage_options: StorageOptions = None,\n) -> None:\n    \"\"\"\n    Pickle (serialize) object to file.\n\n    This experimental feature provides parallel writing into multiple pickle files which are\n    defined by glob pattern, otherwise (without glob pattern) default pandas implementation is used.\n\n    Parameters\n    ----------\n    filepath_or_buffer : str\n        File path where the pickled object will be stored.\n    compression : {{'infer', 'gzip', 'bz2', 'zip', 'xz', None}}, default: 'infer'\n        A string representing the compression to use in the output file. By\n        default, infers from the file extension in specified path.\n        Compression mode may be any of the following possible\n        values: {{'infer', 'gzip', 'bz2', 'zip', 'xz', None}}. If compression\n        mode is 'infer' and path_or_buf is path-like, then detect\n        compression mode from the following extensions:\n        '.gz', '.bz2', '.zip' or '.xz'. 
(otherwise no compression).\n        If dict given and mode is 'zip' or inferred as 'zip', other entries\n        passed as additional compression options.\n    protocol : int, default: pickle.HIGHEST_PROTOCOL\n        Int which indicates which protocol should be used by the pickler,\n        default HIGHEST_PROTOCOL (see `pickle docs <https://docs.python.org/3/library/pickle.html>`_\n        paragraph 12.1.2 for details). The possible  values are 0, 1, 2, 3, 4, 5. A negative value\n        for the protocol parameter is equivalent to setting its value to HIGHEST_PROTOCOL.\n    storage_options : dict, optional\n        Extra options that make sense for a particular storage connection, e.g.\n        host, port, username, password, etc., if using a URL that will be parsed by\n        fsspec, e.g., starting \"s3://\", \"gcs://\". An error will be raised if providing\n        this argument with a non-fsspec URL. See the fsspec and backend storage\n        implementation docs for the set of allowed keys and values.\n    \"\"\"\n    obj = self\n    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher\n\n    if isinstance(self, DataFrame):\n        obj = self._query_compiler\n    FactoryDispatcher.to_pickle_glob(\n        obj,\n        filepath_or_buffer=filepath_or_buffer,\n        compression=compression,\n        protocol=protocol,\n        storage_options=storage_options,\n    )\n\n\n@expanduser_path_arg(\"path\")\ndef read_parquet_glob(\n    path,\n    engine: str = \"auto\",\n    columns: list[str] | None = None,\n    storage_options: StorageOptions = None,\n    use_nullable_dtypes: bool = lib.no_default,\n    dtype_backend=lib.no_default,\n    filesystem=None,\n    filters=None,\n    **kwargs,\n) -> DataFrame:  # noqa: PR01\n    \"\"\"\n    Load a parquet object from the file path, returning a DataFrame.\n\n    This experimental feature provides parallel reading from multiple parquet files which are\n    defined by glob pattern. 
The files must contain parts of one dataframe, which can be\n    obtained, for example, by `DataFrame.modin.to_parquet_glob` function.\n\n    Returns\n    -------\n    DataFrame\n\n    Notes\n    -----\n    * Only string type supported for `path` argument.\n    * The rest of the arguments are the same as for `pandas.read_parquet`.\n    \"\"\"\n    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher\n\n    return DataFrame(\n        query_compiler=FactoryDispatcher.read_parquet_glob(\n            path=path,\n            engine=engine,\n            columns=columns,\n            storage_options=storage_options,\n            use_nullable_dtypes=use_nullable_dtypes,\n            dtype_backend=dtype_backend,\n            filesystem=filesystem,\n            filters=filters,\n            **kwargs,\n        )\n    )\n\n\n@expanduser_path_arg(\"path\")\ndef to_parquet_glob(\n    self,\n    path,\n    engine=\"auto\",\n    compression=\"snappy\",\n    index=None,\n    partition_cols=None,\n    storage_options: StorageOptions = None,\n    **kwargs,\n) -> None:  # noqa: PR01\n    \"\"\"\n    Write a DataFrame to the binary parquet format.\n\n    This experimental feature provides parallel writing into multiple parquet files which are\n    defined by glob pattern, otherwise (without glob pattern) default pandas implementation is used.\n\n    Notes\n    -----\n    * Only string type supported for `path` argument.\n    * The rest of the arguments are the same as for `pandas.to_parquet`.\n    \"\"\"\n    obj = self\n    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher\n\n    if isinstance(self, DataFrame):\n        obj = self._query_compiler\n    FactoryDispatcher.to_parquet_glob(\n        obj,\n        path=path,\n        engine=engine,\n        compression=compression,\n        index=index,\n        partition_cols=partition_cols,\n        storage_options=storage_options,\n        **kwargs,\n    
)\n\n\n@expanduser_path_arg(\"path_or_buf\")\ndef read_json_glob(\n    path_or_buf,\n    *,\n    orient: str | None = None,\n    typ: Literal[\"frame\", \"series\"] = \"frame\",\n    dtype: DtypeArg | None = None,\n    convert_axes=None,\n    convert_dates: bool | list[str] = True,\n    keep_default_dates: bool = True,\n    precise_float: bool = False,\n    date_unit: str | None = None,\n    encoding: str | None = None,\n    encoding_errors: str | None = \"strict\",\n    lines: bool = False,\n    chunksize: int | None = None,\n    compression: CompressionOptions = \"infer\",\n    nrows: int | None = None,\n    storage_options: StorageOptions = None,\n    dtype_backend: Union[DtypeBackend, lib.NoDefault] = lib.no_default,\n    engine=\"ujson\",\n) -> DataFrame:  # noqa: PR01\n    \"\"\"\n    Convert a JSON string to pandas object.\n\n    This experimental feature provides parallel reading from multiple json files which are\n    defined by glob pattern. The files must contain parts of one dataframe, which can be\n    obtained, for example, by `DataFrame.modin.to_json_glob` function.\n\n    Returns\n    -------\n    DataFrame\n\n    Notes\n    -----\n    * Only string type supported for `path_or_buf` argument.\n    * The rest of the arguments are the same as for `pandas.read_json`.\n    \"\"\"\n    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher\n\n    if nrows is not None:\n        raise NotImplementedError(\n            \"`read_json_glob` only support nrows is None, otherwise use `to_json`.\"\n        )\n\n    return DataFrame(\n        query_compiler=FactoryDispatcher.read_json_glob(\n            path_or_buf=path_or_buf,\n            orient=orient,\n            typ=typ,\n            dtype=dtype,\n            convert_axes=convert_axes,\n            convert_dates=convert_dates,\n            keep_default_dates=keep_default_dates,\n            precise_float=precise_float,\n            date_unit=date_unit,\n            
encoding=encoding,\n            encoding_errors=encoding_errors,\n            lines=lines,\n            chunksize=chunksize,\n            compression=compression,\n            nrows=nrows,\n            storage_options=storage_options,\n            dtype_backend=dtype_backend,\n            engine=engine,\n        )\n    )\n\n\n@expanduser_path_arg(\"path_or_buf\")\ndef to_json_glob(\n    self,\n    path_or_buf=None,\n    orient=None,\n    date_format=None,\n    double_precision=10,\n    force_ascii=True,\n    date_unit=\"ms\",\n    default_handler=None,\n    lines=False,\n    compression=\"infer\",\n    index=None,\n    indent=None,\n    storage_options: StorageOptions = None,\n    mode=\"w\",\n) -> None:  # noqa: PR01\n    \"\"\"\n    Convert the object to a JSON string.\n\n    Notes\n    -----\n    * Only string type supported for `path_or_buf` argument.\n    * The rest of the arguments are the same as for `pandas.to_json`.\n    \"\"\"\n    obj = self\n    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher\n\n    if isinstance(self, DataFrame):\n        obj = self._query_compiler\n    FactoryDispatcher.to_json_glob(\n        obj,\n        path_or_buf=path_or_buf,\n        orient=orient,\n        date_format=date_format,\n        double_precision=double_precision,\n        force_ascii=force_ascii,\n        date_unit=date_unit,\n        default_handler=default_handler,\n        lines=lines,\n        compression=compression,\n        index=index,\n        indent=indent,\n        storage_options=storage_options,\n        mode=mode,\n    )\n\n\n@expanduser_path_arg(\"path_or_buffer\")\ndef read_xml_glob(\n    path_or_buffer,\n    *,\n    xpath=\"./*\",\n    namespaces=None,\n    elems_only=False,\n    attrs_only=False,\n    names=None,\n    dtype=None,\n    converters=None,\n    parse_dates=None,\n    encoding=\"utf-8\",\n    parser=\"lxml\",\n    stylesheet=None,\n    iterparse=None,\n    compression=\"infer\",\n    storage_options: 
StorageOptions = None,\n    dtype_backend=lib.no_default,\n) -> DataFrame:  # noqa: PR01\n    \"\"\"\n    Read XML document into a DataFrame object.\n\n    This experimental feature provides parallel reading from multiple XML files which are\n    defined by glob pattern. The files must contain parts of one dataframe, which can be\n    obtained, for example, by `DataFrame.modin.to_xml_glob` function.\n\n    Returns\n    -------\n    DataFrame\n\n    Notes\n    -----\n    * Only string type supported for `path_or_buffer` argument.\n    * The rest of the arguments are the same as for `pandas.read_xml`.\n    \"\"\"\n    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher\n\n    return DataFrame(\n        query_compiler=FactoryDispatcher.read_xml_glob(\n            path_or_buffer=path_or_buffer,\n            xpath=xpath,\n            namespaces=namespaces,\n            elems_only=elems_only,\n            attrs_only=attrs_only,\n            names=names,\n            dtype=dtype,\n            converters=converters,\n            parse_dates=parse_dates,\n            encoding=encoding,\n            parser=parser,\n            stylesheet=stylesheet,\n            iterparse=iterparse,\n            compression=compression,\n            storage_options=storage_options,\n            dtype_backend=dtype_backend,\n        )\n    )\n\n\n@expanduser_path_arg(\"path_or_buffer\")\ndef to_xml_glob(\n    self,\n    path_or_buffer=None,\n    index=True,\n    root_name=\"data\",\n    row_name=\"row\",\n    na_rep=None,\n    attr_cols=None,\n    elem_cols=None,\n    namespaces=None,\n    prefix=None,\n    encoding=\"utf-8\",\n    xml_declaration=True,\n    pretty_print=True,\n    parser=\"lxml\",\n    stylesheet=None,\n    compression=\"infer\",\n    storage_options=None,\n) -> None:  # noqa: PR01\n    \"\"\"\n    Render a DataFrame to an XML document.\n\n    Notes\n    -----\n    * Only string type supported for `path_or_buffer` argument.\n    * The rest of 
the arguments are the same as for `pandas.to_xml`.\n    \"\"\"\n    obj = self\n    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher\n\n    if isinstance(self, DataFrame):\n        obj = self._query_compiler\n    FactoryDispatcher.to_xml_glob(\n        obj,\n        path_or_buffer=path_or_buffer,\n        index=index,\n        root_name=root_name,\n        row_name=row_name,\n        na_rep=na_rep,\n        attr_cols=attr_cols,\n        elem_cols=elem_cols,\n        namespaces=namespaces,\n        prefix=prefix,\n        encoding=encoding,\n        xml_declaration=xml_declaration,\n        pretty_print=pretty_print,\n        parser=parser,\n        stylesheet=stylesheet,\n        compression=compression,\n        storage_options=storage_options,\n    )\n"
  },
  {
    "path": "modin/experimental/sklearn/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module holds experimental scikit-learn specific functionality for Modin.\"\"\"\n"
  },
  {
    "path": "modin/experimental/sklearn/model_selection/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module holds model selection specific functionality.\"\"\"\n\nfrom .train_test_split import train_test_split\n\n__all__ = [\"train_test_split\"]\n"
  },
  {
    "path": "modin/experimental/sklearn/model_selection/train_test_split.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module holds `train_test_split` function.\"\"\"\n\n\n# FIXME: Change `**options`-->`train_size=0.75`\ndef train_test_split(df, **options):\n    \"\"\"\n    Split input data to train and test data.\n\n    Parameters\n    ----------\n    df : modin.pandas.DataFrame / modin.pandas.Series\n        Data to split.\n    **options : dict\n        Keyword arguments. If `train_size` key isn't provided\n        `train_size` will be 0.75.\n\n    Returns\n    -------\n    tuple\n        A pair of modin.pandas.DataFrame / modin.pandas.Series.\n    \"\"\"\n    train_size = options.get(\"train_size\", 0.75)\n    train = df.iloc[: int(len(df) * train_size)]\n    test = df.iloc[len(train) :]\n    return train, test\n"
  },
  {
    "path": "modin/experimental/spreadsheet/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\ntry:\n    import modin_spreadsheet\nexcept ImportError:\n    raise ImportError(\n        'Please `pip install \"modin[spreadsheet]\"` to install the spreadsheet extension'\n    )\n\nfrom .general import from_dataframe, to_dataframe\n\n__all__ = [\"from_dataframe\", \"to_dataframe\"]\n\ndel modin_spreadsheet\n"
  },
  {
    "path": "modin/experimental/spreadsheet/general.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nfrom modin_spreadsheet import SpreadsheetWidget, show_grid\n\nfrom .. import pandas as pd\n\n\ndef from_dataframe(\n    dataframe,\n    show_toolbar=None,\n    show_history=None,\n    precision=None,\n    grid_options=None,\n    column_options=None,\n    column_definitions=None,\n    row_edit_callback=None,\n):\n    \"\"\"\n    Renders a DataFrame or Series as an interactive spreadsheet, represented by\n    an instance of the ``SpreadsheetWidget`` class.  The ``SpreadsheetWidget`` instance\n    is constructed using the options passed in to this function.  
The\n    ``dataframe`` argument to this function is used as the ``df`` kwarg in\n    call to the SpreadsheetWidget constructor, and the rest of the parameters\n    are passed through as is.\n\n    If the ``dataframe`` argument is a Series, it will be converted to a\n    DataFrame before being passed in to the SpreadsheetWidget constructor as the\n    ``df`` kwarg.\n\n    :rtype: SpreadsheetWidget\n\n    Parameters\n    ----------\n    dataframe : DataFrame\n        The DataFrame that will be displayed by this instance of\n        SpreadsheetWidget.\n    grid_options : dict\n        Options to use when creating the SlickGrid control (i.e. the\n        interactive grid).  See the Notes section below for more information\n        on the available options, as well as the default options that this\n        widget uses.\n    precision : integer\n        The number of digits of precision to display for floating-point\n        values.  If unset, we use the value of\n        `pandas.get_option('display.precision')`.\n    show_toolbar : bool\n        Whether to show a toolbar with options for adding/removing rows.\n        Adding/removing rows is an experimental feature which only works\n        with DataFrames that have an integer index.\n    show_history : bool\n        Whether to show the cell containing the spreadsheet transformation\n        history.\n    column_options : dict\n        Column options that are to be applied to every column. See the\n        Notes section below for more information on the available options,\n        as well as the default options that this widget uses.\n    column_definitions : dict\n        Column options that are to be applied to individual\n        columns. The keys of the dict should be the column names, and each\n        value should be the column options for a particular column,\n        represented as a dict. 
The available options for each column are the\n        same options that are available to be set for all columns via the\n        ``column_options`` parameter. See the Notes section below for more\n        information on those options.\n    row_edit_callback : callable\n        A callable that is called to determine whether a particular row\n        should be editable or not. Its signature should be\n        ``callable(row)``, where ``row`` is a dictionary which contains a\n        particular row's values, keyed by column name. The callback should\n        return True if the provided row should be editable, and False\n        otherwise.\n\n\n    Notes\n    -----\n    The following dictionary is used for ``grid_options`` if none are\n    provided explicitly::\n\n        {\n            # SlickGrid options\n            'fullWidthRows': True,\n            'syncColumnCellResize': True,\n            'forceFitColumns': False,\n            'defaultColumnWidth': 150,\n            'rowHeight': 28,\n            'enableColumnReorder': False,\n            'enableTextSelectionOnCells': True,\n            'editable': True,\n            'autoEdit': False,\n            'explicitInitialization': True,\n\n            # Modin-spreadsheet options\n            'maxVisibleRows': 15,\n            'minVisibleRows': 8,\n            'sortable': True,\n            'filterable': True,\n            'highlightSelectedCell': False,\n            'highlightSelectedRow': True\n        }\n\n    The first group of options are SlickGrid \"grid options\" which are\n    described in the `SlickGrid documentation\n    <https://github.com/mleibman/SlickGrid/wiki/Grid-Options>`__.\n\n    The second group of option are options that were added specifically\n    for modin-spreadsheet and therefore are not documented in the SlickGrid documentation.\n    The following bullet points describe these options.\n\n    * **maxVisibleRows** The maximum number of rows that modin-spreadsheet will show.\n    * 
**minVisibleRows** The minimum number of rows that modin-spreadsheet will show\n    * **sortable** Whether the modin-spreadsheet instance will allow the user to sort\n      columns by clicking the column headers. When this is set to ``False``,\n      nothing will happen when users click the column headers.\n    * **filterable** Whether the modin-spreadsheet instance will allow the user to filter\n      the grid. When this is set to ``False`` the filter icons won't be shown\n      for any columns.\n    * **highlightSelectedCell** If you set this to True, the selected cell\n      will be given a light blue border.\n    * **highlightSelectedRow** If you set this to False, the light blue\n      background that's shown by default for selected rows will be hidden.\n\n    The following dictionary is used for ``column_options`` if none are\n    provided explicitly::\n\n        {\n            # SlickGrid column options\n            'defaultSortAsc': True,\n            'maxWidth': None,\n            'minWidth': 30,\n            'resizable': True,\n            'sortable': True,\n            'toolTip': \"\",\n            'width': None\n\n            # Modin-spreadsheet column options\n            'editable': True,\n        }\n\n    The first group of options are SlickGrid \"column options\" which are\n    described in the `SlickGrid documentation\n    <https://github.com/mleibman/SlickGrid/wiki/Column-Options>`__.\n\n    The ``editable`` option was added specifically for modin-spreadsheet and therefore is\n    not documented in the SlickGrid documentation.  
This option specifies\n    whether a column should be editable or not.\n\n    See Also\n    --------\n    set_defaults : Permanently set global defaults for the parameters\n                   of ``show_grid``, with the exception of the ``dataframe``\n                   and ``column_definitions`` parameters, since those\n                   depend on the particular set of data being shown by an\n                   instance, and therefore aren't parameters we would want\n                   to set for all SpreadsheetWidget instances.\n    set_grid_option : Permanently set global defaults for individual\n                      grid options.  Does so by changing the defaults\n                      that the ``show_grid`` method uses for the\n                      ``grid_options`` parameter.\n    SpreadsheetWidget : The widget class that is instantiated and returned by this\n                  method.\n\n    \"\"\"\n    if not isinstance(dataframe, pd.DataFrame):\n        raise TypeError(\"dataframe must be modin.DataFrame, not %s\" % type(dataframe))\n    return show_grid(\n        dataframe,\n        show_toolbar,\n        show_history,\n        precision,\n        grid_options,\n        column_options,\n        column_definitions,\n        row_edit_callback,\n    )\n\n\ndef to_dataframe(spreadsheet):\n    \"\"\"\n    Get a copy of the DataFrame that reflects the current state of the ``spreadsheet`` SpreadsheetWidget instance UI.\n    This includes any sorting or filtering changes, as well as edits\n    that have been made by double clicking cells.\n\n    :rtype: DataFrame\n\n    Parameters\n    ----------\n    spreadsheet : SpreadsheetWidget\n        The SpreadsheetWidget instance that DataFrame that will be displayed by this instance of\n        SpreadsheetWidget.\n    \"\"\"\n    if not isinstance(spreadsheet, SpreadsheetWidget):\n        raise TypeError(\n            \"spreadsheet must be modin_spreadsheet.SpreadsheetWidget, not %s\"\n            % type(spreadsheet)\n  
      )\n    return spreadsheet.get_changed_df()\n"
  },
  {
    "path": "modin/experimental/torch/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module supports conversion for torch `DataLoader` interplay.\"\"\"\n"
  },
  {
    "path": "modin/experimental/torch/datasets.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\nfrom __future__ import annotations\n\nimport math\nfrom typing import Hashable, Sequence, Type\n\nfrom pandas import DataFrame\nfrom torch.utils.data import Sampler, SequentialSampler\n\nfrom modin.pandas import DataFrame as ModinDataFrame\n\n\nclass ModinDataLoader:\n    \"A self explainatory class to convert a DataFrame into a DataLoader that batches rows.\"\n\n    def __init__(\n        self,\n        df: DataFrame | ModinDataFrame,\n        batch_size: int,\n        features: Sequence[Hashable] = (),\n        sampler: Type[Sampler] | Sampler = SequentialSampler,\n    ) -> None:\n        \"\"\"\n        Converts a Pandas/Modin DataFrame into a torch DataLoader.\n\n        NOTE: This function should eventually go into modin/utils.py.\n\n        Parameters\n        ----------\n        df : DataFrame\n\n        batch_size : int, default: 1\n\n        features : Sequence[Hashable], default: ()\n            If specified, only these features will be used.\n\n        sampler: Type[Sampler] | Sampler, default: SequentialSampler\n            The sampler to use. 
By default, iterates over the DataFrame in order.\n\n        Returns\n        -------\n        DataLoader\n            DataLoader object backed by desired data.\n        \"\"\"\n\n        if features:\n            df = df[features]\n\n        if isinstance(sampler, type):\n            sampler = sampler(df)\n\n        self._df = df\n        self._batch_size = batch_size\n        self._sampler = sampler\n\n    def __len__(self):\n        # Sampler length is always valid.\n        return math.ceil(len(self._sampler) / self._batch_size)\n\n    def __iter__(self):\n        idx_buffer = []\n\n        for cnt, idx in enumerate(self._sampler):\n            idx_buffer.append(idx)\n\n            if self._end_of_batch(cnt):\n                yield self._df.iloc[idx_buffer].to_numpy()\n                idx_buffer = []\n\n    def _end_of_batch(self, counter: int):\n        return (\n            counter % self._batch_size == self._batch_size - 1\n            or counter == len(self._sampler) - 1\n        )\n"
  },
  {
    "path": "modin/experimental/xgboost/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module holds public interfaces for Modin XGBoost.\"\"\"\n\nfrom .xgboost import Booster, DMatrix, train\n\n__all__ = [\"DMatrix\", \"Booster\", \"train\"]\n"
  },
  {
    "path": "modin/experimental/xgboost/utils.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module holds classes for work with Rabit all-reduce context.\"\"\"\n\nimport logging\n\nimport xgboost as xgb\n\nLOGGER = logging.getLogger(\"[modin.xgboost]\")\n\n\nclass RabitContextManager:\n    \"\"\"\n    A manager class that controls lifecycle of `xgb.RabitTracker`.\n\n    All workers that are used for distributed training will connect to\n    Rabit Tracker stored in this class.\n\n    Parameters\n    ----------\n    num_workers : int\n        Number of workers of `self.rabit_tracker`.\n    host_ip : str\n        IP address of host that creates `self` object.\n    \"\"\"\n\n    # TODO: Specify type of host_ip\n    def __init__(self, num_workers: int, host_ip):\n        self._num_workers = num_workers\n        self.env = {\"DMLC_NUM_WORKER\": self._num_workers}\n        self.rabit_tracker = xgb.RabitTracker(\n            host_ip=host_ip, n_workers=self._num_workers\n        )\n\n    def __enter__(self):\n        \"\"\"\n        Entry point of manager.\n\n        Updates Rabit Tracker environment, starts `self.rabit_tracker`.\n\n        Returns\n        -------\n        dict\n            Dict with Rabit Tracker environment.\n        \"\"\"\n        
self.env.update(self.rabit_tracker.worker_envs())\n        self.rabit_tracker.start(self._num_workers)\n        return self.env\n\n    # TODO: (type, value, traceback) -> *args\n    def __exit__(self, type, value, traceback):\n        \"\"\"\n        Exit point of manager.\n\n        Finishes `self.rabit_tracker`.\n\n        Parameters\n        ----------\n        type : exception type\n            Type of exception, captured  by manager.\n        value : Exception\n            Exception value.\n        traceback : TracebackType\n            Traceback of exception.\n        \"\"\"\n        self.rabit_tracker.join()\n\n\nclass RabitContext:\n    \"\"\"\n    Context to connect a worker to a rabit tracker.\n\n    Parameters\n    ----------\n    actor_rank : int\n        Rank of actor, connected to this context.\n    args : list\n        List with environment variables for Rabit Tracker.\n    \"\"\"\n\n    def __init__(self, actor_rank, args):\n        self.args = args\n        self.args.append((\"DMLC_TASK_ID=[modin.xgboost]:\" + str(actor_rank)).encode())\n\n    def __enter__(self):\n        \"\"\"\n        Entry point of context.\n\n        Connects to Rabit Tracker.\n        \"\"\"\n        xgb.rabit.init(self.args)\n        LOGGER.info(\"-------------- rabit started ------------------\")\n\n    def __exit__(self, *args):\n        \"\"\"\n        Exit point of context.\n\n        Disconnects from Rabit Tracker.\n\n        Parameters\n        ----------\n        *args : iterable\n            Parameters for Exception capturing.\n        \"\"\"\n        xgb.rabit.finalize()\n        LOGGER.info(\"-------------- rabit finished ------------------\")\n"
  },
  {
    "path": "modin/experimental/xgboost/xgboost.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module holds public interfaces for work Modin XGBoost.\"\"\"\n\nimport logging\nfrom typing import Dict, Optional\n\nimport xgboost as xgb\n\nimport modin.pandas as pd\nfrom modin.config import Engine\nfrom modin.distributed.dataframe.pandas import unwrap_partitions\n\nLOGGER = logging.getLogger(\"[modin.xgboost]\")\n\n\nclass DMatrix:\n    \"\"\"\n    DMatrix holds references to partitions of Modin DataFrame.\n\n    On init stage unwrapping partitions of Modin DataFrame is started.\n\n    Parameters\n    ----------\n    data : modin.pandas.DataFrame\n        Data source of DMatrix.\n    label : modin.pandas.DataFrame or modin.pandas.Series, optional\n        Labels used for training.\n    missing : float, optional\n        Value in the input data which needs to be present as a missing\n        value. 
If ``None``, defaults to ``np.nan``.\n    silent : boolean, optional\n        Whether to print messages during construction or not.\n    feature_names : list, optional\n        Set names for features.\n    feature_types : list, optional\n        Set types for features.\n    feature_weights : array_like, optional\n        Set feature weights for column sampling.\n    enable_categorical : boolean, optional\n        Experimental support of specializing for categorical features.\n\n    Notes\n    -----\n    Currently DMatrix doesn't support `weight`, `base_margin`, `nthread`,\n    `group`, `qid`, `label_lower_bound`, `label_upper_bound` parameters.\n    \"\"\"\n\n    def __init__(\n        self,\n        data,\n        label=None,\n        missing=None,\n        silent=False,\n        feature_names=None,\n        feature_types=None,\n        feature_weights=None,\n        enable_categorical=None,\n    ):\n        assert isinstance(\n            data, pd.DataFrame\n        ), f\"Type of `data` is {type(data)}, but expected {pd.DataFrame}.\"\n\n        if label is not None:\n            assert isinstance(\n                label, (pd.DataFrame, pd.Series)\n            ), f\"Type of `data` is {type(label)}, but expected {pd.DataFrame} or {pd.Series}.\"\n            self.label = unwrap_partitions(label, axis=0)\n        else:\n            self.label = None\n\n        self.data = unwrap_partitions(data, axis=0, get_ip=True)\n\n        self._n_rows = data.shape[0]\n        self._n_cols = data.shape[1]\n\n        for i, dtype in enumerate(data.dtypes):\n            if dtype == \"object\":\n                raise ValueError(f\"Column {i} has unsupported data type {dtype}.\")\n\n        self.feature_names = feature_names\n        self.feature_types = feature_types\n\n        self.missing = missing\n        self.silent = silent\n        self.feature_weights = feature_weights\n        self.enable_categorical = enable_categorical\n\n        self.metadata = (\n            
data.index,\n            data.columns,\n            data._query_compiler._modin_frame.row_lengths,\n        )\n\n    def __iter__(self):\n        \"\"\"\n        Return unwrapped `self.data` and `self.label`.\n\n        Yields\n        ------\n        list\n            List of `self.data` with pairs of references to IP of row partition\n            and row partition [(IP_ref0, partition_ref0), ..].\n        list\n            List of `self.label` with references to row partitions\n            [partition_ref0, ..].\n        \"\"\"\n        yield self.data\n        yield self.label\n\n    def get_dmatrix_params(self):\n        \"\"\"\n        Get dict of DMatrix parameters excluding `self.data`/`self.label`.\n\n        Returns\n        -------\n        dict\n        \"\"\"\n        dmatrix_params = {\n            \"feature_names\": self.feature_names,\n            \"feature_types\": self.feature_types,\n            \"missing\": self.missing,\n            \"silent\": self.silent,\n            \"feature_weights\": self.feature_weights,\n            \"enable_categorical\": self.enable_categorical,\n        }\n        return dmatrix_params\n\n    @property\n    def feature_names(self):\n        \"\"\"\n        Get column labels.\n\n        Returns\n        -------\n        Column labels.\n        \"\"\"\n        return self._feature_names\n\n    @feature_names.setter\n    def feature_names(self, feature_names):\n        \"\"\"\n        Set column labels.\n\n        Parameters\n        ----------\n        feature_names : list or None\n            Labels for columns. 
In the case of ``None``, existing feature names will be reset.\n        \"\"\"\n        if feature_names is not None:\n            feature_names = (\n                list(feature_names)\n                if not isinstance(feature_names, str)\n                else [feature_names]\n            )\n\n            if len(feature_names) != len(set(feature_names)):\n                raise ValueError(\"Items in `feature_names` must be unique.\")\n            if len(feature_names) != self.num_col() and self.num_col() != 0:\n                raise ValueError(\n                    \"`feature_names` must have the same width as `self.data`.\"\n                )\n            if not all(\n                isinstance(f, str) and not any(x in f for x in set((\"[\", \"]\", \"<\")))\n                for f in feature_names\n            ):\n                raise ValueError(\n                    \"Items of `feature_names` must be string and must not contain [, ] or <.\"\n                )\n        else:\n            feature_names = None\n        self._feature_names = feature_names\n\n    @property\n    def feature_types(self):\n        \"\"\"\n        Get column types.\n\n        Returns\n        -------\n        Column types.\n        \"\"\"\n        return self._feature_types\n\n    @feature_types.setter\n    def feature_types(self, feature_types):\n        \"\"\"\n        Set column types.\n\n        Parameters\n        ----------\n        feature_types : list or None\n            Labels for columns. 
In case None, existing feature names will be reset.\n        \"\"\"\n        if feature_types is not None:\n            if not isinstance(feature_types, (list, str)):\n                raise TypeError(\"feature_types must be string or list of strings\")\n            if isinstance(feature_types, str):\n                feature_types = [feature_types] * self.num_col()\n                feature_types = (\n                    list(feature_types)\n                    if not isinstance(feature_types, str)\n                    else [feature_types]\n                )\n        else:\n            feature_types = None\n        self._feature_types = feature_types\n\n    def num_row(self):\n        \"\"\"\n        Get number of rows.\n\n        Returns\n        -------\n        int\n        \"\"\"\n        return self._n_rows\n\n    def num_col(self):\n        \"\"\"\n        Get number of columns.\n\n        Returns\n        -------\n        int\n        \"\"\"\n        return self._n_cols\n\n    def get_float_info(self, name):\n        \"\"\"\n        Get float property from the DMatrix.\n\n        Parameters\n        ----------\n        name : str\n            The field name of the information.\n\n        Returns\n        -------\n        A NumPy array of float information of the data.\n        \"\"\"\n        return getattr(self, name)\n\n    def set_info(\n        self,\n        *,\n        label=None,\n        feature_names=None,\n        feature_types=None,\n        feature_weights=None,\n    ) -> None:\n        \"\"\"\n        Set meta info for DMatrix.\n\n        Parameters\n        ----------\n        label : modin.pandas.DataFrame or modin.pandas.Series, optional\n            Labels used for training.\n        feature_names : list, optional\n            Set names for features.\n        feature_types : list, optional\n            Set types for features.\n        feature_weights : array_like, optional\n            Set feature weights for column sampling.\n        \"\"\"\n 
       if label is not None:\n            self.label = label\n        if feature_names is not None:\n            self.feature_names = feature_names\n        if feature_types is not None:\n            self.feature_types = feature_types\n        if feature_weights is not None:\n            self.feature_weights = feature_weights\n\n\nclass Booster(xgb.Booster):\n    \"\"\"\n    A Modin Booster of XGBoost.\n\n    Booster is the model of XGBoost, that contains low level routines for\n    training, prediction and evaluation.\n\n    Parameters\n    ----------\n    params : dict, optional\n        Parameters for boosters.\n    cache : list, default: empty\n        List of cache items.\n    model_file : string/os.PathLike/xgb.Booster/bytearray, optional\n        Path to the model file if it's string or PathLike or xgb.Booster.\n    \"\"\"\n\n    def __init__(self, params=None, cache=(), model_file=None):  # noqa: MD01\n        super(Booster, self).__init__(params=params, cache=cache, model_file=model_file)\n\n    def predict(\n        self,\n        data: DMatrix,\n        **kwargs,\n    ):\n        \"\"\"\n        Run distributed prediction with a trained booster.\n\n        During execution it runs ``xgb.predict`` on each worker for subset of `data`\n        and creates Modin DataFrame with prediction results.\n\n        Parameters\n        ----------\n        data : modin.experimental.xgboost.DMatrix\n            Input data used for prediction.\n        **kwargs : dict\n            Other parameters are the same as for ``xgboost.Booster.predict``.\n\n        Returns\n        -------\n        modin.pandas.DataFrame\n            Modin DataFrame with prediction results.\n        \"\"\"\n        LOGGER.info(\"Prediction started\")\n\n        if Engine.get() == \"Ray\":\n            from .xgboost_ray import _predict\n        else:\n            raise ValueError(\"Current version supports only Ray engine.\")\n\n        assert isinstance(\n            data, DMatrix\n        ), 
f\"Type of `data` is {type(data)}, but expected {DMatrix}.\"\n\n        if (\n            self.feature_names is not None\n            and data.feature_names is not None\n            and self.feature_names != data.feature_names\n        ):\n            data_missing = set(self.feature_names) - set(data.feature_names)\n            self_missing = set(data.feature_names) - set(self.feature_names)\n\n            msg = \"feature_names mismatch: {0} {1}\"\n\n            if data_missing:\n                msg += (\n                    \"\\nexpected \"\n                    + \", \".join(str(s) for s in data_missing)\n                    + \" in input data\"\n                )\n\n            if self_missing:\n                msg += (\n                    \"\\ntraining data did not have the following fields: \"\n                    + \", \".join(str(s) for s in self_missing)\n                )\n\n            raise ValueError(msg.format(self.feature_names, data.feature_names))\n\n        result = _predict(self.copy(), data, **kwargs)\n        LOGGER.info(\"Prediction finished\")\n\n        return result\n\n\ndef train(\n    params: Dict,\n    dtrain: DMatrix,\n    *args,\n    evals=(),\n    num_actors: Optional[int] = None,\n    evals_result: Optional[Dict] = None,\n    **kwargs,\n):\n    \"\"\"\n    Run distributed training of XGBoost model.\n\n    During work it evenly distributes `dtrain` between workers according\n    to IP addresses partitions (in case of not even distribution of `dtrain`\n    over nodes, some partitions will be re-distributed between nodes),\n    runs xgb.train on each worker for subset of `dtrain` and reduces training results\n    of each worker using Rabit Context.\n\n    Parameters\n    ----------\n    params : dict\n        Booster params.\n    dtrain : modin.experimental.xgboost.DMatrix\n        Data to be trained against.\n    *args : iterable\n        Other parameters for `xgboost.train`.\n    evals : list of pairs 
(modin.experimental.xgboost.DMatrix, str), default: empty\n        List of validation sets for which metrics will be evaluated during training.\n        Validation metrics will help us track the performance of the model.\n    num_actors : int, optional\n        Number of actors for training. If unspecified, this value will be\n        computed automatically.\n    evals_result : dict, optional\n        Dict to store evaluation results in.\n    **kwargs : dict\n        Other parameters are the same as `xgboost.train`.\n\n    Returns\n    -------\n    modin.experimental.xgboost.Booster\n        A trained booster.\n    \"\"\"\n    LOGGER.info(\"Training started\")\n\n    if Engine.get() == \"Ray\":\n        from .xgboost_ray import _train\n    else:\n        raise ValueError(\"Current version supports only Ray engine.\")\n\n    assert isinstance(\n        dtrain, DMatrix\n    ), f\"Type of `dtrain` is {type(dtrain)}, but expected {DMatrix}.\"\n    result = _train(dtrain, params, *args, num_actors=num_actors, evals=evals, **kwargs)\n    if isinstance(evals_result, dict):\n        evals_result.update(result[\"history\"])\n\n    LOGGER.info(\"Training finished\")\n    return Booster(model_file=result[\"booster\"])\n"
  },
  {
    "path": "modin/experimental/xgboost/xgboost_ray.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"\nModule holds internal entities for Modin XGBoost on Ray engine.\n\nClass ModinXGBoostActor provides interfaces to run XGBoost operations\non remote workers. Other functions create Ray actors, distribute data between them, etc.\n\"\"\"\n\nimport logging\nimport math\nimport time\nimport warnings\nfrom collections import defaultdict\nfrom typing import Dict, List\n\nimport numpy as np\nimport pandas\nimport ray\nimport xgboost as xgb\nfrom ray.util import get_node_ip_address\n\nfrom modin.core.execution.ray.common import RayWrapper\nfrom modin.distributed.dataframe.pandas import from_partitions\n\nfrom .utils import RabitContext, RabitContextManager\n\nLOGGER = logging.getLogger(\"[modin.xgboost]\")\n\n\n@ray.remote(num_cpus=0)\nclass ModinXGBoostActor:\n    \"\"\"\n    Ray actor-class runs training on the remote worker.\n\n    Parameters\n    ----------\n    rank : int\n        Rank of this actor.\n    nthread : int\n        Number of threads used by XGBoost in this actor.\n    \"\"\"\n\n    def __init__(self, rank, nthread):\n        self._evals = []\n        self._rank = rank\n        self._nthreads = nthread\n\n        LOGGER.info(\n            f\"Actor 
<{self._rank}>, nthread = {self._nthreads} was initialized.\"\n        )\n\n    def _get_dmatrix(self, X_y, **dmatrix_kwargs):\n        \"\"\"\n        Create xgboost.DMatrix from sequence of pandas.DataFrame objects.\n\n        First half of `X_y` should contains objects for `X`, second for `y`.\n\n        Parameters\n        ----------\n        X_y : list\n            List of pandas.DataFrame objects.\n        **dmatrix_kwargs : dict\n            Keyword parameters for ``xgb.DMatrix``.\n\n        Returns\n        -------\n        xgb.DMatrix\n            A XGBoost DMatrix.\n        \"\"\"\n        s = time.time()\n        X = X_y[: len(X_y) // 2]\n        y = X_y[len(X_y) // 2 :]\n\n        assert (\n            len(X) == len(y) and len(X) > 0\n        ), \"X and y should have the equal length more than 0\"\n\n        X = pandas.concat(X, axis=0)\n        y = pandas.concat(y, axis=0)\n        LOGGER.info(f\"Concat time: {time.time() - s} s\")\n\n        return xgb.DMatrix(X, y, nthread=self._nthreads, **dmatrix_kwargs)\n\n    def set_train_data(self, *X_y, add_as_eval_method=None, **dmatrix_kwargs):\n        \"\"\"\n        Set train data for actor.\n\n        Parameters\n        ----------\n        *X_y : iterable\n            Sequence of ray.ObjectRef objects. First half of sequence is for\n            `X` data, second for `y`. When it is passed in actor, auto-materialization\n            of ray.ObjectRef -> pandas.DataFrame happens.\n        add_as_eval_method : str, optional\n            Name of eval data. 
Used in case when train data also used for evaluation.\n        **dmatrix_kwargs : dict\n            Keyword parameters for ``xgb.DMatrix``.\n        \"\"\"\n        self._dtrain = self._get_dmatrix(X_y, **dmatrix_kwargs)\n\n        if add_as_eval_method is not None:\n            self._evals.append((self._dtrain, add_as_eval_method))\n\n    def add_eval_data(self, *X_y, eval_method, **dmatrix_kwargs):\n        \"\"\"\n        Add evaluation data for actor.\n\n        Parameters\n        ----------\n        *X_y : iterable\n            Sequence of ray.ObjectRef objects. First half of sequence is for\n            `X` data, second for `y`. When it is passed in actor, auto-materialization\n            of ray.ObjectRef -> pandas.DataFrame happens.\n        eval_method : str\n            Name of eval data.\n        **dmatrix_kwargs : dict\n            Keyword parameters for ``xgb.DMatrix``.\n        \"\"\"\n        self._evals.append((self._get_dmatrix(X_y, **dmatrix_kwargs), eval_method))\n\n    def train(self, rabit_args, params, *args, **kwargs):\n        \"\"\"\n        Run local XGBoost training.\n\n        Connects to Rabit Tracker environment to share training data between\n        actors and trains XGBoost booster using `self._dtrain`.\n\n        Parameters\n        ----------\n        rabit_args : list\n            List with environment variables for Rabit Tracker.\n        params : dict\n            Booster params.\n        *args : iterable\n            Other parameters for `xgboost.train`.\n        **kwargs : dict\n            Other parameters for `xgboost.train`.\n\n        Returns\n        -------\n        dict\n            A dictionary with trained booster and dict of\n            evaluation results\n            as {\"booster\": xgb.Booster, \"history\": dict}.\n        \"\"\"\n        local_params = params.copy()\n        local_dtrain = self._dtrain\n        local_evals = self._evals\n\n        local_params[\"nthread\"] = self._nthreads\n\n        
evals_result = dict()\n\n        s = time.time()\n        with RabitContext(self._rank, rabit_args):\n            bst = xgb.train(\n                local_params,\n                local_dtrain,\n                *args,\n                evals=local_evals,\n                evals_result=evals_result,\n                **kwargs,\n            )\n            LOGGER.info(f\"Local training time: {time.time() - s} s\")\n            return {\"booster\": bst, \"history\": evals_result}\n\n\ndef _get_cluster_cpus():\n    \"\"\"\n    Get number of CPUs available on Ray cluster.\n\n    Returns\n    -------\n    int\n        Number of CPUs available on cluster.\n    \"\"\"\n    return ray.cluster_resources().get(\"CPU\", 1)\n\n\ndef _get_min_cpus_per_node():\n    \"\"\"\n    Get min number of node CPUs available on cluster nodes.\n\n    Returns\n    -------\n    int\n        Min number of CPUs per node.\n    \"\"\"\n    # TODO: max_node_cpus -> min_node_cpus\n    max_node_cpus = min(\n        node.get(\"Resources\", {}).get(\"CPU\", 0.0) for node in ray.nodes()\n    )\n    return max_node_cpus if max_node_cpus > 0.0 else _get_cluster_cpus()\n\n\ndef _get_cpus_per_actor(num_actors):\n    \"\"\"\n    Get number of CPUs to use by each actor.\n\n    Parameters\n    ----------\n    num_actors : int\n        Number of Ray actors.\n\n    Returns\n    -------\n    int\n        Number of CPUs per actor.\n    \"\"\"\n    cluster_cpus = _get_cluster_cpus()\n    cpus_per_actor = max(\n        1, min(int(_get_min_cpus_per_node() or 1), int(cluster_cpus // num_actors))\n    )\n    return cpus_per_actor\n\n\ndef _get_num_actors(num_actors=None):\n    \"\"\"\n    Get number of actors to create.\n\n    Parameters\n    ----------\n    num_actors : int, optional\n        Desired number of actors. 
If is None, integer number of actors\n        will be computed by condition 2 CPUs per 1 actor.\n\n    Returns\n    -------\n    int\n        Number of actors to create.\n    \"\"\"\n    min_cpus_per_node = _get_min_cpus_per_node()\n    if num_actors is None:\n        num_actors_per_node = max(1, int(min_cpus_per_node // 2))\n        return num_actors_per_node * len(ray.nodes())\n    elif isinstance(num_actors, int):\n        assert (\n            num_actors % len(ray.nodes()) == 0\n        ), \"`num_actors` must be a multiple to number of nodes in Ray cluster.\"\n        return num_actors\n    else:\n        RuntimeError(\"`num_actors` must be int or None\")\n\n\ndef create_actors(num_actors):\n    \"\"\"\n    Create ModinXGBoostActors.\n\n    Parameters\n    ----------\n    num_actors : int\n        Number of actors to create.\n\n    Returns\n    -------\n    list\n        List of pairs (ip, actor).\n    \"\"\"\n    num_cpus_per_actor = _get_cpus_per_actor(num_actors)\n    # starting from ray 2.6 there is a new field: 'node:__internal_head__'\n    # example:\n    # >>> ray.cluster_resources()\n    # {'object_store_memory': 1036438732.0, 'memory': 2072877467.0, 'node:127.0.0.1': 1.0, 'CPU': 8.0, 'node:__internal_head__': 1.0}\n    node_ips = [\n        key\n        for key in ray.cluster_resources().keys()\n        if key.startswith(\"node:\") and \"__internal_head__\" not in key\n    ]\n\n    num_actors_per_node = max(num_actors // len(node_ips), 1)\n    actors_ips = [ip for ip in node_ips for _ in range(num_actors_per_node)]\n\n    actors = [\n        (\n            node_ip.split(\"node:\")[-1],\n            ModinXGBoostActor.options(resources={node_ip: 0.01}).remote(\n                i, nthread=num_cpus_per_actor\n            ),\n        )\n        for i, node_ip in enumerate(actors_ips)\n    ]\n    return actors\n\n\ndef _split_data_across_actors(\n    actors: List,\n    set_func,\n    X_parts,\n    y_parts,\n):\n    \"\"\"\n    Split row partitions of data 
between actors.\n\n    Parameters\n    ----------\n    actors : list\n        List of used actors.\n    set_func : callable\n        The function for setting data in actor.\n    X_parts : list\n        Row partitions of X data.\n    y_parts : list\n        Row partitions of y data.\n    \"\"\"\n    X_parts_by_actors = _assign_row_partitions_to_actors(\n        actors,\n        X_parts,\n    )\n\n    y_parts_by_actors = _assign_row_partitions_to_actors(\n        actors,\n        y_parts,\n        data_for_aligning=X_parts_by_actors,\n    )\n\n    for rank, (_, actor) in enumerate(actors):\n        set_func(actor, *(X_parts_by_actors[rank][0] + y_parts_by_actors[rank][0]))\n\n\ndef _assign_row_partitions_to_actors(\n    actors: List,\n    row_partitions,\n    data_for_aligning=None,\n):\n    \"\"\"\n    Assign row_partitions to actors.\n\n    `row_partitions` will be assigned to actors according to their IPs.\n    If distribution isn't even, partitions will be moved from actor\n    with excess partitions to actor with lack of them.\n\n    Parameters\n    ----------\n    actors : list\n        List of used actors.\n    row_partitions : list\n        Row partitions of data to assign.\n    data_for_aligning : dict, optional\n        Data according to the order of which should be\n        distributed `row_partitions`. 
Used to align y with X.\n\n    Returns\n    -------\n    dict\n        Dictionary of assigned to actors partitions\n        as {actor_rank: (partitions, order)}.\n    \"\"\"\n    num_actors = len(actors)\n    if data_for_aligning is None:\n        parts_ips_ref, parts_ref = zip(*row_partitions)\n\n        # Group actors which are one the same ip\n        actor_ips = defaultdict(list)\n        for rank, (ip, _) in enumerate(actors):\n            actor_ips[ip].append(rank)\n\n        # Get distribution of parts between nodes ({ip:[(part, position),..],..})\n        init_parts_distribution = defaultdict(list)\n        for idx, (ip, part_ref) in enumerate(\n            zip(RayWrapper.materialize(list(parts_ips_ref)), parts_ref)\n        ):\n            init_parts_distribution[ip].append((part_ref, idx))\n\n        num_parts = len(parts_ref)\n        min_parts_per_actor = math.floor(num_parts / num_actors)\n        max_parts_per_actor = math.ceil(num_parts / num_actors)\n        num_actors_with_max_parts = num_parts % num_actors\n\n        row_partitions_by_actors = defaultdict(list)\n        # Fill actors without movement parts between ips\n        for actor_ip, ranks in actor_ips.items():\n            # Loop across actors which are placed on actor_ip\n            for rank in ranks:\n                num_parts_on_ip = len(init_parts_distribution[actor_ip])\n\n                # Check that have something to distribute on this ip\n                if num_parts_on_ip == 0:\n                    break\n                # Check that node with `actor_ip` has enough parts for minimal\n                # filling actor with `rank`\n                if num_parts_on_ip >= min_parts_per_actor:\n                    # Check that node has enough parts for max filling\n                    # actor with `rank`\n                    if (\n                        num_parts_on_ip >= max_parts_per_actor\n                        and num_actors_with_max_parts > 0\n                    ):\n             
           pop_slice = slice(0, max_parts_per_actor)\n                        num_actors_with_max_parts -= 1\n                    else:\n                        pop_slice = slice(0, min_parts_per_actor)\n\n                    row_partitions_by_actors[rank].extend(\n                        init_parts_distribution[actor_ip][pop_slice]\n                    )\n                    # Delete parts which we already assign\n                    del init_parts_distribution[actor_ip][pop_slice]\n                else:\n                    row_partitions_by_actors[rank].extend(\n                        init_parts_distribution[actor_ip]\n                    )\n                    init_parts_distribution[actor_ip] = []\n\n        # Remove empty IPs\n        for ip in list(init_parts_distribution):\n            if len(init_parts_distribution[ip]) == 0:\n                init_parts_distribution.pop(ip)\n\n        # IP's aren't necessary now\n        init_parts_distribution = [\n            pair for pairs in init_parts_distribution.values() for pair in pairs\n        ]\n\n        # Fill the actors with extra parts (movements data between nodes)\n        for rank in range(len(actors)):\n            num_parts_on_rank = len(row_partitions_by_actors[rank])\n\n            if num_parts_on_rank == max_parts_per_actor or (\n                num_parts_on_rank == min_parts_per_actor\n                and num_actors_with_max_parts == 0\n            ):\n                continue\n\n            if num_actors_with_max_parts > 0:\n                pop_slice = slice(0, max_parts_per_actor - num_parts_on_rank)\n                num_actors_with_max_parts -= 1\n            else:\n                pop_slice = slice(0, min_parts_per_actor - num_parts_on_rank)\n\n            row_partitions_by_actors[rank].extend(init_parts_distribution[pop_slice])\n            del init_parts_distribution[pop_slice]\n\n        if len(init_parts_distribution) != 0:\n            raise RuntimeError(\n                f\"Not all 
partitions were ditributed between actors: {len(init_parts_distribution)} left.\"\n            )\n\n        row_parts_by_ranks = dict()\n        for rank, pairs_part_pos in dict(row_partitions_by_actors).items():\n            parts, order = zip(*pairs_part_pos)\n            row_parts_by_ranks[rank] = (list(parts), list(order))\n    else:\n        row_parts_by_ranks = {rank: ([], []) for rank in range(len(actors))}\n\n        for rank, (_, order_of_indexes) in data_for_aligning.items():\n            row_parts_by_ranks[rank][1].extend(order_of_indexes)\n            for row_idx in order_of_indexes:\n                row_parts_by_ranks[rank][0].append(row_partitions[row_idx])\n\n    return row_parts_by_ranks\n\n\ndef _train(\n    dtrain,\n    params: Dict,\n    *args,\n    num_actors=None,\n    evals=(),\n    **kwargs,\n):\n    \"\"\"\n    Run distributed training of XGBoost model on Ray engine.\n\n    During work it evenly distributes `dtrain` between workers according\n    to IP addresses partitions (in case of not even distribution of `dtrain`\n    by nodes, part of partitions will be re-distributed between nodes),\n    runs xgb.train on each worker for subset of `dtrain` and reduces training results\n    of each worker using Rabit Context.\n\n    Parameters\n    ----------\n    dtrain : modin.experimental.DMatrix\n        Data to be trained against.\n    params : dict\n        Booster params.\n    *args : iterable\n        Other parameters for `xgboost.train`.\n    num_actors : int, optional\n        Number of actors for training. 
If unspecified, this value will be\n        computed automatically.\n    evals : list of pairs (modin.experimental.xgboost.DMatrix, str), default: empty\n        List of validation sets for which metrics will be evaluated during training.\n        Validation metrics will help us track the performance of the model.\n    **kwargs : dict\n        Other parameters are the same as `xgboost.train`.\n\n    Returns\n    -------\n    dict\n        A dictionary with trained booster and dict of\n        evaluation results\n        as {\"booster\": xgboost.Booster, \"history\": dict}.\n    \"\"\"\n    s = time.time()\n\n    X_row_parts, y_row_parts = dtrain\n    dmatrix_kwargs = dtrain.get_dmatrix_params()\n\n    assert len(X_row_parts) == len(y_row_parts), \"Unaligned train data\"\n\n    num_actors = _get_num_actors(num_actors)\n\n    if num_actors > len(X_row_parts):\n        num_actors = len(X_row_parts)\n\n    if evals:\n        min_num_parts = num_actors\n        for (eval_X, _), eval_method in evals:\n            if len(eval_X) < min_num_parts:\n                min_num_parts = len(eval_X)\n                method_name = eval_method\n\n        if num_actors != min_num_parts:\n            num_actors = min_num_parts\n            warnings.warn(\n                f\"`num_actors` is set to {num_actors}, because `evals` data with name `{method_name}` has only {num_actors} partition(s).\"\n            )\n\n    actors = create_actors(num_actors)\n\n    add_as_eval_method = None\n    if evals:\n        for eval_data, method in evals[:]:\n            if eval_data is dtrain:\n                add_as_eval_method = method\n                evals.remove((eval_data, method))\n\n        for (eval_X, eval_y), eval_method in evals:\n            # Split data across workers\n            _split_data_across_actors(\n                actors,\n                lambda actor, *X_y: actor.add_eval_data.remote(\n                    *X_y, eval_method=eval_method, **dmatrix_kwargs\n                ),\n      
          eval_X,\n                eval_y,\n            )\n\n    # Split data across workers\n    _split_data_across_actors(\n        actors,\n        lambda actor, *X_y: actor.set_train_data.remote(\n            *X_y, add_as_eval_method=add_as_eval_method, **dmatrix_kwargs\n        ),\n        X_row_parts,\n        y_row_parts,\n    )\n    LOGGER.info(f\"Data preparation time: {time.time() - s} s\")\n\n    s = time.time()\n    with RabitContextManager(len(actors), get_node_ip_address()) as env:\n        rabit_args = [(\"%s=%s\" % item).encode() for item in env.items()]\n\n        # Train\n        fut = [\n            actor.train.remote(rabit_args, params, *args, **kwargs)\n            for _, actor in actors\n        ]\n        # All results should be the same because of Rabit tracking. So we just\n        # return the first one.\n        result = RayWrapper.materialize(fut[0])\n        LOGGER.info(f\"Training time: {time.time() - s} s\")\n        return result\n\n\n@ray.remote\ndef _map_predict(booster, part, columns, dmatrix_kwargs={}, **kwargs):\n    \"\"\"\n    Run prediction on a remote worker.\n\n    Parameters\n    ----------\n    booster : xgboost.Booster or ray.ObjectRef\n        A trained booster.\n    part : pandas.DataFrame or ray.ObjectRef\n        Partition of full data used for local prediction.\n    columns : list or ray.ObjectRef\n        Columns for the result.\n    dmatrix_kwargs : dict, optional\n        Keyword parameters for ``xgb.DMatrix``.\n    **kwargs : dict\n        Other parameters are the same as for ``xgboost.Booster.predict``.\n\n    Returns\n    -------\n    ray.ObjectRef\n        ``ray.ObjectRef`` with partial prediction.\n    \"\"\"\n    dmatrix = xgb.DMatrix(part, **dmatrix_kwargs)\n    prediction = pandas.DataFrame(\n        booster.predict(dmatrix, **kwargs),\n        index=part.index,\n        columns=columns,\n    )\n    return prediction\n\n\ndef _predict(\n    booster,\n    data,\n    **kwargs,\n):\n    \"\"\"\n    Run 
distributed prediction with a trained booster on Ray engine.\n\n    During execution it runs ``xgb.predict`` on each worker for subset of `data`\n    and creates Modin DataFrame with prediction results.\n\n    Parameters\n    ----------\n    booster : xgboost.Booster\n        A trained booster.\n    data : modin.experimental.xgboost.DMatrix\n        Input data used for prediction.\n    **kwargs : dict\n        Other parameters are the same as for ``xgboost.Booster.predict``.\n\n    Returns\n    -------\n    modin.pandas.DataFrame\n        Modin DataFrame with prediction results.\n    \"\"\"\n    s = time.time()\n    dmatrix_kwargs = data.get_dmatrix_params()\n\n    # Get metadata from DMatrix\n    input_index, input_columns, row_lengths = data.metadata\n\n    # Infer columns of result\n    def _get_num_columns(booster, n_features, **kwargs):\n        rng = np.random.RandomState(777)\n        test_data = rng.randn(1, n_features)\n        test_predictions = booster.predict(\n            xgb.DMatrix(test_data), validate_features=False, **kwargs\n        )\n        num_columns = (\n            test_predictions.shape[1] if len(test_predictions.shape) > 1 else 1\n        )\n        return num_columns\n\n    result_num_columns = _get_num_columns(booster, len(input_columns), **kwargs)\n    new_columns = list(range(result_num_columns))\n\n    # Put common data in object store\n    booster = RayWrapper.put(booster)\n    new_columns_ref = RayWrapper.put(new_columns)\n\n    prediction_refs = [\n        _map_predict.remote(booster, part, new_columns_ref, dmatrix_kwargs, **kwargs)\n        for _, part in data.data\n    ]\n    predictions = from_partitions(\n        prediction_refs,\n        0,\n        index=input_index,\n        columns=new_columns,\n        row_lengths=row_lengths,\n        column_widths=[len(new_columns)],\n    )\n    LOGGER.info(f\"Prediction time: {time.time() - s} s\")\n    return predictions\n"
  },
  {
    "path": "modin/logging/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nfrom .class_logger import ClassLogger  # noqa: F401\nfrom .config import DEFAULT_LOGGER_NAME, get_logger  # noqa: F401\nfrom .logger_decorator import disable_logging, enable_logging  # noqa: F401\nfrom .metrics import add_metric_handler, clear_metric_handler, emit_metric\n\n__all__ = [\n    \"ClassLogger\",\n    \"get_logger\",\n    \"enable_logging\",\n    \"disable_logging\",\n    \"emit_metric\",\n    \"add_metric_handler\",\n    \"clear_metric_handler\",\n    \"DEFAULT_LOGGER_NAME\",\n]\n"
  },
  {
    "path": "modin/logging/class_logger.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"\nModule contains ``ClassLogger`` class.\n\n``ClassLogger`` is used for adding logging to Modin classes and their subclasses.\n\"\"\"\n\nfrom typing import Dict, Optional\n\nfrom .config import LogLevel\nfrom .logger_decorator import enable_logging\n\n\nclass ClassLogger:\n    \"\"\"\n    Ensure all subclasses of the class being inherited are logged, too.\n\n    Notes\n    -----\n    This mixin must go first in class bases declaration to have the desired effect.\n    \"\"\"\n\n    _modin_logging_layer = \"PANDAS-API\"\n    _log_level = LogLevel.INFO\n\n    @classmethod\n    def __init_subclass__(\n        cls,\n        modin_layer: Optional[str] = None,\n        class_name: Optional[str] = None,\n        log_level: Optional[LogLevel] = None,\n        **kwargs: Dict,\n    ) -> None:\n        \"\"\"\n        Apply logging decorator to all children of ``ClassLogger``.\n\n        Parameters\n        ----------\n        modin_layer : str, optional\n            Specified by the logger (e.g. 
PANDAS-API).\n        class_name : str, optional\n            The name of the class the decorator is being applied to.\n            Composed from the decorated class name if not specified.\n        log_level : LogLevel, optional\n            The log level (LogLevel.INFO, LogLevel.DEBUG, LogLevel.WARNING, etc.).\n        **kwargs : dict\n        \"\"\"\n        modin_layer = modin_layer or cls._modin_logging_layer\n        log_level = log_level or cls._log_level\n        super().__init_subclass__(**kwargs)\n        enable_logging(modin_layer, class_name, log_level)(cls)\n        cls._modin_logging_layer = modin_layer\n        cls._log_level = log_level\n"
  },
  {
    "path": "modin/logging/config.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"\nModule contains ``ModinFormatter`` class.\n\n``ModinFormatter`` and the associated functions are used for logging configuration.\n\"\"\"\n\nimport datetime as dt\nimport logging\nimport platform\nimport threading\nimport time\nimport uuid\nfrom enum import IntEnum\nfrom logging.handlers import RotatingFileHandler\nfrom pathlib import Path\nfrom typing import Optional\n\nimport pandas\nimport psutil\n\nimport modin\nfrom modin.config import LogFileSize, LogMemoryInterval, LogMode\n\nDEFAULT_LOGGER_NAME = \"modin.logger.default\"\n\n__LOGGER_CONFIGURED__: bool = False\n\n\nclass LogLevel(IntEnum):  # noqa: PR01\n    \"\"\"Enumerator to specify the valid values of LogLevel accepted by Logger.setLevel().\"\"\"\n\n    DEBUG = 10\n    INFO = 20\n    WARNING = 30\n    ERROR = 40\n    CRITICAL = 50\n\n\nclass ModinFormatter(logging.Formatter):  # noqa: PR01\n    \"\"\"Implement custom formatter to log at microsecond granularity.\"\"\"\n\n    def formatTime(\n        self, record: logging.LogRecord, datefmt: Optional[str] = None\n    ) -> str:\n        \"\"\"\n        Return the creation time of the specified LogRecord as formatted text.\n\n        This custom 
logging formatter inherits from the logging module and\n        records timestamps at the microsecond level of granularity.\n\n        Parameters\n        ----------\n        record : LogRecord\n            The specified LogRecord object.\n        datefmt : str, default: None\n            Used with time.ststrftime() to format time record.\n\n        Returns\n        -------\n        str\n            Datetime string containing microsecond timestamp.\n        \"\"\"\n        ct = dt.datetime.fromtimestamp(record.created)\n        if datefmt:\n            s = ct.strftime(datefmt)\n        else:\n            # Format datetime object ct to microseconds\n            t = ct.strftime(\"%Y-%m-%d %H:%M:%S\")\n            s = f\"{t},{record.msecs:03}\"\n        return s\n\n\ndef bytes_int_to_str(num_bytes: int, suffix: str = \"B\") -> str:\n    \"\"\"\n    Scale bytes to its human-readable format (e.g: 1253656678 => '1.17GB').\n\n    Parameters\n    ----------\n    num_bytes : int\n        Number of bytes.\n    suffix : str, default: \"B\"\n        Suffix to add to conversion of num_bytes.\n\n    Returns\n    -------\n    str\n        Human-readable string format.\n    \"\"\"\n    factor = 1000\n    # Convert n_bytes to float b/c we divide it by factor\n    n_bytes: float = num_bytes\n    for unit in [\"\", \"K\", \"M\", \"G\", \"T\", \"P\"]:\n        if n_bytes < factor:\n            return f\"{n_bytes:.2f}{unit}{suffix}\"\n        n_bytes /= factor\n    return f\"{n_bytes * 1000:.2f}P{suffix}\"\n\n\ndef _create_logger(\n    namespace: str, job_id: str, log_name: str, log_level: LogLevel\n) -> logging.Logger:\n    \"\"\"\n    Create and configure logger as Modin expects it to be.\n\n    Parameters\n    ----------\n    namespace : str\n        Logging namespace to use, e.g. 
\"modin.logger.default\".\n    job_id : str\n        Part of path to where logs are stored.\n    log_name : str\n        Name of the log file to create.\n    log_level : LogLevel\n\n    Returns\n    -------\n    Logger\n        Logger object configured per Modin settings.\n    \"\"\"\n    # Pathlib makes it OS agnostic.\n    modin_path = Path(\".modin\")\n    modin_path.mkdir(exist_ok=True)\n\n    # Add gitignore to the log directory.\n    ignore_modin_path = modin_path / \".gitignore\"\n    if not ignore_modin_path.exists():\n        ignore_modin_path.write_text(\"# Automatically generated by modin.\\n*\\n\")\n\n    log_dir = modin_path / \"logs\" / f\"job_{job_id}\"\n    log_dir.mkdir(parents=True, exist_ok=True)\n    log_filename = log_dir / f\"{log_name}.log\"\n\n    logger = logging.getLogger(namespace)\n    logfile = RotatingFileHandler(\n        filename=log_filename,\n        mode=\"a\",\n        maxBytes=LogFileSize.get() * int(1e6),\n        backupCount=10,\n    )\n    formatter = ModinFormatter(\n        fmt=\"%(process)d, %(thread)d, %(asctime)s, %(message)s\",\n        datefmt=\"%Y-%m-%d,%H:%M:%S.%f\",\n    )\n    logfile.setFormatter(formatter)\n    logger.addHandler(logfile)\n    logger.setLevel(log_level)\n\n    return logger\n\n\ndef configure_logging() -> None:\n    \"\"\"Configure Modin logging by setting up directory structure and formatting.\"\"\"\n    global __LOGGER_CONFIGURED__\n    current_timestamp = dt.datetime.now().strftime(\"%Y.%m.%d_%H-%M-%S\")\n    job_id = f\"{current_timestamp}_{uuid.uuid4().hex}\"\n\n    logger = _create_logger(\n        DEFAULT_LOGGER_NAME,\n        job_id,\n        \"trace\",\n        LogLevel.INFO,\n    )\n\n    logger.info(f\"OS Version: {platform.platform()}\")\n    logger.info(f\"Python Version: {platform.python_version()}\")\n    num_physical_cores = str(psutil.cpu_count(logical=False))\n    num_total_cores = str(psutil.cpu_count(logical=True))\n    logger.info(f\"Modin Version: {modin.__version__}\")\n    
logger.info(f\"Pandas Version: {pandas.__version__}\")\n    logger.info(f\"Physical Cores: {num_physical_cores}\")\n    logger.info(f\"Total Cores: {num_total_cores}\")\n\n    mem_sleep = LogMemoryInterval.get()\n    mem_logger = _create_logger(\"modin_memory.logger\", job_id, \"memory\", LogLevel.DEBUG)\n\n    svmem = psutil.virtual_memory()\n    mem_logger.info(f\"Memory Total: {bytes_int_to_str(svmem.total)}\")\n    mem_logger.info(f\"Memory Available: {bytes_int_to_str(svmem.available)}\")\n    mem_logger.info(f\"Memory Used: {bytes_int_to_str(svmem.used)}\")\n    mem = threading.Thread(\n        target=memory_thread, args=[mem_logger, mem_sleep], daemon=True\n    )\n    mem.start()\n\n    _create_logger(\"modin.logger.errors\", job_id, \"error\", LogLevel.INFO)\n\n    __LOGGER_CONFIGURED__ = True\n\n\ndef memory_thread(logger: logging.Logger, sleep_time: int) -> None:\n    \"\"\"\n    Configure Modin logging system memory profiling thread.\n\n    Parameters\n    ----------\n    logger : logging.Logger\n        The logger object.\n    sleep_time : int\n        The interval at which to profile system memory.\n    \"\"\"\n    while True:\n        rss_mem = bytes_int_to_str(psutil.Process().memory_info().rss)\n        svmem = psutil.virtual_memory()\n        logger.info(f\"Memory Percentage: {svmem.percent}%\")\n        logger.info(f\"RSS Memory: {rss_mem}\")\n        time.sleep(sleep_time)\n\n\ndef get_logger(namespace: str = \"modin.logger.default\") -> logging.Logger:\n    \"\"\"\n    Configure Modin logger based on Modin config and returns the logger.\n\n    Parameters\n    ----------\n    namespace : str, default: \"modin.logger.default\"\n        Which namespace to use for logging.\n\n    Returns\n    -------\n    logging.Logger\n        The Modin logger.\n    \"\"\"\n    if not __LOGGER_CONFIGURED__ and LogMode.get() != \"disable\":\n        configure_logging()\n    return logging.getLogger(namespace)\n"
  },
  {
    "path": "modin/logging/logger_decorator.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"\nModule contains the functions designed for the enable/disable of logging.\n\n``enable_logging`` is used for decorating individual Modin functions or classes.\n\"\"\"\nfrom __future__ import annotations\n\nfrom functools import wraps\nfrom time import perf_counter\nfrom types import FunctionType, MethodType\nfrom typing import Any, Callable, Dict, Optional, Tuple, TypeVar, overload\n\nfrom modin.config import LogMode\nfrom modin.logging.metrics import emit_metric\n\nfrom .config import LogLevel, get_logger\n\n_MODIN_LOGGER_NOWRAP = \"__modin_logging_nowrap__\"\n\nFn = TypeVar(\"Fn\", bound=Any)\n\n\ndef disable_logging(func: Callable) -> Any:\n    \"\"\"\n    Disable logging of one particular function. 
Useful for decorated classes.\n\n    Parameters\n    ----------\n    func : callable\n        A method in a logger-decorated class for which logging should be disabled.\n\n    Returns\n    -------\n    func\n        A function with logging disabled.\n    \"\"\"\n    setattr(func, _MODIN_LOGGER_NOWRAP, True)\n    return func\n\n\n@overload\ndef enable_logging(modin_layer: Fn) -> Fn:\n    # This helps preserve typings when the decorator is used without parentheses\n    pass\n\n\n@overload\ndef enable_logging(\n    modin_layer: str = \"PANDAS-API\",\n    name: Optional[str] = None,\n    log_level: LogLevel = LogLevel.INFO,\n) -> Callable[[Fn], Fn]:\n    pass\n\n\ndef enable_logging(\n    modin_layer: str | Fn = \"PANDAS-API\",\n    name: Optional[str] = None,\n    log_level: LogLevel = LogLevel.INFO,\n) -> Callable[[Fn], Fn] | Fn:\n    \"\"\"\n    Log Decorator used on specific Modin functions or classes.\n\n    Parameters\n    ----------\n    modin_layer : str or object to decorate, default: \"PANDAS-API\"\n        Specified by the logger (e.g. 
PANDAS-API).\n        If it's an object to decorate, call logger_decorator() on it with default arguments.\n    name : str, optional\n        The name of the object the decorator is being applied to.\n        Composed from the decorated object name if not specified.\n    log_level : LogLevel, default: LogLevel.INFO\n        The log level (LogLevel.INFO, LogLevel.DEBUG, LogLevel.WARNING, etc.).\n\n    Returns\n    -------\n    func\n        A decorator function.\n    \"\"\"\n    if not isinstance(modin_layer, str):\n        # assume the decorator is used in a form without parenthesis like:\n        # @enable_logging\n        # def func()\n        return enable_logging()(modin_layer)\n\n    def decorator(obj: Fn) -> Fn:\n        \"\"\"Decorate function or class to add logs to Modin API function(s).\"\"\"\n        if isinstance(obj, type):\n            seen: Dict[Any, Any] = {}\n            for attr_name, attr_value in vars(obj).items():\n                if isinstance(\n                    attr_value, (FunctionType, MethodType, classmethod, staticmethod)\n                ) and not hasattr(attr_value, _MODIN_LOGGER_NOWRAP):\n                    try:\n                        wrapped = seen[attr_value]\n                    except KeyError:\n                        wrapped = seen[attr_value] = enable_logging(\n                            modin_layer,\n                            f\"{name or obj.__name__}.{attr_name}\",\n                            log_level,\n                        )(attr_value)\n\n                    setattr(obj, attr_name, wrapped)\n            return obj\n        elif isinstance(obj, classmethod):\n            return classmethod(decorator(obj.__func__))  # type: ignore [return-value, arg-type]\n        elif isinstance(obj, staticmethod):\n            return staticmethod(decorator(obj.__func__))  # type: ignore [return-value, arg-type]\n\n        assert isinstance(modin_layer, str), \"modin_layer is somehow not a string!\"\n\n        api_call_name = 
f\"{name or obj.__name__}\"\n        log_line = f\"{modin_layer.upper()}::{api_call_name}\"\n        metric_name = f\"{modin_layer.lower()}.{api_call_name.lower()}\"\n        start_line = f\"START::{log_line}\"\n        stop_line = f\"STOP::{log_line}\"\n\n        @wraps(obj)\n        def run_and_log(*args: Tuple, **kwargs: Dict) -> Any:\n            \"\"\"\n            Compute function with logging if Modin logging is enabled.\n\n            Parameters\n            ----------\n            *args : tuple\n                The function arguments.\n            **kwargs : dict\n                The function keyword arguments.\n\n            Returns\n            -------\n            Any\n            \"\"\"\n            start_time = perf_counter()\n            if LogMode.get() == \"disable\":\n                result = obj(*args, **kwargs)\n                emit_metric(metric_name, perf_counter() - start_time)\n                return result\n\n            logger = get_logger()\n            logger.log(log_level, start_line)\n            try:\n                result = obj(*args, **kwargs)\n                emit_metric(metric_name, perf_counter() - start_time)\n            except BaseException as e:\n                # Only log the exception if a deeper layer of the modin stack has not\n                # already logged it.\n                if not hasattr(e, \"_modin_logged\"):\n                    # use stack_info=True so that even if we are a few layers deep in\n                    # modin, we log a stack trace that includes calls to higher layers\n                    # of modin\n                    get_logger(\"modin.logger.errors\").exception(\n                        stop_line, stack_info=True\n                    )\n                    e._modin_logged = True  # type: ignore[attr-defined]\n                raise\n            finally:\n                logger.log(log_level, stop_line)\n            return result\n\n        # make sure we won't decorate multiple times\n        
return disable_logging(run_and_log)\n\n    return decorator\n"
  },
  {
    "path": "modin/logging/metrics.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"\nModule contains metrics handler functions.\n\nAllows for the registration of functions to collect\nAPI metrics.\n\"\"\"\n\nimport re\nfrom typing import Callable, Union\n\nfrom modin.config.envvars import MetricsMode\n\nmetric_name_pattern = r\"[a-zA-Z\\._\\-0-9]+$\"\n_metric_handlers: list[Callable[[str, Union[int, float]], None]] = []\n\n\n# Metric/Telemetry hooks can be implemented by plugin engines\n# to collect discrete data on how modin is performing at the\n# high level modin layer.\ndef emit_metric(name: str, value: Union[int, float]) -> None:\n    \"\"\"\n    Emit a metric using the set of registered handlers.\n\n    Parameters\n    ----------\n    name : str, required\n            Name of the metric, in dot-format.\n    value : int or float required\n            Value of the metric.\n    \"\"\"\n    if MetricsMode.get() == \"disable\":\n        return\n    if not re.fullmatch(metric_name_pattern, name):\n        raise KeyError(\n            f\"Metrics name is not in metric-name dot format, (eg. 
modin.dataframe.hist.duration ): {name}\"\n        )\n\n    handlers = _metric_handlers.copy()\n    for fn in handlers:\n        try:\n            fn(f\"modin.{name}\", value)\n        except Exception:\n            clear_metric_handler(fn)\n\n\ndef add_metric_handler(handler: Callable[[str, Union[int, float]], None]) -> None:\n    \"\"\"\n    Add a metric handler to Modin which can collect metrics.\n\n    Parameters\n    ----------\n    handler : Callable, required\n    \"\"\"\n    _metric_handlers.append(handler)\n\n\ndef clear_metric_handler(handler: Callable[[str, Union[int, float]], None]) -> None:\n    \"\"\"\n    Remove a metric handler from Modin.\n\n    Parameters\n    ----------\n    handler : Callable, required\n    \"\"\"\n    if handler in _metric_handlers:\n        _metric_handlers.remove(handler)\n"
  },
  {
    "path": "modin/numpy/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport numpy\nfrom packaging import version\n\nfrom . import linalg\nfrom .arr import array\nfrom .array_creation import ones_like, tri, zeros_like\nfrom .array_shaping import append, hstack, ravel, shape, split, transpose\nfrom .constants import e, euler_gamma, inf, nan, newaxis, pi\n\nif version.parse(numpy.__version__) < version.parse(\"2.0.0b1\"):\n    from .constants import (\n        NAN,\n        NINF,\n        NZERO,\n        PINF,\n        PZERO,\n        Inf,\n        Infinity,\n        NaN,\n        infty,\n    )\n\nfrom .logic import (\n    all,\n    any,\n    equal,\n    greater,\n    greater_equal,\n    iscomplex,\n    isfinite,\n    isinf,\n    isnan,\n    isnat,\n    isneginf,\n    isposinf,\n    isreal,\n    isscalar,\n    less,\n    less_equal,\n    logical_and,\n    logical_not,\n    logical_or,\n    logical_xor,\n    not_equal,\n)\nfrom .math import (\n    abs,\n    absolute,\n    add,\n    amax,\n    amin,\n    argmax,\n    argmin,\n    divide,\n    dot,\n    exp,\n    float_power,\n    floor_divide,\n    max,\n    maximum,\n    mean,\n    min,\n    minimum,\n    mod,\n    multiply,\n    power,\n    prod,\n    remainder,\n    sqrt,\n    
subtract,\n    sum,\n    true_divide,\n    var,\n)\nfrom .trigonometry import tanh\n\n\ndef where(condition, x=None, y=None):\n    if condition is True:\n        return x\n    if condition is False:\n        return y\n    if hasattr(condition, \"where\"):\n        return condition.where(x=x, y=y)\n    raise NotImplementedError(\n        f\"np.where for condition of type {type(condition)} is not yet supported in Modin.\"\n    )\n\n\n__all__ = [  # noqa: F405\n    \"linalg\",\n    \"array\",\n    \"zeros_like\",\n    \"ones_like\",\n    \"ravel\",\n    \"shape\",\n    \"transpose\",\n    \"all\",\n    \"any\",\n    \"isfinite\",\n    \"isinf\",\n    \"isnan\",\n    \"isnat\",\n    \"isneginf\",\n    \"isposinf\",\n    \"iscomplex\",\n    \"isreal\",\n    \"isscalar\",\n    \"logical_not\",\n    \"logical_and\",\n    \"logical_or\",\n    \"logical_xor\",\n    \"greater\",\n    \"greater_equal\",\n    \"less\",\n    \"less_equal\",\n    \"equal\",\n    \"not_equal\",\n    \"absolute\",\n    \"abs\",\n    \"add\",\n    \"divide\",\n    \"dot\",\n    \"float_power\",\n    \"floor_divide\",\n    \"power\",\n    \"prod\",\n    \"multiply\",\n    \"remainder\",\n    \"mod\",\n    \"subtract\",\n    \"sum\",\n    \"true_divide\",\n    \"mean\",\n    \"maximum\",\n    \"amax\",\n    \"max\",\n    \"minimum\",\n    \"amin\",\n    \"min\",\n    \"where\",\n    \"e\",\n    \"euler_gamma\",\n    \"inf\",\n    \"nan\",\n    \"newaxis\",\n    \"pi\",\n    \"sqrt\",\n    \"tanh\",\n    \"exp\",\n    \"argmax\",\n    \"argmin\",\n    \"var\",\n    \"split\",\n    \"hstack\",\n    \"append\",\n    \"tri\",\n]\nif version.parse(numpy.__version__) < version.parse(\"2.0.0b1\"):\n    __all__ += [\n        \"Inf\",\n        \"Infinity\",\n        \"NAN\",\n        \"NINF\",\n        \"NZERO\",\n        \"NaN\",\n        \"PINF\",\n        \"PZERO\",\n        \"infty\",\n    ]\n"
  },
  {
    "path": "modin/numpy/arr.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module houses ``array`` class, that is distributed version of ``numpy.array``.\"\"\"\n\nfrom inspect import signature\nfrom math import prod\n\nimport numpy\nimport pandas\nfrom pandas.api.types import is_scalar\nfrom pandas.core.dtypes.common import is_bool_dtype, is_list_like, is_numeric_dtype\n\nimport modin.pandas as pd\nfrom modin.core.dataframe.algebra import Binary, Map, Reduce\nfrom modin.error_message import ErrorMessage\n\nfrom .utils import try_convert_from_interoperable_type\n\n\ndef check_kwargs(order=\"C\", subok=True, keepdims=None, casting=\"same_kind\", where=True):\n    if order not in [\"K\", \"C\"]:\n        ErrorMessage.single_warning(\n            \"Array order besides 'C' is not currently supported in Modin. Defaulting to 'C' order.\"\n        )\n    if not subok:\n        ErrorMessage.single_warning(\n            \"Subclassing types is not currently supported in Modin. 
Defaulting to the same base dtype.\"\n        )\n    if keepdims:\n        ErrorMessage.single_warning(\n            \"Modin does not yet support broadcasting between nested 1D arrays and 2D arrays.\"\n        )\n    if casting != \"same_kind\":\n        ErrorMessage.single_warning(\n            \"Modin does not yet support the `casting` argument.\"\n        )\n    if not (\n        is_scalar(where) or (isinstance(where, array) and is_bool_dtype(where.dtype))\n    ):\n        if not isinstance(where, array):\n            raise NotImplementedError(\n                f\"Modin only supports scalar or modin.numpy.array `where` parameter, not `where` parameter of type {type(where)}\"\n            )\n        raise TypeError(\n            f\"Cannot cast array data from {where.dtype} to dtype('bool') according to the rule 'safe'\"\n        )\n\n\ndef check_can_broadcast_to_output(arr_in: \"array\", arr_out: \"array\"):\n    if not isinstance(arr_out, array):\n        raise TypeError(\"return arrays must be of modin.numpy.array type.\")\n    # Broadcasting is ok if both arrays have matching ndim + shape, OR\n    # arr_in is 1xN or a 1D N-element array and arr_out is MxN.\n    # Note that 1xN arr_in cannot be broadcasted into a 1D N-element arr_out.\n    #\n    # This is slightly different from the rules for checking if two inputs\n    # of a binary operation can be broadcasted together.\n    broadcast_ok = (\n        (\n            # Case 1: arrays have matching ndim + shape\n            # Case 2a: arr_in is 1D N-element, arr_out is 1D N-element (covered here)\n            arr_in._ndim == arr_out._ndim\n            and arr_in.shape == arr_out.shape\n        )\n        or (\n            # Case 2b: both arrays are 2D, arr_in is 1xN and arr_out is MxN\n            arr_in._ndim == 2\n            and arr_out._ndim == 2\n            and arr_in.shape[0] == 1\n            and arr_in.shape[1] == arr_out.shape[1]\n        )\n        or (\n            # Case 2c: arr_in is 1D N-element, 
arr_out is MxN\n            arr_in._ndim == 1\n            and arr_out._ndim == 2\n            and arr_in.shape[0] == arr_out.shape[1]\n            and arr_out.shape[0] == 1\n        )\n    )\n    # Case 2b would require duplicating the 1xN result M times to match the shape of out,\n    # which we currently do not support. See GH#5831.\n    if (\n        arr_in._ndim == 2\n        and arr_out._ndim == 2\n        and arr_in.shape[0] == 1\n        and arr_in.shape[1] == arr_out.shape[1]\n        and arr_in.shape[0] != 1\n    ):\n        raise NotImplementedError(\n            f\"Modin does not currently support broadcasting shape {arr_in.shape} to output operand with shape {arr_out.shape}\"\n        )\n    if not broadcast_ok:\n        raise ValueError(\n            f\"non-broadcastable output operand with shape {arr_out.shape} doesn't match the broadcast shape {arr_in.shape}\"\n        )\n\n\ndef fix_dtypes_and_determine_return(\n    query_compiler_in, _ndim, dtype=None, out=None, where=True\n):\n    if dtype is not None:\n        query_compiler_in = query_compiler_in.astype(\n            {col_name: dtype for col_name in query_compiler_in.columns}\n        )\n    result = array(_query_compiler=query_compiler_in, _ndim=_ndim)\n    if out is not None:\n        out = try_convert_from_interoperable_type(out, copy=False)\n        check_can_broadcast_to_output(result, out)\n        result._query_compiler = result._query_compiler.astype(\n            {col_name: out.dtype for col_name in result._query_compiler.columns}\n        )\n        if isinstance(where, array):\n            out._update_inplace(where.where(result, out)._query_compiler)\n        elif where:\n            out._update_inplace(result._query_compiler)\n        return out\n    if isinstance(where, array) and out is None:\n        from .array_creation import zeros_like\n\n        out = zeros_like(result).astype(dtype if dtype is not None else result.dtype)\n        out._query_compiler = where.where(result, 
out)._query_compiler\n        return out\n    elif not where:\n        from .array_creation import zeros_like\n\n        return zeros_like(result)\n    return result\n\n\nclass array(object):\n    \"\"\"\n    Modin distributed representation of ``numpy.array``.\n\n    Internally, the data can be divided into partitions along both columns and rows\n    in order to parallelize computations and utilize the user's hardware as much as possible.\n\n    Notes\n    -----\n    The ``array`` class is a lightweight shim that relies on the pandas Query Compiler in order to\n    provide functionality.\n    \"\"\"\n\n    def __init__(\n        self,\n        object=None,\n        dtype=None,\n        *,\n        copy=True,\n        order=\"K\",\n        subok=False,\n        ndmin=0,\n        like=numpy._NoValue,\n        _query_compiler=None,\n        _ndim=None,\n    ):\n        self._siblings = []\n        ErrorMessage.single_warning(\n            \"Using Modin's new NumPy API. To convert from a Modin object to a NumPy array, either turn off the ModinNumpy flag, or use `modin.pandas.io.to_numpy`.\"\n        )\n        if isinstance(object, array):\n            _query_compiler = object._query_compiler.copy()\n            if not copy:\n                object._add_sibling(self)\n            _ndim = object._ndim\n        elif isinstance(object, (pd.DataFrame, pd.Series)):\n            _query_compiler = object._query_compiler.copy()\n            if not copy:\n                object._add_sibling(self)\n            _ndim = 1 if isinstance(object, pd.Series) else 2\n        if _query_compiler is not None:\n            self._query_compiler = _query_compiler\n            self._ndim = _ndim\n            new_dtype = pandas.core.dtypes.cast.find_common_type(\n                list(self._query_compiler.dtypes.values)\n            )\n        elif is_list_like(object) and not is_list_like(object[0]):\n            series = pd.Series(object)\n            self._query_compiler = 
series._query_compiler\n            self._ndim = 1\n            new_dtype = self._query_compiler.dtypes.values[0]\n        else:\n            target_kwargs = {\n                \"dtype\": None,\n                \"copy\": True,\n                \"order\": \"K\",\n                \"subok\": False,\n                \"ndmin\": 0,\n                \"like\": numpy._NoValue,\n            }\n            for key, value in target_kwargs.copy().items():\n                if value == locals()[key]:\n                    target_kwargs.pop(key)\n                else:\n                    target_kwargs[key] = locals()[key]\n            arr = numpy.asarray(object)\n            assert arr.ndim in (\n                1,\n                2,\n            ), \"modin.numpy currently only supports 1D and 2D objects.\"\n            self._ndim = len(arr.shape)\n            if self._ndim > 2:\n                ErrorMessage.not_implemented(\n                    \"NumPy arrays with dimensions higher than 2 are not yet supported.\"\n                )\n\n            self._query_compiler = pd.DataFrame(arr)._query_compiler\n            new_dtype = arr.dtype\n        # These two lines are necessary so that our query compiler does not keep track of indices\n        # and try to map like indices to like indices. (e.g. 
if we multiply two arrays that used\n        # to be dataframes, and the dataframes had the same column names but ordered differently\n        # we want to do a simple broadcast where we only consider position, as numpy would, rather\n        # than pair columns with the same name and multiply them.)\n        self._query_compiler = self._query_compiler.reset_index(drop=True)\n        self._query_compiler.columns = range(len(self._query_compiler.columns))\n        new_dtype = new_dtype if dtype is None else dtype\n        if isinstance(new_dtype, pandas.Float64Dtype):\n            new_dtype = numpy.float64\n        cols_with_wrong_dtype = self._query_compiler.dtypes != new_dtype\n        if cols_with_wrong_dtype.any():\n            self._query_compiler = self._query_compiler.astype(\n                {\n                    col_name: new_dtype\n                    for col_name in self._query_compiler.columns[cols_with_wrong_dtype]\n                }\n            )\n        self.indexer = None\n\n    def __getitem__(self, key):\n        if isinstance(key, array) and is_bool_dtype(key.dtype) and key._ndim == 2:\n            raise NotImplementedError(\n                \"Advanced indexing with 2D boolean indexes is not currently supported.\"\n            )\n        if self.indexer is None:\n            from .indexing import ArrayIndexer\n\n            self.indexer = ArrayIndexer(self)\n        return self.indexer.__getitem__(key)\n\n    def __setitem__(self, key, item):\n        if self.indexer is None:\n            from .indexing import ArrayIndexer\n\n            self.indexer = ArrayIndexer(self)\n        return self.indexer.__setitem__(key, item)\n\n    def _add_sibling(self, sibling):\n        \"\"\"\n        Add an array object to the list of siblings.\n\n        Siblings are objects that share the same query compiler. 
This function is called\n        when a shallow copy is made.\n\n        Parameters\n        ----------\n        sibling : BasePandasDataset\n            Dataset to add to siblings list.\n        \"\"\"\n        sibling._siblings = self._siblings + [self]\n        self._siblings += [sibling]\n        for sib in self._siblings:\n            sib._siblings += [sibling]\n\n    def _update_inplace(self, new_query_compiler):\n        \"\"\"\n        Update the current array inplace.\n\n        Parameters\n        ----------\n        new_query_compiler : query_compiler\n            The new QueryCompiler to use to manage the data.\n        \"\"\"\n        old_query_compiler = self._query_compiler\n        self._query_compiler = new_query_compiler\n        for sib in self._siblings:\n            sib._query_compiler = new_query_compiler\n        old_query_compiler.free()\n\n    def _validate_axis(self, axis):\n        \"\"\"\n        Check that the provided axis argument is valid on this array.\n\n        Parameters\n        ----------\n        axis : int, optional\n            The axis argument passed to the function.\n\n        Returns\n        -------\n        int, optional\n            Axis to apply the function over (None, 0, or 1).\n\n        Raises\n        -------\n        numpy.AxisError\n            if the axis is invalid.\n        \"\"\"\n        if axis is not None and axis < 0:\n            new_axis = axis + self._ndim\n            if self._ndim == 1 and new_axis != 0:\n                raise numpy.AxisError(axis, 1)\n            elif self._ndim == 2 and new_axis not in [0, 1]:\n                raise numpy.AxisError(axis, 2)\n            return new_axis\n        return axis\n\n    def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):\n        ufunc_name = ufunc.__name__\n        supported_array_layer = hasattr(self, ufunc_name) or hasattr(\n            self, f\"__{ufunc_name}__\"\n        )\n        if supported_array_layer:\n            args = []\n        
    for input in inputs:\n                input = try_convert_from_interoperable_type(input)\n                if not (isinstance(input, array) or is_scalar(input)):\n                    input = array(input)\n                args += [input]\n            function = (\n                getattr(args[0], ufunc_name)\n                if hasattr(args[0], ufunc_name)\n                else getattr(args[0], f\"__{ufunc_name}__\")\n            )\n            len_expected_arguments = len(\n                [\n                    param\n                    for param in signature(function).parameters.values()\n                    if param.default == param.empty\n                ]\n            )\n            if len_expected_arguments == (len(args) - 1) and method == \"__call__\":\n                return function(*tuple(args[1:]), **kwargs)\n            else:\n                ErrorMessage.single_warning(\n                    f\"{ufunc} method {method} is not yet supported in Modin. Defaulting to NumPy.\"\n                )\n                args = []\n                for input in inputs:\n                    if isinstance(input, array):\n                        input = input._to_numpy()\n                    if isinstance(input, pd.DataFrame):\n                        input = input._query_compiler.to_numpy()\n                    if isinstance(input, pd.Series):\n                        input = input._query_compiler.to_numpy().flatten()\n                    args += [input]\n                output = self._to_numpy().__array_ufunc__(\n                    ufunc, method, *args, **kwargs\n                )\n                if is_scalar(output):\n                    return output\n                return array(output)\n        new_ufunc = None\n        out_ndim = -1\n        if method == \"__call__\":\n            if len(inputs) == 1:\n                new_ufunc = Map.register(ufunc)\n                out_ndim = len(inputs[0].shape)\n            else:\n                new_ufunc = 
Binary.register(ufunc)\n                out_ndim = max(\n                    [len(inp.shape) for inp in inputs if hasattr(inp, \"shape\")]\n                )\n        elif method == \"reduce\":\n            if len(inputs) == 1:\n                new_ufunc = Reduce.register(ufunc, axis=kwargs.get(\"axis\", None))\n            if kwargs.get(\"axis\", None) is None:\n                out_ndim = 0\n            else:\n                out_ndim = len(inputs[0].shape) - 1\n        elif method == \"accumulate\":\n            if len(inputs) == 1:\n                new_ufunc = Reduce.register(ufunc, axis=None)\n            out_ndim = 0\n        if new_ufunc is None:\n            ErrorMessage.single_warning(\n                f\"{ufunc} is not yet supported in Modin. Defaulting to NumPy.\"\n            )\n            args = []\n            for input in inputs:\n                if isinstance(input, array):\n                    input = input._to_numpy()\n                if isinstance(input, pd.DataFrame):\n                    input = input._query_compiler.to_numpy()\n                if isinstance(input, pd.Series):\n                    input = input._query_compiler.to_numpy().flatten()\n                args += [input]\n            output = self._to_numpy().__array_ufunc__(ufunc, method, *args, **kwargs)\n            if is_scalar(output):\n                return output\n            return array(output)\n        args = []\n        for input in inputs:\n            input = try_convert_from_interoperable_type(input)\n            if not (isinstance(input, array) or is_scalar(input)):\n                input = array(input)\n            args += [\n                input._query_compiler if hasattr(input, \"_query_compiler\") else input\n            ]\n        out_kwarg = kwargs.get(\"out\", None)\n        if out_kwarg is not None:\n            # If `out` is a modin.numpy.array, `kwargs.get(\"out\")` returns a 1-tuple\n            # whose only element is that array, so we need to unwrap it 
from the tuple.\n            out_kwarg = out_kwarg[0]\n        where_kwarg = kwargs.get(\"where\", True)\n        kwargs[\"out\"] = None\n        kwargs[\"where\"] = True\n        result = new_ufunc(*args, **kwargs)\n        return fix_dtypes_and_determine_return(\n            result,\n            out_ndim,\n            dtype=kwargs.get(\"dtype\", None),\n            out=out_kwarg,\n            where=where_kwarg,\n        )\n\n    def __array_function__(self, func, types, args, kwargs):\n        from . import array_creation as creation\n        from . import array_shaping as shaping\n        from . import math\n\n        func_name = func.__name__\n        modin_func = None\n        if hasattr(math, func_name):\n            modin_func = getattr(math, func_name)\n        elif hasattr(shaping, func_name):\n            modin_func = getattr(shaping, func_name)\n        elif hasattr(creation, func_name):\n            modin_func = getattr(creation, func_name)\n        if modin_func is None:\n            return NotImplemented\n        return modin_func(*args, **kwargs)\n\n    def where(self, x=None, y=None):\n        if not is_bool_dtype(self.dtype):\n            raise NotImplementedError(\n                \"Modin currently only supports where on condition arrays with boolean dtype.\"\n            )\n        if x is None and y is None:\n            ErrorMessage.single_warning(\n                \"np.where method with only condition specified is not yet supported in Modin. 
Defaulting to NumPy.\"\n            )\n            condition = self._to_numpy()\n            return array(numpy.where(condition))\n        x, y = try_convert_from_interoperable_type(\n            x\n        ), try_convert_from_interoperable_type(y)\n        if not (\n            (isinstance(x, array) or is_scalar(x))\n            and (isinstance(y, array) or is_scalar(y))\n        ):\n            raise ValueError(\n                \"np.where requires x and y to either be np.arrays or scalars.\"\n            )\n        if is_scalar(x) and is_scalar(y):\n            ErrorMessage.single_warning(\n                \"np.where not supported when both x and y are scalars. Defaulting to NumPy.\"\n            )\n            return array(numpy.where(self._to_numpy(), x, y))\n        if is_scalar(x) and not is_scalar(y):\n            if self._ndim < y._ndim:\n                if not self.shape[0] == y.shape[1]:\n                    raise ValueError(\n                        f\"operands could not be broadcast together with shapes {self.shape} {y.shape}\"\n                    )\n                ErrorMessage.single_warning(\n                    \"np.where method where condition must be broadcast is not yet available in Modin. Defaulting to NumPy.\"\n                )\n                return array(numpy.where(self._to_numpy(), x, y._to_numpy()))\n            elif self._ndim == y._ndim:\n                if not self.shape == y.shape:\n                    raise ValueError(\n                        f\"operands could not be broadcast together with shapes {self.shape} {y.shape}\"\n                    )\n                return array(\n                    _query_compiler=y._query_compiler.where((~self)._query_compiler, x),\n                    _ndim=y._ndim,\n                )\n            else:\n                ErrorMessage.single_warning(\n                    \"np.where method with broadcast is not yet available in Modin. 
Defaulting to NumPy.\"\n                )\n                return numpy.where(self._to_numpy(), x, y._to_numpy())\n        if not is_scalar(x) and is_scalar(y):\n            if self._ndim < x._ndim:\n                if not self.shape[0] == x.shape[1]:\n                    raise ValueError(\n                        f\"operands could not be broadcast together with shapes {self.shape} {x.shape}\"\n                    )\n                ErrorMessage.single_warning(\n                    \"np.where method where condition must be broadcast is not yet available in Modin. Defaulting to NumPy.\"\n                )\n                return array(numpy.where(self._to_numpy(), x._to_numpy(), y))\n            elif self._ndim == x._ndim:\n                if not self.shape == x.shape:\n                    raise ValueError(\n                        f\"operands could not be broadcast together with shapes {self.shape} {x.shape}\"\n                    )\n                return array(\n                    _query_compiler=x._query_compiler.where(self._query_compiler, y),\n                    _ndim=x._ndim,\n                )\n            else:\n                ErrorMessage.single_warning(\n                    \"np.where method with broadcast is not yet available in Modin. Defaulting to NumPy.\"\n                )\n                return array(numpy.where(self._to_numpy(), x._to_numpy(), y))\n        if not (x.shape == y.shape and y.shape == self.shape):\n            ErrorMessage.single_warning(\n                \"np.where method with broadcast is not yet available in Modin. 
Defaulting to NumPy.\"\n            )\n            return array(numpy.where(self._to_numpy(), x._to_numpy(), y._to_numpy()))\n        return array(\n            _query_compiler=x._query_compiler.where(\n                self._query_compiler, y._query_compiler\n            ),\n            _ndim=self._ndim,\n        )\n\n    def max(\n        self, axis=None, dtype=None, out=None, keepdims=None, initial=None, where=True\n    ):\n        check_kwargs(keepdims=keepdims, where=where)\n        apply_axis = self._validate_axis(axis)\n        truthy_where = bool(where)\n        if initial is None and where is not True:\n            raise ValueError(\n                \"reduction operation 'maximum' does not have an identity, so to use a where mask one has to specify 'initial'\"\n            )\n        if self._ndim == 1:\n            if apply_axis == 1:\n                raise numpy.AxisError(1, 1)\n            target = where.where(self, initial) if isinstance(where, array) else self\n            result = target._query_compiler.max(axis=0)\n            if keepdims:\n                if initial is not None and result.lt(initial).any():\n                    result = pd.Series([initial])._query_compiler\n                if initial is not None and out is not None:\n                    out._update_inplace(\n                        (numpy.ones_like(out) * initial)._query_compiler\n                    )\n                if out is not None and out.shape != (1,):\n                    raise ValueError(\n                        f\"operand was set up as a reduction along axis 0, but the length of the axis is {out.shape[0]} (it has to be 1)\"\n                    )\n                if truthy_where or out is not None:\n                    return fix_dtypes_and_determine_return(\n                        result, 1, dtype, out, truthy_where\n                    )\n                else:\n                    return array([initial])\n            if initial is not None:\n                result = 
max(result.to_numpy()[0, 0], initial)\n            else:\n                result = result.to_numpy()[0, 0]\n            return result if truthy_where else initial\n        if axis is None:\n            target = where.where(self, initial) if isinstance(where, array) else self\n            result = target._query_compiler.max(axis=0).max(axis=1).to_numpy()[0, 0]\n            if initial is not None:\n                result = max(result, initial)\n            if keepdims:\n                if out is not None and out.shape != (1, 1):\n                    raise ValueError(\n                        f\"operand was set up as a reduction along axis 1, but the length of the axis is {out.shape[0]} (it has to be 1)\"\n                    )\n                if initial is not None and out is not None:\n                    out._update_inplace(\n                        (numpy.ones_like(out) * initial)._query_compiler\n                    )\n                if truthy_where or out is not None:\n                    return fix_dtypes_and_determine_return(\n                        array(numpy.array([[result]]))._query_compiler,\n                        2,\n                        dtype,\n                        out,\n                        truthy_where,\n                    )\n                else:\n                    return array([[initial]])\n            return result if truthy_where else initial\n        if apply_axis > 1:\n            raise numpy.AxisError(axis, 2)\n        target = where.where(self, initial) if isinstance(where, array) else self\n        result = target._query_compiler.max(axis=apply_axis)\n        new_ndim = self._ndim - 1 if not keepdims else self._ndim\n        if new_ndim == 0:\n            if initial is not None:\n                result = max(result.to_numpy()[0, 0], initial)\n            else:\n                result = result.to_numpy()[0, 0]\n            return result if truthy_where else initial\n        if not keepdims and apply_axis != 1:\n            
result = result.transpose()\n        if initial is not None and out is not None:\n            out._update_inplace((numpy.ones_like(out) * initial)._query_compiler)\n        intermediate = fix_dtypes_and_determine_return(\n            result, new_ndim, dtype, out, truthy_where\n        )\n        if initial is not None:\n            intermediate._update_inplace(\n                (intermediate > initial).where(intermediate, initial)._query_compiler\n            )\n        if truthy_where or out is not None:\n            return intermediate\n        else:\n            return numpy.ones_like(intermediate) * initial\n\n    def min(\n        self, axis=None, dtype=None, out=None, keepdims=None, initial=None, where=True\n    ):\n        check_kwargs(keepdims=keepdims, where=where)\n        truthy_where = bool(where)\n        apply_axis = self._validate_axis(axis)\n        if initial is None and where is not True:\n            raise ValueError(\n                \"reduction operation 'minimum' does not have an identity, so to use a where mask one has to specify 'initial'\"\n            )\n        if self._ndim == 1:\n            if apply_axis == 1:\n                raise numpy.AxisError(1, 1)\n            target = where.where(self, initial) if isinstance(where, array) else self\n            result = target._query_compiler.min(axis=0)\n            if keepdims:\n                if initial is not None and result.gt(initial).any():\n                    result = pd.Series([initial])._query_compiler\n                if initial is not None and out is not None:\n                    out._update_inplace(\n                        (numpy.ones_like(out) * initial)._query_compiler\n                    )\n                if out is not None and out.shape != (1,):\n                    raise ValueError(\n                        f\"operand was set up as a reduction along axis 0, but the length of the axis is {out.shape[0]} (it has to be 1)\"\n                    )\n                if 
truthy_where or out is not None:\n                    return fix_dtypes_and_determine_return(\n                        result, 1, dtype, out, truthy_where\n                    )\n                else:\n                    return array([initial])\n            if initial is not None:\n                result = min(result.to_numpy()[0, 0], initial)\n            else:\n                result = result.to_numpy()[0, 0]\n            return result if truthy_where else initial\n        if apply_axis is None:\n            target = where.where(self, initial) if isinstance(where, array) else self\n            result = target._query_compiler.min(axis=0).min(axis=1).to_numpy()[0, 0]\n            if initial is not None:\n                result = min(result, initial)\n            if keepdims:\n                if out is not None and out.shape != (1, 1):\n                    raise ValueError(\n                        f\"operand was set up as a reduction along axis 1, but the length of the axis is {out.shape[0]} (it has to be 1)\"\n                    )\n                if initial is not None and out is not None:\n                    out._update_inplace(\n                        (numpy.ones_like(out) * initial)._query_compiler\n                    )\n                if truthy_where or out is not None:\n                    return fix_dtypes_and_determine_return(\n                        array(numpy.array([[result]]))._query_compiler,\n                        2,\n                        dtype,\n                        out,\n                        truthy_where,\n                    )\n                else:\n                    return array([[initial]])\n            return result if truthy_where else initial\n        if apply_axis > 1:\n            raise numpy.AxisError(axis, 2)\n        target = where.where(self, initial) if isinstance(where, array) else self\n        result = target._query_compiler.min(axis=apply_axis)\n        new_ndim = self._ndim - 1 if not keepdims else 
self._ndim\n        if new_ndim == 0:\n            if initial is not None:\n                result = min(result.to_numpy()[0, 0], initial)\n            else:\n                result = result.to_numpy()[0, 0]\n            return result if truthy_where else initial\n        if not keepdims and apply_axis != 1:\n            result = result.transpose()\n        if initial is not None and out is not None:\n            out._update_inplace((numpy.ones_like(out) * initial)._query_compiler)\n        intermediate = fix_dtypes_and_determine_return(\n            result, new_ndim, dtype, out, truthy_where\n        )\n        if initial is not None:\n            intermediate._update_inplace(\n                (intermediate < initial).where(intermediate, initial)._query_compiler\n            )\n        if truthy_where or out is not None:\n            return intermediate\n        else:\n            return numpy.ones_like(intermediate) * initial\n\n    def __abs__(\n        self,\n        out=None,\n        where=True,\n        casting=\"same_kind\",\n        order=\"K\",\n        dtype=None,\n        subok=True,\n    ):\n        out_dtype = (\n            dtype\n            if dtype is not None\n            else (out.dtype if out is not None else self.dtype)\n        )\n        check_kwargs(order=order, casting=casting, subok=subok, where=where)\n        result = self._query_compiler.astype(\n            {col_name: out_dtype for col_name in self._query_compiler.columns}\n        ).abs()\n        if dtype is not None:\n            result = result.astype({col_name: dtype for col_name in result.columns})\n        if out is not None:\n            out = try_convert_from_interoperable_type(out, copy=False)\n            check_can_broadcast_to_output(self, out)\n            out._update_inplace(result)\n            return out\n        return array(_query_compiler=result, _ndim=self._ndim)\n\n    absolute = __abs__\n\n    def __invert__(self):\n        \"\"\"\n        Apply bitwise inverse 
to each element of the array.\n\n        Returns\n        -------\n        array\n            New array containing the bitwise inverse of each value.\n        \"\"\"\n        if not is_numeric_dtype(self.dtype):\n            raise TypeError(f\"bad operand type for unary ~: '{self.dtype}'\")\n        return array(_query_compiler=self._query_compiler.invert(), _ndim=self._ndim)\n\n    def _preprocess_binary_op(self, other, cast_input_types=True, dtype=None, out=None):\n        \"\"\"\n        Processes arguments and performs dtype conversions necessary to perform binary\n        operations. If the arguments to the binary operation are a 1D object and a 2D object,\n        then it will swap the order of the caller and callee return values in order to\n        facilitate native broadcasting by modin.\n\n        This function may modify `self._query_compiler` and `other._query_compiler` by replacing\n        it with the result of `astype`.\n\n        Parameters\n        ----------\n        other : array or scalar\n            The RHS of the binary operation.\n        cast_input_types : bool, default: True\n            If specified, the columns of the caller/callee query compilers will be assigned\n            dtypes in the following priority, depending on what values were specified:\n            (1) the `dtype` argument,\n            (2) the dtype of the `out` array,\n            (3) the common parent dtype of `self` and `other`.\n            If this flag is not specified, then the resulting dtype is left to be determined\n            by the result of the modin operation.\n        dtype : numpy type, optional\n            The desired dtype of the output array.\n        out : array, optional\n            Existing array object to which to assign the computation's result.\n\n        Returns\n        -------\n        tuple\n            Returns a 4-tuple with the following elements:\n            - 0: QueryCompiler object that is the LHS of 
the binary operation, with types converted\n                 as needed.\n            - 1: QueryCompiler object OR scalar that is the RHS of the binary operation, with types\n                 converted as needed.\n            - 2: The ndim of the result.\n            - 3: kwargs to pass to the query compiler.\n        \"\"\"\n        other = try_convert_from_interoperable_type(other)\n\n        if cast_input_types:\n            operand_dtype = (\n                self.dtype\n                if not isinstance(other, array)\n                else pandas.core.dtypes.cast.find_common_type([self.dtype, other.dtype])\n            )\n            out_dtype = (\n                dtype\n                if dtype is not None\n                else (out.dtype if out is not None else operand_dtype)\n            )\n            self._query_compiler = self._query_compiler.astype(\n                {col_name: out_dtype for col_name in self._query_compiler.columns}\n            )\n        if is_scalar(other):\n            # Return early, since no need to check broadcasting behavior if RHS is a scalar\n            return (self._query_compiler, other, self._ndim, {})\n        elif cast_input_types:\n            other._query_compiler = other._query_compiler.astype(\n                {col_name: out_dtype for col_name in other._query_compiler.columns}\n            )\n\n        if not isinstance(other, array):\n            raise TypeError(\n                f\"Unsupported operand type(s): '{type(self)}' and '{type(other)}'\"\n            )\n        broadcast = self._ndim != other._ndim\n        if broadcast:\n            # In this case, we have a 1D object doing a binary op with a 2D object\n            caller, callee = (self, other) if self._ndim == 2 else (other, self)\n            if callee.shape[0] != caller.shape[1]:\n                raise ValueError(\n                    f\"operands could not be broadcast together with shapes {self.shape} {other.shape}\"\n                )\n            
return (\n                caller._query_compiler,\n                callee._query_compiler,\n                caller._ndim,\n                {\"broadcast\": broadcast, \"axis\": 1},\n            )\n        else:\n            if self.shape != other.shape:\n                # In this case, we either have two mismatched objects trying to do an operation\n                # or a nested 1D object that must be broadcasted trying to do an operation.\n                broadcast = True\n                if self.shape[0] == other.shape[0]:\n                    matched_dimension = 0\n                elif self.shape[1] == other.shape[1]:\n                    matched_dimension = 1\n                    broadcast = False\n                else:\n                    raise ValueError(\n                        f\"operands could not be broadcast together with shapes {self.shape} {other.shape}\"\n                    )\n                if (\n                    self.shape[matched_dimension ^ 1] == 1\n                    or other.shape[matched_dimension ^ 1] == 1\n                ):\n                    return (\n                        self._query_compiler,\n                        other._query_compiler,\n                        self._ndim,\n                        {\"broadcast\": broadcast, \"axis\": matched_dimension},\n                    )\n                else:\n                    raise ValueError(\n                        f\"operands could not be broadcast together with shapes {self.shape} {other.shape}\"\n                    )\n            else:\n                return (\n                    self._query_compiler,\n                    other._query_compiler,\n                    self._ndim,\n                    {\"broadcast\": False},\n                )\n\n    def _greater(\n        self,\n        x2,\n        /,\n        out=None,\n        *,\n        where=True,\n        casting=\"same_kind\",\n        order=\"K\",\n        dtype=None,\n        subok=True,\n    ):\n        
check_kwargs(where=where, casting=casting, order=order, subok=subok)\n        if is_scalar(x2):\n            return array(_query_compiler=self._query_compiler.gt(x2), _ndim=self._ndim)\n        caller, callee, new_ndim, kwargs = self._preprocess_binary_op(\n            x2, cast_input_types=False, dtype=dtype, out=out\n        )\n        if caller != self._query_compiler:\n            # In this case, we are doing an operation that looks like this 1D_object > 2D_object.\n            # For Modin to broadcast directly, we have to swap it so that the operation is actually\n            # 2D_object < 1D_object.\n            result = caller.lt(callee, **kwargs)\n        else:\n            result = caller.gt(callee, **kwargs)\n        return fix_dtypes_and_determine_return(result, new_ndim, dtype, out, where)\n\n    def __gt__(self, x2):\n        return self._greater(x2)\n\n    def _greater_equal(\n        self,\n        x2,\n        /,\n        out=None,\n        *,\n        where=True,\n        casting=\"same_kind\",\n        order=\"K\",\n        dtype=None,\n        subok=True,\n    ):\n        check_kwargs(where=where, casting=casting, order=order, subok=subok)\n        if is_scalar(x2):\n            return array(_query_compiler=self._query_compiler.ge(x2), _ndim=self._ndim)\n        caller, callee, new_ndim, kwargs = self._preprocess_binary_op(\n            x2, cast_input_types=False, dtype=dtype, out=out\n        )\n        if caller != self._query_compiler:\n            # In this case, we are doing an operation that looks like this 1D_object >= 2D_object.\n            # For Modin to broadcast directly, we have to swap it so that the operation is actually\n            # 2D_object <= 1D_object.\n            result = caller.le(callee, **kwargs)\n        else:\n            result = caller.ge(callee, **kwargs)\n        return fix_dtypes_and_determine_return(result, new_ndim, dtype, out, where)\n\n    def __ge__(self, x2):\n        return self._greater_equal(x2)\n\n    
def _less(\n        self,\n        x2,\n        /,\n        out=None,\n        *,\n        where=True,\n        casting=\"same_kind\",\n        order=\"K\",\n        dtype=None,\n        subok=True,\n    ):\n        check_kwargs(where=where, casting=casting, order=order, subok=subok)\n        if is_scalar(x2):\n            return array(_query_compiler=self._query_compiler.lt(x2), _ndim=self._ndim)\n        caller, callee, new_ndim, kwargs = self._preprocess_binary_op(\n            x2, cast_input_types=False, dtype=dtype, out=out\n        )\n        if caller != self._query_compiler:\n            # In this case, we are doing an operation that looks like this 1D_object < 2D_object.\n            # For Modin to broadcast directly, we have to swap it so that the operation is actually\n            # 2D_object > 1D_object.\n            result = caller.gt(callee, **kwargs)\n        else:\n            result = caller.lt(callee, **kwargs)\n        return fix_dtypes_and_determine_return(result, new_ndim, dtype, out, where)\n\n    def __lt__(self, x2):\n        return self._less(x2)\n\n    def _less_equal(\n        self,\n        x2,\n        /,\n        out=None,\n        *,\n        where=True,\n        casting=\"same_kind\",\n        order=\"K\",\n        dtype=None,\n        subok=True,\n    ):\n        check_kwargs(where=where, casting=casting, order=order, subok=subok)\n        if is_scalar(x2):\n            return array(_query_compiler=self._query_compiler.le(x2), _ndim=self._ndim)\n        caller, callee, new_ndim, kwargs = self._preprocess_binary_op(\n            x2, cast_input_types=False, dtype=dtype, out=out\n        )\n        if caller != self._query_compiler:\n            # In this case, we are doing an operation that looks like this 1D_object <= 2D_object.\n            # For Modin to broadcast directly, we have to swap it so that the operation is actually\n            # 2D_object >= 1D_object.\n            result = caller.ge(callee, **kwargs)\n        else:\n   
         result = caller.le(callee, **kwargs)\n        return fix_dtypes_and_determine_return(result, new_ndim, dtype, out, where)\n\n    def __le__(self, x2):\n        return self._less_equal(x2)\n\n    def _equal(\n        self,\n        x2,\n        /,\n        out=None,\n        *,\n        where=True,\n        casting=\"same_kind\",\n        order=\"K\",\n        dtype=None,\n        subok=True,\n    ):\n        check_kwargs(where=where, casting=casting, order=order, subok=subok)\n        if is_scalar(x2):\n            return array(_query_compiler=self._query_compiler.eq(x2), _ndim=self._ndim)\n        caller, callee, new_ndim, kwargs = self._preprocess_binary_op(\n            x2, cast_input_types=False, dtype=dtype, out=out\n        )\n        result = caller.eq(callee, **kwargs)\n        return fix_dtypes_and_determine_return(result, new_ndim, dtype, out, where)\n\n    def __eq__(self, x2):\n        return self._equal(x2)\n\n    def _not_equal(\n        self,\n        x2,\n        /,\n        out=None,\n        *,\n        where=True,\n        casting=\"same_kind\",\n        order=\"K\",\n        dtype=None,\n        subok=True,\n    ):\n        check_kwargs(where=where, casting=casting, order=order, subok=subok)\n        if is_scalar(x2):\n            return array(_query_compiler=self._query_compiler.ne(x2), _ndim=self._ndim)\n        caller, callee, new_ndim, kwargs = self._preprocess_binary_op(\n            x2, cast_input_types=False, dtype=dtype, out=out\n        )\n        result = caller.ne(callee, **kwargs)\n        return fix_dtypes_and_determine_return(result, new_ndim, dtype, out, where)\n\n    def __ne__(self, x2):\n        return self._not_equal(x2)\n\n    def _unary_math_operator(\n        self,\n        opName,\n        *args,\n        out=None,\n        where=True,\n        casting=\"same_kind\",\n        order=\"K\",\n        dtype=None,\n        subok=True,\n    ):\n        out_dtype = (\n            dtype\n            if dtype is not None\n 
           else (out.dtype if out is not None else self.dtype)\n        )\n        check_kwargs(order=order, casting=casting, subok=subok, where=where)\n        result = self._query_compiler.astype(\n            {col_name: out_dtype for col_name in self._query_compiler.columns}\n        )\n        result = getattr(result, opName)(*args)\n        if dtype is not None:\n            result = result.astype({col_name: dtype for col_name in result.columns})\n        if out is not None:\n            out = try_convert_from_interoperable_type(out)\n            check_can_broadcast_to_output(self, out)\n            out._query_compiler = result\n            return out\n        return array(_query_compiler=result, _ndim=self._ndim)\n\n    def tanh(\n        self,\n        out=None,\n        where=True,\n        casting=\"same_kind\",\n        order=\"K\",\n        dtype=None,\n        subok=True,\n    ):\n        return self._unary_math_operator(\n            \"_tanh\",\n            out=out,\n            where=where,\n            casting=casting,\n            order=order,\n            dtype=dtype,\n            subok=subok,\n        )\n\n    def exp(\n        self,\n        out=None,\n        where=True,\n        casting=\"same_kind\",\n        order=\"K\",\n        dtype=None,\n        subok=True,\n    ):\n        return self._unary_math_operator(\n            \"_exp\",\n            out=out,\n            where=where,\n            casting=casting,\n            order=order,\n            dtype=dtype,\n            subok=subok,\n        )\n\n    def sqrt(\n        self,\n        out=None,\n        where=True,\n        casting=\"same_kind\",\n        order=\"K\",\n        dtype=None,\n        subok=True,\n    ):\n        return self._unary_math_operator(\n            \"_sqrt\",\n            out=out,\n            where=where,\n            casting=casting,\n            order=order,\n            dtype=dtype,\n            subok=subok,\n        )\n\n    def append(self, values, 
axis=None):\n        if not isinstance(values, array):\n            if is_list_like(values):\n                lengths = [len(a) if is_list_like(a) else None for a in values]\n                if any(numpy.array(lengths[1:]) != lengths[0]):\n                    raise ValueError(\n                        \"setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (2,) + inhomogeneous part.\"\n                    )\n            values = array(values)\n        if axis is None:\n            return self.flatten().hstack([values.flatten()])\n        elif self._ndim == 1:\n            if values._ndim == 1:\n                return self.hstack([values])\n            raise ValueError(\n                f\"all the input arrays must have same number of dimensions, but the array at index 0 has 1 dimension(s) and the array at index 1 has {values._ndim} dimension(s)\"\n            )\n        if (axis ^ 1 < values._ndim) and self.shape[axis ^ 1] != values.shape[axis ^ 1]:\n            raise ValueError(\n                f\"all the input array dimensions except for the concatenation axis must match exactly, but along dimension {axis ^ 1}, the array at index 0 has size {self.shape[axis^1]} and the array at index 1 has size {values.shape[axis^1]}\"\n            )\n        new_qc = self._query_compiler.concat(axis, values._query_compiler)\n        return array(_query_compiler=new_qc, _ndim=self._ndim)\n\n    def hstack(self, others, dtype=None, casting=\"same_kind\"):\n        check_kwargs(casting=casting)\n        new_dtype = (\n            dtype\n            if dtype is not None\n            else pandas.core.dtypes.cast.find_common_type(\n                [self.dtype] + [a.dtype for a in others]\n            )\n        )\n        for index, i in enumerate([a._ndim for a in others]):\n            if i != self._ndim:\n                raise ValueError(\n                    f\"all the input arrays must have same number 
of dimensions, but the array at index 0 has {self._ndim} dimension(s) and the array at index {index} has {i} dimension(s)\"\n                )\n        if self._ndim == 1:\n            new_qc = self._query_compiler.concat(0, [o._query_compiler for o in others])\n        else:\n            for index, i in enumerate([a.shape[0] for a in others]):\n                if i != self.shape[0]:\n                    raise ValueError(\n                        f\"all the input array dimensions except for the concatenation axis must match exactly, but along dimension 0, the array at index 0 has size {self.shape[0]} and the array at index {index} has size {i}\"\n                    )\n            new_qc = self._query_compiler.concat(1, [o._query_compiler for o in others])\n        return array(_query_compiler=new_qc, _ndim=self._ndim, dtype=new_dtype)\n\n    def split(self, indices, axis=0):\n        if axis is not None and axis < 0:\n            new_axis = axis + self._ndim\n            if self._ndim == 1 and new_axis != 0:\n                raise IndexError\n            elif self._ndim == 2 and new_axis not in [0, 1]:\n                raise IndexError\n            axis = new_axis\n        if self._ndim == 1:\n            if axis != 0:\n                raise IndexError\n        if self._ndim == 2:\n            if axis > 1:\n                raise IndexError\n        arrays = []\n        if is_list_like(indices) or isinstance(indices, array):\n            if not isinstance(indices, array):\n                indices = array(indices)\n            if indices._ndim != 1:\n                raise TypeError(\n                    \"only integer scalar arrays can be converted to a scalar index\"\n                )\n            prev_index = 0\n            for i in range(len(indices) + 1):\n                if i < len(indices):\n                    end_index = indices._query_compiler.take_2d_positional(\n                        [i]\n                    ).to_numpy()[0, 0]\n                    if 
end_index == 0:\n                        ErrorMessage.single_warning(\n                            \"Defaulting to NumPy for empty arrays.\"\n                        )\n                        new_shape = list(self.shape)\n                        new_shape[axis] = 0\n                        arrays.append(numpy.empty(new_shape, dtype=self.dtype))\n                        continue\n                    if end_index < 0:\n                        end_index = self.shape[axis] + end_index\n                else:\n                    end_index = self.shape[axis]\n                if prev_index > self.shape[axis] or prev_index == end_index:\n                    ErrorMessage.single_warning(\"Defaulting to NumPy for empty arrays.\")\n                    new_shape = list(self.shape)\n                    new_shape[axis] = 0\n                    arrays.append(numpy.empty(new_shape, dtype=self.dtype))\n                else:\n                    idxs = list(range(prev_index, min(end_index, self.shape[axis])))\n                    if axis == 0:\n                        new_qc = self._query_compiler.take_2d_positional(index=idxs)\n                    else:\n                        new_qc = self._query_compiler.take_2d_positional(columns=idxs)\n                    arrays.append(array(_query_compiler=new_qc, _ndim=self._ndim))\n                prev_index = end_index\n        else:\n            if self.shape[axis] % indices != 0:\n                raise ValueError(\"array split does not result in an equal division\")\n            for i in range(0, self.shape[axis], self.shape[axis] // indices):\n                if axis == 0:\n                    new_qc = self._query_compiler.take_2d_positional(\n                        index=list(range(i, i + self.shape[axis] // indices))\n                    )\n                else:\n                    new_qc = self._query_compiler.take_2d_positional(\n                        columns=list(range(i, i + self.shape[axis] // indices))\n                    
)\n                arrays.append(array(_query_compiler=new_qc, _ndim=self._ndim))\n        return arrays\n\n    def _compute_masked_variance(self, mask, output_dtype, axis, ddof):\n        if axis == 0 and self._ndim != 1:\n            # Our broadcasting is wrong, so we can't do the final subtraction at the end.\n            raise NotImplementedError(\n                \"Masked variance on 2D arrays along axis = 0 is currently unsupported.\"\n            )\n        axis_mean = self.mean(axis, output_dtype, keepdims=True, where=mask)\n        target = mask.where(self, numpy.nan)\n        if self._ndim == 1:\n            axis_mean = axis_mean._to_numpy()[0]\n            target = target._query_compiler.sub(axis_mean).pow(2).sum(axis=axis)\n        else:\n            target = (target - axis_mean)._query_compiler.pow(2).sum(axis=axis)\n        num_elems = (\n            mask.where(self, 0)._query_compiler.notna().sum(axis=axis, skipna=False)\n        )\n        num_elems = num_elems.sub(ddof)\n        target = target.truediv(num_elems)\n        na_propagation_mask = mask.where(self, 0)._query_compiler.sum(\n            axis=axis, skipna=False\n        )\n        target = target.where(na_propagation_mask.notna(), numpy.nan)\n        return target\n\n    def var(\n        self, axis=None, dtype=None, out=None, ddof=0, keepdims=None, *, where=True\n    ):\n        out_dtype = (\n            dtype\n            if dtype is not None\n            else (out.dtype if out is not None else self.dtype)\n        )\n        out_type = getattr(out_dtype, \"type\", out_dtype)\n        if isinstance(where, array) and issubclass(out_type, numpy.integer):\n            out_dtype = numpy.float64\n        apply_axis = self._validate_axis(axis)\n        check_kwargs(keepdims=keepdims, where=where)\n        truthy_where = bool(where)\n        if self._ndim == 1:\n            if apply_axis == 1:\n                raise numpy.AxisError(1, 1)\n            if isinstance(where, array):\n              
  result = self._compute_masked_variance(where, out_dtype, 0, ddof)\n            else:\n                result = self._query_compiler.var(axis=0, skipna=False, ddof=ddof)\n            if keepdims:\n                if out is not None and out.shape != (1,):\n                    raise ValueError(\n                        f\"operand was set up as a reduction along axis 0, but the length of the axis is {out.shape[0]} (it has to be 1)\"\n                    )\n                if out is not None:\n                    out._query_compiler = (\n                        numpy.ones_like(out) * numpy.nan\n                    )._query_compiler\n                if truthy_where or out is not None:\n                    return fix_dtypes_and_determine_return(\n                        result, 1, dtype, out, truthy_where\n                    )\n                else:\n                    return array([numpy.nan], dtype=out_dtype)\n        if apply_axis is None:\n            # If any of the (non-masked) elements of our array are `NaN`, we know that the\n            # result of `mean` must be `NaN`. This is a fastpath to see if any unmasked elements\n            # are `NaN`.\n            contains_na_check = (\n                where.where(self, 0) if isinstance(where, array) else self\n            )\n            if (\n                contains_na_check._query_compiler.isna()\n                .any(axis=1)\n                .any(axis=0)\n                .to_numpy()[0, 0]\n            ):\n                return numpy.nan\n            result = where.where(self, numpy.nan) if isinstance(where, array) else self\n            # Since our current QueryCompiler does not have a variance that reduces 2D objects to\n            # a single value, we need to calculate the variance ourselves. 
First though, we need\n            # to figure out how many objects that we are taking the variance over (since any\n            # entries in our array that are `numpy.nan` must be ignored when taking the variance,\n            # and so cannot be included in the final division (of the sum over num total elements))\n            num_na_elements = (\n                result._query_compiler.isna().sum(axis=1).sum(axis=0).to_numpy()[0, 0]\n            )\n            num_total_elements = prod(self.shape) - num_na_elements\n            mean = (\n                numpy.array(\n                    [result._query_compiler.sum(axis=1).sum(axis=0).to_numpy()[0, 0]],\n                    dtype=out_dtype,\n                )\n                / num_total_elements\n            )[0]\n            result = (\n                numpy.array(\n                    [\n                        result._query_compiler.sub(mean)\n                        .pow(2)\n                        .sum(axis=1)\n                        .sum(axis=0)\n                        .to_numpy()[0, 0]\n                    ],\n                    dtype=out_dtype,\n                )\n                / (num_total_elements - ddof)\n            )[0]\n            if keepdims:\n                if out is not None and out.shape != (1, 1):\n                    raise ValueError(\n                        f\"operand was set up as a reduction along axis 1, but the length of the axis is {out.shape[0]} (it has to be 1)\"\n                    )\n                if out is not None:\n                    out._query_compiler = (\n                        numpy.ones_like(out) * numpy.nan\n                    )._query_compiler\n                if truthy_where or out is not None:\n                    return fix_dtypes_and_determine_return(\n                        array(numpy.array([[result]]))\n                        .astype(out_dtype)\n                        ._query_compiler,\n                        2,\n                        dtype,\n       
                 out,\n                        truthy_where,\n                    )\n                else:\n                    return array([[numpy.nan]], dtype=out_dtype)\n            return result if truthy_where else numpy.nan\n        if apply_axis > 1:\n            raise numpy.AxisError(axis, 2)\n        if isinstance(where, array):\n            result = self._compute_masked_variance(where, out_dtype, apply_axis, ddof)\n        else:\n            result = self._query_compiler.astype(\n                {col_name: out_dtype for col_name in self._query_compiler.columns}\n            ).var(axis=apply_axis, skipna=False, ddof=ddof)\n        new_ndim = self._ndim - 1 if not keepdims else self._ndim\n        if new_ndim == 0:\n            return result.to_numpy()[0, 0] if truthy_where else numpy.nan\n        if not keepdims and apply_axis != 1:\n            result = result.transpose()\n        if out is not None:\n            out._query_compiler = (numpy.ones_like(out) * numpy.nan)._query_compiler\n        if truthy_where or out is not None:\n            return fix_dtypes_and_determine_return(\n                result, new_ndim, dtype, out, truthy_where\n            )\n        else:\n            return (\n                numpy.ones(array(_query_compiler=result, _ndim=new_ndim).shape)\n            ) * numpy.nan\n\n    def _compute_masked_mean(self, mask, output_dtype, axis):\n        # By default, pandas ignores NaN values when doing computations.\n        # NumPy; however, propagates the value by default. 
We use pandas\n        # default behaviour in order to mask values (by replacing them)\n        # with NaN when initially computing the mean, but we need to propagate\n        # NaN values that were not masked to the final output, so we do a\n        # sum along the same axis (where masked values are 0) to see where\n        # NumPy would propagate NaN, and swap out those values in our result\n        # with NaN.\n        target = mask.where(self, numpy.nan)._query_compiler\n        target = target.astype(\n            {col_name: output_dtype for col_name in target.columns}\n        ).mean(axis=axis)\n        na_propagation_mask = mask.where(self, 0)._query_compiler\n        na_propagation_mask = na_propagation_mask.sum(axis=axis, skipna=False)\n        target = target.where(na_propagation_mask.notna(), numpy.nan)\n        return target\n\n    def mean(self, axis=None, dtype=None, out=None, keepdims=None, *, where=True):\n        out_dtype = (\n            dtype\n            if dtype is not None\n            else (out.dtype if out is not None else self.dtype)\n        )\n        out_type = getattr(out_dtype, \"type\", out_dtype)\n        if isinstance(where, array) and issubclass(out_type, numpy.integer):\n            out_dtype = numpy.float64\n        apply_axis = self._validate_axis(axis)\n        check_kwargs(keepdims=keepdims, where=where)\n        truthy_where = bool(where)\n        if self._ndim == 1:\n            if apply_axis == 1:\n                raise numpy.AxisError(1, 1)\n            if isinstance(where, array):\n                result = self._compute_masked_mean(where, out_dtype, 0)\n            else:\n                result = self._query_compiler.astype(\n                    {col_name: out_dtype for col_name in self._query_compiler.columns}\n                ).mean(axis=0, skipna=False)\n            if keepdims:\n                if out is not None and out.shape != (1,):\n                    raise ValueError(\n                        f\"operand was set 
up as a reduction along axis 0, but the length of the axis is {out.shape[0]} (it has to be 1)\"\n                    )\n                if out is not None:\n                    out._update_inplace(\n                        (numpy.ones_like(out) * numpy.nan)._query_compiler\n                    )\n                if truthy_where or out is not None:\n                    return fix_dtypes_and_determine_return(\n                        result, 1, dtype, out, truthy_where\n                    )\n                else:\n                    return array([numpy.nan], dtype=out_dtype)\n            # This is just to see if `where` is a truthy value. If `where` is an array,\n            # we would have already masked the input before computing `result`, so here\n            # we just want to ensure that `where=False` was not passed in, and if it was\n            # we return `numpy.nan`, since that is what NumPy would do.\n            return result.to_numpy()[0, 0] if where else numpy.nan\n        if apply_axis is None:\n            # If any of the (non-masked) elements of our array are `NaN`, we know that the\n            # result of `mean` must be `NaN`. This is a fastpath to see if any unmasked elements\n            # are `NaN`.\n            contains_na_check = (\n                where.where(self, 0) if isinstance(where, array) else self\n            )\n            if (\n                contains_na_check._query_compiler.isna()\n                .any(axis=1)\n                .any(axis=0)\n                .to_numpy()[0, 0]\n            ):\n                return numpy.nan\n            result = where.where(self, numpy.nan) if isinstance(where, array) else self\n            # Since our current QueryCompiler does not have a mean that reduces 2D objects to\n            # a single value, we need to calculate the mean ourselves. 
First though, we need\n            # to figure out how many objects that we are taking the mean over (since any\n            # entries in our array that are `numpy.nan` must be ignored when taking the mean,\n            # and so cannot be included in the final division (of the sum over num total elements))\n            num_na_elements = (\n                result._query_compiler.isna().sum(axis=1).sum(axis=0).to_numpy()[0, 0]\n            )\n            num_total_elements = prod(self.shape) - num_na_elements\n            result = (\n                numpy.array(\n                    [result._query_compiler.sum(axis=1).sum(axis=0).to_numpy()[0, 0]],\n                    dtype=out_dtype,\n                )\n                / num_total_elements\n            )[0]\n            if keepdims:\n                if out is not None and out.shape != (1, 1):\n                    raise ValueError(\n                        f\"operand was set up as a reduction along axis 1, but the length of the axis is {out.shape[0]} (it has to be 1)\"\n                    )\n                if out is not None:\n                    out._update_inplace(\n                        (numpy.ones_like(out) * numpy.nan)._query_compiler\n                    )\n                if truthy_where or out is not None:\n                    return fix_dtypes_and_determine_return(\n                        array(numpy.array([[result]]))\n                        .astype(out_dtype)\n                        ._query_compiler,\n                        2,\n                        dtype,\n                        out,\n                        truthy_where,\n                    )\n                else:\n                    return array([[numpy.nan]], dtype=out_dtype)\n            return result if truthy_where else numpy.nan\n        if apply_axis > 1:\n            raise numpy.AxisError(axis, 2)\n        if isinstance(where, array):\n            result = self._compute_masked_mean(where, out_dtype, apply_axis)\n        else:\n     
       result = self._query_compiler.astype(\n                {col_name: out_dtype for col_name in self._query_compiler.columns}\n            ).mean(axis=apply_axis, skipna=False)\n        new_ndim = self._ndim - 1 if not keepdims else self._ndim\n        if new_ndim == 0:\n            return result.to_numpy()[0, 0] if truthy_where else numpy.nan\n        if not keepdims and apply_axis != 1:\n            result = result.transpose()\n        if out is not None:\n            out._update_inplace((numpy.ones_like(out) * numpy.nan)._query_compiler)\n        if truthy_where or out is not None:\n            return fix_dtypes_and_determine_return(\n                result, new_ndim, dtype, out, truthy_where\n            )\n        else:\n            return (\n                numpy.ones(array(_query_compiler=result, _ndim=new_ndim).shape)\n            ) * numpy.nan\n\n    def __add__(\n        self,\n        x2,\n        out=None,\n        where=True,\n        casting=\"same_kind\",\n        order=\"K\",\n        dtype=None,\n        subok=True,\n    ):\n        check_kwargs(order=order, subok=subok, casting=casting, where=where)\n        caller, callee, new_ndim, kwargs = self._preprocess_binary_op(\n            x2, dtype=dtype, out=out\n        )\n        result = caller.add(callee, **kwargs)\n        return fix_dtypes_and_determine_return(result, new_ndim, dtype, out, where)\n\n    def __radd__(\n        self,\n        x2,\n        out=None,\n        where=True,\n        casting=\"same_kind\",\n        order=\"K\",\n        dtype=None,\n        subok=True,\n    ):\n        return self.__add__(x2, out, where, casting, order, dtype, subok)\n\n    def divide(\n        self,\n        x2,\n        out=None,\n        where=True,\n        casting=\"same_kind\",\n        order=\"K\",\n        dtype=None,\n        subok=True,\n    ):\n        check_kwargs(order=order, subok=subok, casting=casting, where=where)\n        caller, callee, new_ndim, kwargs = 
self._preprocess_binary_op(\n            x2, dtype=dtype, out=out\n        )\n        if caller != self._query_compiler:\n            # In this case, we are doing an operation that looks like this 1D_object/2D_object.\n            # For Modin to broadcast directly, we have to swap it so that the operation is actually\n            # 2D_object.rtruediv(1D_object).\n            result = caller.rtruediv(callee, **kwargs)\n        else:\n            result = caller.truediv(callee, **kwargs)\n        return fix_dtypes_and_determine_return(result, new_ndim, dtype, out, where)\n\n    __truediv__ = divide\n\n    def __rtruediv__(\n        self,\n        x2,\n        out=None,\n        where=True,\n        casting=\"same_kind\",\n        order=\"K\",\n        dtype=None,\n        subok=True,\n    ):\n        check_kwargs(order=order, subok=subok, casting=casting, where=where)\n        caller, callee, new_ndim, kwargs = self._preprocess_binary_op(\n            x2, dtype=dtype, out=out\n        )\n        if caller != self._query_compiler:\n            result = caller.truediv(callee, **kwargs)\n        else:\n            result = caller.rtruediv(callee, **kwargs)\n        return fix_dtypes_and_determine_return(result, new_ndim, dtype, out, where)\n\n    def floor_divide(\n        self,\n        x2,\n        out=None,\n        where=True,\n        casting=\"same_kind\",\n        order=\"K\",\n        dtype=None,\n        subok=True,\n    ):\n        operand_dtype = (\n            self.dtype\n            if not isinstance(x2, array)\n            else pandas.core.dtypes.cast.find_common_type([self.dtype, x2.dtype])\n        )\n        out_dtype = (\n            dtype\n            if dtype is not None\n            else (out.dtype if out is not None else operand_dtype)\n        )\n        check_kwargs(order=order, subok=subok, casting=casting, where=where)\n        if is_scalar(x2):\n            result = self._query_compiler.floordiv(x2)\n            if x2 == 0 and 
numpy.issubdtype(out_dtype, numpy.integer):\n                # NumPy's floor_divide by 0 works differently from pandas', so we need to fix\n                # the output.\n                result = (\n                    result.replace(numpy.inf, 0)\n                    .replace(-numpy.inf, 0)\n                    .where(self._query_compiler.ne(0), 0)\n                )\n            return fix_dtypes_and_determine_return(\n                result, self._ndim, dtype, out, where\n            )\n        caller, callee, new_ndim, kwargs = self._preprocess_binary_op(\n            x2, dtype=dtype, out=out\n        )\n        if caller != self._query_compiler:\n            # Modin does not correctly support broadcasting when the caller of the function is\n            # a Series (1D), and the operand is a Dataframe (2D). We cannot workaround this using\n            # commutativity, and `rfloordiv` also works incorrectly. GH#5529\n            raise NotImplementedError(\n                \"Using floor_divide with broadcast is not currently available in Modin.\"\n            )\n        result = caller.floordiv(callee, **kwargs)\n        if callee.eq(0).any() and numpy.issubdtype(out_dtype, numpy.integer):\n            # NumPy's floor_divide by 0 works differently from pandas', so we need to fix\n            # the output.\n            result = (\n                result.replace(numpy.inf, 0)\n                .replace(-numpy.inf, 0)\n                .where(callee.ne(0), 0)\n            )\n        return fix_dtypes_and_determine_return(result, new_ndim, dtype, out, where)\n\n    __floordiv__ = floor_divide\n\n    def power(\n        self,\n        x2,\n        out=None,\n        where=True,\n        casting=\"same_kind\",\n        order=\"K\",\n        dtype=None,\n        subok=True,\n    ):\n        check_kwargs(order=order, subok=subok, casting=casting, where=where)\n        caller, callee, new_ndim, kwargs = self._preprocess_binary_op(\n            x2, dtype=dtype, out=out\n      
  )\n        if caller != self._query_compiler:\n            # Modin does not correctly support broadcasting when the caller of the function is\n            # a Series (1D), and the operand is a Dataframe (2D). We cannot workaround this using\n            # commutativity, and `rpow` also works incorrectly. GH#5529\n            raise NotImplementedError(\n                \"Using power with broadcast is not currently available in Modin.\"\n            )\n        result = caller.pow(callee, **kwargs)\n        return fix_dtypes_and_determine_return(result, new_ndim, dtype, out, where)\n\n    __pow__ = power\n\n    def prod(\n        self, axis=None, dtype=None, out=None, keepdims=None, initial=None, where=True\n    ):\n        out_dtype = (\n            dtype\n            if dtype is not None\n            else (out.dtype if out is not None else self.dtype)\n        )\n        initial = 1 if initial is None else initial\n        check_kwargs(keepdims=keepdims, where=where)\n        apply_axis = self._validate_axis(axis)\n        truthy_where = bool(where)\n        if self._ndim == 1:\n            if apply_axis == 1:\n                raise numpy.AxisError(1, 1)\n            target = where.where(self, 1) if isinstance(where, array) else self\n            result = target._query_compiler.astype(\n                {col_name: out_dtype for col_name in target._query_compiler.columns}\n            ).prod(axis=0, skipna=False)\n            result = result.mul(initial)\n            if keepdims:\n                if out is not None:\n                    out._update_inplace(\n                        (numpy.ones_like(out) * initial)\n                        .astype(out_dtype)\n                        ._query_compiler\n                    )\n                if out is not None and out.shape != (1,):\n                    raise ValueError(\n                        f\"operand was set up as a reduction along axis 0, but the length of the axis is {out.shape[0]} (it has to be 1)\"\n           
         )\n                if truthy_where or out is not None:\n                    return fix_dtypes_and_determine_return(\n                        result, 1, dtype, out, truthy_where\n                    )\n                else:\n                    return array([initial], dtype=out_dtype)\n            return result.to_numpy()[0, 0] if truthy_where else initial\n        if apply_axis is None:\n            result = self\n            if isinstance(where, array):\n                result = where.where(self, 1)\n            result = (\n                result.astype(out_dtype)\n                ._query_compiler.prod(axis=1, skipna=False)\n                .prod(axis=0, skipna=False)\n                .to_numpy()[0, 0]\n            )\n            result *= initial\n            if keepdims:\n                if out is not None and out.shape != (1, 1):\n                    raise ValueError(\n                        f\"operand was set up as a reduction along axis 1, but the length of the axis is {out.shape[0]} (it has to be 1)\"\n                    )\n                if out is not None:\n                    out._update_inplace(\n                        (numpy.ones_like(out) * initial)\n                        .astype(out_dtype)\n                        ._query_compiler\n                    )\n                if truthy_where or out is not None:\n                    return fix_dtypes_and_determine_return(\n                        array(numpy.array([[result]]))\n                        .astype(out_dtype)\n                        ._query_compiler,\n                        2,\n                        dtype,\n                        out,\n                        truthy_where,\n                    )\n                else:\n                    return array([[initial]], dtype=out_dtype)\n            return result if truthy_where else initial\n        if apply_axis > 1:\n            raise numpy.AxisError(axis, 2)\n        target = where.where(self, 1) if isinstance(where, array) else 
self\n        result = target._query_compiler.astype(\n            {col_name: out_dtype for col_name in target._query_compiler.columns}\n        ).prod(axis=apply_axis, skipna=False)\n        result = result.mul(initial)\n        new_ndim = self._ndim - 1 if not keepdims else self._ndim\n        if new_ndim == 0:\n            return result.to_numpy()[0, 0] if truthy_where else initial\n        if not keepdims and apply_axis != 1:\n            result = result.transpose()\n        if initial is not None and out is not None:\n            out._update_inplace(\n                (numpy.ones_like(out) * initial).astype(out_dtype)._query_compiler\n            )\n        if truthy_where or out is not None:\n            return fix_dtypes_and_determine_return(\n                result, new_ndim, dtype, out, truthy_where\n            )\n        else:\n            return (\n                numpy.ones_like(array(_query_compiler=result, _ndim=new_ndim)) * initial\n            )\n\n    def multiply(\n        self,\n        x2,\n        out=None,\n        where=True,\n        casting=\"same_kind\",\n        order=\"K\",\n        dtype=None,\n        subok=True,\n    ):\n        check_kwargs(order=order, subok=subok, casting=casting, where=where)\n        caller, callee, new_ndim, kwargs = self._preprocess_binary_op(\n            x2, dtype=dtype, out=out\n        )\n        result = caller.mul(callee, **kwargs)\n        return fix_dtypes_and_determine_return(result, new_ndim, dtype, out, where)\n\n    __mul__ = multiply\n\n    def __rmul__(\n        self,\n        x2,\n        out=None,\n        where=True,\n        casting=\"same_kind\",\n        order=\"K\",\n        dtype=None,\n        subok=True,\n    ):\n        return self.multiply(x2, out, where, casting, order, dtype, subok)\n\n    def dot(self, other, out=None):\n        other = try_convert_from_interoperable_type(other)\n        if numpy.isscalar(other):\n            # other is scalar -- result is an array\n            
result = self._query_compiler.mul(other)\n            result_ndim = self._ndim\n        elif not isinstance(other, array):\n            raise TypeError(\n                f\"Unsupported operand type(s): '{type(self)}' and '{type(other)}'\"\n            )\n        elif self._ndim == 1 and other._ndim == 1:\n            # both 1D arrays -- result is a scalar\n            result = self._query_compiler.dot(\n                other._query_compiler, squeeze_self=True, squeeze_other=True\n            )\n            return result.to_numpy()[0, 0]\n        elif self._ndim == 2 and other._ndim == 2:\n            # both 2D arrays -- result is a 2D array\n            result = self._query_compiler.dot(other._query_compiler)\n            result_ndim = 2\n        elif self._ndim == 1 and other._ndim == 2:\n            result = self._query_compiler.dot(other._query_compiler, squeeze_self=True)\n            result_ndim = 1\n        elif self._ndim == 2 and other._ndim == 1:\n            result = self._query_compiler.dot(other._query_compiler)\n            result_ndim = 1\n        return fix_dtypes_and_determine_return(\n            result,\n            result_ndim,\n            out=out,\n        )\n\n    def __matmul__(self, other):\n        if numpy.isscalar(other):\n            # numpy's original error message is something cryptic about a gufunc signature\n            raise ValueError(\n                \"cannot call matmul with a scalar argument (use np.dot instead)\"\n            )\n        return self.dot(other)\n\n    def _norm(self, ord=None, axis=None, keepdims=False):\n        check_kwargs(keepdims=keepdims)\n        if ord is not None and ord not in (\"fro\",):  # , numpy.inf, -numpy.inf, 0):\n            raise NotImplementedError(\"unsupported ord argument for norm:\", ord)\n        if isinstance(axis, int) and axis < 0:\n            apply_axis = self._ndim + axis\n        else:\n            apply_axis = axis or 0\n        if apply_axis >= self._ndim or apply_axis < 0:\n    
        raise numpy.AxisError(axis, self._ndim)\n        result = self._query_compiler.pow(2)\n        if self._ndim == 2:\n            result = result.sum(axis=apply_axis)\n            if axis is None:\n                result = result.sum(axis=apply_axis ^ 1)\n        else:\n            result = result.sum(axis=0)\n        if axis is None:\n            # Return a scalar\n            return result._sqrt().to_numpy()[0, 0]\n        else:\n            result = result._sqrt()\n            # the DF may be transposed after processing through pandas\n            # check query compiler shape to ensure this is a row vector (1xN) not column (Nx1)\n            if len(result.index) != 1:\n                result = result.transpose()\n            return array(_query_compiler=result, _ndim=1)\n\n    def remainder(\n        self,\n        x2,\n        out=None,\n        where=True,\n        casting=\"same_kind\",\n        order=\"K\",\n        dtype=None,\n        subok=True,\n    ):\n        operand_dtype = (\n            self.dtype\n            if not isinstance(x2, array)\n            else pandas.core.dtypes.cast.find_common_type([self.dtype, x2.dtype])\n        )\n        out_dtype = (\n            dtype\n            if dtype is not None\n            else (out.dtype if out is not None else operand_dtype)\n        )\n        check_kwargs(order=order, subok=subok, casting=casting, where=where)\n        if is_scalar(x2):\n            result = self._query_compiler.astype(\n                {col_name: out_dtype for col_name in self._query_compiler.columns}\n            ).mod(x2)\n            if x2 == 0 and numpy.issubdtype(out_dtype, numpy.integer):\n                # NumPy's remainder by 0 works differently from pandas', so we need to fix\n                # the output.\n                result = result.replace(numpy.nan, 0)\n            return fix_dtypes_and_determine_return(\n                result, self._ndim, dtype, out, where\n            )\n        caller, callee, new_ndim, 
kwargs = self._preprocess_binary_op(\n            x2, dtype=dtype, out=out\n        )\n        if caller != self._query_compiler:\n            # Modin does not correctly support broadcasting when the caller of the function is\n            # a Series (1D), and the operand is a Dataframe (2D). We cannot workaround this using\n            # commutativity, and `rmod` also works incorrectly. GH#5529\n            raise NotImplementedError(\n                \"Using remainder with broadcast is not currently available in Modin.\"\n            )\n        result = caller.mod(callee, **kwargs)\n        if callee.eq(0).any() and numpy.issubdtype(out_dtype, numpy.integer):\n            # NumPy's floor_divide by 0 works differently from pandas', so we need to fix\n            # the output.\n            result = result.replace(numpy.nan, 0)\n        return fix_dtypes_and_determine_return(result, new_ndim, dtype, out, where)\n\n    __mod__ = remainder\n\n    def subtract(\n        self,\n        x2,\n        out=None,\n        where=True,\n        casting=\"same_kind\",\n        order=\"K\",\n        dtype=None,\n        subok=True,\n    ):\n        check_kwargs(order=order, subok=subok, casting=casting, where=where)\n        caller, callee, new_ndim, kwargs = self._preprocess_binary_op(\n            x2, dtype=dtype, out=out\n        )\n        if caller != self._query_compiler:\n            # In this case, we are doing an operation that looks like this 1D_object - 2D_object.\n            # For Modin to broadcast directly, we have to swap it so that the operation is actually\n            # 2D_object.rsub(1D_object).\n            result = caller.rsub(callee, **kwargs)\n        else:\n            result = caller.sub(callee, **kwargs)\n        return fix_dtypes_and_determine_return(result, new_ndim, dtype, out, where)\n\n    __sub__ = subtract\n\n    def __rsub__(\n        self,\n        x2,\n        out=None,\n        where=True,\n        casting=\"same_kind\",\n        
order=\"K\",\n        dtype=None,\n        subok=True,\n    ):\n        check_kwargs(order=order, subok=subok, casting=casting, where=where)\n        caller, callee, new_ndim, kwargs = self._preprocess_binary_op(\n            x2, dtype=dtype, out=out\n        )\n        if caller != self._query_compiler:\n            # In this case, we are doing an operation that looks like this 1D_object - 2D_object.\n            # For Modin to broadcast directly, we have to swap it so that the operation is actually\n            # 2D_object.sub(1D_object).\n            result = caller.sub(callee, **kwargs)\n        else:\n            result = caller.rsub(callee, **kwargs)\n        return fix_dtypes_and_determine_return(result, new_ndim, dtype, out, where)\n\n    def sum(\n        self, axis=None, dtype=None, out=None, keepdims=None, initial=None, where=True\n    ):\n        out_dtype = (\n            dtype\n            if dtype is not None\n            else (out.dtype if out is not None else self.dtype)\n        )\n        initial = 0 if initial is None else initial\n        check_kwargs(keepdims=keepdims, where=where)\n        apply_axis = self._validate_axis(axis)\n        truthy_where = bool(where)\n        if self._ndim == 1:\n            if apply_axis == 1:\n                raise numpy.AxisError(1, 1)\n            target = where.where(self, 0) if isinstance(where, array) else self\n            if target.dtype != out_dtype:\n                target = target.astype(out_dtype)\n            result = target._query_compiler.sum(axis=0, skipna=False)\n            if initial != 0:\n                result = result.add(initial)\n            if keepdims:\n                if out is not None:\n                    out._update_inplace(\n                        (\n                            numpy.ones_like(out, dtype=out_dtype) * initial\n                        )._query_compiler\n                    )\n                if out is not None and out.shape != (1,):\n                    raise 
ValueError(\n                        f\"operand was set up as a reduction along axis 0, but the length of the axis is {out.shape[0]} (it has to be 1)\"\n                    )\n                if truthy_where or out is not None:\n                    return fix_dtypes_and_determine_return(\n                        result, 1, dtype, out, truthy_where\n                    )\n                else:\n                    return array([initial], dtype=out_dtype)\n            return result.to_numpy()[0, 0] if truthy_where else initial\n        if apply_axis is None:\n            target = where.where(self, 0) if isinstance(where, array) else self\n            if target.dtype != out_dtype:\n                target = target.astype(out_dtype)\n            result = (\n                target._query_compiler.sum(axis=1, skipna=False)\n                .sum(axis=0, skipna=False)\n                .to_numpy()[0, 0]\n            )\n            if initial != 0:\n                result += initial\n            if keepdims:\n                if out is not None and out.shape != (1, 1):\n                    raise ValueError(\n                        f\"operand was set up as a reduction along axis 1, but the length of the axis is {out.shape[0]} (it has to be 1)\"\n                    )\n                if out is not None:\n                    out._update_inplace(\n                        (\n                            numpy.ones_like(out, dtype=out_dtype) * initial\n                        )._query_compiler\n                    )\n                if truthy_where or out is not None:\n                    return fix_dtypes_and_determine_return(\n                        array(numpy.array([[result]], dtype=out_dtype))._query_compiler,\n                        2,\n                        dtype,\n                        out,\n                        truthy_where,\n                    )\n                else:\n                    return array([[initial]], dtype=out_dtype)\n            return result if 
truthy_where else initial\n        if apply_axis > 1:\n            raise numpy.AxisError(axis, 2)\n        target = where.where(self, 0) if isinstance(where, array) else self\n        if target.dtype != out_dtype:\n            target = target.astype(out_dtype)\n        result = target._query_compiler.sum(axis=apply_axis, skipna=False)\n        if initial != 0:\n            result = result.add(initial)\n        new_ndim = self._ndim - 1 if not keepdims else self._ndim\n        if new_ndim == 0:\n            return result.to_numpy()[0, 0] if truthy_where else initial\n        if not keepdims and apply_axis != 1:\n            result = result.transpose()\n        if out is not None:\n            out._update_inplace(\n                (numpy.ones_like(out, dtype=out_dtype) * initial)._query_compiler\n            )\n        if truthy_where or out is not None:\n            return fix_dtypes_and_determine_return(\n                result, new_ndim, dtype, out, truthy_where\n            )\n        else:\n            return (\n                numpy.zeros_like(array(_query_compiler=result, _ndim=new_ndim))\n                + initial\n            )\n\n    def all(self, axis=None, out=None, keepdims=None, *, where=True):\n        check_kwargs(keepdims=keepdims, where=where)\n        truthy_where = bool(where)\n        apply_axis = self._validate_axis(axis)\n        target = where.where(self, True) if isinstance(where, array) else self\n        if self._ndim == 1:\n            if apply_axis == 1:\n                raise numpy.AxisError(1, 1)\n            result = target._query_compiler.all(axis=0)\n            if keepdims:\n                if out is not None and out.shape != (1,):\n                    raise ValueError(\n                        f\"operand was set up as a reduction along axis 0, but the length of the axis is {out.shape[0]} (it has to be 1)\"\n                    )\n                if truthy_where or out is not None:\n                    return 
fix_dtypes_and_determine_return(\n                        result, 1, bool, out, truthy_where\n                    )\n                else:\n                    return array([True], dtype=bool)\n            return result.to_numpy()[0, 0] if truthy_where else True\n        if apply_axis is None:\n            result = target._query_compiler.all(axis=1).all(axis=0)\n            if keepdims:\n                if out is not None and out.shape != (1, 1):\n                    raise ValueError(\n                        f\"operand was set up as a reduction along axis 0, but the length of the axis is {out.shape[0]} (it has to be 1)\"\n                    )\n                if truthy_where or out is not None:\n                    return fix_dtypes_and_determine_return(\n                        array(numpy.array([[result]], dtype=bool))._query_compiler,\n                        2,\n                        bool,\n                        out,\n                        truthy_where,\n                    )\n                else:\n                    return array([[True]], dtype=bool)\n            return result.to_numpy()[0, 0] if truthy_where else True\n        if apply_axis > 1:\n            raise numpy.AxisError(axis, 2)\n        result = target._query_compiler.all(axis=apply_axis)\n        new_ndim = self._ndim - 1 if not keepdims else self._ndim\n        if new_ndim == 0:\n            result = result.to_numpy()[0, 0]\n            return result if truthy_where else True\n        if not keepdims and apply_axis != 1:\n            result = result.transpose()\n        if truthy_where or out is not None:\n            return fix_dtypes_and_determine_return(\n                result, new_ndim, bool, out, truthy_where\n            )\n        else:\n            return numpy.ones_like(array(_query_compiler=result, _ndim=new_ndim))\n\n    _all = all\n\n    def any(self, axis=None, out=None, keepdims=None, *, where=True):\n        check_kwargs(keepdims=keepdims, where=where)\n        
truthy_where = bool(where)\n        apply_axis = self._validate_axis(axis)\n        target = where.where(self, False) if isinstance(where, array) else self\n        if self._ndim == 1:\n            if apply_axis == 1:\n                raise numpy.AxisError(1, 1)\n            result = target._query_compiler.any(axis=0)\n            if keepdims:\n                if out is not None and out.shape != (1,):\n                    raise ValueError(\n                        f\"operand was set up as a reduction along axis 0, but the length of the axis is {out.shape[0]} (it has to be 1)\"\n                    )\n                if truthy_where or out is not None:\n                    return fix_dtypes_and_determine_return(\n                        result, 1, bool, out, truthy_where\n                    )\n                else:\n                    return array([False], dtype=bool)\n            return result.to_numpy()[0, 0] if truthy_where else False\n        if apply_axis is None:\n            result = target._query_compiler.any(axis=1).any(axis=0)\n            if keepdims:\n                if out is not None and out.shape != (1, 1):\n                    raise ValueError(\n                        f\"operand was set up as a reduction along axis 0, but the length of the axis is {out.shape[0]} (it has to be 1)\"\n                    )\n                if truthy_where or out is not None:\n                    return fix_dtypes_and_determine_return(\n                        array(numpy.array([[result]], dtype=bool))._query_compiler,\n                        2,\n                        bool,\n                        out,\n                        truthy_where,\n                    )\n                else:\n                    return array([[False]], dtype=bool)\n            return result.to_numpy()[0, 0] if truthy_where else False\n        if apply_axis > 1:\n            raise numpy.AxisError(axis, 2)\n        result = target._query_compiler.any(axis=apply_axis)\n        new_ndim = 
self._ndim - 1 if not keepdims else self._ndim\n        if new_ndim == 0:\n            result = result.to_numpy()[0, 0]\n            return result if truthy_where else False\n        if not keepdims and apply_axis != 1:\n            result = result.transpose()\n        if truthy_where or out is not None:\n            return fix_dtypes_and_determine_return(\n                result, new_ndim, bool, out, truthy_where\n            )\n        else:\n            return numpy.zeros_like(array(_query_compiler=result, _ndim=new_ndim))\n\n    _any = any\n\n    def argmax(self, axis=None, out=None, keepdims=None):\n        check_kwargs(keepdims=keepdims)\n        apply_axis = self._validate_axis(axis)\n        if self._ndim == 1:\n            if apply_axis == 1:\n                raise numpy.AxisError(1, 1)\n            if self._query_compiler.isna().any(axis=1).any(axis=0).to_numpy()[0, 0]:\n                na_row_map = self._query_compiler.isna().any(axis=1)\n                result = na_row_map.idxmax()\n            else:\n                result = self._query_compiler.idxmax(axis=0)\n            if keepdims:\n                if out is not None and out.shape != (1,):\n                    raise ValueError(\n                        f\"operand was set up as a reduction along axis 0, but the length of the axis is {out.shape[0]} (it has to be 1)\"\n                    )\n                return fix_dtypes_and_determine_return(\n                    result, 1, numpy.int64, out, True\n                )\n            return result.to_numpy()[0, 0]\n        if apply_axis is None:\n            if self._query_compiler.isna().any(axis=1).any(axis=0).to_numpy()[0, 0]:\n                na_row_map = self._query_compiler.isna().any(axis=1)\n                na_row = self._query_compiler.getitem_array(na_row_map)\n                col_idx = na_row.to_numpy().argmax()\n                final_idxmax = na_row_map.idxmax().to_numpy().flatten()\n            else:\n                inner_idxs = 
self._query_compiler.idxmax(axis=1)\n                final_idxmax = (\n                    self._query_compiler.max(axis=1).idxmax(axis=0).to_numpy().flatten()\n                )\n                col_idx = inner_idxs.take_2d_positional(final_idxmax, [0]).to_numpy()[\n                    0, 0\n                ]\n            result = (self.shape[1] * final_idxmax[0]) + col_idx\n            if keepdims:\n                if out is not None and out.shape != (1, 1):\n                    raise ValueError(\n                        f\"operand was set up as a reduction along axis 0, but the length of the axis is {out.shape[0]} (it has to be 1)\"\n                    )\n                return fix_dtypes_and_determine_return(\n                    array(numpy.array([[result]], dtype=bool))._query_compiler,\n                    2,\n                    numpy.int64,\n                    out,\n                    True,\n                )\n            return result\n        if apply_axis > 1:\n            raise numpy.AxisError(axis, 2)\n        result = self._query_compiler.idxmax(axis=apply_axis)\n        na_mask = self._query_compiler.isna().any(axis=apply_axis)\n        if na_mask.any(axis=apply_axis ^ 1).to_numpy()[0, 0]:\n            na_idxs = self._query_compiler.isna().idxmax(axis=apply_axis)\n            result = na_idxs.where(na_mask, result)\n        new_ndim = self._ndim - 1 if not keepdims else self._ndim\n        if new_ndim == 0:\n            result = result.to_numpy()[0, 0]\n            return result\n        if not keepdims and apply_axis != 1:\n            result = result.transpose()\n        return fix_dtypes_and_determine_return(result, new_ndim, numpy.int64, out, True)\n\n    def argmin(self, axis=None, out=None, keepdims=None):\n        check_kwargs(keepdims=keepdims)\n        apply_axis = self._validate_axis(axis)\n        if self._ndim == 1:\n            if apply_axis == 1:\n                raise numpy.AxisError(1, 1)\n            if 
self._query_compiler.isna().any(axis=1).any(axis=0).to_numpy()[0, 0]:\n                na_row_map = self._query_compiler.isna().any(axis=1)\n                # numpy apparently considers nan to be the minimum value in an array if present\n                # therefore, we use idxmax on the mask array to identify where nans are\n                result = na_row_map.idxmax()\n            else:\n                result = self._query_compiler.idxmin(axis=0)\n            if keepdims:\n                if out is not None and out.shape != (1,):\n                    raise ValueError(\n                        f\"operand was set up as a reduction along axis 0, but the length of the axis is {out.shape[0]} (it has to be 1)\"\n                    )\n                return fix_dtypes_and_determine_return(\n                    result, 1, numpy.int64, out, True\n                )\n            return result.to_numpy()[0, 0]\n        if apply_axis is None:\n            if self._query_compiler.isna().any(axis=1).any(axis=0).to_numpy()[0, 0]:\n                na_row_map = self._query_compiler.isna().any(axis=1)\n                na_row = self._query_compiler.getitem_array(na_row_map)\n                col_idx = na_row.to_numpy().argmax()\n                final_idxmax = na_row_map.idxmax().to_numpy().flatten()\n            else:\n                inner_idxs = self._query_compiler.idxmin(axis=1)\n                final_idxmax = (\n                    self._query_compiler.min(axis=1).idxmin(axis=0).to_numpy().flatten()\n                )\n                col_idx = inner_idxs.take_2d_positional(final_idxmax, [0]).to_numpy()[\n                    0, 0\n                ]\n            result = (self.shape[1] * final_idxmax[0]) + col_idx\n            if keepdims:\n                if out is not None and out.shape != (1, 1):\n                    raise ValueError(\n                        f\"operand was set up as a reduction along axis 0, but the length of the axis is {out.shape[0]} (it has to be 1)\"\n   
                 )\n                return fix_dtypes_and_determine_return(\n                    array(numpy.array([[result]], dtype=bool))._query_compiler,\n                    2,\n                    numpy.int64,\n                    out,\n                    True,\n                )\n            return result\n        if apply_axis > 1:\n            raise numpy.AxisError(axis, 2)\n        result = self._query_compiler.idxmin(axis=apply_axis)\n        na_mask = self._query_compiler.isna().any(axis=apply_axis)\n        if na_mask.any(axis=apply_axis ^ 1).to_numpy()[0, 0]:\n            na_idxs = self._query_compiler.isna().idxmax(axis=apply_axis)\n            result = na_idxs.where(na_mask, result)\n        new_ndim = self._ndim - 1 if not keepdims else self._ndim\n        if new_ndim == 0:\n            result = result.to_numpy()[0, 0]\n            return result\n        if not keepdims and apply_axis != 1:\n            result = result.transpose()\n        return fix_dtypes_and_determine_return(result, new_ndim, numpy.int64, out, True)\n\n    def _isfinite(\n        self,\n        /,\n        out=None,\n        *,\n        where=True,\n        casting=\"same_kind\",\n        order=\"K\",\n        dtype=None,\n        subok=True,\n    ):\n        check_kwargs(where=where, casting=casting, order=order, subok=subok)\n        result = self._query_compiler._isfinite()\n        return fix_dtypes_and_determine_return(result, self._ndim, dtype, out, where)\n\n    def _isinf(\n        self,\n        /,\n        out=None,\n        *,\n        where=True,\n        casting=\"same_kind\",\n        order=\"K\",\n        dtype=None,\n        subok=True,\n    ):\n        check_kwargs(where=where, casting=casting, order=order, subok=subok)\n        result = self._query_compiler._isinf()\n        return fix_dtypes_and_determine_return(result, self._ndim, dtype, out, where)\n\n    def _isnan(\n        self,\n        /,\n        out=None,\n        *,\n        where=True,\n        
casting=\"same_kind\",\n        order=\"K\",\n        dtype=None,\n        subok=True,\n    ):\n        check_kwargs(where=where, casting=casting, order=order, subok=subok)\n        result = self._query_compiler.isna()\n        return fix_dtypes_and_determine_return(result, self._ndim, dtype, out, where)\n\n    def _isnat(\n        self,\n        /,\n        out=None,\n        *,\n        where=True,\n        casting=\"same_kind\",\n        order=\"K\",\n        dtype=None,\n        subok=True,\n    ):\n        check_kwargs(where=where, casting=casting, order=order, subok=subok)\n        result = self._query_compiler._isnat()\n        return fix_dtypes_and_determine_return(result, self._ndim, dtype, out, where)\n\n    def _isneginf(self, out=None):\n        result = self._query_compiler._isneginf()\n        return fix_dtypes_and_determine_return(result, self._ndim, out=out)\n\n    def _isposinf(self, out=None):\n        result = self._query_compiler._isposinf()\n        return fix_dtypes_and_determine_return(result, self._ndim, out=out)\n\n    def _iscomplex(self):\n        result = self._query_compiler._iscomplex()\n        return fix_dtypes_and_determine_return(result, self._ndim)\n\n    def _isreal(self):\n        result = self._query_compiler._isreal()\n        return fix_dtypes_and_determine_return(result, self._ndim)\n\n    def _logical_not(\n        self,\n        /,\n        out=None,\n        *,\n        where=True,\n        casting=\"same_kind\",\n        order=\"K\",\n        dtype=None,\n        subok=True,\n    ):\n        check_kwargs(where=where, casting=casting, order=order, subok=subok)\n        result = self._query_compiler._logical_not()\n        return fix_dtypes_and_determine_return(result, self._ndim, dtype, out, where)\n\n    def _logical_binop(\n        self, qc_method_name, x2, out, where, casting, order, dtype, subok\n    ):\n        check_kwargs(where=where, casting=casting, order=order, subok=subok)\n        if self._ndim != x2._ndim:\n  
          raise ValueError(\n                \"modin.numpy logic operators do not currently support broadcasting between arrays of different dimensions\"\n            )\n        caller, callee, new_ndim, kwargs = self._preprocess_binary_op(\n            x2, cast_input_types=False, dtype=dtype, out=out\n        )\n        # Deliberately do not pass **kwargs, since they're not used\n        result = getattr(caller, qc_method_name)(callee)\n        return fix_dtypes_and_determine_return(result, new_ndim, dtype, out, where)\n\n    def _logical_and(\n        self,\n        x2,\n        /,\n        out=None,\n        *,\n        where=True,\n        casting=\"same_kind\",\n        order=\"K\",\n        dtype=None,\n        subok=True,\n    ):\n        return self._logical_binop(\n            \"_logical_and\", x2, out, where, casting, order, dtype, subok\n        )\n\n    def _logical_or(\n        self,\n        x2,\n        /,\n        out=None,\n        *,\n        where=True,\n        casting=\"same_kind\",\n        order=\"K\",\n        dtype=None,\n        subok=True,\n    ):\n        return self._logical_binop(\n            \"_logical_or\", x2, out, where, casting, order, dtype, subok\n        )\n\n    def _logical_xor(\n        self,\n        x2,\n        /,\n        out=None,\n        *,\n        where=True,\n        casting=\"same_kind\",\n        order=\"K\",\n        dtype=None,\n        subok=True,\n    ):\n        return self._logical_binop(\n            \"_logical_xor\", x2, out, where, casting, order, dtype, subok\n        )\n\n    def flatten(self, order=\"C\"):\n        check_kwargs(order=order)\n        qcs = [\n            self._query_compiler.getitem_row_array([index_val]).reset_index(drop=True)\n            for index_val in self._query_compiler.index[1:]\n        ]\n        new_query_compiler = (\n            self._query_compiler.getitem_row_array([self._query_compiler.index[0]])\n            .reset_index(drop=True)\n            .concat(1, qcs, 
ignore_index=True)\n        )\n        new_query_compiler.columns = range(len(new_query_compiler.columns))\n        new_query_compiler = new_query_compiler.transpose()\n        new_ndim = 1\n        return array(_query_compiler=new_query_compiler, _ndim=new_ndim)\n\n    def _get_shape(self):\n        if self._ndim == 1:\n            return (len(self._query_compiler.index),)\n        return (len(self._query_compiler.index), len(self._query_compiler.columns))\n\n    def _set_shape(self, new_shape):\n        if not (isinstance(new_shape, int)) and not isinstance(new_shape, tuple):\n            raise TypeError(\n                f\"expected a sequence of integers or a single integer, got '{new_shape}'\"\n            )\n        elif isinstance(new_shape, tuple):\n            for dim in new_shape:\n                if not isinstance(dim, int):\n                    raise TypeError(\n                        f\"'{type(dim)}' object cannot be interpreted as an integer\"\n                    )\n\n        new_dimensions = new_shape if isinstance(new_shape, int) else prod(new_shape)\n        if new_dimensions != prod(self._get_shape()):\n            raise ValueError(\n                f\"cannot reshape array of size {prod(self._get_shape())} into {new_shape if isinstance(new_shape, tuple) else (new_shape,)}\"\n            )\n        if isinstance(new_shape, int) or len(new_shape) == 1:\n            self._update_inplace(self.flatten()._query_compiler)\n            self._ndim = 1\n        else:\n            raise NotImplementedError(\n                \"Modin numpy does not currently support reshaping to a 2D object\"\n            )\n\n    shape = property(_get_shape, _set_shape)\n\n    def transpose(self):\n        if self._ndim == 1:\n            return self\n        return array(_query_compiler=self._query_compiler.transpose(), _ndim=self._ndim)\n\n    T = property(transpose)\n\n    @property\n    def dtype(self):\n        dtype = self._query_compiler.dtypes\n        if self._ndim 
== 1:\n            return dtype[0]\n        else:\n            return pandas.core.dtypes.cast.find_common_type(list(dtype.values))\n\n    @property\n    def size(self):\n        return prod(self.shape)\n\n    def __len__(self):\n        return self.shape[0]\n\n    def astype(self, dtype, order=\"K\", casting=\"unsafe\", subok=True, copy=True):\n        if casting != \"unsafe\":\n            raise ValueError(\n                \"Modin does not support `astype` with `casting != unsafe`.\"\n            )\n        check_kwargs(order=order, subok=subok)\n        result = self._query_compiler.astype(\n            {col_name: dtype for col_name in self._query_compiler.columns}\n        )\n        if not copy and subok and numpy.issubdtype(self.dtype, dtype):\n            return self\n        return array(_query_compiler=result, _ndim=self._ndim)\n\n    def _build_repr_array(self):\n        def _generate_indices_for_axis(\n            axis_size, num_elements=numpy.get_printoptions()[\"edgeitems\"]\n        ):\n            if axis_size > num_elements * 2:\n                return list(range(num_elements + 1)) + list(\n                    range(axis_size - num_elements, axis_size)\n                )\n            return list(range(axis_size))\n\n        # We want to rely on NumPy for creating a string representation of this array; however\n        # we also don't want to materialize all of the data to the head node. Instead, we will\n        # materialize enough data that NumPy can build the summarized representation of the array\n        # (while changing with the NumPy print options so it will format this smaller array as\n        # abridged) and return this smaller array. 
In the worst case, this array will have\n        # (2*numpy.get_printoptions()[\"edgeitems\"] + 1)^2 items, so 49 items max for the default\n        # value of 3.\n        if self._ndim == 1 or self.shape[1] == 0:\n            idxs = _generate_indices_for_axis(len(self))\n            arr = self._query_compiler.getitem_row_array(idxs).to_numpy()\n            if self._ndim == 1:\n                arr = arr.flatten()\n        elif self.shape[0] == 1:\n            idxs = _generate_indices_for_axis(self.shape[1])\n            arr = self._query_compiler.getitem_column_array(idxs).to_numpy()\n        else:\n            row_idxs = _generate_indices_for_axis(len(self))\n            col_idxs = _generate_indices_for_axis(self.shape[1])\n            arr = self._query_compiler.take_2d_positional(row_idxs, col_idxs).to_numpy()\n        return arr\n\n    def __repr__(self):\n        # If we are dealing with a small array, we can just collate all the data on the\n        # head node and let numpy handle the logic to get a string representation.\n        if self.size <= numpy.get_printoptions()[\"threshold\"]:\n            return repr(self._to_numpy())\n        arr = self._build_repr_array()\n        prev_threshold = numpy.get_printoptions()[\"threshold\"]\n        numpy.set_printoptions(threshold=arr.size - 1)\n        try:\n            repr_str = repr(arr)\n        finally:\n            numpy.set_printoptions(threshold=prev_threshold)\n        return repr_str\n\n    def _to_numpy(self):\n        arr = self._query_compiler.to_numpy()\n        if self._ndim == 1:\n            arr = arr.flatten()\n        return arr\n"
  },
  {
    "path": "modin/numpy/array_creation.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module houses array creation methods for Modin's NumPy API.\"\"\"\n\nimport numpy\n\nfrom modin.error_message import ErrorMessage\n\nfrom .arr import array\n\n\ndef _create_array(dtype, shape, order, subok, numpy_method):\n    if order not in [\"K\", \"C\"]:\n        ErrorMessage.single_warning(\n            \"Array order besides 'C' is not currently supported in Modin. Defaulting to 'C' order.\"\n        )\n    if not subok:\n        ErrorMessage.single_warning(\n            \"Subclassing types is not currently supported in Modin. 
Defaulting to the same base dtype.\"\n        )\n    ErrorMessage.single_warning(f\"np.{numpy_method}_like defaulting to NumPy.\")\n    return array(getattr(numpy, numpy_method)(shape, dtype=dtype))\n\n\ndef zeros_like(a, dtype=None, order=\"K\", subok=True, shape=None):\n    if not isinstance(a, array):\n        ErrorMessage.bad_type_for_numpy_op(\"zeros_like\", type(a))\n        return numpy.zeros_like(a, dtype=dtype, order=order, subok=subok, shape=shape)\n    dtype = a.dtype if dtype is None else dtype\n    shape = a.shape if shape is None else shape\n    return _create_array(dtype, shape, order, subok, \"zeros\")\n\n\ndef ones_like(a, dtype=None, order=\"K\", subok=True, shape=None):\n    if not isinstance(a, array):\n        ErrorMessage.bad_type_for_numpy_op(\"ones_like\", type(a))\n        return numpy.ones_like(a, dtype=dtype, order=order, subok=subok, shape=shape)\n    dtype = a.dtype if dtype is None else dtype\n    shape = a.shape if shape is None else shape\n    return _create_array(dtype, shape, order, subok, \"ones\")\n\n\ndef tri(N, M=None, k=0, dtype=float, like=None):\n    if like is not None:\n        ErrorMessage.single_warning(\n            \"Modin NumPy does not support the `like` argument for np.tri. Defaulting to `like=None`.\"\n        )\n    ErrorMessage.single_warning(\"np.tri defaulting to NumPy.\")\n    return array(numpy.tri(N, M=M, k=k, dtype=dtype))\n"
  },
  {
    "path": "modin/numpy/array_shaping.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module houses array shaping methods for Modin's NumPy API.\"\"\"\n\nimport numpy\n\nfrom modin.error_message import ErrorMessage\n\nfrom .arr import array\nfrom .utils import try_convert_from_interoperable_type\n\n\ndef ravel(a, order=\"C\"):\n    a = try_convert_from_interoperable_type(a)\n    if not isinstance(a, array):\n        ErrorMessage.bad_type_for_numpy_op(\"ravel\", type(a))\n        return numpy.ravel(a, order=order)\n    if order != \"C\":\n        ErrorMessage.single_warning(\n            \"Array order besides 'C' is not currently supported in Modin. 
Defaulting to 'C' order.\"\n        )\n    return a.flatten(order)\n\n\ndef shape(a):\n    a = try_convert_from_interoperable_type(a)\n    if not isinstance(a, array):\n        ErrorMessage.bad_type_for_numpy_op(\"shape\", type(a))\n        return numpy.shape(a)\n    return a.shape\n\n\ndef transpose(a, axes=None):\n    a = try_convert_from_interoperable_type(a)\n    if not isinstance(a, array):\n        ErrorMessage.bad_type_for_numpy_op(\"transpose\", type(a))\n        return numpy.transpose(a, axes=axes)\n    if axes is not None:\n        raise NotImplementedError(\n            \"Modin does not support arrays higher than 2-dimensions. Please use `transpose` with `axis=None` on a 2-dimensional or lower object.\"\n        )\n    return a.transpose()\n\n\ndef split(arr, indices, axis=0):\n    arr = try_convert_from_interoperable_type(arr)\n    if not isinstance(arr, array):\n        ErrorMessage.bad_type_for_numpy_op(\"split\", type(arr))\n        return numpy.split(arr, indices, axis=axis)\n    return arr.split(indices, axis)\n\n\ndef hstack(tup, dtype=None, casting=\"same_kind\"):\n    a = try_convert_from_interoperable_type(tup[0])\n    if not isinstance(a, array):\n        ErrorMessage.bad_type_for_numpy_op(\"hstack\", type(a))\n        return numpy.hstack(tup, dtype=dtype, casting=casting)\n    return a.hstack(tup[1:], dtype, casting)\n\n\ndef append(arr, values, axis=None):\n    arr = try_convert_from_interoperable_type(arr)\n    if not isinstance(arr, array):\n        ErrorMessage.bad_type_for_numpy_op(\"append\", type(arr))\n        return numpy.append(arr, values, axis=axis)\n    return arr.append(values, axis)\n"
  },
  {
    "path": "modin/numpy/constants.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport numpy\nfrom numpy import e, euler_gamma, inf, nan, newaxis, pi\nfrom packaging import version\n\nif version.parse(numpy.__version__) < version.parse(\"2.0.0b1\"):\n    from numpy import NAN, NINF, NZERO, PINF, PZERO, Inf, Infinity, NaN, infty\n\n__all__ = [\n    \"e\",\n    \"euler_gamma\",\n    \"inf\",\n    \"nan\",\n    \"newaxis\",\n    \"pi\",\n]\n\nif version.parse(numpy.__version__) < version.parse(\"2.0.0b1\"):\n    __all__ += [\n        \"Inf\",\n        \"Infinity\",\n        \"NAN\",\n        \"NINF\",\n        \"NZERO\",\n        \"NaN\",\n        \"PINF\",\n        \"PZERO\",\n        \"infty\",\n    ]\n"
  },
  {
    "path": "modin/numpy/indexing.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n# noqa: MD02\n\"\"\"\nDetails about how Indexing Helper Class works.\n\n_LocationIndexerBase provide methods framework for __getitem__\n  and __setitem__ that work with Modin NumPy Array's internal index. Base\n  class's __{get,set}item__ takes in partitions & idx_in_partition data\n  and perform lookup/item write.\n\n_iLocIndexer is responsible for indexer specific logic and\n  lookup computation. Loc will take care of enlarge DataFrame. 
Both indexer\n  will take care of translating pandas's lookup to Modin DataFrame's internal\n  lookup.\n\nAn illustration is available at\nhttps://github.com/ray-project/ray/pull/1955#issuecomment-386781826\n\"\"\"\n\nimport itertools\n\nimport numpy as np\nimport pandas\nfrom pandas.api.types import is_bool, is_list_like\nfrom pandas.core.dtypes.common import is_bool_dtype, is_integer, is_integer_dtype\nfrom pandas.core.indexing import IndexingError\n\nfrom modin.error_message import ErrorMessage\nfrom modin.pandas.indexing import compute_sliced_len, is_range_like, is_slice, is_tuple\nfrom modin.pandas.utils import is_scalar\n\nfrom .arr import array\n\n\ndef broadcast_item(\n    obj,\n    row_lookup,\n    col_lookup,\n    item,\n    need_columns_reindex=True,\n):\n    \"\"\"\n    Use NumPy to broadcast or reshape item with reindexing.\n\n    Parameters\n    ----------\n    obj : DataFrame or Series\n        The object containing the necessary information about the axes.\n    row_lookup : slice or scalar\n        The global row index to locate inside of `item`.\n    col_lookup : range, array, list, slice or scalar\n        The global col index to locate inside of `item`.\n    item : DataFrame, Series, or query_compiler\n        Value that should be broadcast to a new shape of `to_shape`.\n    need_columns_reindex : bool, default: True\n        In the case of assigning columns to a dataframe (broadcasting is\n        part of the flow), reindexing is not needed.\n\n    Returns\n    -------\n    np.ndarray\n        `item` after it was broadcasted to `to_shape`.\n\n    Raises\n    ------\n    ValueError\n        1) If `row_lookup` or `col_lookup` contains values missing in\n        DataFrame/Series index or columns correspondingly.\n        2) If `item` cannot be broadcast from its own shape to `to_shape`.\n\n    Notes\n    -----\n    NumPy is memory efficient, there shouldn't be performance issue.\n    \"\"\"\n    new_row_len = (\n        
len(obj._query_compiler.index[row_lookup])\n        if isinstance(row_lookup, slice)\n        else len(row_lookup)\n    )\n    new_col_len = (\n        len(obj._query_compiler.columns[col_lookup])\n        if isinstance(col_lookup, slice)\n        else len(col_lookup)\n    )\n    to_shape = new_row_len, new_col_len\n\n    if isinstance(item, array):\n        # convert indices in lookups to names, as pandas reindex expects them to be so\n        axes_to_reindex = {}\n        index_values = obj._query_compiler.index[row_lookup]\n        if not index_values.equals(item._query_compiler.index):\n            axes_to_reindex[\"index\"] = index_values\n        if need_columns_reindex and isinstance(item, array) and item._ndim == 2:\n            column_values = obj._query_compiler.columns[col_lookup]\n            if not column_values.equals(item._query_compiler.columns):\n                axes_to_reindex[\"columns\"] = column_values\n        # New value for columns/index make that reindex add NaN values\n        if axes_to_reindex:\n            row_axes = axes_to_reindex.get(\"index\", None)\n            if row_axes is not None:\n                item._query_compiler = item._query_compiler.reindex(\n                    axis=0, labels=row_axes, copy=None\n                )\n            col_axes = axes_to_reindex.get(\"columns\", None)\n            if col_axes is not None:\n                item._query_compiler = item._query_compiler.reindex(\n                    axis=1, labels=col_axes, copy=None\n                )\n    try:\n        item = np.array(item) if not isinstance(item, array) else item._to_numpy()\n        if np.prod(to_shape) == np.prod(item.shape):\n            return item.reshape(to_shape)\n        else:\n            return np.broadcast_to(item, to_shape)\n    except ValueError:\n        from_shape = np.array(item).shape\n        raise ValueError(\n            f\"could not broadcast input array from shape {from_shape} into shape \"\n            + f\"{to_shape}\"\n  
      )\n\n\ndef is_boolean_array(x):\n    \"\"\"\n    Check that argument is an array of bool.\n\n    Parameters\n    ----------\n    x : object\n        Object to check.\n\n    Returns\n    -------\n    bool\n        True if argument is an array of bool, False otherwise.\n    \"\"\"\n    if isinstance(x, (np.ndarray, array, pandas.Series, pandas.Index)):\n        return is_bool_dtype(x.dtype)\n    return is_list_like(x) and all(map(is_bool, x))\n\n\ndef is_integer_array(x):\n    \"\"\"\n    Check that argument is an array of integers.\n\n    Parameters\n    ----------\n    x : object\n        Object to check.\n\n    Returns\n    -------\n    bool\n        True if argument is an array of integers, False otherwise.\n    \"\"\"\n    if isinstance(x, (np.ndarray, array, pandas.Series, pandas.Index)):\n        return is_integer_dtype(x.dtype)\n    return is_list_like(x) and all(map(is_integer, x))\n\n\ndef is_integer_slice(x):\n    \"\"\"\n    Check that argument is an array of int.\n\n    Parameters\n    ----------\n    x : object\n        Object to check.\n\n    Returns\n    -------\n    bool\n        True if argument is an array of int, False otherwise.\n    \"\"\"\n    if not is_slice(x):\n        return False\n    for pos in [x.start, x.stop, x.step]:\n        if not ((pos is None) or is_integer(pos)):\n            return False  # one position is neither None nor int\n    return True\n\n\ndef boolean_mask_to_numeric(indexer):\n    \"\"\"\n    Convert boolean mask to numeric indices.\n\n    Parameters\n    ----------\n    indexer : list-like of booleans\n\n    Returns\n    -------\n    np.ndarray of ints\n        Numerical positions of ``True`` elements in the passed `indexer`.\n    \"\"\"\n    if isinstance(indexer, (np.ndarray, array, pandas.Series)):\n        return np.where(indexer)[0]\n    else:\n        # It's faster to build the resulting numpy array from the reduced amount of data via\n        # `compress` iterator than convert non-numpy-like `indexer` to 
numpy and apply `np.where`.\n        return np.fromiter(\n            # `itertools.compress` masks `data` with the `selectors` mask,\n            # works about ~10% faster than a pure list comprehension\n            itertools.compress(data=range(len(indexer)), selectors=indexer),\n            dtype=np.int64,\n        )\n\n\n_ILOC_INT_ONLY_ERROR = \"\"\"\nLocation based indexing can only have [integer, integer slice (START point is\nINCLUDED, END point is EXCLUDED), listlike of integers, boolean array] types.\n\"\"\"\n\n\ndef _compute_ndim(row_loc, col_loc):\n    \"\"\"\n    Compute the number of dimensions of result from locators.\n\n    Parameters\n    ----------\n    row_loc : list or scalar\n        Row locator.\n    col_loc : list or scalar\n        Column locator.\n\n    Returns\n    -------\n    {0, 1, 2}\n        Number of dimensions in located dataset.\n    \"\"\"\n    row_scalar = is_scalar(row_loc) or is_tuple(row_loc)\n    col_scalar = is_scalar(col_loc) or is_tuple(col_loc)\n\n    if row_scalar and col_scalar:\n        ndim = 0\n    elif row_scalar ^ col_scalar:\n        ndim = 1\n    else:\n        ndim = 2\n\n    return ndim\n\n\nclass ArrayIndexer(object):\n    \"\"\"\n    An indexer for modin_arr.__{get|set}item__ functionality.\n\n    Parameters\n    ----------\n    array : modin.numpy.array\n        Array to operate on.\n    \"\"\"\n\n    def __init__(self, array):\n        self.arr = array\n\n    def _get_numpy_object_from_qc_view(\n        self,\n        qc_view,\n        row_scalar: bool,\n        col_scalar: bool,\n        ndim: int,\n    ):\n        \"\"\"\n        Convert the query compiler view to the appropriate NumPy object.\n\n        Parameters\n        ----------\n        qc_view : BaseQueryCompiler\n            Query compiler to convert.\n        row_scalar : bool\n            Whether indexer for rows is scalar.\n        col_scalar : bool\n            Whether indexer for columns is scalar.\n        ndim : {0, 1, 2}\n            Number 
of dimensions in dataset to be retrieved.\n\n        Returns\n        -------\n        modin.numpy.array\n            The array object with the data from the query compiler view.\n\n        Notes\n        -----\n        Usage of `slice(None)` as a lookup is a hack to pass information about\n        full-axis grab without computing actual indices that triggers lazy computations.\n        Ideally, this API should get rid of using slices as indexers and either use a\n        common ``Indexer`` object or range and ``np.ndarray`` only.\n        \"\"\"\n        if ndim == 2:\n            return array(_query_compiler=qc_view, _ndim=self.arr._ndim)\n        if self.arr._ndim == 1 and not row_scalar:\n            return array(_query_compiler=qc_view, _ndim=1)\n\n        if self.arr._ndim == 1:\n            _ndim = 0\n        elif ndim == 0:\n            _ndim = 0\n        else:\n            # We are in the case where ndim == 1\n            # The axis we squeeze on depends on whether we are looking for an exact\n            # value or a subset of rows and columns. 
Knowing if we have a full MultiIndex\n            # lookup or scalar lookup can help us figure out whether we need to squeeze\n            # on the row or column index.\n            if row_scalar and col_scalar:\n                _ndim = 0\n            elif not any([row_scalar, col_scalar]):\n                _ndim = 2\n            else:\n                _ndim = 1\n                if row_scalar:\n                    qc_view = qc_view.transpose()\n\n        if _ndim == 0:\n            return qc_view.to_numpy()[0, 0]\n\n        res_arr = array(_query_compiler=qc_view, _ndim=_ndim)\n        return res_arr\n\n    def _parse_row_and_column_locators(self, tup):\n        \"\"\"\n        Unpack the user input for getitem and setitem and compute ndim.\n\n        loc[a] -> ([a], :), 1D\n        loc[[a,b]] -> ([a,b], :),\n        loc[a,b] -> ([a], [b]), 0D\n\n        Parameters\n        ----------\n        tup : tuple\n            User input to unpack.\n\n        Returns\n        -------\n        row_loc : scalar or list\n            Row locator(s) as a scalar or List.\n        col_list : scalar or list\n            Column locator(s) as a scalar or List.\n        ndim : {0, 1, 2}\n            Number of dimensions of located dataset.\n        \"\"\"\n        row_loc, col_loc = slice(None), slice(None)\n\n        if is_tuple(tup):\n            row_loc = tup[0]\n            if len(tup) == 2:\n                col_loc = tup[1]\n            if len(tup) > 2:\n                raise IndexingError(\"Too many indexers\")\n        else:\n            row_loc = tup\n\n        row_loc = row_loc(self.arr) if callable(row_loc) else row_loc\n        col_loc = col_loc(self.arr) if callable(col_loc) else col_loc\n        row_loc = row_loc._to_numpy() if isinstance(row_loc, array) else row_loc\n        col_loc = col_loc._to_numpy() if isinstance(col_loc, array) else col_loc\n        return row_loc, col_loc, _compute_ndim(row_loc, col_loc)\n\n    def __getitem__(self, key):\n        \"\"\"\n        
Retrieve dataset according to `key`.\n\n        Parameters\n        ----------\n        key : callable or tuple\n            The global row numbers to retrieve data from.\n\n        Returns\n        -------\n        DataFrame or Series\n            Located dataset.\n\n        See Also\n        --------\n        pandas.DataFrame.iloc\n        \"\"\"\n        row_loc, col_loc, ndim = self._parse_row_and_column_locators(key)\n        row_scalar = is_scalar(row_loc)\n        col_scalar = is_scalar(col_loc)\n        self._check_dtypes(row_loc)\n        self._check_dtypes(col_loc)\n\n        row_lookup, col_lookup = self._compute_lookup(row_loc, col_loc)\n        if isinstance(row_lookup, slice):\n            ErrorMessage.catch_bugs_and_request_email(\n                failure_condition=row_lookup != slice(None),\n                extra_log=f\"Only None-slices are acceptable as a slice argument in masking, got: {row_lookup}\",\n            )\n            row_lookup = None\n        if isinstance(col_lookup, slice):\n            ErrorMessage.catch_bugs_and_request_email(\n                failure_condition=col_lookup != slice(None),\n                extra_log=f\"Only None-slices are acceptable as a slice argument in masking, got: {col_lookup}\",\n            )\n            col_lookup = None\n        qc_view = self.arr._query_compiler.take_2d_positional(row_lookup, col_lookup)\n        result = self._get_numpy_object_from_qc_view(\n            qc_view,\n            row_scalar=row_scalar,\n            col_scalar=col_scalar,\n            ndim=ndim,\n        )\n        return result\n\n    def _determine_setitem_axis(self, row_lookup, col_lookup, row_scalar, col_scalar):\n        \"\"\"\n        Determine an axis along which we should do an assignment.\n\n        Parameters\n        ----------\n        row_lookup : slice or list\n            Indexer for rows.\n        col_lookup : slice or list\n            Indexer for columns.\n        row_scalar : bool\n            Whether 
indexer for rows is scalar or not.\n        col_scalar : bool\n            Whether indexer for columns is scalar or not.\n\n        Returns\n        -------\n        int or None\n            None if this will be a both axis assignment, number of axis to assign in other cases.\n\n        Notes\n        -----\n        axis = 0: column assignment df[col] = item\n        axis = 1: row assignment df.loc[row] = item\n        axis = None: assignment along both axes\n        \"\"\"\n        if self.arr.shape == (1, 1):\n            return None if not (row_scalar ^ col_scalar) else 1 if row_scalar else 0\n\n        def get_axis(axis):\n            return (\n                self.arr._query_compiler.index\n                if axis == 0\n                else self.arr._query_compiler.columns\n            )\n\n        row_lookup_len, col_lookup_len = [\n            (\n                len(lookup)\n                if not isinstance(lookup, slice)\n                else compute_sliced_len(lookup, len(get_axis(i)))\n            )\n            for i, lookup in enumerate([row_lookup, col_lookup])\n        ]\n\n        if col_lookup_len == 1 and row_lookup_len == 1:\n            axis = None\n        elif (\n            row_lookup_len == len(self.arr._query_compiler.index)\n            and col_lookup_len == 1\n            and self.arr._ndim == 2\n        ):\n            axis = 0\n        elif (\n            col_lookup_len == len(self.arr._query_compiler.columns)\n            and row_lookup_len == 1\n        ):\n            axis = 1\n        else:\n            axis = None\n        return axis\n\n    def _setitem_positional(self, row_lookup, col_lookup, item, axis=None):\n        \"\"\"\n        Assign `item` value to located dataset.\n\n        Parameters\n        ----------\n        row_lookup : slice or scalar\n            The global row index to write item to.\n        col_lookup : slice or scalar\n            The global col index to write item to.\n        item : DataFrame, Series or 
scalar\n            The new item needs to be set. It can be any shape that's\n            broadcast-able to the product of the lookup tables.\n        axis : {None, 0, 1}, default: None\n            If not None, it means that whole axis is used to assign a value.\n            0 means assign to whole column, 1 means assign to whole row.\n            If None, it means that partial assignment is done on both axes.\n        \"\"\"\n        # Convert slices to indices for the purposes of application.\n        # TODO (devin-petersohn): Apply to slice without conversion to list\n        if isinstance(row_lookup, slice):\n            row_lookup = range(len(self.arr._query_compiler.index))[row_lookup]\n        if isinstance(col_lookup, slice):\n            col_lookup = range(len(self.arr._query_compiler.columns))[col_lookup]\n\n        new_qc = self.arr._query_compiler.write_items(row_lookup, col_lookup, item)\n        self.arr._update_inplace(new_qc)\n\n    def __setitem__(self, key, item):\n        \"\"\"\n        Assign `item` value to dataset located by `key`.\n\n        Parameters\n        ----------\n        key : callable or tuple\n            The global row numbers to assign data to.\n        item : modin.pandas.DataFrame, modin.pandas.Series or scalar\n            Value that should be assigned to located dataset.\n\n        See Also\n        --------\n        pandas.DataFrame.iloc\n        \"\"\"\n        row_loc, col_loc, _ = self._parse_row_and_column_locators(key)\n        row_scalar = is_scalar(row_loc)\n        col_scalar = is_scalar(col_loc)\n        self._check_dtypes(row_loc)\n        self._check_dtypes(col_loc)\n\n        row_lookup, col_lookup = self._compute_lookup(row_loc, col_loc)\n        self._setitem_positional(\n            row_lookup,\n            col_lookup,\n            item,\n            axis=self._determine_setitem_axis(\n                row_lookup, col_lookup, row_scalar, col_scalar\n            ),\n        )\n\n    def _compute_lookup(self, 
row_loc, col_loc):\n        \"\"\"\n        Compute index and column labels from index and column integer locators.\n\n        Parameters\n        ----------\n        row_loc : slice, list, array or tuple\n            Row locator.\n        col_loc : slice, list, array or tuple\n            Columns locator.\n\n        Returns\n        -------\n        row_lookup : slice(None) if full axis grab, pandas.RangeIndex if repetition is detected, numpy.ndarray otherwise\n            List of index labels.\n        col_lookup : slice(None) if full axis grab, pandas.RangeIndex if repetition is detected, numpy.ndarray otherwise\n            List of columns labels.\n\n        Notes\n        -----\n        Usage of `slice(None)` as a resulting lookup is a hack to pass information about\n        full-axis grab without computing actual indices that triggers lazy computations.\n        Ideally, this API should get rid of using slices as indexers and either use a\n        common ``Indexer`` object or range and ``np.ndarray`` only.\n        \"\"\"\n        lookups = []\n        for axis, axis_loc in enumerate((row_loc, col_loc)):\n            if is_scalar(axis_loc):\n                axis_loc = np.array([axis_loc])\n            if isinstance(axis_loc, slice):\n                axis_lookup = (\n                    axis_loc\n                    if axis_loc == slice(None)\n                    else pandas.RangeIndex(\n                        *axis_loc.indices(len(self.arr._query_compiler.get_axis(axis)))\n                    )\n                )\n            elif is_range_like(axis_loc):\n                axis_lookup = pandas.RangeIndex(\n                    axis_loc.start, axis_loc.stop, axis_loc.step\n                )\n            elif is_boolean_array(axis_loc):\n                axis_lookup = boolean_mask_to_numeric(axis_loc)\n            else:\n                if isinstance(axis_loc, pandas.Index):\n                    axis_loc = axis_loc.values\n                elif 
is_list_like(axis_loc) and not isinstance(axis_loc, np.ndarray):\n                    # `Index.__getitem__` works much faster with numpy arrays than with python lists,\n                    # so although we lose some time here on converting to numpy, `Index.__getitem__`\n                    # speedup covers the loss that we gain here.\n                    axis_loc = np.array(axis_loc, dtype=np.int64)\n                # Relatively fast check allows us to not trigger `self.qc.get_axis()` computation\n                # if there're no negative indices and so they don't depend on the axis length.\n                if isinstance(axis_loc, np.ndarray) and not (axis_loc < 0).any():\n                    axis_lookup = axis_loc\n                else:\n                    axis_lookup = pandas.RangeIndex(\n                        len(self.arr._query_compiler.get_axis(axis))\n                    )[axis_loc]\n\n            if isinstance(axis_lookup, pandas.Index) and not is_range_like(axis_lookup):\n                axis_lookup = axis_lookup.values\n            lookups.append(axis_lookup)\n        return lookups\n\n    def _check_dtypes(self, locator):\n        \"\"\"\n        Check that `locator` is an integer scalar, integer slice, integer list or array of booleans.\n\n        Parameters\n        ----------\n        locator : scalar, list, slice or array\n            Object to check.\n\n        Raises\n        ------\n        ValueError\n            If check fails.\n        \"\"\"\n        is_int = is_integer(locator)\n        is_int_slice = is_integer_slice(locator)\n        is_int_arr = is_integer_array(locator)\n        is_bool_arr = is_boolean_array(locator)\n\n        if not any([is_int, is_int_slice, is_int_arr, is_bool_arr]):\n            raise ValueError(_ILOC_INT_ONLY_ERROR)\n"
  },
  {
    "path": "modin/numpy/linalg.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport numpy\n\nfrom modin.error_message import ErrorMessage\n\nfrom .arr import array\nfrom .utils import try_convert_from_interoperable_type\n\n\ndef norm(x, ord=None, axis=None, keepdims=False):\n    x = try_convert_from_interoperable_type(x)\n    if not isinstance(x, array):\n        ErrorMessage.bad_type_for_numpy_op(\"linalg.norm\", type(x))\n        return numpy.linalg.norm(x, ord=ord, axis=axis, keepdims=keepdims)\n    return x._norm(ord=ord, axis=axis, keepdims=keepdims)\n"
  },
  {
    "path": "modin/numpy/logic.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport numpy\n\nfrom modin.error_message import ErrorMessage\nfrom modin.utils import _inherit_docstrings\n\nfrom .arr import array\nfrom .utils import try_convert_from_interoperable_type\n\n\ndef _dispatch_logic(operator_name):\n    @_inherit_docstrings(getattr(numpy, operator_name))\n    def call(x, *args, **kwargs):\n        x = try_convert_from_interoperable_type(x)\n        if not isinstance(x, array):\n            ErrorMessage.bad_type_for_numpy_op(operator_name, type(x))\n            return getattr(numpy, operator_name)(x, *args, **kwargs)\n        return getattr(x, f\"_{operator_name}\")(*args, **kwargs)\n\n    return call\n\n\nall = _dispatch_logic(\"all\")\nany = _dispatch_logic(\"any\")\nisfinite = _dispatch_logic(\"isfinite\")\nisinf = _dispatch_logic(\"isinf\")\nisnan = _dispatch_logic(\"isnan\")\nisnat = _dispatch_logic(\"isnat\")\nisneginf = _dispatch_logic(\"isneginf\")\nisposinf = _dispatch_logic(\"isposinf\")\niscomplex = _dispatch_logic(\"iscomplex\")\nisreal = _dispatch_logic(\"isreal\")\n\n\ndef isscalar(e):\n    if isinstance(e, array):\n        return False\n    return numpy.isscalar(e)\n\n\nlogical_not = 
_dispatch_logic(\"logical_not\")\nlogical_and = _dispatch_logic(\"logical_and\")\nlogical_or = _dispatch_logic(\"logical_or\")\nlogical_xor = _dispatch_logic(\"logical_xor\")\ngreater = _dispatch_logic(\"greater\")\ngreater_equal = _dispatch_logic(\"greater_equal\")\nless = _dispatch_logic(\"less\")\nless_equal = _dispatch_logic(\"less_equal\")\nequal = _dispatch_logic(\"equal\")\nnot_equal = _dispatch_logic(\"not_equal\")\n"
  },
  {
    "path": "modin/numpy/math.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport numpy\n\nfrom modin.error_message import ErrorMessage\nfrom modin.utils import _inherit_docstrings\n\nfrom .arr import array\nfrom .utils import try_convert_from_interoperable_type\n\n\ndef _dispatch_math(operator_name, arr_method_name=None):\n    # `operator_name` is the name of the method on the numpy API\n    # `arr_method_name` is the name of the method on the modin.numpy.array object,\n    # which is assumed to be `operator_name` by default\n    @_inherit_docstrings(getattr(numpy, operator_name))\n    def call(x, *args, **kwargs):\n        x = try_convert_from_interoperable_type(x)\n        if not isinstance(x, array):\n            ErrorMessage.bad_type_for_numpy_op(operator_name, type(x))\n            return getattr(numpy, operator_name)(x, *args, **kwargs)\n\n        return getattr(x, arr_method_name or operator_name)(*args, **kwargs)\n\n    return call\n\n\nabsolute = _dispatch_math(\"absolute\")\nabs = absolute\nadd = _dispatch_math(\"add\", \"__add__\")\ndivide = _dispatch_math(\"divide\")\ndot = _dispatch_math(\"dot\")\nfloat_power = _dispatch_math(\"float_power\")\nfloor_divide = _dispatch_math(\"floor_divide\")\npower = 
_dispatch_math(\"power\")\nprod = _dispatch_math(\"prod\")\nmultiply = _dispatch_math(\"multiply\")\nremainder = _dispatch_math(\"remainder\")\nmod = remainder\nsubtract = _dispatch_math(\"subtract\")\nsum = _dispatch_math(\"sum\")\ntrue_divide = _dispatch_math(\"true_divide\", \"divide\")\nmean = _dispatch_math(\"mean\")\n\n\ndef var(x1, axis=None, dtype=None, out=None, keepdims=None, *, where=True):\n    x1 = try_convert_from_interoperable_type(x1)\n    if not isinstance(x1, array):\n        ErrorMessage.bad_type_for_numpy_op(\"var\", type(x1))\n        return numpy.var(\n            x1, axis=axis, out=out, keepdims=keepdims, where=where, dtype=dtype\n        )\n    return x1.var(axis=axis, out=out, keepdims=keepdims, where=where, dtype=dtype)\n\n\n# Maximum and minimum are ufunc's in NumPy, which means that our array's __array_ufunc__\n# implementation will automatically handle this. We still need the function though, so that\n# if the operands are modin.pandas objects, we can convert them to arrays, but after that\n# we can just use NumPy's maximum/minimum since that will route to our array's ufunc.\ndef maximum(\n    x1, x2, out=None, where=True, casting=\"same_kind\", order=\"K\", dtype=None, subok=True\n):\n    x1 = try_convert_from_interoperable_type(x1)\n    if not isinstance(x1, array):\n        ErrorMessage.bad_type_for_numpy_op(\"maximum\", type(x1))\n    return numpy.maximum(\n        x1,\n        x2,\n        out=out,\n        where=where,\n        casting=casting,\n        order=order,\n        dtype=dtype,\n        subok=subok,\n    )\n\n\ndef minimum(\n    x1, x2, out=None, where=True, casting=\"same_kind\", order=\"K\", dtype=None, subok=True\n):\n    x1 = try_convert_from_interoperable_type(x1)\n    if not isinstance(x1, array):\n        ErrorMessage.bad_type_for_numpy_op(\"minimum\", type(x1))\n    return numpy.minimum(\n        x1,\n        x2,\n        out=out,\n        where=where,\n        casting=casting,\n        order=order,\n        
dtype=dtype,\n        subok=subok,\n    )\n\n\namax = _dispatch_math(\"amax\", \"max\")\namin = _dispatch_math(\"amin\", \"min\")\nmax = amax\nmin = amin\n\n\ndef sqrt(\n    x, out=None, *, where=True, casting=\"same_kind\", order=\"K\", dtype=None, subok=True\n):\n    x = try_convert_from_interoperable_type(x)\n    if not isinstance(x, array):\n        ErrorMessage.bad_type_for_numpy_op(\"sqrt\", type(x))\n        return numpy.sqrt(\n            x,\n            out=out,\n            where=where,\n            casting=casting,\n            order=order,\n            dtype=dtype,\n            subok=subok,\n        )\n    return x.sqrt(out, where, casting, order, dtype, subok)\n\n\ndef exp(\n    x, out=None, *, where=True, casting=\"same_kind\", order=\"K\", dtype=None, subok=True\n):\n    x = try_convert_from_interoperable_type(x)\n    if not isinstance(x, array):\n        ErrorMessage.bad_type_for_numpy_op(\"exp\", type(x))\n        return numpy.exp(\n            x,\n            out=out,\n            where=where,\n            casting=casting,\n            order=order,\n            dtype=dtype,\n            subok=subok,\n        )\n    return x.exp(out, where, casting, order, dtype, subok)\n\n\ndef argmax(a, axis=None, out=None, *, keepdims=None):\n    a = try_convert_from_interoperable_type(a)\n    if not isinstance(a, array):\n        ErrorMessage.bad_type_for_numpy_op(\"argmax\", type(a))\n        return numpy.argmax(a, axis=axis, out=out, keepdims=keepdims)\n    return a.argmax(axis=axis, out=out, keepdims=keepdims)\n\n\ndef argmin(a, axis=None, out=None, *, keepdims=None):\n    a = try_convert_from_interoperable_type(a)\n    if not isinstance(a, array):\n        ErrorMessage.bad_type_for_numpy_op(\"argmin\", type(a))\n        return numpy.argmin(a, axis=axis, out=out, keepdims=keepdims)\n    return a.argmin(axis=axis, out=out, keepdims=keepdims)\n"
  },
  {
    "path": "modin/numpy/trigonometry.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport numpy\n\nfrom modin.error_message import ErrorMessage\n\nfrom .arr import array\nfrom .utils import try_convert_from_interoperable_type\n\n\ndef tanh(\n    x, out=None, where=True, casting=\"same_kind\", order=\"K\", dtype=None, subok=True\n):\n    x = try_convert_from_interoperable_type(x)\n    if not isinstance(x, array):\n        ErrorMessage.bad_type_for_numpy_op(\"tanh\", type(x))\n        return numpy.tanh(\n            x,\n            out=out,\n            where=where,\n            casting=casting,\n            order=order,\n            dtype=dtype,\n            subok=subok,\n        )\n    return x.tanh(\n        out=out, where=where, casting=casting, order=order, dtype=dtype, subok=subok\n    )\n"
  },
  {
    "path": "modin/numpy/utils.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Collection of array utility functions for internal use.\"\"\"\n\nimport modin.numpy as np\nimport modin.pandas as pd\n\n_INTEROPERABLE_TYPES = (pd.DataFrame, pd.Series)\n\n\ndef try_convert_from_interoperable_type(obj, copy=False):\n    if isinstance(obj, _INTEROPERABLE_TYPES):\n        obj = np.array(obj, copy=copy)\n    return obj\n"
  },
  {
    "path": "modin/pandas/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport warnings\n\nimport pandas\nfrom packaging import version\n\n__min_pandas_version__ = \"2.2\"\n__max_pandas_version__ = \"2.4\"\n\npandas_version = version.parse(pandas.__version__)\nif pandas_version < version.parse(\n    __min_pandas_version__\n) or pandas_version >= version.parse(__max_pandas_version__):\n    warnings.warn(\n        f\"The pandas version installed ({pandas.__version__}) is outside the supported range in Modin\"\n        + f\" ({__min_pandas_version__} to {__max_pandas_version__}). 
This may cause undesired side effects!\"\n    )\n\n# to not pollute namespace\ndel version, pandas_version, __min_pandas_version__, __max_pandas_version__\n\n\nwith warnings.catch_warnings():\n    warnings.simplefilter(\"ignore\")\n    import inspect\n\n    from modin.core.storage_formats.pandas.query_compiler_caster import (\n        wrap_free_function_in_argument_caster,\n    )\n\n    # To allow the extensions system to override these methods, we must wrap all objects re-exported\n    # from pandas in a backend dispatcher.\n    _reexport_list = (\n        \"eval\",\n        \"factorize\",\n        \"test\",\n        \"date_range\",\n        \"period_range\",\n        \"Index\",\n        \"MultiIndex\",\n        \"CategoricalIndex\",\n        \"bdate_range\",\n        \"DatetimeIndex\",\n        \"Timedelta\",\n        \"Timestamp\",\n        \"set_eng_float_format\",\n        \"options\",\n        \"describe_option\",\n        \"set_option\",\n        \"get_option\",\n        \"reset_option\",\n        \"option_context\",\n        \"NaT\",\n        \"PeriodIndex\",\n        \"Categorical\",\n        \"Interval\",\n        \"UInt8Dtype\",\n        \"UInt16Dtype\",\n        \"UInt32Dtype\",\n        \"UInt64Dtype\",\n        \"SparseDtype\",\n        \"Int8Dtype\",\n        \"Int16Dtype\",\n        \"Int32Dtype\",\n        \"Int64Dtype\",\n        \"StringDtype\",\n        \"BooleanDtype\",\n        \"CategoricalDtype\",\n        \"DatetimeTZDtype\",\n        \"IntervalDtype\",\n        \"PeriodDtype\",\n        \"RangeIndex\",\n        \"TimedeltaIndex\",\n        \"IntervalIndex\",\n        \"IndexSlice\",\n        \"Grouper\",\n        \"array\",\n        \"Period\",\n        \"DateOffset\",\n        \"timedelta_range\",\n        \"infer_freq\",\n        \"interval_range\",\n        \"ExcelWriter\",\n        \"NamedAgg\",\n        \"NA\",\n        \"api\",\n        \"ArrowDtype\",\n        \"Flags\",\n        \"Float32Dtype\",\n        \"Float64Dtype\",\n        
\"from_dummies\",\n        \"testing\",\n    )\n    for name in _reexport_list:\n        item = getattr(pandas, name)\n        if inspect.isfunction(item):\n            # Note that this is applied to only functions, not classes.\n            item = wrap_free_function_in_argument_caster(name)(item)\n        globals()[name] = item\n    del inspect, item, _reexport_list, name, wrap_free_function_in_argument_caster\n\nimport os\n\nfrom modin.config import Parameter\n\n_engine_initialized = {}\n\n\ndef _initialize_engine(engine_string: str):\n    from modin.config import (\n        CpuCount,\n        Engine,\n        IsExperimental,\n        StorageFormat,\n        ValueSource,\n    )\n\n    # Set this so that Pandas doesn't try to multithread by itself\n    os.environ[\"OMP_NUM_THREADS\"] = \"1\"\n\n    if engine_string == \"Ray\":\n        if not _engine_initialized.get(\"Ray\", False):\n            from modin.core.execution.ray.common import initialize_ray\n\n            initialize_ray()\n    elif engine_string == \"Dask\":\n        if not _engine_initialized.get(\"Dask\", False):\n            from modin.core.execution.dask.common import initialize_dask\n\n            initialize_dask()\n    elif engine_string == \"Unidist\":\n        if not _engine_initialized.get(\"Unidist\", False):\n            from modin.core.execution.unidist.common import initialize_unidist\n\n            initialize_unidist()\n    elif engine_string not in Engine.NOINIT_ENGINES:\n        raise ImportError(\"Unrecognized execution engine: {}.\".format(engine_string))\n\n    _engine_initialized[engine_string] = True\n\n\nfrom modin.pandas import arrays, errors\nfrom modin.pandas.api.extensions.extensions import __getattr___impl\nfrom modin.utils import show_versions\n\nfrom .. 
import __version__\nfrom .dataframe import DataFrame\nfrom .general import (\n    concat,\n    crosstab,\n    cut,\n    get_dummies,\n    isna,\n    isnull,\n    lreshape,\n    melt,\n    merge,\n    merge_asof,\n    merge_ordered,\n    notna,\n    notnull,\n    pivot,\n    pivot_table,\n    qcut,\n    to_datetime,\n    to_numeric,\n    to_timedelta,\n    unique,\n    value_counts,\n    wide_to_long,\n)\nfrom .io import (\n    ExcelFile,\n    HDFStore,\n    json_normalize,\n    read_clipboard,\n    read_csv,\n    read_excel,\n    read_feather,\n    read_fwf,\n    read_gbq,\n    read_hdf,\n    read_html,\n    read_json,\n    read_orc,\n    read_parquet,\n    read_pickle,\n    read_sas,\n    read_spss,\n    read_sql,\n    read_sql_query,\n    read_sql_table,\n    read_stata,\n    read_table,\n    read_xml,\n    to_pickle,\n)\nfrom .plotting import Plotting as plotting\nfrom .series import Series\n\n__getattr__ = __getattr___impl\n\n\n__all__ = [  # noqa: F405\n    \"DataFrame\",\n    \"Series\",\n    \"read_csv\",\n    \"read_parquet\",\n    \"read_json\",\n    \"read_html\",\n    \"read_clipboard\",\n    \"read_excel\",\n    \"read_hdf\",\n    \"read_feather\",\n    \"read_stata\",\n    \"read_sas\",\n    \"read_pickle\",\n    \"read_sql\",\n    \"read_gbq\",\n    \"read_table\",\n    \"read_spss\",\n    \"read_orc\",\n    \"json_normalize\",\n    \"concat\",\n    \"eval\",\n    \"cut\",\n    \"factorize\",\n    \"test\",\n    \"qcut\",\n    \"to_datetime\",\n    \"get_dummies\",\n    \"isna\",\n    \"isnull\",\n    \"merge\",\n    \"pivot_table\",\n    \"date_range\",\n    \"Index\",\n    \"MultiIndex\",\n    \"Series\",\n    \"bdate_range\",\n    \"period_range\",\n    \"DatetimeIndex\",\n    \"to_timedelta\",\n    \"set_eng_float_format\",\n    \"options\",\n    \"describe_option\",\n    \"set_option\",\n    \"get_option\",\n    \"reset_option\",\n    \"option_context\",\n    \"CategoricalIndex\",\n    \"Timedelta\",\n    \"Timestamp\",\n    \"NaT\",\n    
\"PeriodIndex\",\n    \"Categorical\",\n    \"__version__\",\n    \"melt\",\n    \"crosstab\",\n    \"plotting\",\n    \"Interval\",\n    \"UInt8Dtype\",\n    \"UInt16Dtype\",\n    \"UInt32Dtype\",\n    \"UInt64Dtype\",\n    \"SparseDtype\",\n    \"Int8Dtype\",\n    \"Int16Dtype\",\n    \"Int32Dtype\",\n    \"Int64Dtype\",\n    \"CategoricalDtype\",\n    \"DatetimeTZDtype\",\n    \"IntervalDtype\",\n    \"PeriodDtype\",\n    \"BooleanDtype\",\n    \"StringDtype\",\n    \"NA\",\n    \"RangeIndex\",\n    \"TimedeltaIndex\",\n    \"IntervalIndex\",\n    \"IndexSlice\",\n    \"Grouper\",\n    \"array\",\n    \"Period\",\n    \"show_versions\",\n    \"DateOffset\",\n    \"timedelta_range\",\n    \"infer_freq\",\n    \"interval_range\",\n    \"ExcelWriter\",\n    \"read_fwf\",\n    \"read_sql_table\",\n    \"read_sql_query\",\n    \"ExcelFile\",\n    \"to_pickle\",\n    \"HDFStore\",\n    \"lreshape\",\n    \"wide_to_long\",\n    \"merge_asof\",\n    \"merge_ordered\",\n    \"notnull\",\n    \"notna\",\n    \"pivot\",\n    \"to_numeric\",\n    \"unique\",\n    \"value_counts\",\n    \"NamedAgg\",\n    \"api\",\n    \"read_xml\",\n    \"ArrowDtype\",\n    \"Flags\",\n    \"Float32Dtype\",\n    \"Float64Dtype\",\n    \"from_dummies\",\n    \"errors\",\n]\n\n# Remove these attributes from this module's namespace.\ndel pandas, Parameter, __getattr___impl\n"
  },
  {
    "path": "modin/pandas/accessor.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"\nImplement various accessor classes for DataFrame and Series API.\n\nSparseFrameAccessor implements API of pandas.DataFrame.sparse accessor.\n\nSparseAccessor implements API of pandas.Series.sparse accessor.\n\nCachedAccessor implements API of pandas.core.accessor.CachedAccessor\n\"\"\"\n\nfrom __future__ import annotations\n\nimport pickle\nfrom typing import TYPE_CHECKING, Union\n\nimport pandas\nfrom pandas._typing import CompressionOptions, StorageOptions\nfrom pandas.core.dtypes.dtypes import SparseDtype\n\nfrom modin import pandas as pd\nfrom modin.error_message import ErrorMessage\nfrom modin.logging import ClassLogger\nfrom modin.pandas.io import to_dask, to_ray\nfrom modin.utils import _inherit_docstrings\n\nif TYPE_CHECKING:\n    from modin.pandas import DataFrame, Series\n\n\nclass BaseSparseAccessor(ClassLogger):\n    \"\"\"\n    Base class for various sparse DataFrame accessor classes.\n\n    Parameters\n    ----------\n    data : DataFrame or Series\n        Object to operate on.\n    \"\"\"\n\n    _parent: Union[DataFrame, Series]\n    _validation_msg = \"Can only use the '.sparse' accessor with Sparse data.\"\n\n    def __init__(self, data: 
Union[DataFrame, Series] = None):\n        self._parent = data\n        self._validate(data)\n\n    @classmethod\n    def _validate(cls, data: Union[DataFrame, Series]):\n        \"\"\"\n        Verify that `data` dtypes are compatible with `pandas.core.dtypes.dtypes.SparseDtype`.\n\n        Parameters\n        ----------\n        data : DataFrame or Series\n            Object to check.\n\n        Raises\n        ------\n        NotImplementedError\n            Function is implemented in child classes.\n        \"\"\"\n        raise NotImplementedError\n\n    def _default_to_pandas(self, op, *args, **kwargs):\n        \"\"\"\n        Convert dataset to pandas type and call a pandas sparse.`op` on it.\n\n        Parameters\n        ----------\n        op : str\n            Name of pandas function.\n        *args : list\n            Additional positional arguments to be passed in `op`.\n        **kwargs : dict\n            Additional keywords arguments to be passed in `op`.\n\n        Returns\n        -------\n        object\n            Result of operation.\n        \"\"\"\n        return self._parent._default_to_pandas(\n            lambda parent: op(parent.sparse, *args, **kwargs)\n        )\n\n\n@_inherit_docstrings(pandas.core.arrays.sparse.accessor.SparseFrameAccessor)\nclass SparseFrameAccessor(BaseSparseAccessor):\n    @classmethod\n    def _validate(cls, data: DataFrame):\n        \"\"\"\n        Verify that `data` dtypes are compatible with `pandas.core.dtypes.dtypes.SparseDtype`.\n\n        Parameters\n        ----------\n        data : DataFrame\n            Object to check.\n\n        Raises\n        ------\n        AttributeError\n            If check fails.\n        \"\"\"\n        dtypes = data.dtypes\n        if not all(isinstance(t, SparseDtype) for t in dtypes):\n            raise AttributeError(cls._validation_msg)\n\n    @property\n    def density(self):\n        return self._parent._default_to_pandas(pandas.DataFrame.sparse).density\n\n    
@classmethod\n    def from_spmatrix(cls, data, index=None, columns=None):\n        ErrorMessage.default_to_pandas(\"`from_spmatrix`\")\n        return pd.DataFrame(\n            pandas.DataFrame.sparse.from_spmatrix(data, index=index, columns=columns)\n        )\n\n    def to_dense(self):\n        return self._default_to_pandas(pandas.DataFrame.sparse.to_dense)\n\n    def to_coo(self):\n        return self._default_to_pandas(pandas.DataFrame.sparse.to_coo)\n\n\n@_inherit_docstrings(pandas.core.arrays.sparse.accessor.SparseAccessor)\nclass SparseAccessor(BaseSparseAccessor):\n    @classmethod\n    def _validate(cls, data: Series):\n        \"\"\"\n        Verify that `data` dtype is compatible with `pandas.core.dtypes.dtypes.SparseDtype`.\n\n        Parameters\n        ----------\n        data : Series\n            Object to check.\n\n        Raises\n        ------\n        AttributeError\n            If check fails.\n        \"\"\"\n        if not isinstance(data.dtype, SparseDtype):\n            raise AttributeError(cls._validation_msg)\n\n    @property\n    def density(self):\n        return self._parent._default_to_pandas(pandas.Series.sparse).density\n\n    @property\n    def fill_value(self):\n        return self._parent._default_to_pandas(pandas.Series.sparse).fill_value\n\n    @property\n    def npoints(self):\n        return self._parent._default_to_pandas(pandas.Series.sparse).npoints\n\n    @property\n    def sp_values(self):\n        return self._parent._default_to_pandas(pandas.Series.sparse).sp_values\n\n    @classmethod\n    def from_coo(cls, A, dense_index=False):\n        return cls._default_to_pandas(\n            pandas.Series.sparse.from_coo, A, dense_index=dense_index\n        )\n\n    def to_coo(self, row_levels=(0,), column_levels=(1,), sort_labels=False):\n        return self._default_to_pandas(\n            pandas.Series.sparse.to_coo,\n            row_levels=row_levels,\n            column_levels=column_levels,\n            
sort_labels=sort_labels,\n        )\n\n    def to_dense(self):\n        return self._default_to_pandas(pandas.Series.sparse.to_dense)\n\n\n@_inherit_docstrings(pandas.core.accessor.CachedAccessor)\nclass CachedAccessor(ClassLogger):\n    def __init__(self, name: str, accessor) -> None:\n        self._name = name\n        self._accessor = accessor\n\n    def __get__(self, obj, cls):  # noqa: GL08\n        if obj is None:\n            return self._accessor\n        accessor_obj = self._accessor(obj)\n        object.__setattr__(obj, self._name, accessor_obj)\n        return accessor_obj\n\n\nclass ModinAPI:\n    \"\"\"\n    Namespace class for accessing additional Modin functions that are not available in pandas.\n\n    Parameters\n    ----------\n    data : DataFrame or Series\n        Object to operate on.\n    \"\"\"\n\n    _data: Union[DataFrame, Series]\n\n    def __init__(self, data: Union[DataFrame, Series]):\n        self._data = data\n\n    def to_pandas(self):\n        \"\"\"\n        Convert a Modin DataFrame/Series object to a pandas DataFrame/Series object.\n\n        Returns\n        -------\n        pandas.Series or pandas.DataFrame\n        \"\"\"\n        return self._data._to_pandas()\n\n    def to_ray(self):\n        \"\"\"\n        Convert a Modin DataFrame/Series to a Ray Dataset.\n\n        Returns\n        -------\n        ray.data.Dataset\n            Converted object with type depending on input.\n\n        Notes\n        -----\n        Modin DataFrame/Series can only be converted to a Ray Dataset if Modin uses a Ray engine.\n        \"\"\"\n        return to_ray(self._data)\n\n    def to_dask(self):\n        \"\"\"\n        Convert a Modin DataFrame/Series to a Dask DataFrame/Series.\n\n        Returns\n        -------\n        dask.dataframe.DataFrame or dask.dataframe.Series\n            Converted object with type depending on input.\n\n        Notes\n        -----\n        Modin DataFrame/Series can only be converted to a Dask 
DataFrame/Series if Modin uses a Dask engine.\n        \"\"\"\n        return to_dask(self._data)\n\n    def to_pickle_glob(\n        self,\n        filepath_or_buffer,\n        compression: CompressionOptions = \"infer\",\n        protocol: int = pickle.HIGHEST_PROTOCOL,\n        storage_options: StorageOptions = None,\n    ) -> None:\n        \"\"\"\n        Pickle (serialize) object to file.\n\n        This experimental feature provides parallel writing into multiple pickle files which are\n        defined by glob pattern, otherwise (without glob pattern) default pandas implementation is used.\n\n        Parameters\n        ----------\n        filepath_or_buffer : str\n            File path where the pickled object will be stored.\n        compression : {{'infer', 'gzip', 'bz2', 'zip', 'xz', None}}, default: 'infer'\n            A string representing the compression to use in the output file. By\n            default, infers from the file extension in specified path.\n            Compression mode may be any of the following possible\n            values: {{'infer', 'gzip', 'bz2', 'zip', 'xz', None}}. If compression\n            mode is 'infer' and path_or_buf is path-like, then detect\n            compression mode from the following extensions:\n            '.gz', '.bz2', '.zip' or '.xz'. (otherwise no compression).\n            If dict given and mode is 'zip' or inferred as 'zip', other entries\n            passed as additional compression options.\n        protocol : int, default: pickle.HIGHEST_PROTOCOL\n            Int which indicates which protocol should be used by the pickler,\n            default HIGHEST_PROTOCOL (see `pickle docs <https://docs.python.org/3/library/pickle.html>`_\n            paragraph 12.1.2 for details). The possible  values are 0, 1, 2, 3, 4, 5. 
A negative value\n            for the protocol parameter is equivalent to setting its value to HIGHEST_PROTOCOL.\n        storage_options : dict, optional\n            Extra options that make sense for a particular storage connection, e.g.\n            host, port, username, password, etc., if using a URL that will be parsed by\n            fsspec, e.g., starting \"s3://\", \"gcs://\". An error will be raised if providing\n            this argument with a non-fsspec URL. See the fsspec and backend storage\n            implementation docs for the set of allowed keys and values.\n        \"\"\"\n        from modin.experimental.pandas.io import to_pickle_glob\n\n        to_pickle_glob(\n            self._data,\n            filepath_or_buffer=filepath_or_buffer,\n            compression=compression,\n            protocol=protocol,\n            storage_options=storage_options,\n        )\n\n    def to_parquet_glob(\n        self,\n        path,\n        engine=\"auto\",\n        compression=\"snappy\",\n        index=None,\n        partition_cols=None,\n        storage_options: StorageOptions = None,\n        **kwargs,\n    ) -> None:  # noqa: PR01\n        \"\"\"\n        Write a DataFrame to the binary parquet format.\n\n        This experimental feature provides parallel writing into multiple parquet files which are\n        defined by glob pattern, otherwise (without glob pattern) default pandas implementation is used.\n\n        Notes\n        -----\n        * Only string type supported for `path` argument.\n        * The rest of the arguments are the same as for `pandas.to_parquet`.\n        \"\"\"\n        from modin.experimental.pandas.io import to_parquet_glob\n\n        if path is None:\n            raise NotImplementedError(\n                \"`to_parquet_glob` doesn't support path=None, use `to_parquet` in that case.\"\n            )\n\n        to_parquet_glob(\n            self._data,\n            path=path,\n            engine=engine,\n            
compression=compression,\n            index=index,\n            partition_cols=partition_cols,\n            storage_options=storage_options,\n            **kwargs,\n        )\n\n    def to_json_glob(\n        self,\n        path_or_buf=None,\n        orient=None,\n        date_format=None,\n        double_precision=10,\n        force_ascii=True,\n        date_unit=\"ms\",\n        default_handler=None,\n        lines=False,\n        compression=\"infer\",\n        index=None,\n        indent=None,\n        storage_options: StorageOptions = None,\n        mode=\"w\",\n    ) -> None:  # noqa: PR01\n        \"\"\"\n        Convert the object to a JSON string.\n\n        Notes\n        -----\n        * Only string type supported for `path_or_buf` argument.\n        * The rest of the arguments are the same as for `pandas.to_json`.\n        \"\"\"\n        from modin.experimental.pandas.io import to_json_glob\n\n        if path_or_buf is None:\n            raise NotImplementedError(\n                \"`to_json_glob` doesn't support path_or_buf=None, use `to_json` in that case.\"\n            )\n\n        to_json_glob(\n            self._data,\n            path_or_buf=path_or_buf,\n            orient=orient,\n            date_format=date_format,\n            double_precision=double_precision,\n            force_ascii=force_ascii,\n            date_unit=date_unit,\n            default_handler=default_handler,\n            lines=lines,\n            compression=compression,\n            index=index,\n            indent=indent,\n            storage_options=storage_options,\n            mode=mode,\n        )\n\n    def to_xml_glob(\n        self,\n        path_or_buffer=None,\n        index=True,\n        root_name=\"data\",\n        row_name=\"row\",\n        na_rep=None,\n        attr_cols=None,\n        elem_cols=None,\n        namespaces=None,\n        prefix=None,\n        encoding=\"utf-8\",\n        xml_declaration=True,\n        pretty_print=True,\n        
parser=\"lxml\",\n        stylesheet=None,\n        compression=\"infer\",\n        storage_options=None,\n    ) -> None:  # noqa: PR01\n        \"\"\"\n        Render a DataFrame to an XML document.\n\n        Notes\n        -----\n        * Only string type supported for `path_or_buffer` argument.\n        * The rest of the arguments are the same as for `pandas.to_xml`.\n        \"\"\"\n        from modin.experimental.pandas.io import to_xml_glob\n\n        if path_or_buffer is None:\n            raise NotImplementedError(\n                \"`to_xml_glob` doesn't support path_or_buffer=None, use `to_xml` in that case.\"\n            )\n\n        to_xml_glob(\n            self._data,\n            path_or_buffer=path_or_buffer,\n            index=index,\n            root_name=root_name,\n            row_name=row_name,\n            na_rep=na_rep,\n            attr_cols=attr_cols,\n            elem_cols=elem_cols,\n            namespaces=namespaces,\n            prefix=prefix,\n            encoding=encoding,\n            xml_declaration=xml_declaration,\n            pretty_print=pretty_print,\n            parser=parser,\n            stylesheet=stylesheet,\n            compression=compression,\n            storage_options=storage_options,\n        )\n"
  },
  {
    "path": "modin/pandas/api/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\n# Re-export all other pandas.api submodules\nfrom pandas.api import indexers, interchange, types, typing\n\nfrom modin.pandas.api import extensions\n\n__all__ = [\"extensions\", \"interchange\", \"indexers\", \"types\", \"typing\"]\n"
  },
  {
    "path": "modin/pandas/api/extensions/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nfrom .extensions import (\n    register_base_accessor,\n    register_dataframe_accessor,\n    register_dataframe_groupby_accessor,\n    register_pd_accessor,\n    register_series_accessor,\n    register_series_groupby_accessor,\n)\n\n__all__ = [\n    \"register_base_accessor\",\n    \"register_dataframe_accessor\",\n    \"register_series_accessor\",\n    \"register_pd_accessor\",\n    \"register_dataframe_groupby_accessor\",\n    \"register_series_groupby_accessor\",\n]\n"
  },
  {
    "path": "modin/pandas/api/extensions/extensions.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport inspect\nfrom collections import defaultdict\nfrom functools import cached_property\nfrom types import MethodType, ModuleType\nfrom typing import Any, Dict, Optional, Union\n\nimport modin.pandas as pd\nfrom modin.config import Backend\nfrom modin.core.storage_formats.pandas.query_compiler_caster import (\n    _GENERAL_EXTENSIONS,\n    _NON_EXTENDABLE_ATTRIBUTES,\n    EXTENSION_DICT_TYPE,\n    wrap_function_in_argument_caster,\n)\n\n_attrs_to_delete_on_test = defaultdict(set)\n\n# Track a dict of module-level classes that are re-exported from pandas that may need to dynamically\n# change when overridden by the extensions system, such as pd.Index.\n# See register_pd_accessor for details.\n_reexport_classes: Dict[str, Any] = {}\n\n\ndef _set_attribute_on_obj(\n    name: str,\n    extensions: EXTENSION_DICT_TYPE,\n    backend: Optional[str],\n    obj: Union[type, ModuleType],\n    set_reexport: bool = False,\n):\n    \"\"\"\n    Create a new or override existing attribute on obj.\n\n    Parameters\n    ----------\n    name : str\n        The name of the attribute to assign to `obj`.\n    extensions : EXTENSION_DICT_TYPE\n        The dictionary mapping extension 
name to `new_attr` (assigned below).\n    backend : Optional[str]\n        The backend to which the accessor applies. If `None`, this accessor\n        will become the default for all backends.\n    obj : DataFrame, Series, or modin.pandas\n        The object we are assigning the new attribute to.\n    set_reexport : bool, default False\n        If True, register the original property in `_reexport_classes`.\n\n    Returns\n    -------\n    decorator\n        Returns the decorator function.\n    \"\"\"\n    if name in _NON_EXTENDABLE_ATTRIBUTES:\n        raise ValueError(f\"Cannot register an extension with the reserved name {name}.\")\n\n    def decorator(new_attr: Any):\n        \"\"\"\n        Decorate a function or class to be assigned to the given name.\n\n        Parameters\n        ----------\n        new_attr : Any\n            The new attribute to assign to name.\n\n        Returns\n        -------\n        new_attr\n            Unmodified new_attr is returned from the decorator.\n        \"\"\"\n        # Module-level functions are resolved by `wrap_free_function_in_argument_caster`, which dynamically\n        # identifies the appropriate backend to use. 
We cannot apply this wrapper to classes in order\n        # to preserve the validity of `isinstance` checks, and instead must force __getattr__ to directly\n        # return the correct class.\n        # Because the module-level __getattr__ function is not called if the object is found in the namespace,\n        # any overrides from the extensions system must `delattr` the attribute to force any future lookups\n        # to hit this code path.\n        # We cannot do this by omitting those exports at module initialization time because the\n        # __getattr__ codepath performs a call to Backend.get() that assumes the presence of an engine;\n        # in an extensions system that may reference types like pd.Timestamp/pd.Index before registering\n        # itself as an engine, this will cause errors.\n        if set_reexport:\n            original_attr = getattr(pd, name)\n            _reexport_classes[name] = original_attr\n            delattr(pd, name)\n        # If the attribute is an instance of functools.cached_property, we must manually call __set_name__ on it.\n        # https://stackoverflow.com/a/62161136\n        if isinstance(new_attr, cached_property):\n            new_attr.__set_name__(obj, name)\n        extensions[None if backend is None else Backend.normalize(backend)][\n            name\n        ] = new_attr\n        if (\n            callable(new_attr)\n            and name not in dir(obj)\n            and not inspect.isclass(new_attr)\n        ):\n            # For callable extensions, we add a method to `obj`'s namespace that\n            # dispatches to the correct implementation.\n            # If the extension is a class like pd.Index, do not add a wrapper and let\n            # the getattr dispatcher choose the correct item.\n            setattr(\n                obj,\n                name,\n                wrap_function_in_argument_caster(\n                    klass=obj if isinstance(obj, type) else None,\n                    f=new_attr,\n 
                   wrapping_function_type=(\n                        MethodType if isinstance(obj, type) else None\n                    ),\n                    extensions=extensions,\n                    name=name,\n                ),\n            )\n            # \"Free\" functions are permanently kept in the wrapper, so no need to clear them in tests.\n            if obj is not pd:\n                _attrs_to_delete_on_test[obj].add(name)\n        return new_attr\n\n    return decorator\n\n\ndef register_dataframe_accessor(name: str, *, backend: Optional[str] = None):\n    \"\"\"\n    Register a dataframe attribute with the name provided.\n\n    This is a decorator that assigns a new attribute to DataFrame. It can be used\n    with the following syntax:\n\n    ```\n    @register_dataframe_accessor(\"new_method\")\n    def my_new_dataframe_method(*args, **kwargs):\n        # logic goes here\n        return\n    ```\n\n    The new attribute can then be accessed with the name provided:\n\n    ```\n    df.new_method(*my_args, **my_kwargs)\n    ```\n\n    Parameters\n    ----------\n    name : str\n        The name of the attribute to assign to DataFrame.\n    backend : Optional[str]\n        The backend to which the accessor applies. If ``None``, this accessor\n        will become the default for all backends.\n\n    Returns\n    -------\n    decorator\n        Returns the decorator function.\n    \"\"\"\n    return _set_attribute_on_obj(\n        name, pd.dataframe.DataFrame._extensions, backend, pd.dataframe.DataFrame\n    )\n\n\ndef register_series_accessor(name: str, *, backend: Optional[str] = None):\n    \"\"\"\n    Register a series attribute with the name provided.\n\n    This is a decorator that assigns a new attribute to Series. 
It can be used\n    with the following syntax:\n\n    ```\n    @register_series_accessor(\"new_method\")\n    def my_new_series_method(*args, **kwargs):\n        # logic goes here\n        return\n    ```\n\n    The new attribute can then be accessed with the name provided:\n\n    ```\n    s.new_method(*my_args, **my_kwargs)\n    ```\n\n    Parameters\n    ----------\n    name : str\n        The name of the attribute to assign to Series.\n    backend : Optional[str]\n        The backend to which the accessor applies. If ``None``, this accessor\n        will become the default for all backends.\n\n    Returns\n    -------\n    decorator\n        Returns the decorator function.\n    \"\"\"\n    return _set_attribute_on_obj(\n        name, pd.series.Series._extensions, backend=backend, obj=pd.series.Series\n    )\n\n\ndef register_base_accessor(name: str, *, backend: Optional[str] = None):\n    \"\"\"\n    Register a base attribute with the name provided.\n\n    This is a decorator that assigns a new attribute to BasePandasDataset. It can be used\n    with the following syntax:\n\n    ```\n    @register_base_accessor(\"new_method\")\n    def my_new_base_method(*args, **kwargs):\n        # logic goes here\n        return\n    ```\n\n    The new attribute can then be accessed with the name provided:\n\n    ```\n    s.new_method(*my_args, **my_kwargs)\n    ```\n\n    Parameters\n    ----------\n    name : str\n        The name of the attribute to assign to BasePandasDataset.\n    backend : Optional[str]\n        The backend to which the accessor applies. 
If ``None``, this accessor\n        will become the default for all backends.\n\n    Returns\n    -------\n    decorator\n        Returns the decorator function.\n    \"\"\"\n    from modin.pandas.base import BasePandasDataset\n\n    return _set_attribute_on_obj(\n        name,\n        BasePandasDataset._extensions,\n        backend=backend,\n        obj=BasePandasDataset,\n    )\n\n\ndef register_pd_accessor(name: str, *, backend: Optional[str] = None):\n    \"\"\"\n    Register a pd namespace attribute with the name provided.\n\n    This is a decorator that assigns a new attribute to modin.pandas. It can be used\n    with the following syntax:\n\n    ```\n    @register_pd_accessor(\"new_function\")\n    def my_new_pd_function(*args, **kwargs):\n        # logic goes here\n        return\n    ```\n\n    The new attribute can then be accessed with the name provided:\n\n    ```\n    import modin.pandas as pd\n\n    pd.new_function(*my_args, **my_kwargs)\n    ```\n\n\n    Parameters\n    ----------\n    name : str\n        The name of the attribute to assign to modin.pandas.\n    backend : Optional[str]\n        The backend to which the accessor applies. 
If ``None``, this accessor\n        will become the default for all backends.\n\n    Returns\n    -------\n    decorator\n        Returns the decorator function.\n    \"\"\"\n    set_reexport = name not in _GENERAL_EXTENSIONS[backend] and name in dir(pd)\n    return _set_attribute_on_obj(\n        name=name,\n        extensions=_GENERAL_EXTENSIONS,\n        backend=backend,\n        obj=pd,\n        set_reexport=set_reexport,\n    )\n\n\ndef __getattr___impl(name: str):\n    \"\"\"\n    Override __getattr__ on the modin.pandas module to enable extensions.\n\n    Note that python only falls back to this function if the attribute is not\n    found in this module's namespace.\n\n    Parameters\n    ----------\n    name : str\n        The name of the attribute being retrieved.\n\n    Returns\n    -------\n    Attribute\n        Returns the extension attribute, if it exists, otherwise returns the attribute\n        imported in this file.\n    \"\"\"\n    from modin.config import Backend\n\n    backend = Backend.get()\n    if name in _GENERAL_EXTENSIONS[backend]:\n        return _GENERAL_EXTENSIONS[backend][name]\n    elif name in _GENERAL_EXTENSIONS[None]:\n        return _GENERAL_EXTENSIONS[None][name]\n    elif name in _reexport_classes:\n        return _reexport_classes[name]\n    else:\n        raise AttributeError(f\"module 'modin.pandas' has no attribute '{name}'\")\n\n\ndef register_dataframe_groupby_accessor(name: str, *, backend: Optional[str] = None):\n    \"\"\"\n    Register a dataframe groupby attribute with the name provided.\n\n    This is a decorator that assigns a new attribute to DataFrameGroupBy. 
It can be used\n    with the following syntax:\n\n    ```\n    @register_dataframe_groupby_accessor(\"new_method\")\n    def my_new_dataframe_groupby_method(*args, **kwargs):\n        # logic goes here\n        return\n    ```\n    The new attribute can then be accessed with the name provided:\n\n    ```\n    df.groupby(\"col\").new_method(*my_args, **my_kwargs)\n    ```\n\n    Parameters\n    ----------\n    name : str\n        The name of the attribute to assign to DataFrameGroupBy.\n    backend : Optional[str]\n        The backend to which the accessor applies. If ``None``, this accessor\n        will become the default for all backends.\n\n    Returns\n    -------\n    decorator\n        Returns the decorator function.\n    \"\"\"\n    return _set_attribute_on_obj(\n        name,\n        pd.groupby.DataFrameGroupBy._extensions,\n        backend=backend,\n        obj=pd.groupby.DataFrameGroupBy,\n    )\n\n\ndef register_series_groupby_accessor(name: str, *, backend: Optional[str] = None):\n    \"\"\"\n    Register a series groupby attribute with the name provided.\n\n    This is a decorator that assigns a new attribute to SeriesGroupBy. It can be used\n    with the following syntax:\n\n    ```\n    @register_series_groupby_accessor(\"new_method\")\n    def my_new_series_groupby_method(*args, **kwargs):\n        # logic goes here\n        return\n    ```\n    The new attribute can then be accessed with the name provided:\n    ```\n    df.groupby(\"col0\")[\"col1\"].new_method(*my_args, **my_kwargs)\n    ```\n    Parameters\n    ----------\n    name : str\n        The name of the attribute to assign to SeriesGroupBy.\n    backend : Optional[str]\n        The backend to which the accessor applies. 
If ``None``, this accessor\n        will become the default for all backends.\n\n    Returns\n    -------\n    decorator\n        Returns the decorator function.\n    \"\"\"\n    return _set_attribute_on_obj(\n        name,\n        pd.groupby.SeriesGroupBy._extensions,\n        backend=backend,\n        obj=pd.groupby.SeriesGroupBy,\n    )\n"
  },
  {
    "path": "modin/pandas/arrays/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\n\"\"\"The module is needed to allow the following import `import modin.pandas.arrays`.\"\"\"\n\nfrom pandas.arrays import *  # noqa: F403, F401\nfrom pandas.arrays import __all__  # noqa: F401\n"
  },
  {
    "path": "modin/pandas/base.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Implement DataFrame/Series public API as pandas does.\"\"\"\n\nfrom __future__ import annotations\n\nimport abc\nimport pickle as pkl\nimport re\nimport sys\nimport warnings\nfrom functools import cached_property\nfrom typing import (\n    TYPE_CHECKING,\n    Any,\n    Callable,\n    Hashable,\n    Literal,\n    Optional,\n    Sequence,\n    Union,\n)\n\nimport numpy as np\nimport pandas\nimport pandas.core.generic\nimport pandas.core.resample\nimport pandas.core.window.rolling\nfrom pandas._libs import lib\nfrom pandas._libs.tslibs import to_offset\nfrom pandas._typing import (\n    Axis,\n    CompressionOptions,\n    DtypeBackend,\n    IndexKeyFunc,\n    IndexLabel,\n    Level,\n    RandomState,\n    Scalar,\n    StorageOptions,\n    T,\n    TimedeltaConvertibleTypes,\n    TimestampConvertibleTypes,\n    npt,\n)\nfrom pandas.compat import numpy as numpy_compat\nfrom pandas.core.common import count_not_none, pipe\nfrom pandas.core.dtypes.common import (\n    is_bool_dtype,\n    is_dict_like,\n    is_dtype_equal,\n    is_integer,\n    is_integer_dtype,\n    is_list_like,\n    is_numeric_dtype,\n    is_object_dtype,\n)\nfrom pandas.core.indexes.api import 
ensure_index\nfrom pandas.core.methods.describe import _refine_percentiles\nfrom pandas.util._decorators import doc\nfrom pandas.util._validators import (\n    validate_ascending,\n    validate_bool_kwarg,\n    validate_percentile,\n)\n\nfrom modin import pandas as pd\nfrom modin.config import Backend, ShowBackendSwitchProgress\nfrom modin.core.storage_formats.pandas.query_compiler_caster import (\n    EXTENSION_NO_LOOKUP,\n    QueryCompilerCaster,\n)\nfrom modin.error_message import ErrorMessage\nfrom modin.logging import ClassLogger, disable_logging\nfrom modin.pandas.accessor import CachedAccessor, ModinAPI\nfrom modin.pandas.api.extensions.extensions import EXTENSION_DICT_TYPE\nfrom modin.pandas.utils import GET_BACKEND_DOC, SET_BACKEND_DOC, is_scalar\nfrom modin.utils import (\n    _inherit_docstrings,\n    expanduser_path_arg,\n    sentinel,\n    try_cast_to_pandas,\n)\n\nfrom .utils import _doc_binary_op, is_full_grab_slice\n\nif TYPE_CHECKING:\n    from typing_extensions import Self\n\n    from modin.core.storage_formats import BaseQueryCompiler\n\n    from .dataframe import DataFrame\n    from .indexing import _iLocIndexer, _LocIndexer\n    from .resample import Resampler\n    from .series import Series\n    from .window import Expanding, Rolling, Window\n\n\n# Do not lookup certain attributes in columns or index, as they're used for some\n# special purposes, like serving remote context\n_ATTRS_NO_LOOKUP = {\n    \"__name__\",\n    \"_cache\",\n    \"_ipython_canary_method_should_not_exist_\",\n    \"_ipython_display_\",\n    \"_repr_mimebundle_\",\n    # Also avoid looking up the attributes that we use to implement the\n    # extension system.\n} | EXTENSION_NO_LOOKUP\n\n\n_DEFAULT_BEHAVIOUR = {\n    \"__init__\",\n    \"__class__\",\n    \"_get_index\",\n    \"_set_index\",\n    \"_pandas_class\",\n    \"_get_axis_number\",\n    \"empty\",\n    \"index\",\n    \"columns\",\n    \"name\",\n    \"dtypes\",\n    \"dtype\",\n    \"groupby\",\n    
\"_get_name\",\n    \"_set_name\",\n    \"_default_to_pandas\",\n    \"_query_compiler\",\n    \"_to_pandas\",\n    \"_repartition\",\n    \"_build_repr_df\",\n    \"_reduce_dimension\",\n    \"__repr__\",\n    \"__len__\",\n    \"__constructor__\",\n    \"_create_or_update_from_compiler\",\n    \"_update_inplace\",\n    # for persistance support;\n    # see DataFrame methods docstrings for more\n    \"_inflate_light\",\n    \"_inflate_full\",\n    \"__reduce__\",\n    \"__reduce_ex__\",\n    \"_init\",\n} | _ATTRS_NO_LOOKUP\n\n_doc_binary_op_kwargs = {\"returns\": \"BasePandasDataset\", \"left\": \"BasePandasDataset\"}\n\n\ndef _get_repr_axis_label_indexer(labels, num_for_repr):\n    \"\"\"\n    Get the indexer for the given axis labels to be used for the repr.\n\n    Parameters\n    ----------\n    labels : pandas.Index\n        The axis labels.\n    num_for_repr : int\n        The number of elements to display.\n\n    Returns\n    -------\n    slice or list\n        The indexer to use for the repr.\n    \"\"\"\n    if len(labels) <= num_for_repr:\n        return slice(None)\n    # At this point, the entire axis has len(labels) elements, and num_for_repr <\n    # len(labels). We want to select a pandas subframe containing elements such that:\n    #   - the repr of the pandas subframe will be the same as the repr of the entire\n    #     frame.\n    #   - the pandas repr will not be able to show all the elements and will put an\n    #      ellipsis in the middle\n    #\n    # We accomplish this by selecting some elements from the front and some from the\n    # back, with the front having at most 1 element more than the back. 
The total\n    # number of elements will be num_for_repr + 1.\n\n    if num_for_repr % 2 == 0:\n        # If num_for_repr is even, take an extra element from the front.\n        # The total number of elements we are selecting is (num_for_repr // 2) * 2 + 1\n        # = num_for_repr + 1\n        front_repr_num = num_for_repr // 2 + 1\n        back_repr_num = num_for_repr // 2\n    else:\n        # If num_for_repr is odd, take an extra element from both the front and the\n        # back. The total number of elements we are selecting is\n        # (num_for_repr // 2) * 2 + 1 + 1 = num_for_repr + 1\n        front_repr_num = num_for_repr // 2 + 1\n        back_repr_num = num_for_repr // 2 + 1\n    all_positions = range(len(labels))\n    return list(all_positions[:front_repr_num]) + (\n        [] if back_repr_num == 0 else list(all_positions[-back_repr_num:])\n    )\n\n\n@_inherit_docstrings(pandas.DataFrame, apilink=[\"pandas.DataFrame\", \"pandas.Series\"])\nclass BasePandasDataset(QueryCompilerCaster, ClassLogger):\n    \"\"\"\n    Implement most of the common code that exists in DataFrame/Series.\n\n    Since both objects share the same underlying representation, and the algorithms\n    are the same, we use this object to define the general behavior of those objects\n    and then use those objects to define the output type.\n    \"\"\"\n\n    # Pandas class that we pretend to be; usually it has the same name as our class\n    # but lives in \"pandas\" namespace.\n    _pandas_class = pandas.core.generic.NDFrame\n    _query_compiler: BaseQueryCompiler\n    _siblings: list[BasePandasDataset]\n\n    _extensions: EXTENSION_DICT_TYPE = EXTENSION_DICT_TYPE(dict)\n    _pinned: bool = False\n\n    @cached_property\n    def _is_dataframe(self) -> bool:\n        \"\"\"\n        Tell whether this is a dataframe.\n\n        Ideally, other methods of BasePandasDataset shouldn't care whether this\n        is a dataframe or a series, but sometimes we need to know. 
This method\n        is better than hasattr(self, \"columns\"), which for series will call\n        self.__getattr__(\"columns\"), which requires materializing the index.\n\n        Returns\n        -------\n        bool : Whether this is a dataframe.\n        \"\"\"\n        return issubclass(self._pandas_class, pandas.DataFrame)\n\n    @abc.abstractmethod\n    def _create_or_update_from_compiler(\n        self, new_query_compiler: BaseQueryCompiler, inplace: bool = False\n    ) -> Self | None:\n        \"\"\"\n        Return or update a ``DataFrame`` or ``Series`` with given `new_query_compiler`.\n\n        Parameters\n        ----------\n        new_query_compiler : BaseQueryCompiler\n            QueryCompiler to use to manage the data.\n        inplace : bool, default: False\n            Whether or not to perform update or creation inplace.\n\n        Returns\n        -------\n        DataFrame, Series or None\n            None if update was done, ``DataFrame`` or ``Series`` otherwise.\n        \"\"\"\n        pass\n\n    def _add_sibling(self, sibling: BasePandasDataset) -> None:\n        \"\"\"\n        Add a DataFrame or Series object to the list of siblings.\n\n        Siblings are objects that share the same query compiler. This function is called\n        when a shallow copy is made.\n\n        Parameters\n        ----------\n        sibling : BasePandasDataset\n            Dataset to add to siblings list.\n        \"\"\"\n        sibling._siblings = self._siblings + [self]\n        self._siblings += [sibling]\n        for sib in self._siblings:\n            sib._siblings += [sibling]\n\n    def _build_repr_df(\n        self, num_rows: int, num_cols: int\n    ) -> pandas.DataFrame | pandas.Series:\n        \"\"\"\n        Build pandas DataFrame for string representation.\n\n        Parameters\n        ----------\n        num_rows : int\n            Number of rows to show in string representation. 
If number of\n            rows in this dataset is greater than `num_rows` then half of\n            `num_rows` rows from the beginning and half of `num_rows` rows\n            from the end are shown.\n        num_cols : int\n            Number of columns to show in string representation. If number of\n            columns in this dataset is greater than `num_cols` then half of\n            `num_cols` columns from the beginning and half of `num_cols`\n            columns from the end are shown.\n\n        Returns\n        -------\n        pandas.DataFrame or pandas.Series\n            A pandas dataset with `num_rows` or fewer rows and `num_cols` or fewer columns.\n        \"\"\"\n        # Fast track for empty dataframe.\n        if len(self) == 0 or (\n            self._is_dataframe and self._query_compiler.get_axis_len(1) == 0\n        ):\n            return pandas.DataFrame(\n                index=self.index,\n                columns=self.columns if self._is_dataframe else None,\n            )\n        row_indexer = _get_repr_axis_label_indexer(self.index, num_rows)\n        if self._is_dataframe:\n            indexer = row_indexer, _get_repr_axis_label_indexer(self.columns, num_cols)\n        else:\n            indexer = row_indexer\n        return self.iloc[indexer]._query_compiler.to_pandas()\n\n    def _update_inplace(self, new_query_compiler: BaseQueryCompiler) -> None:\n        \"\"\"\n        Update the current DataFrame inplace.\n\n        Parameters\n        ----------\n        new_query_compiler : BaseQueryCompiler\n            The new QueryCompiler to use to manage the data.\n        \"\"\"\n        old_query_compiler = self._query_compiler\n        self._query_compiler = new_query_compiler\n        for sib in self._siblings:\n            sib._query_compiler = new_query_compiler\n        old_query_compiler.free()\n\n    def _validate_other(\n        self,\n        other,\n        axis,\n        dtype_check=False,\n        compare_index=False,\n    ):\n  
      \"\"\"\n        Help to check validity of other in inter-df operations.\n\n        Parameters\n        ----------\n        other : modin.pandas.BasePandasDataset\n            Another dataset to validate against `self`.\n        axis : {None, 0, 1}\n            Specifies axis along which to do validation. When `1` or `None`\n            is specified, validation is done along `index`, if `0` is specified\n            validation is done along `columns` of `other` frame.\n        dtype_check : bool, default: False\n            Validates that both frames have compatible dtypes.\n        compare_index : bool, default: False\n            Compare Index if True.\n\n        Returns\n        -------\n        BaseQueryCompiler or Any\n            Other frame if it is determined to be valid.\n\n        Raises\n        ------\n        ValueError\n            If `other` is `Series` and its length is different from\n            length of `self` `axis`.\n        TypeError\n            If any validation checks fail.\n        \"\"\"\n        if isinstance(other, BasePandasDataset):\n            return other._query_compiler\n        if not is_list_like(other):\n            # We skip dtype checking if the other is a scalar. Note that pandas\n            # is_scalar can be misleading as it is False for almost all objects,\n            # even when those objects should be treated as scalars. See e.g.\n            # https://github.com/modin-project/modin/issues/5236. 
Therefore, we\n            # detect scalars by checking that `other` is neither a list-like nor\n            # another BasePandasDataset.\n            return other\n        axis = self._get_axis_number(axis) if axis is not None else 1\n        result = other\n        if axis == 0:\n            if len(other) != len(self._query_compiler.index):\n                raise ValueError(\n                    f\"Unable to coerce to Series, length must be {len(self._query_compiler.index)}: \"\n                    + f\"given {len(other)}\"\n                )\n        else:\n            if len(other) != len(self._query_compiler.columns):\n                raise ValueError(\n                    f\"Unable to coerce to Series, length must be {len(self._query_compiler.columns)}: \"\n                    + f\"given {len(other)}\"\n                )\n        if hasattr(other, \"dtype\"):\n            other_dtypes = [other.dtype] * len(other)\n        elif is_dict_like(other):\n            other_dtypes = [\n                other[label] if pandas.isna(other[label]) else type(other[label])\n                for label in self._get_axis(axis)\n                # The binary operation is applied for intersection of axis labels\n                # and dictionary keys. So filtering out extra keys.\n                if label in other\n            ]\n        else:\n            other_dtypes = [x if pandas.isna(x) else type(x) for x in other]\n        if compare_index:\n            if not self.index.equals(other.index):\n                raise TypeError(\"Cannot perform operation with non-equal index\")\n        # Do dtype checking.\n        if dtype_check:\n            self_dtypes = self._get_dtypes()\n            if is_dict_like(other):\n                # The binary operation is applied for the intersection of axis labels\n                # and dictionary keys. 
So filtering `self_dtypes` to match the `other`\n                # dictionary.\n                self_dtypes = [\n                    dtype\n                    for label, dtype in zip(self._get_axis(axis), self._get_dtypes())\n                    if label in other\n                ]\n\n            # TODO(https://github.com/modin-project/modin/issues/5239):\n            # this spuriously rejects other that is a list including some\n            # custom type that can be added to self's elements.\n            for self_dtype, other_dtype in zip(self_dtypes, other_dtypes):\n                if not (\n                    (is_numeric_dtype(self_dtype) and is_numeric_dtype(other_dtype))\n                    or (is_numeric_dtype(self_dtype) and pandas.isna(other_dtype))\n                    or (is_object_dtype(self_dtype) and is_object_dtype(other_dtype))\n                    or (\n                        lib.is_np_dtype(self_dtype, \"mM\")\n                        and lib.is_np_dtype(self_dtype, \"mM\")\n                    )\n                    or is_dtype_equal(self_dtype, other_dtype)\n                ):\n                    raise TypeError(\"Cannot do operation with improper dtypes\")\n        return result\n\n    def _validate_function(self, func, on_invalid=None) -> None:\n        \"\"\"\n        Check the validity of the function which is intended to be applied to the frame.\n\n        Parameters\n        ----------\n        func : object\n        on_invalid : callable(str, cls), optional\n            Function to call in case invalid `func` is met, `on_invalid` takes an error\n            message and an exception type as arguments. 
If not specified raise an\n            appropriate exception.\n            **Note:** This parameter is a hack to concord with pandas error types.\n        \"\"\"\n\n        def error_raiser(msg, exception=Exception):\n            raise exception(msg)\n\n        if on_invalid is None:\n            on_invalid = error_raiser\n\n        if isinstance(func, dict):\n            [self._validate_function(fn, on_invalid) for fn in func.values()]\n            return\n            # We also could validate this, but it may be quite expensive for lazy-frames\n            # if not all(idx in self._get_axis(axis) for idx in func.keys()):\n            #     error_raiser(\"Invalid dict keys\", KeyError)\n\n        if not is_list_like(func):\n            func = [func]\n\n        for fn in func:\n            if isinstance(fn, str):\n                if not (hasattr(self, fn) or hasattr(np, fn)):\n                    on_invalid(\n                        f\"'{fn}' is not a valid function for '{type(self).__name__}' object\",\n                        AttributeError,\n                    )\n            elif not callable(fn):\n                on_invalid(\n                    f\"One of the passed functions has an invalid type: {type(fn)}: {fn}, \"\n                    + \"only callable or string is acceptable.\",\n                    TypeError,\n                )\n\n    def _binary_op(self, op, other, **kwargs) -> Self:\n        \"\"\"\n        Do binary operation between two datasets.\n\n        Parameters\n        ----------\n        op : str\n            Name of binary operation.\n        other : modin.pandas.BasePandasDataset\n            Second operand of binary operation.\n        **kwargs : dict\n            Additional parameters to binary operation.\n\n        Returns\n        -------\n        modin.pandas.BasePandasDataset\n            Result of binary operation.\n        \"\"\"\n        # _axis indicates the operator will use the default axis\n        if kwargs.pop(\"_axis\", None) 
is None:\n            if kwargs.get(\"axis\", None) is not None:\n                kwargs[\"axis\"] = axis = self._get_axis_number(kwargs.get(\"axis\", None))\n            else:\n                kwargs[\"axis\"] = axis = 1\n        else:\n            axis = 0\n        if kwargs.get(\"level\", None) is not None:\n            # Broadcast is an internally used argument\n            kwargs.pop(\"broadcast\", None)\n            return self._default_to_pandas(\n                getattr(self._pandas_class, op), other, **kwargs\n            )\n        other = self._validate_other(other, axis, dtype_check=True)\n        exclude_list = [\n            \"__add__\",\n            \"__radd__\",\n            \"__and__\",\n            \"__rand__\",\n            \"__or__\",\n            \"__ror__\",\n            \"__xor__\",\n            \"__rxor__\",\n        ]\n        if op in exclude_list:\n            kwargs.pop(\"axis\")\n        # Series logical operations take an additional fill_value argument that DF does not\n        series_specialize_list = [\n            \"eq\",\n            \"ge\",\n            \"gt\",\n            \"le\",\n            \"lt\",\n            \"ne\",\n        ]\n        if not self._is_dataframe and op in series_specialize_list:\n            op = \"series_\" + op\n        new_query_compiler = getattr(self._query_compiler, op)(other, **kwargs)\n        return self._create_or_update_from_compiler(new_query_compiler)\n\n    def _default_to_pandas(self, op, *args, reason: str = None, **kwargs):\n        \"\"\"\n        Convert dataset to pandas type and call a pandas function on it.\n\n        Parameters\n        ----------\n        op : str\n            Name of pandas function.\n        *args : list\n            Additional positional arguments to be passed to `op`.\n        reason : str, optional\n        **kwargs : dict\n            Additional keywords arguments to be passed to `op`.\n\n        Returns\n        -------\n        object\n            Result of 
operation.\n        \"\"\"\n        empty_self_str = \"\" if not self.empty else \" for empty DataFrame\"\n        self._query_compiler._maybe_warn_on_default(\n            message=\"`{}.{}`{}\".format(\n                type(self).__name__,\n                op if isinstance(op, str) else op.__name__,\n                empty_self_str,\n            ),\n            reason=reason,\n        )\n\n        args = try_cast_to_pandas(args)\n        kwargs = try_cast_to_pandas(kwargs)\n        pandas_obj = self._to_pandas()\n        with warnings.catch_warnings():\n            warnings.filterwarnings(\"ignore\", category=FutureWarning)\n            if callable(op):\n                result = op(pandas_obj, *args, **kwargs)\n            elif isinstance(op, str):\n                # The inner `getattr` is ensuring that we are treating this object (whether\n                # it is a DataFrame, Series, etc.) as a pandas object. The outer `getattr`\n                # will get the operation (`op`) from the pandas version of the class and run\n                # it on the object after we have converted it to pandas.\n                attr = getattr(self._pandas_class, op)\n                if isinstance(attr, property):\n                    result = getattr(pandas_obj, op)\n                else:\n                    result = attr(pandas_obj, *args, **kwargs)\n            else:\n                ErrorMessage.catch_bugs_and_request_email(\n                    failure_condition=True,\n                    extra_log=\"{} is an unsupported operation\".format(op),\n                )\n        if isinstance(result, pandas.DataFrame):\n            from .dataframe import DataFrame\n\n            return DataFrame(result)\n        elif isinstance(result, pandas.Series):\n            from .series import Series\n\n            return Series(result)\n        # inplace\n        elif result is None:\n            return self._create_or_update_from_compiler(\n                getattr(pd, 
type(pandas_obj).__name__)(pandas_obj)._query_compiler,\n                inplace=True,\n            )\n        else:\n            try:\n                if (\n                    isinstance(result, (list, tuple))\n                    and len(result) == 2\n                    and isinstance(result[0], pandas.DataFrame)\n                ):\n                    # Some operations split the DataFrame into two (e.g. align). We need to wrap\n                    # both of the returned results\n                    if isinstance(result[1], pandas.DataFrame):\n                        second = self.__constructor__(result[1])\n                    else:\n                        second = result[1]\n                    return self.__constructor__(result[0]), second\n                else:\n                    return result\n            except TypeError:\n                return result\n\n    @classmethod\n    def _get_axis_number(cls, axis) -> int:\n        \"\"\"\n        Convert axis name or number to axis index.\n\n        Parameters\n        ----------\n        axis : int, str or pandas._libs.lib.NoDefault\n            Axis name ('index' or 'columns') or number to be converted to axis index.\n\n        Returns\n        -------\n        int\n            0 or 1 - axis index in the array of axes stored in the dataframe.\n        \"\"\"\n        if axis is lib.no_default:\n            axis = None\n\n        return cls._pandas_class._get_axis_number(axis) if axis is not None else 0\n\n    @cached_property\n    def __constructor__(self) -> type[Self]:\n        \"\"\"\n        Construct DataFrame or Series object depending on self type.\n\n        Returns\n        -------\n        modin.pandas.BasePandasDataset\n            Constructed object.\n        \"\"\"\n        return type(self)\n\n    def abs(self) -> Self:  # noqa: RT01, D200\n        \"\"\"\n        Return a `BasePandasDataset` with absolute numeric value of each element.\n        \"\"\"\n        
self._validate_dtypes(numeric_only=True)\n        return self.__constructor__(query_compiler=self._query_compiler.abs())\n\n    def _set_index(self, new_index) -> None:\n        \"\"\"\n        Set the index for this DataFrame.\n\n        Parameters\n        ----------\n        new_index : pandas.Index\n            The new index to set this.\n        \"\"\"\n        self._query_compiler.index = new_index\n\n    def _get_index(self) -> pandas.Index:\n        \"\"\"\n        Get the index for this DataFrame.\n\n        Returns\n        -------\n        pandas.Index\n            The union of all indexes across the partitions.\n        \"\"\"\n        return self._query_compiler.index\n\n    index: pandas.Index = property(_get_index, _set_index)\n\n    def _get_axis(self, axis) -> pandas.Index:\n        \"\"\"\n        Return index labels of the specified axis.\n\n        Parameters\n        ----------\n        axis : {0, 1}\n            Axis to return labels on.\n            0 is for index, when 1 is for columns.\n\n        Returns\n        -------\n        pandas.Index\n        \"\"\"\n        return self.index if axis == 0 else self.columns\n\n    def add(\n        self, other, axis=\"columns\", level=None, fill_value=None\n    ) -> Self:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return addition of `BasePandasDataset` and `other`, element-wise (binary operator `add`).\n        \"\"\"\n        return self._binary_op(\n            \"add\", other, axis=axis, level=level, fill_value=fill_value\n        )\n\n    def aggregate(\n        self, func=None, axis=0, *args, **kwargs\n    ) -> DataFrame | Series | Scalar:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Aggregate using one or more operations over the specified axis.\n        \"\"\"\n        axis = self._get_axis_number(axis)\n        result = None\n\n        if axis == 0:\n            result = self._aggregate(func, _axis=axis, *args, **kwargs)\n        # TODO: handle case when axis == 1\n        if 
result is None:\n            kwargs.pop(\"is_transform\", None)\n            return self.apply(func, axis=axis, args=args, **kwargs)\n        return result\n\n    agg: DataFrame | Series | Scalar = aggregate\n\n    def _aggregate(self, func, *args, **kwargs):\n        \"\"\"\n        Aggregate using one or more operations over index axis.\n\n        Parameters\n        ----------\n        func : function, str, list or dict\n            Function to use for aggregating the data.\n        *args : list\n            Positional arguments to pass to func.\n        **kwargs : dict\n            Keyword arguments to pass to func.\n\n        Returns\n        -------\n        scalar or BasePandasDataset\n\n        See Also\n        --------\n        aggregate : Aggregate along any axis.\n        \"\"\"\n        _axis = kwargs.pop(\"_axis\", 0)\n        kwargs.pop(\"_level\", None)\n\n        if isinstance(func, str):\n            kwargs.pop(\"is_transform\", None)\n            return self._string_function(func, *args, **kwargs)\n\n        # Dictionaries have complex behavior because they can be renamed here.\n        elif func is None or isinstance(func, dict):\n            return self._default_to_pandas(\"agg\", func, *args, **kwargs)\n        kwargs.pop(\"is_transform\", None)\n        return self.apply(func, axis=_axis, args=args, **kwargs)\n\n    def _string_function(self, func, *args, **kwargs):\n        \"\"\"\n        Execute a function identified by its string name.\n\n        Parameters\n        ----------\n        func : str\n            Function name to call on `self`.\n        *args : list\n            Positional arguments to pass to func.\n        **kwargs : dict\n            Keyword arguments to pass to func.\n\n        Returns\n        -------\n        object\n            Function result.\n        \"\"\"\n        assert isinstance(func, str)\n        f = getattr(self, func, None)\n        if f is not None:\n            if callable(f):\n                return 
f(*args, **kwargs)\n            assert len(args) == 0\n            assert (\n                len([kwarg for kwarg in kwargs if kwarg not in [\"axis\", \"_level\"]]) == 0\n            )\n            return f\n        f = getattr(np, func, None)\n        if f is not None:\n            return self._default_to_pandas(\"agg\", func, *args, **kwargs)\n        raise ValueError(\"{} is an unknown string function\".format(func))\n\n    def _get_dtypes(self) -> list:\n        \"\"\"\n        Get dtypes as list.\n\n        Returns\n        -------\n        list\n            Either a one-element list that contains `dtype` if object denotes a Series\n            or a list that contains `dtypes` if object denotes a DataFrame.\n        \"\"\"\n        if hasattr(self, \"dtype\"):\n            return [self.dtype]\n        else:\n            return list(self.dtypes)\n\n    def align(\n        self,\n        other,\n        join=\"outer\",\n        axis=None,\n        level=None,\n        copy=None,\n        fill_value=None,\n        method=lib.no_default,\n        limit=lib.no_default,\n        fill_axis=lib.no_default,\n        broadcast_axis=lib.no_default,\n    ) -> tuple[Self, Self]:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Align two objects on their axes with the specified join method.\n        \"\"\"\n        if (\n            method is not lib.no_default\n            or limit is not lib.no_default\n            or fill_axis is not lib.no_default\n        ):\n            warnings.warn(\n                \"The 'method', 'limit', and 'fill_axis' keywords in \"\n                + f\"{type(self).__name__}.align are deprecated and will be removed \"\n                + \"in a future version. 
Call fillna directly on the returned objects \"\n                + \"instead.\",\n                FutureWarning,\n            )\n        if fill_axis is lib.no_default:\n            fill_axis = 0\n        if method is lib.no_default:\n            method = None\n        if limit is lib.no_default:\n            limit = None\n\n        if broadcast_axis is not lib.no_default:\n            msg = (\n                f\"The 'broadcast_axis' keyword in {type(self).__name__}.align is \"\n                + \"deprecated and will be removed in a future version.\"\n            )\n            if broadcast_axis is not None:\n                if self.ndim == 1 and other.ndim == 2:\n                    msg += (\n                        \" Use left = DataFrame({col: left for col in right.columns}, \"\n                        + \"index=right.index) before calling `left.align(right)` instead.\"\n                    )\n                elif self.ndim == 2 and other.ndim == 1:\n                    msg += (\n                        \" Use right = DataFrame({col: right for col in left.columns}, \"\n                        + \"index=left.index) before calling `left.align(right)` instead\"\n                    )\n            warnings.warn(msg, FutureWarning)\n        else:\n            broadcast_axis = None\n\n        left, right = self._query_compiler.align(\n            other._query_compiler,\n            join=join,\n            axis=axis,\n            level=level,\n            copy=copy,\n            fill_value=fill_value,\n            method=method,\n            limit=limit,\n            fill_axis=fill_axis,\n            broadcast_axis=broadcast_axis,\n        )\n        return self.__constructor__(query_compiler=left), self.__constructor__(\n            query_compiler=right\n        )\n\n    @abc.abstractmethod\n    def _reduce_dimension(self, query_compiler: BaseQueryCompiler) -> Series | Scalar:\n        \"\"\"\n        Reduce the dimension of data from the `query_compiler`.\n\n        
Parameters\n        ----------\n        query_compiler : BaseQueryCompiler\n            Query compiler to retrieve the data.\n\n        Returns\n        -------\n        Series | Scalar\n        \"\"\"\n        pass\n\n    def all(\n        self, axis=0, bool_only=False, skipna=True, **kwargs\n    ) -> Self:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return whether all elements are True, potentially over an axis.\n        \"\"\"\n        validate_bool_kwarg(skipna, \"skipna\", none_allowed=False)\n        if axis is not None:\n            axis = self._get_axis_number(axis)\n            if bool_only and axis == 0:\n                if hasattr(self, \"dtype\"):\n                    raise NotImplementedError(\n                        \"{}.{} does not implement numeric_only.\".format(\n                            type(self).__name__, \"all\"\n                        )\n                    )\n                data_for_compute = self[self.columns[self.dtypes == np.bool_]]\n                return data_for_compute.all(\n                    axis=axis, bool_only=False, skipna=skipna, **kwargs\n                )\n            return self._reduce_dimension(\n                self._query_compiler.all(\n                    axis=axis, bool_only=bool_only, skipna=skipna, **kwargs\n                )\n            )\n        else:\n            if bool_only:\n                raise ValueError(\"Axis must be 0 or 1 (got {})\".format(axis))\n            # Reduce to a scalar if axis is None.\n            result = self._reduce_dimension(\n                # FIXME: Judging by pandas docs `**kwargs` serves only compatibility\n                # purpose and does not affect the result, we shouldn't pass them to the query compiler.\n                self._query_compiler.all(\n                    axis=0,\n                    bool_only=bool_only,\n                    skipna=skipna,\n                    **kwargs,\n                )\n            )\n            if isinstance(result, 
BasePandasDataset):\n                return result.all(\n                    axis=axis, bool_only=bool_only, skipna=skipna, **kwargs\n                )\n            return result\n\n    def any(\n        self, *, axis=0, bool_only=False, skipna=True, **kwargs\n    ) -> Self:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return whether any element is True, potentially over an axis.\n        \"\"\"\n        validate_bool_kwarg(skipna, \"skipna\", none_allowed=False)\n        if axis is not None:\n            axis = self._get_axis_number(axis)\n            if bool_only and axis == 0:\n                if hasattr(self, \"dtype\"):\n                    raise NotImplementedError(\n                        \"{}.{} does not implement numeric_only.\".format(\n                            type(self).__name__, \"all\"\n                        )\n                    )\n                data_for_compute = self[self.columns[self.dtypes == np.bool_]]\n                return data_for_compute.any(\n                    axis=axis, bool_only=False, skipna=skipna, **kwargs\n                )\n            return self._reduce_dimension(\n                self._query_compiler.any(\n                    axis=axis, bool_only=bool_only, skipna=skipna, **kwargs\n                )\n            )\n        else:\n            if bool_only:\n                raise ValueError(\"Axis must be 0 or 1 (got {})\".format(axis))\n            # Reduce to a scalar if axis is None.\n            result = self._reduce_dimension(\n                self._query_compiler.any(\n                    axis=0,\n                    bool_only=bool_only,\n                    skipna=skipna,\n                    **kwargs,\n                )\n            )\n            if isinstance(result, BasePandasDataset):\n                return result.any(\n                    axis=axis, bool_only=bool_only, skipna=skipna, **kwargs\n                )\n            return result\n\n    def apply(\n        self,\n        func,\n        
axis,\n        raw,\n        result_type,\n        args,\n        **kwds,\n    ) -> BaseQueryCompiler:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Apply a function along an axis of the `BasePandasDataset`.\n        \"\"\"\n\n        def error_raiser(msg, exception):\n            \"\"\"Convert passed exception to the same type as pandas does and raise it.\"\"\"\n            # HACK: to match pandas error types, replace all of the\n            # TypeErrors with AssertionErrors\n            exception = exception if exception is not TypeError else AssertionError\n            raise exception(msg)\n\n        self._validate_function(func, on_invalid=error_raiser)\n        axis = self._get_axis_number(axis)\n        if isinstance(func, str):\n            # if axis != 1 function can be bound to the Series, which doesn't\n            # support axis parameter\n            if axis == 1:\n                kwds[\"axis\"] = axis\n            result = self._string_function(func, *args, **kwds)\n            if isinstance(result, BasePandasDataset):\n                return result._query_compiler\n            return result\n        elif isinstance(func, dict):\n            if self._query_compiler.get_axis_len(1) != len(set(self.columns)):\n                warnings.warn(\n                    \"duplicate column names not supported with apply().\",\n                    FutureWarning,\n                    stacklevel=2,\n                )\n        query_compiler = self._query_compiler.apply(\n            func,\n            axis,\n            args=args,\n            raw=raw,\n            result_type=result_type,\n            **kwds,\n        )\n        return query_compiler\n\n    def asfreq(\n        self, freq, method=None, how=None, normalize=False, fill_value=None\n    ) -> Self:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Convert time series to specified frequency.\n        \"\"\"\n        return self.__constructor__(\n            
query_compiler=self._query_compiler.asfreq(\n                freq=freq,\n                method=method,\n                how=how,\n                normalize=normalize,\n                fill_value=fill_value,\n            )\n        )\n\n    def asof(self, where, subset=None) -> Self:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return the last row(s) without any NaNs before `where`.\n        \"\"\"\n        scalar = not is_list_like(where)\n        if isinstance(where, pandas.Index):\n            # Prevent accidental mutation of original:\n            where = where.copy()\n        else:\n            if scalar:\n                where = [where]\n            where = pandas.Index(where)\n\n        if subset is None:\n            data = self\n        else:\n            # Only relevant for DataFrames:\n            data = self[subset]\n        no_na_index = data.dropna().index\n        new_index = pandas.Index([no_na_index.asof(i) for i in where])\n        result = self.reindex(new_index)\n        result.index = where\n\n        if scalar:\n            # Need to return a Series:\n            result = result.squeeze()\n        return result\n\n    def astype(\n        self, dtype, copy=None, errors=\"raise\"\n    ) -> Self:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Cast a Modin object to a specified dtype `dtype`.\n        \"\"\"\n        if copy is None:\n            copy = True\n        # dtype can be a series, a dict, or a scalar. If it's series,\n        # convert it to a dict before passing it to the query compiler.\n        if isinstance(dtype, (pd.Series, pandas.Series)):\n            if not dtype.index.is_unique:\n                raise ValueError(\"cannot reindex on an axis with duplicate labels\")\n            dtype = {column: dtype for column, dtype in dtype.items()}\n        # If we got a series or dict originally, dtype is a dict now. 
Its keys\n        # must be column names.\n        if isinstance(dtype, dict):\n            # avoid materializing columns in lazy mode. the query compiler\n            # will handle errors where dtype dict includes keys that are not\n            # in columns.\n            if (\n                not self._query_compiler.lazy_column_labels\n                and not set(dtype.keys()).issubset(set(self._query_compiler.columns))\n                and errors == \"raise\"\n            ):\n                raise KeyError(\n                    \"Only a column name can be used for the key in \"\n                    + \"a dtype mappings argument.\"\n                )\n\n        if not copy:\n            # If the new types match the old ones, then copying can be avoided\n            if self._query_compiler.frame_has_materialized_dtypes:\n                frame_dtypes = self._query_compiler.dtypes\n                if isinstance(dtype, dict):\n                    for col in dtype:\n                        if dtype[col] != frame_dtypes[col]:\n                            copy = True\n                            break\n                else:\n                    if not (frame_dtypes == dtype).all():\n                        copy = True\n            else:\n                copy = True\n\n        if copy:\n            new_query_compiler = self._query_compiler.astype(dtype, errors=errors)\n            return self._create_or_update_from_compiler(new_query_compiler)\n        return self\n\n    @property\n    def at(self, axis=None) -> _LocIndexer:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Get a single value for a row/column label pair.\n        \"\"\"\n        from .indexing import _LocIndexer\n\n        return _LocIndexer(self)\n\n    def at_time(self, time, asof=False, axis=None) -> Self:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Select values at particular time of day (e.g., 9:30AM).\n        \"\"\"\n        if asof:\n            # pandas raises NotImplementedError for 
asof=True, so we do, too.\n            raise NotImplementedError(\"'asof' argument is not supported\")\n        return self.between_time(\n            start_time=time, end_time=time, inclusive=\"both\", axis=axis\n        )\n\n    @_inherit_docstrings(\n        pandas.DataFrame.between_time, apilink=\"pandas.DataFrame.between_time\"\n    )\n    def between_time(\n        self,\n        start_time,\n        end_time,\n        inclusive=\"both\",\n        axis=None,\n    ) -> Self:  # noqa: PR01, RT01, D200\n        return self._create_or_update_from_compiler(\n            self._query_compiler.between_time(\n                start_time=pandas.core.tools.times.to_time(start_time),\n                end_time=pandas.core.tools.times.to_time(end_time),\n                inclusive=inclusive,\n                axis=self._get_axis_number(axis),\n            )\n        )\n\n    def _deprecate_downcast(self, downcast, method_name: str):  # noqa: GL08\n        if downcast is not lib.no_default:\n            warnings.warn(\n                f\"The 'downcast' keyword in {method_name} is deprecated and \"\n                + \"will be removed in a future version. 
Use \"\n                + \"res.infer_objects(copy=False) to infer non-object dtype, or \"\n                + \"pd.to_numeric with the 'downcast' keyword to downcast numeric \"\n                + \"results.\",\n                FutureWarning,\n            )\n        else:\n            downcast = None\n        return downcast\n\n    def bfill(\n        self,\n        *,\n        axis=None,\n        inplace=False,\n        limit=None,\n        limit_area=None,\n        downcast=lib.no_default,\n    ) -> Self:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Synonym for `DataFrame.fillna` with ``method='bfill'``.\n        \"\"\"\n        if limit_area is not None:\n            return self._default_to_pandas(\n                \"bfill\",\n                reason=\"'limit_area' parameter isn't supported\",\n                axis=axis,\n                inplace=inplace,\n                limit=limit,\n                limit_area=limit_area,\n                downcast=downcast,\n            )\n        downcast = self._deprecate_downcast(downcast, \"bfill\")\n        with warnings.catch_warnings():\n            warnings.filterwarnings(\n                \"ignore\", \".*fillna with 'method' is deprecated\", category=FutureWarning\n            )\n            return self.fillna(\n                method=\"bfill\",\n                axis=axis,\n                limit=limit,\n                downcast=downcast,\n                inplace=inplace,\n            )\n\n    def backfill(\n        self, *, axis=None, inplace=False, limit=None, downcast=lib.no_default\n    ) -> Self:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Synonym for `DataFrame.bfill`.\n        \"\"\"\n        warnings.warn(\n            \"DataFrame.backfill/Series.backfill is deprecated. 
Use DataFrame.bfill/Series.bfill instead\",\n            FutureWarning,\n        )\n        with warnings.catch_warnings():\n            warnings.filterwarnings(\"ignore\", category=FutureWarning)\n            return self.bfill(\n                axis=axis, inplace=inplace, limit=limit, downcast=downcast\n            )\n\n    def bool(self) -> bool:  # noqa: RT01, D200\n        \"\"\"\n        Return the bool of a single element `BasePandasDataset`.\n        \"\"\"\n        warnings.warn(\n            f\"{type(self).__name__}.bool is now deprecated and will be removed \"\n            + \"in future version of pandas\",\n            FutureWarning,\n        )\n        shape = self.shape\n        if shape != (1,) and shape != (1, 1):\n            raise ValueError(\n                \"\"\"The PandasObject does not have exactly\n                                1 element. Return the bool of a single\n                                element PandasObject. The truth value is\n                                ambiguous. 
Use a.empty, a.item(), a.any()\n                                or a.all().\"\"\"\n            )\n        else:\n            return self._to_pandas().bool()\n\n    def clip(\n        self, lower=None, upper=None, *, axis=None, inplace=False, **kwargs\n    ) -> Self:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Trim values at input threshold(s).\n        \"\"\"\n        # validate inputs\n        if axis is not None:\n            axis = self._get_axis_number(axis)\n        self._validate_dtypes(numeric_only=True)\n        inplace = validate_bool_kwarg(inplace, \"inplace\")\n        axis = numpy_compat.function.validate_clip_with_axis(axis, (), kwargs)\n        # any np.nan bounds are treated as None\n        if lower is not None and np.any(np.isnan(lower)):\n            lower = None\n        if upper is not None and np.any(np.isnan(upper)):\n            upper = None\n        if is_list_like(lower) or is_list_like(upper):\n            lower = self._validate_other(lower, axis)\n            upper = self._validate_other(upper, axis)\n        # FIXME: Judging by pandas docs `*args` and `**kwargs` serves only compatibility\n        # purpose and does not affect the result, we shouldn't pass them to the query compiler.\n        new_query_compiler = self._query_compiler.clip(\n            lower=lower, upper=upper, axis=axis, **kwargs\n        )\n        return self._create_or_update_from_compiler(new_query_compiler, inplace)\n\n    def combine(\n        self, other, func, fill_value=None, **kwargs\n    ) -> Self:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Perform combination of `BasePandasDataset`-s according to `func`.\n        \"\"\"\n        return self._binary_op(\n            \"combine\", other, _axis=0, func=func, fill_value=fill_value, **kwargs\n        )\n\n    def combine_first(self, other) -> Self:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Update null elements with value in the same location in `other`.\n        \"\"\"\n        return 
self._binary_op(\"combine_first\", other, _axis=0)\n\n    def copy(self, deep=True) -> Self:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Make a copy of the object's metadata.\n        \"\"\"\n        if deep:\n            return self.__constructor__(query_compiler=self._query_compiler.copy())\n        new_obj = self.__constructor__(query_compiler=self._query_compiler)\n        self._add_sibling(new_obj)\n        return new_obj\n\n    def count(\n        self, axis=0, numeric_only=False\n    ) -> Series | Scalar:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Count non-NA cells for `BasePandasDataset`.\n        \"\"\"\n        axis = self._get_axis_number(axis)\n        # select_dtypes is only implemented on DataFrames, but the numeric_only\n        # flag will always be set to false by the Series frontend\n        frame = self.select_dtypes([np.number, np.bool_]) if numeric_only else self\n\n        return frame._reduce_dimension(\n            frame._query_compiler.count(axis=axis, numeric_only=numeric_only)\n        )\n\n    def cummax(\n        self, axis=None, skipna=True, *args, **kwargs\n    ) -> Self:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return cumulative maximum over a `BasePandasDataset` axis.\n        \"\"\"\n        axis = self._get_axis_number(axis)\n        if axis == 1:\n            self._validate_dtypes(numeric_only=True)\n        return self.__constructor__(\n            # FIXME: Judging by pandas docs `*args` and `**kwargs` serves only compatibility\n            # purpose and does not affect the result, we shouldn't pass them to the query compiler.\n            query_compiler=self._query_compiler.cummax(\n                fold_axis=axis, axis=axis, skipna=skipna, **kwargs\n            )\n        )\n\n    def cummin(\n        self, axis=None, skipna=True, *args, **kwargs\n    ) -> Self:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return cumulative minimum over a `BasePandasDataset` axis.\n        \"\"\"\n        axis = 
self._get_axis_number(axis)\n        if axis == 1:\n            self._validate_dtypes(numeric_only=True)\n        return self.__constructor__(\n            # FIXME: Judging by pandas docs `*args` and `**kwargs` serves only compatibility\n            # purpose and does not affect the result, we shouldn't pass them to the query compiler.\n            query_compiler=self._query_compiler.cummin(\n                fold_axis=axis, axis=axis, skipna=skipna, **kwargs\n            )\n        )\n\n    def cumprod(\n        self, axis=None, skipna=True, *args, **kwargs\n    ) -> Self:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return cumulative product over a `BasePandasDataset` axis.\n        \"\"\"\n        axis = self._get_axis_number(axis)\n        self._validate_dtypes(numeric_only=True)\n        return self.__constructor__(\n            # FIXME: Judging by pandas docs `**kwargs` serves only compatibility\n            # purpose and does not affect the result, we shouldn't pass them to the query compiler.\n            query_compiler=self._query_compiler.cumprod(\n                fold_axis=axis, axis=axis, skipna=skipna, **kwargs\n            )\n        )\n\n    def cumsum(\n        self, axis=None, skipna=True, *args, **kwargs\n    ) -> Self:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return cumulative sum over a `BasePandasDataset` axis.\n        \"\"\"\n        axis = self._get_axis_number(axis)\n        self._validate_dtypes(numeric_only=True)\n        return self.__constructor__(\n            # FIXME: Judging by pandas docs `*args` and `**kwargs` serves only compatibility\n            # purpose and does not affect the result, we shouldn't pass them to the query compiler.\n            query_compiler=self._query_compiler.cumsum(\n                fold_axis=axis, axis=axis, skipna=skipna, **kwargs\n            )\n        )\n\n    def describe(\n        self,\n        percentiles=None,\n        include=None,\n        exclude=None,\n    ) -> Self:  # noqa: 
PR01, RT01, D200\n        \"\"\"\n        Generate descriptive statistics.\n        \"\"\"\n        # copied from pandas.core.describe.describe_ndframe\n        percentiles = _refine_percentiles(percentiles)\n        data = self\n        if self._is_dataframe:\n            # include/exclude arguments are ignored for Series\n            if (include is None) and (exclude is None):\n                # when some numerics are found, keep only numerics\n                default_include: list[npt.DTypeLike] = [np.number]\n                default_include.append(\"datetime\")\n                data = self.select_dtypes(include=default_include)\n                if len(data.columns) == 0:\n                    data = self\n            elif include == \"all\":\n                if exclude is not None:\n                    msg = \"exclude must be None when include is 'all'\"\n                    raise ValueError(msg)\n                data = self\n            else:\n                data = self.select_dtypes(\n                    include=include,\n                    exclude=exclude,\n                )\n        if data.empty:\n            # Match pandas error from concatenating empty list of series descriptions.\n            raise ValueError(\"No objects to concatenate\")\n        return self.__constructor__(\n            query_compiler=data._query_compiler.describe(percentiles=percentiles)\n        )\n\n    def diff(self, periods=1, axis=0) -> Self:  # noqa: PR01, RT01, D200\n        \"\"\"\n        First discrete difference of element.\n        \"\"\"\n        # Attempting to match pandas error behavior here\n        if not isinstance(periods, int):\n            raise ValueError(f\"periods must be an int. 
got {type(periods)} instead\")\n\n        # Attempting to match pandas error behavior here\n        for dtype in self._get_dtypes():\n            if not (is_numeric_dtype(dtype) or lib.is_np_dtype(dtype, \"mM\")):\n                raise TypeError(f\"unsupported operand type for -: got {dtype}\")\n\n        axis = self._get_axis_number(axis)\n        return self.__constructor__(\n            query_compiler=self._query_compiler.diff(axis=axis, periods=periods)\n        )\n\n    def drop(\n        self,\n        labels=None,\n        *,\n        axis=0,\n        index=None,\n        columns=None,\n        level=None,\n        inplace=False,\n        errors=\"raise\",\n    ) -> Self:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Drop specified labels from `BasePandasDataset`.\n        \"\"\"\n        # TODO implement level\n        if level is not None:\n            return self._default_to_pandas(\n                \"drop\",\n                labels=labels,\n                axis=axis,\n                index=index,\n                columns=columns,\n                level=level,\n                inplace=inplace,\n                errors=errors,\n            )\n\n        inplace = validate_bool_kwarg(inplace, \"inplace\")\n        if labels is not None:\n            if index is not None or columns is not None:\n                raise ValueError(\"Cannot specify both 'labels' and 'index'/'columns'\")\n            axis_name = pandas.DataFrame._get_axis_name(axis)\n            axes = {axis_name: labels}\n        elif index is not None or columns is not None:\n            axes = {\"index\": index}\n            if self.ndim == 2:\n                axes[\"columns\"] = columns\n        else:\n            raise ValueError(\n                \"Need to specify at least one of 'labels', 'index' or 'columns'\"\n            )\n\n        for axis in [\"index\", \"columns\"]:\n            if axis not in axes:\n                axes[axis] = None\n            elif axes[axis] is not None:\n  
              if not is_list_like(axes[axis]):\n                    axes[axis] = [axes[axis]]\n                # In case of lazy execution we should bypass these error checking components\n                # because they can force the materialization of the row or column labels.\n                if (axis == \"index\" and self._query_compiler.lazy_row_labels) or (\n                    axis == \"columns\" and self._query_compiler.lazy_column_labels\n                ):\n                    continue\n                if errors == \"raise\":\n                    non_existent = pandas.Index(axes[axis]).difference(\n                        getattr(self, axis)\n                    )\n                    if len(non_existent):\n                        raise KeyError(f\"labels {non_existent} not contained in axis\")\n                else:\n                    axes[axis] = [\n                        obj for obj in axes[axis] if obj in getattr(self, axis)\n                    ]\n                    # If the length is zero, we will just do nothing\n                    if not len(axes[axis]):\n                        axes[axis] = None\n\n        new_query_compiler = self._query_compiler.drop(\n            index=axes[\"index\"], columns=axes[\"columns\"], errors=errors\n        )\n        return self._create_or_update_from_compiler(new_query_compiler, inplace)\n\n    def dropna(\n        self,\n        *,\n        axis: Axis = 0,\n        how: str | lib.NoDefault = lib.no_default,\n        thresh: int | lib.NoDefault = lib.no_default,\n        subset: IndexLabel = None,\n        inplace: bool = False,\n        ignore_index: bool = False,\n    ) -> Self:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Remove missing values.\n        \"\"\"\n        inplace = validate_bool_kwarg(inplace, \"inplace\")\n\n        if is_list_like(axis):\n            raise TypeError(\"supplying multiple axes to axis is no longer supported.\")\n\n        axis = self._get_axis_number(axis)\n        if 
how is not None and how not in [\"any\", \"all\", lib.no_default]:\n            raise ValueError(\"invalid how option: %s\" % how)\n        if how is None and thresh is None:\n            raise TypeError(\"must specify how or thresh\")\n        if subset is not None:\n            if axis == 1:\n                indices = self.index.get_indexer_for(subset)\n                check = indices == -1\n                if check.any():\n                    raise KeyError(list(np.compress(check, subset)))\n            else:\n                indices = self.columns.get_indexer_for(subset)\n                check = indices == -1\n                if check.any():\n                    raise KeyError(list(np.compress(check, subset)))\n        new_query_compiler = self._query_compiler.dropna(\n            axis=axis, how=how, thresh=thresh, subset=subset\n        )\n        if ignore_index:\n            new_query_compiler.index = pandas.RangeIndex(\n                stop=len(new_query_compiler.index)\n            )\n        return self._create_or_update_from_compiler(new_query_compiler, inplace)\n\n    def droplevel(self, level, axis=0) -> Self:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return `BasePandasDataset` with requested index / column level(s) removed.\n        \"\"\"\n        axis = self._get_axis_number(axis)\n        result = self.copy()\n        if axis == 0:\n            index_columns = result.index.names.copy()\n            if is_integer(level):\n                level = index_columns[level]\n            elif is_list_like(level):\n                level = [\n                    index_columns[lev] if is_integer(lev) else lev for lev in level\n                ]\n            if is_list_like(level):\n                for lev in level:\n                    index_columns.remove(lev)\n            else:\n                index_columns.remove(level)\n            if len(result.columns.names) > 1:\n                # In this case, we are dealing with a MultiIndex column, so we 
need to\n                # be careful when dropping the additional index column.\n                if is_list_like(level):\n                    drop_labels = [(lev, \"\") for lev in level]\n                else:\n                    drop_labels = [(level, \"\")]\n                result = result.reset_index().drop(columns=drop_labels)\n            else:\n                result = result.reset_index().drop(columns=level)\n            result = result.set_index(index_columns)\n        else:\n            result.columns = self.columns.droplevel(level)\n        return result\n\n    def drop_duplicates(\n        self, keep=\"first\", inplace=False, **kwargs\n    ) -> Self:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return `BasePandasDataset` with duplicate rows removed.\n        \"\"\"\n        inplace = validate_bool_kwarg(inplace, \"inplace\")\n        ignore_index = kwargs.get(\"ignore_index\", False)\n        subset = kwargs.get(\"subset\", None)\n        if subset is not None:\n            if is_list_like(subset):\n                if not isinstance(subset, list):\n                    subset = list(subset)\n            else:\n                subset = [subset]\n            if len(diff := pandas.Index(subset).difference(self.columns)) > 0:\n                raise KeyError(diff)\n        result_qc = self._query_compiler.unique(\n            keep=keep, ignore_index=ignore_index, subset=subset\n        )\n        result = self.__constructor__(query_compiler=result_qc)\n        if inplace:\n            self._update_inplace(result._query_compiler)\n        else:\n            return result\n\n    def eq(self, other, axis=\"columns\", level=None) -> Self:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Get equality of `BasePandasDataset` and `other`, element-wise (binary operator `eq`).\n        \"\"\"\n        return self._binary_op(\"eq\", other, axis=axis, level=level, dtypes=np.bool_)\n\n    def explode(\n        self, column, ignore_index: bool = False\n    ) -> 
Self:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Transform each element of a list-like to a row.\n        \"\"\"\n        exploded = self.__constructor__(\n            query_compiler=self._query_compiler.explode(column)\n        )\n        if ignore_index:\n            exploded = exploded.reset_index(drop=True)\n        return exploded\n\n    def ewm(\n        self,\n        com: float | None = None,\n        span: float | None = None,\n        halflife: float | TimedeltaConvertibleTypes | None = None,\n        alpha: float | None = None,\n        min_periods: int | None = 0,\n        adjust: bool = True,\n        ignore_na: bool = False,\n        axis: Axis = lib.no_default,\n        times: str | np.ndarray | BasePandasDataset | None = None,\n        method: str = \"single\",\n    ) -> pandas.core.window.ewm.ExponentialMovingWindow:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Provide exponentially weighted (EW) calculations.\n        \"\"\"\n        return self._default_to_pandas(\n            \"ewm\",\n            com=com,\n            span=span,\n            halflife=halflife,\n            alpha=alpha,\n            min_periods=min_periods,\n            adjust=adjust,\n            ignore_na=ignore_na,\n            axis=axis,\n            times=times,\n            method=method,\n        )\n\n    def expanding(\n        self, min_periods=1, axis=lib.no_default, method=\"single\"\n    ) -> Expanding:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Provide expanding window calculations.\n        \"\"\"\n        from .window import Expanding\n\n        if axis is not lib.no_default:\n            axis = self._get_axis_number(axis)\n            name = \"expanding\"\n            if axis == 1:\n                warnings.warn(\n                    f\"Support for axis=1 in {type(self).__name__}.{name} is \"\n                    + \"deprecated and will be removed in a future version. \"\n                    + f\"Use obj.T.{name}(...) 
instead\",\n                    FutureWarning,\n                )\n            else:\n                warnings.warn(\n                    f\"The 'axis' keyword in {type(self).__name__}.{name} is \"\n                    + \"deprecated and will be removed in a future version. \"\n                    + \"Call the method without the axis keyword instead.\",\n                    FutureWarning,\n                )\n        else:\n            axis = 0\n\n        return Expanding(\n            self,\n            min_periods=min_periods,\n            axis=axis,\n            method=method,\n        )\n\n    def ffill(\n        self,\n        *,\n        axis=None,\n        inplace=False,\n        limit=None,\n        limit_area=None,\n        downcast=lib.no_default,\n    ) -> Self | None:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Synonym for `DataFrame.fillna` with ``method='ffill'``.\n        \"\"\"\n        if limit_area is not None:\n            return self._default_to_pandas(\n                \"ffill\",\n                reason=\"'limit_area' parameter isn't supported\",\n                axis=axis,\n                inplace=inplace,\n                limit=limit,\n                limit_area=limit_area,\n                downcast=downcast,\n            )\n        downcast = self._deprecate_downcast(downcast, \"ffill\")\n        with warnings.catch_warnings():\n            warnings.filterwarnings(\n                \"ignore\", \".*fillna with 'method' is deprecated\", category=FutureWarning\n            )\n            return self.fillna(\n                method=\"ffill\",\n                axis=axis,\n                limit=limit,\n                downcast=downcast,\n                inplace=inplace,\n            )\n\n    def pad(\n        self, *, axis=None, inplace=False, limit=None, downcast=lib.no_default\n    ) -> Self | None:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Synonym for `DataFrame.ffill`.\n        \"\"\"\n        warnings.warn(\n            
\"DataFrame.pad/Series.pad is deprecated. Use DataFrame.ffill/Series.ffill instead\",\n            FutureWarning,\n        )\n        with warnings.catch_warnings():\n            warnings.filterwarnings(\"ignore\", category=FutureWarning)\n            return self.ffill(\n                axis=axis, inplace=inplace, limit=limit, downcast=downcast\n            )\n\n    def fillna(\n        self,\n        squeeze_self,\n        squeeze_value,\n        value=None,\n        method=None,\n        axis=None,\n        inplace=False,\n        limit=None,\n        downcast=lib.no_default,\n    ) -> Self | None:\n        \"\"\"\n        Fill NA/NaN values using the specified method.\n\n        Parameters\n        ----------\n        squeeze_self : bool\n            If True then self contains a Series object, if False then self contains\n            a DataFrame object.\n        squeeze_value : bool\n            If True then value contains a Series object, if False then value contains\n            a DataFrame object.\n        value : scalar, dict, Series, or DataFrame, default: None\n            Value to use to fill holes (e.g. 0), alternately a\n            dict/Series/DataFrame of values specifying which value to use for\n            each index (for a Series) or column (for a DataFrame).  Values not\n            in the dict/Series/DataFrame will not be filled. This value cannot\n            be a list.\n        method : {'backfill', 'bfill', 'pad', 'ffill', None}, default: None\n            Method to use for filling holes in reindexed Series\n            pad / ffill: propagate last valid observation forward to next valid\n            backfill / bfill: use next valid observation to fill gap.\n        axis : {None, 0, 1}, default: None\n            Axis along which to fill missing values.\n        inplace : bool, default: False\n            If True, fill in-place. 
Note: this will modify any\n            other views on this object (e.g., a no-copy slice for a column in a\n            DataFrame).\n        limit : int, default: None\n            If method is specified, this is the maximum number of consecutive\n            NaN values to forward/backward fill. In other words, if there is\n            a gap with more than this number of consecutive NaNs, it will only\n            be partially filled. If method is not specified, this is the\n            maximum number of entries along the entire axis where NaNs will be\n            filled. Must be greater than 0 if not None.\n        downcast : dict, default: None\n            A dict of item->dtype of what to downcast if possible,\n            or the string 'infer' which will try to downcast to an appropriate\n            equal type (e.g. float64 to int64 if possible).\n\n        Returns\n        -------\n        Series, DataFrame or None\n            Object with missing values filled or None if ``inplace=True``.\n        \"\"\"\n        if method is not None:\n            warnings.warn(\n                f\"{type(self).__name__}.fillna with 'method' is deprecated and \"\n                + \"will raise in a future version. 
Use obj.ffill() or obj.bfill() \"\n                + \"instead.\",\n                FutureWarning,\n            )\n        downcast = self._deprecate_downcast(downcast, \"fillna\")\n        inplace = validate_bool_kwarg(inplace, \"inplace\")\n        axis = self._get_axis_number(axis)\n        if isinstance(value, (list, tuple)):\n            raise TypeError(\n                '\"value\" parameter must be a scalar or dict, but '\n                + f'you passed a \"{type(value).__name__}\"'\n            )\n        if value is None and method is None:\n            raise ValueError(\"must specify a fill method or value\")\n        if value is not None and method is not None:\n            raise ValueError(\"cannot specify both a fill method and value\")\n        if method is not None and method not in [\"backfill\", \"bfill\", \"pad\", \"ffill\"]:\n            expecting = \"pad (ffill) or backfill (bfill)\"\n            msg = \"Invalid fill method. Expecting {expecting}. Got {method}\".format(\n                expecting=expecting, method=method\n            )\n            raise ValueError(msg)\n        if limit is not None:\n            if not isinstance(limit, int):\n                raise ValueError(\"Limit must be an integer\")\n            elif limit <= 0:\n                raise ValueError(\"Limit must be greater than 0\")\n\n        if isinstance(value, BasePandasDataset):\n            value = value._query_compiler\n\n        new_query_compiler = self._query_compiler.fillna(\n            squeeze_self=squeeze_self,\n            squeeze_value=squeeze_value,\n            value=value,\n            method=method,\n            axis=axis,\n            inplace=False,\n            limit=limit,\n            downcast=downcast,\n        )\n        return self._create_or_update_from_compiler(new_query_compiler, inplace)\n\n    def filter(\n        self, items=None, like=None, regex=None, axis=None\n    ) -> Self:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Subset the 
`BasePandasDataset` rows or columns according to the specified index labels.\n        \"\"\"\n        nkw = count_not_none(items, like, regex)\n        if nkw > 1:\n            raise TypeError(\n                \"Keyword arguments `items`, `like`, or `regex` are mutually exclusive\"\n            )\n        if nkw == 0:\n            raise TypeError(\"Must pass either `items`, `like`, or `regex`\")\n        if axis is None:\n            axis = \"columns\"  # This is the default info axis for dataframes\n\n        axis = self._get_axis_number(axis)\n        labels = self.columns if axis else self.index\n\n        if items is not None:\n            bool_arr = labels.isin(items)\n        elif like is not None:\n\n            def f(x):\n                return like in str(x)\n\n            bool_arr = labels.map(f).tolist()\n        else:\n\n            def f(x):\n                return matcher.search(str(x)) is not None\n\n            matcher = re.compile(regex)\n            bool_arr = labels.map(f).tolist()\n        if not axis:\n            return self[bool_arr]\n        return self[self.columns[bool_arr]]\n\n    def first(self, offset) -> Self | None:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Select initial periods of time series data based on a date offset.\n        \"\"\"\n        warnings.warn(\n            \"first is deprecated and will be removed in a future version. 
\"\n            + \"Please create a mask and filter using `.loc` instead\",\n            FutureWarning,\n        )\n        return self._create_or_update_from_compiler(\n            self._query_compiler.first(offset=to_offset(offset))\n        )\n\n    def first_valid_index(self) -> int:  # noqa: RT01, D200\n        \"\"\"\n        Return index for first non-NA value or None, if no non-NA value is found.\n        \"\"\"\n        return self._query_compiler.first_valid_index()\n\n    def floordiv(\n        self, other, axis=\"columns\", level=None, fill_value=None\n    ) -> Self:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Get integer division of `BasePandasDataset` and `other`, element-wise (binary operator `floordiv`).\n        \"\"\"\n        return self._binary_op(\n            \"floordiv\", other, axis=axis, level=level, fill_value=fill_value\n        )\n\n    def ge(self, other, axis=\"columns\", level=None) -> Self:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Get greater than or equal comparison of `BasePandasDataset` and `other`, element-wise (binary operator `ge`).\n        \"\"\"\n        return self._binary_op(\"ge\", other, axis=axis, level=level, dtypes=np.bool_)\n\n    def get(\n        self, key, default=None\n    ) -> DataFrame | Series | Scalar:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Get item from object for given key.\n        \"\"\"\n        # Match pandas behavior here\n        try:\n            return self.__getitem__(key)\n        except (KeyError, ValueError, IndexError):\n            return default\n\n    def gt(self, other, axis=\"columns\", level=None) -> Self:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Get greater than comparison of `BasePandasDataset` and `other`, element-wise (binary operator `gt`).\n        \"\"\"\n        return self._binary_op(\"gt\", other, axis=axis, level=level, dtypes=np.bool_)\n\n    def head(self, n=5) -> Self:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return the first 
`n` rows.\n        \"\"\"\n        return self.iloc[:n]\n\n    @property\n    def iat(self, axis=None) -> _iLocIndexer:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Get a single value for a row/column pair by integer position.\n        \"\"\"\n        from .indexing import _iLocIndexer\n\n        return _iLocIndexer(self)\n\n    def idxmax(\n        self, axis=0, skipna=True, numeric_only=False\n    ) -> Self:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return index of first occurrence of maximum over requested axis.\n        \"\"\"\n        axis = self._get_axis_number(axis)\n        return self._reduce_dimension(\n            self._query_compiler.idxmax(\n                axis=axis, skipna=skipna, numeric_only=numeric_only\n            )\n        )\n\n    def idxmin(\n        self, axis=0, skipna=True, numeric_only=False\n    ) -> Self:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return index of first occurrence of minimum over requested axis.\n        \"\"\"\n        axis = self._get_axis_number(axis)\n        return self._reduce_dimension(\n            self._query_compiler.idxmin(\n                axis=axis, skipna=skipna, numeric_only=numeric_only\n            )\n        )\n\n    def infer_objects(self, copy=None) -> Self:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Attempt to infer better dtypes for object columns.\n        \"\"\"\n        new_query_compiler = self._query_compiler.infer_objects()\n        return self._create_or_update_from_compiler(\n            new_query_compiler, inplace=False if copy is None else not copy\n        )\n\n    def convert_dtypes(\n        self,\n        infer_objects: bool = True,\n        convert_string: bool = True,\n        convert_integer: bool = True,\n        convert_boolean: bool = True,\n        convert_floating: bool = True,\n        dtype_backend: DtypeBackend = \"numpy_nullable\",\n    ) -> Self:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Convert columns to best possible dtypes 
using dtypes supporting ``pd.NA``.\n        \"\"\"\n        return self.__constructor__(\n            query_compiler=self._query_compiler.convert_dtypes(\n                infer_objects=infer_objects,\n                convert_string=convert_string,\n                convert_integer=convert_integer,\n                convert_boolean=convert_boolean,\n                convert_floating=convert_floating,\n                dtype_backend=dtype_backend,\n            )\n        )\n\n    def isin(self, values) -> Self:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Whether elements in `BasePandasDataset` are contained in `values`.\n        \"\"\"\n        from .series import Series\n\n        ignore_indices = isinstance(values, Series)\n        values = getattr(values, \"_query_compiler\", values)\n        return self.__constructor__(\n            query_compiler=self._query_compiler.isin(\n                values=values, ignore_indices=ignore_indices\n            )\n        )\n\n    def isna(self) -> Self:  # noqa: RT01, D200\n        \"\"\"\n        Detect missing values.\n        \"\"\"\n        return self.__constructor__(query_compiler=self._query_compiler.isna())\n\n    isnull: Self = isna\n\n    @property\n    def iloc(self) -> _iLocIndexer:  # noqa: RT01, D200\n        \"\"\"\n        Purely integer-location based indexing for selection by position.\n        \"\"\"\n        from .indexing import _iLocIndexer\n\n        return _iLocIndexer(self)\n\n    @_inherit_docstrings(pandas.DataFrame.kurt, apilink=\"pandas.DataFrame.kurt\")\n    def kurt(self, axis=0, skipna=True, numeric_only=False, **kwargs) -> Series | float:\n        return self._stat_operation(\"kurt\", axis, skipna, numeric_only, **kwargs)\n\n    kurtosis: Series | float = kurt\n\n    def last(self, offset) -> Self:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Select final periods of time series data based on a date offset.\n        \"\"\"\n        warnings.warn(\n            \"last is deprecated and 
will be removed in a future version. \"\n            + \"Please create a mask and filter using `.loc` instead\",\n            FutureWarning,\n        )\n\n        return self._create_or_update_from_compiler(\n            self._query_compiler.last(offset=to_offset(offset))\n        )\n\n    def last_valid_index(self) -> int:  # noqa: RT01, D200\n        \"\"\"\n        Return index for last non-NA value or None, if no non-NA value is found.\n        \"\"\"\n        return self._query_compiler.last_valid_index()\n\n    def le(self, other, axis=\"columns\", level=None) -> Self:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Get less than or equal comparison of `BasePandasDataset` and `other`, element-wise (binary operator `le`).\n        \"\"\"\n        return self._binary_op(\"le\", other, axis=axis, level=level, dtypes=np.bool_)\n\n    def lt(self, other, axis=\"columns\", level=None) -> Self:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Get less than comparison of `BasePandasDataset` and `other`, element-wise (binary operator `lt`).\n        \"\"\"\n        return self._binary_op(\"lt\", other, axis=axis, level=level, dtypes=np.bool_)\n\n    @property\n    def loc(self) -> _LocIndexer:  # noqa: RT01, D200\n        \"\"\"\n        Get a group of rows and columns by label(s) or a boolean array.\n        \"\"\"\n        from .indexing import _LocIndexer\n\n        return _LocIndexer(self)\n\n    def mask(\n        self,\n        cond,\n        other=lib.no_default,\n        *,\n        inplace: bool = False,\n        axis: Optional[Axis] = None,\n        level: Optional[Level] = None,\n    ) -> Self | None:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Replace values where the condition is True.\n        \"\"\"\n        return self._create_or_update_from_compiler(\n            self._query_compiler.mask(\n                cond,\n                other=other,\n                inplace=False,\n                axis=axis,\n                level=level,\n         
   ),\n            inplace=inplace,\n        )\n\n    def max(\n        self,\n        axis: Axis = 0,\n        skipna=True,\n        numeric_only=False,\n        **kwargs,\n    ) -> Series | None:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return the maximum of the values over the requested axis.\n        \"\"\"\n        validate_bool_kwarg(skipna, \"skipna\", none_allowed=False)\n        orig_axis = axis\n        axis = self._get_axis_number(axis)\n        data = self._validate_dtypes_min_max(axis, numeric_only)\n        res = data._reduce_dimension(\n            data._query_compiler.max(\n                axis=axis,\n                skipna=skipna,\n                numeric_only=numeric_only,\n                **kwargs,\n            )\n        )\n        if orig_axis is None:\n            res = res._reduce_dimension(\n                res._query_compiler.max(\n                    axis=0,\n                    skipna=skipna,\n                    numeric_only=False,\n                    **kwargs,\n                )\n            )\n        return res\n\n    def min(\n        self,\n        axis: Axis = 0,\n        skipna: bool = True,\n        numeric_only=False,\n        **kwargs,\n    ) -> Series | None:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return the minimum of the values over the requested axis.\n        \"\"\"\n        validate_bool_kwarg(skipna, \"skipna\", none_allowed=False)\n        orig_axis = axis\n        axis = self._get_axis_number(axis)\n        data = self._validate_dtypes_min_max(axis, numeric_only)\n        res = data._reduce_dimension(\n            data._query_compiler.min(\n                axis=axis,\n                skipna=skipna,\n                numeric_only=numeric_only,\n                **kwargs,\n            )\n        )\n        if orig_axis is None:\n            res = res._reduce_dimension(\n                res._query_compiler.min(\n                    axis=0,\n                    skipna=skipna,\n                    
numeric_only=False,\n                    **kwargs,\n                )\n            )\n        return res\n\n    def _stat_operation(\n        self,\n        op_name: str,\n        axis: Optional[Union[int, str]],\n        skipna: bool,\n        numeric_only: Optional[bool] = False,\n        **kwargs,\n    ):\n        \"\"\"\n        Do common statistic reduce operations under frame.\n\n        Parameters\n        ----------\n        op_name : str\n            Name of method to apply.\n        axis : int or str\n            Axis to apply method on.\n        skipna : bool\n            Exclude NA/null values when computing the result.\n        numeric_only : bool, default: False\n            Include only float, int, boolean columns. If None, will attempt\n            to use everything, then use only numeric data.\n        **kwargs : dict\n            Additional keyword arguments to pass to `op_name`.\n\n        Returns\n        -------\n        scalar, Series or DataFrame\n            `scalar` - self is Series and level is not specified.\n            `Series` - self is Series and level is specified, or\n                self is DataFrame and level is not specified.\n            `DataFrame` - self is DataFrame and level is specified.\n        \"\"\"\n        axis = self._get_axis_number(axis) if axis is not None else None\n        validate_bool_kwarg(skipna, \"skipna\", none_allowed=False)\n        if op_name == \"median\":\n            numpy_compat.function.validate_median((), kwargs)\n        elif op_name in (\"sem\", \"var\", \"std\"):\n            val_kwargs = {k: v for k, v in kwargs.items() if k != \"ddof\"}\n            numpy_compat.function.validate_stat_ddof_func((), val_kwargs, fname=op_name)\n        else:\n            numpy_compat.function.validate_stat_func((), kwargs, fname=op_name)\n\n        if not numeric_only:\n            self._validate_dtypes(numeric_only=True)\n\n        data = (\n            self._get_numeric_data(axis if axis is not None else 0)\n 
           if numeric_only\n            else self\n        )\n        result_qc = getattr(data._query_compiler, op_name)(\n            axis=axis,\n            skipna=skipna,\n            numeric_only=numeric_only,\n            **kwargs,\n        )\n        return (\n            self._reduce_dimension(result_qc)\n            if isinstance(result_qc, type(self._query_compiler))\n            # scalar case\n            else result_qc\n        )\n\n    def memory_usage(\n        self, index=True, deep=False\n    ) -> Series | None:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return the memory usage of the `BasePandasDataset`.\n        \"\"\"\n        return self._reduce_dimension(\n            self._query_compiler.memory_usage(index=index, deep=deep)\n        )\n\n    def mod(\n        self, other, axis=\"columns\", level=None, fill_value=None\n    ) -> Self:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Get modulo of `BasePandasDataset` and `other`, element-wise (binary operator `mod`).\n        \"\"\"\n        return self._binary_op(\n            \"mod\", other, axis=axis, level=level, fill_value=fill_value\n        )\n\n    def mode(\n        self, axis=0, numeric_only=False, dropna=True\n    ) -> Self:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Get the mode(s) of each element along the selected axis.\n        \"\"\"\n        axis = self._get_axis_number(axis)\n        return self.__constructor__(\n            query_compiler=self._query_compiler.mode(\n                axis=axis, numeric_only=numeric_only, dropna=dropna\n            )\n        )\n\n    def mul(\n        self, other, axis=\"columns\", level=None, fill_value=None\n    ) -> Self:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Get multiplication of `BasePandasDataset` and `other`, element-wise (binary operator `mul`).\n        \"\"\"\n        return self._binary_op(\n            \"mul\", other, axis=axis, level=level, fill_value=fill_value\n        )\n\n    multiply: Self = mul\n\n 
   def ne(self, other, axis=\"columns\", level=None) -> Self:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Get Not equal comparison of `BasePandasDataset` and `other`, element-wise (binary operator `ne`).\n        \"\"\"\n        return self._binary_op(\"ne\", other, axis=axis, level=level, dtypes=np.bool_)\n\n    def notna(self) -> Self:  # noqa: RT01, D200\n        \"\"\"\n        Detect existing (non-missing) values.\n        \"\"\"\n        return self.__constructor__(query_compiler=self._query_compiler.notna())\n\n    notnull: Self = notna\n\n    def nunique(self, axis=0, dropna=True) -> Series | int:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return number of unique elements in the `BasePandasDataset`.\n        \"\"\"\n        axis = self._get_axis_number(axis)\n        return self._reduce_dimension(\n            self._query_compiler.nunique(axis=axis, dropna=dropna)\n        )\n\n    def pct_change(\n        self,\n        periods=1,\n        fill_method=lib.no_default,\n        limit=lib.no_default,\n        freq=None,\n        **kwargs,\n    ) -> Self:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Percentage change between the current and a prior element.\n        \"\"\"\n        if fill_method not in (lib.no_default, None) or limit is not lib.no_default:\n            warnings.warn(\n                \"The 'fill_method' keyword being not None and the 'limit' keyword in \"\n                + f\"{type(self).__name__}.pct_change are deprecated and will be removed \"\n                + \"in a future version. 
Either fill in any non-leading NA values prior \"\n                + \"to calling pct_change or specify 'fill_method=None' to not fill NA \"\n                + \"values.\",\n                FutureWarning,\n            )\n        if fill_method is lib.no_default:\n            if self.isna().values.any():\n                warnings.warn(\n                    \"The default fill_method='pad' in \"\n                    + f\"{type(self).__name__}.pct_change is deprecated and will be \"\n                    + \"removed in a future version. Call ffill before calling \"\n                    + \"pct_change to retain current behavior and silence this warning.\",\n                    FutureWarning,\n                )\n            fill_method = \"pad\"\n        if limit is lib.no_default:\n            limit = None\n\n        # Attempting to match pandas error behavior here\n        if not isinstance(periods, int):\n            raise ValueError(f\"periods must be an int. got {type(periods)} instead\")\n\n        # Attempting to match pandas error behavior here\n        for dtype in self._get_dtypes():\n            if not is_numeric_dtype(dtype):\n                raise TypeError(f\"unsupported operand type for /: got {dtype}\")\n\n        return self.__constructor__(\n            query_compiler=self._query_compiler.pct_change(\n                periods=periods,\n                fill_method=fill_method,\n                limit=limit,\n                freq=freq,\n                **kwargs,\n            )\n        )\n\n    def pipe(\n        self, func: Callable[..., T] | tuple[Callable[..., T], str], *args, **kwargs\n    ) -> T:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Apply chainable functions that expect `BasePandasDataset`.\n        \"\"\"\n        return pipe(self, func, *args, **kwargs)\n\n    def pop(self, item) -> Series | Scalar:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return item and drop from frame. 
Raise KeyError if not found.\n        \"\"\"\n        result = self[item]\n        del self[item]\n        return result\n\n    def pow(\n        self, other, axis=\"columns\", level=None, fill_value=None\n    ) -> Self:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Get exponential power of `BasePandasDataset` and `other`, element-wise (binary operator `pow`).\n        \"\"\"\n        return self._binary_op(\n            \"pow\", other, axis=axis, level=level, fill_value=fill_value\n        )\n\n    def quantile(\n        self, q, axis, numeric_only, interpolation, method\n    ) -> DataFrame | Series | Scalar:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return values at the given quantile over requested axis.\n        \"\"\"\n        axis = self._get_axis_number(axis)\n\n        def check_dtype(t):\n            return is_numeric_dtype(t) or lib.is_np_dtype(t, \"mM\")\n\n        numeric_only_df = self\n        if not numeric_only:\n            # If not numeric_only and columns, then check all columns are either\n            # numeric, timestamp, or timedelta\n            if not axis and not all(check_dtype(t) for t in self._get_dtypes()):\n                raise TypeError(\"can't multiply sequence by non-int of type 'float'\")\n            # If over rows, then make sure that all dtypes are equal for not\n            # numeric_only\n            elif axis:\n                for i in range(1, len(self._get_dtypes())):\n                    pre_dtype = self._get_dtypes()[i - 1]\n                    curr_dtype = self._get_dtypes()[i]\n                    if not is_dtype_equal(pre_dtype, curr_dtype):\n                        raise TypeError(\n                            \"Cannot compare type '{0}' with type '{1}'\".format(\n                                pre_dtype, curr_dtype\n                            )\n                        )\n        else:\n            numeric_only_df = self.drop(\n                columns=[\n                    i for i in 
self.dtypes.index if not is_numeric_dtype(self.dtypes[i])\n                ]\n            )\n\n        # check that all qs are between 0 and 1\n        validate_percentile(q)\n        axis = numeric_only_df._get_axis_number(axis)\n        if isinstance(q, (pandas.Series, np.ndarray, pandas.Index, list, tuple)):\n            return numeric_only_df.__constructor__(\n                query_compiler=numeric_only_df._query_compiler.quantile_for_list_of_values(\n                    q=q,\n                    axis=axis,\n                    # `numeric_only=True` has already been processed by using `self.drop` function\n                    numeric_only=False,\n                    interpolation=interpolation,\n                    method=method,\n                )\n            )\n        else:\n            result = numeric_only_df._reduce_dimension(\n                numeric_only_df._query_compiler.quantile_for_single_value(\n                    q=q,\n                    axis=axis,\n                    # `numeric_only=True` has already been processed by using `self.drop` function\n                    numeric_only=False,\n                    interpolation=interpolation,\n                    method=method,\n                )\n            )\n            if isinstance(result, BasePandasDataset):\n                result.name = q\n            return result\n\n    @_inherit_docstrings(pandas.DataFrame.rank, apilink=\"pandas.DataFrame.rank\")\n    def rank(\n        self,\n        axis=0,\n        method: str = \"average\",\n        numeric_only=False,\n        na_option: str = \"keep\",\n        ascending: bool = True,\n        pct: bool = False,\n    ) -> Self:\n        if axis is None:\n            raise ValueError(\n                f\"No axis named None for object type {type(self).__name__}\"\n            )\n        axis = self._get_axis_number(axis)\n        return self.__constructor__(\n            query_compiler=self._query_compiler.rank(\n                axis=axis,\n            
    method=method,\n                numeric_only=numeric_only,\n                na_option=na_option,\n                ascending=ascending,\n                pct=pct,\n            )\n        )\n\n    def _copy_index_metadata(self, source, destination):  # noqa: PR01, RT01, D200\n        \"\"\"\n        Copy Index metadata from `source` to `destination` inplace.\n        \"\"\"\n        if hasattr(source, \"name\") and hasattr(destination, \"name\"):\n            destination.name = source.name\n        if hasattr(source, \"names\") and hasattr(destination, \"names\"):\n            destination.names = source.names\n        return destination\n\n    def _ensure_index(self, index_like, axis=0):  # noqa: PR01, RT01, D200\n        \"\"\"\n        Ensure that we have an index from some index-like object.\n        \"\"\"\n        if (\n            self._query_compiler.has_multiindex(axis=axis)\n            and not isinstance(index_like, pandas.Index)\n            and is_list_like(index_like)\n            and len(index_like) > 0\n            and isinstance(index_like[0], tuple)\n        ):\n            try:\n                return pandas.MultiIndex.from_tuples(index_like)\n            except TypeError:\n                # not all tuples\n                pass\n        return ensure_index(index_like)\n\n    def reindex(\n        self,\n        index=None,\n        columns=None,\n        copy=True,\n        **kwargs,\n    ) -> Self:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Conform `BasePandasDataset` to new index with optional filling logic.\n        \"\"\"\n        new_query_compiler = None\n        if index is not None:\n            if not isinstance(index, pandas.Index) or not index.equals(self.index):\n                new_query_compiler = self._query_compiler.reindex(\n                    axis=0, labels=index, **kwargs\n                )\n        if new_query_compiler is None:\n            new_query_compiler = self._query_compiler\n        final_query_compiler = 
None\n        if columns is not None:\n            if not isinstance(index, pandas.Index) or not columns.equals(self.columns):\n                final_query_compiler = new_query_compiler.reindex(\n                    axis=1, labels=columns, **kwargs\n                )\n        if final_query_compiler is None:\n            final_query_compiler = new_query_compiler\n        return self._create_or_update_from_compiler(\n            final_query_compiler, inplace=False if copy is None else not copy\n        )\n\n    def rename_axis(\n        self,\n        mapper=lib.no_default,\n        *,\n        index=lib.no_default,\n        columns=lib.no_default,\n        axis=0,\n        copy=None,\n        inplace=False,\n    ) -> DataFrame | Series | None:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Set the name of the axis for the index or columns.\n        \"\"\"\n        axes = {\"index\": index, \"columns\": columns}\n\n        if copy is None:\n            copy = True\n\n        if axis is not None:\n            axis = self._get_axis_number(axis)\n\n        inplace = validate_bool_kwarg(inplace, \"inplace\")\n\n        if mapper is not lib.no_default:\n            # Use v0.23 behavior if a scalar or list\n            non_mapper = is_scalar(mapper) or (\n                is_list_like(mapper) and not is_dict_like(mapper)\n            )\n            if non_mapper:\n                return self._set_axis_name(mapper, axis=axis, inplace=inplace)\n            else:\n                raise ValueError(\"Use `.rename` to alter labels with a mapper.\")\n        else:\n            # Use new behavior.  
Means that index and/or columns is specified\n            result = self if inplace else self.copy(deep=copy)\n\n            for axis in range(self.ndim):\n                v = axes.get(pandas.DataFrame._get_axis_name(axis))\n                if v is lib.no_default:\n                    continue\n                non_mapper = is_scalar(v) or (is_list_like(v) and not is_dict_like(v))\n                if non_mapper:\n                    newnames = v\n                else:\n\n                    def _get_rename_function(mapper):\n                        if isinstance(mapper, (dict, BasePandasDataset)):\n\n                            def f(x):\n                                if x in mapper:\n                                    return mapper[x]\n                                else:\n                                    return x\n\n                        else:\n                            f = mapper\n\n                        return f\n\n                    f = _get_rename_function(v)\n                    curnames = self.index.names if axis == 0 else self.columns.names\n                    newnames = [f(name) for name in curnames]\n                result._set_axis_name(newnames, axis=axis, inplace=True)\n            if not inplace:\n                return result\n\n    def reorder_levels(self, order, axis=0) -> Self:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Rearrange index levels using input order.\n        \"\"\"\n        axis = self._get_axis_number(axis)\n        new_labels = self._get_axis(axis).reorder_levels(order)\n        return self.set_axis(new_labels, axis=axis)\n\n    def resample(\n        self,\n        rule,\n        axis: Axis = lib.no_default,\n        closed: Optional[str] = None,\n        label: Optional[str] = None,\n        convention: str = lib.no_default,\n        kind: Optional[str] = lib.no_default,\n        on: Level = None,\n        level: Level = None,\n        origin: str | TimestampConvertibleTypes = \"start_day\",\n        offset: 
Optional[TimedeltaConvertibleTypes] = None,\n        group_keys=False,\n    ) -> Resampler:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Resample time-series data.\n        \"\"\"\n        from .resample import Resampler\n\n        if axis is not lib.no_default:\n            axis = self._get_axis_number(axis)\n            if axis == 1:\n                warnings.warn(\n                    \"DataFrame.resample with axis=1 is deprecated. Do \"\n                    + \"`frame.T.resample(...)` without axis instead.\",\n                    FutureWarning,\n                )\n            else:\n                warnings.warn(\n                    f\"The 'axis' keyword in {type(self).__name__}.resample is \"\n                    + \"deprecated and will be removed in a future version.\",\n                    FutureWarning,\n                )\n        else:\n            axis = 0\n\n        return Resampler(\n            dataframe=self,\n            rule=rule,\n            axis=axis,\n            closed=closed,\n            label=label,\n            convention=convention,\n            kind=kind,\n            on=on,\n            level=level,\n            origin=origin,\n            offset=offset,\n            group_keys=group_keys,\n        )\n\n    def reset_index(\n        self,\n        level: IndexLabel = None,\n        *,\n        drop: bool = False,\n        inplace: bool = False,\n        col_level: Hashable = 0,\n        col_fill: Hashable = \"\",\n        allow_duplicates=lib.no_default,\n        names: Hashable | Sequence[Hashable] = None,\n    ) -> DataFrame | Series | None:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Reset the index, or a level of it.\n        \"\"\"\n        inplace = validate_bool_kwarg(inplace, \"inplace\")\n        # Error checking for matching pandas. 
Pandas does not allow you to\n        # insert a dropped index into a DataFrame if these columns already\n        # exist.\n        if (\n            not drop\n            and not (\n                self._query_compiler.lazy_column_labels\n                or self._query_compiler.lazy_row_labels\n            )\n            and not self._query_compiler.has_multiindex()\n            and all(n in self.columns for n in [\"level_0\", \"index\"])\n        ):\n            raise ValueError(\"cannot insert level_0, already exists\")\n        new_query_compiler = self._query_compiler.reset_index(\n            drop=drop,\n            level=level,\n            col_level=col_level,\n            col_fill=col_fill,\n            allow_duplicates=allow_duplicates,\n            names=names,\n        )\n        return self._create_or_update_from_compiler(new_query_compiler, inplace)\n\n    def radd(\n        self, other, axis=\"columns\", level=None, fill_value=None\n    ) -> Self:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return addition of `BasePandasDataset` and `other`, element-wise (binary operator `radd`).\n        \"\"\"\n        return self._binary_op(\n            \"radd\", other, axis=axis, level=level, fill_value=fill_value\n        )\n\n    def rfloordiv(\n        self, other, axis=\"columns\", level=None, fill_value=None\n    ) -> Self:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Get integer division of `BasePandasDataset` and `other`, element-wise (binary operator `rfloordiv`).\n        \"\"\"\n        return self._binary_op(\n            \"rfloordiv\", other, axis=axis, level=level, fill_value=fill_value\n        )\n\n    def rmod(\n        self, other, axis=\"columns\", level=None, fill_value=None\n    ) -> Self:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Get modulo of `BasePandasDataset` and `other`, element-wise (binary operator `rmod`).\n        \"\"\"\n        return self._binary_op(\n            \"rmod\", other, axis=axis, level=level, 
fill_value=fill_value\n        )\n\n    def rmul(\n        self, other, axis=\"columns\", level=None, fill_value=None\n    ) -> Self:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Get Multiplication of dataframe and other, element-wise (binary operator `rmul`).\n        \"\"\"\n        return self._binary_op(\n            \"rmul\", other, axis=axis, level=level, fill_value=fill_value\n        )\n\n    def rolling(\n        self,\n        window,\n        min_periods: int | None = None,\n        center: bool = False,\n        win_type: str | None = None,\n        on: str | None = None,\n        axis: Axis = lib.no_default,\n        closed: str | None = None,\n        step: int | None = None,\n        method: str = \"single\",\n    ) -> Rolling | Window:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Provide rolling window calculations.\n        \"\"\"\n        if axis is not lib.no_default:\n            axis = self._get_axis_number(axis)\n            name = \"rolling\"\n            if axis == 1:\n                warnings.warn(\n                    f\"Support for axis=1 in {type(self).__name__}.{name} is \"\n                    + \"deprecated and will be removed in a future version. \"\n                    + f\"Use obj.T.{name}(...) instead\",\n                    FutureWarning,\n                )\n            else:\n                warnings.warn(\n                    f\"The 'axis' keyword in {type(self).__name__}.{name} is \"\n                    + \"deprecated and will be removed in a future version. 
\"\n                    + \"Call the method without the axis keyword instead.\",\n                    FutureWarning,\n                )\n        else:\n            axis = 0\n\n        if win_type is not None:\n            from .window import Window\n\n            return Window(\n                self,\n                window=window,\n                min_periods=min_periods,\n                center=center,\n                win_type=win_type,\n                on=on,\n                axis=axis,\n                closed=closed,\n                step=step,\n                method=method,\n            )\n        from .window import Rolling\n\n        return Rolling(\n            self,\n            window=window,\n            min_periods=min_periods,\n            center=center,\n            win_type=win_type,\n            on=on,\n            axis=axis,\n            closed=closed,\n            step=step,\n            method=method,\n        )\n\n    def round(self, decimals=0, *args, **kwargs) -> Self:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Round a `BasePandasDataset` to a variable number of decimal places.\n        \"\"\"\n        # FIXME: Judging by pandas docs `*args` and `**kwargs` serves only compatibility\n        # purpose and does not affect the result, we shouldn't pass them to the query compiler.\n        return self.__constructor__(\n            query_compiler=self._query_compiler.round(decimals=decimals, **kwargs)\n        )\n\n    def rpow(\n        self, other, axis=\"columns\", level=None, fill_value=None\n    ) -> Self:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Get exponential power of `BasePandasDataset` and `other`, element-wise (binary operator `rpow`).\n        \"\"\"\n        return self._binary_op(\n            \"rpow\", other, axis=axis, level=level, fill_value=fill_value\n        )\n\n    def rsub(\n        self, other, axis=\"columns\", level=None, fill_value=None\n    ) -> Self:  # noqa: PR01, RT01, D200\n        \"\"\"\n        
Get subtraction of `BasePandasDataset` and `other`, element-wise (binary operator `rsub`).\n        \"\"\"\n        return self._binary_op(\n            \"rsub\", other, axis=axis, level=level, fill_value=fill_value\n        )\n\n    def rtruediv(\n        self, other, axis=\"columns\", level=None, fill_value=None\n    ) -> Self:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Get floating division of `BasePandasDataset` and `other`, element-wise (binary operator `rtruediv`).\n        \"\"\"\n        return self._binary_op(\n            \"rtruediv\", other, axis=axis, level=level, fill_value=fill_value\n        )\n\n    rdiv: Self = rtruediv\n\n    def sample(\n        self,\n        n: int | None = None,\n        frac: float | None = None,\n        replace: bool = False,\n        weights=None,\n        random_state: RandomState | None = None,\n        axis: Axis | None = None,\n        ignore_index: bool = False,\n    ) -> Self:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return a random sample of items from an axis of object.\n        \"\"\"\n        axis = self._get_axis_number(axis)\n        if axis:\n            axis_labels = self.columns\n            axis_length = len(axis_labels)\n        else:\n            # Getting rows requires indices instead of labels. 
RangeIndex provides this.\n            axis_labels = pandas.RangeIndex(len(self))\n            axis_length = len(axis_labels)\n        if weights is not None:\n            # Index of the weights Series should correspond to the index of the\n            # Dataframe in order to sample\n            if isinstance(weights, BasePandasDataset):\n                weights = weights.reindex(self._get_axis(axis))\n            # If weights arg is a string, the weights used for sampling will\n            # be the values in the column corresponding to that string\n            if isinstance(weights, str):\n                if axis == 0:\n                    try:\n                        weights = self[weights]\n                    except KeyError:\n                        raise KeyError(\"String passed to weights not a valid column\")\n                else:\n                    raise ValueError(\n                        \"Strings can only be passed to \"\n                        + \"weights when sampling from rows on \"\n                        + \"a DataFrame\"\n                    )\n            weights = pandas.Series(weights, dtype=\"float64\")\n\n            if len(weights) != axis_length:\n                raise ValueError(\n                    \"Weights and axis to be sampled must be of same length\"\n                )\n            if (weights == np.inf).any() or (weights == -np.inf).any():\n                raise ValueError(\"weight vector may not include `inf` values\")\n            if (weights < 0).any():\n                raise ValueError(\"weight vector many not include negative values\")\n            # weights cannot be NaN when sampling, so we must set all nan\n            # values to 0\n            weights = weights.fillna(0)\n            # If passed in weights are not equal to 1, renormalize them\n            # otherwise numpy sampling function will error\n            weights_sum = weights.sum()\n            if weights_sum != 1:\n                if weights_sum != 0:\n  
                  weights = weights / weights_sum\n                else:\n                    raise ValueError(\"Invalid weights: weights sum to zero\")\n            weights = weights.values\n\n        if n is None and frac is None:\n            # default to n = 1 if n and frac are both None (in accordance with\n            # pandas specification)\n            n = 1\n        elif n is not None and frac is None and n % 1 != 0:\n            # n must be an integer\n            raise ValueError(\"Only integers accepted as `n` values\")\n        elif n is None and frac is not None:\n            # compute the number of samples based on frac\n            n = int(round(frac * axis_length))\n        elif n is not None and frac is not None:\n            # Pandas specification does not allow both n and frac to be passed\n            # in\n            raise ValueError(\"Please enter a value for `frac` OR `n`, not both\")\n        if n < 0:\n            raise ValueError(\n                \"A negative number of rows requested. 
Please provide positive value.\"\n            )\n        if n == 0:\n            # This returns an empty object, and since it is a weird edge case that\n            # doesn't need to be distributed, we default to pandas for n=0.\n            # We don't need frac to be set to anything since n is already 0.\n            return self._default_to_pandas(\n                \"sample\",\n                n=n,\n                frac=None,\n                replace=replace,\n                weights=weights,\n                random_state=random_state,\n                axis=axis,\n                ignore_index=ignore_index,\n            )\n        if random_state is not None:\n            # Get a random number generator depending on the type of\n            # random_state that is passed in\n            if isinstance(random_state, int):\n                random_num_gen = np.random.RandomState(random_state)\n            elif isinstance(random_state, np.random.RandomState):\n                random_num_gen = random_state\n            else:\n                # random_state must be an int or a numpy RandomState object\n                raise ValueError(\n                    \"Please enter an `int` OR a \"\n                    + \"np.random.RandomState for random_state\"\n                )\n            # choose random numbers and then get corresponding labels from\n            # chosen axis\n            sample_indices = random_num_gen.choice(\n                np.arange(0, axis_length), size=n, replace=replace, p=weights\n            )\n            samples = axis_labels[sample_indices]\n        else:\n            # randomly select labels from chosen axis\n            samples = np.random.choice(\n                a=axis_labels, size=n, replace=replace, p=weights\n            )\n        if axis:\n            query_compiler = self._query_compiler.getitem_column_array(samples)\n            return self.__constructor__(query_compiler=query_compiler)\n        else:\n            query_compiler = 
self._query_compiler.getitem_row_array(samples)\n            return self.__constructor__(query_compiler=query_compiler)\n\n    def sem(\n        self,\n        axis: Axis = 0,\n        skipna: bool = True,\n        ddof: int = 1,\n        numeric_only=False,\n        **kwargs,\n    ) -> Series | float:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return unbiased standard error of the mean over requested axis.\n        \"\"\"\n        return self._stat_operation(\n            \"sem\", axis, skipna, numeric_only, ddof=ddof, **kwargs\n        )\n\n    def mean(\n        self,\n        axis: Axis = 0,\n        skipna=True,\n        numeric_only=False,\n        **kwargs,\n    ) -> Series | float:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return the mean of the values over the requested axis.\n        \"\"\"\n        return self._stat_operation(\"mean\", axis, skipna, numeric_only, **kwargs)\n\n    def median(\n        self,\n        axis: Axis = 0,\n        skipna=True,\n        numeric_only=False,\n        **kwargs,\n    ) -> Series | float:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return the median of the values over the requested axis.\n        \"\"\"\n        return self._stat_operation(\"median\", axis, skipna, numeric_only, **kwargs)\n\n    def set_axis(\n        self,\n        labels,\n        *,\n        axis: Axis = 0,\n        copy=None,\n    ) -> Self:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Assign desired index to given axis.\n        \"\"\"\n        if copy is None:\n            copy = True\n        obj = self.copy() if copy else self\n        setattr(obj, pandas.DataFrame._get_axis_name(axis), labels)\n        return obj\n\n    def set_flags(\n        self, *, copy: bool = False, allows_duplicate_labels: Optional[bool] = None\n    ) -> Self:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return a new `BasePandasDataset` with updated flags.\n        \"\"\"\n        return self._default_to_pandas(\n            
pandas.DataFrame.set_flags,\n            copy=copy,\n            allows_duplicate_labels=allows_duplicate_labels,\n        )\n\n    @property\n    def flags(self):\n        return self._default_to_pandas(lambda df: df.flags)\n\n    def shift(\n        self,\n        periods: int = 1,\n        freq=None,\n        axis: Axis = 0,\n        fill_value: Hashable = lib.no_default,\n        suffix=None,\n    ) -> Self | DataFrame:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Shift index by desired number of periods with an optional time `freq`.\n        \"\"\"\n        if suffix:\n            return self._default_to_pandas(\n                lambda df: df.shift(\n                    periods=periods,\n                    freq=freq,\n                    axis=axis,\n                    fill_value=fill_value,\n                    suffix=suffix,\n                )\n            )\n\n        if freq is not None and fill_value is not lib.no_default:\n            raise ValueError(\n                \"Cannot pass both 'freq' and 'fill_value' to \"\n                + f\"{type(self).__name__}.shift\"\n            )\n\n        if periods == 0:\n            # Check obvious case first\n            return self.copy()\n        return self._create_or_update_from_compiler(\n            new_query_compiler=self._query_compiler.shift(\n                periods, freq, axis, fill_value\n            ),\n            inplace=False,\n        )\n\n    def skew(\n        self,\n        axis: Axis = 0,\n        skipna: bool = True,\n        numeric_only=False,\n        **kwargs,\n    ) -> Series | float:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return unbiased skew over requested axis.\n        \"\"\"\n        return self._stat_operation(\"skew\", axis, skipna, numeric_only, **kwargs)\n\n    def sort_index(\n        self,\n        *,\n        axis=0,\n        level=None,\n        ascending=True,\n        inplace=False,\n        kind=\"quicksort\",\n        na_position=\"last\",\n        
sort_remaining=True,\n        ignore_index: bool = False,\n        key: Optional[IndexKeyFunc] = None,\n    ) -> Self | None:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Sort object by labels (along an axis).\n        \"\"\"\n        # pandas throws this exception. See pandas issue #39434\n        if ascending is None:\n            raise ValueError(\n                \"the `axis` parameter is not supported in the pandas implementation of argsort()\"\n            )\n        axis = self._get_axis_number(axis)\n        inplace = validate_bool_kwarg(inplace, \"inplace\")\n        new_query_compiler = self._query_compiler.sort_index(\n            axis=axis,\n            level=level,\n            ascending=ascending,\n            inplace=inplace,\n            kind=kind,\n            na_position=na_position,\n            sort_remaining=sort_remaining,\n            ignore_index=ignore_index,\n            key=key,\n        )\n        return self._create_or_update_from_compiler(new_query_compiler, inplace)\n\n    def sort_values(\n        self,\n        by,\n        *,\n        axis=0,\n        ascending=True,\n        inplace: bool = False,\n        kind=\"quicksort\",\n        na_position=\"last\",\n        ignore_index: bool = False,\n        key: Optional[IndexKeyFunc] = None,\n    ) -> Self | None:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Sort by the values along either axis.\n        \"\"\"\n        axis = self._get_axis_number(axis)\n        inplace = validate_bool_kwarg(inplace, \"inplace\")\n        ascending = validate_ascending(ascending)\n        if axis == 0:\n            result = self._query_compiler.sort_rows_by_column_values(\n                by,\n                ascending=ascending,\n                kind=kind,\n                na_position=na_position,\n                ignore_index=ignore_index,\n                key=key,\n            )\n        else:\n            result = self._query_compiler.sort_columns_by_row_values(\n                by,\n  
              ascending=ascending,\n                kind=kind,\n                na_position=na_position,\n                ignore_index=ignore_index,\n                key=key,\n            )\n        return self._create_or_update_from_compiler(result, inplace)\n\n    def std(\n        self,\n        axis: Axis = 0,\n        skipna: bool = True,\n        ddof: int = 1,\n        numeric_only=False,\n        **kwargs,\n    ) -> Series | float:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return sample standard deviation over requested axis.\n        \"\"\"\n        return self._stat_operation(\n            \"std\", axis, skipna, numeric_only, ddof=ddof, **kwargs\n        )\n\n    def sub(\n        self, other, axis=\"columns\", level=None, fill_value=None\n    ) -> Self:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Get subtraction of `BasePandasDataset` and `other`, element-wise (binary operator `sub`).\n        \"\"\"\n        return self._binary_op(\n            \"sub\", other, axis=axis, level=level, fill_value=fill_value\n        )\n\n    subtract: Self = sub\n\n    def swapaxes(self, axis1, axis2, copy=None) -> Self:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Interchange axes and swap values axes appropriately.\n        \"\"\"\n        if copy is None:\n            copy = True\n        axis1 = self._get_axis_number(axis1)\n        axis2 = self._get_axis_number(axis2)\n        if axis1 != axis2:\n            return self.transpose()\n        if copy:\n            return self.copy()\n        return self\n\n    def swaplevel(self, i=-2, j=-1, axis=0) -> Self:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Swap levels `i` and `j` in a `MultiIndex`.\n        \"\"\"\n        axis = self._get_axis_number(axis)\n        idx = self.index if axis == 0 else self.columns\n        return self.set_axis(idx.swaplevel(i, j), axis=axis)\n\n    def tail(self, n=5) -> Self:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return the last `n` rows.\n        
\"\"\"\n        if n != 0:\n            return self.iloc[-n:]\n        return self.iloc[len(self) :]\n\n    def take(self, indices, axis=0, **kwargs) -> Self:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return the elements in the given *positional* indices along an axis.\n        \"\"\"\n        axis = self._get_axis_number(axis)\n        slice_obj = indices if axis == 0 else (slice(None), indices)\n        return self.iloc[slice_obj]\n\n    def to_clipboard(\n        self, excel=True, sep=None, **kwargs\n    ):  # pragma: no cover  # noqa: PR01, RT01, D200\n        \"\"\"\n        Copy object to the system clipboard.\n        \"\"\"\n        return self._default_to_pandas(\"to_clipboard\", excel=excel, sep=sep, **kwargs)\n\n    @expanduser_path_arg(\"path_or_buf\")\n    def to_csv(\n        self,\n        path_or_buf=None,\n        sep=\",\",\n        na_rep=\"\",\n        float_format=None,\n        columns=None,\n        header=True,\n        index=True,\n        index_label=None,\n        mode=\"w\",\n        encoding=None,\n        compression=\"infer\",\n        quoting=None,\n        quotechar='\"',\n        lineterminator=None,\n        chunksize=None,\n        date_format=None,\n        doublequote=True,\n        escapechar=None,\n        decimal=\".\",\n        errors: str = \"strict\",\n        storage_options: StorageOptions = None,\n    ) -> str | None:  # pragma: no cover\n        from modin.core.execution.dispatching.factories.dispatcher import (\n            FactoryDispatcher,\n        )\n\n        return FactoryDispatcher.to_csv(\n            self._query_compiler,\n            path_or_buf=path_or_buf,\n            sep=sep,\n            na_rep=na_rep,\n            float_format=float_format,\n            columns=columns,\n            header=header,\n            index=index,\n            index_label=index_label,\n            mode=mode,\n            encoding=encoding,\n            compression=compression,\n            quoting=quoting,\n          
  quotechar=quotechar,\n            lineterminator=lineterminator,\n            chunksize=chunksize,\n            date_format=date_format,\n            doublequote=doublequote,\n            escapechar=escapechar,\n            decimal=decimal,\n            errors=errors,\n            storage_options=storage_options,\n        )\n\n    @expanduser_path_arg(\"excel_writer\")\n    def to_excel(\n        self,\n        excel_writer,\n        sheet_name=\"Sheet1\",\n        na_rep=\"\",\n        float_format=None,\n        columns=None,\n        header=True,\n        index=True,\n        index_label=None,\n        startrow=0,\n        startcol=0,\n        engine=None,\n        merge_cells=True,\n        inf_rep=\"inf\",\n        freeze_panes=None,\n        storage_options: StorageOptions = None,\n        engine_kwargs=None,\n    ) -> None:  # pragma: no cover  # noqa: PR01, RT01, D200\n        \"\"\"\n        Write object to an Excel sheet.\n        \"\"\"\n        return self._default_to_pandas(\n            \"to_excel\",\n            excel_writer,\n            sheet_name=sheet_name,\n            na_rep=na_rep,\n            float_format=float_format,\n            columns=columns,\n            header=header,\n            index=index,\n            index_label=index_label,\n            startrow=startrow,\n            startcol=startcol,\n            engine=engine,\n            merge_cells=merge_cells,\n            inf_rep=inf_rep,\n            freeze_panes=freeze_panes,\n            storage_options=storage_options,\n            engine_kwargs=engine_kwargs,\n        )\n\n    def to_dict(self, orient=\"dict\", into=dict, index=True) -> dict:\n        return self._query_compiler.dataframe_to_dict(orient, into, index)\n\n    @expanduser_path_arg(\"path_or_buf\")\n    def to_hdf(\n        self,\n        path_or_buf,\n        key: str,\n        mode: Literal[\"a\", \"w\", \"r+\"] = \"a\",\n        complevel: int | None = None,\n        complib: Literal[\"zlib\", \"lzo\", 
\"bzip2\", \"blosc\"] | None = None,\n        append: bool = False,\n        format: Literal[\"fixed\", \"table\"] | None = None,\n        index: bool = True,\n        min_itemsize: int | dict[str, int] | None = None,\n        nan_rep=None,\n        dropna: bool | None = None,\n        data_columns: Literal[True] | list[str] | None = None,\n        errors: str = \"strict\",\n        encoding: str = \"UTF-8\",\n    ) -> None:  # pragma: no cover  # noqa: PR01, RT01, D200\n        \"\"\"\n        Write the contained data to an HDF5 file using HDFStore.\n        \"\"\"\n        return self._default_to_pandas(\n            \"to_hdf\",\n            path_or_buf,\n            key=key,\n            mode=mode,\n            complevel=complevel,\n            complib=complib,\n            append=append,\n            format=format,\n            index=index,\n            min_itemsize=min_itemsize,\n            nan_rep=nan_rep,\n            dropna=dropna,\n            data_columns=data_columns,\n            errors=errors,\n            encoding=encoding,\n        )\n\n    @expanduser_path_arg(\"path_or_buf\")\n    def to_json(\n        self,\n        path_or_buf=None,\n        orient=None,\n        date_format=None,\n        double_precision=10,\n        force_ascii=True,\n        date_unit=\"ms\",\n        default_handler=None,\n        lines=False,\n        compression=\"infer\",\n        index=None,\n        indent=None,\n        storage_options: StorageOptions = None,\n        mode=\"w\",\n    ) -> str | None:  # pragma: no cover  # noqa: PR01, RT01, D200\n        \"\"\"\n        Convert the object to a JSON string.\n        \"\"\"\n        from modin.core.execution.dispatching.factories.dispatcher import (\n            FactoryDispatcher,\n        )\n\n        return FactoryDispatcher.to_json(\n            self._query_compiler,\n            path_or_buf,\n            orient=orient,\n            date_format=date_format,\n            double_precision=double_precision,\n           
 force_ascii=force_ascii,\n            date_unit=date_unit,\n            default_handler=default_handler,\n            lines=lines,\n            compression=compression,\n            index=index,\n            indent=indent,\n            storage_options=storage_options,\n            mode=mode,\n        )\n\n    @expanduser_path_arg(\"buf\")\n    def to_latex(\n        self,\n        buf=None,\n        columns=None,\n        header=True,\n        index=True,\n        na_rep=\"NaN\",\n        formatters=None,\n        float_format=None,\n        sparsify=None,\n        index_names=True,\n        bold_rows=False,\n        column_format=None,\n        longtable=None,\n        escape=None,\n        encoding=None,\n        decimal=\".\",\n        multicolumn=None,\n        multicolumn_format=None,\n        multirow=None,\n        caption=None,\n        label=None,\n        position=None,\n    ) -> str | None:  # pragma: no cover  # noqa: PR01, RT01, D200\n        \"\"\"\n        Render object to a LaTeX tabular, longtable, or nested table.\n        \"\"\"\n        return self._default_to_pandas(\n            \"to_latex\",\n            buf=buf,\n            columns=columns,\n            header=header,\n            index=index,\n            na_rep=na_rep,\n            formatters=formatters,\n            float_format=float_format,\n            sparsify=sparsify,\n            index_names=index_names,\n            bold_rows=bold_rows,\n            column_format=column_format,\n            longtable=longtable,\n            escape=escape,\n            encoding=encoding,\n            decimal=decimal,\n            multicolumn=multicolumn,\n            multicolumn_format=multicolumn_format,\n            multirow=multirow,\n            caption=caption,\n            label=label,\n            position=position,\n        )\n\n    @expanduser_path_arg(\"buf\")\n    def to_markdown(\n        self,\n        buf=None,\n        mode: str = \"wt\",\n        index: bool = True,\n        
storage_options: StorageOptions = None,\n        **kwargs,\n    ) -> str:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Print `BasePandasDataset` in Markdown-friendly format.\n        \"\"\"\n        return self._default_to_pandas(\n            \"to_markdown\",\n            buf=buf,\n            mode=mode,\n            index=index,\n            storage_options=storage_options,\n            **kwargs,\n        )\n\n    @expanduser_path_arg(\"path\")\n    def to_pickle(\n        self,\n        path,\n        compression: CompressionOptions = \"infer\",\n        protocol: int = pkl.HIGHEST_PROTOCOL,\n        storage_options: StorageOptions = None,\n    ) -> None:  # pragma: no cover  # noqa: PR01, D200\n        \"\"\"\n        Pickle (serialize) object to file.\n        \"\"\"\n        from modin.pandas import to_pickle\n\n        to_pickle(\n            self,\n            path,\n            compression=compression,\n            protocol=protocol,\n            storage_options=storage_options,\n        )\n\n    def _to_bare_numpy(\n        self, dtype=None, copy=False, na_value=lib.no_default\n    ):  # noqa: PR01, RT01, D200\n        \"\"\"\n        Convert the `BasePandasDataset` to a NumPy array.\n        \"\"\"\n        return self._query_compiler.to_numpy(\n            dtype=dtype,\n            copy=copy,\n            na_value=na_value,\n        )\n\n    def to_numpy(\n        self, dtype=None, copy=False, na_value=lib.no_default\n    ) -> np.ndarray:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Convert the `BasePandasDataset` to a NumPy array or a Modin wrapper for NumPy array.\n        \"\"\"\n        from modin.config import ModinNumpy\n\n        if ModinNumpy.get():\n            from ..numpy.arr import array\n\n            return array(self, copy=copy)\n\n        return self._to_bare_numpy(\n            dtype=dtype,\n            copy=copy,\n            na_value=na_value,\n        )\n\n    # TODO(williamma12): When this gets implemented, have the 
series one call this.\n    def to_period(\n        self, freq=None, axis=0, copy=None\n    ) -> Self:  # pragma: no cover  # noqa: PR01, RT01, D200\n        \"\"\"\n        Convert `BasePandasDataset` from DatetimeIndex to PeriodIndex.\n        \"\"\"\n        return self._default_to_pandas(\"to_period\", freq=freq, axis=axis, copy=copy)\n\n    @expanduser_path_arg(\"buf\")\n    def to_string(\n        self,\n        buf=None,\n        columns=None,\n        col_space=None,\n        header=True,\n        index=True,\n        na_rep=\"NaN\",\n        formatters=None,\n        float_format=None,\n        sparsify=None,\n        index_names=True,\n        justify=None,\n        max_rows=None,\n        min_rows=None,\n        max_cols=None,\n        show_dimensions=False,\n        decimal=\".\",\n        line_width=None,\n        max_colwidth=None,\n        encoding=None,\n    ) -> str | None:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Render a `BasePandasDataset` to a console-friendly tabular output.\n        \"\"\"\n        return self._default_to_pandas(\n            \"to_string\",\n            buf=buf,\n            columns=columns,\n            col_space=col_space,\n            header=header,\n            index=index,\n            na_rep=na_rep,\n            formatters=formatters,\n            float_format=float_format,\n            sparsify=sparsify,\n            index_names=index_names,\n            justify=justify,\n            max_rows=max_rows,\n            max_cols=max_cols,\n            show_dimensions=show_dimensions,\n            decimal=decimal,\n            line_width=line_width,\n            max_colwidth=max_colwidth,\n            encoding=encoding,\n        )\n\n    def to_sql(\n        self,\n        name,\n        con,\n        schema=None,\n        if_exists=\"fail\",\n        index=True,\n        index_label=None,\n        chunksize=None,\n        dtype=None,\n        method=None,\n    ) -> int | None:  # noqa: PR01, D200\n        \"\"\"\n  
      Write records stored in a `BasePandasDataset` to a SQL database.\n        \"\"\"\n        new_query_compiler = self._query_compiler\n        # writing the index to the database by inserting it to the DF\n        if index:\n            new_query_compiler = new_query_compiler.reset_index()\n            if index_label is not None:\n                if not is_list_like(index_label):\n                    index_label = [index_label]\n                new_query_compiler.columns = list(index_label) + list(\n                    new_query_compiler.columns[len(index_label) :]\n                )\n            # so pandas._to_sql will not write the index to the database as well\n            index = False\n\n        from modin.core.execution.dispatching.factories.dispatcher import (\n            FactoryDispatcher,\n        )\n\n        FactoryDispatcher.to_sql(\n            new_query_compiler,\n            name=name,\n            con=con,\n            schema=schema,\n            if_exists=if_exists,\n            index=index,\n            index_label=index_label,\n            chunksize=chunksize,\n            dtype=dtype,\n            method=method,\n        )\n\n    # TODO(williamma12): When this gets implemented, have the series one call this.\n    def to_timestamp(\n        self, freq=None, how=\"start\", axis=0, copy=None\n    ) -> Self:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Cast to DatetimeIndex of timestamps, at *beginning* of period.\n        \"\"\"\n        return self._default_to_pandas(\n            \"to_timestamp\", freq=freq, how=how, axis=axis, copy=copy\n        )\n\n    def to_xarray(self):  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return an xarray object from the `BasePandasDataset`.\n        \"\"\"\n        return self._default_to_pandas(\"to_xarray\")\n\n    def truediv(\n        self, other, axis=\"columns\", level=None, fill_value=None\n    ) -> Self:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Get floating division of 
`BasePandasDataset` and `other`, element-wise (binary operator `truediv`).\n        \"\"\"\n        return self._binary_op(\n            \"truediv\", other, axis=axis, level=level, fill_value=fill_value\n        )\n\n    div: Self = truediv\n    divide: Self = truediv\n\n    def truncate(\n        self, before=None, after=None, axis=None, copy=None\n    ) -> Self:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Truncate a `BasePandasDataset` before and after some index value.\n        \"\"\"\n        axis = self._get_axis_number(axis)\n        if (\n            not self._get_axis(axis).is_monotonic_increasing\n            and not self._get_axis(axis).is_monotonic_decreasing\n        ):\n            raise ValueError(\"truncate requires a sorted index\")\n\n        if before is not None and after is not None and before > after:\n            raise ValueError(f\"Truncate: {after} must be after {before}\")\n\n        s = slice(*self._get_axis(axis).slice_locs(before, after))\n        slice_obj = s if axis == 0 else (slice(None), s)\n        return self.iloc[slice_obj]\n\n    def transform(\n        self, func, axis=0, *args, **kwargs\n    ) -> Self:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Call ``func`` on self producing a `BasePandasDataset` with the same axis shape as self.\n        \"\"\"\n        kwargs[\"is_transform\"] = True\n        self._validate_function(func)\n        try:\n            result = self.agg(func, axis=axis, *args, **kwargs)\n        except (TypeError, pandas.errors.SpecificationError):\n            raise\n        except Exception as err:\n            raise ValueError(\"Transform function failed\") from err\n        if getattr(result, \"_pandas_class\", None) not in (\n            pandas.Series,\n            pandas.DataFrame,\n        ) or not result.index.equals(self.index):\n            raise ValueError(\"Function did not transform\")\n        return result\n\n    def tz_convert(\n        self, tz, axis=0, level=None, copy=None\n    
) -> Self:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Convert tz-aware axis to target time zone.\n        \"\"\"\n        if copy is None:\n            copy = True\n        return self._create_or_update_from_compiler(\n            self._query_compiler.tz_convert(\n                tz, axis=self._get_axis_number(axis), level=level, copy=copy\n            ),\n            inplace=(not copy),\n        )\n\n    def tz_localize(\n        self, tz, axis=0, level=None, copy=None, ambiguous=\"raise\", nonexistent=\"raise\"\n    ) -> Self:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Localize tz-naive index of a `BasePandasDataset` to target time zone.\n        \"\"\"\n        if copy is None:\n            copy = True\n        return self._create_or_update_from_compiler(\n            self._query_compiler.tz_localize(\n                tz,\n                axis=self._get_axis_number(axis),\n                level=level,\n                copy=copy,\n                ambiguous=ambiguous,\n                nonexistent=nonexistent,\n            ),\n            inplace=(not copy),\n        )\n\n    def interpolate(\n        self,\n        method=\"linear\",\n        *,\n        axis=0,\n        limit=None,\n        inplace=False,\n        limit_direction: Optional[str] = None,\n        limit_area=None,\n        downcast=lib.no_default,\n        **kwargs,\n    ) -> Self:  # noqa: PR01, RT01, D200\n        if downcast is not lib.no_default:\n            warnings.warn(\n                f\"The 'downcast' keyword in {type(self).__name__}.interpolate \"\n                + \"is deprecated and will be removed in a future version. 
\"\n                + \"Call result.infer_objects(copy=False) on the result instead.\",\n                FutureWarning,\n            )\n        else:\n            downcast = None\n\n        return self._create_or_update_from_compiler(\n            self._query_compiler.interpolate(\n                method=method,\n                axis=axis,\n                limit=limit,\n                inplace=False,\n                limit_direction=limit_direction,\n                limit_area=limit_area,\n                downcast=downcast,\n                **kwargs,\n            ),\n            inplace=inplace,\n        )\n\n    # TODO: uncomment the following lines when #3331 issue will be closed\n    # @prepend_to_notes(\n    #     \"\"\"\n    #     In comparison with pandas, Modin's ``value_counts`` returns Series with ``MultiIndex``\n    #     only if multiple columns were passed via the `subset` parameter, otherwise, the resulted\n    #     Series's index will be a regular single dimensional ``Index``.\n    #     \"\"\"\n    # )\n    @_inherit_docstrings(\n        pandas.DataFrame.value_counts, apilink=\"pandas.DataFrame.value_counts\"\n    )\n    def value_counts(\n        self,\n        subset: Sequence[Hashable] | None = None,\n        normalize: bool = False,\n        sort: bool = True,\n        ascending: bool = False,\n        dropna: bool = True,\n    ) -> Series:\n        if subset is None:\n            subset = self._query_compiler.columns\n        with warnings.catch_warnings():\n            warnings.filterwarnings(\n                \"ignore\",\n                message=\".*groupby keys will be sorted anyway.*\",\n                category=UserWarning,\n            )\n            counted_values = self.groupby(\n                by=subset, dropna=dropna, observed=True, sort=False\n            ).size()\n        if sort:\n            if counted_values.name is None:\n                counted_values.name = 0\n            by = counted_values.name\n            result = 
counted_values._query_compiler.sort_rows_by_column_values(\n                columns=by,\n                ascending=ascending,\n            )\n            counted_values = self._create_or_update_from_compiler(result)\n            if isinstance(counted_values, pd.DataFrame):\n                counted_values = counted_values.squeeze(axis=1)\n        if normalize:\n            counted_values = counted_values / counted_values.sum()\n        # TODO: uncomment when strict compability mode will be implemented:\n        # https://github.com/modin-project/modin/issues/3411\n        # if STRICT_COMPABILITY and not isinstance(counted_values.index, MultiIndex):\n        #     counted_values.index = pandas.MultiIndex.from_arrays(\n        #         [counted_values.index], names=counted_values.index.names\n        #     )\n        # https://pandas.pydata.org/pandas-docs/version/2.0/whatsnew/v2.0.0.html#value-counts-sets-the-resulting-name-to-count\n        counted_values.name = \"proportion\" if normalize else \"count\"\n        return counted_values\n\n    def var(\n        self,\n        axis: Axis = 0,\n        skipna: bool = True,\n        ddof: int = 1,\n        numeric_only=False,\n        **kwargs,\n    ) -> Series | float:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return unbiased variance over requested axis.\n        \"\"\"\n        return self._stat_operation(\n            \"var\", axis, skipna, numeric_only, ddof=ddof, **kwargs\n        )\n\n    def __abs__(self) -> Self:\n        \"\"\"\n        Return a `BasePandasDataset` with absolute numeric value of each element.\n\n        Returns\n        -------\n        BasePandasDataset\n            Object containing the absolute value of each element.\n        \"\"\"\n        return self.abs()\n\n    @_doc_binary_op(\n        operation=\"union\", bin_op=\"and\", right=\"other\", **_doc_binary_op_kwargs\n    )\n    def __and__(self, other) -> Self:\n        return self._binary_op(\"__and__\", other, axis=0)\n\n    
@_doc_binary_op(\n        operation=\"union\", bin_op=\"rand\", right=\"other\", **_doc_binary_op_kwargs\n    )\n    def __rand__(self, other) -> Self:\n        return self._binary_op(\"__rand__\", other, axis=0)\n\n    def __array__(\n        self, dtype: npt.DTypeLike | None = None, copy: bool | None = None\n    ) -> np.ndarray:\n        \"\"\"\n        Return the values as a NumPy array.\n\n        Parameters\n        ----------\n        dtype : str or np.dtype, optional\n            The dtype of returned array.\n        copy : bool, default: None\n            This parameter has no effect; the method always returns a copy of\n            the data.\n\n        Returns\n        -------\n        arr : np.ndarray\n            NumPy representation of Modin object.\n        \"\"\"\n        return self._to_bare_numpy(dtype)\n\n    def __copy__(self, deep=True) -> Self:\n        \"\"\"\n        Return the copy of the `BasePandasDataset`.\n\n        Parameters\n        ----------\n        deep : bool, default: True\n            Whether the copy should be deep or not.\n\n        Returns\n        -------\n        BasePandasDataset\n        \"\"\"\n        return self.copy(deep=deep)\n\n    def __deepcopy__(self, memo=None) -> Self:\n        \"\"\"\n        Return the deep copy of the `BasePandasDataset`.\n\n        Parameters\n        ----------\n        memo : Any, optional\n           Deprecated parameter.\n\n        Returns\n        -------\n        BasePandasDataset\n        \"\"\"\n        return self.copy(deep=True)\n\n    @_doc_binary_op(\n        operation=\"equality comparison\",\n        bin_op=\"eq\",\n        right=\"other\",\n        **_doc_binary_op_kwargs,\n    )\n    def __eq__(self, other) -> Self:\n        return self.eq(other)\n\n    def __finalize__(self, other, method=None, **kwargs) -> Self:\n        \"\"\"\n        Propagate metadata from `other` to `self`.\n\n        Parameters\n        ----------\n        other : BasePandasDataset\n            The 
object from which to get the attributes that we are going\n            to propagate.\n        method : str, optional\n            A passed method name providing context on where `__finalize__`\n            was called.\n        **kwargs : dict\n            Additional keywords arguments to be passed to `__finalize__`.\n\n        Returns\n        -------\n        BasePandasDataset\n        \"\"\"\n        return self._default_to_pandas(\"__finalize__\", other, method=method, **kwargs)\n\n    @_doc_binary_op(\n        operation=\"greater than or equal comparison\",\n        bin_op=\"ge\",\n        right=\"right\",\n        **_doc_binary_op_kwargs,\n    )\n    def __ge__(self, right) -> Self:\n        return self.ge(right)\n\n    def __getitem__(self, key) -> Self:\n        \"\"\"\n        Retrieve dataset according to `key`.\n\n        Parameters\n        ----------\n        key : callable, scalar, slice, str or tuple\n            The global row index to retrieve data from.\n\n        Returns\n        -------\n        BasePandasDataset\n            Located dataset.\n        \"\"\"\n        if not self._query_compiler.lazy_row_count and len(self) == 0:\n            return self._default_to_pandas(\"__getitem__\", key)\n        # see if we can slice the rows\n        # This lets us reuse code in pandas to error check\n        indexer = None\n        if isinstance(key, slice):\n            indexer = self.index._convert_slice_indexer(key, kind=\"getitem\")\n        if indexer is not None:\n            return self._getitem_slice(indexer)\n        else:\n            return self._getitem(key)\n\n    def xs(\n        self,\n        key,\n        axis=0,\n        level=None,\n        drop_level: bool = True,\n    ) -> Self:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return cross-section from the Series/DataFrame.\n        \"\"\"\n        axis = self._get_axis_number(axis)\n        labels = self.columns if axis else self.index\n\n        if isinstance(key, list):\n        
    # deprecated in pandas, to be removed in 2.0\n            warnings.warn(\n                \"Passing lists as key for xs is deprecated and will be removed in a \"\n                + \"future version. Pass key as a tuple instead.\",\n                FutureWarning,\n            )\n\n        if level is not None:\n            if not isinstance(labels, pandas.MultiIndex):\n                raise TypeError(\"Index must be a MultiIndex\")\n            loc, new_ax = labels.get_loc_level(key, level=level, drop_level=drop_level)\n\n            # create the tuple of the indexer\n            _indexer = [slice(None)] * self.ndim\n            _indexer[axis] = loc\n            indexer = tuple(_indexer)\n\n            result = self.iloc[indexer]\n            setattr(result, self._pandas_class._get_axis_name(axis), new_ax)\n            return result\n\n        if axis == 1:\n            if drop_level:\n                return self[key]\n            index = self.columns\n        else:\n            index = self.index\n\n        new_index = None\n        if isinstance(index, pandas.MultiIndex):\n            loc, new_index = index._get_loc_level(key, level=0)\n            if not drop_level:\n                if is_integer(loc):\n                    new_index = index[loc : loc + 1]\n                else:\n                    new_index = index[loc]\n        else:\n            loc = index.get_loc(key)\n\n            if isinstance(loc, np.ndarray):\n                if loc.dtype == np.bool_:\n                    (loc,) = loc.nonzero()\n                # Note: pandas uses self._take_with_is_copy here\n                return self.take(loc, axis=axis)\n\n            if not is_scalar(loc):\n                new_index = index[loc]\n\n        if is_scalar(loc) and axis == 0:\n            # In this case loc should be an integer\n            if self.ndim == 1:\n                # if we encounter an array-like and we only have 1 dim\n                # that means that their are list/ndarrays inside 
the Series!\n                # so just return them (pandas GH 6394)\n                return self.iloc[loc]\n\n            result = self.iloc[loc]\n        elif is_scalar(loc):\n            result = self.iloc[:, slice(loc, loc + 1)]\n        elif axis == 1:\n            result = self.iloc[:, loc]\n        else:\n            result = self.iloc[loc]\n            if new_index is None:\n                raise RuntimeError(\n                    \"`new_index` variable shouldn't be equal to None here, something went wrong.\"\n                )\n            result.index = new_index\n\n        # Note: pandas does result._set_is_copy here\n        return result\n\n    __hash__ = None\n\n    def _setitem_slice(self, key: slice, value) -> None:\n        \"\"\"\n        Set rows specified by `key` slice with `value`.\n\n        Parameters\n        ----------\n        key : location or index-based slice\n            Key that points rows to modify.\n        value : object\n            Value to assing to the rows.\n        \"\"\"\n        indexer = self.index._convert_slice_indexer(key, kind=\"getitem\")\n        self.iloc[indexer] = value\n\n    def _getitem_slice(self, key: slice) -> Self:\n        \"\"\"\n        Get rows specified by `key` slice.\n\n        Parameters\n        ----------\n        key : location or index-based slice\n            Key that points to rows to retrieve.\n\n        Returns\n        -------\n        modin.pandas.BasePandasDataset\n            Selected rows.\n        \"\"\"\n        if is_full_grab_slice(\n            key,\n            # Avoid triggering shape computation for lazy executions\n            sequence_len=(None if self._query_compiler.lazy_row_count else len(self)),\n        ):\n            return self.copy()\n        return self.iloc[key]\n\n    @_doc_binary_op(\n        operation=\"greater than comparison\",\n        bin_op=\"gt\",\n        right=\"right\",\n        **_doc_binary_op_kwargs,\n    )\n    def __gt__(self, right) -> Self:\n     
   return self.gt(right)\n\n    def __invert__(self) -> Self:\n        \"\"\"\n        Apply bitwise inverse to each element of the `BasePandasDataset`.\n\n        Returns\n        -------\n        BasePandasDataset\n            New BasePandasDataset containing bitwise inverse to each value.\n        \"\"\"\n        if not all(is_bool_dtype(d) or is_integer_dtype(d) for d in self._get_dtypes()):\n            raise TypeError(\n                \"bad operand type for unary ~: '{}'\".format(\n                    next(\n                        d\n                        for d in self._get_dtypes()\n                        if not (is_bool_dtype(d) or is_integer_dtype(d))\n                    )\n                )\n            )\n        return self.__constructor__(query_compiler=self._query_compiler.invert())\n\n    @_doc_binary_op(\n        operation=\"less than or equal comparison\",\n        bin_op=\"le\",\n        right=\"right\",\n        **_doc_binary_op_kwargs,\n    )\n    def __le__(self, right) -> Self:\n        return self.le(right)\n\n    def __len__(self) -> int:\n        \"\"\"\n        Return length of info axis.\n\n        Returns\n        -------\n        int\n        \"\"\"\n        return self._query_compiler.get_axis_len(0)\n\n    @_doc_binary_op(\n        operation=\"less than comparison\",\n        bin_op=\"lt\",\n        right=\"right\",\n        **_doc_binary_op_kwargs,\n    )\n    def __lt__(self, right) -> Self:\n        return self.lt(right)\n\n    def __matmul__(self, other) -> Self | np.ndarray | Scalar:\n        \"\"\"\n        Compute the matrix multiplication between the `BasePandasDataset` and `other`.\n\n        Parameters\n        ----------\n        other : BasePandasDataset or array-like\n            The other object to compute the matrix product with.\n\n        Returns\n        -------\n        BasePandasDataset, np.ndarray or scalar\n        \"\"\"\n        return self.dot(other)\n\n    @_doc_binary_op(\n        operation=\"not equal 
comparison\",\n        bin_op=\"ne\",\n        right=\"other\",\n        **_doc_binary_op_kwargs,\n    )\n    def __ne__(self, other) -> Self:\n        return self.ne(other)\n\n    def __neg__(self) -> Self:\n        \"\"\"\n        Change the sign for every value of self.\n\n        Returns\n        -------\n        BasePandasDataset\n        \"\"\"\n        self._validate_dtypes(numeric_only=True)\n        return self.__constructor__(query_compiler=self._query_compiler.negative())\n\n    def __nonzero__(self):\n        \"\"\"\n        Evaluate `BasePandasDataset` as boolean object.\n\n        Raises\n        ------\n        ValueError\n            Always since truth value for self is ambiguous.\n        \"\"\"\n        raise ValueError(\n            f\"The truth value of a {self.__class__.__name__} is ambiguous. \"\n            + \"Use a.empty, a.bool(), a.item(), a.any() or a.all().\"\n        )\n\n    __bool__ = __nonzero__\n\n    @_doc_binary_op(\n        operation=\"disjunction\",\n        bin_op=\"or\",\n        right=\"other\",\n        **_doc_binary_op_kwargs,\n    )\n    def __or__(self, other) -> Self:\n        return self._binary_op(\"__or__\", other, axis=0)\n\n    @_doc_binary_op(\n        operation=\"disjunction\",\n        bin_op=\"ror\",\n        right=\"other\",\n        **_doc_binary_op_kwargs,\n    )\n    def __ror__(self, other) -> Self:\n        return self._binary_op(\"__ror__\", other, axis=0)\n\n    def __sizeof__(self) -> int:\n        \"\"\"\n        Generate the total memory usage for an `BasePandasDataset`.\n\n        Returns\n        -------\n        int\n        \"\"\"\n        return self._query_compiler.sizeof()\n\n    def __str__(self) -> str:  # pragma: no cover\n        \"\"\"\n        Return str(self).\n\n        Returns\n        -------\n        str\n        \"\"\"\n        return repr(self)\n\n    @_doc_binary_op(\n        operation=\"exclusive disjunction\",\n        bin_op=\"xor\",\n        right=\"other\",\n        
**_doc_binary_op_kwargs,\n    )\n    def __xor__(self, other) -> Self:\n        return self._binary_op(\"__xor__\", other, axis=0)\n\n    @_doc_binary_op(\n        operation=\"exclusive disjunction\",\n        bin_op=\"rxor\",\n        right=\"other\",\n        **_doc_binary_op_kwargs,\n    )\n    def __rxor__(self, other) -> Self:\n        return self._binary_op(\"__rxor__\", other, axis=0)\n\n    @property\n    def size(self) -> int:  # noqa: RT01, D200\n        \"\"\"\n        Return an int representing the number of elements in this `BasePandasDataset` object.\n        \"\"\"\n        return len(self._query_compiler.index) * len(self._query_compiler.columns)\n\n    @property\n    def values(self) -> np.ndarray:  # noqa: RT01, D200\n        \"\"\"\n        Return a NumPy representation of the `BasePandasDataset`.\n        \"\"\"\n        return self.to_numpy()\n\n    def _repartition(self, axis: Optional[int] = None) -> Self:\n        \"\"\"\n        Repartitioning Modin objects to get ideal partitions inside.\n\n        Allows to improve performance where the query compiler can't improve\n        yet by doing implicit repartitioning.\n\n        Parameters\n        ----------\n        axis : {0, 1, None}, optional\n            The axis along which the repartitioning occurs.\n            `None` is used for repartitioning along both axes.\n\n        Returns\n        -------\n        DataFrame or Series\n            The repartitioned dataframe or series, depending on the original type.\n        \"\"\"\n        allowed_axis_values = (0, 1, None)\n        if axis not in allowed_axis_values:\n            raise ValueError(\n                f\"Passed `axis` parameter: {axis}, but should be one of {allowed_axis_values}\"\n            )\n        return self.__constructor__(\n            query_compiler=self._query_compiler.repartition(axis=axis)\n        )\n\n    @disable_logging\n    def __getattribute__(self, item) -> Any:\n        \"\"\"\n        Return item from the 
`BasePandasDataset`.\n\n        Parameters\n        ----------\n        item : hashable\n            Item to get.\n\n        Returns\n        -------\n        Any\n        \"\"\"\n        # NOTE that to get an attribute, python calls __getattribute__() first and\n        # then falls back to __getattr__() if the former raises an AttributeError.\n\n        if item not in EXTENSION_NO_LOOKUP:\n            extensions_result = self._getattribute__from_extension_impl(\n                item, __class__._extensions\n            )\n            if extensions_result is not sentinel:\n                return extensions_result\n\n        attr = super().__getattribute__(item)\n        if item not in _DEFAULT_BEHAVIOUR and not self._query_compiler.lazy_shape:\n            # We default to pandas on empty DataFrames. This avoids a large amount of\n            # pain in underlying implementation and returns a result immediately rather\n            # than dealing with the edge cases that empty DataFrames have.\n            if callable(attr) and self.empty and hasattr(self._pandas_class, item):\n\n                def default_handler(*args, **kwargs):\n                    return self._default_to_pandas(item, *args, **kwargs)\n\n                return default_handler\n        return attr\n\n    def __array_ufunc__(\n        self, ufunc: np.ufunc, method: str, *inputs: Any, **kwargs: Any\n    ) -> DataFrame | Series | Any:\n        \"\"\"\n        Apply the `ufunc` to the `BasePandasDataset`.\n\n        Parameters\n        ----------\n        ufunc : np.ufunc\n            The NumPy ufunc to apply.\n        method : str\n            The method to apply.\n        *inputs : tuple\n            The inputs to the ufunc.\n        **kwargs : dict\n            Additional keyword arguments.\n\n        Returns\n        -------\n        BasePandasDataset\n            The result of the ufunc applied to the `BasePandasDataset`.\n        \"\"\"\n        return 
self._query_compiler.do_array_ufunc_implementation(\n            self, ufunc, method, *inputs, **kwargs\n        )\n\n    def __array_function__(\n        self,\n        func: np.func,\n        types: tuple,\n        args: tuple,\n        kwargs: dict,\n    ) -> DataFrame | Series | Any:\n        \"\"\"\n        Apply `func` to the `BasePandasDataset`.\n\n        This function implements NEP18-style dispatch for certain NumPy functions:\n        https://numpy.org/neps/nep-0018-array-function-protocol.html#nep18\n\n        By default, this function will transparently call __array__, followed by __array_function__\n        on the returned NumPy array. We implement this function to prevent bugs with the extension\n        system when another backend overrides this method.\n\n        Parameters\n        ----------\n        func : np.func\n            The NumPy func to apply.\n        types : tuple\n            The types of the args.\n        args : tuple\n            The args to the func.\n        kwargs : dict\n            Additional keyword arguments.\n\n        Returns\n        -------\n        DataFrame | Series | Any\n            The result of applying the function to this dataset. 
By default, it will return\n            a NumPy array.\n        \"\"\"\n        return self._query_compiler.do_array_function_implementation(\n            self, func, types, args, kwargs\n        )\n\n    # namespace for additional Modin functions that are not available in Pandas\n    modin: ModinAPI = CachedAccessor(\"modin\", ModinAPI)\n\n    @disable_logging\n    def is_backend_pinned(self) -> bool:\n        \"\"\"\n        Get whether this object's data is pinned to a particular backend.\n\n        Returns\n        -------\n        bool\n            True if the data is pinned.\n        \"\"\"\n        return self._pinned\n\n    def _set_backend_pinned(\n        self, pinned: bool, inplace: bool = False\n    ) -> Optional[Self]:\n        \"\"\"\n        Update whether this object's data is pinned to a particular backend.\n\n        Parameters\n        ----------\n        pinned : bool\n            Whether the data is pinned.\n\n        inplace : bool, default: False\n            Whether to update the object in place.\n\n        Returns\n        -------\n        Optional[Self]\n            The object with the new pin state, if `inplace` is False. Otherwise, None.\n        \"\"\"\n        change = (self.is_backend_pinned() and not pinned) or (\n            not self.is_backend_pinned() and pinned\n        )\n        if inplace:\n            self._pinned = pinned\n            return None\n        else:\n            if change:\n                new_obj = self.__constructor__(query_compiler=self._query_compiler)\n                new_obj._pinned = pinned\n                return new_obj\n            return self\n\n    @doc(SET_BACKEND_DOC, class_name=__qualname__)\n    def set_backend(\n        self, backend: str, inplace: bool = False, *, switch_operation: str = None\n    ) -> Optional[Self]:\n        # TODO(https://github.com/modin-project/modin/issues/7467): refactor\n        # to avoid this cyclic import in most places we do I/O, e.g. 
in\n        # modin/pandas/io.py\n        from modin.core.execution.dispatching.factories.dispatcher import (\n            FactoryDispatcher,\n        )\n\n        progress_split_count = 2\n        progress_iter = iter(range(progress_split_count))\n        self_backend = self.get_backend()\n        normalized_backend = Backend.normalize(backend)\n        if normalized_backend != self_backend:\n            max_rows, max_cols = self._query_compiler._max_shape()\n            # Format the transfer string to be relatively short, but informative. Each\n            # backend is given an allowable width of 10 and the shape integers use the\n            # general format to use scientific notation when needed.\n            std_field_length = 10\n            operation_str = switch_operation\n            self_backend_str = self_backend\n            normalized_backend_str = normalized_backend\n            if switch_operation is None:\n                operation_str = \"\"\n            # Provide the switch_operation; and specifically only the method, so\n            # DataFrame.merge would become \"merge\"\n            operation_str = operation_str.split(\".\")[-1]\n            # truncate all strings to the field length if needed\n            if len(operation_str) > 15:\n                operation_str = operation_str[: 15 - 3] + \"...\"\n            if len(self_backend_str) > std_field_length:\n                self_backend_str = self_backend_str[: std_field_length - 3] + \"...\"\n            if len(normalized_backend_str) > std_field_length:\n                normalized_backend_str = (\n                    normalized_backend_str[: std_field_length - 3] + \"...\"\n                )\n\n            # format the estimated max shape\n            max_shape_str = f\"({max_rows:.0g}, {max_cols:.0g})\"\n            desc = (\n                f\"Transfer: {self_backend_str:>10.10} → {normalized_backend_str:<10.10} \"\n                + f\" | {operation_str:^15.15} ≃ {max_shape_str:<10}\"\n    
        )\n\n            if ShowBackendSwitchProgress.get():\n                try:\n                    from tqdm.auto import trange\n\n                    progress_iter = iter(\n                        trange(\n                            progress_split_count, desc=desc, bar_format=\"{desc} [{bar}]\"\n                        )\n                    )\n                except ImportError:\n                    # Fallback to simple print statement when tqdm is not available.\n                    # Print to stderr to match tqdm's behavior.\n\n                    print(desc, file=sys.stderr)  # noqa: T201\n            else:\n                # Use a dummy progress iterator with no side effects if we do\n                # not want to show the progress.\n                progress_iter = iter(range(progress_split_count))\n        else:\n            return None if inplace else self\n        # If tqdm is imported and a conversion is necessary, then display a progress bar.\n        # Otherwise, use fallback print statements.\n        next(progress_iter)\n\n        # Attempt to transfer data based on the following preference order.\n        # 1. The `self._query_compiler.move_to()`, if implemented.\n        # 2. Otherwise, tries the other `query_compiler`'s `move_from()` method.\n        # 3. 
If both methods return `NotImplemented`, it falls back to materializing\n        #    as a pandas DataFrame, and then creates a new `query_compiler` on the\n        #    specified backend using `from_pandas`.\n        query_compiler = self._query_compiler.move_to(backend)\n        if query_compiler is NotImplemented:\n            query_compiler = FactoryDispatcher._get_prepared_factory_for_backend(\n                backend\n            ).io_cls.query_compiler_cls.move_from(\n                self._query_compiler,\n            )\n        if query_compiler is NotImplemented:\n            pandas_self = self._query_compiler.to_pandas()\n            next(progress_iter)\n            query_compiler = FactoryDispatcher.from_pandas(\n                df=pandas_self, backend=backend\n            )\n        else:\n            next(progress_iter)\n        try:\n            next(progress_iter)\n        except StopIteration:\n            # Last call to next informs tqdm that the operation is done\n            pass\n        if inplace:\n            self._update_inplace(query_compiler)\n            # Always unpin after an explicit set_backend operation\n            self._pinned = False\n            return None\n        else:\n            return self.__constructor__(query_compiler=query_compiler)\n\n    move_to = set_backend\n\n    @doc(GET_BACKEND_DOC, class_name=__qualname__)\n    @disable_logging\n    def get_backend(self) -> str:\n        return self._query_compiler.get_backend()\n\n    @disable_logging\n    def __setattr__(self, key: str, value: Any) -> None:\n        \"\"\"\n        Set attribute on this `BasePandasDataset`.\n\n        Parameters\n        ----------\n        key : str\n            The attribute name.\n        value : Any\n            The attribute value.\n\n        Returns\n        -------\n        None\n        \"\"\"\n        # An extension property is only accessible if the backend supports it.\n        extension = self._get_extension(key, 
__class__._extensions)\n        if extension is not sentinel and hasattr(extension, \"__set__\"):\n            return extension.__set__(self, value)\n        return super().__setattr__(key, value)\n\n    @disable_logging\n    def __delattr__(self, name) -> None:\n        \"\"\"\n        Delete attribute on this `BasePandasDataset`.\n\n        Parameters\n        ----------\n        name : str\n            The attribute name.\n\n        Returns\n        -------\n        None\n        \"\"\"\n        # An extension property is only accessible if the backend supports it.\n        extension = self._get_extension(name, __class__._extensions)\n        if extension is not sentinel and hasattr(extension, \"__delete__\"):\n            return extension.__delete__(self)\n        return super().__delattr__(name)\n\n    @disable_logging\n    @_inherit_docstrings(QueryCompilerCaster._get_query_compiler)\n    def _get_query_compiler(self):\n        return getattr(self, \"_query_compiler\", None)\n"
  },
  {
    "path": "modin/pandas/dataframe.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module houses ``DataFrame`` class, that is distributed version of ``pandas.DataFrame``.\"\"\"\n\nfrom __future__ import annotations\n\nimport datetime\nimport functools\nimport itertools\nimport os\nimport re\nimport sys\nimport warnings\nfrom typing import (\n    IO,\n    TYPE_CHECKING,\n    Any,\n    Hashable,\n    Iterable,\n    Iterator,\n    Optional,\n    Sequence,\n    Union,\n)\n\nimport numpy as np\nimport pandas\nfrom pandas import Categorical\nfrom pandas._libs import lib\nfrom pandas._typing import (\n    CompressionOptions,\n    FilePath,\n    IndexLabel,\n    Scalar,\n    StorageOptions,\n    WriteBuffer,\n)\nfrom pandas.core.common import apply_if_callable, get_cython_func\nfrom pandas.core.dtypes.common import (\n    infer_dtype_from_object,\n    is_dict_like,\n    is_list_like,\n    is_numeric_dtype,\n)\nfrom pandas.core.indexes.frozen import FrozenList\nfrom pandas.io.formats.info import DataFrameInfo\nfrom pandas.util._decorators import doc\nfrom pandas.util._validators import validate_bool_kwarg\n\nfrom modin.config import PersistentPickle\nfrom modin.core.storage_formats.pandas.query_compiler_caster import (\n    EXTENSION_DICT_TYPE,\n    
EXTENSION_NO_LOOKUP,\n)\nfrom modin.error_message import ErrorMessage\nfrom modin.logging import disable_logging\nfrom modin.pandas.io import from_non_pandas, from_pandas, to_pandas\nfrom modin.utils import (\n    MODIN_UNNAMED_SERIES_LABEL,\n    _inherit_docstrings,\n    expanduser_path_arg,\n    hashable,\n    import_optional_dependency,\n    sentinel,\n    try_cast_to_pandas,\n)\n\nfrom .accessor import CachedAccessor, SparseFrameAccessor\nfrom .base import _ATTRS_NO_LOOKUP, BasePandasDataset\nfrom .groupby import DataFrameGroupBy\nfrom .iterator import PartitionIterator\nfrom .series import Series\nfrom .utils import (\n    GET_BACKEND_DOC,\n    SET_BACKEND_DOC,\n    SET_DATAFRAME_ATTRIBUTE_WARNING,\n    _doc_binary_op,\n    cast_function_modin2pandas,\n)\n\nif TYPE_CHECKING:\n    from typing_extensions import Self\n\n    from modin.core.storage_formats import BaseQueryCompiler\n\n\n@_inherit_docstrings(\n    pandas.DataFrame, excluded=[pandas.DataFrame.__init__], apilink=\"pandas.DataFrame\"\n)\nclass DataFrame(BasePandasDataset):\n    \"\"\"\n    Modin distributed representation of ``pandas.DataFrame``.\n\n    Internally, the data can be divided into partitions along both columns and rows\n    in order to parallelize computations and utilize the user's hardware as much as possible.\n\n    Inherits functionality common to ``DataFrame`` and ``Series`` from the\n    `BasePandasDataset` class.\n\n    Parameters\n    ----------\n    data : DataFrame, Series, pandas.DataFrame, ndarray, Iterable or dict, optional\n        Dict can contain ``Series``, arrays, constants, dataclass or list-like objects.\n        If data is a dict, column order follows insertion-order.\n    index : Index or array-like, optional\n        Index to use for resulting frame. Will default to ``RangeIndex`` if no\n        indexing information part of input data and no index provided.\n    columns : Index or array-like, optional\n        Column labels to use for resulting frame. 
Will default to\n        ``RangeIndex`` if no column labels are provided.\n    dtype : str, np.dtype, or pandas.ExtensionDtype, optional\n        Data type to force. Only a single dtype is allowed. If None, infer.\n    copy : bool, default: False\n        Copy data from inputs. Only affects ``pandas.DataFrame`` / 2d ndarray input.\n    query_compiler : BaseQueryCompiler, optional\n        A query compiler object to create the ``DataFrame`` from.\n\n    Notes\n    -----\n    ``DataFrame`` can be created either from passed `data` or `query_compiler`. If both\n    parameters are provided, data source will be prioritized in the next order:\n\n    1) Modin ``DataFrame`` or ``Series`` passed with `data` parameter.\n    2) Query compiler from the `query_compiler` parameter.\n    3) Various pandas/NumPy/Python data structures passed with `data` parameter.\n\n    The last option is less desirable since import of such data structures is very\n    inefficient, please use previously created Modin structures from the first two\n    options or import data using highly efficient Modin IO tools (for example\n    ``pd.read_csv``).\n    \"\"\"\n\n    _pandas_class = pandas.DataFrame\n    _extensions: EXTENSION_DICT_TYPE = EXTENSION_DICT_TYPE(dict)\n\n    def __init__(\n        self,\n        data=None,\n        index=None,\n        columns=None,\n        dtype=None,\n        copy=None,\n        query_compiler: BaseQueryCompiler = None,\n    ) -> None:\n        from modin.numpy import array\n\n        # Siblings are other dataframes that share the same query compiler. 
We\n        # use this list to update inplace when there is a shallow copy.\n        self._siblings = []\n        if isinstance(data, (DataFrame, Series)):\n            self._query_compiler = data._query_compiler.copy()\n            if index is not None and any(i not in data.index for i in index):\n                raise NotImplementedError(\n                    \"Passing non-existant columns or index values to constructor not\"\n                    + \" yet implemented.\"\n                )\n            if isinstance(data, Series):\n                # We set the column name if it is not in the provided Series\n                if data.name is None:\n                    self.columns = [0] if columns is None else columns\n                # If the columns provided are not in the named Series, pandas clears\n                # the DataFrame and sets columns to the columns provided.\n                elif columns is not None and data.name not in columns:\n                    self._query_compiler = from_pandas(\n                        pandas.DataFrame(columns=columns)\n                    )._query_compiler\n                if index is not None:\n                    self._query_compiler = data.loc[index]._query_compiler\n            elif columns is None and index is None:\n                data._add_sibling(self)\n            else:\n                if columns is not None and any(i not in data.columns for i in columns):\n                    raise NotImplementedError(\n                        \"Passing non-existant columns or index values to constructor not\"\n                        + \" yet implemented.\"\n                    )\n                if index is None:\n                    index = slice(None)\n                if columns is None:\n                    columns = slice(None)\n                self._query_compiler = data.loc[index, columns]._query_compiler\n        elif isinstance(data, array):\n            self._query_compiler = data._query_compiler.copy()\n            
if copy is not None and not copy:\n                data._add_sibling(self)\n            if columns is not None and not isinstance(columns, pandas.Index):\n                columns = pandas.Index(columns)\n            if columns is not None:\n                obj_with_new_columns = self.set_axis(columns, axis=1, copy=False)\n                self._update_inplace(obj_with_new_columns._query_compiler)\n            if index is not None:\n                obj_with_new_index = self.set_axis(index, axis=0, copy=False)\n                self._update_inplace(obj_with_new_index._query_compiler)\n            if dtype is not None:\n                casted_obj = self.astype(dtype, copy=False)\n                self._query_compiler = casted_obj._query_compiler\n        # Check type of data and use appropriate constructor\n        elif query_compiler is None:\n            distributed_frame = from_non_pandas(data, index, columns, dtype)\n            if distributed_frame is not None:\n                self._query_compiler = distributed_frame._query_compiler\n                return\n\n            if isinstance(data, pandas.Index):\n                pass\n            elif (\n                is_list_like(data)\n                and not is_dict_like(data)\n                and not isinstance(data, np.ndarray)\n            ):\n                old_dtype = getattr(data, \"dtype\", None)\n                values = [\n                    obj._to_pandas() if isinstance(obj, Series) else obj for obj in data\n                ]\n                try:\n                    data = type(data)(values, dtype=old_dtype)\n                except TypeError:\n                    data = values\n            elif is_dict_like(data) and not isinstance(\n                data, (pandas.Series, Series, pandas.DataFrame, DataFrame)\n            ):\n                if columns is not None:\n                    data = {key: value for key, value in data.items() if key in columns}\n\n                if len(data) and 
all(isinstance(v, Series) for v in data.values()):\n                    from .general import concat\n\n                    new_qc = concat(\n                        data.values(), axis=1, keys=data.keys()\n                    )._query_compiler\n\n                    if dtype is not None:\n                        new_qc = new_qc.astype({col: dtype for col in new_qc.columns})\n                    if index is not None:\n                        new_qc = new_qc.reindex(axis=0, labels=index)\n                    if columns is not None:\n                        new_qc = new_qc.reindex(axis=1, labels=columns)\n\n                    self._query_compiler = new_qc\n                    return\n\n                data = {\n                    k: v._to_pandas() if isinstance(v, Series) else v\n                    for k, v in data.items()\n                }\n            pandas_df = pandas.DataFrame(\n                data=data, index=index, columns=columns, dtype=dtype, copy=copy\n            )\n            if pandas_df.size >= 1_000_000:\n                warnings.warn(\n                    \"Distributing {} object. 
This may take some time.\".format(\n                        type(data)\n                    )\n                )\n            self._query_compiler = from_pandas(pandas_df)._query_compiler\n        else:\n            self._query_compiler = query_compiler\n\n    def __repr__(self) -> str:\n        \"\"\"\n        Return a string representation for a particular ``DataFrame``.\n\n        Returns\n        -------\n        str\n        \"\"\"\n        num_rows = pandas.get_option(\"display.max_rows\") or len(self)\n        num_cols = pandas.get_option(\n            \"display.max_columns\"\n        ) or self._query_compiler.get_axis_len(1)\n        result = repr(self._build_repr_df(num_rows, num_cols))\n        if len(self) > num_rows or self._query_compiler.get_axis_len(1) > num_cols:\n            # The split here is so that we don't repr pandas row lengths.\n            return result.rsplit(\"\\n\\n\", 1)[0] + \"\\n\\n[{0} rows x {1} columns]\".format(\n                *self.shape\n            )\n        else:\n            return result\n\n    def _repr_html_(self) -> str:  # pragma: no cover\n        \"\"\"\n        Return a html representation for a particular ``DataFrame``.\n\n        Returns\n        -------\n        str\n        \"\"\"\n        num_rows = pandas.get_option(\"display.max_rows\") or 60\n        num_cols = pandas.get_option(\"display.max_columns\") or 20\n\n        # We use pandas _repr_html_ to get a string of the HTML representation\n        # of the dataframe.\n        result = self._build_repr_df(num_rows, num_cols)._repr_html_()\n        if len(self) > num_rows or self._query_compiler.get_axis_len(1) > num_cols:\n            # We split so that we insert our correct dataframe dimensions.\n            return result.split(\"<p>\")[\n                0\n            ] + \"<p>{0} rows x {1} columns</p>\\n</div>\".format(*self.shape)\n        else:\n            return result\n\n    def _get_columns(self) -> pandas.Index:\n        \"\"\"\n        Get the 
columns for this ``DataFrame``.\n\n        Returns\n        -------\n        pandas.Index\n            The union of all indexes across the partitions.\n        \"\"\"\n        return self._query_compiler.columns\n\n    def _set_columns(self, new_columns) -> None:\n        \"\"\"\n        Set the columns for this ``DataFrame``.\n\n        Parameters\n        ----------\n        new_columns : list-like, Index\n            The new index to set.\n        \"\"\"\n        self._query_compiler.columns = new_columns\n\n    columns: pandas.Index = property(_get_columns, _set_columns)\n\n    @property\n    def ndim(self) -> int:  # noqa: RT01, D200\n        \"\"\"\n        Return the number of dimensions of the underlying data, by definition 2.\n        \"\"\"\n        return 2\n\n    def drop_duplicates(\n        self, subset=None, *, keep=\"first\", inplace=False, ignore_index=False\n    ) -> Union[DataFrame, None]:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return ``DataFrame`` with duplicate rows removed.\n        \"\"\"\n        return super(DataFrame, self).drop_duplicates(\n            subset=subset, keep=keep, inplace=inplace, ignore_index=ignore_index\n        )\n\n    @property\n    def dtypes(self) -> pandas.Series:  # noqa: RT01, D200\n        \"\"\"\n        Return the dtypes in the ``DataFrame``.\n        \"\"\"\n        return self._query_compiler.dtypes\n\n    def duplicated(self, subset=None, keep=\"first\") -> Series:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return boolean ``Series`` denoting duplicate rows.\n        \"\"\"\n        df = self[subset] if subset is not None else self\n        new_qc = df._query_compiler.duplicated(keep=keep)\n        duplicates = self._reduce_dimension(new_qc)\n        return duplicates\n\n    @property\n    def empty(self) -> bool:  # noqa: RT01, D200\n        \"\"\"\n        Indicate whether ``DataFrame`` is empty.\n        \"\"\"\n        return self._query_compiler.get_axis_len(1) == 0 or len(self) == 
0\n\n    @property\n    def axes(self) -> list[pandas.Index]:  # noqa: RT01, D200\n        \"\"\"\n        Return a list representing the axes of the ``DataFrame``.\n        \"\"\"\n        return [self.index, self.columns]\n\n    @property\n    def shape(self) -> tuple[int, int]:  # noqa: RT01, D200\n        \"\"\"\n        Return a tuple representing the dimensionality of the ``DataFrame``.\n        \"\"\"\n        return len(self), self._query_compiler.get_axis_len(1)\n\n    def add_prefix(self, prefix, axis=None) -> DataFrame:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Prefix labels with string `prefix`.\n        \"\"\"\n        axis = 1 if axis is None else self._get_axis_number(axis)\n        return self.__constructor__(\n            query_compiler=self._query_compiler.add_prefix(prefix, axis)\n        )\n\n    def add_suffix(self, suffix, axis=None) -> DataFrame:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Suffix labels with string `suffix`.\n        \"\"\"\n        axis = 1 if axis is None else self._get_axis_number(axis)\n        return self.__constructor__(\n            query_compiler=self._query_compiler.add_suffix(suffix, axis)\n        )\n\n    def map(self, func, na_action: Optional[str] = None, **kwargs) -> DataFrame:\n        if not callable(func):\n            raise ValueError(\"'{0}' object is not callable\".format(type(func)))\n        return self.__constructor__(\n            query_compiler=self._query_compiler.map(func, na_action=na_action, **kwargs)\n        )\n\n    def applymap(self, func, na_action: Optional[str] = None, **kwargs) -> DataFrame:\n        warnings.warn(\n            \"DataFrame.applymap has been deprecated. 
Use DataFrame.map instead.\",\n            FutureWarning,\n        )\n        return self.map(func, na_action=na_action, **kwargs)\n\n    def apply(\n        self,\n        func,\n        axis=0,\n        raw=False,\n        result_type=None,\n        args=(),\n        by_row=\"compat\",\n        engine=\"python\",\n        engine_kwargs=None,\n        **kwargs,\n    ) -> Union[DataFrame, Series]:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Apply a function along an axis of the ``DataFrame``.\n        \"\"\"\n        if by_row != \"compat\" or engine != \"python\" or engine_kwargs:\n            # TODO: add test\n            return self._default_to_pandas(\n                pandas.DataFrame.apply,\n                func=func,\n                axis=axis,\n                raw=raw,\n                result_type=result_type,\n                args=args,\n                by_row=by_row,\n                engine=engine,\n                engine_kwargs=engine_kwargs,\n                **kwargs,\n            )\n\n        func = cast_function_modin2pandas(func)\n        axis = self._get_axis_number(axis)\n        query_compiler = super(DataFrame, self).apply(\n            func,\n            axis=axis,\n            raw=raw,\n            result_type=result_type,\n            args=args,\n            **kwargs,\n        )\n        if not isinstance(query_compiler, type(self._query_compiler)):\n            # A scalar was returned\n            return query_compiler\n\n        if result_type == \"reduce\":\n            output_type = Series\n        elif result_type == \"broadcast\":\n            output_type = DataFrame\n        # the 'else' branch also handles 'result_type == \"expand\"' since it makes the output type\n        # depend on the `func` result (Series for a scalar, DataFrame for list-like)\n        else:\n            reduced_index = pandas.Index([MODIN_UNNAMED_SERIES_LABEL])\n            if query_compiler.get_axis(axis).equals(\n                reduced_index\n            
) or query_compiler.get_axis(axis ^ 1).equals(reduced_index):\n                output_type = Series\n            else:\n                output_type = DataFrame\n\n        return output_type(query_compiler=query_compiler)\n\n    def groupby(\n        self,\n        by=None,\n        axis=lib.no_default,\n        level=None,\n        as_index=True,\n        sort=True,\n        group_keys=True,\n        observed=lib.no_default,\n        dropna: bool = True,\n    ):  # noqa: PR01, RT01, D200\n        \"\"\"\n        Group ``DataFrame`` using a mapper or by a ``Series`` of columns.\n        \"\"\"\n        if axis is not lib.no_default:\n            axis = self._get_axis_number(axis)\n            if axis == 1:\n                warnings.warn(\n                    \"DataFrame.groupby with axis=1 is deprecated. Do \"\n                    + \"`frame.T.groupby(...)` without axis instead.\",\n                    FutureWarning,\n                )\n            else:\n                warnings.warn(\n                    \"The 'axis' keyword in DataFrame.groupby is deprecated and \"\n                    + \"will be removed in a future version.\",\n                    FutureWarning,\n                )\n        else:\n            axis = 0\n\n        axis = self._get_axis_number(axis)\n        idx_name = None\n        # Drop here indicates whether or not to drop the data column before doing the\n        # groupby. The typical pandas behavior is to drop when the data came from this\n        # dataframe. 
When a string, Series directly from this dataframe, or list of\n        # strings is passed in, the data used for the groupby is dropped before the\n        # groupby takes place.\n        drop = False\n\n        return_tuple_when_iterating = False\n        if (\n            not isinstance(by, (pandas.Series, Series))\n            and is_list_like(by)\n            and len(by) == 1\n        ):\n            by = by[0]\n            return_tuple_when_iterating = True\n\n        if callable(by):\n            by = self.index.map(by)\n        elif hashable(by) and not isinstance(by, (pandas.Grouper, FrozenList)):\n            drop = by in self.columns\n            idx_name = by\n            if by is not None and by in self._query_compiler.get_index_names(axis):\n                # In this case we pass the string value of the name through to the\n                # partitions. This is more efficient than broadcasting the values.\n                level, by = by, None\n            elif level is None:\n                by = self.__getitem__(by)._query_compiler\n        elif isinstance(by, Series):\n            drop = by._parent is self\n            idx_name = by.name\n            by = by._query_compiler\n        elif isinstance(by, pandas.Grouper):\n            drop = by.key in self\n        elif is_list_like(by):\n            # fastpath for multi column groupby\n            if axis == 0 and all(\n                (\n                    (hashable(o) and (o in self))\n                    or isinstance(o, Series)\n                    or (isinstance(o, pandas.Grouper) and o.key in self)\n                    or (is_list_like(o) and len(o) == len(self._get_axis(axis)))\n                )\n                for o in by\n            ):\n                has_external = False\n                processed_by = []\n\n                for current_by in by:\n                    if isinstance(current_by, pandas.Grouper):\n                        processed_by.append(current_by)\n                      
  has_external = True\n                    elif hashable(current_by):\n                        processed_by.append(current_by)\n                    elif isinstance(current_by, Series):\n                        if current_by._parent is self:\n                            processed_by.append(current_by.name)\n                        else:\n                            processed_by.append(current_by._query_compiler)\n                            has_external = True\n                    else:\n                        has_external = True\n                        processed_by.append(current_by)\n\n                by = processed_by\n\n                if not has_external:\n                    by = self[processed_by]._query_compiler\n\n                drop = True\n            else:\n                mismatch = len(by) != len(self._get_axis(axis))\n                if mismatch and all(\n                    hashable(obj)\n                    and (\n                        obj in self or obj in self._query_compiler.get_index_names(axis)\n                    )\n                    for obj in by\n                ):\n                    # In the future, we will need to add logic to handle this, but for now\n                    # we default to pandas in this case.\n                    pass\n                elif mismatch and any(\n                    hashable(obj) and obj not in self.columns for obj in by\n                ):\n                    names = [o.name if isinstance(o, Series) else o for o in by]\n                    raise KeyError(next(x for x in names if x not in self))\n        return DataFrameGroupBy(\n            self,\n            by,\n            axis,\n            level,\n            as_index,\n            sort,\n            group_keys,\n            idx_name,\n            observed=observed,\n            drop=drop,\n            dropna=dropna,\n            return_tuple_when_iterating=return_tuple_when_iterating,\n            backend_pinned=self.is_backend_pinned(),\n      
  )\n\n    def keys(self) -> pandas.Index:  # noqa: RT01, D200\n        \"\"\"\n        Get columns of the ``DataFrame``.\n        \"\"\"\n        return self.columns\n\n    def transpose(self, copy=False, *args) -> DataFrame:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Transpose index and columns.\n        \"\"\"\n        # FIXME: Judging by pandas docs `*args` serves only compatibility purpose\n        # and does not affect the result, we shouldn't pass it to the query compiler.\n        return self.__constructor__(\n            query_compiler=self._query_compiler.transpose(*args)\n        )\n\n    # To enable dynamic backend switching, we must use a `def` so the lookup of `self.transpose`\n    # is performed dynamically, whereas declaring `T = property(transpose)` makes it always use\n    # the originally-defined version without the switching wrapper.\n    @property\n    def T(self) -> DataFrame:\n        return self.transpose()\n\n    def add(\n        self, other, axis=\"columns\", level=None, fill_value=None\n    ) -> DataFrame:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Get addition of ``DataFrame`` and `other`, element-wise (binary operator `add`).\n        \"\"\"\n        return self._binary_op(\n            \"add\",\n            other,\n            axis=axis,\n            level=level,\n            fill_value=fill_value,\n            broadcast=isinstance(other, Series),\n        )\n\n    def assign(self, **kwargs) -> DataFrame:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Assign new columns to a ``DataFrame``.\n        \"\"\"\n        df = self.copy()\n        for k, v in kwargs.items():\n            if callable(v):\n                df[k] = v(df)\n            else:\n                df[k] = v\n        return df\n\n    def boxplot(\n        self,\n        column=None,\n        by=None,\n        ax=None,\n        fontsize=None,\n        rot=0,\n        grid=True,\n        figsize=None,\n        layout=None,\n        return_type=None,\n    
    backend=None,\n        **kwargs,\n    ):  # noqa: PR01, RT01, D200\n        \"\"\"\n        Make a box plot from ``DataFrame`` columns.\n        \"\"\"\n        return to_pandas(self).boxplot(\n            column=column,\n            by=by,\n            ax=ax,\n            fontsize=fontsize,\n            rot=rot,\n            grid=grid,\n            figsize=figsize,\n            layout=layout,\n            return_type=return_type,\n            backend=backend,\n            **kwargs,\n        )\n\n    def combine(\n        self, other, func, fill_value=None, overwrite=True\n    ) -> DataFrame:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Perform column-wise combine with another ``DataFrame``.\n        \"\"\"\n        return super(DataFrame, self).combine(\n            other, func, fill_value=fill_value, overwrite=overwrite\n        )\n\n    def compare(\n        self,\n        other,\n        align_axis=1,\n        keep_shape: bool = False,\n        keep_equal: bool = False,\n        result_names=(\"self\", \"other\"),\n    ) -> DataFrame:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Compare to another ``DataFrame`` and show the differences.\n        \"\"\"\n        if not isinstance(other, DataFrame):\n            raise TypeError(f\"Cannot compare DataFrame to {type(other)}\")\n        other = self._validate_other(other, 0, compare_index=True)\n        return self.__constructor__(\n            query_compiler=self._query_compiler.compare(\n                other,\n                align_axis=align_axis,\n                keep_shape=keep_shape,\n                keep_equal=keep_equal,\n                result_names=result_names,\n            )\n        )\n\n    def corr(\n        self, method=\"pearson\", min_periods=1, numeric_only=False\n    ) -> DataFrame:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Compute pairwise correlation of columns, excluding NA/null values.\n        \"\"\"\n        return self.__constructor__(\n            
query_compiler=self._query_compiler.corr(\n                method=method,\n                min_periods=min_periods,\n                numeric_only=numeric_only,\n            )\n        )\n\n    def corrwith(\n        self, other, axis=0, drop=False, method=\"pearson\", numeric_only=False\n    ) -> Series:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Compute pairwise correlation.\n        \"\"\"\n        if not isinstance(other, (Series, DataFrame)):\n            raise TypeError(f\"unsupported type: {type(other)}\")\n        return self.__constructor__(\n            query_compiler=self._query_compiler.corrwith(\n                other=other._query_compiler,\n                axis=axis,\n                drop=drop,\n                method=method,\n                numeric_only=numeric_only,\n            )\n        )\n\n    def cov(\n        self, min_periods=None, ddof: Optional[int] = 1, numeric_only=False\n    ) -> DataFrame:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Compute pairwise covariance of columns, excluding NA/null values.\n        \"\"\"\n        cov_df = self\n        if numeric_only:\n            cov_df = self.drop(\n                columns=[\n                    i for i in self.dtypes.index if not is_numeric_dtype(self.dtypes[i])\n                ]\n            )\n\n        if min_periods is not None and min_periods > len(cov_df):\n            result = np.empty((cov_df.shape[1], cov_df.shape[1]))\n            result.fill(np.nan)\n            return cov_df.__constructor__(result)\n\n        return cov_df.__constructor__(\n            query_compiler=cov_df._query_compiler.cov(\n                min_periods=min_periods, ddof=ddof\n            )\n        )\n\n    def dot(self, other) -> Union[DataFrame, Series]:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Compute the matrix multiplication between the ``DataFrame`` and `other`.\n        \"\"\"\n        if isinstance(other, BasePandasDataset):\n            common = 
self.columns.union(other.index)\n            if len(common) > self._query_compiler.get_axis_len(1) or len(common) > len(\n                other\n            ):\n                raise ValueError(\"Matrices are not aligned\")\n\n            qc = other.reindex(index=common)._query_compiler\n            if isinstance(other, DataFrame):\n                return self.__constructor__(\n                    query_compiler=self._query_compiler.dot(\n                        qc, squeeze_self=False, squeeze_other=False\n                    )\n                )\n            else:\n                return self._reduce_dimension(\n                    query_compiler=self._query_compiler.dot(\n                        qc, squeeze_self=False, squeeze_other=True\n                    )\n                )\n\n        other = np.asarray(other)\n        if self.shape[1] != other.shape[0]:\n            raise ValueError(\n                \"Dot product shape mismatch, {} vs {}\".format(self.shape, other.shape)\n            )\n\n        if len(other.shape) > 1:\n            return self.__constructor__(\n                query_compiler=self._query_compiler.dot(other, squeeze_self=False)\n            )\n\n        return self._reduce_dimension(\n            query_compiler=self._query_compiler.dot(other, squeeze_self=False)\n        )\n\n    def eq(\n        self, other, axis=\"columns\", level=None\n    ) -> DataFrame:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Perform equality comparison of ``DataFrame`` and `other` (binary operator `eq`).\n        \"\"\"\n        return self._binary_op(\n            \"eq\", other, axis=axis, level=level, broadcast=isinstance(other, Series)\n        )\n\n    def equals(self, other) -> bool:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Test whether two objects contain the same elements.\n        \"\"\"\n        if isinstance(other, pandas.DataFrame):\n            # Copy into a Modin DataFrame to simplify logic below\n            other = 
self.__constructor__(other)\n\n        if (\n            type(self) is not type(other)\n            or not self.index.equals(other.index)\n            or not self.columns.equals(other.columns)\n        ):\n            return False\n\n        result = self.__constructor__(\n            query_compiler=self._query_compiler.equals(other._query_compiler)\n        )\n        return result.all(axis=None)\n\n    def _update_var_dicts_in_kwargs(self, expr, kwargs) -> None:\n        \"\"\"\n        Copy variables with \"@\" prefix in `local_dict` and `global_dict` keys of kwargs.\n\n        Parameters\n        ----------\n        expr : str\n            The expression string to search variables with \"@\" prefix.\n        kwargs : dict\n            See the documentation for eval() for complete details on the keyword arguments accepted by query().\n        \"\"\"\n        if \"@\" not in expr:\n            return\n        frame = sys._getframe()\n        try:\n            # TODO(https://github.com/modin-project/modin/issues/4478): fix this\n            f_locals = frame.f_back.f_back.f_back.f_back.f_back.f_back.f_locals\n            f_globals = frame.f_back.f_back.f_back.f_back.f_back.f_back.f_globals\n        finally:\n            del frame\n        local_names = set(re.findall(r\"@([\\w]+)\", expr))\n        local_dict = {}\n        global_dict = {}\n\n        for name in local_names:\n            for dct_out, dct_in in ((local_dict, f_locals), (global_dict, f_globals)):\n                try:\n                    dct_out[name] = dct_in[name]\n                except KeyError:\n                    pass\n\n        if local_dict:\n            local_dict.update(kwargs.get(\"local_dict\") or {})\n            kwargs[\"local_dict\"] = local_dict\n        if global_dict:\n            global_dict.update(kwargs.get(\"global_dict\") or {})\n            kwargs[\"global_dict\"] = global_dict\n\n    def eval(self, expr, inplace=False, **kwargs):  # noqa: PR01, RT01, D200\n        \"\"\"\n  
      Evaluate a string describing operations on ``DataFrame`` columns.\n        \"\"\"\n        from modin.core.computation.eval import _check_engine\n\n        self._update_var_dicts_in_kwargs(expr, kwargs)\n        inplace = validate_bool_kwarg(inplace, \"inplace\")\n\n        if _check_engine(kwargs.get(\"engine\", None)) == \"numexpr\":\n            # on numexpr engine, pandas.eval returns np.array if input is not of pandas\n            # type, so we can't use pandas eval [1]. Even if we could, pandas eval seems\n            # to convert all the data to numpy and then do the numexpr add, which is\n            # slow for modin. The user would not really be getting the benefit of\n            # numexpr.\n            # [1] https://github.com/pandas-dev/pandas/blob/934eebb532cf50e872f40638a788000be6e4dda4/pandas/core/computation/align.py#L78\n            return self._default_to_pandas(\n                pandas.DataFrame.eval, expr, inplace=inplace, **kwargs\n            )\n\n        from modin.core.computation.eval import eval as _eval\n\n        kwargs[\"level\"] = kwargs.pop(\"level\", 0) + 1\n        index_resolvers = self._get_index_resolvers()\n        column_resolvers = self._get_cleaned_column_resolvers()\n        resolvers = column_resolvers, index_resolvers\n        if \"target\" not in kwargs:\n            kwargs[\"target\"] = self\n        kwargs[\"resolvers\"] = tuple(kwargs.get(\"resolvers\", ())) + resolvers\n\n        return _eval(expr, inplace=inplace, **kwargs)\n\n    def fillna(\n        self,\n        value=None,\n        *,\n        method=None,\n        axis=None,\n        inplace=False,\n        limit=None,\n        downcast=lib.no_default,\n    ) -> Union[DataFrame, None]:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Fill NA/NaN values using the specified method.\n        \"\"\"\n        return super(DataFrame, self).fillna(\n            squeeze_self=False,\n            squeeze_value=isinstance(value, Series),\n            
value=value,\n            method=method,\n            axis=axis,\n            inplace=inplace,\n            limit=limit,\n            downcast=downcast,\n        )\n\n    def floordiv(\n        self, other, axis=\"columns\", level=None, fill_value=None\n    ) -> DataFrame:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Get integer division of ``DataFrame`` and `other`, element-wise (binary operator `floordiv`).\n        \"\"\"\n        return self._binary_op(\n            \"floordiv\",\n            other,\n            axis=axis,\n            level=level,\n            fill_value=fill_value,\n            broadcast=isinstance(other, Series),\n        )\n\n    @classmethod\n    def from_dict(\n        cls, data, orient=\"columns\", dtype=None, columns=None\n    ) -> DataFrame:  # pragma: no cover # noqa: PR01, RT01, D200\n        \"\"\"\n        Construct ``DataFrame`` from dict of array-like or dicts.\n        \"\"\"\n        ErrorMessage.default_to_pandas(\"`from_dict`\")\n        return from_pandas(\n            pandas.DataFrame.from_dict(\n                data, orient=orient, dtype=dtype, columns=columns\n            )\n        )\n\n    @classmethod\n    def from_records(\n        cls,\n        data,\n        index=None,\n        exclude=None,\n        columns=None,\n        coerce_float=False,\n        nrows=None,\n    ) -> DataFrame:  # pragma: no cover # noqa: PR01, RT01, D200\n        \"\"\"\n        Convert structured or record ndarray to ``DataFrame``.\n        \"\"\"\n        ErrorMessage.default_to_pandas(\"`from_records`\")\n        return from_pandas(\n            pandas.DataFrame.from_records(\n                data,\n                index=index,\n                exclude=exclude,\n                columns=columns,\n                coerce_float=coerce_float,\n                nrows=nrows,\n            )\n        )\n\n    def ge(\n        self, other, axis=\"columns\", level=None\n    ) -> DataFrame:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Get 
greater than or equal comparison of ``DataFrame`` and `other`, element-wise (binary operator `ge`).\n        \"\"\"\n        return self._binary_op(\n            \"ge\", other, axis=axis, level=level, broadcast=isinstance(other, Series)\n        )\n\n    def gt(\n        self, other, axis=\"columns\", level=None\n    ) -> DataFrame:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Get greater than comparison of ``DataFrame`` and `other`, element-wise (binary operator `gt`).\n        \"\"\"\n        return self._binary_op(\n            \"gt\", other, axis=axis, level=level, broadcast=isinstance(other, Series)\n        )\n\n    def hist(\n        data,\n        column: IndexLabel | None = None,\n        by=None,\n        grid: bool = True,\n        xlabelsize: int | None = None,\n        xrot: float | None = None,\n        ylabelsize: int | None = None,\n        yrot: float | None = None,\n        ax=None,\n        sharex: bool = False,\n        sharey: bool = False,\n        figsize: tuple[int, int] | None = None,\n        layout: tuple[int, int] | None = None,\n        bins: int | Sequence[int] = 10,\n        backend: str | None = None,\n        legend: bool = False,\n        **kwargs,\n    ):  # pragma: no cover # noqa: PR01, RT01, D200\n        \"\"\"\n        Make a histogram of the ``DataFrame``.\n        \"\"\"\n        return data._default_to_pandas(\n            pandas.DataFrame.hist,\n            column=column,\n            by=by,\n            grid=grid,\n            xlabelsize=xlabelsize,\n            xrot=xrot,\n            ylabelsize=ylabelsize,\n            yrot=yrot,\n            ax=ax,\n            sharex=sharex,\n            sharey=sharey,\n            figsize=figsize,\n            layout=layout,\n            bins=bins,\n            backend=backend,\n            legend=legend,\n            **kwargs,\n        )\n\n    def info(\n        self,\n        verbose: Optional[bool] = None,\n        buf: Optional[IO[str]] = None,\n        max_cols: 
Optional[int] = None,\n        memory_usage: Optional[Union[bool, str]] = None,\n        show_counts: Optional[bool] = None,\n    ) -> None:  # noqa: PR01, D200\n        \"\"\"\n        Print a concise summary of the ``DataFrame``.\n        \"\"\"\n        info = DataFrameInfo(\n            data=self,\n            memory_usage=memory_usage,\n        )\n        info.render(\n            buf=buf,\n            max_cols=max_cols,\n            verbose=verbose,\n            show_counts=show_counts,\n        )\n\n    def insert(\n        self, loc, column, value, allow_duplicates=lib.no_default\n    ) -> None:  # noqa: PR01, D200\n        \"\"\"\n        Insert column into ``DataFrame`` at specified location.\n        \"\"\"\n        from modin.numpy import array\n\n        if (\n            isinstance(value, (DataFrame, pandas.DataFrame))\n            or isinstance(value, (array, np.ndarray))\n            and len(value.shape) > 1\n        ):\n            if isinstance(value, (array, np.ndarray)) and value.shape[1] != 1:\n                raise ValueError(\n                    f\"Expected a 1D array, got an array with shape {value.shape}\"\n                )\n            elif (\n                isinstance(value, (DataFrame, pandas.DataFrame)) and value.shape[1] != 1\n            ):\n                raise ValueError(\n                    \"Expected a one-dimensional object, got a DataFrame with \"\n                    + f\"{len(value.columns)} columns instead.\"\n                )\n            value = value.squeeze(axis=1)\n        if not self._query_compiler.lazy_row_count and len(self) == 0:\n            if not hasattr(value, \"index\"):\n                try:\n                    value = pandas.Series(value)\n                except (TypeError, ValueError, IndexError):\n                    raise ValueError(\n                        \"Cannot insert into a DataFrame with no defined index \"\n                        + \"and a value that cannot be converted to a \"\n           
             + \"Series\"\n                    )\n            new_index = value.index.copy()\n            new_columns = self.columns.insert(loc, column)\n            new_query_compiler = self.__constructor__(\n                value, index=new_index, columns=new_columns\n            )._query_compiler\n        elif self._query_compiler.get_axis_len(1) == 0 and loc == 0:\n            new_index = self.index\n            new_query_compiler = self.__constructor__(\n                data=value,\n                columns=[column],\n                index=None if len(new_index) == 0 else new_index,\n            )._query_compiler\n        else:\n            if (\n                is_list_like(value)\n                and not isinstance(value, (pandas.Series, Series))\n                and len(value) != len(self)\n            ):\n                raise ValueError(\n                    \"Length of values ({}) does not match length of index ({})\".format(\n                        len(value), len(self)\n                    )\n                )\n            if allow_duplicates is not True and column in self.columns:\n                raise ValueError(f\"cannot insert {column}, already exists\")\n            columns_len = self._query_compiler.get_axis_len(1)\n            if not -columns_len <= loc <= columns_len:\n                raise IndexError(\n                    f\"index {loc} is out of bounds for axis 0 with size {columns_len}\"\n                )\n            elif loc < 0:\n                raise ValueError(\"unbounded slice\")\n            if isinstance(value, (Series, array)):\n                value = value._query_compiler\n            new_query_compiler = self._query_compiler.insert(loc, column, value)\n\n        self._update_inplace(new_query_compiler=new_query_compiler)\n\n    def isna(self) -> DataFrame:\n        \"\"\"\n        Detect missing values.\n\n        Returns\n        -------\n        DataFrame\n            The result of detecting missing values.\n        \"\"\"\n  
      return super(DataFrame, self).isna()\n\n    def isnull(self) -> DataFrame:\n        \"\"\"\n        Detect missing values.\n\n        Returns\n        -------\n        DataFrame\n            The result of detecting missing values.\n        \"\"\"\n        return super(DataFrame, self).isnull()\n\n    def iterrows(self) -> Iterable[tuple[Hashable, Series]]:  # noqa: D200\n        \"\"\"\n        Iterate over ``DataFrame`` rows as (index, ``Series``) pairs.\n        \"\"\"\n\n        def iterrow_builder(s):\n            \"\"\"Return tuple of the given `s` parameter name and the parameter themself.\"\"\"\n            return s.name, s\n\n        partition_iterator = PartitionIterator(self, 0, iterrow_builder)\n        for v in partition_iterator:\n            yield v\n\n    def items(self) -> Iterable[tuple[Hashable, Series]]:  # noqa: D200\n        \"\"\"\n        Iterate over (column name, ``Series``) pairs.\n        \"\"\"\n\n        def items_builder(s):\n            \"\"\"Return tuple of the given `s` parameter name and the parameter themself.\"\"\"\n            return s.name, s\n\n        partition_iterator = PartitionIterator(self, 1, items_builder)\n        for v in partition_iterator:\n            yield v\n\n    def itertuples(\n        self, index=True, name=\"Pandas\"\n    ) -> Iterable[tuple[Any, ...]]:  # noqa: PR01, D200\n        \"\"\"\n        Iterate over ``DataFrame`` rows as ``namedtuple``-s.\n        \"\"\"\n\n        def itertuples_builder(s):\n            \"\"\"Return the next ``namedtuple``.\"\"\"\n            return next(s._to_pandas().to_frame().T.itertuples(index=index, name=name))\n\n        partition_iterator = PartitionIterator(self, 0, itertuples_builder)\n        for v in partition_iterator:\n            yield v\n\n    def join(\n        self,\n        other,\n        on=None,\n        how=\"left\",\n        lsuffix=\"\",\n        rsuffix=\"\",\n        sort=False,\n        validate=None,\n    ) -> DataFrame:  # noqa: PR01, RT01, 
D200\n        \"\"\"\n        Join columns of another ``DataFrame``.\n        \"\"\"\n        if on is not None and not isinstance(other, (Series, DataFrame)):\n            raise ValueError(\n                \"Joining multiple DataFrames only supported for joining on index\"\n            )\n        if validate is not None:\n            return self._default_to_pandas(\n                pandas.DataFrame.join,\n                other,\n                on=on,\n                how=how,\n                lsuffix=lsuffix,\n                rsuffix=rsuffix,\n                sort=sort,\n                validate=validate,\n            )\n\n        if isinstance(other, Series):\n            if other.name is None:\n                raise ValueError(\"Other Series must have a name\")\n            other = self.__constructor__(other)\n        if on is not None or how == \"cross\":\n            return self.__constructor__(\n                query_compiler=self._query_compiler.join(\n                    other._query_compiler,\n                    on=on,\n                    how=how,\n                    lsuffix=lsuffix,\n                    rsuffix=rsuffix,\n                    sort=sort,\n                    validate=validate,\n                )\n            )\n        if isinstance(other, DataFrame):\n            # Joining the empty DataFrames with either index or columns is\n            # fast. 
It gives us proper error checking for the edge cases that\n            # would otherwise require a lot more logic.\n            new_columns = (\n                pandas.DataFrame(columns=self.columns)\n                .join(\n                    pandas.DataFrame(columns=other.columns),\n                    lsuffix=lsuffix,\n                    rsuffix=rsuffix,\n                )\n                .columns\n            )\n            other = [other]\n        else:\n            new_columns = (\n                pandas.DataFrame(columns=self.columns)\n                .join(\n                    [pandas.DataFrame(columns=obj.columns) for obj in other],\n                    lsuffix=lsuffix,\n                    rsuffix=rsuffix,\n                )\n                .columns\n            )\n        new_frame = self.__constructor__(\n            query_compiler=self._query_compiler.concat(\n                1, [obj._query_compiler for obj in other], join=how, sort=sort\n            )\n        )\n        new_frame.columns = new_columns\n        return new_frame\n\n    def isetitem(self, loc, value) -> None:\n        return self._default_to_pandas(\n            pandas.DataFrame.isetitem,\n            loc=loc,\n            value=value,\n        )\n\n    def le(\n        self, other, axis=\"columns\", level=None\n    ) -> DataFrame:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Get less than or equal comparison of ``DataFrame`` and `other`, element-wise (binary operator `le`).\n        \"\"\"\n        return self._binary_op(\n            \"le\", other, axis=axis, level=level, broadcast=isinstance(other, Series)\n        )\n\n    def lt(\n        self, other, axis=\"columns\", level=None\n    ) -> DataFrame:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Get less than comparison of ``DataFrame`` and `other`, element-wise (binary operator `lt`).\n        \"\"\"\n        return self._binary_op(\n            \"lt\", other, axis=axis, level=level, broadcast=isinstance(other, 
Series)\n        )\n\n    def melt(\n        self,\n        id_vars=None,\n        value_vars=None,\n        var_name=None,\n        value_name=\"value\",\n        col_level=None,\n        ignore_index=True,\n    ) -> DataFrame:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Unpivot a ``DataFrame`` from wide to long format, optionally leaving identifiers set.\n        \"\"\"\n        if id_vars is None:\n            id_vars = []\n        if not is_list_like(id_vars):\n            id_vars = [id_vars]\n        if value_vars is None:\n            value_vars = self.columns.drop(id_vars)\n        if var_name is None:\n            columns_name = self._query_compiler.get_index_name(axis=1)\n            var_name = columns_name if columns_name is not None else \"variable\"\n        return self.__constructor__(\n            query_compiler=self._query_compiler.melt(\n                id_vars=id_vars,\n                value_vars=value_vars,\n                var_name=var_name,\n                value_name=value_name,\n                col_level=col_level,\n                ignore_index=ignore_index,\n            )\n        )\n\n    def merge(\n        self,\n        right,\n        how=\"inner\",\n        on=None,\n        left_on=None,\n        right_on=None,\n        left_index=False,\n        right_index=False,\n        sort=False,\n        suffixes=(\"_x\", \"_y\"),\n        copy=None,\n        indicator=False,\n        validate=None,\n    ) -> DataFrame:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Merge ``DataFrame`` or named ``Series`` objects with a database-style join.\n        \"\"\"\n        if copy is None:\n            copy = True\n        if isinstance(right, Series):\n            if right.name is None:\n                raise ValueError(\"Cannot merge a Series without a name\")\n            else:\n                right = right.to_frame()\n        if not isinstance(right, DataFrame):\n            raise TypeError(\n                f\"Can only merge Series or 
DataFrame objects, a {type(right)} was passed\"\n            )\n\n        # If we are joining on the index and we are using\n        # default parameters we can map this to a join\n        if left_index and right_index and not indicator:\n            return self.join(\n                right, how=how, lsuffix=suffixes[0], rsuffix=suffixes[1], sort=sort\n            )\n\n        return self.__constructor__(\n            query_compiler=self._query_compiler.merge(\n                right._query_compiler,\n                how=how,\n                on=on,\n                left_on=left_on,\n                right_on=right_on,\n                left_index=left_index,\n                right_index=right_index,\n                sort=sort,\n                suffixes=suffixes,\n                copy=copy,\n                indicator=indicator,\n                validate=validate,\n            )\n        )\n\n    def mod(\n        self, other, axis=\"columns\", level=None, fill_value=None\n    ) -> DataFrame:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Get modulo of ``DataFrame`` and `other`, element-wise (binary operator `mod`).\n        \"\"\"\n        return self._binary_op(\n            \"mod\",\n            other,\n            axis=axis,\n            level=level,\n            fill_value=fill_value,\n            broadcast=isinstance(other, Series),\n        )\n\n    def mul(\n        self, other, axis=\"columns\", level=None, fill_value=None\n    ) -> DataFrame:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Get multiplication of ``DataFrame`` and `other`, element-wise (binary operator `mul`).\n        \"\"\"\n        return self._binary_op(\n            \"mul\",\n            other,\n            axis=axis,\n            level=level,\n            fill_value=fill_value,\n            broadcast=isinstance(other, Series),\n        )\n\n    multiply = mul\n\n    def rmul(\n        self, other, axis=\"columns\", level=None, fill_value=None\n    ) -> DataFrame:  # noqa: PR01, 
RT01, D200\n        \"\"\"\n        Get multiplication of ``DataFrame`` and `other`, element-wise (binary operator `rmul`).\n        \"\"\"\n        return self._binary_op(\n            \"rmul\",\n            other,\n            axis=axis,\n            level=level,\n            fill_value=fill_value,\n            broadcast=isinstance(other, Series),\n        )\n\n    def ne(\n        self, other, axis=\"columns\", level=None\n    ) -> DataFrame:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Get not equal comparison of ``DataFrame`` and `other`, element-wise (binary operator `ne`).\n        \"\"\"\n        return self._binary_op(\n            \"ne\", other, axis=axis, level=level, broadcast=isinstance(other, Series)\n        )\n\n    def nlargest(self, n, columns, keep=\"first\") -> DataFrame:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return the first `n` rows ordered by `columns` in descending order.\n        \"\"\"\n        return self.__constructor__(\n            query_compiler=self._query_compiler.nlargest(n, columns, keep)\n        )\n\n    def nsmallest(\n        self, n, columns, keep=\"first\"\n    ) -> DataFrame:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return the first `n` rows ordered by `columns` in ascending order.\n        \"\"\"\n        return self.__constructor__(\n            query_compiler=self._query_compiler.nsmallest(\n                n=n, columns=columns, keep=keep\n            )\n        )\n\n    def unstack(\n        self, level=-1, fill_value=None, sort=True\n    ) -> Union[DataFrame, Series]:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Pivot a level of the (necessarily hierarchical) index labels.\n        \"\"\"\n        if not sort:\n            # TODO: it should be easy to add support for sort == False\n            return self._default_to_pandas(\n                pandas.DataFrame.unstack, level=level, fill_value=fill_value, sort=sort\n            )\n\n        # This ensures that non-pandas MultiIndex 
objects are caught.\n        is_multiindex = len(self.index.names) > 1\n        if not is_multiindex or (\n            is_multiindex and is_list_like(level) and len(level) == self.index.nlevels\n        ):\n            return self._reduce_dimension(\n                query_compiler=self._query_compiler.unstack(level, fill_value)\n            )\n        else:\n            return self.__constructor__(\n                query_compiler=self._query_compiler.unstack(level, fill_value)\n            )\n\n    def pivot(\n        self, *, columns, index=lib.no_default, values=lib.no_default\n    ) -> DataFrame:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return reshaped ``DataFrame`` organized by given index / column values.\n        \"\"\"\n        if index is lib.no_default:\n            index = None\n        if values is lib.no_default:\n            values = None\n\n        # if values is not specified, it should be the remaining columns not in\n        # index or columns\n        if values is None:\n            values = list(self.columns)\n            if index is not None:\n                values = [v for v in values if v not in index]\n            if columns is not None:\n                values = [v for v in values if v not in columns]\n\n        return self.__constructor__(\n            query_compiler=self._query_compiler.pivot(\n                index=index, columns=columns, values=values\n            )\n        )\n\n    def pivot_table(\n        self,\n        values=None,\n        index=None,\n        columns=None,\n        aggfunc=\"mean\",\n        fill_value=None,\n        margins=False,\n        dropna=True,\n        margins_name=\"All\",\n        observed=lib.no_default,\n        sort=True,\n    ) -> DataFrame:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Create a spreadsheet-style pivot table as a ``DataFrame``.\n        \"\"\"\n        # Convert callable to a string aggregation name if possible\n        if hashable(aggfunc):\n            aggfunc = 
get_cython_func(aggfunc) or aggfunc\n\n        result = self.__constructor__(\n            query_compiler=self._query_compiler.pivot_table(\n                index=index,\n                values=values,\n                columns=columns,\n                aggfunc=aggfunc,\n                fill_value=fill_value,\n                margins=margins,\n                dropna=dropna,\n                margins_name=margins_name,\n                observed=observed,\n                sort=sort,\n            )\n        )\n        return result\n\n    @property\n    def plot(\n        self,\n        x=None,\n        y=None,\n        kind=\"line\",\n        ax=None,\n        subplots=False,\n        sharex=None,\n        sharey=False,\n        layout=None,\n        figsize=None,\n        use_index=True,\n        title=None,\n        grid=None,\n        legend=True,\n        style=None,\n        logx=False,\n        logy=False,\n        loglog=False,\n        xticks=None,\n        yticks=None,\n        xlim=None,\n        ylim=None,\n        rot=None,\n        fontsize=None,\n        colormap=None,\n        table=False,\n        yerr=None,\n        xerr=None,\n        secondary_y=False,\n        sort_columns=False,\n        **kwargs,\n    ):  # noqa: PR01, RT01, D200\n        \"\"\"\n        Make plots of ``DataFrame``.\n        \"\"\"\n        return self._to_pandas().plot\n\n    def pow(\n        self, other, axis=\"columns\", level=None, fill_value=None\n    ) -> DataFrame:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Get exponential power of ``DataFrame`` and `other`, element-wise (binary operator `pow`).\n        \"\"\"\n        if isinstance(other, Series):\n            return self._default_to_pandas(\n                \"pow\", other, axis=axis, level=level, fill_value=fill_value\n            )\n        return self._binary_op(\n            \"pow\",\n            other,\n            axis=axis,\n            level=level,\n            fill_value=fill_value,\n            
broadcast=isinstance(other, Series),\n        )\n\n    def prod(\n        self,\n        axis=0,\n        skipna=True,\n        numeric_only=False,\n        min_count=0,\n        **kwargs,\n    ):  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return the product of the values over the requested axis.\n        \"\"\"\n        validate_bool_kwarg(skipna, \"skipna\", none_allowed=False)\n        axis = self._get_axis_number(axis)\n\n        axis_to_apply = self.columns if axis else self.index\n        if (\n            skipna is not False\n            and numeric_only is False\n            and min_count > len(axis_to_apply)\n            # This fast path is only suitable for the default backend\n            and self._query_compiler.get_pandas_backend() is None\n        ):\n            new_index = self.columns if not axis else self.index\n            # >>> pd.DataFrame([1,2,3,4], dtype=\"int64[pyarrow]\").prod(min_count=10)\n            # 0    <NA>\n            # dtype: int64[pyarrow]\n            return Series(\n                [np.nan] * len(new_index),\n                index=new_index,\n                dtype=pandas.api.types.pandas_dtype(\"float64\"),\n            )\n\n        data = self._validate_dtypes_prod_mean(axis, numeric_only, ignore_axis=True)\n        if min_count > 1:\n            return data._reduce_dimension(\n                data._query_compiler.prod_min_count(\n                    axis=axis,\n                    skipna=skipna,\n                    numeric_only=numeric_only,\n                    min_count=min_count,\n                    **kwargs,\n                )\n            )\n        return data._reduce_dimension(\n            data._query_compiler.prod(\n                axis=axis,\n                skipna=skipna,\n                numeric_only=numeric_only,\n                min_count=min_count,\n                **kwargs,\n            )\n        )\n\n    product = prod\n\n    def quantile(\n        self,\n        q=0.5,\n        axis=0,\n        
numeric_only=False,\n        interpolation=\"linear\",\n        method=\"single\",\n    ) -> Union[DataFrame, Series]:\n        return super(DataFrame, self).quantile(\n            q=q,\n            axis=axis,\n            numeric_only=numeric_only,\n            interpolation=interpolation,\n            method=method,\n        )\n\n    # methods and fields we need to use pandas.DataFrame.query\n    _AXIS_ORDERS = [\"index\", \"columns\"]\n    _get_index_resolvers = pandas.DataFrame._get_index_resolvers\n\n    def _get_axis_resolvers(self, axis: str) -> dict:  # noqa: GL08\n        # forked from pandas because we only want to update the index if there's more\n        # than one level of the index.\n        # index or columns\n        axis_index = getattr(self, axis)\n        d = {}\n        prefix = axis[0]\n\n        for i, name in enumerate(axis_index.names):\n            if name is not None:\n                key = level = name\n            else:\n                # prefix with 'i' or 'c' depending on the input axis\n                # e.g., you must do ilevel_0 for the 0th level of an unnamed\n                # multiiindex\n                key = f\"{prefix}level_{i}\"\n                level = i\n\n            level_values = axis_index.get_level_values(level)\n            s = level_values.to_series()\n            if axis_index.nlevels > 1:\n                s.index = axis_index\n            d[key] = s\n\n        # put the index/columns itself in the dict\n        if axis_index.nlevels > 2:\n            dindex = axis_index\n        else:\n            dindex = axis_index.to_series()\n\n        d[axis] = dindex\n        return d\n\n    def _get_cleaned_column_resolvers(self) -> dict[Hashable, Series]:  # noqa: RT01\n        \"\"\"\n        Return the special character free column resolvers of a dataframe.\n\n        Column names with special characters are 'cleaned up' so that they can\n        be referred to by backtick quoting.\n        Used in `DataFrame.eval`.\n\n   
     Notes\n        -----\n        Copied from pandas.\n        \"\"\"\n        from modin.core.computation.parsing import clean_column_name\n\n        return {\n            clean_column_name(k): v for k, v in self.items() if not isinstance(k, int)\n        }\n\n    def query(\n        self, expr, inplace=False, **kwargs\n    ) -> Union[DataFrame, None]:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Query the columns of a ``DataFrame`` with a boolean expression.\n        \"\"\"\n        self._update_var_dicts_in_kwargs(expr, kwargs)\n        self._validate_eval_query(expr, **kwargs)\n        inplace = validate_bool_kwarg(inplace, \"inplace\")\n        if not isinstance(expr, str):\n            msg = f\"expr must be a string to be evaluated, {type(expr)} given\"\n            raise ValueError(msg)\n        # HACK: this condition kind of breaks the idea of backend agnostic API as all queries\n        # _should_ work fine for all of the engines using `pandas.DataFrame.query(...)` approach.\n        # However, at this point we know that we can execute simple queries way more efficiently\n        # using the QC's API directly in case of pandas backend. Ideally, we have to make it work\n        # with the 'pandas.query' approach the same as good the direct QC call is. But investigating\n        # and fixing the root cause of the perf difference appears to be much more complicated\n        # than putting this hack here. Hopefully, we'll get rid of it soon:\n        # https://github.com/modin-project/modin/issues/6499\n        try:\n            new_query_compiler = self._query_compiler.rowwise_query(expr, **kwargs)\n        except NotImplementedError:\n            # a non row-wise query was passed, falling back to the\n            # implementation forked from pandas.DataFrame.query. This\n            # implementation will effectively evaluate the condition at the\n            # modin.pandas API level, so that e.g. 
we interpret\n            # df.query(\"col > 0\") as df.loc[df.col > 0]\n            kwargs[\"target\"] = None\n            res = self.eval(expr, **kwargs)\n\n            try:\n                result = self.loc[res]\n            except ValueError:\n                # when res is multi-dimensional loc raises, but this is\n                # sometimes a valid query.\n                result = self[res]\n\n            new_query_compiler = result._query_compiler\n        return self._create_or_update_from_compiler(new_query_compiler, inplace)\n\n    def rename(\n        self,\n        mapper=None,\n        index=None,\n        columns=None,\n        axis=None,\n        copy=None,\n        inplace=False,\n        level=None,\n        errors=\"ignore\",\n    ) -> Union[DataFrame, None]:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Alter axes labels.\n        \"\"\"\n        inplace = validate_bool_kwarg(inplace, \"inplace\")\n        if mapper is None and index is None and columns is None:\n            raise TypeError(\"must pass an index to rename\")\n        # We have to do this with the args because of how rename handles kwargs. 
It\n        # doesn't ignore None values passed in, so we have to filter them ourselves.\n        args = locals()\n        kwargs = {k: v for k, v in args.items() if v is not None and k != \"self\"}\n        # inplace must always be False here: rename is applied to a throwaway pandas\n        # frame, and we use the returned result afterwards.\n        kwargs[\"inplace\"] = False\n        axis = self._get_axis_number(axis)\n        if index is not None or (mapper is not None and axis == 0):\n            new_index = pandas.DataFrame(index=self.index).rename(**kwargs).index\n        else:\n            new_index = None\n        if columns is not None or (mapper is not None and axis == 1):\n            new_columns = (\n                pandas.DataFrame(columns=self.columns).rename(**kwargs).columns\n            )\n        else:\n            new_columns = None\n\n        if inplace:\n            obj = self\n        else:\n            obj = self.copy()\n        if new_index is not None:\n            obj.index = new_index\n        if new_columns is not None:\n            obj.columns = new_columns\n\n        if not inplace:\n            return obj\n\n    def reindex(\n        self,\n        labels=None,\n        *,\n        index=None,\n        columns=None,\n        axis=None,\n        method=None,\n        copy=None,\n        level=None,\n        fill_value=np.nan,\n        limit=None,\n        tolerance=None,\n    ) -> DataFrame:  # noqa: PR01, RT01, D200\n        axis = self._get_axis_number(axis)\n        if axis == 0 and labels is not None:\n            index = labels\n        elif labels is not None:\n            columns = labels\n        return super(DataFrame, self).reindex(\n            index=index,\n            columns=columns,\n            method=method,\n            copy=copy,\n            level=level,\n            fill_value=fill_value,\n            limit=limit,\n            tolerance=tolerance,\n        )\n\n    def replace(\n        self,\n        to_replace=None,\n        
value=lib.no_default,\n        *,\n        inplace: bool = False,\n        limit=None,\n        regex: bool = False,\n        method: str | lib.NoDefault = lib.no_default,\n    ) -> Union[DataFrame, None]:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Replace values given in `to_replace` with `value`.\n        \"\"\"\n        inplace = validate_bool_kwarg(inplace, \"inplace\")\n        new_query_compiler = self._query_compiler.replace(\n            to_replace=to_replace,\n            value=value,\n            inplace=False,\n            limit=limit,\n            regex=regex,\n            method=method,\n        )\n        return self._create_or_update_from_compiler(new_query_compiler, inplace)\n\n    def rfloordiv(\n        self, other, axis=\"columns\", level=None, fill_value=None\n    ) -> DataFrame:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Get integer division of ``DataFrame`` and `other`, element-wise (binary operator `rfloordiv`).\n        \"\"\"\n        return self._binary_op(\n            \"rfloordiv\",\n            other,\n            axis=axis,\n            level=level,\n            fill_value=fill_value,\n            broadcast=isinstance(other, Series),\n        )\n\n    def radd(\n        self, other, axis=\"columns\", level=None, fill_value=None\n    ) -> DataFrame:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Get addition of ``DataFrame`` and `other`, element-wise (binary operator `radd`).\n        \"\"\"\n        return self._binary_op(\n            \"radd\",\n            other,\n            axis=axis,\n            level=level,\n            fill_value=fill_value,\n            broadcast=isinstance(other, Series),\n        )\n\n    def rmod(\n        self, other, axis=\"columns\", level=None, fill_value=None\n    ) -> DataFrame:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Get modulo of ``DataFrame`` and `other`, element-wise (binary operator `rmod`).\n        \"\"\"\n        return self._binary_op(\n            \"rmod\",\n   
         other,\n            axis=axis,\n            level=level,\n            fill_value=fill_value,\n            broadcast=isinstance(other, Series),\n        )\n\n    def rpow(\n        self, other, axis=\"columns\", level=None, fill_value=None\n    ) -> DataFrame:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Get exponential power of ``DataFrame`` and `other`, element-wise (binary operator `rpow`).\n        \"\"\"\n        if isinstance(other, Series):\n            return self._default_to_pandas(\n                \"rpow\", other, axis=axis, level=level, fill_value=fill_value\n            )\n        return self._binary_op(\n            \"rpow\",\n            other,\n            axis=axis,\n            level=level,\n            fill_value=fill_value,\n            broadcast=isinstance(other, Series),\n        )\n\n    def rsub(\n        self, other, axis=\"columns\", level=None, fill_value=None\n    ) -> DataFrame:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Get subtraction of ``DataFrame`` and `other`, element-wise (binary operator `rsub`).\n        \"\"\"\n        return self._binary_op(\n            \"rsub\",\n            other,\n            axis=axis,\n            level=level,\n            fill_value=fill_value,\n            broadcast=isinstance(other, Series),\n        )\n\n    def rtruediv(\n        self, other, axis=\"columns\", level=None, fill_value=None\n    ) -> DataFrame:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Get floating division of ``DataFrame`` and `other`, element-wise (binary operator `rtruediv`).\n        \"\"\"\n        return self._binary_op(\n            \"rtruediv\",\n            other,\n            axis=axis,\n            level=level,\n            fill_value=fill_value,\n            broadcast=isinstance(other, Series),\n        )\n\n    rdiv = rtruediv\n\n    def select_dtypes(\n        self, include=None, exclude=None\n    ) -> DataFrame:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return a subset of the 
``DataFrame``'s columns based on the column dtypes.\n        \"\"\"\n        # Validates arguments for whether both include and exclude are None or\n        # if they are disjoint. Also invalidates string dtypes.\n        pandas.DataFrame().select_dtypes(include, exclude)\n\n        if include and not is_list_like(include):\n            include = [include]\n        elif include is None:\n            include = []\n        if exclude and not is_list_like(exclude):\n            exclude = [exclude]\n        elif exclude is None:\n            exclude = []\n\n        sel = tuple(map(set, (include, exclude)))\n        include, exclude = map(lambda x: set(map(infer_dtype_from_object, x)), sel)\n        include_these = pandas.Series(not bool(include), index=self.columns)\n        exclude_these = pandas.Series(not bool(exclude), index=self.columns)\n\n        def is_dtype_instance_mapper(column, dtype):\n            return column, functools.partial(issubclass, dtype.type)\n\n        for column, f in itertools.starmap(\n            is_dtype_instance_mapper, self.dtypes.items()\n        ):\n            if include:  # checks for the case of empty include or exclude\n                include_these[column] = any(map(f, include))\n            if exclude:\n                exclude_these[column] = not any(map(f, exclude))\n\n        dtype_indexer = include_these & exclude_these\n        indicate = [\n            i for i in range(len(dtype_indexer.values)) if not dtype_indexer.values[i]\n        ]\n        return self.drop(columns=self.columns[indicate], inplace=False)\n\n    def set_index(\n        self, keys, *, drop=True, append=False, inplace=False, verify_integrity=False\n    ) -> Union[DataFrame, None]:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Set the ``DataFrame`` index using existing columns.\n        \"\"\"\n        inplace = validate_bool_kwarg(inplace, \"inplace\")\n        if not isinstance(keys, list):\n            keys = [keys]\n\n        if any(\n            
isinstance(col, (pandas.Index, Series, np.ndarray, list, Iterator))\n            for col in keys\n        ):\n            if inplace:\n                frame = self\n            else:\n                frame = self.copy()\n            if drop:\n                keys = [k if is_list_like(k) else frame.pop(k) for k in keys]\n            keys = try_cast_to_pandas(keys)\n            # These are single-threaded objects, so we might as well let pandas do the\n            # calculation so that it matches.\n            frame.index = (\n                pandas.DataFrame(index=self.index)\n                .set_index(keys, append=append, verify_integrity=verify_integrity)\n                .index\n            )\n            if not inplace:\n                return frame\n            else:\n                return\n\n        missing = []\n        for col in keys:\n            # everything else gets tried as a key;\n            # see https://github.com/pandas-dev/pandas/issues/24969\n            try:\n                found = col in self.columns\n            except TypeError as err:\n                raise TypeError(\n                    'The parameter \"keys\" may be a column key, one-dimensional '\n                    + \"array, or a list containing only valid column keys and \"\n                    + f\"one-dimensional arrays. 
Received column of type {type(col)}\"\n                ) from err\n            else:\n                if not found:\n                    missing.append(col)\n        # If the missing column is a \"primitive\", return the errors.\n        # Otherwise we let the query compiler figure out what to do with\n        # the keys\n        if missing and not hasattr(missing[0], \"__dict__\"):\n            # The keys are a primitive type\n            raise KeyError(f\"None of {missing} are in the columns\")\n\n        new_query_compiler = self._query_compiler.set_index_from_columns(\n            keys, drop=drop, append=append\n        )\n\n        if verify_integrity and not new_query_compiler.index.is_unique:\n            duplicates = new_query_compiler.index[\n                new_query_compiler.index.duplicated()\n            ].unique()\n            raise ValueError(f\"Index has duplicate keys: {duplicates}\")\n\n        return self._create_or_update_from_compiler(new_query_compiler, inplace=inplace)\n\n    sparse = CachedAccessor(\"sparse\", SparseFrameAccessor)\n\n    def squeeze(\n        self, axis=None\n    ) -> Union[DataFrame, Series, Scalar]:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Squeeze 1 dimensional axis objects into scalars.\n        \"\"\"\n        axis = self._get_axis_number(axis) if axis is not None else None\n        if axis is None and (\n            self._query_compiler.get_axis_len(1) == 1 or len(self) == 1\n        ):\n            return Series(query_compiler=self._query_compiler).squeeze()\n        if axis == 1 and self._query_compiler.get_axis_len(1) == 1:\n            self._query_compiler._shape_hint = \"column\"\n            return Series(query_compiler=self._query_compiler)\n        if axis == 0 and len(self) == 1:\n            qc = self.T._query_compiler\n            qc._shape_hint = \"column\"\n            return Series(query_compiler=qc)\n        else:\n            return self.copy()\n\n    def stack(\n        self, level=-1, 
dropna=lib.no_default, sort=lib.no_default, future_stack=False\n    ) -> Union[DataFrame, Series]:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Stack the prescribed level(s) from columns to index.\n        \"\"\"\n        if future_stack:\n            return self._default_to_pandas(\n                pandas.DataFrame.stack,\n                level=level,\n                dropna=dropna,\n                sort=sort,\n                future_stack=future_stack,\n            )\n\n        # FutureWarnings only needed if future_stack == True\n        if dropna is lib.no_default:\n            dropna = True\n        if sort is lib.no_default:\n            sort = True\n\n        # This ensures that non-pandas MultiIndex objects are caught.\n        is_multiindex = len(self.columns.names) > 1\n        if not is_multiindex or (\n            is_multiindex and is_list_like(level) and len(level) == self.columns.nlevels\n        ):\n            return self._reduce_dimension(\n                query_compiler=self._query_compiler.stack(level, dropna, sort)\n            )\n        else:\n            return self.__constructor__(\n                query_compiler=self._query_compiler.stack(level, dropna, sort)\n            )\n\n    def sub(\n        self, other, axis=\"columns\", level=None, fill_value=None\n    ) -> DataFrame:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Get subtraction of ``DataFrame`` and `other`, element-wise (binary operator `sub`).\n        \"\"\"\n        return self._binary_op(\n            \"sub\",\n            other,\n            axis=axis,\n            level=level,\n            fill_value=fill_value,\n            broadcast=isinstance(other, Series),\n        )\n\n    subtract = sub\n\n    def sum(\n        self,\n        axis=0,\n        skipna=True,\n        numeric_only=False,\n        min_count=0,\n        **kwargs,\n    ) -> Series:  # noqa: PR01, RT01, D200\n        validate_bool_kwarg(skipna, \"skipna\", none_allowed=False)\n        \"\"\"\n      
  Return the sum of the values over the requested axis.\n        \"\"\"\n        axis = self._get_axis_number(axis)\n        axis_to_apply = self.columns if axis else self.index\n        if (\n            skipna is not False\n            and numeric_only is False\n            and min_count > len(axis_to_apply)\n            # This fast path is only suitable for the default backend\n            and self._query_compiler.get_pandas_backend() is None\n        ):\n            new_index = self.columns if not axis else self.index\n            return Series(\n                [np.nan] * len(new_index),\n                index=new_index,\n                dtype=pandas.api.types.pandas_dtype(\"float64\"),\n            )\n\n        # We cannot add datetime types, so if we are summing a column with\n        # dtype datetime64 and cannot ignore non-numeric types, we must throw a\n        # TypeError.\n        if numeric_only is False and any(\n            dtype == pandas.api.types.pandas_dtype(\"datetime64[ns]\")\n            for dtype in self.dtypes\n        ):\n            raise TypeError(\n                \"'DatetimeArray' with dtype datetime64[ns] does not support reduction 'sum'\"\n            )\n\n        data = self._get_numeric_data(axis) if numeric_only else self\n\n        if min_count > 1:\n            return data._reduce_dimension(\n                data._query_compiler.sum_min_count(\n                    axis=axis,\n                    skipna=skipna,\n                    numeric_only=numeric_only,\n                    min_count=min_count,\n                    **kwargs,\n                )\n            )\n        return data._reduce_dimension(\n            data._query_compiler.sum(\n                axis=axis,\n                skipna=skipna,\n                numeric_only=numeric_only,\n                min_count=min_count,\n                **kwargs,\n            )\n        )\n\n    @expanduser_path_arg(\"path\")\n    def to_feather(\n        self, path, **kwargs\n    ) -> 
None:  # pragma: no cover # noqa: PR01, RT01, D200\n        \"\"\"\n        Write a ``DataFrame`` to the binary Feather format.\n        \"\"\"\n        return self._default_to_pandas(pandas.DataFrame.to_feather, path, **kwargs)\n\n    def to_gbq(\n        self,\n        destination_table,\n        project_id=None,\n        chunksize=None,\n        reauth=False,\n        if_exists=\"fail\",\n        auth_local_webserver=True,\n        table_schema=None,\n        location=None,\n        progress_bar=True,\n        credentials=None,\n    ) -> None:  # pragma: no cover # noqa: PR01, RT01, D200\n        \"\"\"\n        Write a ``DataFrame`` to a Google BigQuery table.\n        \"\"\"\n        return self._default_to_pandas(\n            pandas.DataFrame.to_gbq,\n            destination_table,\n            project_id=project_id,\n            chunksize=chunksize,\n            reauth=reauth,\n            if_exists=if_exists,\n            auth_local_webserver=auth_local_webserver,\n            table_schema=table_schema,\n            location=location,\n            progress_bar=progress_bar,\n            credentials=credentials,\n        )\n\n    @expanduser_path_arg(\"path\")\n    def to_orc(\n        self, path=None, *, engine=\"pyarrow\", index=None, engine_kwargs=None\n    ) -> Union[bytes, None]:\n        return self._default_to_pandas(\n            pandas.DataFrame.to_orc,\n            path=path,\n            engine=engine,\n            index=index,\n            engine_kwargs=engine_kwargs,\n        )\n\n    @expanduser_path_arg(\"buf\")\n    def to_html(\n        self,\n        buf=None,\n        columns=None,\n        col_space=None,\n        header=True,\n        index=True,\n        na_rep=\"NaN\",\n        formatters=None,\n        float_format=None,\n        sparsify=None,\n        index_names=True,\n        justify=None,\n        max_rows=None,\n        max_cols=None,\n        show_dimensions=False,\n        decimal=\".\",\n        bold_rows=True,\n        
classes=None,\n        escape=True,\n        notebook=False,\n        border=None,\n        table_id=None,\n        render_links=False,\n        encoding=None,\n    ) -> Union[str, None]:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Render a ``DataFrame`` as an HTML table.\n        \"\"\"\n        return self._default_to_pandas(\n            pandas.DataFrame.to_html,\n            buf=buf,\n            columns=columns,\n            col_space=col_space,\n            header=header,\n            index=index,\n            na_rep=na_rep,\n            formatters=formatters,\n            float_format=float_format,\n            sparsify=sparsify,\n            index_names=index_names,\n            justify=justify,\n            max_rows=max_rows,\n            max_cols=max_cols,\n            show_dimensions=show_dimensions,\n            decimal=decimal,\n            bold_rows=bold_rows,\n            classes=classes,\n            escape=escape,\n            notebook=notebook,\n            border=border,\n            table_id=table_id,\n            render_links=render_links,\n            encoding=None,\n        )\n\n    @expanduser_path_arg(\"path\")\n    def to_parquet(\n        self,\n        path=None,\n        engine=\"auto\",\n        compression=\"snappy\",\n        index=None,\n        partition_cols=None,\n        storage_options: StorageOptions = None,\n        **kwargs,\n    ) -> Union[bytes, None]:\n        from modin.core.execution.dispatching.factories.dispatcher import (\n            FactoryDispatcher,\n        )\n\n        return FactoryDispatcher.to_parquet(\n            self._query_compiler,\n            path=path,\n            engine=engine,\n            compression=compression,\n            index=index,\n            partition_cols=partition_cols,\n            storage_options=storage_options,\n            **kwargs,\n        )\n\n    def to_period(\n        self, freq=None, axis=0, copy=None\n    ) -> DataFrame:  # pragma: no cover # noqa: PR01, RT01, 
D200\n        \"\"\"\n        Convert ``DataFrame`` from ``DatetimeIndex`` to ``PeriodIndex``.\n        \"\"\"\n        return super(DataFrame, self).to_period(freq=freq, axis=axis, copy=copy)\n\n    def to_records(\n        self, index=True, column_dtypes=None, index_dtypes=None\n    ) -> np.rec.recarray:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Convert ``DataFrame`` to a NumPy record array.\n        \"\"\"\n        return self._default_to_pandas(\n            pandas.DataFrame.to_records,\n            index=index,\n            column_dtypes=column_dtypes,\n            index_dtypes=index_dtypes,\n        )\n\n    @expanduser_path_arg(\"path\")\n    def to_stata(\n        self,\n        path: FilePath | WriteBuffer[bytes],\n        *,\n        convert_dates: dict[Hashable, str] | None = None,\n        write_index: bool = True,\n        byteorder: str | None = None,\n        time_stamp: datetime.datetime | None = None,\n        data_label: str | None = None,\n        variable_labels: dict[Hashable, str] | None = None,\n        version: int | None = 114,\n        convert_strl: Sequence[Hashable] | None = None,\n        compression: CompressionOptions = \"infer\",\n        storage_options: StorageOptions = None,\n        value_labels: dict[Hashable, dict[float | int, str]] | None = None,\n    ) -> None:\n        return self._default_to_pandas(\n            pandas.DataFrame.to_stata,\n            path,\n            convert_dates=convert_dates,\n            write_index=write_index,\n            byteorder=byteorder,\n            time_stamp=time_stamp,\n            data_label=data_label,\n            variable_labels=variable_labels,\n            version=version,\n            convert_strl=convert_strl,\n            compression=compression,\n            storage_options=storage_options,\n            value_labels=value_labels,\n        )\n\n    @expanduser_path_arg(\"path_or_buffer\")\n    def to_xml(\n        self,\n        path_or_buffer=None,\n        
index=True,\n        root_name=\"data\",\n        row_name=\"row\",\n        na_rep=None,\n        attr_cols=None,\n        elem_cols=None,\n        namespaces=None,\n        prefix=None,\n        encoding=\"utf-8\",\n        xml_declaration=True,\n        pretty_print=True,\n        parser=\"lxml\",\n        stylesheet=None,\n        compression=\"infer\",\n        storage_options=None,\n    ) -> Union[str, None]:\n        from modin.core.execution.dispatching.factories.dispatcher import (\n            FactoryDispatcher,\n        )\n\n        return FactoryDispatcher.to_xml(\n            self._query_compiler,\n            path_or_buffer=path_or_buffer,\n            index=index,\n            root_name=root_name,\n            row_name=row_name,\n            na_rep=na_rep,\n            attr_cols=attr_cols,\n            elem_cols=elem_cols,\n            namespaces=namespaces,\n            prefix=prefix,\n            encoding=encoding,\n            xml_declaration=xml_declaration,\n            pretty_print=pretty_print,\n            parser=parser,\n            stylesheet=stylesheet,\n            compression=compression,\n            storage_options=storage_options,\n        )\n\n    def to_timestamp(\n        self, freq=None, how=\"start\", axis=0, copy=None\n    ) -> DataFrame:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Cast to DatetimeIndex of timestamps, at *beginning* of period.\n        \"\"\"\n        return super(DataFrame, self).to_timestamp(\n            freq=freq, how=how, axis=axis, copy=copy\n        )\n\n    def truediv(\n        self, other, axis=\"columns\", level=None, fill_value=None\n    ) -> DataFrame:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Get floating division of ``DataFrame`` and `other`, element-wise (binary operator `truediv`).\n        \"\"\"\n        return self._binary_op(\n            \"truediv\",\n            other,\n            axis=axis,\n            level=level,\n            fill_value=fill_value,\n            
broadcast=isinstance(other, Series),\n        )\n\n    div = divide = truediv\n\n    def update(\n        self, other, join=\"left\", overwrite=True, filter_func=None, errors=\"ignore\"\n    ) -> None:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Modify in place using non-NA values from another ``DataFrame``.\n        \"\"\"\n        if not isinstance(other, DataFrame):\n            other = self.__constructor__(other)\n        query_compiler = self._query_compiler.df_update(\n            other._query_compiler,\n            join=join,\n            overwrite=overwrite,\n            filter_func=filter_func,\n            errors=errors,\n        )\n        self._update_inplace(new_query_compiler=query_compiler)\n\n    def where(\n        self,\n        cond,\n        other=np.nan,\n        *,\n        inplace=False,\n        axis=None,\n        level=None,\n    ) -> Union[DataFrame, None]:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Replace values where the condition is False.\n        \"\"\"\n        inplace = validate_bool_kwarg(inplace, \"inplace\")\n        if isinstance(other, Series) and axis is None:\n            raise ValueError(\"Must specify axis=0 or 1\")\n        if level is not None:\n            if isinstance(other, DataFrame):\n                other = other._query_compiler.to_pandas()\n            if isinstance(cond, DataFrame):\n                cond = cond._query_compiler.to_pandas()\n            new_query_compiler = self._default_to_pandas(\n                pandas.DataFrame.where,\n                cond,\n                other=other,\n                inplace=False,\n                axis=axis,\n                level=level,\n            )\n            return self._create_or_update_from_compiler(new_query_compiler, inplace)\n        cond = cond(self) if callable(cond) else cond\n\n        if not isinstance(cond, DataFrame):\n            if not hasattr(cond, \"shape\"):\n                cond = np.asanyarray(cond)\n            if cond.shape != 
self.shape:\n                raise ValueError(\"Array conditional must be same shape as self\")\n            cond = self.__constructor__(cond, index=self.index, columns=self.columns)\n        if isinstance(other, DataFrame):\n            other = other._query_compiler\n        else:\n            \"\"\"\n            Only infer the axis number when ``other`` will be made into a\n            series. When ``other`` is a dataframe, axis=None has a meaning\n            distinct from 0 and 1, e.g. at pandas 1.4.3:\n\n            import pandas as pd\n            df = pd.DataFrame([[1,2], [3, 4]], index=[1, 0])\n            cond = pd.DataFrame([[True,False], [False, True]], columns=[1, 0])\n            other = pd.DataFrame([[5,6], [7,8]], columns=[1, 0])\n\n            print(df.where(cond, other, axis=None))\n            0  1\n            1  1  7\n            0  6  4\n\n            print(df.where(cond, other, axis=0))\n\n            0  1\n            1  1  8\n            0  5  4\n\n            print(df.where(cond, other, axis=1))\n\n            0  1\n            1  1  5\n            0  8  4\n            \"\"\"\n            # _get_axis_number interprets lib.no_default as None, but where doesn't\n            # accept lib.no_default.\n            if axis == lib.no_default:\n                raise ValueError(\n                    \"No axis named NoDefault.no_default for object type DataFrame\"\n                )\n            axis = self._get_axis_number(axis)\n            if isinstance(other, Series):\n                other = other.reindex(\n                    self.index if axis == 0 else self.columns\n                )._query_compiler\n                if other._shape_hint is None:\n                    # To make the query compiler recognizable as a Series at lower levels\n                    other._shape_hint = \"column\"\n            elif is_list_like(other):\n                index = self.index if axis == 0 else self.columns\n                other = pandas.Series(other, 
index=index)\n        query_compiler = self._query_compiler.where(\n            cond._query_compiler, other, axis=axis, level=level\n        )\n        return self._create_or_update_from_compiler(query_compiler, inplace)\n\n    def _getitem_column(self, key) -> Series:\n        \"\"\"\n        Get column specified by `key`.\n\n        Parameters\n        ----------\n        key : hashable\n            Key that points to column to retrieve.\n\n        Returns\n        -------\n        Series\n            Selected column.\n        \"\"\"\n        if key not in self.keys():\n            raise KeyError(\"{}\".format(key))\n        s = self.__constructor__(\n            query_compiler=self._query_compiler.getitem_column_array([key])\n        ).squeeze(axis=1)\n        if isinstance(s, Series):\n            s._parent = self\n            s._parent_axis = 1\n        return s\n\n    @disable_logging\n    def __getattribute__(self, item: str) -> Any:\n        \"\"\"\n        Return attribute from the `BasePandasDataset`.\n\n        Parameters\n        ----------\n        item : str\n            Item to get.\n\n        Returns\n        -------\n        Any\n        \"\"\"\n        # NOTE that to get an attribute, python calls __getattribute__() first and\n        # then falls back to __getattr__() if the former raises an AttributeError.\n\n        if item not in EXTENSION_NO_LOOKUP:\n            extensions_result = self._getattribute__from_extension_impl(\n                item, __class__._extensions\n            )\n            if extensions_result is not sentinel:\n                return extensions_result\n        return super().__getattribute__(item)\n\n    @disable_logging\n    def __getattr__(self, key) -> Any:\n        \"\"\"\n        Return item identified by `key`.\n\n        Parameters\n        ----------\n        key : hashable\n            Key to get.\n\n        Returns\n        -------\n        Any\n\n        Notes\n        -----\n        First try to use 
`__getattribute__` method. If it fails\n        try to get `key` from ``DataFrame`` fields.\n        \"\"\"\n        # NOTE that to get an attribute, python calls __getattribute__() first and\n        # then falls back to __getattr__() if the former raises an AttributeError.\n        if key not in _ATTRS_NO_LOOKUP and key in self.columns:\n            return self[key]\n        raise AttributeError(f\"'DataFrame' object has no attribute '{key}'\")\n\n    def __setattr__(self, key, value) -> None:\n        \"\"\"\n        Set attribute `value` identified by `key`.\n\n        Parameters\n        ----------\n        key : hashable\n            Key to set.\n        value : Any\n            Value to set.\n\n        Returns\n        -------\n        None\n        \"\"\"\n        # While we let users assign to a column labeled \"x\" with \"df.x\" , there\n        # are some attributes that we should assume are NOT column names and\n        # therefore should follow the default Python object assignment\n        # behavior. These are:\n        # - anything in self.__dict__. 
This includes any attributes that the\n        #   user has added to the dataframe with, e.g., `df.c = 3`, and\n        #   any attribute that Modin has added to the frame, e.g.\n        #   `_query_compiler` and `_siblings`\n        # - `_query_compiler`, which Modin initializes before it appears in\n        #   __dict__\n        # - `_siblings`, which Modin initializes before it appears in\n        #   __dict__.\n        if key in (\"_query_compiler\", \"_siblings\") or key in self.__dict__:\n            pass\n        elif self._get_extension(key, __class__._extensions) is not sentinel:\n            return self._get_extension(key, __class__._extensions).__set__(self, value)\n        # we have to check for the key in `dir(self)` first in order not to trigger columns computation\n        elif key not in dir(self) and key in self:\n            self.__setitem__(key, value)\n            # Note: return immediately so we don't keep this `key` as dataframe state.\n            # `__getattr__` will return the columns not present in `dir(self)`, so we do not need\n            # to manually track this state in the `dir`.\n            return\n        elif is_list_like(value) and key not in [\"index\", \"columns\"]:\n            warnings.warn(\n                SET_DATAFRAME_ATTRIBUTE_WARNING,\n                UserWarning,\n            )\n        super().__setattr__(key, value)\n\n    def __setitem__(self, key, value) -> None:\n        \"\"\"\n        Set attribute `value` identified by `key`.\n\n        Parameters\n        ----------\n        key : Any\n            Key to set.\n        value : Any\n            Value to set.\n\n        Returns\n        -------\n        None\n        \"\"\"\n        if isinstance(key, slice):\n            return self._setitem_slice(key, value)\n\n        if hashable(key) and key not in self.columns:\n            if isinstance(value, Series) and self._query_compiler.get_axis_len(1) == 0:\n                # Note: 
column information is lost when assigning a query compiler\n                prev_index = self.columns\n                self._query_compiler = value._query_compiler.copy()\n                # Now that the data is appended, we need to update the column name for\n                # that column to `key`, otherwise the name could be incorrect.\n                self.columns = prev_index.insert(0, key)\n                return\n            # Do new column assignment after error checks and possible value modifications\n            self.insert(\n                loc=self._query_compiler.get_axis_len(1), column=key, value=value\n            )\n            return\n\n        if not hashable(key):\n            if isinstance(key, DataFrame) or isinstance(key, np.ndarray):\n                if isinstance(key, np.ndarray):\n                    if key.shape != self.shape:\n                        raise ValueError(\"Array must be same shape as DataFrame\")\n                    key = self.__constructor__(key, columns=self.columns)\n                return self.mask(key, value, inplace=True)\n\n            if isinstance(key, (list, pandas.Index)) and all(\n                (x in self.columns for x in key)\n            ):\n                if is_list_like(value):\n                    if not (hasattr(value, \"shape\") and hasattr(value, \"ndim\")):\n                        value = np.array(value)\n                    if len(key) != value.shape[-1]:\n                        raise ValueError(\"Columns must be same length as key\")\n                if isinstance(value, type(self)):\n                    # importing here to avoid circular import\n                    from .general import concat\n\n                    if not value.columns.equals(pandas.Index(key)):\n                        # we only need to change the labels, so shallow copy here\n                        value = value.copy(deep=False)\n                        value.columns = key\n\n                    # here we iterate over every 
column in the 'self' frame, then check if it's in the 'key'\n                    # and so has to be taken either from the 'value' or from the 'self'. After that,\n                    # we concatenate those mixed column chunks and get a dataframe with updated columns\n                    to_concat = []\n                    # columns to take for this chunk\n                    to_take = []\n                    # whether columns in this chunk are in the 'key' and have to be taken from the 'value'\n                    get_cols_from_value = False\n                    # an object to take columns from for this chunk\n                    src_obj = self\n                    for col in self.columns:\n                        if (col in key) != get_cols_from_value:\n                            if len(to_take):\n                                to_concat.append(src_obj[to_take])\n                            to_take = [col]\n                            get_cols_from_value = not get_cols_from_value\n                            src_obj = value if get_cols_from_value else self\n                        else:\n                            to_take.append(col)\n                    if len(to_take):\n                        to_concat.append(src_obj[to_take])\n\n                    new_qc = concat(to_concat, axis=1)._query_compiler\n                else:\n                    new_qc = self._query_compiler.write_items(\n                        slice(None),\n                        self.columns.get_indexer_for(key),\n                        value,\n                        need_columns_reindex=False,\n                    )\n                self._update_inplace(new_qc)\n                # self.loc[:, key] = value\n                return\n            elif (\n                isinstance(key, list)\n                and isinstance(value, type(self))\n                # Mixed case is more complicated, it's defaulting to pandas for now\n                and all((x not in self.columns for x in key))\n  
          ):\n                if len(key) != len(value.columns):\n                    raise ValueError(\"Columns must be same length as key\")\n\n                # Aligning the value's columns with the key\n                if not np.array_equal(value.columns, key):\n                    value = value.set_axis(key, axis=1)\n\n                new_qc = self._query_compiler.insert_item(\n                    axis=1,\n                    loc=self._query_compiler.get_axis_len(1),\n                    value=value._query_compiler,\n                    how=\"left\",\n                )\n                self._update_inplace(new_qc)\n                return\n\n            def setitem_unhashable_key(df, value):\n                df[key] = value\n                return df\n\n            return self._update_inplace(\n                self._default_to_pandas(setitem_unhashable_key, value)._query_compiler\n            )\n        if is_list_like(value):\n            if isinstance(value, (pandas.DataFrame, DataFrame)):\n                value = value[value.columns[0]].values\n            elif isinstance(value, np.ndarray):\n                assert (\n                    len(value.shape) < 3\n                ), \"Shape of new values must be compatible with manager shape\"\n                value = value.T.reshape(-1)\n                if len(self) > 0:\n                    value = value[: len(self)]\n            if not isinstance(value, (Series, Categorical, np.ndarray, list, range)):\n                value = list(value)\n\n        if isinstance(value, Series):\n            value = value._query_compiler\n        self._update_inplace(self._query_compiler.setitem(axis=0, key=key, value=value))\n\n    def __iter__(self) -> Iterable[Hashable]:\n        \"\"\"\n        Iterate over info axis.\n\n        Returns\n        -------\n        iterable\n            Iterator of the columns names.\n        \"\"\"\n        return iter(self.columns)\n\n    def __contains__(self, key) -> bool:\n        
\"\"\"\n        Check if `key` in the ``DataFrame.columns``.\n\n        Parameters\n        ----------\n        key : hashable\n            Key to check the presence in the columns.\n\n        Returns\n        -------\n        bool\n        \"\"\"\n        return self.columns.__contains__(key)\n\n    def __round__(self, decimals=0) -> DataFrame:\n        \"\"\"\n        Round each value in a ``DataFrame`` to the given number of decimals.\n\n        Parameters\n        ----------\n        decimals : int, default: 0\n            Number of decimal places to round to.\n\n        Returns\n        -------\n        DataFrame\n        \"\"\"\n        return self.round(decimals)\n\n    def __delitem__(self, key) -> None:\n        \"\"\"\n        Delete item identified by `key` label.\n\n        Parameters\n        ----------\n        key : hashable\n            Key to delete.\n        \"\"\"\n        if key not in self:\n            raise KeyError(key)\n        self._update_inplace(new_query_compiler=self._query_compiler.delitem(key))\n\n    @_doc_binary_op(\n        operation=\"integer division and modulo\",\n        bin_op=\"divmod\",\n        returns=\"tuple of two DataFrames\",\n    )\n    def __divmod__(self, right) -> tuple[DataFrame, DataFrame]:\n        return self._default_to_pandas(pandas.DataFrame.__divmod__, right)\n\n    @_doc_binary_op(\n        operation=\"integer division and modulo\",\n        bin_op=\"divmod\",\n        right=\"left\",\n        returns=\"tuple of two DataFrames\",\n    )\n    def __rdivmod__(self, left) -> tuple[DataFrame, DataFrame]:\n        return self._default_to_pandas(pandas.DataFrame.__rdivmod__, left)\n\n    __add__ = add\n    __iadd__ = add  # pragma: no cover\n    __radd__ = radd\n    __mul__ = mul\n    __imul__ = mul  # pragma: no cover\n    __rmul__ = rmul\n    __pow__ = pow\n    __ipow__ = pow  # pragma: no cover\n    __rpow__ = rpow\n    __sub__ = sub\n    __isub__ = sub  # pragma: no cover\n    __rsub__ = rsub\n    
__floordiv__ = floordiv\n    __ifloordiv__ = floordiv  # pragma: no cover\n    __rfloordiv__ = rfloordiv\n    __truediv__ = truediv\n    __itruediv__ = truediv  # pragma: no cover\n    __rtruediv__ = rtruediv\n    __mod__ = mod\n    __imod__ = mod  # pragma: no cover\n    __rmod__ = rmod\n    __rdiv__ = rdiv\n\n    def __dataframe__(self, nan_as_null: bool = False, allow_copy: bool = True):\n        \"\"\"\n        Get a Modin DataFrame that implements the dataframe exchange protocol.\n\n        See more about the protocol in https://data-apis.org/dataframe-protocol/latest/index.html.\n\n        Parameters\n        ----------\n        nan_as_null : bool, default: False\n            A keyword intended for the consumer to tell the producer\n            to overwrite null values in the data with ``NaN`` (or ``NaT``).\n            This currently has no effect; once support for nullable extension\n            dtypes is added, this value should be propagated to columns.\n        allow_copy : bool, default: True\n            A keyword that defines whether or not the library is allowed\n            to make a copy of the data. For example, copying data would be necessary\n            if a library supports strided buffers, given that this protocol\n            specifies contiguous buffers. 
Currently, if the flag is set to ``False``\n            and a copy is needed, a ``RuntimeError`` will be raised.\n\n        Returns\n        -------\n        ProtocolDataframe\n            A dataframe object following the dataframe protocol specification.\n        \"\"\"\n        return self._query_compiler.to_interchange_dataframe(\n            nan_as_null=nan_as_null, allow_copy=allow_copy\n        )\n\n    def __dataframe_consortium_standard__(\n        self, *, api_version: str | None = None\n    ):  # noqa: PR01, RT01\n        \"\"\"\n        Provide entry point to the Consortium DataFrame Standard API.\n\n        This is developed and maintained outside of Modin.\n        Please report any issues to https://github.com/data-apis/dataframe-api-compat.\n        \"\"\"\n        dataframe_api_compat = import_optional_dependency(\n            \"dataframe_api_compat\", \"implementation\"\n        )\n        convert_to_standard_compliant_dataframe = (\n            dataframe_api_compat.modin_standard.convert_to_standard_compliant_dataframe\n        )\n        return convert_to_standard_compliant_dataframe(self, api_version=api_version)\n\n    @property\n    def attrs(self) -> dict:  # noqa: RT01, D200\n        \"\"\"\n        Return dictionary of global attributes of this dataset.\n        \"\"\"\n\n        def attrs(df):\n            return df.attrs\n\n        return self._default_to_pandas(attrs)\n\n    @property\n    def style(self):  # noqa: RT01, D200\n        \"\"\"\n        Return a Styler object.\n        \"\"\"\n\n        def style(df):\n            \"\"\"Define __name__ attr because properties do not have it.\"\"\"\n            return df.style\n\n        return self._default_to_pandas(style)\n\n    def reindex_like(\n        self: DataFrame,\n        other,\n        method=None,\n        copy: Optional[bool] = None,\n        limit=None,\n        tolerance=None,\n    ) -> DataFrame:\n        if copy is None:\n            copy = True\n        # docs say \"Same 
as calling .reindex(index=other.index, columns=other.columns,...).\":\n        # https://pandas.pydata.org/pandas-docs/version/1.4/reference/api/pandas.DataFrame.reindex_like.html\n        return self.reindex(\n            index=other.index,\n            columns=other.columns,\n            method=method,\n            copy=copy,\n            limit=limit,\n            tolerance=tolerance,\n        )\n\n    def _create_or_update_from_compiler(\n        self, new_query_compiler, inplace=False\n    ) -> Union[DataFrame, None]:\n        \"\"\"\n        Return or update a ``DataFrame`` with given `new_query_compiler`.\n\n        Parameters\n        ----------\n        new_query_compiler : PandasQueryCompiler\n            QueryCompiler to use to manage the data.\n        inplace : bool, default: False\n            Whether or not to perform update or creation inplace.\n\n        Returns\n        -------\n        DataFrame or None\n            None if update was done, ``DataFrame`` otherwise.\n        \"\"\"\n        assert isinstance(\n            new_query_compiler, self._query_compiler.__class__.__bases__\n        ), \"Invalid Query Compiler object: {}\".format(type(new_query_compiler))\n        if not inplace:\n            return self.__constructor__(query_compiler=new_query_compiler)\n        else:\n            self._update_inplace(new_query_compiler=new_query_compiler)\n\n    def _get_numeric_data(self, axis: int) -> DataFrame:\n        \"\"\"\n        Grab only numeric data from ``DataFrame``.\n\n        Parameters\n        ----------\n        axis : {0, 1}\n            Axis to inspect on having numeric types only.\n\n        Returns\n        -------\n        DataFrame\n            ``DataFrame`` with numeric data.\n        \"\"\"\n        # Pandas ignores `numeric_only` if `axis` is 1, but we do have to drop\n        # non-numeric columns if `axis` is 0.\n        if axis != 0:\n            return self\n        return self.drop(\n            columns=[\n                
i for i in self.dtypes.index if not is_numeric_dtype(self.dtypes[i])\n            ]\n        )\n\n    def _validate_dtypes(self, numeric_only=False) -> None:\n        \"\"\"\n        Check that all the dtypes are the same.\n\n        Parameters\n        ----------\n        numeric_only : bool, default: False\n            Whether or not to allow only numeric data.\n            If True and non-numeric data is found, exception\n            will be raised.\n        \"\"\"\n        # Series.__getitem__ treating keys as positions is deprecated. In a future version,\n        # integer keys will always be treated as labels (consistent with DataFrame behavior).\n        # To access a value by position, use `ser.iloc[pos]`\n        dtypes = self._query_compiler.get_dtypes_set()\n        dtype = next(iter(dtypes))\n        for t in dtypes:\n            if numeric_only and not is_numeric_dtype(t):\n                raise TypeError(\"{0} is not a numeric data type\".format(t))\n            elif not numeric_only and t != dtype:\n                raise TypeError(\n                    \"Cannot compare type '{0}' with type '{1}'\".format(t, dtype)\n                )\n\n    def _validate_dtypes_min_max(self, axis, numeric_only) -> DataFrame:\n        \"\"\"\n        Validate data dtype for `min` and `max` methods.\n\n        Parameters\n        ----------\n        axis : {0, 1}\n            Axis to validate over.\n        numeric_only : bool\n            Whether or not to allow only numeric data.\n            If True and non-numeric data is found, exception.\n\n        Returns\n        -------\n        DataFrame\n        \"\"\"\n        # If our DataFrame has both numeric and non-numeric dtypes then\n        # comparisons between these types do not make sense and we must raise a\n        # TypeError. 
We must check explicitly if\n        # numeric_only is False because if it is None, it will default to True\n        # if the operation fails with mixed dtypes.\n        if (\n            axis\n            and numeric_only is False\n            and not all([is_numeric_dtype(dtype) for dtype in self.dtypes])\n        ):\n            raise TypeError(\"Cannot compare Numeric and Non-Numeric Types\")\n\n        return self._get_numeric_data(axis) if numeric_only else self\n\n    def _validate_dtypes_prod_mean(\n        self, axis, numeric_only, ignore_axis=False\n    ) -> DataFrame:\n        \"\"\"\n        Validate data dtype for `prod` and `mean` methods.\n\n        Parameters\n        ----------\n        axis : {0, 1}\n            Axis to validate over.\n        numeric_only : bool\n            Whether or not to allow only numeric data.\n            If True and non-numeric data is found, exception\n            will be raised.\n        ignore_axis : bool, default: False\n            Whether or not to ignore `axis` parameter.\n\n        Returns\n        -------\n        DataFrame\n        \"\"\"\n        # If our DataFrame has both numeric and non-numeric dtypes then\n        # operations between these types do not make sense and we must raise a\n        # TypeError. 
We must check explicitly if\n        # numeric_only is False because if it is None, it will default to True\n        # if the operation fails with mixed dtypes.\n        if (\n            (axis or ignore_axis)\n            and numeric_only is False\n            and not all([is_numeric_dtype(dtype) for dtype in self.dtypes])\n        ):\n            raise TypeError(\"Cannot operate on Numeric and Non-Numeric Types\")\n\n        return self._get_numeric_data(axis) if numeric_only else self\n\n    def _to_pandas(self) -> pandas.DataFrame:\n        \"\"\"\n        Convert Modin ``DataFrame`` to pandas ``DataFrame``.\n\n        Recommended conversion method: `dataframe.modin.to_pandas()`.\n\n        Returns\n        -------\n        pandas.DataFrame\n        \"\"\"\n        return self._query_compiler.to_pandas()\n\n    def _validate_eval_query(self, expr, **kwargs) -> None:\n        \"\"\"\n        Validate the arguments of ``eval`` and ``query`` functions.\n\n        Parameters\n        ----------\n        expr : str\n            The expression to evaluate. 
This string cannot contain any\n            Python statements, only Python expressions.\n        **kwargs : dict\n            Optional arguments of ``eval`` and ``query`` functions.\n        \"\"\"\n        if isinstance(expr, str) and expr == \"\":\n            raise ValueError(\"expr cannot be an empty string\")\n\n        if isinstance(expr, str) and \"not\" in expr:\n            if \"parser\" in kwargs and kwargs[\"parser\"] == \"python\":\n                ErrorMessage.not_implemented(\n                    \"'Not' nodes are not implemented.\"\n                )  # pragma: no cover\n\n    def _reduce_dimension(self, query_compiler: BaseQueryCompiler) -> Series:\n        \"\"\"\n        Reduce the dimension of data from the `query_compiler`.\n\n        Parameters\n        ----------\n        query_compiler : BaseQueryCompiler\n            Query compiler to retrieve the data.\n\n        Returns\n        -------\n        Series\n        \"\"\"\n        return Series(query_compiler=query_compiler)\n\n    def _set_axis_name(self, name, axis=0, inplace=False) -> Union[DataFrame, None]:\n        \"\"\"\n        Alter the name or names of the axis.\n\n        Parameters\n        ----------\n        name : str or list of str\n            Name for the Index, or list of names for the MultiIndex.\n        axis : str or int, default: 0\n            The axis to set the label.\n            0 or 'index' for the index, 1 or 'columns' for the columns.\n        inplace : bool, default: False\n            Whether to modify `self` directly or return a copy.\n\n        Returns\n        -------\n        DataFrame or None\n        \"\"\"\n        axis = self._get_axis_number(axis)\n        renamed = self if inplace else self.copy()\n        if axis == 0:\n            renamed.index = renamed.index.set_names(name)\n        else:\n            renamed.columns = renamed.columns.set_names(name)\n        if not inplace:\n            return renamed\n\n    def _to_datetime(self, **kwargs) -> 
Series:\n        \"\"\"\n        Convert `self` to datetime.\n\n        Parameters\n        ----------\n        **kwargs : dict\n            Optional arguments to use during query compiler's\n            `to_datetime` invocation.\n\n        Returns\n        -------\n        Series of datetime64 dtype\n        \"\"\"\n        return self._reduce_dimension(\n            query_compiler=self._query_compiler.to_datetime(**kwargs)\n        )\n\n    def _getitem(self, key) -> Union[DataFrame, Series]:\n        \"\"\"\n        Get the data specified by `key` for this ``DataFrame``.\n\n        Parameters\n        ----------\n        key : callable, Series, DataFrame, np.ndarray, pandas.Index or list\n            Data identifiers to retrieve.\n\n        Returns\n        -------\n        Series or DataFrame\n            Retrieved data.\n        \"\"\"\n        key = apply_if_callable(key, self)\n        # Shortcut if key is an actual column\n        is_mi_columns = self._query_compiler.has_multiindex(axis=1)\n        try:\n            if key in self.columns and not is_mi_columns:\n                return self._getitem_column(key)\n        except (KeyError, ValueError, TypeError):\n            pass\n        if isinstance(key, Series):\n            return self.__constructor__(\n                query_compiler=self._query_compiler.getitem_array(key._query_compiler)\n            )\n        elif isinstance(key, (np.ndarray, pandas.Index, list)):\n            return self.__constructor__(\n                query_compiler=self._query_compiler.getitem_array(key)\n            )\n        elif isinstance(key, DataFrame):\n            return self.where(key)\n        elif is_mi_columns:\n            return self._default_to_pandas(pandas.DataFrame.__getitem__, key)\n            # return self._getitem_multilevel(key)\n        else:\n            return self._getitem_column(key)\n\n    # Persistence support methods - BEGIN\n    @classmethod\n    def _inflate_light(cls, query_compiler, source_pid) 
-> DataFrame:\n        \"\"\"\n        Re-creates the object from previously-serialized lightweight representation.\n\n        The method is used for faster but not disk-storable persistence.\n\n        Parameters\n        ----------\n        query_compiler : BaseQueryCompiler\n            Query compiler to use for object re-creation.\n        source_pid : int\n            Determines whether a Modin or pandas object needs to be created.\n            Modin objects are created only on the main process.\n\n        Returns\n        -------\n        DataFrame\n            New ``DataFrame`` based on the `query_compiler`.\n        \"\"\"\n        if os.getpid() != source_pid:\n            return query_compiler.to_pandas()\n        # The current logic does not involve creating Modin objects\n        # and manipulation with them in worker processes\n        return cls(query_compiler=query_compiler)\n\n    @classmethod\n    def _inflate_full(cls, pandas_df, source_pid) -> DataFrame:\n        \"\"\"\n        Re-creates the object from previously-serialized disk-storable representation.\n\n        Parameters\n        ----------\n        pandas_df : pandas.DataFrame\n            Data to use for object re-creation.\n        source_pid : int\n            Determines whether a Modin or pandas object needs to be created.\n            Modin objects are created only on the main process.\n\n        Returns\n        -------\n        DataFrame\n            New ``DataFrame`` based on the `pandas_df`.\n        \"\"\"\n        if os.getpid() != source_pid:\n            return pandas_df\n        # The current logic does not involve creating Modin objects\n        # and manipulation with them in worker processes\n        return cls(data=from_pandas(pandas_df))\n\n    def __reduce__(self):\n        self._query_compiler.finalize()\n        pid = os.getpid()\n        if (\n            PersistentPickle.get()\n            or not self._query_compiler.support_materialization_in_worker_process()\n    
    ):\n            return self._inflate_full, (self._to_pandas(), pid)\n        return self._inflate_light, (self._query_compiler, pid)\n\n    # Persistence support methods - END\n\n    @doc(SET_BACKEND_DOC, class_name=__qualname__)\n    def set_backend(\n        self,\n        backend: str,\n        inplace: bool = False,\n        *,\n        switch_operation: Optional[str] = None,\n    ) -> Optional[Self]:\n        return super().set_backend(\n            backend=backend, inplace=inplace, switch_operation=switch_operation\n        )\n\n    move_to = set_backend\n\n    @doc(GET_BACKEND_DOC, class_name=__qualname__)\n    @disable_logging\n    def get_backend(self) -> str:\n        return super().get_backend()\n\n    @disable_logging\n    def __delattr__(self, name: str) -> None:\n        \"\"\"\n        Delete attribute `name`.\n\n        Parameters\n        ----------\n        name : str\n            Name of the attribute to delete.\n\n        Returns\n        -------\n        None\n        \"\"\"\n        extension = self._get_extension(name, __class__._extensions)\n        if extension is not sentinel:\n            return extension.__delete__(self)\n        return super().__delattr__(name)\n\n    @disable_logging\n    @_inherit_docstrings(BasePandasDataset._copy_into)\n    def _copy_into(self, other: DataFrame) -> None:\n        other._query_compiler = self._query_compiler\n        other._siblings = self._siblings\n        return None\n"
  },
  {
    "path": "modin/pandas/errors/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\n\"\"\"The module is needed to allow the following import `import modin.pandas.errors`.\"\"\"\n\nfrom pandas.errors import *  # noqa: F403, F401\nfrom pandas.errors import __all__  # noqa: F401\n"
  },
  {
    "path": "modin/pandas/general.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Implement pandas general API.\"\"\"\n\nfrom __future__ import annotations\n\nimport warnings\nfrom typing import Hashable, Iterable, Mapping, Optional, Union\n\nimport numpy as np\nimport pandas\nfrom pandas._libs.lib import NoDefault, no_default\nfrom pandas._typing import ArrayLike, DtypeBackend, Scalar, npt\nfrom pandas.core.dtypes.common import is_list_like\n\nfrom modin.core.storage_formats import BaseQueryCompiler\nfrom modin.core.storage_formats.pandas.query_compiler_caster import (\n    wrap_free_function_in_argument_caster,\n)\nfrom modin.logging import enable_logging\nfrom modin.pandas.io import to_pandas\nfrom modin.utils import _inherit_docstrings, _maybe_warn_on_default\n\nfrom .base import BasePandasDataset\nfrom .dataframe import DataFrame\nfrom .series import Series\n\n\n@enable_logging\ndef _isna(\n    obj,\n) -> bool | npt.NDArray[np.bool_] | Series | DataFrame:  # noqa: PR01, RT01, D200\n    \"\"\"\n    Detect missing values for an array-like object.\n    \"\"\"\n    if isinstance(obj, BasePandasDataset):\n        return obj.isna()\n    else:\n        return pandas.isna(obj)\n\n\n_inherit_isna_docstring = _inherit_docstrings(pandas.isnull, 
apilink=\"pandas.isna\")\n\nisna = _inherit_isna_docstring(wrap_free_function_in_argument_caster(\"isna\")(_isna))\n\nisnull = _inherit_isna_docstring(wrap_free_function_in_argument_caster(\"isnull\")(_isna))\n\n\n@enable_logging\ndef _notna(\n    obj,\n) -> bool | npt.NDArray[np.bool_] | Series | DataFrame:  # noqa: PR01, RT01, D200\n    \"\"\"\n    Detect non-missing values for an array-like object.\n    \"\"\"\n    if isinstance(obj, BasePandasDataset):\n        return obj.notna()\n    else:\n        return pandas.notna(obj)\n\n\n_inherit_notna_docstring = _inherit_docstrings(pandas.notna, apilink=\"pandas.notna\")\n\nnotnull = _inherit_notna_docstring(\n    wrap_free_function_in_argument_caster(\"notnull\")(_notna)\n)\n\nnotna = _inherit_notna_docstring(wrap_free_function_in_argument_caster(\"notna\")(_notna))\n\n\n@_inherit_docstrings(pandas.merge, apilink=\"pandas.merge\")\n@enable_logging\n@wrap_free_function_in_argument_caster(\"merge\")\ndef merge(\n    left,\n    right,\n    how: str = \"inner\",\n    on=None,\n    left_on=None,\n    right_on=None,\n    left_index: bool = False,\n    right_index: bool = False,\n    sort: bool = False,\n    suffixes=(\"_x\", \"_y\"),\n    copy: Optional[bool] = None,\n    indicator: bool = False,\n    validate=None,\n) -> DataFrame:  # noqa: PR01, RT01, D200\n    \"\"\"\n    Merge DataFrame or named Series objects with a database-style join.\n    \"\"\"\n    if isinstance(left, Series):\n        if left.name is None:\n            raise ValueError(\"Cannot merge a Series without a name\")\n        else:\n            left = left.to_frame()\n\n    if not isinstance(left, DataFrame):\n        raise TypeError(\n            f\"Can only merge Series or DataFrame objects, a {type(left)} was passed\"\n        )\n\n    return left.merge(\n        right,\n        how=how,\n        on=on,\n        left_on=left_on,\n        right_on=right_on,\n        left_index=left_index,\n        right_index=right_index,\n        sort=sort,\n        
suffixes=suffixes,\n        copy=copy,\n        indicator=indicator,\n        validate=validate,\n    )\n\n\n@_inherit_docstrings(pandas.merge_ordered, apilink=\"pandas.merge_ordered\")\n@enable_logging\n@wrap_free_function_in_argument_caster(\"merge_ordered\")\ndef merge_ordered(\n    left,\n    right,\n    on=None,\n    left_on=None,\n    right_on=None,\n    left_by=None,\n    right_by=None,\n    fill_method=None,\n    suffixes=(\"_x\", \"_y\"),\n    how: str = \"outer\",\n) -> DataFrame:  # noqa: PR01, RT01, D200\n    \"\"\"\n    Perform a merge for ordered data with optional filling/interpolation.\n    \"\"\"\n    for operand in (left, right):\n        if not isinstance(operand, (Series, DataFrame)):\n            raise TypeError(\n                f\"Can only merge Series or DataFrame objects, a {type(operand)} was passed\"\n            )\n\n    return DataFrame(\n        query_compiler=left._query_compiler.merge_ordered(\n            right._query_compiler,\n            on=on,\n            left_on=left_on,\n            right_on=right_on,\n            left_by=left_by,\n            right_by=right_by,\n            fill_method=fill_method,\n            suffixes=suffixes,\n            how=how,\n        )\n    )\n\n\n@_inherit_docstrings(pandas.merge_asof, apilink=\"pandas.merge_asof\")\n@enable_logging\n@wrap_free_function_in_argument_caster(\"merge_asof\")\ndef merge_asof(\n    left,\n    right,\n    on=None,\n    left_on=None,\n    right_on=None,\n    left_index: bool = False,\n    right_index: bool = False,\n    by=None,\n    left_by=None,\n    right_by=None,\n    suffixes=(\"_x\", \"_y\"),\n    tolerance=None,\n    allow_exact_matches: bool = True,\n    direction: str = \"backward\",\n) -> DataFrame:  # noqa: PR01, RT01, D200\n    \"\"\"\n    Perform a merge by key distance.\n    \"\"\"\n    if not isinstance(left, DataFrame):\n        raise ValueError(\n            \"can not merge DataFrame with instance of type {}\".format(type(right))\n        )\n    
left._query_compiler._maybe_warn_on_default(message=\"`merge_asof`\")\n\n    # As of Pandas 1.2 these should raise an error; before that it did\n    # something likely random:\n    if (\n        (on and (left_index or right_index))\n        or (left_on and left_index)\n        or (right_on and right_index)\n    ):\n        raise ValueError(\"Can't combine left/right_index with left/right_on or on.\")\n\n    if on is not None:\n        if left_on is not None or right_on is not None:\n            raise ValueError(\"If 'on' is set, 'left_on' and 'right_on' can't be set.\")\n        left_on = on\n        right_on = on\n\n    if by is not None:\n        if left_by is not None or right_by is not None:\n            raise ValueError(\"Can't have both 'by' and 'left_by' or 'right_by'\")\n        left_by = right_by = by\n\n    if left_on is None and not left_index:\n        raise ValueError(\"Must pass on, left_on, or left_index=True\")\n\n    if right_on is None and not right_index:\n        raise ValueError(\"Must pass on, right_on, or right_index=True\")\n\n    return DataFrame(\n        query_compiler=left._query_compiler.merge_asof(\n            right._query_compiler,\n            left_on,\n            right_on,\n            left_index,\n            right_index,\n            left_by,\n            right_by,\n            suffixes,\n            tolerance,\n            allow_exact_matches,\n            direction,\n        )\n    )\n\n\n@_inherit_docstrings(pandas.pivot_table, apilink=\"pandas.pivot_table\")\n@enable_logging\n@wrap_free_function_in_argument_caster(\"pivot_table\")\ndef pivot_table(\n    data,\n    values=None,\n    index=None,\n    columns=None,\n    aggfunc=\"mean\",\n    fill_value=None,\n    margins=False,\n    dropna=True,\n    margins_name=\"All\",\n    observed=no_default,\n    sort=True,\n) -> DataFrame:\n    if not isinstance(data, DataFrame):\n        raise ValueError(\n            \"can not create pivot table with instance of type 
{}\".format(type(data))\n        )\n\n    return data.pivot_table(\n        values=values,\n        index=index,\n        columns=columns,\n        aggfunc=aggfunc,\n        fill_value=fill_value,\n        margins=margins,\n        dropna=dropna,\n        margins_name=margins_name,\n        observed=observed,\n        sort=sort,\n    )\n\n\n@_inherit_docstrings(pandas.pivot, apilink=\"pandas.pivot\")\n@enable_logging\n@wrap_free_function_in_argument_caster(\"pivot\")\ndef pivot(\n    data, *, columns, index=no_default, values=no_default\n) -> DataFrame:  # noqa: PR01, RT01, D200\n    \"\"\"\n    Return reshaped DataFrame organized by given index / column values.\n    \"\"\"\n    if not isinstance(data, DataFrame):\n        raise ValueError(\"can not pivot with instance of type {}\".format(type(data)))\n    return data.pivot(index=index, columns=columns, values=values)\n\n\n@_inherit_docstrings(pandas.to_numeric, apilink=\"pandas.to_numeric\")\n@enable_logging\n@wrap_free_function_in_argument_caster(\"to_numeric\")\ndef to_numeric(\n    arg,\n    errors=\"raise\",\n    downcast=None,\n    dtype_backend: Union[DtypeBackend, NoDefault] = no_default,\n) -> Scalar | np.ndarray | Series:  # noqa: PR01, RT01, D200\n    \"\"\"\n    Convert argument to a numeric type.\n    \"\"\"\n    if not isinstance(arg, Series):\n        return pandas.to_numeric(\n            arg, errors=errors, downcast=downcast, dtype_backend=dtype_backend\n        )\n    return arg._to_numeric(\n        errors=errors, downcast=downcast, dtype_backend=dtype_backend\n    )\n\n\n@_inherit_docstrings(pandas.qcut, apilink=\"pandas.qcut\")\n@enable_logging\n@wrap_free_function_in_argument_caster(\"qcut\")\ndef qcut(\n    x, q, labels=None, retbins=False, precision=3, duplicates=\"raise\"\n):  # noqa: PR01, RT01, D200\n    \"\"\"\n    Quantile-based discretization function.\n    \"\"\"\n    kwargs = {\n        \"labels\": labels,\n        \"retbins\": retbins,\n        \"precision\": precision,\n        
\"duplicates\": duplicates,\n    }\n    if not isinstance(x, Series):\n        return pandas.qcut(x, q, **kwargs)\n    return x._qcut(q, **kwargs)\n\n\n@_inherit_docstrings(pandas.cut, apilink=\"pandas.cut\")\n@enable_logging\n@wrap_free_function_in_argument_caster(\"cut\")\ndef cut(\n    x,\n    bins,\n    right: bool = True,\n    labels=None,\n    retbins: bool = False,\n    precision: int = 3,\n    include_lowest: bool = False,\n    duplicates: str = \"raise\",\n    ordered: bool = True,\n):\n    if isinstance(x, DataFrame):\n        raise ValueError(\"Input array must be 1 dimensional\")\n    if not isinstance(x, Series):\n        _maybe_warn_on_default(\n            reason=f\"pd.cut is not supported on objects of type {type(x)}\"\n        )\n        import pandas\n\n        return pandas.cut(\n            x,\n            bins,\n            right=right,\n            labels=labels,\n            retbins=retbins,\n            precision=precision,\n            include_lowest=include_lowest,\n            duplicates=duplicates,\n            ordered=ordered,\n        )\n\n    def _wrap_in_series_object(qc_result):\n        if isinstance(qc_result, type(x._query_compiler)):\n            return Series(query_compiler=qc_result)\n        if isinstance(qc_result, (tuple, list)):\n            return tuple([_wrap_in_series_object(result) for result in qc_result])\n        return qc_result\n\n    return _wrap_in_series_object(\n        x._query_compiler.cut(\n            bins,\n            right=right,\n            labels=labels,\n            retbins=retbins,\n            precision=precision,\n            include_lowest=include_lowest,\n            duplicates=duplicates,\n            ordered=ordered,\n        )\n    )\n\n\n@_inherit_docstrings(pandas.unique, apilink=\"pandas.unique\")\n@enable_logging\n@wrap_free_function_in_argument_caster(\"unique\")\ndef unique(values) -> ArrayLike:  # noqa: PR01, RT01, D200\n    \"\"\"\n    Return unique values based on a hash table.\n    
\"\"\"\n    return Series(values).unique()\n\n\n# Adding docstring since pandas docs don't have web section for this function.\n@enable_logging\n@wrap_free_function_in_argument_caster(\"value_counts\")\ndef value_counts(\n    values, sort=True, ascending=False, normalize=False, bins=None, dropna=True\n) -> Series:\n    \"\"\"\n    Compute a histogram of the counts of non-null values.\n\n    Parameters\n    ----------\n    values : ndarray (1-d)\n        Values to perform computation.\n    sort : bool, default: True\n        Sort by values.\n    ascending : bool, default: False\n        Sort in ascending order.\n    normalize : bool, default: False\n        If True then compute a relative histogram.\n    bins : integer, optional\n        Rather than count values, group them into half-open bins,\n        convenience for pd.cut, only works with numeric data.\n    dropna : bool, default: True\n        Don't include counts of NaN.\n\n    Returns\n    -------\n    Series\n    \"\"\"\n    warnings.warn(\n        \"pandas.value_counts is deprecated and will be removed in a \"\n        + \"future version. 
Use pd.Series(obj).value_counts() instead.\",\n        FutureWarning,\n    )\n    return Series(values).value_counts(\n        sort=sort,\n        ascending=ascending,\n        normalize=normalize,\n        bins=bins,\n        dropna=dropna,\n    )\n\n\n@_inherit_docstrings(pandas.concat, apilink=\"pandas.concat\")\n@enable_logging\n@wrap_free_function_in_argument_caster(name=\"concat\")\ndef concat(\n    objs: \"Iterable[DataFrame | Series] | Mapping[Hashable, DataFrame | Series]\",\n    *,\n    axis=0,\n    join=\"outer\",\n    ignore_index: bool = False,\n    keys=None,\n    levels=None,\n    names=None,\n    verify_integrity: bool = False,\n    sort: bool = False,\n    copy: Optional[bool] = None,\n) -> DataFrame | Series:  # noqa: PR01, RT01, D200\n    \"\"\"\n    Concatenate Modin objects along a particular axis.\n    \"\"\"\n    if isinstance(objs, (pandas.Series, Series, DataFrame, str, pandas.DataFrame)):\n        raise TypeError(\n            \"first argument must be an iterable of pandas \"\n            + \"objects, you passed an object of type \"\n            + f'\"{type(objs).__name__}\"'\n        )\n    axis = pandas.DataFrame()._get_axis_number(axis)\n    if isinstance(objs, dict):\n        input_list_of_objs = list(objs.values())\n    else:\n        input_list_of_objs = list(objs)\n    if len(input_list_of_objs) == 0:\n        raise ValueError(\"No objects to concatenate\")\n\n    list_of_objs = [obj for obj in input_list_of_objs if obj is not None]\n\n    if len(list_of_objs) == 0:\n        raise ValueError(\"All objects passed were None\")\n    try:\n        type_check = next(\n            obj\n            for obj in list_of_objs\n            if not isinstance(obj, (pandas.Series, Series, pandas.DataFrame, DataFrame))\n        )\n    except StopIteration:\n        type_check = None\n    if type_check is not None:\n        raise ValueError(\n            'cannot concatenate object of type \"{0}\"; only '\n            + \"modin.pandas.Series \"\n     
       + \"and modin.pandas.DataFrame objs are \"\n            + \"valid\",\n            type(type_check),\n        )\n    all_series = all(isinstance(obj, Series) for obj in list_of_objs)\n    if all_series and axis == 0:\n        return Series(\n            query_compiler=list_of_objs[0]._query_compiler.concat(\n                axis,\n                [o._query_compiler for o in list_of_objs[1:]],\n                join=join,\n                join_axes=None,\n                ignore_index=ignore_index,\n                keys=None,\n                levels=None,\n                names=None,\n                verify_integrity=False,\n                copy=True,\n                sort=sort,\n            )\n        )\n    if join == \"outer\":\n        # Filter out empties\n        list_of_objs = [\n            obj\n            for obj in list_of_objs\n            if (\n                isinstance(obj, (Series, pandas.Series))\n                or (isinstance(obj, DataFrame) and obj._query_compiler.lazy_shape)\n                or sum(obj.shape) > 0\n            )\n        ]\n    elif join != \"inner\":\n        raise ValueError(\n            \"Only can inner (intersect) or outer (union) join the other axis\"\n        )\n    list_of_objs = [\n        (\n            obj._query_compiler\n            if isinstance(obj, DataFrame)\n            else DataFrame(obj)._query_compiler\n        )\n        for obj in list_of_objs\n    ]\n    if keys is None and isinstance(objs, dict):\n        keys = list(objs.keys())\n    if keys is not None:\n        if all_series:\n            new_idx = keys\n        else:\n            list_of_objs = [\n                list_of_objs[i] for i in range(min(len(list_of_objs), len(keys)))\n            ]\n            new_idx_labels = {\n                k: v.index if axis == 0 else v.columns\n                for k, v in zip(keys, list_of_objs)\n            }\n            tuples = [\n                (k, *o) if isinstance(o, tuple) else (k, o)\n                
for k, obj in new_idx_labels.items()\n                for o in obj\n            ]\n            new_idx = pandas.MultiIndex.from_tuples(tuples)\n            if names is not None:\n                new_idx.names = names\n            else:\n                old_name = _determine_name(list_of_objs, axis)\n                if old_name is not None:\n                    new_idx.names = [None] + old_name\n    else:\n        new_idx = None\n\n    if len(list_of_objs) == 0:\n        return DataFrame(\n            index=input_list_of_objs[0].index.append(\n                [f.index for f in input_list_of_objs[1:]]\n            )\n        )\n\n    new_query_compiler = list_of_objs[0].concat(\n        axis,\n        list_of_objs[1:],\n        join=join,\n        join_axes=None,\n        ignore_index=ignore_index,\n        keys=None,\n        levels=None,\n        names=None,\n        verify_integrity=False,\n        copy=True,\n        sort=sort,\n    )\n    result_df = DataFrame(query_compiler=new_query_compiler)\n    if new_idx is not None:\n        if axis == 0:\n            result_df.index = new_idx\n        else:\n            result_df.columns = new_idx\n    return result_df\n\n\n@_inherit_docstrings(pandas.to_datetime, apilink=\"pandas.to_datetime\")\n@enable_logging\n@wrap_free_function_in_argument_caster(\"to_datetime\")\ndef to_datetime(\n    arg,\n    errors=\"raise\",\n    dayfirst=False,\n    yearfirst=False,\n    utc=False,\n    format=None,\n    exact=no_default,\n    unit=None,\n    infer_datetime_format=no_default,\n    origin=\"unix\",\n    cache=True,\n) -> Scalar | ArrayLike | Series | DataFrame:  # noqa: PR01, RT01, D200\n    \"\"\"\n    Convert argument to datetime.\n    \"\"\"\n    if not hasattr(arg, \"_to_datetime\"):\n        return pandas.to_datetime(\n            arg,\n            errors=errors,\n            dayfirst=dayfirst,\n            yearfirst=yearfirst,\n            utc=utc,\n            format=format,\n            exact=exact,\n            
unit=unit,\n            infer_datetime_format=infer_datetime_format,\n            origin=origin,\n            cache=cache,\n        )\n    return arg._to_datetime(\n        errors=errors,\n        dayfirst=dayfirst,\n        yearfirst=yearfirst,\n        utc=utc,\n        format=format,\n        exact=exact,\n        unit=unit,\n        infer_datetime_format=infer_datetime_format,\n        origin=origin,\n        cache=cache,\n    )\n\n\n@_inherit_docstrings(pandas.get_dummies, apilink=\"pandas.get_dummies\")\n@enable_logging\n@wrap_free_function_in_argument_caster(\"get_dummies\")\ndef get_dummies(\n    data,\n    prefix=None,\n    prefix_sep=\"_\",\n    dummy_na=False,\n    columns=None,\n    sparse=False,\n    drop_first=False,\n    dtype=None,\n) -> DataFrame:  # noqa: PR01, RT01, D200\n    \"\"\"\n    Convert categorical variable into dummy/indicator variables.\n    \"\"\"\n    if sparse:\n        raise NotImplementedError(\n            \"SparseArray is not implemented. \"\n            + \"To contribute to Modin, please visit \"\n            + \"github.com/modin-project/modin.\"\n        )\n    if not isinstance(data, DataFrame):\n        _maybe_warn_on_default(\"`get_dummies` on non-DataFrame\")\n        if isinstance(data, Series):\n            data = data._to_pandas()\n        return DataFrame(\n            pandas.get_dummies(\n                data,\n                prefix=prefix,\n                prefix_sep=prefix_sep,\n                dummy_na=dummy_na,\n                columns=columns,\n                sparse=sparse,\n                drop_first=drop_first,\n                dtype=dtype,\n            )\n        )\n    else:\n        new_manager = data._query_compiler.get_dummies(\n            columns,\n            prefix=prefix,\n            prefix_sep=prefix_sep,\n            dummy_na=dummy_na,\n            drop_first=drop_first,\n            dtype=dtype,\n        )\n        return 
DataFrame(query_compiler=new_manager)\n\n\n@_inherit_docstrings(pandas.melt, apilink=\"pandas.melt\")\n@enable_logging\n@wrap_free_function_in_argument_caster(\"melt\")\ndef melt(\n    frame,\n    id_vars=None,\n    value_vars=None,\n    var_name=None,\n    value_name=\"value\",\n    col_level=None,\n    ignore_index: bool = True,\n) -> DataFrame:  # noqa: PR01, RT01, D200\n    \"\"\"\n    Unpivot a DataFrame from wide to long format, optionally leaving identifiers set.\n    \"\"\"\n    return frame.melt(\n        id_vars=id_vars,\n        value_vars=value_vars,\n        var_name=var_name,\n        value_name=value_name,\n        col_level=col_level,\n        ignore_index=ignore_index,\n    )\n\n\n@_inherit_docstrings(pandas.crosstab, apilink=\"pandas.crosstab\")\n@enable_logging\n@wrap_free_function_in_argument_caster(\"crosstab\")\ndef crosstab(\n    index,\n    columns,\n    values=None,\n    rownames=None,\n    colnames=None,\n    aggfunc=None,\n    margins=False,\n    margins_name: str = \"All\",\n    dropna: bool = True,\n    normalize=False,\n) -> DataFrame:  # noqa: PR01, RT01, D200\n    \"\"\"\n    Compute a simple cross tabulation of two (or more) factors.\n    \"\"\"\n    _maybe_warn_on_default(\"`crosstab`\")\n    pandas_crosstab = pandas.crosstab(\n        index,\n        columns,\n        values,\n        rownames,\n        colnames,\n        aggfunc,\n        margins,\n        margins_name,\n        dropna,\n        normalize,\n    )\n    return DataFrame(pandas_crosstab)\n\n\n# Adding docstring since pandas docs don't have web section for this function.\n@enable_logging\n@wrap_free_function_in_argument_caster(\"lreshape\")\ndef lreshape(data: DataFrame, groups, dropna=True) -> DataFrame:\n    \"\"\"\n    Reshape wide-format data to long. 
Generalized inverse of ``DataFrame.pivot``.\n\n    Accepts a dictionary, `groups`, in which each key is a new column name\n    and each value is a list of old column names that will be \"melted\" under\n    the new column name as part of the reshape.\n\n    Parameters\n    ----------\n    data : DataFrame\n        The wide-format DataFrame.\n    groups : dict\n        Dictionary in the form: `{new_name : list_of_columns}`.\n    dropna : bool, default: True\n        Whether include columns whose entries are all NaN or not.\n\n    Returns\n    -------\n    DataFrame\n        Reshaped DataFrame.\n    \"\"\"\n    if not isinstance(data, DataFrame):\n        raise ValueError(\"can not lreshape with instance of type {}\".format(type(data)))\n    data._query_compiler._maybe_warn_on_default(message=\"`lreshape`\")\n    return DataFrame(pandas.lreshape(to_pandas(data), groups, dropna=dropna))\n\n\n@_inherit_docstrings(pandas.wide_to_long, apilink=\"pandas.wide_to_long\")\n@enable_logging\n@wrap_free_function_in_argument_caster(\"wide_to_long\")\ndef wide_to_long(\n    df: DataFrame, stubnames, i, j, sep: str = \"\", suffix: str = r\"\\d+\"\n) -> DataFrame:  # noqa: PR01, RT01, D200\n    \"\"\"\n    Unpivot a DataFrame from wide to long format.\n    \"\"\"\n    if not isinstance(df, DataFrame):\n        raise ValueError(\n            \"can not wide_to_long with instance of type {}\".format(type(df))\n        )\n    return DataFrame(\n        query_compiler=df._query_compiler.wide_to_long(\n            stubnames=stubnames,\n            i=i,\n            j=j,\n            sep=sep,\n            suffix=suffix,\n        )\n    )\n\n\n@wrap_free_function_in_argument_caster(\"_determine_name\")\ndef _determine_name(objs: Iterable[BaseQueryCompiler], axis: Union[int, str]):\n    \"\"\"\n    Determine names of index after concatenation along passed axis.\n\n    Parameters\n    ----------\n    objs : iterable of QueryCompilers\n        Objects to concatenate.\n    axis : int or str\n  
      The axis to concatenate along.\n\n    Returns\n    -------\n    list with single element\n        Computed index name, `None` if it could not be determined.\n    \"\"\"\n    axis = pandas.DataFrame()._get_axis_number(axis)\n\n    def get_names(obj):\n        return obj.columns.names if axis else obj.index.names\n\n    names = np.array([get_names(obj) for obj in objs])\n\n    # saving old name, only if index names of all objs are the same\n    if np.all(names == names[0]):\n        # we must do this check to avoid this calls `list(str_like_name)`\n        return list(names[0]) if is_list_like(names[0]) else [names[0]]\n    else:\n        return None\n\n\n@_inherit_docstrings(pandas.to_datetime, apilink=\"pandas.to_timedelta\")\n@enable_logging\n@wrap_free_function_in_argument_caster(\"to_timedelta\")\ndef to_timedelta(\n    arg, unit=None, errors=\"raise\"\n) -> Scalar | pandas.Index | Series:  # noqa: PR01, RT01, D200\n    \"\"\"\n    Convert argument to timedelta.\n\n    Accepts str, timedelta, list-like or Series for arg parameter.\n    Returns a Series if and only if arg is provided as a Series.\n    \"\"\"\n    if isinstance(arg, Series):\n        query_compiler = arg._query_compiler.to_timedelta(unit=unit, errors=errors)\n        return Series(query_compiler=query_compiler)\n    return pandas.to_timedelta(arg, unit=unit, errors=errors)\n"
  },
  {
    "path": "modin/pandas/groupby.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Implement GroupBy public API as pandas does.\"\"\"\n\nfrom __future__ import annotations\n\nimport warnings\nfrom collections.abc import Iterable\nfrom functools import cached_property\nfrom types import BuiltinFunctionType\nfrom typing import TYPE_CHECKING, Any, Hashable, Optional, Union\n\nimport numpy as np\nimport pandas\nimport pandas.core.common as com\nimport pandas.core.groupby\nfrom pandas._libs import lib\nfrom pandas.api.types import is_scalar\nfrom pandas.core.apply import reconstruct_func\nfrom pandas.core.dtypes.common import (\n    is_datetime64_any_dtype,\n    is_integer,\n    is_list_like,\n    is_numeric_dtype,\n)\nfrom pandas.errors import SpecificationError\nfrom typing_extensions import Self\n\nfrom modin.core.dataframe.algebra.default2pandas.groupby import GroupBy\nfrom modin.core.storage_formats.base.query_compiler import BaseQueryCompiler\nfrom modin.core.storage_formats.pandas.query_compiler_caster import (\n    EXTENSION_DICT_TYPE,\n    EXTENSION_NO_LOOKUP,\n    QueryCompilerCaster,\n    visit_nested_args,\n)\nfrom modin.error_message import ErrorMessage\nfrom modin.logging import ClassLogger, disable_logging\nfrom modin.pandas.utils 
import cast_function_modin2pandas\nfrom modin.utils import (\n    MODIN_UNNAMED_SERIES_LABEL,\n    _inherit_docstrings,\n    hashable,\n    sentinel,\n    try_cast_to_pandas,\n    wrap_into_list,\n    wrap_udf_function,\n)\n\nfrom .series import Series\nfrom .utils import is_label\nfrom .window import RollingGroupby\n\nif TYPE_CHECKING:\n    from modin.pandas import DataFrame\n\n_DEFAULT_BEHAVIOUR = EXTENSION_NO_LOOKUP | {\n    \"__class__\",\n    \"__getitem__\",\n    \"__init__\",\n    \"__iter__\",\n    \"_as_index\",\n    \"_axis\",\n    \"_by\",\n    \"_check_index\",\n    \"_columns\",\n    \"_compute_index_grouped\",\n    \"_default_to_pandas\",\n    \"_df\",\n    \"_drop\",\n    \"_idx_name\",\n    \"_index\",\n    \"_internal_by\",\n    \"_is_multi_by\",\n    \"_iter\",\n    \"_kwargs\",\n    \"_level\",\n    \"_pandas_class\",\n    \"_query_compiler\",\n    \"_sort\",\n    \"_wrap_aggregation\",\n}\n\nGROUPBY_EXTENSION_NO_LOOKUP = EXTENSION_NO_LOOKUP | {\n    \"_axis\",\n    \"_idx_name\",\n    \"_df\",\n    \"_query_compiler\",\n    \"_columns\",\n    \"_by\",\n    \"_drop\",\n    \"_return_tuple_when_iterating\",\n    \"_is_multi_by\",\n    \"_level\",\n    \"_kwargs\",\n    \"_get_query_compiler\",\n}\n\n\n@_inherit_docstrings(pandas.core.groupby.DataFrameGroupBy)\nclass DataFrameGroupBy(ClassLogger, QueryCompilerCaster):  # noqa: GL08\n    _pandas_class = pandas.core.groupby.DataFrameGroupBy\n    _return_tuple_when_iterating = False\n    _df: Union[DataFrame, Series]\n    _query_compiler: BaseQueryCompiler\n    _extensions: EXTENSION_DICT_TYPE = EXTENSION_DICT_TYPE(dict)\n\n    def __init__(\n        self,\n        df: Union[DataFrame, Series],\n        by,\n        axis,\n        level,\n        as_index,\n        sort,\n        group_keys,\n        idx_name,\n        drop,\n        backend_pinned: bool,\n        **kwargs,\n    ):\n        self._axis = axis\n        self._idx_name = idx_name\n        self._df = df\n        self._query_compiler = 
self._df._query_compiler\n        self._columns = self._query_compiler.columns\n        self._by = by\n        self._drop = drop\n        # When providing a list of columns of length one to DataFrame.groupby(),\n        # the keys that are returned by iterating over the resulting DataFrameGroupBy\n        # object will now be tuples of length one (pandas#GH47761)\n        self._return_tuple_when_iterating = kwargs.pop(\n            \"return_tuple_when_iterating\", False\n        )\n        # Whether the backend of this groupby object has been pinned.\n        self._backend_pinned = backend_pinned\n\n        if (\n            level is None\n            and is_list_like(by)\n            or isinstance(by, type(self._query_compiler))\n        ):\n            # This tells us whether or not there are multiple columns/rows in the groupby\n            self._is_multi_by = (\n                isinstance(by, type(self._query_compiler)) and len(by.columns) > 1\n            ) or (\n                not isinstance(by, type(self._query_compiler))\n                and axis == 0\n                and all(\n                    (hashable(obj) and obj in self._query_compiler.columns)\n                    or isinstance(obj, type(self._query_compiler))\n                    or is_list_like(obj)\n                    for obj in self._by\n                )\n            )\n        else:\n            self._is_multi_by = False\n        self._level = level\n        self._kwargs = {\n            \"level\": level,\n            \"sort\": sort,\n            \"as_index\": as_index,\n            \"group_keys\": group_keys,\n        }\n        self._kwargs.update(kwargs)\n\n    @disable_logging\n    @_inherit_docstrings(QueryCompilerCaster._get_query_compiler)\n    def _get_query_compiler(self) -> Optional[BaseQueryCompiler]:\n        if hasattr(self, \"_df\"):\n            return self._df._query_compiler\n        return None\n\n    @disable_logging\n    
@_inherit_docstrings(QueryCompilerCaster.get_backend)\n    def get_backend(self) -> str:\n        return self._df.get_backend()\n\n    @disable_logging\n    def set_backend(\n        self,\n        backend: str,\n        inplace: bool = False,\n        *,\n        switch_operation: Optional[str] = None,\n    ) -> Optional[Self]:\n        \"\"\"\n        Move the data in this groupby object to a different backend.\n\n        Parameters\n        ----------\n        backend : str\n            The name of the backend to switch to.\n        inplace : bool, default: False\n            Whether to perform the operation in-place.\n        switch_operation : str, optional\n            The operation being performed that triggered the backend switch.\n\n        Returns\n        -------\n        DataFrameGroupBy or None\n            If inplace=False, returns a new groupby object with the specified backend.\n            If inplace=True, returns None and changes the backend of the current object.\n\n        Notes\n        -----\n        When `inplace=True`, this method will move the data between backends\n        for all parent objects (the DataFrame/Series used to create this\n        groupby, and any DataFrames/Series in the `by` list). 
When\n        `inplace=False`, new copies of the parent objects are created with their\n        data in the target backend for the returned groupby object, leaving the\n        original parent objects unchanged.\n        \"\"\"\n\n        def set_instance_variable_backend(arg: Any) -> Any:\n            # groupby object _by and _df fields may include both\n            # QueryCompilerCaster objects and BaseQueryCompiler objects,\n            # so we have to be able to set the backend on both of those.\n\n            if isinstance(arg, QueryCompilerCaster):\n                result = arg.set_backend(\n                    backend=backend, inplace=inplace, switch_operation=switch_operation\n                )\n                return arg if inplace else result\n            if isinstance(arg, BaseQueryCompiler):\n                # Use a cyclic import here because query compilers themselves\n                # do not implement set_backend().\n                from modin.pandas import DataFrame\n\n                return (\n                    DataFrame(query_compiler=arg)\n                    .set_backend(backend=backend, inplace=False)\n                    ._query_compiler\n                )\n            return arg\n\n        new_by = visit_nested_args([self._by], set_instance_variable_backend)[0]\n        new_df = visit_nested_args([self._df], set_instance_variable_backend)[0]\n\n        if inplace:\n            self._df = new_df\n            self._query_compiler = new_df._query_compiler\n            self._by = new_by\n            return None\n        return type(self)(\n            df=new_df,\n            by=new_by,\n            axis=self._axis,\n            level=self._level,\n            as_index=self._as_index,\n            sort=self._sort,\n            group_keys=self._kwargs[\"group_keys\"],\n            idx_name=self._idx_name,\n            drop=self._drop,\n            backend_pinned=self._backend_pinned,\n            # We have added as_index, sort, group_keys, and 
level to the kwargs\n            # dictionary, so we need to remove them from the keyword arguments\n            # that we pass to the new DataFrameGroupBy object.\n            **{\n                k: v\n                for k, v in self._kwargs.items()\n                if k not in [\"as_index\", \"sort\", \"group_keys\", \"level\"]\n            },\n        )\n\n    @_inherit_docstrings(QueryCompilerCaster.is_backend_pinned)\n    def is_backend_pinned(self) -> bool:\n        return self._backend_pinned\n\n    @_inherit_docstrings(QueryCompilerCaster._set_backend_pinned)\n    def _set_backend_pinned(self, pinned: bool, inplace: bool) -> Optional[Self]:\n        if inplace:\n            self._backend_pinned = pinned\n            return None\n        else:\n            # Create a new groupby object with the updated pinned status\n            new_obj = self._override(backend_pinned=pinned)\n            # Force the correct pinned status since the automatic pinning logic\n            # in query_compiler_caster.py might override it\n            new_obj._backend_pinned = pinned\n            return new_obj\n\n    @disable_logging\n    @_inherit_docstrings(QueryCompilerCaster._get_query_compiler)\n    def _copy_into(self, other: Self) -> None:\n        # TODO(https://github.com/modin-project/modin/issues/7544): implement\n        # this method to support automatic pre-operation backend switch for\n        # groupby methods.\n        ErrorMessage.not_implemented()\n\n    def _override(self, **kwargs):\n        \"\"\"\n        Override groupby parameters.\n\n        Parameters\n        ----------\n        **kwargs : dict\n            Parameters to override.\n\n        Returns\n        -------\n        DataFrameGroupBy\n            A groupby object with new parameters.\n        \"\"\"\n        new_kw = dict(\n            df=self._df,\n            by=self._by,\n            axis=self._axis,\n            idx_name=self._idx_name,\n            drop=self._drop,\n            
backend_pinned=self._backend_pinned,\n            **self._kwargs,\n        )\n        new_kw.update(kwargs)\n        return type(self)(**new_kw)\n\n    @disable_logging\n    def __getattr__(self, key):\n        \"\"\"\n        Alter regular attribute access, looks up the name in the columns.\n\n        Parameters\n        ----------\n        key : str\n            Attribute name.\n\n        Returns\n        -------\n        The value of the attribute.\n        \"\"\"\n        try:\n            return self._getattr__from_extension_impl(\n                key=key,\n                default_behavior_attributes=GROUPBY_EXTENSION_NO_LOOKUP,\n                extensions=__class__._extensions,\n            )\n        except AttributeError as err:\n            if key != \"_columns\" and key in self._columns:\n                return self.__getitem__(key)\n            raise err\n\n    @disable_logging\n    def __getattribute__(self, item: str) -> Any:\n        \"\"\"\n        Override __getattribute__, which python calls to access any attribute of an object of this class.\n\n        We override this method\n            1) to default to pandas for empty dataframes on non-lazy engines.\n            2) to get non-method extensions (e.g. properties)\n\n        Parameters\n        ----------\n        item : str\n            The name of the attribute to access.\n\n        Returns\n        -------\n        Any\n            The value of the attribute.\n        \"\"\"\n        if item not in GROUPBY_EXTENSION_NO_LOOKUP:\n            extensions_result = self._getattribute__from_extension_impl(\n                item, __class__._extensions\n            )\n            if extensions_result is not sentinel:\n                return extensions_result\n\n        attr = super().__getattribute__(item)\n        if item not in _DEFAULT_BEHAVIOUR and not self._query_compiler.lazy_shape:\n            # We default to pandas on empty DataFrames. 
This avoids a large amount of\n            # pain in underlying implementation and returns a result immediately rather\n            # than dealing with the edge cases that empty DataFrames have.\n            if callable(attr) and self._df.empty and hasattr(self._pandas_class, item):\n\n                def default_handler(*args, **kwargs):\n                    return self._default_to_pandas(item, *args, **kwargs)\n\n                return default_handler\n        return attr\n\n    @disable_logging\n    def __setattr__(self, key: str, value) -> None:\n        \"\"\"\n        Set an attribute on the object.\n\n        We override this method to set extension properties.\n\n        Parameters\n        ----------\n        key : str\n            The name of the attribute to set.\n        value : Any\n            The value to set the attribute to.\n\n        Returns\n        -------\n        None\n        \"\"\"\n        # An extension property is only accessible if the backend supports it.\n        extension = self._get_extension(key, __class__._extensions)\n        if extension is not sentinel and hasattr(extension, \"__set__\"):\n            return extension.__set__(self, value)\n        return super().__setattr__(key, value)\n\n    @disable_logging\n    def __delattr__(self, name: str) -> None:\n        \"\"\"\n        Delete an attribute on the object.\n\n        We override this method to delete extension properties.\n\n        Parameters\n        ----------\n        name : str\n            The name of the attribute to delete.\n\n        Returns\n        -------\n        None\n        \"\"\"\n        # An extension property is only accessible if the backend supports it.\n        extension = self._get_extension(name, __class__._extensions)\n        if extension is not sentinel and hasattr(extension, \"__delete__\"):\n            return extension.__delete__(self)\n        return super().__delattr__(name)\n\n    @property\n    def ngroups(self):  # noqa: GL08\n        
return len(self)\n\n    def skew(self, axis=lib.no_default, skipna=True, numeric_only=False, **kwargs):\n        # default behaviour for aggregations; for the reference see\n        # `_op_via_apply` func in pandas==2.0.2\n        if axis is None or axis is lib.no_default:\n            axis = self._axis\n\n        if axis != 0 or not skipna:\n            return self._default_to_pandas(\n                lambda df: df.skew(\n                    axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs\n                )\n            )\n\n        return self._wrap_aggregation(\n            type(self._query_compiler).groupby_skew,\n            agg_kwargs=kwargs,\n            numeric_only=numeric_only,\n        )\n\n    def ffill(self, limit=None):\n        ErrorMessage.single_warning(\n            \".ffill() is implemented using .fillna() in Modin, \"\n            + \"which can be impacted by pandas bug https://github.com/pandas-dev/pandas/issues/43412 \"\n            + \"on dataframes with duplicated indices\"\n        )\n        with warnings.catch_warnings():\n            warnings.filterwarnings(\n                \"ignore\",\n                message=\".*fillna with 'method' is deprecated.*\",\n                category=FutureWarning,\n            )\n            return self.fillna(limit=limit, method=\"ffill\")\n\n    def sem(self, ddof=1, numeric_only=False):\n        return self._wrap_aggregation(\n            type(self._query_compiler).groupby_sem,\n            agg_kwargs=dict(ddof=ddof),\n            numeric_only=numeric_only,\n        )\n\n    def sample(self, n=None, frac=None, replace=False, weights=None, random_state=None):\n        return self._default_to_pandas(\n            lambda df: df.sample(\n                n=n,\n                frac=frac,\n                replace=replace,\n                weights=weights,\n                random_state=random_state,\n            )\n        )\n\n    def ewm(self, *args, **kwargs):\n        return 
self._default_to_pandas(lambda df: df.ewm(*args, **kwargs))\n\n    def value_counts(\n        self,\n        subset=None,\n        normalize: bool = False,\n        sort: bool = True,\n        ascending: bool = False,\n        dropna: bool = True,\n    ):\n        return self._default_to_pandas(\n            lambda df: df.value_counts(\n                subset=subset,\n                normalize=normalize,\n                sort=sort,\n                ascending=ascending,\n                dropna=dropna,\n            )\n        )\n\n    def mean(self, numeric_only=False, engine=None, engine_kwargs=None):\n        if engine not in (\"cython\", None) and engine_kwargs is not None:\n            return self._default_to_pandas(\n                lambda df: df.mean(\n                    numeric_only=numeric_only,\n                    engine=engine,\n                    engine_kwargs=engine_kwargs,\n                )\n            )\n        return self._check_index(\n            self._wrap_aggregation(\n                type(self._query_compiler).groupby_mean,\n                agg_kwargs=dict(numeric_only=numeric_only),\n                numeric_only=numeric_only,\n            )\n        )\n\n    def any(self, skipna=True):\n        return self._wrap_aggregation(\n            type(self._query_compiler).groupby_any,\n            numeric_only=False,\n            agg_kwargs=dict(skipna=skipna),\n        )\n\n    @property\n    def plot(self):  # pragma: no cover\n        return self._default_to_pandas(lambda df: df.plot)\n\n    def ohlc(self):\n        from .dataframe import DataFrame\n\n        return DataFrame(\n            query_compiler=self._query_compiler.groupby_ohlc(\n                by=self._by,\n                axis=self._axis,\n                groupby_kwargs=self._kwargs,\n                agg_args=[],\n                agg_kwargs={},\n                is_df=isinstance(self._df, DataFrame),\n            ),\n        )\n\n    def __bytes__(self):\n        \"\"\"\n        
Convert DataFrameGroupBy object into a python2-style byte string.\n\n        Returns\n        -------\n        bytearray\n            Byte array representation of `self`.\n\n        Notes\n        -----\n        Deprecated and removed in pandas and will be likely removed in Modin.\n        \"\"\"\n        return self._default_to_pandas(lambda df: df.__bytes__())\n\n    @cached_property\n    def groups(self):\n        return self._compute_index_grouped(numerical=False)\n\n    def min(self, numeric_only=False, min_count=-1, engine=None, engine_kwargs=None):\n        if engine not in (\"cython\", None) and engine_kwargs is not None:\n            return self._default_to_pandas(\n                lambda df: df.min(\n                    numeric_only=numeric_only,\n                    min_count=min_count,\n                    engine=engine,\n                    engine_kwargs=engine_kwargs,\n                )\n            )\n        return self._wrap_aggregation(\n            type(self._query_compiler).groupby_min,\n            agg_kwargs=dict(min_count=min_count),\n            numeric_only=numeric_only,\n        )\n\n    def max(self, numeric_only=False, min_count=-1, engine=None, engine_kwargs=None):\n        if engine not in (\"cython\", None) and engine_kwargs is not None:\n            return self._default_to_pandas(\n                lambda df: df.max(\n                    numeric_only=numeric_only,\n                    min_count=min_count,\n                    engine=engine,\n                    engine_kwargs=engine_kwargs,\n                )\n            )\n        return self._wrap_aggregation(\n            type(self._query_compiler).groupby_max,\n            agg_kwargs=dict(min_count=min_count),\n            numeric_only=numeric_only,\n        )\n\n    def idxmax(self, axis=lib.no_default, skipna=True, numeric_only=False):\n        if axis is not lib.no_default:\n            self._deprecate_axis(axis, \"idxmax\")\n        # default behaviour for aggregations; for 
the reference see\n        # `_op_via_apply` func in pandas==2.0.2\n        if axis is None or axis is lib.no_default:\n            axis = self._axis\n        return self._wrap_aggregation(\n            type(self._query_compiler).groupby_idxmax,\n            agg_kwargs=dict(axis=axis, skipna=skipna),\n            numeric_only=numeric_only,\n        )\n\n    def idxmin(self, axis=lib.no_default, skipna=True, numeric_only=False):\n        if axis is not lib.no_default:\n            self._deprecate_axis(axis, \"idxmin\")\n        # default behaviour for aggregations; for the reference see\n        # `_op_via_apply` func in pandas==2.0.2\n        if axis is None or axis is lib.no_default:\n            axis = self._axis\n        return self._wrap_aggregation(\n            type(self._query_compiler).groupby_idxmin,\n            agg_kwargs=dict(axis=axis, skipna=skipna),\n            numeric_only=numeric_only,\n        )\n\n    @property\n    def ndim(self):\n        \"\"\"\n        Return 2.\n\n        Returns\n        -------\n        int\n            Returns 2.\n\n        Notes\n        -----\n        Deprecated and removed in pandas and will be likely removed in Modin.\n        \"\"\"\n        return 2  # ndim is always 2 for DataFrames\n\n    def shift(\n        self,\n        periods=1,\n        freq=None,\n        axis=lib.no_default,\n        fill_value=lib.no_default,\n        suffix=None,\n    ):\n        if suffix:\n            return self._default_to_pandas(\n                lambda df: df.shift(\n                    periods=periods,\n                    freq=freq,\n                    axis=axis,\n                    fill_value=fill_value,\n                    suffix=suffix,\n                )\n            )\n        if axis is not lib.no_default:\n            axis = self._df._get_axis_number(axis)\n            self._deprecate_axis(axis, \"shift\")\n        else:\n            axis = 0\n\n        def _shift(data, periods, freq, axis, fill_value, 
is_set_nan_rows=True):\n            from .dataframe import DataFrame\n\n            result = data.shift(periods, freq, axis, fill_value)\n\n            if (\n                is_set_nan_rows\n                and isinstance(self._by, BaseQueryCompiler)\n                and (\n                    # Check using `issubset` is effective only in case of MultiIndex\n                    set(self._by.columns).issubset(list(data.columns))\n                    if isinstance(self._by.columns, pandas.MultiIndex)\n                    else len(\n                        self._by.columns.unique()\n                        .sort_values()\n                        .difference(data.columns.unique().sort_values())\n                    )\n                    == 0\n                )\n                and DataFrame(query_compiler=self._by.isna()).any(axis=None)\n            ):\n                mask_nan_rows = data[self._by.columns].isna().any(axis=1)\n                result = result.loc[~mask_nan_rows]\n            return result\n\n        if freq is None and axis == 1 and self._axis == 0:\n            result = _shift(self._df, periods, freq, axis, fill_value)\n        elif (\n            freq is not None\n            and axis == 0\n            and self._axis == 0\n            and isinstance(self._by, BaseQueryCompiler)\n        ):\n            result = _shift(\n                self._df, periods, freq, axis, fill_value, is_set_nan_rows=False\n            )\n            result = result.dropna(subset=self._by.columns)\n            if self._sort:\n                result = result.sort_values(list(self._by.columns), axis=axis)\n            else:\n                result = result.sort_index()\n        else:\n            result = self._wrap_aggregation(\n                type(self._query_compiler).groupby_shift,\n                numeric_only=False,\n                agg_kwargs=dict(\n                    periods=periods, freq=freq, axis=axis, fill_value=fill_value\n                ),\n            )\n    
    return result\n\n    def nth(self, n, dropna=None):\n        # TODO: what we really should do is create a GroupByNthSelector to mimic\n        # pandas behavior and then implement some of these methods there.\n        # Adapted error checking from pandas\n        if dropna:\n            if not is_integer(n):\n                raise ValueError(\"dropna option only supported for an integer argument\")\n\n            if dropna not in (\"any\", \"all\"):\n                # Note: when agg-ing picker doesn't raise this, just returns NaN\n                raise ValueError(\n                    \"For a DataFrame or Series groupby.nth, dropna must be \"\n                    + \"either None, 'any' or 'all', \"\n                    + f\"(was passed {dropna}).\"\n                )\n\n        return self._check_index(\n            self._wrap_aggregation(\n                type(self._query_compiler).groupby_nth,\n                numeric_only=False,\n                agg_kwargs=dict(n=n, dropna=dropna),\n            )\n        )\n\n    def cumsum(self, axis=lib.no_default, *args, **kwargs):\n        if axis is not lib.no_default:\n            axis = self._df._get_axis_number(axis)\n            self._deprecate_axis(axis, \"cumsum\")\n        else:\n            axis = 0\n        return self._wrap_aggregation(\n            type(self._query_compiler).groupby_cumsum,\n            agg_args=args,\n            agg_kwargs=dict(axis=axis, **kwargs),\n        )\n\n    @cached_property\n    def indices(self):\n        return self._compute_index_grouped(numerical=True)\n\n    @_inherit_docstrings(pandas.core.groupby.DataFrameGroupBy.pct_change)\n    def pct_change(\n        self,\n        periods=1,\n        fill_method=lib.no_default,\n        limit=lib.no_default,\n        freq=None,\n        axis=lib.no_default,\n    ):\n        from .dataframe import DataFrame\n\n        if fill_method not in (lib.no_default, None) or limit is not lib.no_default:\n            warnings.warn(\n              
  \"The 'fill_method' keyword being not None and the 'limit' keyword in \"\n                + f\"{type(self).__name__}.pct_change are deprecated and will be removed \"\n                + \"in a future version. Either fill in any non-leading NA values prior \"\n                + \"to calling pct_change or specify 'fill_method=None' to not fill NA \"\n                + \"values.\",\n                FutureWarning,\n            )\n        if fill_method is lib.no_default:\n            if any(grp.isna().values.any() for _, grp in self):\n                warnings.warn(\n                    \"The default fill_method='ffill' in \"\n                    + f\"{type(self).__name__}.pct_change is deprecated and will be \"\n                    + \"removed in a future version. Call ffill before calling \"\n                    + \"pct_change to retain current behavior and silence this warning.\",\n                    FutureWarning,\n                )\n            fill_method = \"ffill\"\n        if limit is lib.no_default:\n            limit = None\n\n        if axis is not lib.no_default:\n            axis = self._df._get_axis_number(axis)\n            self._deprecate_axis(axis, \"pct_change\")\n        else:\n            axis = 0\n\n        # Should check for API level errors\n        # Attempting to match pandas error behavior here\n        if not isinstance(periods, int):\n            raise TypeError(f\"periods must be an int. 
got {type(periods)} instead\")\n\n        if isinstance(self._df, Series):\n            if not is_numeric_dtype(self._df.dtypes):\n                raise TypeError(\n                    f\"unsupported operand type for -: got {self._df.dtypes}\"\n                )\n        elif isinstance(self._df, DataFrame) and axis == 0:\n            for col, dtype in self._df.dtypes.items():\n                # can't calculate change on non-numeric columns, so check for\n                # non-numeric columns that are not included in the `by`\n                if not is_numeric_dtype(dtype) and not (\n                    isinstance(self._by, BaseQueryCompiler) and col in self._by.columns\n                ):\n                    raise TypeError(f\"unsupported operand type for -: got {dtype}\")\n\n        return self._wrap_aggregation(\n            type(self._query_compiler).groupby_pct_change,\n            agg_kwargs=dict(\n                periods=periods,\n                fill_method=fill_method,\n                limit=limit,\n                freq=freq,\n                axis=axis,\n            ),\n        )\n\n    def filter(self, func, dropna=True, *args, **kwargs):\n        return self._default_to_pandas(\n            lambda df: df.filter(func, dropna=dropna, *args, **kwargs)\n        )\n\n    def _deprecate_axis(self, axis: int, name: str) -> None:  # noqa: GL08\n        if axis == 1:\n            warnings.warn(\n                f\"{type(self).__name__}.{name} with axis=1 is deprecated and \"\n                + \"will be removed in a future version. Operate on the un-grouped \"\n                + \"DataFrame instead\",\n                FutureWarning,\n            )\n        else:\n            warnings.warn(\n                f\"The 'axis' keyword in {type(self).__name__}.{name} is deprecated \"\n                + \"and will be removed in a future version. 
\"\n                + \"Call without passing 'axis' instead.\",\n                FutureWarning,\n            )\n\n    def cummax(self, axis=lib.no_default, numeric_only=False, **kwargs):\n        if axis is not lib.no_default:\n            axis = self._df._get_axis_number(axis)\n            self._deprecate_axis(axis, \"cummax\")\n        else:\n            axis = 0\n        return self._wrap_aggregation(\n            type(self._query_compiler).groupby_cummax,\n            agg_kwargs=dict(axis=axis, **kwargs),\n            numeric_only=numeric_only,\n        )\n\n    def apply(self, func, *args, include_groups=True, **kwargs):\n        func = cast_function_modin2pandas(func)\n        if not isinstance(func, BuiltinFunctionType):\n            func = wrap_udf_function(func)\n\n        apply_res = self._wrap_aggregation(\n            qc_method=type(self._query_compiler).groupby_agg,\n            numeric_only=False,\n            agg_func=func,\n            agg_args=args,\n            agg_kwargs={**kwargs, \"include_groups\": include_groups},\n            how=\"group_wise\",\n        )\n        reduced_index = pandas.Index([MODIN_UNNAMED_SERIES_LABEL])\n        if not isinstance(apply_res, Series) and apply_res.columns.equals(\n            reduced_index\n        ):\n            apply_res = apply_res.squeeze(axis=1)\n        return self._check_index(apply_res)\n\n    @property\n    def dtypes(self):\n        if self._axis == 1:\n            raise ValueError(\"Cannot call dtypes on groupby with axis=1\")\n        warnings.warn(\n            f\"{type(self).__name__}.dtypes is deprecated and will be removed in \"\n            + \"a future version. 
Check the dtypes on the base object instead\",\n            FutureWarning,\n        )\n        return self._check_index(\n            self._wrap_aggregation(\n                type(self._query_compiler).groupby_dtypes,\n                numeric_only=False,\n            )\n        )\n\n    def first(self, numeric_only=False, min_count=-1, skipna=True):\n        return self._wrap_aggregation(\n            type(self._query_compiler).groupby_first,\n            agg_kwargs=dict(min_count=min_count, skipna=skipna),\n            numeric_only=numeric_only,\n        )\n\n    def last(self, numeric_only=False, min_count=-1, skipna=True):\n        return self._wrap_aggregation(\n            type(self._query_compiler).groupby_last,\n            agg_kwargs=dict(min_count=min_count, skipna=skipna),\n            numeric_only=numeric_only,\n        )\n\n    @cached_property\n    def _internal_by(self) -> tuple[Hashable]:\n        \"\"\"\n        Get only those components of 'by' that are column labels of the source frame.\n\n        Returns\n        -------\n        tuple of labels\n        \"\"\"\n        internal_by = tuple()\n        if self._drop:\n            if is_list_like(self._by):\n                internal_by_list = []\n                for by in self._by:\n                    if isinstance(by, str):\n                        internal_by_list.append(by)\n                    elif isinstance(by, pandas.Grouper):\n                        internal_by_list.append(by.key)\n                internal_by = tuple(internal_by_list)\n            elif isinstance(self._by, pandas.Grouper):\n                internal_by = tuple([self._by.key])\n            else:\n                ErrorMessage.catch_bugs_and_request_email(\n                    failure_condition=not isinstance(self._by, BaseQueryCompiler),\n                    extra_log=f\"When 'drop' is True, 'by' must be either list-like, Grouper, or a QueryCompiler, met: {type(self._by)}.\",\n                )\n                internal_by = 
tuple(self._by.columns)\n        return internal_by\n\n    def __getitem__(self, key):\n        \"\"\"\n        Implement indexing operation on a DataFrameGroupBy object.\n\n        Parameters\n        ----------\n        key : list or str\n            Names of columns to use as subset of original object.\n\n        Returns\n        -------\n        DataFrameGroupBy or SeriesGroupBy\n            Result of indexing operation.\n\n        Raises\n        ------\n        NotImplementedError\n            Column lookups on GroupBy with arbitrary Series in by is not yet supported.\n        \"\"\"\n        # These parameters are common for building the resulted Series or DataFrame groupby object\n        kwargs = {\n            **self._kwargs.copy(),\n            \"by\": self._by,\n            \"axis\": self._axis,\n            \"idx_name\": self._idx_name,\n        }\n        # The rules of type deduction for the resulted object is the following:\n        #   1. If `key` is a list-like or `as_index is False`, then the resulted object is a DataFrameGroupBy\n        #   2. Otherwise, the resulted object is SeriesGroupBy\n        #   3. 
Result type does not depend on the `by` origin\n        # Examples:\n        #   - drop: any, as_index: any, __getitem__(key: list_like) -> DataFrameGroupBy\n        #   - drop: any, as_index: False, __getitem__(key: any) -> DataFrameGroupBy\n        #   - drop: any, as_index: True, __getitem__(key: label) -> SeriesGroupBy\n        if is_list_like(key):\n            make_dataframe = True\n        else:\n            if self._as_index:\n                make_dataframe = False\n            else:\n                make_dataframe = True\n                key = [key]\n        if make_dataframe:\n            internal_by = frozenset(self._internal_by)\n            if len(internal_by.intersection(key)) != 0:\n                ErrorMessage.mismatch_with_pandas(\n                    operation=\"GroupBy.__getitem__\",\n                    message=(\n                        \"intersection of the selection and 'by' columns is not yet supported, \"\n                        + \"to achieve the desired result rewrite the original code from:\\n\"\n                        + \"df.groupby('by_column')['by_column']\\n\"\n                        + \"to the:\\n\"\n                        + \"df.groupby(df['by_column'].copy())['by_column']\"\n                    ),\n                )\n            # We need to maintain order of the columns in key, using a set doesn't\n            # maintain order.\n            # We use dictionaries since they maintain insertion order as of 3.7,\n            # and its faster to call dict.update than it is to loop through `key`\n            # and select only the elements which aren't in `cols_to_grab`.\n            cols_to_grab = dict.fromkeys(self._internal_by)\n            cols_to_grab.update(dict.fromkeys(key))\n            key = [col for col in cols_to_grab.keys() if col in self._df.columns]\n            return DataFrameGroupBy(\n                self._df[key],\n                drop=self._drop,\n                backend_pinned=self._backend_pinned,\n             
   **kwargs,\n            )\n        if (\n            self._is_multi_by\n            and isinstance(self._by, list)\n            and not all(hashable(o) and o in self._df for o in self._by)\n        ):\n            raise NotImplementedError(\n                \"Column lookups on GroupBy with arbitrary Series in by\"\n                + \" is not yet supported.\"\n            )\n        return SeriesGroupBy(\n            self._df[key],\n            drop=False,\n            backend_pinned=self._backend_pinned,\n            **kwargs,\n        )\n\n    def cummin(self, axis=lib.no_default, numeric_only=False, **kwargs):\n        if axis is not lib.no_default:\n            axis = self._df._get_axis_number(axis)\n            self._deprecate_axis(axis, \"cummin\")\n        else:\n            axis = 0\n        return self._wrap_aggregation(\n            type(self._query_compiler).groupby_cummin,\n            agg_kwargs=dict(axis=axis, **kwargs),\n            numeric_only=numeric_only,\n        )\n\n    def bfill(self, limit=None):\n        ErrorMessage.single_warning(\n            \".bfill() is implemented using .fillna() in Modin, \"\n            + \"which can be impacted by pandas bug https://github.com/pandas-dev/pandas/issues/43412 \"\n            + \"on dataframes with duplicated indices\"\n        )\n        with warnings.catch_warnings():\n            warnings.filterwarnings(\n                \"ignore\",\n                message=\".*fillna with 'method' is deprecated.*\",\n                category=FutureWarning,\n            )\n            return self.fillna(limit=limit, method=\"bfill\")\n\n    def prod(self, numeric_only=False, min_count=0):\n        return self._wrap_aggregation(\n            type(self._query_compiler).groupby_prod,\n            agg_kwargs=dict(min_count=min_count),\n            numeric_only=numeric_only,\n        )\n\n    def std(self, ddof=1, engine=None, engine_kwargs=None, numeric_only=False):\n        if engine not in (\"cython\", None) and 
engine_kwargs is not None:\n            return self._default_to_pandas(\n                lambda df: df.std(\n                    ddof=ddof,\n                    engine=engine,\n                    engine_kwargs=engine_kwargs,\n                    numeric_only=numeric_only,\n                )\n            )\n        return self._wrap_aggregation(\n            type(self._query_compiler).groupby_std,\n            agg_kwargs=dict(ddof=ddof),\n            numeric_only=numeric_only,\n        )\n\n    def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs):\n        if engine not in (\"cython\", None) and engine_kwargs is not None:\n            return self._default_to_pandas(\n                lambda df: df.aggregate(\n                    func, *args, engine=engine, engine_kwargs=engine_kwargs, **kwargs\n                )\n            )\n        if self._axis != 0:\n            # This is not implemented in pandas,\n            # so we throw a different message\n            raise NotImplementedError(\"axis other than 0 is not supported\")\n\n        if (\n            callable(func)\n            and isinstance(func, BuiltinFunctionType)\n            and func.__name__ in dir(self)\n        ):\n            func_name = func.__name__\n            warnings.warn(\n                f\"The provided callable {func} is currently using \"\n                + f\"{type(self).__name__}.{func_name}. In a future version of pandas, \"\n                + \"the provided callable will be used directly. 
To keep current \"\n                + f\"behavior pass the string {func_name} instead.\",\n                category=FutureWarning,\n            )\n            func = func_name\n\n        do_relabel = None\n        if isinstance(func, dict) or func is None:\n            # the order from `reconstruct_func` cannot be used correctly if there\n            # is more than one columnar partition, since for correct use all columns\n            # must be available within one partition.\n            old_kwargs = dict(kwargs)\n            relabeling_required, func_dict, new_columns, _ = reconstruct_func(\n                func, **kwargs\n            )\n\n            if relabeling_required:\n\n                def do_relabel(obj_to_relabel):  # noqa: F811\n                    # unwrap nested labels into one level tuple\n                    result_labels = [None] * len(old_kwargs)\n                    for idx, labels in enumerate(old_kwargs.values()):\n                        if is_scalar(labels) or callable(labels):\n                            result_labels[idx] = (\n                                labels if not callable(labels) else labels.__name__\n                            )\n                            continue\n                        new_elem = []\n                        for label in labels:\n                            if is_scalar(label) or callable(label):\n                                new_elem.append(\n                                    label if not callable(label) else label.__name__\n                                )\n                            else:\n                                new_elem.extend(label)\n                        result_labels[idx] = tuple(new_elem)\n\n                    new_order = obj_to_relabel.columns.get_indexer(result_labels)\n                    new_columns_idx = pandas.Index(new_columns)\n                    if not self._as_index:\n                        nby_cols = len(obj_to_relabel.columns) - len(new_columns_idx)\n                 
       new_order = np.concatenate([np.arange(nby_cols), new_order])\n                        by_cols = obj_to_relabel.columns[:nby_cols]\n                        if by_cols.nlevels != new_columns_idx.nlevels:\n                            by_cols = by_cols.remove_unused_levels()\n                            empty_levels = [\n                                i\n                                for i, level in enumerate(by_cols.levels)\n                                if len(level) == 1 and level[0] == \"\"\n                            ]\n                            by_cols = by_cols.droplevel(empty_levels)\n                        new_columns_idx = by_cols.append(new_columns_idx)\n                    result = obj_to_relabel.iloc[:, new_order]\n                    result.columns = new_columns_idx\n                    return result\n\n            if any(isinstance(fn, list) for fn in func_dict.values()):\n                # multicolumn case\n                # putting functions in a `list` allows to achieve multicolumn in each partition\n                func_dict = {\n                    col: fn if isinstance(fn, list) else [fn]\n                    for col, fn in func_dict.items()\n                }\n            if (\n                relabeling_required\n                and not self._as_index\n                and any(col in func_dict for col in self._internal_by)\n            ):\n                ErrorMessage.mismatch_with_pandas(\n                    operation=\"GroupBy.aggregate(**dictionary_renaming_aggregation)\",\n                    message=(\n                        \"intersection of the columns to aggregate and 'by' is not yet supported when 'as_index=False', \"\n                        + \"columns with group names of the intersection will not be presented in the result. 
\"\n                        + \"To achieve the desired result rewrite the original code from:\\n\"\n                        + \"df.groupby('by_column', as_index=False).agg(agg_func=('by_column', agg_func))\\n\"\n                        + \"to the:\\n\"\n                        + \"df.groupby('by_column').agg(agg_func=('by_column', agg_func)).reset_index()\"\n                    ),\n                )\n\n            if any(i not in self._df.columns for i in func_dict.keys()):\n                raise SpecificationError(\"nested renamer is not supported\")\n            if func is None:\n                kwargs = {}\n            func = func_dict\n        elif is_list_like(func):\n            # for list-list aggregation pandas always puts\n            # groups as index in the result, ignoring as_index,\n            # so we have to reset it to default value\n            res = self._override(as_index=True)._wrap_aggregation(\n                qc_method=type(self._query_compiler).groupby_agg,\n                numeric_only=False,\n                agg_func=func,\n                agg_args=args,\n                agg_kwargs=kwargs,\n                how=\"axis_wise\",\n            )\n            if not self._kwargs[\"as_index\"]:\n                res.reset_index(inplace=True)\n            return res\n        elif callable(func):\n            return self._check_index(\n                self._wrap_aggregation(\n                    qc_method=type(self._query_compiler).groupby_agg,\n                    numeric_only=False,\n                    agg_func=func,\n                    agg_args=args,\n                    agg_kwargs=kwargs,\n                    how=\"axis_wise\",\n                )\n            )\n        elif isinstance(func, str):\n            # Using \"getattr\" here masks possible AttributeError which we throw\n            # in __getattr__, so we should call __getattr__ directly instead.\n            agg_func = self.__getattr__(func)\n            if callable(agg_func):\n      
          return agg_func(*args, **kwargs)\n\n        result = self._wrap_aggregation(\n            qc_method=type(self._query_compiler).groupby_agg,\n            numeric_only=False,\n            agg_func=func,\n            agg_args=args,\n            agg_kwargs=kwargs,\n            how=\"axis_wise\",\n        )\n        return do_relabel(result) if do_relabel else result\n\n    agg = aggregate\n\n    def rank(\n        self,\n        method=\"average\",\n        ascending=True,\n        na_option=\"keep\",\n        pct=False,\n        axis=lib.no_default,\n    ):\n        if na_option not in {\"keep\", \"top\", \"bottom\"}:\n            raise ValueError(\"na_option must be one of 'keep', 'top', or 'bottom'\")\n\n        if axis is not lib.no_default:\n            axis = self._df._get_axis_number(axis)\n            self._deprecate_axis(axis, \"rank\")\n        else:\n            axis = 0\n\n        result = self._wrap_aggregation(\n            type(self._query_compiler).groupby_rank,\n            agg_kwargs=dict(\n                method=method,\n                ascending=ascending,\n                na_option=na_option,\n                pct=pct,\n                axis=axis,\n            ),\n            numeric_only=False,\n        )\n        return result\n\n    @property\n    def corrwith(self):\n        return self._default_to_pandas(lambda df: df.corrwith)\n\n    def var(self, ddof=1, engine=None, engine_kwargs=None, numeric_only=False):\n        if engine not in (\"cython\", None) and engine_kwargs is not None:\n            return self._default_to_pandas(\n                lambda df: df.var(\n                    ddof=ddof,\n                    engine=engine,\n                    engine_kwargs=engine_kwargs,\n                    numeric_only=numeric_only,\n                )\n            )\n        return self._wrap_aggregation(\n            type(self._query_compiler).groupby_var,\n            agg_kwargs=dict(ddof=ddof),\n            numeric_only=numeric_only,\n     
   )\n\n    def get_group(self, name, obj=None):\n        work_object = self._override(\n            df=obj if obj is not None else self._df, as_index=True\n        )\n\n        return work_object._check_index(\n            work_object._wrap_aggregation(\n                qc_method=type(work_object._query_compiler).groupby_get_group,\n                numeric_only=False,\n                agg_kwargs=dict(name=name),\n            )\n        )\n\n    def __len__(self):  # noqa: GL08\n        return len(self.indices)\n\n    def all(self, skipna=True):\n        return self._wrap_aggregation(\n            type(self._query_compiler).groupby_all,\n            numeric_only=False,\n            agg_kwargs=dict(skipna=skipna),\n        )\n\n    def size(self):\n        if self._axis == 1:\n            return DataFrameGroupBy(\n                self._df.T.iloc[:, [0]],\n                self._by,\n                0,\n                drop=self._drop,\n                idx_name=self._idx_name,\n                backend_pinned=self._backend_pinned,\n                **self._kwargs,\n            ).size()\n        result = self._wrap_aggregation(\n            type(self._query_compiler).groupby_size,\n            numeric_only=False,\n        )\n        if not isinstance(result, Series):\n            result = result.squeeze(axis=1)\n        if not self._kwargs.get(\"as_index\") and not isinstance(result, Series):\n            result = (\n                result.rename(columns={MODIN_UNNAMED_SERIES_LABEL: \"index\"})\n                if MODIN_UNNAMED_SERIES_LABEL in result.columns\n                else result\n            )\n        elif isinstance(self._df, Series):\n            result.name = self._df.name\n        return result\n\n    def sum(self, numeric_only=False, min_count=0, engine=None, engine_kwargs=None):\n        if engine not in (\"cython\", None) and engine_kwargs is not None:\n            return self._default_to_pandas(\n                lambda df: df.sum(\n                    
numeric_only=numeric_only,\n                    min_count=min_count,\n                    engine=engine,\n                    engine_kwargs=engine_kwargs,\n                )\n            )\n\n        return self._wrap_aggregation(\n            type(self._query_compiler).groupby_sum,\n            agg_kwargs=dict(min_count=min_count),\n            numeric_only=numeric_only,\n        )\n\n    def describe(self, percentiles=None, include=None, exclude=None):\n        return self._default_to_pandas(\n            lambda df: df.describe(\n                percentiles=percentiles, include=include, exclude=exclude\n            )\n        )\n\n    def boxplot(\n        self,\n        grouped,\n        subplots=True,\n        column=None,\n        fontsize=None,\n        rot=0,\n        grid=True,\n        ax=None,\n        figsize=None,\n        layout=None,\n        sharex=False,\n        sharey=True,\n        backend=None,\n        **kwargs,\n    ):\n        return self._default_to_pandas(\n            lambda df: df.boxplot(\n                grouped,\n                subplots=subplots,\n                column=column,\n                fontsize=fontsize,\n                rot=rot,\n                grid=grid,\n                ax=ax,\n                figsize=figsize,\n                layout=layout,\n                sharex=sharex,\n                sharey=sharey,\n                backend=backend,\n                **kwargs,\n            )\n        )\n\n    def ngroup(self, ascending=True):\n        result = self._wrap_aggregation(\n            type(self._query_compiler).groupby_ngroup,\n            numeric_only=False,\n            agg_kwargs=dict(ascending=ascending),\n        )\n        if not isinstance(result, Series):\n            # The result should always be a Series with name None and type int64\n            result = result.squeeze(axis=1)\n        return result\n\n    def nunique(self, dropna=True):\n        return self._check_index(\n            self._wrap_aggregation(\n   
             type(self._query_compiler).groupby_nunique,\n                numeric_only=False,\n                agg_kwargs=dict(dropna=dropna),\n            )\n        )\n\n    def resample(self, rule, *args, include_groups=True, **kwargs):\n        return self._default_to_pandas(\n            lambda df: df.resample(rule, *args, include_groups=include_groups, **kwargs)\n        )\n\n    def median(self, numeric_only=False):\n        return self._check_index(\n            self._wrap_aggregation(\n                type(self._query_compiler).groupby_median,\n                numeric_only=numeric_only,\n            )\n        )\n\n    def head(self, n=5):\n        # groupby().head()/.tail() ignore as_index, so override it to True\n        work_object = self._override(as_index=True)\n\n        return work_object._check_index(\n            work_object._wrap_aggregation(\n                type(work_object._query_compiler).groupby_head,\n                agg_kwargs=dict(n=n),\n                numeric_only=False,\n            )\n        )\n\n    def cumprod(self, axis=lib.no_default, *args, **kwargs):\n        if axis is not lib.no_default:\n            axis = self._df._get_axis_number(axis)\n            self._deprecate_axis(axis, \"cumprod\")\n        else:\n            axis = 0\n        return self._wrap_aggregation(\n            type(self._query_compiler).groupby_cumprod,\n            agg_args=args,\n            agg_kwargs=dict(axis=axis, **kwargs),\n        )\n\n    def __iter__(self):\n        return self._iter.__iter__()\n\n    def cov(self, min_periods=None, ddof=1, numeric_only=False):\n        return self._wrap_aggregation(\n            type(self._query_compiler).groupby_cov,\n            agg_kwargs=dict(min_periods=min_periods, ddof=ddof),\n            numeric_only=numeric_only,\n        )\n\n    def transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs):\n        if engine not in (\"cython\", None) and engine_kwargs is not None:\n            return 
self._default_to_pandas(\n                lambda df: df.transform(\n                    func, *args, engine=engine, engine_kwargs=engine_kwargs, **kwargs\n                )\n            )\n\n        return self._wrap_aggregation(\n            qc_method=type(self._query_compiler).groupby_agg,\n            numeric_only=False,\n            agg_func=func,\n            agg_args=args,\n            agg_kwargs=kwargs,\n            how=\"transform\",\n        )\n\n    def corr(self, method=\"pearson\", min_periods=1, numeric_only=False):\n        return self._wrap_aggregation(\n            type(self._query_compiler).groupby_corr,\n            agg_kwargs=dict(method=method, min_periods=min_periods),\n            numeric_only=numeric_only,\n        )\n\n    def fillna(\n        self,\n        value=None,\n        method=None,\n        axis=lib.no_default,\n        inplace=False,\n        limit=None,\n        downcast=lib.no_default,\n    ):\n        if axis is not lib.no_default:\n            self._deprecate_axis(axis, \"fillna\")\n\n        warnings.warn(\n            f\"{type(self).__name__}.fillna is deprecated and will be removed \"\n            + \"in a future version. 
Use obj.ffill(), obj.bfill(), \"\n            + \"or obj.nearest() instead.\",\n            FutureWarning,\n        )\n\n        # default behaviour for aggregations; for the reference see\n        # `_op_via_apply` func in pandas==2.0.2\n        if axis is None or axis is lib.no_default:\n            axis = self._axis\n\n        new_groupby_kwargs = self._kwargs.copy()\n        new_groupby_kwargs[\"as_index\"] = True\n        work_object = type(self)(\n            df=self._df,\n            by=self._by,\n            axis=self._axis,\n            idx_name=self._idx_name,\n            drop=self._drop,\n            backend_pinned=self._backend_pinned,\n            **new_groupby_kwargs,\n        )\n        return work_object._wrap_aggregation(\n            type(self._query_compiler).groupby_fillna,\n            agg_kwargs=dict(\n                value=value,\n                method=method,\n                axis=axis,\n                inplace=inplace,\n                limit=limit,\n                downcast=downcast,\n            ),\n            numeric_only=False,\n        )\n\n    def count(self):\n        return self._wrap_aggregation(\n            type(self._query_compiler).groupby_count,\n            numeric_only=False,\n        )\n\n    def pipe(self, func, *args, **kwargs):\n        return com.pipe(self, func, *args, **kwargs)\n\n    def cumcount(self, ascending=True):\n        result = self._wrap_aggregation(\n            type(self._query_compiler).groupby_cumcount,\n            numeric_only=False,\n            agg_kwargs=dict(ascending=ascending),\n        )\n        if not isinstance(result, Series):\n            # The result should always be a Series with name None and type int64\n            result = result.squeeze(axis=1)\n        return result\n\n    def tail(self, n=5):\n        # groupby().head()/.tail() ignore as_index, so override it to True\n        work_object = self._override(as_index=True)\n        return work_object._check_index(\n            
work_object._wrap_aggregation(\n                type(work_object._query_compiler).groupby_tail,\n                agg_kwargs=dict(n=n),\n                numeric_only=False,\n            )\n        )\n\n    # expanding and rolling are unique cases and need to likely be handled\n    # separately. They do not appear to be commonly used.\n    def expanding(self, *args, **kwargs):\n        return self._default_to_pandas(lambda df: df.expanding(*args, **kwargs))\n\n    def rolling(self, *args, **kwargs):\n        return RollingGroupby(self, *args, **kwargs)\n\n    def hist(\n        self,\n        column=None,\n        by=None,\n        grid=True,\n        xlabelsize=None,\n        xrot=None,\n        ylabelsize=None,\n        yrot=None,\n        ax=None,\n        sharex=False,\n        sharey=False,\n        figsize=None,\n        layout=None,\n        bins=10,\n        backend=None,\n        legend=False,\n        **kwargs,\n    ):\n        return self._default_to_pandas(\n            lambda df: df.hist(\n                column=column,\n                by=by,\n                grid=grid,\n                xlabelsize=xlabelsize,\n                xrot=xrot,\n                ylabelsize=ylabelsize,\n                yrot=yrot,\n                ax=ax,\n                sharex=sharex,\n                sharey=sharey,\n                figsize=figsize,\n                layout=layout,\n                bins=bins,\n                backend=backend,\n                legend=legend,\n                **kwargs,\n            )\n        )\n\n    def quantile(self, q=0.5, interpolation=\"linear\", numeric_only=False):\n        # TODO: handle list-like cases properly\n        if is_list_like(q):\n            return self._default_to_pandas(\n                lambda df: df.quantile(q=q, interpolation=interpolation)\n            )\n\n        return self._check_index(\n            self._wrap_aggregation(\n                type(self._query_compiler).groupby_quantile,\n                
numeric_only=numeric_only,\n                agg_kwargs=dict(q=q, interpolation=interpolation),\n            )\n        )\n\n    def diff(self, periods=1, axis=lib.no_default):\n        from .dataframe import DataFrame\n\n        if axis is not lib.no_default:\n            axis = self._df._get_axis_number(axis)\n            self._deprecate_axis(axis, \"diff\")\n        else:\n            axis = 0\n\n        # Should check for API level errors\n        # Attempting to match pandas error behavior here\n        if not isinstance(periods, int):\n            raise TypeError(f\"periods must be an int. got {type(periods)} instead\")\n\n        if isinstance(self._df, Series):\n            if not is_numeric_dtype(self._df.dtypes):\n                raise TypeError(\n                    f\"unsupported operand type for -: got {self._df.dtypes}\"\n                )\n        elif isinstance(self._df, DataFrame) and axis == 0:\n            for col, dtype in self._df.dtypes.items():\n                # can't calculate diff on non-numeric columns, so check for non-numeric\n                # columns that are not included in the `by`\n                if not (\n                    is_numeric_dtype(dtype) or is_datetime64_any_dtype(dtype)\n                ) and not (\n                    isinstance(self._by, BaseQueryCompiler) and col in self._by.columns\n                ):\n                    raise TypeError(f\"unsupported operand type for -: got {dtype}\")\n\n        return self._wrap_aggregation(\n            type(self._query_compiler).groupby_diff,\n            agg_kwargs=dict(\n                periods=periods,\n                axis=axis,\n            ),\n        )\n\n    def take(self, indices, axis=lib.no_default, **kwargs):\n        return self._default_to_pandas(lambda df: df.take(indices, axis=axis, **kwargs))\n\n    @property\n    def _index(self):\n        \"\"\"\n        Get index value.\n\n        Returns\n        -------\n        pandas.Index\n            Index value.\n   
     \"\"\"\n        return self._query_compiler.index\n\n    @property\n    def _sort(self):\n        \"\"\"\n        Get sort parameter value.\n\n        Returns\n        -------\n        bool\n            Value of sort parameter used to create DataFrameGroupBy object.\n        \"\"\"\n        return self._kwargs.get(\"sort\")\n\n    @property\n    def _as_index(self):\n        \"\"\"\n        Get as_index parameter value.\n\n        Returns\n        -------\n        bool\n            Value of as_index parameter used to create DataFrameGroupBy object.\n        \"\"\"\n        return self._kwargs.get(\"as_index\")\n\n    @property\n    def _iter(self):\n        \"\"\"\n        Construct a tuple of (group_id, DataFrame) tuples to allow iteration over groups.\n\n        Returns\n        -------\n        generator\n            Generator expression of GroupBy object broken down into tuples for iteration.\n        \"\"\"\n        from .dataframe import DataFrame\n\n        indices = self.indices\n        group_ids = indices.keys()\n        if self._axis == 0:\n            return (\n                (\n                    (k,) if self._return_tuple_when_iterating else k,\n                    DataFrame(\n                        query_compiler=self._query_compiler.getitem_row_array(\n                            indices[k]\n                        )\n                    ),\n                )\n                for k in (sorted(group_ids) if self._sort else group_ids)\n            )\n        else:\n            return (\n                (\n                    (k,) if self._return_tuple_when_iterating else k,\n                    DataFrame(\n                        query_compiler=self._query_compiler.getitem_column_array(\n                            indices[k], numeric=True\n                        )\n                    ),\n                )\n                for k in (sorted(group_ids) if self._sort else group_ids)\n            )\n\n    def _compute_index_grouped(self, 
numerical=False):\n        \"\"\"\n        Construct an index of group IDs.\n\n        Parameters\n        ----------\n        numerical : bool, default: False\n            Whether group indices should be positional (True) or label-based (False).\n\n        Returns\n        -------\n        dict\n            A dict of {group name -> group indices} values.\n\n        See Also\n        --------\n        pandas.core.groupby.GroupBy.groups\n        \"\"\"\n        # We end up using pure pandas to compute group indices, so raising a warning\n        ErrorMessage.default_to_pandas(\"Group indices computation\")\n\n        # Splitting level-by and column-by since we serialize them in different ways\n        by = None\n        level = []\n        if self._level is not None:\n            level = self._level\n            if not isinstance(level, list):\n                level = [level]\n        elif isinstance(self._by, list):\n            by = []\n            for o in self._by:\n                if hashable(o) and o in self._query_compiler.get_index_names(\n                    self._axis\n                ):\n                    level.append(o)\n                else:\n                    by.append(o)\n        else:\n            by = self._by\n\n        is_multi_by = self._is_multi_by or (by is not None and len(level) > 0)\n        # `dropna` param is the only one that matters for the group indices result\n        dropna = self._kwargs.get(\"dropna\", True)\n\n        if isinstance(self._by, BaseQueryCompiler) and is_multi_by:\n            by = list(self._by.columns)\n\n        if is_multi_by:\n            # Because we are doing a collect (to_pandas) here and then groupby, we\n            # end up using pandas implementation. 
Add the warning so the user is\n            # aware.\n            ErrorMessage.catch_bugs_and_request_email(self._axis == 1)\n            if isinstance(by, list) and all(\n                is_label(self._df, o, self._axis) for o in by\n            ):\n                pandas_df = self._df._query_compiler.getitem_column_array(\n                    by\n                ).to_pandas()\n            else:\n                by = try_cast_to_pandas(by, squeeze=True)\n                pandas_df = self._df._to_pandas()\n            by = wrap_into_list(by, level)\n            groupby_obj = pandas_df.groupby(by=by, dropna=dropna)\n            return groupby_obj.indices if numerical else groupby_obj.groups\n        else:\n            if isinstance(self._by, type(self._query_compiler)):\n                by = self._by.to_pandas().squeeze().values\n            elif self._by is None:\n                index = self._query_compiler.get_axis(self._axis)\n                levels_to_drop = [\n                    i\n                    for i, name in enumerate(index.names)\n                    if name not in level and i not in level\n                ]\n                by = index.droplevel(levels_to_drop)\n                if isinstance(by, pandas.MultiIndex):\n                    by = by.reorder_levels(level)\n            else:\n                by = self._by\n            axis_labels = self._query_compiler.get_axis(self._axis)\n            if numerical:\n                # Since we want positional indices of the groups, we want to group\n                # on a `RangeIndex`, not on the actual index labels\n                axis_labels = pandas.RangeIndex(len(axis_labels))\n            # `pandas.Index.groupby` doesn't take any parameters except `by`.\n            # Have to convert an Index to a Series to be able to process `dropna=False`:\n            if dropna:\n                return axis_labels.groupby(by)\n            else:\n                groupby_obj = axis_labels.to_series().groupby(by, 
dropna=dropna)\n                return groupby_obj.indices if numerical else groupby_obj.groups\n\n    def _wrap_aggregation(\n        self,\n        qc_method,\n        numeric_only=False,\n        agg_args=None,\n        agg_kwargs=None,\n        **kwargs,\n    ):\n        \"\"\"\n        Perform common metadata transformations and apply groupby functions.\n\n        Parameters\n        ----------\n        qc_method : callable\n            The query compiler method to call.\n        numeric_only : {None, True, False}, default: False\n            Specifies whether to aggregate non-numeric columns:\n                - True: include only numeric columns (including categories that hold a numeric dtype)\n                - False: include all columns\n                - None: infer the parameter, ``False`` if there are no numeric types in the frame,\n                  ``True`` otherwise.\n        agg_args : list-like, optional\n            Positional arguments to pass to the aggregation function.\n        agg_kwargs : dict-like, optional\n            Keyword arguments to pass to the aggregation function.\n        **kwargs : dict\n            Keyword arguments to pass to the specified query compiler's method.\n\n        Returns\n        -------\n        DataFrame or Series\n            Returns the same type as `self._df`.\n        \"\"\"\n        agg_args = tuple() if agg_args is None else agg_args\n        agg_kwargs = dict() if agg_kwargs is None else agg_kwargs\n\n        if numeric_only and self.ndim == 2:\n            by_cols = self._internal_by\n            mask_cols = [\n                col\n                for col, dtype in self._query_compiler.dtypes.items()\n                if (is_numeric_dtype(dtype) or col in by_cols)\n            ]\n            groupby_qc = self._query_compiler.getitem_column_array(mask_cols)\n        else:\n            groupby_qc = self._query_compiler\n\n        return type(self._df)(\n            query_compiler=qc_method(\n                
groupby_qc,\n                by=self._by,\n                axis=self._axis,\n                groupby_kwargs=self._kwargs,\n                agg_args=agg_args,\n                agg_kwargs=agg_kwargs,\n                drop=self._drop,\n                **kwargs,\n            )\n        )\n\n    def _check_index(self, result):\n        \"\"\"\n        Check the result of groupby aggregation on the need of resetting index.\n\n        Parameters\n        ----------\n        result : DataFrame\n            Group by aggregation result.\n\n        Returns\n        -------\n        DataFrame\n        \"\"\"\n        if self._by is None and not self._as_index:\n            # This is a workaround to align behavior with pandas. In this case pandas\n            # resets index, but Modin doesn't do that. More details are in https://github.com/modin-project/modin/issues/3716.\n            result.reset_index(drop=True, inplace=True)\n\n        return result\n\n    def _default_to_pandas(self, f, *args, **kwargs):\n        \"\"\"\n        Execute function `f` in default-to-pandas way.\n\n        Parameters\n        ----------\n        f : callable or str\n            The function to apply to each group.\n        *args : list\n            Extra positional arguments to pass to `f`.\n        **kwargs : dict\n            Extra keyword arguments to pass to `f`.\n\n        Returns\n        -------\n        modin.pandas.DataFrame\n            A new Modin DataFrame with the result of the pandas function.\n        \"\"\"\n        if (\n            isinstance(self._by, type(self._query_compiler))\n            and len(self._by.columns) == 1\n        ):\n            by = self._by.columns[0] if self._drop else self._by.to_pandas().squeeze()\n        # converting QC 'by' to a list of column labels only if this 'by' comes from the self (if drop is True)\n        elif self._drop and isinstance(self._by, type(self._query_compiler)):\n            by = list(self._by.columns)\n        else:\n            
by = self._by\n\n        by = try_cast_to_pandas(by, squeeze=True)\n        # Since 'by' may be a 2D query compiler holding columns to group by,\n        # to_pandas will also produce a pandas DataFrame containing them.\n        # So splitting 2D 'by' into a list of 1D Series using 'GroupBy.validate_by':\n        by = GroupBy.validate_by(by)\n\n        def groupby_on_multiple_columns(df, *args, **kwargs):\n            groupby_obj = df.groupby(by=by, axis=self._axis, **self._kwargs)\n\n            if callable(f):\n                return f(groupby_obj, *args, **kwargs)\n            else:\n                ErrorMessage.catch_bugs_and_request_email(\n                    failure_condition=not isinstance(f, str)\n                )\n                attribute = getattr(groupby_obj, f)\n                if callable(attribute):\n                    return attribute(*args, **kwargs)\n                return attribute\n\n        return self._df._default_to_pandas(groupby_on_multiple_columns, *args, **kwargs)\n\n\n@_inherit_docstrings(pandas.core.groupby.SeriesGroupBy)\nclass SeriesGroupBy(DataFrameGroupBy):  # noqa: GL08\n    _pandas_class = pandas.core.groupby.SeriesGroupBy\n    _extensions: EXTENSION_DICT_TYPE = EXTENSION_DICT_TYPE(dict)\n\n    @disable_logging\n    def __getattribute__(self, item: str) -> Any:\n        \"\"\"\n        Get an attribute of the object.\n\n        Python calls this method for every attribute access. 
We override it to\n        get extension attributes.\n\n        Parameters\n        ----------\n        item : str\n            Attribute name.\n\n        Returns\n        -------\n        Any\n            The value of the attribute.\n        \"\"\"\n        if item not in GROUPBY_EXTENSION_NO_LOOKUP:\n            extensions_result = self._getattribute__from_extension_impl(\n                item, __class__._extensions\n            )\n            if extensions_result is not sentinel:\n                return extensions_result\n\n        return super().__getattribute__(item)\n\n    @_inherit_docstrings(QueryCompilerCaster._getattr__from_extension_impl)\n    def __getattr__(self, key: str) -> Any:\n        return self._getattr__from_extension_impl(\n            key=key,\n            default_behavior_attributes=GROUPBY_EXTENSION_NO_LOOKUP,\n            extensions=__class__._extensions,\n        )\n\n    @disable_logging\n    def __setattr__(self, key: str, value: Any) -> None:\n        \"\"\"\n        Set an attribute of the object.\n\n        We override this method to support settable extension attributes.\n\n        Parameters\n        ----------\n        key : str\n            Attribute name.\n        value : Any\n            Value to set the attribute to.\n\n        Returns\n        -------\n        None\n        \"\"\"\n        # An extension property is only accessible if the backend supports it.\n        extension = self._get_extension(key, __class__._extensions)\n        if extension is not sentinel and hasattr(extension, \"__set__\"):\n            return extension.__set__(self, value)\n        return super().__setattr__(key, value)\n\n    @disable_logging\n    def __delattr__(self, name: str) -> None:\n        \"\"\"\n        Delete an attribute of the object.\n\n        We override this method to support deletable extension attributes.\n\n        Parameters\n        ----------\n        name : str\n            Attribute name.\n\n        Returns\n        
-------\n        None\n        \"\"\"\n        # An extension property is only accessible if the backend supports it.\n        extension = self._get_extension(name, __class__._extensions)\n        if extension is not sentinel and hasattr(extension, \"__delete__\"):\n            return extension.__delete__(self)\n        return super().__delattr__(name)\n\n    @property\n    def ndim(self):\n        \"\"\"\n        Return 1.\n\n        Returns\n        -------\n        int\n            Returns 1.\n\n        Notes\n        -----\n        Deprecated and removed in pandas and will be likely removed in Modin.\n        \"\"\"\n        return 1  # ndim is always 1 for Series\n\n    @property\n    def _iter(self):\n        \"\"\"\n        Construct a tuple of (group_id, Series) tuples to allow iteration over groups.\n\n        Returns\n        -------\n        generator\n            Generator expression of GroupBy object broken down into tuples for iteration.\n        \"\"\"\n        indices = self.indices\n        group_ids = indices.keys()\n        if self._axis == 0:\n            return (\n                (\n                    k,\n                    Series(\n                        query_compiler=self._query_compiler.getitem_row_array(\n                            indices[k]\n                        )\n                    ),\n                )\n                for k in (sorted(group_ids) if self._sort else group_ids)\n            )\n        else:\n            return (\n                (\n                    k,\n                    Series(\n                        query_compiler=self._query_compiler.getitem_column_array(\n                            indices[k], numeric=True\n                        )\n                    ),\n                )\n                for k in (sorted(group_ids) if self._sort else group_ids)\n            )\n\n    def _try_get_str_func(self, fn):\n        \"\"\"\n        Try to convert a groupby aggregation function to a string or list of 
such.\n\n        Parameters\n        ----------\n        fn : callable, str, or Iterable\n\n        Returns\n        -------\n        str, list\n            If `fn` is a callable, return its name, otherwise return `fn` itself.\n            If `fn` is a string, return it. If `fn` is an Iterable, return a list\n            of _try_get_str_func applied to each element of `fn`.\n        \"\"\"\n        if not isinstance(fn, str) and isinstance(fn, Iterable):\n            return [self._try_get_str_func(f) for f in fn]\n        return fn.__name__ if callable(fn) else fn\n\n    def value_counts(\n        self,\n        normalize: bool = False,\n        sort: bool = True,\n        ascending: bool = False,\n        bins=None,\n        dropna: bool = True,\n    ):  # noqa: GL08\n        return self._default_to_pandas(\n            lambda ser: ser.value_counts(\n                normalize=normalize,\n                sort=sort,\n                ascending=ascending,\n                bins=bins,\n                dropna=dropna,\n            )\n        )\n\n    def corr(self, other, method=\"pearson\", min_periods=None):\n        return self._wrap_aggregation(\n            type(self._query_compiler).groupby_corr,\n            agg_kwargs=dict(other=other, method=method, min_periods=min_periods),\n        )\n\n    def cov(self, other, min_periods=None, ddof=1):\n        return self._wrap_aggregation(\n            type(self._query_compiler).groupby_cov,\n            agg_kwargs=dict(other=other, min_periods=min_periods, ddof=ddof),\n        )\n\n    def describe(self, percentiles=None, include=None, exclude=None):\n        return self._default_to_pandas(\n            lambda df: df.describe(\n                percentiles=percentiles, include=include, exclude=exclude\n            )\n        )\n\n    def apply(self, func, *args, **kwargs):\n        return super().apply(func, *args, **kwargs)\n\n    def idxmax(self, axis=lib.no_default, skipna=True):\n        if axis is not lib.no_default:\n 
           axis = self._df._get_axis_number(axis)\n            self._deprecate_axis(axis, \"idxmax\")\n        else:\n            axis = 0\n        return self._wrap_aggregation(\n            type(self._query_compiler).groupby_idxmax,\n            agg_kwargs=dict(axis=axis, skipna=skipna),\n        )\n\n    def idxmin(self, axis=lib.no_default, skipna=True):\n        if axis is not lib.no_default:\n            axis = self._df._get_axis_number(axis)\n            self._deprecate_axis(axis, \"idxmin\")\n        else:\n            axis = 0\n        return self._wrap_aggregation(\n            type(self._query_compiler).groupby_idxmin,\n            agg_kwargs=dict(axis=axis, skipna=skipna),\n        )\n\n    def hist(\n        self,\n        by=None,\n        ax=None,\n        grid=True,\n        xlabelsize=None,\n        xrot=None,\n        ylabelsize=None,\n        yrot=None,\n        figsize=None,\n        bins=10,\n        backend=None,\n        legend=False,\n        **kwargs,\n    ):\n        return self._default_to_pandas(\n            lambda df: df.hist(\n                by=by,\n                ax=ax,\n                grid=grid,\n                xlabelsize=xlabelsize,\n                xrot=xrot,\n                ylabelsize=ylabelsize,\n                yrot=yrot,\n                figsize=figsize,\n                bins=bins,\n                backend=backend,\n                legend=legend,\n                **kwargs,\n            )\n        )\n\n    @property\n    def is_monotonic_decreasing(self):\n        return self._default_to_pandas(lambda ser: ser.is_monotonic_decreasing)\n\n    @property\n    def is_monotonic_increasing(self):\n        return self._default_to_pandas(lambda ser: ser.is_monotonic_increasing)\n\n    @property\n    def dtype(self):\n        return self._default_to_pandas(lambda ser: ser.dtype)\n\n    def unique(self):\n        return self._check_index(\n            self._wrap_aggregation(\n                
type(self._query_compiler).groupby_unique,\n                numeric_only=False,\n            )\n        )\n\n    def nlargest(self, n=5, keep=\"first\"):\n        return self._check_index(\n            self._wrap_aggregation(\n                type(self._query_compiler).groupby_nlargest,\n                agg_kwargs=dict(n=n, keep=keep),\n                numeric_only=True,\n            )\n        )\n\n    def nsmallest(self, n=5, keep=\"first\"):\n        return self._check_index(\n            self._wrap_aggregation(\n                type(self._query_compiler).groupby_nsmallest,\n                agg_kwargs=dict(n=n, keep=keep),\n                numeric_only=True,\n            )\n        )\n\n    def _validate_func_kwargs(self, kwargs: dict):\n        \"\"\"\n        Validate types of user-provided \"named aggregation\" kwargs.\n\n        Parameters\n        ----------\n        kwargs : dict\n\n        Returns\n        -------\n        columns : List[str]\n            List of user-provided keys.\n        funcs : List[Union[str, callable[...,Any]]]\n            List of user-provided aggfuncs.\n\n        Raises\n        ------\n        `TypeError` is raised if aggfunc is not `str` or callable.\n\n        Notes\n        -----\n        Copied from pandas.\n        \"\"\"\n        columns = list(kwargs)\n        funcs = []\n        for col_func in kwargs.values():\n            if not (isinstance(col_func, str) or callable(col_func)):\n                raise TypeError(\n                    f\"func is expected but received {type(col_func).__name__} in **kwargs.\"\n                )\n            funcs.append(col_func)\n        if not columns:\n            raise TypeError(\"Must provide 'func' or named aggregation **kwargs.\")\n        return columns, funcs\n\n    def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs):\n        engine_default = engine is None and engine_kwargs is None\n        # if func is None, will switch to user-provided \"named 
aggregation\" kwargs\n        if func_is_none := func is None:\n            columns, func = self._validate_func_kwargs(kwargs)\n            kwargs = {}\n        if isinstance(func, dict) and engine_default:\n            raise SpecificationError(\"nested renamer is not supported\")\n        elif is_list_like(func) and engine_default:\n            from .dataframe import DataFrame\n\n            result = DataFrame(\n                query_compiler=self._query_compiler.groupby_agg(\n                    by=self._by,\n                    agg_func=func,\n                    axis=self._axis,\n                    groupby_kwargs=self._kwargs,\n                    agg_args=args,\n                    agg_kwargs=kwargs,\n                )\n            )\n            # query compiler always gives result a multiindex on the axis with the\n            # function names, but series always gets a regular index on the columns\n            # because there is no need to identify which original column's aggregation\n            # the new column represents. alternatively we could give the query compiler\n            # a hint that it's for a series, not a dataframe.\n            if func_is_none:\n                return result.set_axis(labels=columns, axis=1, copy=False)\n            return result.set_axis(\n                labels=self._try_get_str_func(func), axis=1, copy=False\n            )\n        else:\n            return super().aggregate(\n                func, *args, engine=engine, engine_kwargs=engine_kwargs, **kwargs\n            )\n\n    agg = aggregate\n"
  },
  {
    "path": "modin/pandas/indexing.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n# noqa: MD02\n\"\"\"\nDetails about how Indexing Helper Class works.\n\n_LocationIndexerBase provide methods framework for __getitem__\n  and __setitem__ that work with Modin DataFrame's internal index. Base\n  class's __{get,set}item__ takes in partitions & idx_in_partition data\n  and perform lookup/item write.\n\n_LocIndexer and _iLocIndexer is responsible for indexer specific logic and\n  lookup computation. Loc will take care of enlarge DataFrame. 
Both indexer\n  will take care of translating pandas's lookup to Modin DataFrame's internal\n  lookup.\n\nAn illustration is available at\nhttps://github.com/ray-project/ray/pull/1955#issuecomment-386781826\n\"\"\"\n\nfrom __future__ import annotations\n\nimport itertools\nfrom typing import TYPE_CHECKING, Optional, Union\n\nimport numpy as np\nimport pandas\nfrom pandas.api.types import is_bool, is_list_like\nfrom pandas.core.dtypes.common import is_bool_dtype, is_integer, is_integer_dtype\nfrom pandas.core.indexing import IndexingError\n\nfrom modin.core.storage_formats.pandas.query_compiler_caster import (\n    EXTENSION_DICT_TYPE,\n    QueryCompilerCaster,\n)\nfrom modin.error_message import ErrorMessage\nfrom modin.logging import ClassLogger, disable_logging\nfrom modin.utils import _inherit_docstrings\n\nfrom .dataframe import DataFrame\nfrom .series import Series\nfrom .utils import is_scalar\n\nif TYPE_CHECKING:\n    from typing_extensions import Self\n\n    from modin.core.storage_formats import BaseQueryCompiler\n\n\ndef is_slice(x):\n    \"\"\"\n    Check that argument is an instance of slice.\n\n    Parameters\n    ----------\n    x : object\n        Object to check.\n\n    Returns\n    -------\n    bool\n        True if argument is a slice, False otherwise.\n    \"\"\"\n    return isinstance(x, slice)\n\n\ndef compute_sliced_len(slc, sequence_len):\n    \"\"\"\n    Compute length of sliced object.\n\n    Parameters\n    ----------\n    slc : slice\n        Slice object.\n    sequence_len : int\n        Length of sequence, to which slice will be applied.\n\n    Returns\n    -------\n    int\n        Length of object after applying slice object on it.\n    \"\"\"\n    # This will translate slice to a range, from which we can retrieve length\n    return len(range(*slc.indices(sequence_len)))\n\n\ndef is_2d(x):\n    \"\"\"\n    Check that argument is a list or a slice.\n\n    Parameters\n    ----------\n    x : object\n        Object to check.\n\n    
Returns\n    -------\n    bool\n        `True` if argument is a list or slice, `False` otherwise.\n    \"\"\"\n    return is_list_like(x) or is_slice(x)\n\n\ndef is_tuple(x):\n    \"\"\"\n    Check that argument is a tuple.\n\n    Parameters\n    ----------\n    x : object\n        Object to check.\n\n    Returns\n    -------\n    bool\n        True if argument is a tuple, False otherwise.\n    \"\"\"\n    return isinstance(x, tuple)\n\n\ndef is_boolean_array(x):\n    \"\"\"\n    Check that argument is an array of bool.\n\n    Parameters\n    ----------\n    x : object\n        Object to check.\n\n    Returns\n    -------\n    bool\n        True if argument is an array of bool, False otherwise.\n    \"\"\"\n    if isinstance(x, (np.ndarray, Series, pandas.Series, pandas.Index)):\n        return is_bool_dtype(x.dtype)\n    elif isinstance(x, (DataFrame, pandas.DataFrame)):\n        return all(map(is_bool_dtype, x.dtypes))\n    return is_list_like(x) and all(map(is_bool, x))\n\n\ndef is_integer_array(x):\n    \"\"\"\n    Check that argument is an array of integers.\n\n    Parameters\n    ----------\n    x : object\n        Object to check.\n\n    Returns\n    -------\n    bool\n        True if argument is an array of integers, False otherwise.\n    \"\"\"\n    if isinstance(x, (np.ndarray, Series, pandas.Series, pandas.Index)):\n        return is_integer_dtype(x.dtype)\n    elif isinstance(x, (DataFrame, pandas.DataFrame)):\n        return all(map(is_integer_dtype, x.dtypes))\n    return is_list_like(x) and all(map(is_integer, x))\n\n\ndef is_integer_slice(x):\n    \"\"\"\n    Check that argument is a slice whose start, stop and step are integers or None.\n\n    Parameters\n    ----------\n    x : object\n        Object to check.\n\n    Returns\n    -------\n    bool\n        True if argument is a slice whose start, stop and step are each\n        either None or an integer, False otherwise.\n    \"\"\"\n    if not is_slice(x):\n        return False\n    for pos in [x.start, x.stop, x.step]:\n        if not ((pos is None) or is_integer(pos)):\n            return False  
# one position is neither None nor int\n    return True\n\n\ndef is_range_like(obj):\n    \"\"\"\n    Check if the object is range-like.\n\n    Objects that are considered range-like have information about the range (start and\n    stop positions, and step) and also have to be iterable. Examples of range-like\n    objects are: Python range, pandas.RangeIndex.\n\n    Parameters\n    ----------\n    obj : object\n\n    Returns\n    -------\n    bool\n    \"\"\"\n    return (\n        hasattr(obj, \"__iter__\")\n        and hasattr(obj, \"start\")\n        and hasattr(obj, \"stop\")\n        and hasattr(obj, \"step\")\n    )\n\n\ndef boolean_mask_to_numeric(indexer):\n    \"\"\"\n    Convert boolean mask to numeric indices.\n\n    Parameters\n    ----------\n    indexer : list-like of booleans\n\n    Returns\n    -------\n    np.ndarray of ints\n        Numerical positions of ``True`` elements in the passed `indexer`.\n    \"\"\"\n    if isinstance(indexer, (np.ndarray, Series, pandas.Series)):\n        return np.where(indexer)[0]\n    else:\n        # It's faster to build the resulting numpy array from the reduced amount of data via\n        # `compress` iterator than convert non-numpy-like `indexer` to numpy and apply `np.where`.\n        return np.fromiter(\n            # `itertools.compress` masks `data` with the `selectors` mask,\n            # works about ~10% faster than a pure list comprehension\n            itertools.compress(data=range(len(indexer)), selectors=indexer),\n            dtype=np.int64,\n        )\n\n\n_ILOC_INT_ONLY_ERROR = \"\"\"\nLocation based indexing can only have [integer, integer slice (START point is\nINCLUDED, END point is EXCLUDED), listlike of integers, boolean array] types.\n\"\"\"\n\n_one_ellipsis_message = \"indexer may only contain one '...' 
entry\"\n\n\ndef _compute_ndim(row_loc, col_loc):\n    \"\"\"\n    Compute the number of dimensions of result from locators.\n\n    Parameters\n    ----------\n    row_loc : list or scalar\n        Row locator.\n    col_loc : list or scalar\n        Column locator.\n\n    Returns\n    -------\n    {0, 1, 2}\n        Number of dimensions in located dataset.\n    \"\"\"\n    row_scalar = is_scalar(row_loc) or is_tuple(row_loc)\n    col_scalar = is_scalar(col_loc) or is_tuple(col_loc)\n\n    if row_scalar and col_scalar:\n        ndim = 0\n    elif row_scalar ^ col_scalar:\n        ndim = 1\n    else:\n        ndim = 2\n\n    return ndim\n\n\nclass _LocationIndexerBase(QueryCompilerCaster, ClassLogger):\n    \"\"\"\n    Base class for location indexer like loc and iloc.\n\n    Parameters\n    ----------\n    modin_df : Union[DataFrame, Series]\n        DataFrame to operate on.\n    \"\"\"\n\n    df: Union[DataFrame, Series]\n    qc: BaseQueryCompiler\n    _extensions: EXTENSION_DICT_TYPE = EXTENSION_DICT_TYPE(dict)\n\n    def is_backend_pinned(self) -> bool:\n        \"\"\"\n        Get whether this object's data is pinned to a particular backend.\n\n        Returns\n        -------\n        bool\n            True if the data is pinned.\n        \"\"\"\n        return self.df.is_backend_pinned()\n\n    def _set_backend_pinned(self, pinned: bool, inplace: bool = False):\n        \"\"\"\n        Update whether this object's data is pinned to a particular backend.\n\n        Parameters\n        ----------\n        pinned : bool\n            Whether the data is pinned.\n\n        inplace : bool, default: False\n            Whether to update the object in place.\n\n        Returns\n        -------\n        Optional[Self]\n            The object with the new pin state, if `inplace` is False. 
Otherwise, None.\n        \"\"\"\n        change = (self.is_backend_pinned() and not pinned) or (\n            not self.is_backend_pinned() and pinned\n        )\n        if not change:\n            return None if inplace else self\n        result = type(self)(self.df._set_backend_pinned(pinned))\n        if inplace:\n            result._copy_into(self)\n            return None\n        return result\n\n    @disable_logging\n    @_inherit_docstrings(QueryCompilerCaster.set_backend)\n    def set_backend(\n        self, backend, inplace: bool = False, *, switch_operation: Optional[str] = None\n    ) -> Optional[Self]:\n        result = type(self)(\n            self.df.set_backend(backend, switch_operation=switch_operation)\n        )\n        if inplace:\n            result._copy_into(self)\n            return None\n        return result\n\n    @disable_logging\n    @_inherit_docstrings(QueryCompilerCaster._get_query_compiler)\n    def _get_query_compiler(self):\n        return getattr(self, \"qc\", None)\n\n    @disable_logging\n    @_inherit_docstrings(QueryCompilerCaster.get_backend)\n    def get_backend(self):\n        return self.qc.get_backend()\n\n    @disable_logging\n    @_inherit_docstrings(QueryCompilerCaster._copy_into)\n    def _copy_into(self, other: Series):\n        other.qc = self.df._query_compiler\n        other.df._update_inplace(new_query_compiler=self.df._query_compiler)\n        other.df._set_backend_pinned(self.is_backend_pinned())\n        return None\n\n    def __init__(self, modin_df: Union[DataFrame, Series]):\n        # TODO(https://github.com/modin-project/modin/issues/7513): Do not keep\n        # both `df` and `qc`.\n        self.df = modin_df\n        self.qc = modin_df._query_compiler\n\n    def _validate_key_length(self, key: tuple) -> tuple:  # noqa: GL08\n        # Implementation copied from pandas.\n        if len(key) > self.df.ndim:\n            if key[0] is Ellipsis:\n                # e.g. 
Series.iloc[..., 3] reduces to just Series.iloc[3]\n                key = key[1:]\n                if Ellipsis in key:\n                    raise IndexingError(_one_ellipsis_message)\n                return self._validate_key_length(key)\n            raise IndexingError(\"Too many indexers\")\n        return key\n\n    def __getitem__(self, key):  # pragma: no cover\n        \"\"\"\n        Retrieve dataset according to `key`.\n\n        Parameters\n        ----------\n        key : callable, scalar, or tuple\n            The global row index to retrieve data from.\n\n        Returns\n        -------\n        modin.pandas.DataFrame or modin.pandas.Series\n            Located dataset.\n\n        See Also\n        --------\n        pandas.DataFrame.loc\n        \"\"\"\n        raise NotImplementedError(\"Implemented by subclasses\")\n\n    def __setitem__(self, key, item):  # pragma: no cover\n        \"\"\"\n        Assign `item` value to dataset located by `key`.\n\n        Parameters\n        ----------\n        key : callable or tuple\n            The global row numbers to assign data to.\n        item : modin.pandas.DataFrame, modin.pandas.Series or scalar\n            Value that should be assigned to located dataset.\n\n        See Also\n        --------\n        pandas.DataFrame.iloc\n        \"\"\"\n        raise NotImplementedError(\"Implemented by subclasses\")\n\n    def _get_pandas_object_from_qc_view(\n        self,\n        qc_view,\n        row_multiindex_full_lookup: bool,\n        col_multiindex_full_lookup: bool,\n        row_scalar: bool,\n        col_scalar: bool,\n        ndim: int,\n    ):\n        \"\"\"\n        Convert the query compiler view to the appropriate pandas object.\n\n        Parameters\n        ----------\n        qc_view : BaseQueryCompiler\n            Query compiler to convert.\n        row_multiindex_full_lookup : bool\n            See _multiindex_possibly_contains_key.__doc__.\n        col_multiindex_full_lookup : bool\n      
      See _multiindex_possibly_contains_key.__doc__.\n        row_scalar : bool\n            Whether indexer for rows is scalar.\n        col_scalar : bool\n            Whether indexer for columns is scalar.\n        ndim : {0, 1, 2}\n            Number of dimensions in dataset to be retrieved.\n\n        Returns\n        -------\n        modin.pandas.DataFrame or modin.pandas.Series\n            The pandas object with the data from the query compiler view.\n\n        Notes\n        -----\n        Usage of `slice(None)` as a lookup is a hack to pass information about\n        full-axis grab without computing actual indices that triggers lazy computations.\n        Ideally, this API should get rid of using slices as indexers and either use a\n        common ``Indexer`` object or range and ``np.ndarray`` only.\n        \"\"\"\n        if ndim == 2:\n            return self.df.__constructor__(query_compiler=qc_view)\n        if isinstance(self.df, Series) and not row_scalar:\n            return self.df.__constructor__(query_compiler=qc_view)\n\n        if isinstance(self.df, Series):\n            axis = 0\n        elif ndim == 0:\n            axis = None\n        else:\n            # We are in the case where ndim == 1\n            # The axis we squeeze on depends on whether we are looking for an exact\n            # value or a subset of rows and columns. 
Knowing if we have a full MultiIndex\n            # lookup or scalar lookup can help us figure out whether we need to squeeze\n            # on the row or column index.\n            axis = (\n                None\n                if (col_scalar and row_scalar)\n                or (row_multiindex_full_lookup and col_multiindex_full_lookup)\n                else 1 if col_scalar or col_multiindex_full_lookup else 0\n            )\n\n        res_df = self.df.__constructor__(query_compiler=qc_view)\n        return res_df.squeeze(axis=axis)\n\n    def _setitem_positional(self, row_lookup, col_lookup, item, axis=None):\n        \"\"\"\n        Assign `item` value to located dataset.\n\n        Parameters\n        ----------\n        row_lookup : slice or scalar\n            The global row index to write item to.\n        col_lookup : slice or scalar\n            The global col index to write item to.\n        item : DataFrame, Series or scalar\n            The new item to be set. It can be any shape that's\n            broadcast-able to the product of the lookup tables.\n        axis : {None, 0, 1}, default: None\n            If not None, it means that whole axis is used to assign a value.\n            0 means assign to whole column, 1 means assign to whole row.\n            If None, it means that partial assignment is done on both axes.\n        \"\"\"\n        # Convert slices to indices for the purposes of application.\n        # TODO (devin-petersohn): Apply to slice without conversion to list\n        if isinstance(row_lookup, slice):\n            row_lookup = range(len(self.qc.index))[row_lookup]\n        if isinstance(col_lookup, slice):\n            col_lookup = range(len(self.qc.columns))[col_lookup]\n        # This is True when we are dealing with assignment of a full column. 
This case\n        # should be handled in a fastpath with `df[col] = item`.\n        if axis == 0:\n            assert len(col_lookup) == 1\n            self.df[self.df.columns[col_lookup][0]] = item\n        # This is True when we are assigning to a full row. We want to reuse the setitem\n        # mechanism to operate along only one axis for performance reasons.\n        elif axis == 1:\n            if hasattr(item, \"_query_compiler\"):\n                if isinstance(item, DataFrame):\n                    item = item.squeeze(axis=0)\n                item = item._query_compiler\n            assert len(row_lookup) == 1\n            new_qc = self.qc.setitem(1, self.qc.index[row_lookup[0]], item)\n            self.df._create_or_update_from_compiler(new_qc, inplace=True)\n        # Assignment to both axes.\n        else:\n            new_qc = self.qc.write_items(row_lookup, col_lookup, item)\n            self.df._create_or_update_from_compiler(new_qc, inplace=True)\n\n        self.qc = self.df._query_compiler\n\n    def _determine_setitem_axis(self, row_lookup, col_lookup, row_scalar, col_scalar):\n        \"\"\"\n        Determine an axis along which we should do an assignment.\n\n        Parameters\n        ----------\n        row_lookup : slice or list\n            Indexer for rows.\n        col_lookup : slice or list\n            Indexer for columns.\n        row_scalar : bool\n            Whether indexer for rows is scalar or not.\n        col_scalar : bool\n            Whether indexer for columns is scalar or not.\n\n        Returns\n        -------\n        int or None\n            None if this will be a both axis assignment, number of axis to assign in other cases.\n\n        Notes\n        -----\n        axis = 0: column assignment df[col] = item\n        axis = 1: row assignment df.loc[row] = item\n        axis = None: assignment along both axes\n        \"\"\"\n        if self.df.shape == (1, 1):\n            return None if not (row_scalar ^ col_scalar) 
else 1 if row_scalar else 0\n\n        def get_axis(axis):\n            return self.qc.index if axis == 0 else self.qc.columns\n\n        row_lookup_len, col_lookup_len = [\n            (\n                len(lookup)\n                if not isinstance(lookup, slice)\n                else compute_sliced_len(lookup, len(get_axis(i)))\n            )\n            for i, lookup in enumerate([row_lookup, col_lookup])\n        ]\n\n        if col_lookup_len == 1 and row_lookup_len == 1:\n            axis = None\n        elif (\n            row_lookup_len == len(self.qc.index)\n            and col_lookup_len == 1\n            and isinstance(self.df, DataFrame)\n        ):\n            axis = 0\n        elif col_lookup_len == len(self.qc.columns) and row_lookup_len == 1:\n            axis = 1\n        else:\n            axis = None\n        return axis\n\n    def _parse_row_and_column_locators(self, tup):\n        \"\"\"\n        Unpack the user input for getitem and setitem and compute ndim.\n\n        loc[a] -> ([a], :), 1D\n        loc[[a,b]] -> ([a,b], :),\n        loc[a,b] -> ([a], [b]), 0D\n\n        Parameters\n        ----------\n        tup : tuple\n            User input to unpack.\n\n        Returns\n        -------\n        row_loc : scalar or list\n            Row locator(s) as a scalar or List.\n        col_list : scalar or list\n            Column locator(s) as a scalar or List.\n        ndim : {0, 1, 2}\n            Number of dimensions of located dataset.\n        \"\"\"\n        row_loc, col_loc = slice(None), slice(None)\n\n        if is_tuple(tup):\n            row_loc = tup[0]\n            if len(tup) == 2:\n                col_loc = tup[1]\n            if len(tup) > 2:\n                raise IndexingError(\"Too many indexers\")\n        else:\n            row_loc = tup\n\n        row_loc = row_loc(self.df) if callable(row_loc) else row_loc\n        col_loc = col_loc(self.df) if callable(col_loc) else col_loc\n        return row_loc, col_loc, 
_compute_ndim(row_loc, col_loc)\n\n    # HACK: This method bypasses regular ``loc/iloc.__getitem__`` flow in order to ensure better\n    # performance in the case of boolean masking. The only purpose of this method is to compensate\n    # for a lack of backend's indexing API, there is no Query Compiler method allowing masking\n    # along both axis when any of the indexers is a boolean. That's why rows and columns masking\n    # phases are separate in this case.\n    # TODO: Remove this method and handle this case naturally via ``loc/iloc.__getitem__`` flow\n    # when QC API would support both-axis masking with boolean indexers.\n    def _handle_boolean_masking(self, row_loc, col_loc):\n        \"\"\"\n        Retrieve dataset according to the boolean mask for rows and an indexer for columns.\n\n        In comparison with the regular ``loc/iloc.__getitem__`` flow this method efficiently\n        masks rows with a Modin Series boolean mask without materializing it (if the selected\n        execution implements such masking).\n\n        Parameters\n        ----------\n        row_loc : modin.pandas.Series of bool dtype\n            Boolean mask to index rows with.\n        col_loc : object\n            An indexer along column axis.\n\n        Returns\n        -------\n        modin.pandas.DataFrame or modin.pandas.Series\n            Located dataset.\n        \"\"\"\n        ErrorMessage.catch_bugs_and_request_email(\n            failure_condition=not isinstance(row_loc, Series),\n            extra_log=f\"Only ``modin.pandas.Series`` boolean masks are acceptable, got: {type(row_loc)}\",\n        )\n        masked_df = self.df.__constructor__(\n            query_compiler=self.qc.getitem_array(row_loc._query_compiler)\n        )\n        if isinstance(masked_df, Series):\n            assert col_loc == slice(None)\n            return masked_df\n        # Passing `slice(None)` as a row indexer since we've just applied it\n        return 
type(self)(masked_df)[(slice(None), col_loc)]\n\n    def _multiindex_possibly_contains_key(self, axis, key):\n        \"\"\"\n        Determine if a MultiIndex row/column possibly contains a key.\n\n        Check to see if the current DataFrame has a MultiIndex row/column and if it does,\n        check to see if the key is potentially a full key-lookup such that the number of\n        levels match up with the length of the tuple key.\n\n        Parameters\n        ----------\n        axis : {0, 1}\n            0 for row, 1 for column.\n        key : Any\n            Lookup key for MultiIndex row/column.\n\n        Returns\n        -------\n        bool\n            If the MultiIndex possibly contains the given key.\n\n        Notes\n        -----\n        This function only returns False if we have a partial key lookup. It's\n        possible that this function returns True for a key that does NOT exist\n        since we only check the length of the `key` tuple to match the number\n        of levels in the MultiIndex row/colunmn.\n        \"\"\"\n        if not self.qc.has_multiindex(axis=axis):\n            return False\n\n        multiindex = self.df.index if axis == 0 else self.df.columns\n        return isinstance(key, tuple) and len(key) == len(multiindex.levels)\n\n\nclass _LocIndexer(_LocationIndexerBase):\n    \"\"\"\n    An indexer for modin_df.loc[] functionality.\n\n    Parameters\n    ----------\n    modin_df : Union[DataFrame, Series]\n        DataFrame to operate on.\n    \"\"\"\n\n    _extensions: EXTENSION_DICT_TYPE = EXTENSION_DICT_TYPE(dict)\n\n    def __getitem__(self, key):\n        \"\"\"\n        Retrieve dataset according to `key`.\n\n        Parameters\n        ----------\n        key : callable, scalar, or tuple\n            The global row index to retrieve data from.\n\n        Returns\n        -------\n        modin.pandas.DataFrame or modin.pandas.Series\n            Located dataset.\n\n        See Also\n        --------\n        
pandas.DataFrame.loc\n        \"\"\"\n        if self.df.empty:\n            return self.df._default_to_pandas(lambda df: df.loc[key])\n        if isinstance(key, tuple):\n            key = self._validate_key_length(key)\n        if (\n            isinstance(key, tuple)\n            and len(key) == 2\n            and all((is_scalar(k) for k in key))\n            and self.qc.has_multiindex(axis=0)\n        ):\n            # __getitem__ has no way to distinguish between\n            # loc[('level_one_key', level_two_key')] and\n            # loc['level_one_key', 'column_name']. It's possible for both to be valid\n            # when we have a multiindex on axis=0, and it seems pandas uses\n            # interpretation 1 if that's possible. Do the same.\n            locators = self._parse_row_and_column_locators((key, slice(None)))\n            try:\n                return self._helper_for__getitem__(key, *locators)\n            except KeyError:\n                pass\n        return self._helper_for__getitem__(\n            key, *self._parse_row_and_column_locators(key)\n        )\n\n    def _helper_for__getitem__(self, key, row_loc, col_loc, ndim):\n        \"\"\"\n        Retrieve dataset according to `key`, row_loc, and col_loc.\n\n        Parameters\n        ----------\n        key : callable, scalar, or tuple\n            The global row index to retrieve data from.\n        row_loc : callable, scalar, or slice\n            Row locator(s) as a scalar or List.\n        col_loc : callable, scalar, or slice\n            Row locator(s) as a scalar or List.\n        ndim : int\n            The number of dimensions of the returned object.\n\n        Returns\n        -------\n        modin.pandas.DataFrame or modin.pandas.Series\n            Located dataset.\n        \"\"\"\n        row_scalar = is_scalar(row_loc)\n        col_scalar = is_scalar(col_loc)\n\n        # The thought process here is that we should check to see that we have a full key lookup\n        # for a 
MultiIndex DataFrame. If that's the case, then we should not drop any levels\n        # since our resulting intermediate dataframe will have dropped these for us already.\n        # Thus, we need to make sure we don't try to drop these levels again. The logic here is\n        # kind of hacked together. Ideally, we should handle this properly in the lower-level\n        # implementations, but this will have to be engineered properly later.\n        row_multiindex_full_lookup = self._multiindex_possibly_contains_key(\n            axis=0, key=row_loc\n        )\n        col_multiindex_full_lookup = self._multiindex_possibly_contains_key(\n            axis=1, key=col_loc\n        )\n        levels_already_dropped = (\n            row_multiindex_full_lookup or col_multiindex_full_lookup\n        )\n\n        if isinstance(row_loc, Series) and is_boolean_array(row_loc):\n            return self._handle_boolean_masking(row_loc, col_loc)\n\n        qc_view = self.qc.take_2d_labels(row_loc, col_loc)\n        result = self._get_pandas_object_from_qc_view(\n            qc_view,\n            row_multiindex_full_lookup,\n            col_multiindex_full_lookup,\n            row_scalar,\n            col_scalar,\n            ndim,\n        )\n\n        if isinstance(result, Series):\n            result._parent = self.df\n            result._parent_axis = 0\n\n        col_loc_as_list = [col_loc] if col_scalar else col_loc\n        row_loc_as_list = [row_loc] if row_scalar else row_loc\n        # Pandas drops the levels that are in the `loc`, so we have to as well.\n        if (\n            isinstance(result, (Series, DataFrame))\n            and result._query_compiler.has_multiindex()\n            and not levels_already_dropped\n        ):\n            if (\n                isinstance(result, Series)\n                and not isinstance(col_loc_as_list, slice)\n                and all(\n                    col_loc_as_list[i] in result.index.levels[i]\n                    for i in 
range(len(col_loc_as_list))\n                )\n            ):\n                result.index = result.index.droplevel(list(range(len(col_loc_as_list))))\n            elif not isinstance(row_loc_as_list, slice) and all(\n                not isinstance(row_loc_as_list[i], slice)\n                and row_loc_as_list[i] in result.index.levels[i]\n                for i in range(len(row_loc_as_list))\n            ):\n                result.index = result.index.droplevel(list(range(len(row_loc_as_list))))\n        if (\n            isinstance(result, DataFrame)\n            and not isinstance(col_loc_as_list, slice)\n            and not levels_already_dropped\n            and result._query_compiler.has_multiindex(axis=1)\n            and all(\n                col_loc_as_list[i] in result.columns.levels[i]\n                for i in range(len(col_loc_as_list))\n            )\n        ):\n            result.columns = result.columns.droplevel(list(range(len(col_loc_as_list))))\n        # This is done for cases where the index passed in has other state, like a\n        # frequency in the case of DateTimeIndex.\n        if (\n            row_loc is not None\n            and isinstance(col_loc, slice)\n            and col_loc == slice(None)\n            and isinstance(key, pandas.Index)\n        ):\n            result.index = key\n        return result\n\n    def __setitem__(self, key, item):\n        \"\"\"\n        Assign `item` value to dataset located by `key`.\n\n        Parameters\n        ----------\n        key : callable or tuple\n            The global row index to assign data to.\n        item : modin.pandas.DataFrame, modin.pandas.Series or scalar\n            Value that should be assigned to located dataset.\n\n        See Also\n        --------\n        pandas.DataFrame.loc\n        \"\"\"\n        if self.df.empty:\n\n            def _loc(df):\n                df.loc[key] = item\n                return df\n\n            self.df._update_inplace(\n                
new_query_compiler=self.df._default_to_pandas(_loc)._query_compiler\n            )\n            self.qc = self.df._query_compiler\n            return\n        row_loc, col_loc, ndims = self._parse_row_and_column_locators(key)\n        append_axis = self._check_missing_loc(row_loc, col_loc)\n        if ndims >= 1 and append_axis is not None:\n            # We enter this codepath if we're either appending a row or a column\n            if append_axis:\n                # Appending at least one new column\n                if is_scalar(col_loc):\n                    col_loc = [col_loc]\n                self._setitem_with_new_columns(row_loc, col_loc, item)\n            else:\n                # Appending at most one new row\n                if is_scalar(row_loc) or len(row_loc) == 1:\n                    index = self.qc.index.insert(len(self.qc.index), row_loc)\n                    self.qc = self.qc.reindex(labels=index, axis=0, fill_value=0)\n                    self.df._update_inplace(new_query_compiler=self.qc)\n                self._set_item_existing_loc(row_loc, col_loc, item)\n        else:\n            self._set_item_existing_loc(row_loc, col_loc, item)\n        self.qc = self.df._query_compiler\n\n    def _setitem_with_new_columns(self, row_loc, col_loc, item):\n        \"\"\"\n        Assign `item` value to dataset located by `row_loc` and `col_loc` with new columns.\n\n        Parameters\n        ----------\n        row_loc : scalar, slice, list, array or tuple\n            Row locator.\n        col_loc : list, array or tuple\n            Columns locator.\n        item : modin.pandas.DataFrame, modin.pandas.Series or scalar\n            Value that should be assigned to located dataset.\n        \"\"\"\n        if is_list_like(item) and not isinstance(item, (DataFrame, Series)):\n            item = np.array(item)\n            if len(item.shape) == 1:\n                if len(col_loc) != 1:\n                    raise ValueError(\n                        \"Must 
have equal len keys and value when setting with an iterable\"\n                    )\n            else:\n                if item.shape[-1] != len(col_loc):\n                    raise ValueError(\n                        \"Must have equal len keys and value when setting with an iterable\"\n                    )\n        common_label_loc = np.isin(col_loc, self.qc.columns.values)\n        if not all(common_label_loc):\n            # In this case we have some new cols and some old ones\n            columns = self.qc.columns\n            for i in range(len(common_label_loc)):\n                if not common_label_loc[i]:\n                    columns = columns.insert(len(columns), col_loc[i])\n            self.qc = self.qc.reindex(labels=columns, axis=1, fill_value=np.nan)\n            self.df._update_inplace(new_query_compiler=self.qc)\n        self._set_item_existing_loc(row_loc, np.array(col_loc), item)\n        self.qc = self.df._query_compiler\n\n    def _set_item_existing_loc(self, row_loc, col_loc, item):\n        \"\"\"\n        Assign `item` value to dataset located by `row_loc` and `col_loc` with existing rows and columns.\n\n        Parameters\n        ----------\n        row_loc : scalar, slice, list, array or tuple\n            Row locator.\n        col_loc : scalar, slice, list, array or tuple\n            Columns locator.\n        item : modin.pandas.DataFrame, modin.pandas.Series or scalar\n            Value that should be assigned to located dataset.\n        \"\"\"\n        if (\n            isinstance(row_loc, Series)\n            and is_boolean_array(row_loc)\n            and is_scalar(item)\n        ):\n            new_qc = self.df._query_compiler.setitem_bool(\n                row_loc._query_compiler, col_loc, item\n            )\n            self.df._update_inplace(new_qc)\n            self.qc = self.df._query_compiler\n            return\n\n        row_lookup, col_lookup = self.qc.get_positions_from_labels(row_loc, col_loc)\n        if 
isinstance(item, np.ndarray) and is_boolean_array(row_loc):\n            # fix for 'test_loc_series'; np.log(Series) returns nd.array instead\n            # of Series as it was before (`Series.__array_wrap__` is removed)\n            # otherwise incompatible shapes are obtained\n            item = item.take(row_lookup)\n        self._setitem_positional(\n            row_lookup,\n            col_lookup,\n            item,\n            axis=self._determine_setitem_axis(\n                row_lookup, col_lookup, is_scalar(row_loc), is_scalar(col_loc)\n            ),\n        )\n\n    def _check_missing_loc(self, row_loc, col_loc):\n        \"\"\"\n        Help `__setitem__` compute whether an axis needs appending.\n\n        Parameters\n        ----------\n        row_loc : scalar, slice, list, array or tuple\n            Row locator.\n        col_loc : scalar, slice, list, array or tuple\n            Columns locator.\n\n        Returns\n        -------\n        int or None :\n            0 if new row, 1 if new column, None if neither.\n        \"\"\"\n        if is_scalar(row_loc):\n            return 0 if row_loc not in self.qc.index else None\n        elif isinstance(row_loc, list):\n            missing_labels = self._compute_enlarge_labels(\n                pandas.Index(row_loc), self.qc.index\n            )\n            if len(missing_labels) > 1:\n                # We cast to list to copy pandas' error:\n                # In pandas, we get: KeyError: [a, b,...] not in index\n                # If we don't convert to list we get: KeyError: [a b ...] 
not in index\n                raise KeyError(\"{} not in index\".format(list(missing_labels)))\n        if (\n            not (is_list_like(row_loc) or isinstance(row_loc, slice))\n            and row_loc not in self.qc.index\n        ):\n            return 0\n        if (\n            isinstance(col_loc, list)\n            and len(pandas.Index(col_loc).difference(self.qc.columns)) >= 1\n        ):\n            return 1\n        if is_scalar(col_loc) and col_loc not in self.qc.columns:\n            return 1\n        return None\n\n    def _compute_enlarge_labels(self, locator, base_index):\n        \"\"\"\n        Help to _enlarge_axis, compute common labels and extra labels.\n\n        Parameters\n        ----------\n        locator : pandas.Index\n            Index from locator.\n        base_index : pandas.Index\n            Current index.\n\n        Returns\n        -------\n        nan_labels : pandas.Index\n            The labels that need to be added.\n        \"\"\"\n        # base_index_type can be pd.Index or pd.DatetimeIndex\n        # depending on user input and pandas behavior\n        # See issue #2264\n        base_as_index = pandas.Index(list(base_index))\n        locator_as_index = pandas.Index(list(locator))\n\n        if locator_as_index.inferred_type == \"boolean\":\n            if len(locator_as_index) != len(base_as_index):\n                raise ValueError(\n                    f\"Item wrong length {len(locator_as_index)} instead of {len(base_as_index)}!\"\n                )\n            common_labels = base_as_index[locator_as_index]\n            nan_labels = pandas.Index([])\n        else:\n            common_labels = locator_as_index.intersection(base_as_index)\n            nan_labels = locator_as_index.difference(base_as_index)\n\n        if len(common_labels) == 0:\n            raise KeyError(\n                \"None of [{labels}] are in the [{base_index_name}]\".format(\n                    labels=list(locator_as_index), 
base_index_name=base_as_index\n                )\n            )\n        return nan_labels\n\n\nclass _iLocIndexer(_LocationIndexerBase):\n    \"\"\"\n    An indexer for modin_df.iloc[] functionality.\n\n    Parameters\n    ----------\n    modin_df : Union[DataFrame, Series]\n        DataFrame to operate on.\n    \"\"\"\n\n    _extensions: EXTENSION_DICT_TYPE = EXTENSION_DICT_TYPE(dict)\n\n    def __getitem__(self, key):\n        \"\"\"\n        Retrieve dataset according to `key`.\n\n        Parameters\n        ----------\n        key : callable or tuple\n            The global row numbers to retrieve data from.\n\n        Returns\n        -------\n        DataFrame or Series\n            Located dataset.\n\n        See Also\n        --------\n        pandas.DataFrame.iloc\n        \"\"\"\n        if self.df.empty:\n            return self.df._default_to_pandas(lambda df: df.iloc[key])\n        if isinstance(key, tuple):\n            key = self._validate_key_length(key)\n        row_loc, col_loc, ndim = self._parse_row_and_column_locators(key)\n        row_scalar = is_scalar(row_loc)\n        col_scalar = is_scalar(col_loc)\n        self._check_dtypes(row_loc)\n        self._check_dtypes(col_loc)\n\n        if isinstance(row_loc, Series) and is_boolean_array(row_loc):\n            return self._handle_boolean_masking(row_loc, col_loc)\n\n        row_lookup, col_lookup = self._compute_lookup(row_loc, col_loc)\n        if isinstance(row_lookup, slice):\n            ErrorMessage.catch_bugs_and_request_email(\n                failure_condition=row_lookup != slice(None),\n                extra_log=f\"Only None-slices are acceptable as a slice argument in masking, got: {row_lookup}\",\n            )\n            row_lookup = None\n        if isinstance(col_lookup, slice):\n            ErrorMessage.catch_bugs_and_request_email(\n                failure_condition=col_lookup != slice(None),\n                extra_log=f\"Only None-slices are acceptable as a slice argument in 
masking, got: {col_lookup}\",\n            )\n            col_lookup = None\n        qc_view = self.qc.take_2d_positional(row_lookup, col_lookup)\n        result = self._get_pandas_object_from_qc_view(\n            qc_view,\n            row_multiindex_full_lookup=False,\n            col_multiindex_full_lookup=False,\n            row_scalar=row_scalar,\n            col_scalar=col_scalar,\n            ndim=ndim,\n        )\n\n        if isinstance(result, Series):\n            result._parent = self.df\n            result._parent_axis = 0\n        return result\n\n    def __setitem__(self, key, item):\n        \"\"\"\n        Assign `item` value to dataset located by `key`.\n\n        Parameters\n        ----------\n        key : callable or tuple\n            The global row numbers to assign data to.\n        item : modin.pandas.DataFrame, modin.pandas.Series or scalar\n            Value that should be assigned to located dataset.\n\n        See Also\n        --------\n        pandas.DataFrame.iloc\n        \"\"\"\n        if self.df.empty:\n\n            def _iloc(df):\n                df.iloc[key] = item\n                return df\n\n            self.df._update_inplace(\n                new_query_compiler=self.df._default_to_pandas(_iloc)._query_compiler\n            )\n            self.qc = self.df._query_compiler\n            return\n        row_loc, col_loc, _ = self._parse_row_and_column_locators(key)\n        row_scalar = is_scalar(row_loc)\n        col_scalar = is_scalar(col_loc)\n        self._check_dtypes(row_loc)\n        self._check_dtypes(col_loc)\n\n        row_lookup, col_lookup = self._compute_lookup(row_loc, col_loc)\n        self._setitem_positional(\n            row_lookup,\n            col_lookup,\n            item,\n            axis=self._determine_setitem_axis(\n                row_lookup, col_lookup, row_scalar, col_scalar\n            ),\n        )\n\n    def _compute_lookup(self, row_loc, col_loc):\n        \"\"\"\n        Compute index and 
column labels from index and column integer locators.\n\n        Parameters\n        ----------\n        row_loc : slice, list, array or tuple\n            Row locator.\n        col_loc : slice, list, array or tuple\n            Columns locator.\n\n        Returns\n        -------\n        row_lookup : slice(None) if full axis grab, pandas.RangeIndex if repetition is detected, numpy.ndarray otherwise\n            List of index labels.\n        col_lookup : slice(None) if full axis grab, pandas.RangeIndex if repetition is detected, numpy.ndarray otherwise\n            List of columns labels.\n\n        Notes\n        -----\n        Usage of `slice(None)` as a resulting lookup is a hack to pass information about\n        full-axis grab without computing actual indices that triggers lazy computations.\n        Ideally, this API should get rid of using slices as indexers and either use a\n        common ``Indexer`` object or range and ``np.ndarray`` only.\n        \"\"\"\n        lookups = []\n        for axis, axis_loc in enumerate((row_loc, col_loc)):\n            if is_scalar(axis_loc):\n                axis_loc = np.array([axis_loc])\n            if isinstance(axis_loc, slice):\n                axis_lookup = (\n                    axis_loc\n                    if axis_loc == slice(None)\n                    else pandas.RangeIndex(\n                        *axis_loc.indices(len(self.qc.get_axis(axis)))\n                    )\n                )\n            elif is_range_like(axis_loc):\n                axis_lookup = pandas.RangeIndex(\n                    axis_loc.start, axis_loc.stop, axis_loc.step\n                )\n            elif is_boolean_array(axis_loc):\n                axis_lookup = boolean_mask_to_numeric(axis_loc)\n            else:\n                if isinstance(axis_loc, pandas.Index):\n                    axis_loc = axis_loc.values\n                elif is_list_like(axis_loc) and not isinstance(axis_loc, np.ndarray):\n                    # 
`Index.__getitem__` works much faster with numpy arrays than with python lists,\n                    # so although we lose some time here on converting to numpy, `Index.__getitem__`\n                    # speedup covers the loss that we gain here.\n                    axis_loc = np.array(axis_loc, dtype=np.int64)\n                # Relatively fast check allows us to not trigger `self.qc.get_axis()` computation\n                # if there're no negative indices and so they don't not depend on the axis length.\n                if isinstance(axis_loc, np.ndarray) and not (axis_loc < 0).any():\n                    axis_lookup = axis_loc\n                else:\n                    axis_lookup = pandas.RangeIndex(len(self.qc.get_axis(axis)))[\n                        axis_loc\n                    ]\n\n            if isinstance(axis_lookup, pandas.Index) and not is_range_like(axis_lookup):\n                axis_lookup = axis_lookup.values\n            lookups.append(axis_lookup)\n        return lookups\n\n    def _check_dtypes(self, locator):\n        \"\"\"\n        Check that `locator` is an integer scalar, integer slice, integer list or array of booleans.\n\n        Parameters\n        ----------\n        locator : scalar, list, slice or array\n            Object to check.\n\n        Raises\n        ------\n        ValueError\n            If check fails.\n        \"\"\"\n        is_int = is_integer(locator)\n        is_int_slice = is_integer_slice(locator)\n        is_int_arr = is_integer_array(locator)\n        is_bool_arr = is_boolean_array(locator)\n\n        if not any([is_int, is_int_slice, is_int_arr, is_bool_arr]):\n            raise ValueError(_ILOC_INT_ONLY_ERROR)\n"
  },
  {
    "path": "modin/pandas/io.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"\nImplement I/O public API as pandas does.\n\nAlmost all docstrings for public and magic methods should be inherited from pandas\nfor better maintability.\nManually add documentation for methods which are not presented in pandas.\n\"\"\"\n\nfrom __future__ import annotations\n\nimport csv\nimport inspect\nimport pathlib\nimport pickle\nfrom typing import (\n    IO,\n    TYPE_CHECKING,\n    Any,\n    AnyStr,\n    Callable,\n    Dict,\n    Hashable,\n    Iterable,\n    Iterator,\n    List,\n    Literal,\n    Optional,\n    Pattern,\n    Sequence,\n    Union,\n)\n\nimport numpy as np\nimport pandas\nfrom pandas._libs.lib import NoDefault, no_default\nfrom pandas._typing import (\n    CompressionOptions,\n    ConvertersArg,\n    CSVEngine,\n    DtypeArg,\n    DtypeBackend,\n    FilePath,\n    IndexLabel,\n    IntStrT,\n    ParseDatesArg,\n    ReadBuffer,\n    ReadCsvBuffer,\n    StorageOptions,\n    XMLParsers,\n)\nfrom pandas.io.parsers import TextFileReader\nfrom pandas.io.parsers.readers import _c_parser_defaults\n\nfrom modin.config import ModinNumpy\nfrom modin.core.dataframe.base.interchange.dataframe_protocol.dataframe import (\n    
ProtocolDataframe,\n)\nfrom modin.core.storage_formats.pandas.query_compiler_caster import (\n    wrap_free_function_in_argument_caster,\n)\nfrom modin.logging import ClassLogger, enable_logging\nfrom modin.utils import (\n    SupportsPrivateToNumPy,\n    SupportsPublicToNumPy,\n    SupportsPublicToPandas,\n    _inherit_docstrings,\n    _maybe_warn_on_default,\n    classproperty,\n    expanduser_path_arg,\n)\n\n# below logic is to handle circular imports without errors\nif TYPE_CHECKING:\n    from .dataframe import DataFrame\n    from .series import Series\n\n\nclass ModinObjects:\n    \"\"\"Lazily import Modin classes and provide an access to them.\"\"\"\n\n    _dataframe = None\n\n    @classproperty\n    def DataFrame(cls):\n        \"\"\"Get ``modin.pandas.DataFrame`` class.\"\"\"\n        if cls._dataframe is None:\n            from .dataframe import DataFrame\n\n            cls._dataframe = DataFrame\n        return cls._dataframe\n\n\ndef _read(**kwargs):\n    \"\"\"\n    Read csv file from local disk.\n\n    Parameters\n    ----------\n    **kwargs : dict\n        Keyword arguments in pandas.read_csv.\n\n    Returns\n    -------\n    modin.pandas.DataFrame\n    \"\"\"\n    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher\n\n    squeeze = kwargs.pop(\"squeeze\", False)\n    pd_obj = FactoryDispatcher.read_csv(**kwargs)\n    # This happens when `read_csv` returns a TextFileReader object for iterating through\n    if isinstance(pd_obj, TextFileReader):\n        reader = pd_obj.read\n        pd_obj.read = lambda *args, **kwargs: ModinObjects.DataFrame(\n            query_compiler=reader(*args, **kwargs)\n        )\n        return pd_obj\n    result = ModinObjects.DataFrame(query_compiler=pd_obj)\n    if squeeze:\n        return result.squeeze(axis=1)\n    return result\n\n\n@_inherit_docstrings(pandas.read_xml, 
apilink=\"pandas.read_xml\")\n@enable_logging\n@wrap_free_function_in_argument_caster(\"read_xml\")\n@expanduser_path_arg(\"path_or_buffer\")\ndef read_xml(\n    path_or_buffer: FilePath | ReadBuffer[bytes] | ReadBuffer[str],\n    *,\n    xpath: str = \"./*\",\n    namespaces: dict[str, str] | None = None,\n    elems_only: bool = False,\n    attrs_only: bool = False,\n    names: Sequence[str] | None = None,\n    dtype: DtypeArg | None = None,\n    converters: ConvertersArg | None = None,\n    parse_dates: ParseDatesArg | None = None,\n    encoding: str | None = \"utf-8\",\n    parser: XMLParsers = \"lxml\",\n    stylesheet: FilePath | ReadBuffer[bytes] | ReadBuffer[str] | None = None,\n    iterparse: dict[str, list[str]] | None = None,\n    compression: CompressionOptions = \"infer\",\n    storage_options: StorageOptions = None,\n    dtype_backend: Union[DtypeBackend, NoDefault] = no_default,\n) -> DataFrame:\n    _maybe_warn_on_default(\"read_xml\")\n    _, _, _, kwargs = inspect.getargvalues(inspect.currentframe())\n    return ModinObjects.DataFrame(pandas.read_xml(**kwargs))\n\n\n@_inherit_docstrings(pandas.read_csv, apilink=\"pandas.read_csv\")\n@enable_logging\n@wrap_free_function_in_argument_caster(\"read_csv\")\n@expanduser_path_arg(\"filepath_or_buffer\")\ndef read_csv(\n    filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str],\n    *,\n    sep: str | None | NoDefault = no_default,\n    delimiter: str | None | NoDefault = None,\n    # Column and Index Locations and Names\n    header: int | Sequence[int] | None | Literal[\"infer\"] = \"infer\",\n    names: Sequence[Hashable] | None | NoDefault = no_default,\n    index_col: IndexLabel | Literal[False] | None = None,\n    usecols=None,\n    # General Parsing Configuration\n    dtype: DtypeArg | None = None,\n    engine: CSVEngine | None = None,\n    converters=None,\n    true_values=None,\n    false_values=None,\n    skipinitialspace: bool = False,\n    skiprows=None,\n    skipfooter: int = 
0,\n    nrows: int | None = None,\n    # NA and Missing Data Handling\n    na_values=None,\n    keep_default_na: bool = True,\n    na_filter: bool = True,\n    verbose: bool = no_default,\n    skip_blank_lines: bool = True,\n    # Datetime Handling\n    parse_dates=None,\n    infer_datetime_format: bool = no_default,\n    keep_date_col: bool = no_default,\n    date_parser=no_default,\n    date_format=None,\n    dayfirst: bool = False,\n    cache_dates: bool = True,\n    # Iteration\n    iterator: bool = False,\n    chunksize: int | None = None,\n    # Quoting, Compression, and File Format\n    compression: CompressionOptions = \"infer\",\n    thousands: str | None = None,\n    decimal: str = \".\",\n    lineterminator: str | None = None,\n    quotechar: str = '\"',\n    quoting: int = csv.QUOTE_MINIMAL,\n    doublequote: bool = True,\n    escapechar: str | None = None,\n    comment: str | None = None,\n    encoding: str | None = None,\n    encoding_errors: str | None = \"strict\",\n    dialect: str | csv.Dialect | None = None,\n    # Error Handling\n    on_bad_lines=\"error\",\n    # Internal\n    delim_whitespace: bool = no_default,\n    low_memory=_c_parser_defaults[\"low_memory\"],\n    memory_map: bool = False,\n    float_precision: Literal[\"high\", \"legacy\"] | None = None,\n    storage_options: StorageOptions = None,\n    dtype_backend: Union[DtypeBackend, NoDefault] = no_default,\n) -> DataFrame | TextFileReader:\n    # ISSUE #2408: parse parameter shared with pandas read_csv and read_table and update with provided args\n    _pd_read_csv_signature = {\n        val.name for val in inspect.signature(pandas.read_csv).parameters.values()\n    }\n    _, _, _, f_locals = inspect.getargvalues(inspect.currentframe())\n    kwargs = {k: v for k, v in f_locals.items() if k in _pd_read_csv_signature}\n    return _read(**kwargs)\n\n\n@_inherit_docstrings(pandas.read_table, 
apilink=\"pandas.read_table\")\n@enable_logging\n@wrap_free_function_in_argument_caster(\"read_table\")\n@expanduser_path_arg(\"filepath_or_buffer\")\ndef read_table(\n    filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str],\n    *,\n    sep: str | None | NoDefault = no_default,\n    delimiter: str | None | NoDefault = None,\n    # Column and Index Locations and Names\n    header: int | Sequence[int] | None | Literal[\"infer\"] = \"infer\",\n    names: Sequence[Hashable] | None | NoDefault = no_default,\n    index_col: IndexLabel | Literal[False] | None = None,\n    usecols=None,\n    # General Parsing Configuration\n    dtype: DtypeArg | None = None,\n    engine: CSVEngine | None = None,\n    converters=None,\n    true_values=None,\n    false_values=None,\n    skipinitialspace: bool = False,\n    skiprows=None,\n    skipfooter: int = 0,\n    nrows: int | None = None,\n    # NA and Missing Data Handling\n    na_values=None,\n    keep_default_na: bool = True,\n    na_filter: bool = True,\n    verbose: bool = no_default,\n    skip_blank_lines: bool = True,\n    # Datetime Handling\n    parse_dates=False,\n    infer_datetime_format: bool = no_default,\n    keep_date_col: bool = no_default,\n    date_parser=no_default,\n    date_format: str = None,\n    dayfirst: bool = False,\n    cache_dates: bool = True,\n    # Iteration\n    iterator: bool = False,\n    chunksize: int | None = None,\n    # Quoting, Compression, and File Format\n    compression: CompressionOptions = \"infer\",\n    thousands: str | None = None,\n    decimal: str = \".\",\n    lineterminator: str | None = None,\n    quotechar: str = '\"',\n    quoting: int = csv.QUOTE_MINIMAL,\n    doublequote: bool = True,\n    escapechar: str | None = None,\n    comment: str | None = None,\n    encoding: str | None = None,\n    encoding_errors: str | None = \"strict\",\n    dialect: str | csv.Dialect | None = None,\n    # Error Handling\n    on_bad_lines=\"error\",\n    # Internal\n    
delim_whitespace: bool = no_default,\n    low_memory=_c_parser_defaults[\"low_memory\"],\n    memory_map: bool = False,\n    float_precision: str | None = None,\n    storage_options: StorageOptions = None,\n    dtype_backend: Union[DtypeBackend, NoDefault] = no_default,\n) -> DataFrame | TextFileReader:\n    # ISSUE #2408: parse parameter shared with pandas read_csv and read_table and update with provided args\n    _pd_read_table_signature = {\n        val.name for val in inspect.signature(pandas.read_table).parameters.values()\n    }\n    _, _, _, f_locals = inspect.getargvalues(inspect.currentframe())\n    if f_locals.get(\"sep\", sep) is False or f_locals.get(\"sep\", sep) is no_default:\n        f_locals[\"sep\"] = \"\\t\"\n    kwargs = {k: v for k, v in f_locals.items() if k in _pd_read_table_signature}\n    return _read(**kwargs)\n\n\n@_inherit_docstrings(pandas.read_parquet, apilink=\"pandas.read_parquet\")\n@enable_logging\n@wrap_free_function_in_argument_caster(\"read_parquet\")\n@expanduser_path_arg(\"path\")\ndef read_parquet(\n    path,\n    engine: str = \"auto\",\n    columns: list[str] | None = None,\n    storage_options: StorageOptions = None,\n    use_nullable_dtypes: bool = no_default,\n    dtype_backend=no_default,\n    filesystem=None,\n    filters=None,\n    **kwargs,\n) -> DataFrame:\n    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher\n\n    if engine == \"fastparquet\" and dtype_backend is not no_default:\n        raise ValueError(\n            \"The 'dtype_backend' argument is not supported for the fastparquet engine\"\n        )\n\n    return ModinObjects.DataFrame(\n        query_compiler=FactoryDispatcher.read_parquet(\n            path=path,\n            engine=engine,\n            columns=columns,\n            storage_options=storage_options,\n            use_nullable_dtypes=use_nullable_dtypes,\n            dtype_backend=dtype_backend,\n            filesystem=filesystem,\n            
filters=filters,\n            **kwargs,\n        )\n    )\n\n\n@_inherit_docstrings(pandas.read_json, apilink=\"pandas.read_json\")\n@enable_logging\n@wrap_free_function_in_argument_caster(\"read_json\")\n@expanduser_path_arg(\"path_or_buf\")\ndef read_json(\n    path_or_buf,\n    *,\n    orient: str | None = None,\n    typ: Literal[\"frame\", \"series\"] = \"frame\",\n    dtype: DtypeArg | None = None,\n    convert_axes=None,\n    convert_dates: bool | list[str] = True,\n    keep_default_dates: bool = True,\n    precise_float: bool = False,\n    date_unit: str | None = None,\n    encoding: str | None = None,\n    encoding_errors: str | None = \"strict\",\n    lines: bool = False,\n    chunksize: int | None = None,\n    compression: CompressionOptions = \"infer\",\n    nrows: int | None = None,\n    storage_options: StorageOptions = None,\n    dtype_backend: Union[DtypeBackend, NoDefault] = no_default,\n    engine=\"ujson\",\n) -> DataFrame | Series | pandas.io.json._json.JsonReader:\n    _, _, _, kwargs = inspect.getargvalues(inspect.currentframe())\n\n    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher\n\n    return ModinObjects.DataFrame(query_compiler=FactoryDispatcher.read_json(**kwargs))\n\n\n@_inherit_docstrings(pandas.read_gbq, apilink=\"pandas.read_gbq\")\n@enable_logging\n@wrap_free_function_in_argument_caster(\"read_gbq\")\ndef read_gbq(\n    query: str,\n    project_id: str | None = None,\n    index_col: str | None = None,\n    col_order: list[str] | None = None,\n    reauth: bool = False,\n    auth_local_webserver: bool = True,\n    dialect: str | None = None,\n    location: str | None = None,\n    configuration: dict[str, Any] | None = None,\n    credentials=None,\n    use_bqstorage_api: bool | None = None,\n    max_results: int | None = None,\n    progress_bar_type: str | None = None,\n) -> DataFrame:\n    _, _, _, kwargs = inspect.getargvalues(inspect.currentframe())\n    kwargs.update(kwargs.pop(\"kwargs\", 
{}))\n\n    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher\n\n    return ModinObjects.DataFrame(query_compiler=FactoryDispatcher.read_gbq(**kwargs))\n\n\n@_inherit_docstrings(pandas.read_html, apilink=\"pandas.read_html\")\n@enable_logging\n@wrap_free_function_in_argument_caster(\"read_html\")\n@expanduser_path_arg(\"io\")\ndef read_html(\n    io,\n    *,\n    match: str | Pattern = \".+\",\n    flavor: str | None = None,\n    header: int | Sequence[int] | None = None,\n    index_col: int | Sequence[int] | None = None,\n    skiprows: int | Sequence[int] | slice | None = None,\n    attrs: dict[str, str] | None = None,\n    parse_dates: bool = False,\n    thousands: str | None = \",\",\n    encoding: str | None = None,\n    decimal: str = \".\",\n    converters: dict | None = None,\n    na_values: Iterable[object] | None = None,\n    keep_default_na: bool = True,\n    displayed_only: bool = True,\n    extract_links: Literal[None, \"header\", \"footer\", \"body\", \"all\"] = None,\n    dtype_backend: Union[DtypeBackend, NoDefault] = no_default,\n    storage_options: StorageOptions = None,\n) -> list[DataFrame]:  # noqa: PR01, RT01, D200\n    \"\"\"\n    Read HTML tables into a ``DataFrame`` object.\n    \"\"\"\n    _, _, _, kwargs = inspect.getargvalues(inspect.currentframe())\n\n    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher\n\n    qcs = FactoryDispatcher.read_html(**kwargs)\n    return [ModinObjects.DataFrame(query_compiler=qc) for qc in qcs]\n\n\n@_inherit_docstrings(pandas.read_clipboard, apilink=\"pandas.read_clipboard\")\n@enable_logging\n@wrap_free_function_in_argument_caster(\"read_clipboard\")\ndef read_clipboard(\n    sep=r\"\\s+\",\n    dtype_backend: Union[DtypeBackend, NoDefault] = no_default,\n    **kwargs,\n) -> DataFrame:  # pragma: no cover  # noqa: PR01, RT01, D200\n    \"\"\"\n    Read text from clipboard and pass to read_csv.\n    \"\"\"\n    _, _, _, kwargs = 
inspect.getargvalues(inspect.currentframe())\n    kwargs.update(kwargs.pop(\"kwargs\", {}))\n\n    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher\n\n    return ModinObjects.DataFrame(\n        query_compiler=FactoryDispatcher.read_clipboard(**kwargs)\n    )\n\n\n@_inherit_docstrings(pandas.read_excel, apilink=\"pandas.read_excel\")\n@enable_logging\n@wrap_free_function_in_argument_caster(\"read_excel\")\n@expanduser_path_arg(\"io\")\ndef read_excel(\n    io,\n    sheet_name: str | int | list[IntStrT] | None = 0,\n    *,\n    header: int | Sequence[int] | None = 0,\n    names: list[str] | None = None,\n    index_col: int | Sequence[int] | None = None,\n    usecols: (\n        int | str | Sequence[int] | Sequence[str] | Callable[[str], bool] | None\n    ) = None,\n    dtype: DtypeArg | None = None,\n    engine: Literal[(\"xlrd\", \"openpyxl\", \"odf\", \"pyxlsb\")] | None = None,\n    converters: dict[str, Callable] | dict[int, Callable] | None = None,\n    true_values: Iterable[Hashable] | None = None,\n    false_values: Iterable[Hashable] | None = None,\n    skiprows: Sequence[int] | int | Callable[[int], object] | None = None,\n    nrows: int | None = None,\n    na_values=None,\n    keep_default_na: bool = True,\n    na_filter: bool = True,\n    verbose: bool = False,\n    parse_dates: list | dict | bool = False,\n    date_parser: Union[Callable, NoDefault] = no_default,\n    date_format=None,\n    thousands: str | None = None,\n    decimal: str = \".\",\n    comment: str | None = None,\n    skipfooter: int = 0,\n    storage_options: StorageOptions = None,\n    dtype_backend: Union[DtypeBackend, NoDefault] = no_default,\n    engine_kwargs: Optional[dict] = None,\n) -> DataFrame | dict[IntStrT, DataFrame]:\n    _, _, _, kwargs = inspect.getargvalues(inspect.currentframe())\n\n    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher\n\n    intermediate = FactoryDispatcher.read_excel(**kwargs)\n    
if isinstance(intermediate, dict):\n        parsed = type(intermediate)()\n        for key in intermediate.keys():\n            parsed[key] = ModinObjects.DataFrame(query_compiler=intermediate.get(key))\n        return parsed\n    else:\n        return ModinObjects.DataFrame(query_compiler=intermediate)\n\n\n@_inherit_docstrings(pandas.read_hdf, apilink=\"pandas.read_hdf\")\n@enable_logging\n@wrap_free_function_in_argument_caster(\"read_hdf\")\n@expanduser_path_arg(\"path_or_buf\")\ndef read_hdf(\n    path_or_buf,\n    key=None,\n    mode: str = \"r\",\n    errors: str = \"strict\",\n    where=None,\n    start: Optional[int] = None,\n    stop: Optional[int] = None,\n    columns=None,\n    iterator=False,\n    chunksize: Optional[int] = None,\n    **kwargs,\n):  # noqa: PR01, RT01, D200\n    \"\"\"\n    Read data from the store into DataFrame.\n    \"\"\"\n    _, _, _, kwargs = inspect.getargvalues(inspect.currentframe())\n    kwargs.update(kwargs.pop(\"kwargs\", {}))\n\n    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher\n\n    return ModinObjects.DataFrame(query_compiler=FactoryDispatcher.read_hdf(**kwargs))\n\n\n@_inherit_docstrings(pandas.read_feather, apilink=\"pandas.read_feather\")\n@enable_logging\n@wrap_free_function_in_argument_caster(\"read_feather\")\n@expanduser_path_arg(\"path\")\ndef read_feather(\n    path,\n    columns: Sequence[Hashable] | None = None,\n    use_threads: bool = True,\n    storage_options: StorageOptions = None,\n    dtype_backend: Union[DtypeBackend, NoDefault] = no_default,\n) -> DataFrame:\n    _, _, _, kwargs = inspect.getargvalues(inspect.currentframe())\n\n    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher\n\n    return ModinObjects.DataFrame(\n        query_compiler=FactoryDispatcher.read_feather(**kwargs)\n    
)\n\n\n@_inherit_docstrings(pandas.read_stata)\n@enable_logging\n@wrap_free_function_in_argument_caster(\"read_stata\")\n@expanduser_path_arg(\"filepath_or_buffer\")\ndef read_stata(\n    filepath_or_buffer,\n    *,\n    convert_dates: bool = True,\n    convert_categoricals: bool = True,\n    index_col: str | None = None,\n    convert_missing: bool = False,\n    preserve_dtypes: bool = True,\n    columns: Sequence[str] | None = None,\n    order_categoricals: bool = True,\n    chunksize: int | None = None,\n    iterator: bool = False,\n    compression: CompressionOptions = \"infer\",\n    storage_options: StorageOptions = None,\n) -> DataFrame | pandas.io.stata.StataReader:\n    _, _, _, kwargs = inspect.getargvalues(inspect.currentframe())\n\n    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher\n\n    return ModinObjects.DataFrame(query_compiler=FactoryDispatcher.read_stata(**kwargs))\n\n\n@_inherit_docstrings(pandas.read_sas, apilink=\"pandas.read_sas\")\n@enable_logging\n@wrap_free_function_in_argument_caster(\"read_sas\")\n@expanduser_path_arg(\"filepath_or_buffer\")\ndef read_sas(\n    filepath_or_buffer,\n    *,\n    format: str | None = None,\n    index: Hashable | None = None,\n    encoding: str | None = None,\n    chunksize: int | None = None,\n    iterator: bool = False,\n    compression: CompressionOptions = \"infer\",\n) -> DataFrame | pandas.io.sas.sasreader.ReaderBase:  # noqa: PR01, RT01, D200\n    \"\"\"\n    Read SAS files stored as either XPORT or SAS7BDAT format files.\n    \"\"\"\n    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher\n\n    return ModinObjects.DataFrame(\n        query_compiler=FactoryDispatcher.read_sas(\n            filepath_or_buffer=filepath_or_buffer,\n            format=format,\n            index=index,\n            encoding=encoding,\n            chunksize=chunksize,\n            iterator=iterator,\n            compression=compression,\n        )\n    
)\n\n\n@_inherit_docstrings(pandas.read_pickle, apilink=\"pandas.read_pickle\")\n@enable_logging\n@wrap_free_function_in_argument_caster(\"read_pickle\")\n@expanduser_path_arg(\"filepath_or_buffer\")\ndef read_pickle(\n    filepath_or_buffer,\n    compression: CompressionOptions = \"infer\",\n    storage_options: StorageOptions = None,\n) -> DataFrame | Series:\n    _, _, _, kwargs = inspect.getargvalues(inspect.currentframe())\n\n    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher\n\n    return ModinObjects.DataFrame(\n        query_compiler=FactoryDispatcher.read_pickle(**kwargs)\n    )\n\n\n@_inherit_docstrings(pandas.read_sql, apilink=\"pandas.read_sql\")\n@enable_logging\n@wrap_free_function_in_argument_caster(\"read_sql\")\ndef read_sql(\n    sql,\n    con,\n    index_col=None,\n    coerce_float=True,\n    params=None,\n    parse_dates=None,\n    columns=None,\n    chunksize=None,\n    dtype_backend: Union[DtypeBackend, NoDefault] = no_default,\n    dtype=None,\n) -> DataFrame | Iterator[DataFrame]:  # noqa: PR01, RT01, D200\n    \"\"\"\n    Read SQL query or database table into a DataFrame.\n    \"\"\"\n    _, _, _, kwargs = inspect.getargvalues(inspect.currentframe())\n\n    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher\n\n    if kwargs.get(\"chunksize\") is not None:\n        _maybe_warn_on_default(\"Parameters provided [chunksize]\")\n        df_gen = pandas.read_sql(**kwargs)\n        return (\n            ModinObjects.DataFrame(query_compiler=FactoryDispatcher.from_pandas(df))\n            for df in df_gen\n        )\n    return ModinObjects.DataFrame(query_compiler=FactoryDispatcher.read_sql(**kwargs))\n\n\n@_inherit_docstrings(pandas.read_fwf, apilink=\"pandas.read_fwf\")\n@enable_logging\n@wrap_free_function_in_argument_caster(\"read_fwf\")\n@expanduser_path_arg(\"filepath_or_buffer\")\ndef read_fwf(\n    filepath_or_buffer: Union[str, pathlib.Path, IO[AnyStr]],\n    *,\n    
colspecs=\"infer\",\n    widths=None,\n    infer_nrows=100,\n    dtype_backend: Union[DtypeBackend, NoDefault] = no_default,\n    iterator: bool = False,\n    chunksize: Optional[int] = None,\n    **kwds,\n) -> DataFrame | TextFileReader:  # noqa: PR01, RT01, D200\n    \"\"\"\n    Read a table of fixed-width formatted lines into DataFrame.\n    \"\"\"\n    from pandas.io.parsers.base_parser import parser_defaults\n\n    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher\n\n    _, _, _, kwargs = inspect.getargvalues(inspect.currentframe())\n    kwargs.update(kwargs.pop(\"kwds\", {}))\n    target_kwargs = parser_defaults.copy()\n    target_kwargs.update(kwargs)\n    pd_obj = FactoryDispatcher.read_fwf(**target_kwargs)\n    # When `read_fwf` returns a TextFileReader object for iterating through\n    if isinstance(pd_obj, TextFileReader):\n        reader = pd_obj.read\n        pd_obj.read = lambda *args, **kwargs: ModinObjects.DataFrame(\n            query_compiler=reader(*args, **kwargs)\n        )\n        return pd_obj\n    return ModinObjects.DataFrame(query_compiler=pd_obj)\n\n\n@_inherit_docstrings(pandas.read_sql_table, apilink=\"pandas.read_sql_table\")\n@enable_logging\n@wrap_free_function_in_argument_caster(\"read_sql_table\")\ndef read_sql_table(\n    table_name,\n    con,\n    schema=None,\n    index_col=None,\n    coerce_float=True,\n    parse_dates=None,\n    columns=None,\n    chunksize=None,\n    dtype_backend: Union[DtypeBackend, NoDefault] = no_default,\n) -> DataFrame | Iterator[DataFrame]:  # noqa: PR01, RT01, D200\n    \"\"\"\n    Read SQL database table into a DataFrame.\n    \"\"\"\n    _, _, _, kwargs = inspect.getargvalues(inspect.currentframe())\n\n    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher\n\n    return ModinObjects.DataFrame(\n        query_compiler=FactoryDispatcher.read_sql_table(**kwargs)\n    )\n\n\n@_inherit_docstrings(pandas.read_sql_query, 
apilink=\"pandas.read_sql_query\")\n@enable_logging\n@wrap_free_function_in_argument_caster(\"read_sql_query\")\ndef read_sql_query(\n    sql,\n    con,\n    index_col: str | list[str] | None = None,\n    coerce_float: bool = True,\n    params: list[str] | dict[str, str] | None = None,\n    parse_dates: list[str] | dict[str, str] | None = None,\n    chunksize: int | None = None,\n    dtype: DtypeArg | None = None,\n    dtype_backend: Union[DtypeBackend, NoDefault] = no_default,\n) -> DataFrame | Iterator[DataFrame]:\n    _, _, _, kwargs = inspect.getargvalues(inspect.currentframe())\n\n    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher\n\n    return ModinObjects.DataFrame(\n        query_compiler=FactoryDispatcher.read_sql_query(**kwargs)\n    )\n\n\n@_inherit_docstrings(pandas.to_pickle)\n@enable_logging\n@wrap_free_function_in_argument_caster(\"to_pickle\")\n@expanduser_path_arg(\"filepath_or_buffer\")\ndef to_pickle(\n    obj: Any,\n    filepath_or_buffer,\n    compression: CompressionOptions = \"infer\",\n    protocol: int = pickle.HIGHEST_PROTOCOL,\n    storage_options: StorageOptions = None,\n) -> None:\n    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher\n\n    if isinstance(obj, ModinObjects.DataFrame):\n        obj = obj._query_compiler\n    return FactoryDispatcher.to_pickle(\n        obj,\n        filepath_or_buffer=filepath_or_buffer,\n        compression=compression,\n        protocol=protocol,\n        storage_options=storage_options,\n    )\n\n\n@_inherit_docstrings(pandas.read_spss, apilink=\"pandas.read_spss\")\n@enable_logging\n@wrap_free_function_in_argument_caster(\"read_spss\")\n@expanduser_path_arg(\"path\")\ndef read_spss(\n    path: Union[str, pathlib.Path],\n    usecols: Optional[Sequence[str]] = None,\n    convert_categoricals: bool = True,\n    dtype_backend: Union[DtypeBackend, NoDefault] = no_default,\n) -> DataFrame:  # noqa: PR01, RT01, D200\n    \"\"\"\n    Load 
an SPSS file from the file path, returning a DataFrame.\n    \"\"\"\n    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher\n\n    return ModinObjects.DataFrame(\n        query_compiler=FactoryDispatcher.read_spss(\n            path=path,\n            usecols=usecols,\n            convert_categoricals=convert_categoricals,\n            dtype_backend=dtype_backend,\n        )\n    )\n\n\n@_inherit_docstrings(pandas.json_normalize, apilink=\"pandas.json_normalize\")\n@enable_logging\n@wrap_free_function_in_argument_caster(\"json_normalize\")\ndef json_normalize(\n    data: Union[Dict, List[Dict]],\n    record_path: Optional[Union[str, List]] = None,\n    meta: Optional[Union[str, List[Union[str, List[str]]]]] = None,\n    meta_prefix: Optional[str] = None,\n    record_prefix: Optional[str] = None,\n    errors: Optional[str] = \"raise\",\n    sep: str = \".\",\n    max_level: Optional[int] = None,\n) -> DataFrame:  # noqa: PR01, RT01, D200\n    \"\"\"\n    Normalize semi-structured JSON data into a flat table.\n    \"\"\"\n    _maybe_warn_on_default(\"json_normalize\")\n    return ModinObjects.DataFrame(\n        pandas.json_normalize(\n            data, record_path, meta, meta_prefix, record_prefix, errors, sep, max_level\n        )\n    )\n\n\n@_inherit_docstrings(pandas.read_orc, apilink=\"pandas.read_orc\")\n@enable_logging\n@wrap_free_function_in_argument_caster(\"read_orc\")\n@expanduser_path_arg(\"path\")\ndef read_orc(\n    path,\n    columns: Optional[List[str]] = None,\n    dtype_backend: Union[DtypeBackend, NoDefault] = no_default,\n    filesystem=None,\n    **kwargs,\n) -> DataFrame:  # noqa: PR01, RT01, D200\n    \"\"\"\n    Load an ORC object from the file path, returning a DataFrame.\n    \"\"\"\n    _maybe_warn_on_default(\"read_orc\")\n    return ModinObjects.DataFrame(\n        pandas.read_orc(\n            path,\n            columns=columns,\n            dtype_backend=dtype_backend,\n            
filesystem=filesystem,\n            **kwargs,\n        )\n    )\n\n\n@_inherit_docstrings(pandas.HDFStore)\nclass HDFStore(ClassLogger, pandas.HDFStore):  # noqa: PR01, D200\n    \"\"\"\n    Dict-like IO interface for storing pandas objects in PyTables.\n    \"\"\"\n\n    _return_modin_dataframe = True\n\n    def __getattribute__(self, item):\n        default_behaviors = [\"__init__\", \"__class__\"]\n        method = super(HDFStore, self).__getattribute__(item)\n        if item not in default_behaviors:\n            if callable(method):\n\n                def return_handler(*args, **kwargs):\n                    \"\"\"\n                    Replace the default behavior of methods with inplace kwarg.\n\n                    Returns\n                    -------\n                    A Modin DataFrame in place of a pandas DataFrame, or the same\n                    return type as pandas.HDFStore.\n\n                    Notes\n                    -----\n                    This function will replace all of the arguments passed to\n                    methods of HDFStore with the pandas equivalent. It will convert\n                    Modin DataFrame to pandas DataFrame, etc. 
Currently, pytables\n                    does not accept Modin DataFrame objects, so we must convert to\n                    pandas.\n                    \"\"\"\n                    # We don't want to constantly be giving this error message for\n                    # internal methods.\n                    if item[0] != \"_\":\n                        _maybe_warn_on_default(\"`{}`\".format(item))\n                    args = [\n                        (\n                            to_pandas(arg)\n                            if isinstance(arg, ModinObjects.DataFrame)\n                            else arg\n                        )\n                        for arg in args\n                    ]\n                    kwargs = {\n                        k: to_pandas(v) if isinstance(v, ModinObjects.DataFrame) else v\n                        for k, v in kwargs.items()\n                    }\n                    obj = super(HDFStore, self).__getattribute__(item)(*args, **kwargs)\n                    if self._return_modin_dataframe and isinstance(\n                        obj, pandas.DataFrame\n                    ):\n                        return ModinObjects.DataFrame(obj)\n                    return obj\n\n                # We replace the method with `return_handler` for inplace operations\n                method = return_handler\n        return method\n\n\n@_inherit_docstrings(pandas.ExcelFile)\nclass ExcelFile(ClassLogger, pandas.ExcelFile):  # noqa: PR01, D200\n    \"\"\"\n    Class for parsing tabular excel sheets into DataFrame objects.\n    \"\"\"\n\n    _behave_like_pandas = False\n\n    def _set_pandas_mode(self):  # noqa\n        # disable Modin behavior to be able to pass object to `pandas.read_excel`\n        # otherwise, Modin objects may be passed to the pandas context, resulting\n        # in undefined behavior\n        self._behave_like_pandas = True\n\n    def __getattribute__(self, item):\n        if item in [\"_set_pandas_mode\", 
\"_behave_like_pandas\"]:\n            return object.__getattribute__(self, item)\n\n        default_behaviors = [\"__init__\", \"__class__\"]\n        method = super(ExcelFile, self).__getattribute__(item)\n        if not self._behave_like_pandas and item not in default_behaviors:\n            if callable(method):\n\n                def return_handler(*args, **kwargs):\n                    \"\"\"\n                    Replace the default behavior of methods with inplace kwarg.\n\n                    Returns\n                    -------\n                    A Modin DataFrame in place of a pandas DataFrame, or the same\n                    return type as pandas.ExcelFile.\n\n                    Notes\n                    -----\n                    This function will replace all of the arguments passed to\n                    methods of ExcelFile with the pandas equivalent. It will convert\n                    Modin DataFrame to pandas DataFrame, etc.\n                    \"\"\"\n                    # We don't want to constantly be giving this error message for\n                    # internal methods.\n                    if item[0] != \"_\":\n                        _maybe_warn_on_default(\"`{}`\".format(item))\n                    args = [\n                        (\n                            to_pandas(arg)\n                            if isinstance(arg, ModinObjects.DataFrame)\n                            else arg\n                        )\n                        for arg in args\n                    ]\n                    kwargs = {\n                        k: to_pandas(v) if isinstance(v, ModinObjects.DataFrame) else v\n                        for k, v in kwargs.items()\n                    }\n                    obj = super(ExcelFile, self).__getattribute__(item)(*args, **kwargs)\n                    if isinstance(obj, pandas.DataFrame):\n                        return ModinObjects.DataFrame(obj)\n                    return obj\n\n                # We replace 
the method with `return_handler` for inplace operations\n                method = return_handler\n        return method\n\n\n@wrap_free_function_in_argument_caster(\"from_non_pandas\")\ndef from_non_pandas(df, index, columns, dtype) -> DataFrame | None:\n    \"\"\"\n    Convert a non-pandas DataFrame into Modin DataFrame.\n\n    Parameters\n    ----------\n    df : object\n        Non-pandas DataFrame.\n    index : object\n        Index for non-pandas DataFrame.\n    columns : object\n        Columns for non-pandas DataFrame.\n    dtype : type\n        Data type to force.\n\n    Returns\n    -------\n    modin.pandas.DataFrame\n        Converted DataFrame.\n    \"\"\"\n    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher\n\n    new_qc = FactoryDispatcher.from_non_pandas(df, index, columns, dtype)\n    if new_qc is not None:\n        return ModinObjects.DataFrame(query_compiler=new_qc)\n    return new_qc\n\n\n@wrap_free_function_in_argument_caster(\"from_pandas\")\ndef from_pandas(df) -> DataFrame:\n    \"\"\"\n    Convert a pandas DataFrame to a Modin DataFrame.\n\n    Parameters\n    ----------\n    df : pandas.DataFrame\n        The pandas DataFrame to convert.\n\n    Returns\n    -------\n    modin.pandas.DataFrame\n        A new Modin DataFrame object.\n    \"\"\"\n    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher\n\n    return ModinObjects.DataFrame(query_compiler=FactoryDispatcher.from_pandas(df))\n\n\n@wrap_free_function_in_argument_caster(\"from_arrow\")\ndef from_arrow(at) -> DataFrame:\n    \"\"\"\n    Convert an Arrow Table to a Modin DataFrame.\n\n    Parameters\n    ----------\n    at : Arrow Table\n        The Arrow Table to convert from.\n\n    Returns\n    -------\n    DataFrame\n        A new Modin DataFrame object.\n    \"\"\"\n    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher\n\n    return 
ModinObjects.DataFrame(query_compiler=FactoryDispatcher.from_arrow(at))\n\n\n@wrap_free_function_in_argument_caster(\"from_dataframe\")\ndef from_dataframe(df: ProtocolDataframe) -> DataFrame:\n    \"\"\"\n    Convert a DataFrame implementing the dataframe interchange protocol to a Modin DataFrame.\n\n    See more about the protocol in https://data-apis.org/dataframe-protocol/latest/index.html.\n\n    Parameters\n    ----------\n    df : ProtocolDataframe\n        An object supporting the dataframe interchange protocol.\n\n    Returns\n    -------\n    DataFrame\n        A new Modin DataFrame object.\n    \"\"\"\n    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher\n\n    return ModinObjects.DataFrame(\n        query_compiler=FactoryDispatcher.from_interchange_dataframe(df)\n    )\n\n\n@wrap_free_function_in_argument_caster(\"from_ray\")\ndef from_ray(ray_obj) -> DataFrame:\n    \"\"\"\n    Convert a Ray Dataset into Modin DataFrame.\n\n    Parameters\n    ----------\n    ray_obj : ray.data.Dataset\n        The Ray Dataset to convert from.\n\n    Returns\n    -------\n    DataFrame\n        A new Modin DataFrame object.\n\n    Notes\n    -----\n    Ray Dataset can only be converted to Modin DataFrame if Modin uses a Ray engine.\n    \"\"\"\n    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher\n\n    return ModinObjects.DataFrame(query_compiler=FactoryDispatcher.from_ray(ray_obj))\n\n\n@wrap_free_function_in_argument_caster(\"from_dask\")\ndef from_dask(dask_obj) -> DataFrame:\n    \"\"\"\n    Convert a Dask DataFrame to a Modin DataFrame.\n\n    Parameters\n    ----------\n    dask_obj : dask.dataframe.DataFrame\n        The Dask DataFrame to convert from.\n\n    Returns\n    -------\n    DataFrame\n        A new Modin DataFrame object.\n\n    Notes\n    -----\n    Dask DataFrame can only be converted to Modin DataFrame if Modin uses a Dask engine.\n    \"\"\"\n    from 
modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher\n\n    return ModinObjects.DataFrame(query_compiler=FactoryDispatcher.from_dask(dask_obj))\n\n\n@wrap_free_function_in_argument_caster(\"from_map\")\ndef from_map(func, iterable, *args, **kwargs) -> DataFrame:\n    \"\"\"\n    Create a Modin DataFrame from map function applied to an iterable object.\n\n    This method will construct a Modin DataFrame split by row partitions.\n    The number of row partitions matches the number of elements in the iterable object.\n\n    Parameters\n    ----------\n    func : callable\n        Function to map across the iterable object.\n    iterable : Iterable\n        An iterable object.\n    *args : tuple\n        Positional arguments to pass in `func`.\n    **kwargs : dict\n        Keyword arguments to pass in `func`.\n\n    Returns\n    -------\n    DataFrame\n        A new Modin DataFrame object.\n    \"\"\"\n    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher\n\n    return ModinObjects.DataFrame(\n        query_compiler=FactoryDispatcher.from_map(func, iterable, *args, *kwargs)\n    )\n\n\n@wrap_free_function_in_argument_caster(\"to_pandas\")\ndef to_pandas(modin_obj: SupportsPublicToPandas) -> DataFrame | Series:\n    \"\"\"\n    Convert a Modin DataFrame/Series to a pandas DataFrame/Series.\n\n    Parameters\n    ----------\n    modin_obj : modin.DataFrame, modin.Series\n        The Modin DataFrame/Series to convert.\n\n    Returns\n    -------\n    pandas.DataFrame or pandas.Series\n        Converted object with type depending on input.\n    \"\"\"\n    return modin_obj._to_pandas()\n\n\n@wrap_free_function_in_argument_caster(\"to_numpy\")\ndef to_numpy(\n    modin_obj: Union[SupportsPrivateToNumPy, SupportsPublicToNumPy],\n) -> np.ndarray:\n    \"\"\"\n    Convert a Modin object to a NumPy array.\n\n    Parameters\n    ----------\n    modin_obj : modin.DataFrame, modin.Series, modin.numpy.array\n        The 
Modin distributed object to convert.\n\n    Returns\n    -------\n    numpy.array\n        Converted object with type depending on input.\n    \"\"\"\n    if isinstance(modin_obj, SupportsPrivateToNumPy):\n        return modin_obj._to_numpy()\n    array = modin_obj.to_numpy()\n    if ModinNumpy.get():\n        array = array._to_numpy()\n    return array\n\n\n@wrap_free_function_in_argument_caster(\"to_ray\")\ndef to_ray(modin_obj):\n    \"\"\"\n    Convert a Modin DataFrame/Series to a Ray Dataset.\n\n    Parameters\n    ----------\n    modin_obj : modin.pandas.DataFrame, modin.pandas.Series\n        The DataFrame/Series to convert.\n\n    Returns\n    -------\n    ray.data.Dataset\n        Converted object with type depending on input.\n\n    Notes\n    -----\n    Modin DataFrame/Series can only be converted to a Ray Dataset if Modin uses a Ray engine.\n    \"\"\"\n    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher\n\n    return FactoryDispatcher.to_ray(modin_obj)\n\n\n@wrap_free_function_in_argument_caster(\"to_dask\")\ndef to_dask(modin_obj):\n    \"\"\"\n    Convert a Modin DataFrame/Series to a Dask DataFrame/Series.\n\n    Parameters\n    ----------\n    modin_obj : modin.pandas.DataFrame, modin.pandas.Series\n        The Modin DataFrame/Series to convert.\n\n    Returns\n    -------\n    dask.dataframe.DataFrame or dask.dataframe.Series\n        Converted object with type depending on input.\n\n    Notes\n    -----\n    Modin DataFrame/Series can only be converted to a Dask DataFrame/Series if Modin uses a Dask engine.\n    \"\"\"\n    from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher\n\n    return FactoryDispatcher.to_dask(modin_obj)\n\n\n__all__ = [\n    \"ExcelFile\",\n    \"HDFStore\",\n    \"json_normalize\",\n    \"read_clipboard\",\n    \"read_csv\",\n    \"read_excel\",\n    \"read_feather\",\n    \"read_fwf\",\n    \"read_gbq\",\n    \"read_hdf\",\n    \"read_html\",\n    
\"read_json\",\n    \"read_orc\",\n    \"read_parquet\",\n    \"read_pickle\",\n    \"read_sas\",\n    \"read_spss\",\n    \"read_sql\",\n    \"read_sql_query\",\n    \"read_sql_table\",\n    \"read_stata\",\n    \"read_table\",\n    \"read_xml\",\n    \"from_non_pandas\",\n    \"from_pandas\",\n    \"from_arrow\",\n    \"from_dataframe\",\n    \"to_pickle\",\n    \"to_pandas\",\n    \"to_numpy\",\n]\n"
  },
  {
    "path": "modin/pandas/iterator.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Place to define the Modin iterator.\"\"\"\n\nfrom __future__ import annotations\n\nfrom collections.abc import Iterator\nfrom typing import TYPE_CHECKING\n\nif TYPE_CHECKING:\n    from modin.pandas import DataFrame\n\n\nclass PartitionIterator(Iterator):\n    \"\"\"\n    Iterator on partitioned data.\n\n    Parameters\n    ----------\n    df : modin.pandas.DataFrame\n        The dataframe to iterate over.\n    axis : {0, 1}\n        Axis to iterate over.\n    func : callable\n        The function to get inner iterables from each partition.\n    \"\"\"\n\n    df: DataFrame\n\n    def __init__(self, df: DataFrame, axis, func):\n        self.df = df\n        self.axis = axis\n        self.index_iter = (\n            zip(\n                iter(slice(None) for _ in range(len(self.df.columns))),\n                range(len(self.df.columns)),\n            )\n            if axis\n            else zip(\n                range(len(self.df.index)),\n                iter(slice(None) for _ in range(len(self.df.index))),\n            )\n        )\n        self.func = func\n\n    def __iter__(self):\n        \"\"\"\n        Implement iterator interface.\n\n        Returns\n   
     -------\n        PartitionIterator\n            Iterator object.\n        \"\"\"\n        return self\n\n    def __next__(self):\n        \"\"\"\n        Implement iterator interface.\n\n        Returns\n        -------\n        PartitionIterator\n            Incremented iterator object.\n        \"\"\"\n        key = next(self.index_iter)\n        df = self.df.iloc[key]\n        return self.func(df)\n"
  },
  {
    "path": "modin/pandas/plotting.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Implement pandas plotting API.\"\"\"\n\nfrom pandas import plotting as pdplot\n\nfrom modin.logging import ClassLogger\nfrom modin.pandas.io import to_pandas\nfrom modin.utils import instancer\n\nfrom .dataframe import DataFrame\n\n\n@instancer\nclass Plotting(ClassLogger):\n    \"\"\"Wrapper of pandas plotting module.\"\"\"\n\n    def __dir__(self):\n        \"\"\"\n        Enable tab completion of plotting library.\n\n        Returns\n        -------\n        list\n            List of attributes in `self`.\n        \"\"\"\n        return dir(pdplot)\n\n    def __getattribute__(self, item):\n        \"\"\"\n        Convert any Modin DataFrames in parameters to pandas so that they can be plotted normally.\n\n        Parameters\n        ----------\n        item : str\n            Attribute to look for.\n\n        Returns\n        -------\n        object\n            If attribute is found in pandas.plotting, and it is a callable, a wrapper function is\n            returned which converts its arguments to pandas and calls a function pandas.plotting.`item`\n            on these arguments.\n            If attribute is found in pandas.plotting but it is not a 
callable, returns it.\n            Otherwise function tries to look for an attribute in `self`.\n        \"\"\"\n        if hasattr(pdplot, item):\n            func = getattr(pdplot, item)\n            if callable(func):\n\n                def wrap_func(*args, **kwargs):\n                    \"\"\"Convert Modin DataFrames to pandas then call the function.\"\"\"\n                    args = tuple(\n                        arg if not isinstance(arg, DataFrame) else to_pandas(arg)\n                        for arg in args\n                    )\n                    kwargs = {\n                        kwd: val if not isinstance(val, DataFrame) else to_pandas(val)\n                        for kwd, val in kwargs.items()\n                    }\n                    return func(*args, **kwargs)\n\n                return wrap_func\n            else:\n                return func\n        else:\n            return object.__getattribute__(self, item)\n"
  },
  {
    "path": "modin/pandas/resample.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Implement Resampler public API.\"\"\"\n\nfrom __future__ import annotations\n\nfrom typing import TYPE_CHECKING, Optional, Union\n\nimport numpy as np\nimport pandas\nimport pandas.core.resample\nfrom pandas._libs import lib\nfrom pandas.core.dtypes.common import is_list_like\n\nfrom modin.logging import ClassLogger\nfrom modin.pandas.utils import cast_function_modin2pandas\nfrom modin.utils import _inherit_docstrings\n\nif TYPE_CHECKING:\n    from modin.core.storage_formats import BaseQueryCompiler\n    from modin.pandas import DataFrame, Series\n\n\n@_inherit_docstrings(pandas.core.resample.Resampler)\nclass Resampler(ClassLogger):\n    _dataframe: Union[DataFrame, Series]\n    _query_compiler: BaseQueryCompiler\n\n    def __init__(\n        self,\n        dataframe: Union[DataFrame, Series],\n        rule,\n        axis=0,\n        closed=None,\n        label=None,\n        convention=\"start\",\n        kind=None,\n        on=None,\n        level=None,\n        origin=\"start_day\",\n        offset=None,\n        group_keys=lib.no_default,\n    ):\n        self._dataframe = dataframe\n        self._query_compiler = dataframe._query_compiler\n        
self.axis = self._dataframe._get_axis_number(axis)\n        self.resample_kwargs = {\n            \"rule\": rule,\n            \"axis\": axis,\n            \"closed\": closed,\n            \"label\": label,\n            \"convention\": convention,\n            \"kind\": kind,\n            \"on\": on,\n            \"level\": level,\n            \"origin\": origin,\n            \"offset\": offset,\n            \"group_keys\": group_keys,\n        }\n        self.__groups = self._get_groups()\n\n    def _get_groups(self):\n        \"\"\"\n        Compute the resampled groups.\n\n        Returns\n        -------\n        PandasGroupby\n            Groups as specified by resampling arguments.\n        \"\"\"\n        df = self._dataframe if self.axis == 0 else self._dataframe.T\n        convention = self.resample_kwargs[\"convention\"]\n        groups = df.groupby(\n            pandas.Grouper(\n                key=self.resample_kwargs[\"on\"],\n                freq=self.resample_kwargs[\"rule\"],\n                closed=self.resample_kwargs[\"closed\"],\n                label=self.resample_kwargs[\"label\"],\n                convention=convention if convention is not lib.no_default else \"start\",\n                level=self.resample_kwargs[\"level\"],\n                origin=self.resample_kwargs[\"origin\"],\n                offset=self.resample_kwargs[\"offset\"],\n            ),\n            group_keys=self.resample_kwargs[\"group_keys\"],\n        )\n        return groups\n\n    def __getitem__(self, key):\n        \"\"\"\n        Get ``Resampler`` based on `key` columns of original dataframe.\n\n        Parameters\n        ----------\n        key : str or list\n            String or list of selections.\n\n        Returns\n        -------\n        modin.pandas.BasePandasDataset\n            New ``Resampler`` based on `key` columns subset\n            of the original dataframe.\n        \"\"\"\n\n        def _get_new_resampler(key):\n            subset = 
self._dataframe[key]\n            resampler = type(self)(subset, **self.resample_kwargs)\n            return resampler\n\n        from .series import Series\n\n        if isinstance(\n            key, (list, tuple, Series, pandas.Series, pandas.Index, np.ndarray)\n        ):\n            if len(self._dataframe.columns.intersection(key)) != len(set(key)):\n                missed_keys = list(set(key).difference(self._dataframe.columns))\n                raise KeyError(f\"Columns not found: {str(sorted(missed_keys))[1:-1]}\")\n            return _get_new_resampler(list(key))\n\n        if key not in self._dataframe:\n            raise KeyError(f\"Column not found: {key}\")\n\n        return _get_new_resampler(key)\n\n    @property\n    def groups(self):\n        return self._query_compiler.default_to_pandas(\n            lambda df: pandas.DataFrame.resample(df, **self.resample_kwargs).groups\n        )\n\n    @property\n    def indices(self):\n        return self._query_compiler.default_to_pandas(\n            lambda df: pandas.DataFrame.resample(df, **self.resample_kwargs).indices\n        )\n\n    def get_group(self, name, obj=None):\n        return self._dataframe.__constructor__(\n            query_compiler=self._query_compiler.resample_get_group(\n                self.resample_kwargs, name, obj\n            )\n        )\n\n    def apply(self, func, *args, **kwargs):\n        func = cast_function_modin2pandas(func)\n        from .dataframe import DataFrame\n\n        if isinstance(self._dataframe, DataFrame):\n            query_comp_op = self._query_compiler.resample_app_df\n        else:\n            query_comp_op = self._query_compiler.resample_app_ser\n\n        dataframe = DataFrame(\n            query_compiler=query_comp_op(\n                self.resample_kwargs,\n                func,\n                *args,\n                **kwargs,\n            )\n        )\n        if is_list_like(func) or isinstance(self._dataframe, DataFrame):\n            return 
dataframe\n        else:\n            if len(dataframe.index) == 1:\n                return dataframe.iloc[0]\n            else:\n                return dataframe.squeeze()\n\n    def aggregate(self, func, *args, **kwargs):\n        from .dataframe import DataFrame\n\n        if isinstance(self._dataframe, DataFrame):\n            query_comp_op = self._query_compiler.resample_agg_df\n        else:\n            query_comp_op = self._query_compiler.resample_agg_ser\n\n        dataframe = DataFrame(\n            query_compiler=query_comp_op(\n                self.resample_kwargs,\n                func,\n                *args,\n                **kwargs,\n            )\n        )\n        if is_list_like(func) or isinstance(self._dataframe, DataFrame):\n            return dataframe\n        else:\n            if len(dataframe.index) == 1:\n                return dataframe.iloc[0]\n            else:\n                return dataframe.squeeze()\n\n    def transform(self, arg, *args, **kwargs):\n        return self._dataframe.__constructor__(\n            query_compiler=self._query_compiler.resample_transform(\n                self.resample_kwargs, arg, *args, **kwargs\n            )\n        )\n\n    def pipe(self, func, *args, **kwargs):\n        return self._dataframe.__constructor__(\n            query_compiler=self._query_compiler.resample_pipe(\n                self.resample_kwargs, func, *args, **kwargs\n            )\n        )\n\n    def ffill(self, limit=None):\n        return self.fillna(method=\"ffill\", limit=limit)\n\n    def bfill(self, limit=None):\n        return self.fillna(method=\"bfill\", limit=limit)\n\n    def nearest(self, limit=None):\n        return self._dataframe.__constructor__(\n            query_compiler=self._query_compiler.resample_nearest(\n                self.resample_kwargs, limit\n            )\n        )\n\n    def fillna(self, method, limit=None):\n        return self._dataframe.__constructor__(\n            
query_compiler=self._query_compiler.resample_fillna(\n                self.resample_kwargs, method, limit\n            )\n        )\n\n    def asfreq(self, fill_value=None):\n        return self._dataframe.__constructor__(\n            query_compiler=self._query_compiler.resample_asfreq(\n                self.resample_kwargs, fill_value\n            )\n        )\n\n    def interpolate(\n        self,\n        method=\"linear\",\n        *,\n        axis=0,\n        limit=None,\n        inplace=False,\n        limit_direction: Optional[str] = None,\n        limit_area=None,\n        downcast=lib.no_default,\n        **kwargs,\n    ):\n        return self._dataframe.__constructor__(\n            query_compiler=self._query_compiler.resample_interpolate(\n                self.resample_kwargs,\n                method,\n                axis=axis,\n                limit=limit,\n                inplace=inplace,\n                limit_direction=limit_direction,\n                limit_area=limit_area,\n                downcast=downcast,\n                **kwargs,\n            )\n        )\n\n    def count(self):\n        return self._dataframe.__constructor__(\n            query_compiler=self._query_compiler.resample_count(self.resample_kwargs)\n        )\n\n    def nunique(self, *args, **kwargs):\n        return self._dataframe.__constructor__(\n            query_compiler=self._query_compiler.resample_nunique(\n                self.resample_kwargs, *args, **kwargs\n            )\n        )\n\n    def first(self, *args, **kwargs):\n        return self._dataframe.__constructor__(\n            query_compiler=self._query_compiler.resample_first(\n                self.resample_kwargs,\n                *args,\n                **kwargs,\n            )\n        )\n\n    def last(self, *args, **kwargs):\n        return self._dataframe.__constructor__(\n            query_compiler=self._query_compiler.resample_last(\n                self.resample_kwargs,\n                *args,\n      
          **kwargs,\n            )\n        )\n\n    def max(self, *args, **kwargs):\n        return self._dataframe.__constructor__(\n            query_compiler=self._query_compiler.resample_max(\n                self.resample_kwargs,\n                *args,\n                **kwargs,\n            )\n        )\n\n    def mean(self, *args, **kwargs):\n        return self._dataframe.__constructor__(\n            query_compiler=self._query_compiler.resample_mean(\n                self.resample_kwargs,\n                *args,\n                **kwargs,\n            )\n        )\n\n    def median(self, *args, **kwargs):\n        return self._dataframe.__constructor__(\n            query_compiler=self._query_compiler.resample_median(\n                self.resample_kwargs,\n                *args,\n                **kwargs,\n            )\n        )\n\n    def min(self, *args, **kwargs):\n        return self._dataframe.__constructor__(\n            query_compiler=self._query_compiler.resample_min(\n                self.resample_kwargs,\n                *args,\n                **kwargs,\n            )\n        )\n\n    def ohlc(self, *args, **kwargs):\n        from .dataframe import DataFrame\n\n        if isinstance(self._dataframe, DataFrame):\n            return DataFrame(\n                query_compiler=self._query_compiler.resample_ohlc_df(\n                    self.resample_kwargs,\n                    *args,\n                    **kwargs,\n                )\n            )\n        else:\n            return DataFrame(\n                query_compiler=self._query_compiler.resample_ohlc_ser(\n                    self.resample_kwargs,\n                    *args,\n                    **kwargs,\n                )\n            )\n\n    def prod(self, min_count=0, *args, **kwargs):\n        return self._dataframe.__constructor__(\n            query_compiler=self._query_compiler.resample_prod(\n                self.resample_kwargs, min_count=min_count, *args, **kwargs\n       
     )\n        )\n\n    def size(self):\n        from .series import Series\n\n        output_series = Series(\n            query_compiler=self._query_compiler.resample_size(self.resample_kwargs)\n        )\n        if not isinstance(self._dataframe, Series):\n            # If input is a DataFrame, rename output Series to None\n            return output_series.rename(None)\n        return output_series\n\n    def sem(self, *args, **kwargs):\n        return self._dataframe.__constructor__(\n            query_compiler=self._query_compiler.resample_sem(\n                self.resample_kwargs,\n                *args,\n                **kwargs,\n            )\n        )\n\n    def std(self, ddof=1, *args, **kwargs):\n        return self._dataframe.__constructor__(\n            query_compiler=self._query_compiler.resample_std(\n                self.resample_kwargs, *args, ddof=ddof, **kwargs\n            )\n        )\n\n    def sum(self, min_count=0, *args, **kwargs):\n        return self._dataframe.__constructor__(\n            query_compiler=self._query_compiler.resample_sum(\n                self.resample_kwargs, min_count=min_count, *args, **kwargs\n            )\n        )\n\n    def var(self, ddof=1, *args, **kwargs):\n        return self._dataframe.__constructor__(\n            query_compiler=self._query_compiler.resample_var(\n                self.resample_kwargs, *args, ddof=ddof, **kwargs\n            )\n        )\n\n    def quantile(self, q=0.5, **kwargs):\n        return self._dataframe.__constructor__(\n            query_compiler=self._query_compiler.resample_quantile(\n                self.resample_kwargs, q, **kwargs\n            )\n        )\n"
  },
  {
    "path": "modin/pandas/series.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module houses `Series` class, that is distributed version of `pandas.Series`.\"\"\"\n\nfrom __future__ import annotations\n\nimport os\nimport warnings\nfrom typing import IO, TYPE_CHECKING, Any, Hashable, Iterable, Optional, Union\n\nimport numpy as np\nimport pandas\nfrom pandas._libs import lib\nfrom pandas._typing import (\n    ArrayLike,\n    Axis,\n    DtypeObj,\n    IndexKeyFunc,\n    Scalar,\n    Sequence,\n    StorageOptions,\n)\nfrom pandas.api.types import is_integer\nfrom pandas.core.arrays import ExtensionArray\nfrom pandas.core.common import apply_if_callable, is_bool_indexer\nfrom pandas.core.dtypes.common import is_dict_like, is_list_like\nfrom pandas.core.series import _coerce_method\nfrom pandas.io.formats.info import SeriesInfo\nfrom pandas.util._decorators import doc\nfrom pandas.util._validators import validate_bool_kwarg\n\nfrom modin.config import PersistentPickle\nfrom modin.core.storage_formats.pandas.query_compiler_caster import (\n    EXTENSION_DICT_TYPE,\n    EXTENSION_NO_LOOKUP,\n)\nfrom modin.logging import disable_logging\nfrom modin.pandas.io import from_pandas, to_pandas\nfrom modin.utils import (\n    
MODIN_UNNAMED_SERIES_LABEL,\n    _inherit_docstrings,\n    import_optional_dependency,\n    sentinel,\n)\n\nfrom .accessor import CachedAccessor, SparseAccessor\nfrom .base import _ATTRS_NO_LOOKUP, BasePandasDataset\nfrom .iterator import PartitionIterator\nfrom .series_utils import (\n    CategoryMethods,\n    DatetimeProperties,\n    ListAccessor,\n    StringMethods,\n    StructAccessor,\n)\nfrom .utils import (\n    GET_BACKEND_DOC,\n    SET_BACKEND_DOC,\n    _doc_binary_op,\n    cast_function_modin2pandas,\n    is_scalar,\n)\n\nif TYPE_CHECKING:\n    import numpy.typing as npt\n    from typing_extensions import Self\n\n    from modin.core.storage_formats import BaseQueryCompiler\n\n    from .dataframe import DataFrame\n\n\n@_inherit_docstrings(\n    pandas.Series, excluded=[pandas.Series.__init__], apilink=\"pandas.Series\"\n)\nclass Series(BasePandasDataset):\n    \"\"\"\n    Modin distributed representation of `pandas.Series`.\n\n    Internally, the data can be divided into partitions in order to parallelize\n    computations and utilize the user's hardware as much as possible.\n\n    Inherit common for DataFrames and Series functionality from the\n    `BasePandasDataset` class.\n\n    Parameters\n    ----------\n    data : modin.pandas.Series, array-like, Iterable, dict, or scalar value, optional\n        Contains data stored in Series. If data is a dict, argument order is\n        maintained.\n    index : array-like or Index (1d), optional\n        Values must be hashable and have the same length as `data`.\n    dtype : str, np.dtype, or pandas.ExtensionDtype, optional\n        Data type for the output Series. 
If not specified, this will be\n        inferred from `data`.\n    name : str, optional\n        The name to give to the Series.\n    copy : bool, default: False\n        Copy input data.\n    fastpath : bool, default: False\n        `pandas` internal parameter.\n    query_compiler : BaseQueryCompiler, optional\n        A query compiler object to create the Series from.\n    \"\"\"\n\n    _pandas_class = pandas.Series\n    __array_priority__ = pandas.Series.__array_priority__\n\n    _extensions: EXTENSION_DICT_TYPE = EXTENSION_DICT_TYPE(dict)\n\n    def __init__(\n        self,\n        data=None,\n        index=None,\n        dtype=None,\n        name=None,\n        copy=None,\n        fastpath=lib.no_default,\n        query_compiler: BaseQueryCompiler = None,\n    ) -> None:\n        from modin.numpy import array\n\n        # Siblings are other dataframes that share the same query compiler. We\n        # use this list to update inplace when there is a shallow copy.\n        self._siblings = []\n        if isinstance(data, type(self)):\n            query_compiler = data._query_compiler.copy()\n            if index is not None:\n                if any(i not in data.index for i in index):\n                    raise NotImplementedError(\n                        \"Passing non-existent columns or index values to constructor \"\n                        + \"not yet implemented.\"\n                    )\n                query_compiler = data.loc[index]._query_compiler\n        if isinstance(data, array):\n            if data._ndim == 2:\n                raise ValueError(\"Data must be 1-dimensional\")\n            query_compiler = data._query_compiler.copy()\n            if index is not None:\n                query_compiler.index = index\n            if dtype is not None:\n                query_compiler = query_compiler.astype(\n                    {col_name: dtype for col_name in query_compiler.columns}\n                )\n            if name is None:\n                
query_compiler.columns = pandas.Index([MODIN_UNNAMED_SERIES_LABEL])\n        if query_compiler is None:\n            # Defaulting to pandas\n            if name is None:\n                name = MODIN_UNNAMED_SERIES_LABEL\n                if isinstance(data, pandas.Series) and data.name is not None:\n                    name = data.name\n\n            pandas_df = pandas.DataFrame(\n                pandas.Series(\n                    data=data,\n                    index=index,\n                    dtype=dtype,\n                    name=name,\n                    copy=copy,\n                    fastpath=fastpath,\n                )\n            )\n            if pandas_df.size >= 2_500_000:\n                warnings.warn(\n                    \"Distributing {} object. This may take some time.\".format(\n                        type(data)\n                    )\n                )\n            query_compiler = from_pandas(pandas_df)._query_compiler\n        self._query_compiler = query_compiler.columnarize()\n        if name is not None:\n            self.name = name\n\n    def _get_name(self) -> Hashable:\n        \"\"\"\n        Get the value of the `name` property.\n\n        Returns\n        -------\n        hashable\n        \"\"\"\n        name = self._query_compiler.columns[0]\n        if name == MODIN_UNNAMED_SERIES_LABEL:\n            return None\n        return name\n\n    def _set_name(self, name: Hashable) -> None:\n        \"\"\"\n        Set the value of the `name` property.\n\n        Parameters\n        ----------\n        name : hashable\n            Name value to set.\n        \"\"\"\n        if name is None:\n            name = MODIN_UNNAMED_SERIES_LABEL\n        if isinstance(name, tuple):\n            columns = pandas.MultiIndex.from_tuples(tuples=[name])\n        else:\n            columns = [name]\n        self._query_compiler.columns = columns\n\n    name: Hashable = property(_get_name, _set_name)\n    _parent = None\n    # Parent axis denotes 
axis that was used to select series in a parent dataframe.\n    # If _parent_axis == 0, then it means that index axis was used via df.loc[row]\n    # indexing operations and assignments should be done to rows of parent.\n    # If _parent_axis == 1 it means that column axis was used via df[column] and assignments\n    # should be done to columns of parent.\n    _parent_axis = 0\n\n    @_doc_binary_op(operation=\"addition\", bin_op=\"add\")\n    def __add__(self, right) -> Series:\n        return self.add(right)\n\n    @_doc_binary_op(operation=\"addition\", bin_op=\"radd\", right=\"left\")\n    def __radd__(self, left) -> Series:\n        return self.radd(left)\n\n    @_doc_binary_op(operation=\"union\", bin_op=\"and\", right=\"other\")\n    def __and__(self, other) -> Series:\n        if isinstance(other, (list, np.ndarray, pandas.Series)):\n            return self._default_to_pandas(pandas.Series.__and__, other)\n        new_self, new_other = self._prepare_inter_op(other)\n        return super(Series, new_self).__and__(new_other)\n\n    @_doc_binary_op(operation=\"union\", bin_op=\"and\", right=\"other\")\n    def __rand__(self, other) -> Series:\n        if isinstance(other, (list, np.ndarray, pandas.Series)):\n            return self._default_to_pandas(pandas.Series.__rand__, other)\n        new_self, new_other = self._prepare_inter_op(other)\n        return super(Series, new_self).__rand__(new_other)\n\n    # add `_inherit_docstrings` decorator to force method link addition.\n    @_inherit_docstrings(pandas.Series.__array__, apilink=\"pandas.Series.__array__\")\n    def __array__(\n        self, dtype: npt.DTypeLike | None = None, copy: bool | None = None\n    ) -> np.ndarray:\n        return super(Series, self).__array__(dtype).flatten()\n\n    def __column_consortium_standard__(\n        self, *, api_version: str | None = None\n    ):  # noqa: PR01, RT01\n        \"\"\"\n        Provide entry point to the Consortium DataFrame Standard API.\n\n        This is 
developed and maintained outside of Modin.\n        Please report any issues to https://github.com/data-apis/dataframe-api-compat.\n        \"\"\"\n        dataframe_api_compat = import_optional_dependency(\n            \"dataframe_api_compat\", \"implementation\"\n        )\n        return dataframe_api_compat.modin_standard.convert_to_standard_compliant_column(\n            self, api_version=api_version\n        )\n\n    def __contains__(self, key: Hashable) -> bool:\n        \"\"\"\n        Check if `key` in the `Series.index`.\n\n        Parameters\n        ----------\n        key : hashable\n            Key to check the presence in the index.\n\n        Returns\n        -------\n        bool\n        \"\"\"\n        return key in self.index\n\n    def __copy__(self, deep: bool = True) -> Series:\n        \"\"\"\n        Return the copy of the Series.\n\n        Parameters\n        ----------\n        deep : bool, default: True\n            Whether the copy should be deep or not.\n\n        Returns\n        -------\n        Series\n        \"\"\"\n        return self.copy(deep=deep)\n\n    def __deepcopy__(self, memo=None) -> Series:\n        \"\"\"\n        Return the deep copy of the Series.\n\n        Parameters\n        ----------\n        memo : Any, optional\n           Deprecated parameter.\n\n        Returns\n        -------\n        Series\n        \"\"\"\n        return self.copy(deep=True)\n\n    def __delitem__(self, key: Hashable) -> None:\n        \"\"\"\n        Delete item identified by `key` label.\n\n        Parameters\n        ----------\n        key : hashable\n            Key to delete.\n        \"\"\"\n        if key not in self.keys():\n            raise KeyError(key)\n        self.drop(labels=key, inplace=True)\n\n    @_doc_binary_op(\n        operation=\"integer division and modulo\",\n        bin_op=\"divmod\",\n        returns=\"tuple of two Series\",\n    )\n    def __divmod__(self, right) -> tuple[Series, Series]:\n        return 
self.divmod(right)\n\n    @_doc_binary_op(\n        operation=\"integer division and modulo\",\n        bin_op=\"divmod\",\n        right=\"left\",\n        returns=\"tuple of two Series\",\n    )\n    def __rdivmod__(self, left) -> tuple[Series, Series]:\n        return self.rdivmod(left)\n\n    @_doc_binary_op(operation=\"integer division\", bin_op=\"floordiv\")\n    def __floordiv__(self, right) -> Series:\n        return self.floordiv(right)\n\n    @_doc_binary_op(operation=\"integer division\", bin_op=\"floordiv\")\n    def __rfloordiv__(self, right) -> Series:\n        return self.rfloordiv(right)\n\n    @disable_logging\n    def __getattribute__(self, key: str) -> Any:\n        \"\"\"\n        Get attribute identified by `key`.\n\n        Parameters\n        ----------\n        key : str\n            Key to get.\n\n        Returns\n        -------\n        Any\n            The attribute.\n        \"\"\"\n        # NOTE that to get an attribute, python calls __getattribute__() first and\n        # then falls back to __getattr__() if the former raises an AttributeError.\n        if key not in EXTENSION_NO_LOOKUP:\n            extensions_result = self._getattribute__from_extension_impl(\n                key, __class__._extensions\n            )\n            if extensions_result is not sentinel:\n                return extensions_result\n\n        return super().__getattribute__(key)\n\n    @disable_logging\n    def __getattr__(self, key: Hashable) -> Any:\n        \"\"\"\n        Return item identified by `key`.\n\n        Parameters\n        ----------\n        key : hashable\n            Key to get.\n\n        Returns\n        -------\n        Any\n\n        Notes\n        -----\n        First try to use `__getattribute__` method. 
If it fails\n        try to get `key` from `Series` fields.\n        \"\"\"\n        # NOTE that to get an attribute, python calls __getattribute__() first and\n        # then falls back to __getattr__() if the former raises an AttributeError.\n        if key not in _ATTRS_NO_LOOKUP and key in self._query_compiler.index:\n            return self[key]\n        raise AttributeError(f\"'Series' object has no attribute '{key}'\")\n\n    __float__ = _coerce_method(float)\n    __int__ = _coerce_method(int)\n\n    def __iter__(self):\n        \"\"\"\n        Return an iterator of the values.\n\n        Returns\n        -------\n        iterable\n        \"\"\"\n        return self._to_pandas().__iter__()\n\n    @_doc_binary_op(operation=\"modulo\", bin_op=\"mod\")\n    def __mod__(self, right) -> Series:\n        return self.mod(right)\n\n    @_doc_binary_op(operation=\"modulo\", bin_op=\"mod\", right=\"left\")\n    def __rmod__(self, left) -> Series:\n        return self.rmod(left)\n\n    @_doc_binary_op(operation=\"multiplication\", bin_op=\"mul\")\n    def __mul__(self, right) -> Series:\n        return self.mul(right)\n\n    @_doc_binary_op(operation=\"multiplication\", bin_op=\"mul\", right=\"left\")\n    def __rmul__(self, left) -> Series:\n        return self.rmul(left)\n\n    @_doc_binary_op(operation=\"disjunction\", bin_op=\"or\", right=\"other\")\n    def __or__(self, other) -> Series:\n        if isinstance(other, (list, np.ndarray, pandas.Series)):\n            return self._default_to_pandas(pandas.Series.__or__, other)\n        new_self, new_other = self._prepare_inter_op(other)\n        return super(Series, new_self).__or__(new_other)\n\n    @_doc_binary_op(operation=\"disjunction\", bin_op=\"or\", right=\"other\")\n    def __ror__(self, other) -> Series:\n        if isinstance(other, (list, np.ndarray, pandas.Series)):\n            return self._default_to_pandas(pandas.Series.__ror__, other)\n        new_self, new_other = self._prepare_inter_op(other)\n    
    return super(Series, new_self).__ror__(new_other)\n\n    @_doc_binary_op(operation=\"exclusive or\", bin_op=\"xor\", right=\"other\")\n    def __xor__(self, other) -> Series:\n        if isinstance(other, (list, np.ndarray, pandas.Series)):\n            return self._default_to_pandas(pandas.Series.__xor__, other)\n        new_self, new_other = self._prepare_inter_op(other)\n        return super(Series, new_self).__xor__(new_other)\n\n    @_doc_binary_op(operation=\"exclusive or\", bin_op=\"xor\", right=\"other\")\n    def __rxor__(self, other) -> Series:\n        if isinstance(other, (list, np.ndarray, pandas.Series)):\n            return self._default_to_pandas(pandas.Series.__rxor__, other)\n        new_self, new_other = self._prepare_inter_op(other)\n        return super(Series, new_self).__rxor__(new_other)\n\n    @_doc_binary_op(operation=\"exponential power\", bin_op=\"pow\")\n    def __pow__(self, right) -> Series:\n        return self.pow(right)\n\n    @_doc_binary_op(operation=\"exponential power\", bin_op=\"pow\", right=\"left\")\n    def __rpow__(self, left) -> Series:\n        return self.rpow(left)\n\n    def __repr__(self) -> str:\n        \"\"\"\n        Return a string representation for a particular Series.\n\n        Returns\n        -------\n        str\n        \"\"\"\n        num_rows = pandas.get_option(\"display.max_rows\") or 60\n        num_cols = pandas.get_option(\"display.max_columns\") or 20\n        temp_df = self._build_repr_df(num_rows, num_cols)\n        if isinstance(temp_df, pandas.DataFrame) and not temp_df.empty:\n            temp_df = temp_df.iloc[:, 0]\n        temp_str = repr(temp_df)\n        freq_str = (\n            \"Freq: {}, \".format(self.index.freqstr)\n            if isinstance(self.index, pandas.DatetimeIndex)\n            else \"\"\n        )\n        if self.name is not None:\n            name_str = \"Name: {}, \".format(str(self.name))\n        else:\n            name_str = \"\"\n        if len(self) > 
num_rows:\n            len_str = \"Length: {}, \".format(len(self))\n        else:\n            len_str = \"\"\n        dtype_str = \"dtype: {}\".format(\n            str(self.dtype) + \")\"\n            if temp_df.empty\n            else temp_str.rsplit(\"dtype: \", 1)[-1]\n        )\n        if len(self) == 0:\n            return \"Series([], {}{}{}\".format(freq_str, name_str, dtype_str)\n        maxsplit = 1\n        if (\n            isinstance(temp_df, pandas.Series)\n            and temp_df.name is not None\n            and isinstance(temp_df.dtype, pandas.CategoricalDtype)\n        ):\n            maxsplit = 2\n        return temp_str.rsplit(\"\\n\", maxsplit)[0] + \"\\n{}{}{}{}\".format(\n            freq_str, name_str, len_str, dtype_str\n        )\n\n    def __round__(self, decimals=0) -> Series:\n        \"\"\"\n        Round each value in a Series to the given number of decimals.\n\n        Parameters\n        ----------\n        decimals : int, default: 0\n            Number of decimal places to round to.\n\n        Returns\n        -------\n        Series\n        \"\"\"\n        return self._create_or_update_from_compiler(\n            self._query_compiler.round(decimals=decimals)\n        )\n\n    def __setitem__(self, key: Hashable, value: Any) -> None:\n        \"\"\"\n        Set `value` identified by `key` in the Series.\n\n        Parameters\n        ----------\n        key : hashable\n            Key to set.\n        value : Any\n            Value to set.\n        \"\"\"\n        if isinstance(key, slice):\n            self._setitem_slice(key, value)\n        else:\n            self.loc[key] = value\n\n    @disable_logging\n    def __setattr__(self, name: str, value: Any) -> None:\n        \"\"\"\n        Set attribute `name` to `value`.\n\n        Parameters\n        ----------\n        name : str\n            Name of the attribute to set.\n        value : Any\n            Value to set.\n\n        Returns\n        -------\n        None\n     
   \"\"\"\n        # An extension property is only accessible if the backend supports it.\n        extension = self._get_extension(name, __class__._extensions)\n        if extension is not sentinel and hasattr(extension, \"__set__\"):\n            return extension.__set__(self, value)\n        return super().__setattr__(name, value)\n\n    @disable_logging\n    def __delattr__(self, name) -> None:\n        \"\"\"\n        Delete attribute `name`.\n\n        Parameters\n        ----------\n        name : str\n            Name of the attribute to delete.\n\n        Returns\n        -------\n        None\n        \"\"\"\n        # An extension property is only accessible if the backend supports it.\n        extension = self._get_extension(name, __class__._extensions)\n        if extension is not sentinel and hasattr(extension, \"__delete__\"):\n            return extension.__delete__(self)\n        return super().__delattr__(name)\n\n    @_doc_binary_op(operation=\"subtraction\", bin_op=\"sub\")\n    def __sub__(self, right) -> Series:\n        return self.sub(right)\n\n    @_doc_binary_op(operation=\"subtraction\", bin_op=\"sub\", right=\"left\")\n    def __rsub__(self, left) -> Series:\n        return self.rsub(left)\n\n    @_doc_binary_op(operation=\"floating division\", bin_op=\"truediv\")\n    def __truediv__(self, right) -> Series:\n        return self.truediv(right)\n\n    @_doc_binary_op(operation=\"floating division\", bin_op=\"truediv\", right=\"left\")\n    def __rtruediv__(self, left) -> Series:\n        return self.rtruediv(left)\n\n    __iadd__ = __add__\n    __imul__ = __mul__\n    __ipow__ = __pow__\n    __isub__ = __sub__\n    __itruediv__ = __truediv__\n\n    @property\n    def values(self):  # noqa: RT01, D200\n        \"\"\"\n        Return Series as ndarray or ndarray-like depending on the dtype.\n        \"\"\"\n        import modin.pandas as pd\n\n        if isinstance(\n            self.dtype, pandas.core.dtypes.dtypes.ExtensionDtype\n        ) 
and not isinstance(self.dtype, pd.CategoricalDtype):\n            return self._default_to_pandas(\"values\")\n\n        data = self.to_numpy()\n        if isinstance(self.dtype, pd.CategoricalDtype):\n            from modin.config import ModinNumpy\n\n            if ModinNumpy.get():\n                data = data._to_numpy()\n            data = pd.Categorical(data, dtype=self.dtype)\n        return data\n\n    def __arrow_array__(self, type=None):  # noqa: GL08\n        # Although pandas.Series does not implement this method (true for version 2.2.*),\n        # however, pyarrow has support for it. This method emulates this behavior and\n        # allows pyarrow to work with modin.pandas.Series.\n        import pyarrow\n\n        return pyarrow.array(self._to_pandas(), type=type)\n\n    def add(\n        self, other, level=None, fill_value=None, axis=0\n    ) -> Series:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return Addition of series and other, element-wise (binary operator add).\n        \"\"\"\n        new_self, new_other = self._prepare_inter_op(other)\n        return super(Series, new_self).add(\n            new_other, level=level, fill_value=fill_value, axis=axis\n        )\n\n    def radd(\n        self, other, level=None, fill_value=None, axis=0\n    ) -> Series:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return Addition of series and other, element-wise (binary operator radd).\n        \"\"\"\n        new_self, new_other = self._prepare_inter_op(other)\n        return super(Series, new_self).radd(\n            new_other, level=level, fill_value=fill_value, axis=axis\n        )\n\n    def add_prefix(\n        self, prefix, axis=None\n    ) -> Union[DataFrame, Series]:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Prefix labels with string `prefix`.\n        \"\"\"\n        axis = 0 if axis is None else self._get_axis_number(axis)\n        return self.__constructor__(\n            query_compiler=self._query_compiler.add_prefix(prefix, 
axis=axis)\n        )\n\n    def add_suffix(\n        self, suffix, axis=None\n    ) -> Union[DataFrame, Series]:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Suffix labels with string `suffix`.\n        \"\"\"\n        axis = 0 if axis is None else self._get_axis_number(axis)\n        return self.__constructor__(\n            query_compiler=self._query_compiler.add_suffix(suffix, axis=axis)\n        )\n\n    def aggregate(\n        self, func=None, axis=0, *args, **kwargs\n    ) -> Union[Series, Scalar]:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Aggregate using one or more operations over the specified axis.\n        \"\"\"\n\n        def error_raiser(msg, exception):\n            \"\"\"Convert passed exception to the same type as pandas do and raise it.\"\"\"\n            # HACK: to concord with pandas error types by replacing all of the\n            # TypeErrors to the AssertionErrors\n            exception = exception if exception is not TypeError else AssertionError\n            raise exception(msg)\n\n        self._validate_function(func, on_invalid=error_raiser)\n        return super(Series, self).aggregate(func, axis, *args, **kwargs)\n\n    agg = aggregate\n\n    def apply(\n        self, func, convert_dtype=lib.no_default, args=(), by_row=\"compat\", **kwargs\n    ) -> Union[DataFrame, Series]:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Invoke function on values of Series.\n        \"\"\"\n        if by_row != \"compat\":\n            # TODO: add test\n            return self._default_to_pandas(\n                pandas.Series.apply,\n                func=func,\n                convert_dtype=convert_dtype,\n                args=args,\n                by_row=by_row,\n                **kwargs,\n            )\n\n        if convert_dtype is lib.no_default:\n            convert_dtype = True\n        else:\n            warnings.warn(\n                \"the convert_dtype parameter is deprecated and will be removed in a \"\n                + 
\"future version.  Do ``ser.astype(object).apply()`` \"\n                + \"instead if you want ``convert_dtype=False``.\",\n                FutureWarning,\n            )\n\n        func = cast_function_modin2pandas(func)\n        self._validate_function(func)\n        # apply and aggregate have slightly different behaviors, so we have to use\n        # each one separately to determine the correct return type. In the case of\n        # `agg`, the axis is set, but it is not required for the computation, so we use\n        # it to determine which function to run.\n        if kwargs.pop(\"axis\", None) is not None:\n            apply_func = \"agg\"\n        else:\n            apply_func = \"apply\"\n\n        # This is the simplest way to determine the return type, but there are checks\n        # in pandas that verify that some results are created. This is a challenge for\n        # empty DataFrames, but fortunately they only happen when the `func` type is\n        # a list or a dictionary, which means that the return type won't change from\n        # type(self), so we catch that error and use `type(self).__name__` for the return\n        # type.\n        # We create a \"dummy\" `Series` to do the error checking and determining\n        # the return type.\n        try:\n            return_type = type(\n                getattr(\n                    pandas.Series(self[:1].values, index=self.index[:1]), apply_func\n                )(func, *args, **kwargs)\n            ).__name__\n        except Exception:\n            return_type = type(self).__name__\n        if (\n            isinstance(func, str)\n            or is_list_like(func)\n            or return_type not in [\"DataFrame\", \"Series\"]\n        ):\n            # use the explicit non-Compat parent to avoid infinite recursion\n            result = super(Series, self).apply(\n                func,\n                axis=0,\n                raw=False,\n                result_type=None,\n                args=args,\n 
               **kwargs,\n            )\n        else:\n            # handle ufuncs and lambdas\n            if kwargs or args and not isinstance(func, np.ufunc):\n\n                def f(x):\n                    return func(x, *args, **kwargs)\n\n            else:\n                f = func\n            with np.errstate(all=\"ignore\"):\n                if isinstance(f, np.ufunc):\n                    return f(self)\n\n                # The return_type is only a DataFrame when we have a function\n                # return a Series object. This is a very particular case that\n                # has to be handled by the underlying pandas.Series apply\n                # function and not our default map call.\n                if return_type == \"DataFrame\":\n                    result = self._query_compiler.apply_on_series(f)\n                else:\n                    result = self.map(f)._query_compiler\n\n        if return_type == \"DataFrame\":\n            from .dataframe import DataFrame\n\n            result = DataFrame(query_compiler=result)\n        elif return_type == \"Series\":\n            result = self.__constructor__(query_compiler=result)\n            if result.name == self.index[0]:\n                result.name = None\n        elif isinstance(result, type(self._query_compiler)):\n            # sometimes result can be not a query_compiler, but scalar (for example\n            # for sum or count functions)\n            return result.to_pandas().squeeze()\n        return result\n\n    def transform(\n        self, func, axis=0, *args, **kwargs\n    ) -> Series:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Call ``func`` on self producing a `BasePandasDataset` with the same axis shape as self.\n        \"\"\"\n        if isinstance(func, list):\n            # drop nonunique functions to align with pandas behavior instead of getting\n            # \"pandas.errors.SpecificationError: Function names must be unique...\"\n            # Example:\n            
# >>> pandas.Series([0., 1., 4.]).transform([\"sqrt\", \"sqrt\"])\n            # sqrt\n            # 0   0.0\n            # 1   1.0\n            # 2   2.0\n            unique_func = [func[0]]\n            for one_func in func[1:]:\n                if one_func not in unique_func:\n                    unique_func.append(one_func)\n            func = unique_func\n        return super(Series, self).transform(func, axis, *args, **kwargs)\n\n    def argmax(\n        self, axis=None, skipna=True, *args, **kwargs\n    ) -> int:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return int position of the largest value in the Series.\n        \"\"\"\n        result = self.reset_index(drop=True).idxmax(\n            axis=axis, skipna=skipna, *args, **kwargs\n        )\n        if np.isnan(result) or result is pandas.NA:\n            result = -1\n        return result\n\n    def argmin(\n        self, axis=None, skipna=True, *args, **kwargs\n    ) -> int:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return int position of the smallest value in the Series.\n        \"\"\"\n        result = self.reset_index(drop=True).idxmin(\n            axis=axis, skipna=skipna, *args, **kwargs\n        )\n        if np.isnan(result) or result is pandas.NA:\n            result = -1\n        return result\n\n    def argsort(\n        self, axis=0, kind=\"quicksort\", order=None, stable=None\n    ) -> Series:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return the integer indices that would sort the Series values.\n        \"\"\"\n        return self.__constructor__(\n            query_compiler=self._query_compiler.argsort(\n                # 'stable' parameter has no effect in Pandas and is only accepted\n                # for compatibility with NumPy, so we're not passing it forward on purpose\n                axis=axis,\n                kind=kind,\n                order=order,\n            )\n        )\n\n    def autocorr(self, lag=1) -> float:  # noqa: PR01, RT01, D200\n        
\"\"\"\n        Compute the lag-N autocorrelation.\n        \"\"\"\n        return self.corr(self.shift(lag))\n\n    def between(\n        self, left, right, inclusive: str = \"both\"\n    ) -> Series:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return boolean Series equivalent to left <= series <= right.\n        \"\"\"\n        # 'pandas.Series.between()' only uses public Series' API,\n        # so passing a Modin Series there is safe\n        return pandas.Series.between(self, left, right, inclusive)\n\n    def combine(self, other, func, fill_value=None) -> Series:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Combine the Series with a Series or scalar according to `func`.\n        \"\"\"\n        return super(Series, self).combine(\n            other, lambda s1, s2: s1.combine(s2, func, fill_value=fill_value)\n        )\n\n    def compare(\n        self,\n        other: Series,\n        align_axis: Union[str, int] = 1,\n        keep_shape: bool = False,\n        keep_equal: bool = False,\n        result_names: tuple = (\"self\", \"other\"),\n    ) -> Union[DataFrame, Series]:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Compare to another Series and show the differences.\n        \"\"\"\n        if not isinstance(other, Series):\n            raise TypeError(f\"Cannot compare Series to {type(other)}\")\n        result = self.to_frame().compare(\n            other.to_frame(),\n            align_axis=align_axis,\n            keep_shape=keep_shape,\n            keep_equal=keep_equal,\n            result_names=result_names,\n        )\n        if align_axis == \"columns\" or align_axis == 1:\n            # Pandas.DataFrame.Compare returns a dataframe with a multidimensional index object as the\n            # columns so we have to change column object back.\n            result.columns = pandas.Index([\"self\", \"other\"])\n        else:\n            result = result.squeeze().rename(None)\n        return result\n\n    def corr(\n        self, other, 
method=\"pearson\", min_periods=None\n    ) -> float:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Compute correlation with `other` Series, excluding missing values.\n        \"\"\"\n        if method == \"pearson\":\n            this, other = self.align(other, join=\"inner\", copy=False)\n            this = self.__constructor__(this)\n            other = self.__constructor__(other)\n\n            if len(this) == 0:\n                return np.nan\n            if len(this) != len(other):\n                raise ValueError(\"Operands must have same size\")\n\n            if min_periods is None:\n                min_periods = 1\n\n            valid = this.notna() & other.notna()\n            if not valid.all():\n                this = this[valid]\n                other = other[valid]\n            if len(this) < min_periods:\n                return np.nan\n\n            this = this.astype(dtype=\"float64\")\n            other = other.astype(dtype=\"float64\")\n            this -= this.mean()\n            other -= other.mean()\n\n            other = other.__constructor__(query_compiler=other._query_compiler.conj())\n            result = this * other / (len(this) - 1)\n            result = np.array([result.sum()])\n\n            stddev_this = ((this * this) / (len(this) - 1)).sum()\n            stddev_other = ((other * other) / (len(other) - 1)).sum()\n\n            stddev_this = np.array([np.sqrt(stddev_this)])\n            stddev_other = np.array([np.sqrt(stddev_other)])\n\n            result /= stddev_this * stddev_other\n\n            np.clip(result.real, -1, 1, out=result.real)\n            if np.iscomplexobj(result):\n                np.clip(result.imag, -1, 1, out=result.imag)\n            return result[0]\n\n        return self._query_compiler.series_corr(\n            other=other, method=method, min_periods=min_periods\n        )\n\n    def count(self) -> int:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return number of non-NA/null observations in 
the Series.\n        \"\"\"\n        return super(Series, self).count()\n\n    def cov(\n        self, other, min_periods=None, ddof: Optional[int] = 1\n    ) -> float:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Compute covariance with Series, excluding missing values.\n        \"\"\"\n        this, other = self.align(other, join=\"inner\", copy=False)\n        this = self.__constructor__(this)\n        other = self.__constructor__(other)\n        if len(this) == 0:\n            return np.nan\n\n        if len(this) != len(other):\n            raise ValueError(\"Operands must have same size\")\n\n        if min_periods is None:\n            min_periods = 1\n\n        valid = this.notna() & other.notna()\n        if not valid.all():\n            this = this[valid]\n            other = other[valid]\n\n        if len(this) < min_periods:\n            return np.nan\n\n        this = this.astype(dtype=\"float64\")\n        other = other.astype(dtype=\"float64\")\n\n        this -= this.mean()\n        other -= other.mean()\n\n        other = other.__constructor__(query_compiler=other._query_compiler.conj())\n        result = this * other / (len(this) - ddof)\n        result = result.sum()\n        return result\n\n    def describe(\n        self,\n        percentiles=None,\n        include=None,\n        exclude=None,\n    ) -> Union[DataFrame, Series]:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Generate descriptive statistics.\n        \"\"\"\n        # Pandas ignores the `include` and `exclude` for Series for some reason.\n        return super(Series, self).describe(\n            percentiles=percentiles,\n            include=None,\n            exclude=None,\n        )\n\n    def diff(self, periods=1) -> Series:  # noqa: PR01, RT01, D200\n        \"\"\"\n        First discrete difference of element.\n        \"\"\"\n        return super(Series, self).diff(periods=periods, axis=0)\n\n    def divmod(\n        self, other, level=None, fill_value=None, 
axis=0\n    ) -> tuple[Series, Series]:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return Integer division and modulo of series and `other`, element-wise (binary operator `divmod`).\n        \"\"\"\n        division, modulo = self._query_compiler.divmod(\n            other=other, level=level, fill_value=fill_value, axis=axis\n        )\n        return self.__constructor__(query_compiler=division), self.__constructor__(\n            query_compiler=modulo\n        )\n\n    def dot(self, other) -> Union[Series, np.ndarray]:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Compute the dot product between the Series and the columns of `other`.\n        \"\"\"\n        if isinstance(other, BasePandasDataset):\n            common = self.index.union(other.index)\n            if len(common) > len(self) or len(common) > len(other):\n                raise ValueError(\"Matrices are not aligned\")\n\n            qc = other.reindex(index=common)._query_compiler\n            if isinstance(other, Series):\n                return self._reduce_dimension(\n                    query_compiler=self._query_compiler.dot(\n                        qc, squeeze_self=True, squeeze_other=True\n                    )\n                )\n            else:\n                return self.__constructor__(\n                    query_compiler=self._query_compiler.dot(\n                        qc, squeeze_self=True, squeeze_other=False\n                    )\n                )\n\n        other = np.asarray(other)\n        if self.shape[0] != other.shape[0]:\n            raise ValueError(\n                \"Dot product shape mismatch, {} vs {}\".format(self.shape, other.shape)\n            )\n\n        if len(other.shape) > 1:\n            return (\n                self._query_compiler.dot(other, squeeze_self=True).to_numpy().squeeze()\n            )\n\n        return self._reduce_dimension(\n            query_compiler=self._query_compiler.dot(other, squeeze_self=True)\n        )\n\n    def 
drop_duplicates(\n        self, *, keep=\"first\", inplace=False, ignore_index=False\n    ) -> Union[Series, None]:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return Series with duplicate values removed.\n        \"\"\"\n        return super(Series, self).drop_duplicates(\n            keep=keep, inplace=inplace, ignore_index=ignore_index\n        )\n\n    def dropna(\n        self, *, axis=0, inplace=False, how=None, ignore_index=False\n    ) -> Union[Series, None]:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return a new Series with missing values removed.\n        \"\"\"\n        return super(Series, self).dropna(\n            axis=axis, inplace=inplace, ignore_index=ignore_index\n        )\n\n    def duplicated(self, keep=\"first\") -> Series:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Indicate duplicate Series values.\n        \"\"\"\n        name = self.name\n        result = self.to_frame().duplicated(keep=keep)\n        # DataFrame.duplicated drops the name, so we need to manually restore it\n        if name is not None:\n            result.name = name\n        return result\n\n    def eq(\n        self, other, level=None, fill_value=None, axis=0\n    ) -> Series:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return Equal to of series and `other`, element-wise (binary operator `eq`).\n        \"\"\"\n        new_self, new_other = self._prepare_inter_op(other)\n        return new_self._binary_op(\n            \"eq\",\n            new_other,\n            level=level,\n            fill_value=fill_value,\n            axis=axis,\n            squeeze_other=isinstance(other, Series),\n        )\n\n    def equals(self, other) -> bool:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Test whether two objects contain the same elements.\n        \"\"\"\n        if isinstance(other, pandas.Series):\n            # Copy into a Modin Series to simplify logic below\n            other = self.__constructor__(other)\n\n        if type(self) is not 
type(other) or not self.index.equals(other.index):\n            return False\n\n        old_name_self = self.name\n        old_name_other = other.name\n        try:\n            self.name = \"temp_name_for_equals_op\"\n            other.name = \"temp_name_for_equals_op\"\n            # this function should return only scalar\n            res = self.__constructor__(\n                query_compiler=self._query_compiler.equals(other._query_compiler)\n            )\n        finally:\n            self.name = old_name_self\n            other.name = old_name_other\n        return res.all()\n\n    def explode(self, ignore_index: bool = False) -> Series:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Transform each element of a list-like to a row.\n        \"\"\"\n        return super(Series, self).explode(\n            MODIN_UNNAMED_SERIES_LABEL if self.name is None else self.name,\n            ignore_index=ignore_index,\n        )\n\n    def factorize(self, sort=False, use_na_sentinel=True):  # noqa: PR01, RT01, D200\n        \"\"\"\n        Encode the object as an enumerated type or categorical variable.\n        \"\"\"\n        return self._default_to_pandas(\n            pandas.Series.factorize,\n            sort=sort,\n            use_na_sentinel=use_na_sentinel,\n        )\n\n    def case_when(self, caselist) -> Series:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Replace values where the conditions are True.\n        \"\"\"\n        modin_type = type(self)\n        caselist = [\n            tuple(\n                data._query_compiler if isinstance(data, modin_type) else data\n                for data in case_tuple\n            )\n            for case_tuple in caselist\n        ]\n        return self.__constructor__(\n            query_compiler=self._query_compiler.case_when(caselist=caselist)\n        )\n\n    def fillna(\n        self,\n        value=None,\n        *,\n        method=None,\n        axis=None,\n        inplace=False,\n        
limit=None,\n        downcast=lib.no_default,\n    ) -> Union[Series, None]:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Fill NaNs inside of a Series object.\n        \"\"\"\n        if isinstance(value, BasePandasDataset) and not isinstance(value, Series):\n            raise TypeError(\n                '\"value\" parameter must be a scalar, dict or Series, but '\n                + f'you passed a \"{type(value).__name__}\"'\n            )\n        return super(Series, self).fillna(\n            squeeze_self=True,\n            squeeze_value=isinstance(value, Series),\n            value=value,\n            method=method,\n            axis=axis,\n            inplace=inplace,\n            limit=limit,\n            downcast=downcast,\n        )\n\n    def floordiv(\n        self, other, level=None, fill_value=None, axis=0\n    ) -> Series:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Get Integer division of series and `other`, element-wise (binary operator `floordiv`).\n        \"\"\"\n        new_self, new_other = self._prepare_inter_op(other)\n        return super(Series, new_self).floordiv(\n            new_other, level=level, fill_value=fill_value, axis=axis\n        )\n\n    def ge(\n        self, other, level=None, fill_value=None, axis=0\n    ) -> Series:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return greater than or equal to of series and `other`, element-wise (binary operator `ge`).\n        \"\"\"\n        new_self, new_other = self._prepare_inter_op(other)\n        return new_self._binary_op(\n            \"ge\",\n            new_other,\n            level=level,\n            fill_value=fill_value,\n            axis=axis,\n            squeeze_other=isinstance(other, Series),\n        )\n\n    def groupby(\n        self,\n        by=None,\n        axis=0,\n        level=None,\n        as_index=True,\n        sort=True,\n        group_keys=True,\n        observed=lib.no_default,\n        dropna: bool = True,\n    ):  # noqa: PR01, RT01, 
D200\n        \"\"\"\n        Group Series using a mapper or by a Series of columns.\n        \"\"\"\n        from .groupby import SeriesGroupBy\n\n        if not as_index:\n            raise TypeError(\"as_index=False only valid with DataFrame\")\n        # SeriesGroupBy expects a query compiler object if it is available\n        if isinstance(by, Series):\n            by = by._query_compiler\n        elif callable(by):\n            by = by(self.index)\n        elif by is None and level is None:\n            raise TypeError(\"You have to supply one of 'by' and 'level'\")\n        return SeriesGroupBy(\n            self,\n            by,\n            axis,\n            level,\n            as_index,\n            sort,\n            group_keys,\n            idx_name=None,\n            observed=observed,\n            drop=False,\n            dropna=dropna,\n            backend_pinned=self.is_backend_pinned(),\n        )\n\n    def gt(\n        self, other, level=None, fill_value=None, axis=0\n    ) -> Series:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return greater than of series and `other`, element-wise (binary operator `gt`).\n        \"\"\"\n        new_self, new_other = self._prepare_inter_op(other)\n        return new_self._binary_op(\n            \"gt\",\n            new_other,\n            level=level,\n            fill_value=fill_value,\n            axis=axis,\n            squeeze_other=isinstance(other, Series),\n        )\n\n    def hist(\n        self,\n        by=None,\n        ax=None,\n        grid: bool = True,\n        xlabelsize: int | None = None,\n        xrot: float | None = None,\n        ylabelsize: int | None = None,\n        yrot: float | None = None,\n        figsize: tuple[int, int] | None = None,\n        bins: int | Sequence[int] = 10,\n        backend: str | None = None,\n        legend: bool = False,\n        **kwargs,\n    ):  # noqa: PR01, RT01, D200\n        \"\"\"\n        Draw histogram of the input series using 
matplotlib.\n        \"\"\"\n        return self._default_to_pandas(\n            pandas.Series.hist,\n            by=by,\n            ax=ax,\n            grid=grid,\n            xlabelsize=xlabelsize,\n            xrot=xrot,\n            ylabelsize=ylabelsize,\n            yrot=yrot,\n            figsize=figsize,\n            bins=bins,\n            backend=backend,\n            legend=legend,\n            **kwargs,\n        )\n\n    def idxmax(\n        self, axis=0, skipna=True, *args, **kwargs\n    ) -> Hashable:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return the row label of the maximum value.\n        \"\"\"\n        return super(Series, self).idxmax(axis=axis, skipna=skipna, *args, **kwargs)\n\n    def idxmin(\n        self, axis=0, skipna=True, *args, **kwargs\n    ) -> Hashable:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return the row label of the minimum value.\n        \"\"\"\n        return super(Series, self).idxmin(axis=axis, skipna=skipna, *args, **kwargs)\n\n    def info(\n        self,\n        verbose: bool | None = None,\n        buf: IO[str] | None = None,\n        max_cols: int | None = None,\n        memory_usage: bool | str | None = None,\n        show_counts: bool = True,\n    ) -> None:\n        return SeriesInfo(self, memory_usage).render(\n            buf=buf,\n            max_cols=max_cols,\n            verbose=verbose,\n            show_counts=show_counts,\n        )\n\n    def isna(self) -> Series:\n        \"\"\"\n        Detect missing values.\n\n        Returns\n        -------\n        The result of detecting missing values.\n        \"\"\"\n        return super(Series, self).isna()\n\n    def isnull(self) -> Series:\n        \"\"\"\n        Detect missing values.\n\n        Returns\n        -------\n        The result of detecting missing values.\n        \"\"\"\n        return super(Series, self).isnull()\n\n    def item(self) -> Scalar:  # noqa: RT01, D200\n        \"\"\"\n        Return the first element of 
the underlying data as a Python scalar.\n        \"\"\"\n        return self[0]\n\n    def items(self) -> Iterable[tuple[Hashable, Any]]:  # noqa: D200\n        \"\"\"\n        Lazily iterate over (index, value) tuples.\n        \"\"\"\n\n        def item_builder(s):\n            return s.name, s.squeeze()\n\n        partition_iterator = PartitionIterator(self.to_frame(), 0, item_builder)\n        for v in partition_iterator:\n            yield v\n\n    def keys(self) -> pandas.Index:  # noqa: RT01, D200\n        \"\"\"\n        Return alias for index.\n        \"\"\"\n        return self.index\n\n    def le(\n        self, other, level=None, fill_value=None, axis=0\n    ) -> Series:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return less than or equal to of series and `other`, element-wise (binary operator `le`).\n        \"\"\"\n        new_self, new_other = self._prepare_inter_op(other)\n        return new_self._binary_op(\n            \"le\",\n            new_other,\n            level=level,\n            fill_value=fill_value,\n            axis=axis,\n            squeeze_other=isinstance(other, Series),\n        )\n\n    def lt(\n        self, other, level=None, fill_value=None, axis=0\n    ) -> Series:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return less than of series and `other`, element-wise (binary operator `lt`).\n        \"\"\"\n        new_self, new_other = self._prepare_inter_op(other)\n        return new_self._binary_op(\n            \"lt\",\n            new_other,\n            level=level,\n            fill_value=fill_value,\n            axis=axis,\n            squeeze_other=isinstance(other, Series),\n        )\n\n    def map(self, arg, na_action=None) -> Series:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Map values of Series according to input correspondence.\n        \"\"\"\n        if isinstance(arg, type(self)):\n            # HACK: if we don't cast to pandas, then the execution engine will try to\n            # 
propagate the distributed Series to workers and most likely would have\n            # some performance problems.\n            # TODO: A better way of doing so could be passing this `arg` as a query compiler\n            # and broadcast accordingly.\n            arg = arg._to_pandas()\n\n        if not callable(arg) and hasattr(arg, \"get\"):\n            mapper = arg\n\n            def arg(s):\n                return mapper.get(s, np.nan)\n\n        return self.__constructor__(\n            query_compiler=self._query_compiler.map(\n                lambda s: (\n                    arg(s) if pandas.isnull(s) is not True or na_action is None else s\n                )\n            )\n        )\n\n    def sem(\n        self,\n        axis: Optional[Axis] = None,\n        skipna: bool = True,\n        ddof: int = 1,\n        numeric_only=False,\n        **kwargs,\n    ) -> Union[float, Series]:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return unbiased standard error of the mean over requested axis.\n        \"\"\"\n        return super(Series, self)._stat_operation(\n            \"sem\", axis, skipna, numeric_only, ddof=ddof, **kwargs\n        )\n\n    def std(\n        self,\n        axis: Optional[Axis] = None,\n        skipna: bool = True,\n        ddof: int = 1,\n        numeric_only=False,\n        **kwargs,\n    ) -> Union[float, Series]:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return sample standard deviation over requested axis.\n        \"\"\"\n        return super(Series, self)._stat_operation(\n            \"std\", axis, skipna, numeric_only, ddof=ddof, **kwargs\n        )\n\n    def var(\n        self,\n        axis: Optional[Axis] = None,\n        skipna: bool = True,\n        ddof: int = 1,\n        numeric_only=False,\n        **kwargs,\n    ) -> Union[float, Series]:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return unbiased variance over requested axis.\n        \"\"\"\n        return super(Series, self)._stat_operation(\n     
       \"var\", axis, skipna, numeric_only, ddof=ddof, **kwargs\n        )\n\n    def memory_usage(self, index=True, deep=False) -> int:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return the memory usage of the Series.\n        \"\"\"\n        return super(Series, self).memory_usage(index=index, deep=deep).sum()\n\n    def mod(\n        self, other, level=None, fill_value=None, axis=0\n    ) -> Series:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return Modulo of series and `other`, element-wise (binary operator `mod`).\n        \"\"\"\n        new_self, new_other = self._prepare_inter_op(other)\n        return super(Series, new_self).mod(\n            new_other, level=level, fill_value=fill_value, axis=axis\n        )\n\n    def mode(self, dropna=True) -> Series:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return the mode(s) of the Series.\n        \"\"\"\n        return super(Series, self).mode(numeric_only=False, dropna=dropna)\n\n    def mul(\n        self, other, level=None, fill_value=None, axis=0\n    ) -> Series:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return multiplication of series and `other`, element-wise (binary operator `mul`).\n        \"\"\"\n        new_self, new_other = self._prepare_inter_op(other)\n        return super(Series, new_self).mul(\n            new_other, level=level, fill_value=fill_value, axis=axis\n        )\n\n    multiply = mul\n\n    def rmul(\n        self, other, level=None, fill_value=None, axis=0\n    ) -> Series:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return multiplication of series and `other`, element-wise (binary operator `mul`).\n        \"\"\"\n        new_self, new_other = self._prepare_inter_op(other)\n        return super(Series, new_self).rmul(\n            new_other, level=level, fill_value=fill_value, axis=axis\n        )\n\n    def ne(\n        self, other, level=None, fill_value=None, axis=0\n    ) -> Series:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return 
not equal to of series and `other`, element-wise (binary operator `ne`).\n        \"\"\"\n        new_self, new_other = self._prepare_inter_op(other)\n        return new_self._binary_op(\n            \"ne\",\n            new_other,\n            level=level,\n            fill_value=fill_value,\n            axis=axis,\n            squeeze_other=isinstance(other, Series),\n        )\n\n    def nlargest(self, n=5, keep=\"first\") -> Series:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return the largest `n` elements.\n        \"\"\"\n        if len(self._query_compiler.columns) == 0:\n            # pandas returns empty series when requested largest/smallest from empty series\n            return self.__constructor__(data=[], dtype=float)\n        return Series(\n            query_compiler=self._query_compiler.nlargest(\n                n=n, columns=self.name, keep=keep\n            )\n        )\n\n    def nsmallest(self, n=5, keep=\"first\") -> Series:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return the smallest `n` elements.\n        \"\"\"\n        if len(self._query_compiler.columns) == 0:\n            # pandas returns empty series when requested largest/smallest from empty series\n            return self.__constructor__(data=[], dtype=float)\n        return self.__constructor__(\n            query_compiler=self._query_compiler.nsmallest(\n                n=n, columns=self.name, keep=keep\n            )\n        )\n\n    def shift(\n        self,\n        periods=1,\n        freq=None,\n        axis=0,\n        fill_value=lib.no_default,\n        suffix=None,\n    ) -> Series:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Shift index by desired number of periods with an optional time `freq`.\n        \"\"\"\n        # pandas 2.1.0 ignores suffix parameter (https://github.com/pandas-dev/pandas/issues/54806)\n        if freq is not None and fill_value is not lib.no_default:\n            raise ValueError(\n                \"Cannot pass both 'freq' 
and 'fill_value' to \"\n                + f\"{type(self).__name__}.shift\"\n            )\n        if axis == 1:\n            raise ValueError(\n                f\"No axis named {axis} for object type {type(self).__name__}\"\n            )\n        return super(type(self), self).shift(\n            periods=periods, freq=freq, axis=axis, fill_value=fill_value\n        )\n\n    def unstack(\n        self, level=-1, fill_value=None, sort=True\n    ) -> DataFrame:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Unstack, also known as pivot, Series with MultiIndex to produce DataFrame.\n        \"\"\"\n        from .dataframe import DataFrame\n\n        if not sort:\n            # TODO: it should be easy to add support for sort == False\n            return self._default_to_pandas(\n                pandas.Series.unstack, level=level, fill_value=fill_value, sort=sort\n            )\n\n        # We can't unstack a Series object, if we don't have a MultiIndex.\n        if len(self.index.names) > 1:\n            result = DataFrame(\n                query_compiler=self._query_compiler.unstack(level, fill_value)\n            )\n        else:\n            raise ValueError(\n                f\"index must be a MultiIndex to unstack, {type(self.index)} was passed\"\n            )\n\n        return result.droplevel(0, axis=1) if result.columns.nlevels > 1 else result\n\n    @property\n    def plot(\n        self,\n        kind=\"line\",\n        ax=None,\n        figsize=None,\n        use_index=True,\n        title=None,\n        grid=None,\n        legend=False,\n        style=None,\n        logx=False,\n        logy=False,\n        loglog=False,\n        xticks=None,\n        yticks=None,\n        xlim=None,\n        ylim=None,\n        rot=None,\n        fontsize=None,\n        colormap=None,\n        table=False,\n        yerr=None,\n        xerr=None,\n        label=None,\n        secondary_y=False,\n        **kwds,\n    ):  # noqa: PR01, RT01, D200\n        \"\"\"\n       
 Make plot of Series.\n        \"\"\"\n        return self._to_pandas().plot\n\n    def pow(\n        self, other, level=None, fill_value=None, axis=0\n    ) -> Series:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return exponential power of series and `other`, element-wise (binary operator `pow`).\n        \"\"\"\n        new_self, new_other = self._prepare_inter_op(other)\n        return super(Series, new_self).pow(\n            new_other, level=level, fill_value=fill_value, axis=axis\n        )\n\n    @_inherit_docstrings(pandas.Series.prod, apilink=\"pandas.Series.prod\")\n    def prod(\n        self,\n        axis=None,\n        skipna=True,\n        numeric_only=False,\n        min_count=0,\n        **kwargs,\n    ) -> Scalar:\n        validate_bool_kwarg(skipna, \"skipna\", none_allowed=False)\n        axis = self._get_axis_number(axis)\n        new_index = self.columns if axis else self.index\n        if min_count > len(new_index):\n            return np.nan\n\n        data = self._validate_dtypes_prod_mean(axis, numeric_only, ignore_axis=True)\n        if min_count > 1:\n            return data._reduce_dimension(\n                data._query_compiler.prod_min_count(\n                    axis=axis,\n                    skipna=skipna,\n                    numeric_only=numeric_only,\n                    min_count=min_count,\n                    **kwargs,\n                )\n            )\n        return data._reduce_dimension(\n            data._query_compiler.prod(\n                axis=axis,\n                skipna=skipna,\n                numeric_only=numeric_only,\n                min_count=min_count,\n                **kwargs,\n            )\n        )\n\n    product = prod\n\n    def ravel(self, order=\"C\") -> ArrayLike:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return the flattened underlying data as an ndarray.\n        \"\"\"\n        data = self._query_compiler.to_numpy().flatten(order=order)\n        if isinstance(self.dtype, 
pandas.CategoricalDtype):\n            data = pandas.Categorical(data, dtype=self.dtype)\n\n        return data\n\n    @_inherit_docstrings(pandas.Series.reindex, apilink=\"pandas.Series.reindex\")\n    def reindex(\n        self,\n        index=None,\n        *,\n        axis: Axis = None,\n        method: str = None,\n        copy: Optional[bool] = None,\n        level=None,\n        fill_value=None,\n        limit: int = None,\n        tolerance=None,\n    ) -> Series:  # noqa: PR01, RT01, D200\n        if fill_value is None:\n            fill_value = np.nan\n        return super(Series, self).reindex(\n            index=index,\n            columns=None,\n            method=method,\n            level=level,\n            copy=copy,\n            limit=limit,\n            tolerance=tolerance,\n            fill_value=fill_value,\n        )\n\n    def rename_axis(\n        self,\n        mapper=lib.no_default,\n        *,\n        index=lib.no_default,\n        axis=0,\n        copy=True,\n        inplace=False,\n    ) -> Union[Series, None]:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Set the name of the axis for the index or columns.\n        \"\"\"\n        return super().rename_axis(\n            mapper=mapper, index=index, axis=axis, copy=copy, inplace=inplace\n        )\n\n    def _set_axis_name(self, name, axis=0, inplace=False) -> Union[Series, None]:\n        \"\"\"\n        Alter the name of the axis.\n\n        Parameters\n        ----------\n        name : str\n            Name to set on the index.\n        axis : str or int, default: 0\n            The axis to set the label.\n            Only 0 is valid for Series.\n        inplace : bool, default: False\n            Whether to modify `self` directly or return a copy.\n\n        Returns\n        -------\n        Series or None\n        \"\"\"\n        self._get_axis_number(axis)  # raises ValueError if not 0\n        renamed = self if inplace else self.copy()\n        renamed.index = 
renamed.index.set_names(name)\n        return None if inplace else renamed\n\n    def rename(\n        self,\n        index=None,\n        *,\n        axis=None,\n        copy=None,\n        inplace=False,\n        level=None,\n        errors=\"ignore\",\n    ) -> Union[Series, None]:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Alter Series index labels or name.\n        \"\"\"\n        non_mapping = is_scalar(index) or (\n            is_list_like(index) and not is_dict_like(index)\n        )\n        if non_mapping:\n            if inplace:\n                self.name = index\n            else:\n                self_cp = self.copy()\n                self_cp.name = index\n                return self_cp\n        else:\n            from .dataframe import DataFrame\n\n            result = DataFrame(self.copy()).rename(index=index).squeeze(axis=1)\n            result.name = self.name\n            return result\n\n    def repeat(self, repeats, axis=None) -> Series:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Repeat elements of a Series.\n        \"\"\"\n        if (isinstance(repeats, int) and repeats == 0) or (\n            is_list_like(repeats) and len(repeats) == 1 and repeats[0] == 0\n        ):\n            return self.__constructor__()\n\n        return self.__constructor__(query_compiler=self._query_compiler.repeat(repeats))\n\n    def reset_index(\n        self,\n        level=None,\n        *,\n        drop=False,\n        name=lib.no_default,\n        inplace=False,\n        allow_duplicates=False,\n    ) -> Union[DataFrame, Series, None]:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Generate a new Series with the index reset.\n        \"\"\"\n        if name is lib.no_default:\n            # For backwards compatibility, keep columns as [0] instead of\n            #  [None] when self.name is None\n            name = 0 if self.name is None else self.name\n\n        if drop and level is None:\n            new_idx = 
pandas.RangeIndex(len(self))\n            if inplace:\n                self.index = new_idx\n            else:\n                result = self.copy()\n                result.index = new_idx\n                return result\n        elif not drop and inplace:\n            raise TypeError(\n                \"Cannot reset_index inplace on a Series to create a DataFrame\"\n            )\n        else:\n            obj = self.copy()\n            obj.name = name\n            from .dataframe import DataFrame\n\n            # Here `query_compiler` is passed instead of `obj` to avoid unnecessary `copy()`\n            # inside `DataFrame` constructor\n            return DataFrame(query_compiler=obj._query_compiler).reset_index(\n                level=level,\n                drop=drop,\n                inplace=inplace,\n                col_level=0,\n                col_fill=\"\",\n                allow_duplicates=allow_duplicates,\n                names=None,\n            )\n\n    def rdivmod(\n        self, other, level=None, fill_value=None, axis=0\n    ) -> Series:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return integer division and modulo of series and `other`, element-wise (binary operator `rdivmod`).\n        \"\"\"\n        division, modulo = self._query_compiler.rdivmod(\n            other=other, level=level, fill_value=fill_value, axis=axis\n        )\n        return self.__constructor__(query_compiler=division), self.__constructor__(\n            query_compiler=modulo\n        )\n\n    def rfloordiv(\n        self, other, level=None, fill_value=None, axis=0\n    ) -> Series:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return integer division of series and `other`, element-wise (binary operator `rfloordiv`).\n        \"\"\"\n        new_self, new_other = self._prepare_inter_op(other)\n        return super(Series, new_self).rfloordiv(\n            new_other, level=level, fill_value=fill_value, axis=axis\n        )\n\n    def rmod(\n        self, other, 
level=None, fill_value=None, axis=0\n    ) -> Series:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return modulo of series and `other`, element-wise (binary operator `rmod`).\n        \"\"\"\n        new_self, new_other = self._prepare_inter_op(other)\n        return super(Series, new_self).rmod(\n            new_other, level=level, fill_value=fill_value, axis=axis\n        )\n\n    def rpow(\n        self, other, level=None, fill_value=None, axis=0\n    ) -> Series:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return exponential power of series and `other`, element-wise (binary operator `rpow`).\n        \"\"\"\n        new_self, new_other = self._prepare_inter_op(other)\n        return super(Series, new_self).rpow(\n            new_other, level=level, fill_value=fill_value, axis=axis\n        )\n\n    def rsub(\n        self, other, level=None, fill_value=None, axis=0\n    ) -> Series:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return subtraction of series and `other`, element-wise (binary operator `rsub`).\n        \"\"\"\n        new_self, new_other = self._prepare_inter_op(other)\n        return super(Series, new_self).rsub(\n            new_other, level=level, fill_value=fill_value, axis=axis\n        )\n\n    def rtruediv(\n        self, other, level=None, fill_value=None, axis=0\n    ) -> Series:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return floating division of series and `other`, element-wise (binary operator `rtruediv`).\n        \"\"\"\n        new_self, new_other = self._prepare_inter_op(other)\n        return super(Series, new_self).rtruediv(\n            new_other, level=level, fill_value=fill_value, axis=axis\n        )\n\n    rdiv = rtruediv\n\n    def quantile(\n        self, q=0.5, interpolation=\"linear\"\n    ) -> Union[float, Series]:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return value at the given quantile.\n        \"\"\"\n        return super(Series, self).quantile(\n            q=q,\n            
axis=0,\n            numeric_only=False,\n            interpolation=interpolation,\n            method=\"single\",\n        )\n\n    def reorder_levels(self, order) -> Series:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Rearrange index levels using input order.\n        \"\"\"\n        return super(Series, self).reorder_levels(order)\n\n    def replace(\n        self,\n        to_replace=None,\n        value=lib.no_default,\n        *,\n        inplace=False,\n        limit=None,\n        regex=False,\n        method: str | lib.NoDefault = lib.no_default,\n    ) -> Series:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Replace values given in `to_replace` with `value`.\n        \"\"\"\n        inplace = validate_bool_kwarg(inplace, \"inplace\")\n        new_query_compiler = self._query_compiler.replace(\n            to_replace=to_replace,\n            value=value,\n            inplace=False,\n            limit=limit,\n            regex=regex,\n            method=method,\n        )\n        return self._create_or_update_from_compiler(new_query_compiler, inplace)\n\n    def searchsorted(\n        self, value, side=\"left\", sorter=None\n    ) -> Union[npt.NDArray[np.intp], np.intp]:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Find indices where elements should be inserted to maintain order.\n        \"\"\"\n        searchsorted_qc = self._query_compiler\n        if sorter is not None:\n            # `iloc` method works slowly (https://github.com/modin-project/modin/issues/1903),\n            # so _default_to_pandas is used for now\n            # searchsorted_qc = self.iloc[sorter].reset_index(drop=True)._query_compiler\n            # sorter = None\n            return self._default_to_pandas(\n                pandas.Series.searchsorted, value, side=side, sorter=sorter\n            )\n        # searchsorted should return item number irrespective of Series index, so\n        # Series.index is always set to pandas.RangeIndex, which can be easily 
processed\n        # on the query_compiler level\n        if not isinstance(searchsorted_qc.index, pandas.RangeIndex):\n            searchsorted_qc = searchsorted_qc.reset_index(drop=True)\n\n        result = self.__constructor__(\n            query_compiler=searchsorted_qc.searchsorted(\n                value=value, side=side, sorter=sorter\n            )\n        ).squeeze()\n\n        # matching Pandas output\n        if not is_scalar(value) and not is_list_like(result):\n            result = np.array([result])\n        elif isinstance(result, type(self)):\n            result = result.to_numpy()\n\n        return result\n\n    def sort_values(\n        self,\n        *,\n        axis=0,\n        ascending=True,\n        inplace=False,\n        kind=\"quicksort\",\n        na_position=\"last\",\n        ignore_index: bool = False,\n        key: Optional[IndexKeyFunc] = None,\n    ) -> Union[Series, None]:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Sort by the values.\n        \"\"\"\n        from .dataframe import DataFrame\n\n        # When we convert to a DataFrame, the name is automatically converted to 0 if it\n        # is None, so we do this to avoid a KeyError.\n        by = self.name if self.name is not None else 0\n        result = (\n            DataFrame(self.copy())\n            .sort_values(\n                by=by,\n                ascending=ascending,\n                inplace=False,\n                kind=kind,\n                na_position=na_position,\n                ignore_index=ignore_index,\n                key=key,\n            )\n            .squeeze(axis=1)\n        )\n        result.name = self.name\n        return self._create_or_update_from_compiler(\n            result._query_compiler, inplace=inplace\n        )\n\n    cat = CachedAccessor(\"cat\", CategoryMethods)\n    sparse = CachedAccessor(\"sparse\", SparseAccessor)\n    str = CachedAccessor(\"str\", StringMethods)\n    dt = CachedAccessor(\"dt\", DatetimeProperties)\n    
list = CachedAccessor(\"list\", ListAccessor)\n    struct = CachedAccessor(\"struct\", StructAccessor)\n\n    def squeeze(self, axis=None) -> Union[Series, Scalar]:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Squeeze 1 dimensional axis objects into scalars.\n        \"\"\"\n        if axis is not None:\n            # Validate `axis`\n            pandas.Series._get_axis_number(axis)\n        if len(self) == 1:\n            return self._reduce_dimension(self._query_compiler)\n        else:\n            return self.copy()\n\n    def sub(\n        self, other, level=None, fill_value=None, axis=0\n    ) -> Series:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return subtraction of Series and `other`, element-wise (binary operator `sub`).\n        \"\"\"\n        new_self, new_other = self._prepare_inter_op(other)\n        return super(Series, new_self).sub(\n            new_other, level=level, fill_value=fill_value, axis=axis\n        )\n\n    subtract = sub\n\n    def sum(\n        self,\n        axis=None,\n        skipna=True,\n        numeric_only=False,\n        min_count=0,\n        **kwargs,\n    ) -> Scalar:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return the sum of the values.\n        \"\"\"\n        validate_bool_kwarg(skipna, \"skipna\", none_allowed=False)\n        axis = self._get_axis_number(axis)\n\n        new_index = self.columns if axis else self.index\n        if min_count > len(new_index):\n            return np.nan\n\n        data = self._validate_dtypes_prod_mean(axis, numeric_only, ignore_axis=False)\n        if min_count > 1:\n            return data._reduce_dimension(\n                data._query_compiler.sum_min_count(\n                    axis=axis,\n                    skipna=skipna,\n                    numeric_only=numeric_only,\n                    min_count=min_count,\n                    **kwargs,\n                )\n            )\n        return data._reduce_dimension(\n            data._query_compiler.sum(\n      
          axis=axis,\n                skipna=skipna,\n                numeric_only=numeric_only,\n                min_count=min_count,\n                **kwargs,\n            )\n        )\n\n    def swaplevel(self, i=-2, j=-1, copy=None) -> Series:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Swap levels `i` and `j` in a `MultiIndex`.\n        \"\"\"\n        copy = True if copy is None else copy\n        obj = self.copy() if copy else self\n        return super(Series, obj).swaplevel(i, j, axis=0)\n\n    def take(self, indices, axis=0, **kwargs) -> Series:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return the elements in the given positional indices along an axis.\n        \"\"\"\n        return super(Series, self).take(indices, axis=axis, **kwargs)\n\n    def to_dict(self, into=dict) -> dict:  # pragma: no cover # noqa: PR01, RT01, D200\n        \"\"\"\n        Convert Series to {label -> value} dict or dict-like object.\n        \"\"\"\n        return self._query_compiler.series_to_dict(into)\n\n    def to_frame(\n        self, name: Hashable = lib.no_default\n    ) -> DataFrame:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Convert Series to DataFrame.\n        \"\"\"\n        from .dataframe import DataFrame\n\n        if name is None:\n            name = lib.no_default\n\n        self_cp = self.copy()\n        if name is not lib.no_default:\n            self_cp.name = name\n\n        return DataFrame(self_cp)\n\n    def to_json(\n        self,\n        path_or_buf=None,\n        orient=None,\n        date_format=None,\n        double_precision=10,\n        force_ascii=True,\n        date_unit=\"ms\",\n        default_handler=None,\n        lines=False,\n        compression=\"infer\",\n        index=None,\n        indent=None,\n        storage_options: StorageOptions = None,\n        mode=\"w\",\n    ) -> str | None:\n        from modin.core.execution.dispatching.factories.dispatcher import (\n            
FactoryDispatcher,\n        )\n\n        return FactoryDispatcher.to_json_series(\n            self._query_compiler,\n            path_or_buf,\n            orient=orient,\n            date_format=date_format,\n            double_precision=double_precision,\n            force_ascii=force_ascii,\n            date_unit=date_unit,\n            default_handler=default_handler,\n            lines=lines,\n            compression=compression,\n            index=index,\n            indent=indent,\n            storage_options=storage_options,\n            mode=mode,\n        )\n\n    def to_list(self) -> list:  # noqa: RT01, D200\n        \"\"\"\n        Return a list of the values.\n        \"\"\"\n        return self._query_compiler.to_list()\n\n    def to_numpy(\n        self, dtype=None, copy=False, na_value=lib.no_default, **kwargs\n    ) -> np.ndarray:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return the NumPy ndarray representing the values in this Series or Index.\n        \"\"\"\n        from modin.config import ModinNumpy\n\n        if not ModinNumpy.get():\n            return (\n                super(Series, self)\n                .to_numpy(\n                    dtype=dtype,\n                    copy=copy,\n                    na_value=na_value,\n                )\n                .flatten()\n            )\n        else:\n            from ..numpy.arr import array\n\n            return array(self, copy=copy)\n\n    tolist = to_list\n\n    # TODO(williamma12): When we implement to_timestamp, have this call the version\n    # in base.py\n    def to_period(self, freq=None, copy=None) -> Series:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Cast to PeriodArray/Index at a particular frequency.\n        \"\"\"\n        return self._default_to_pandas(\"to_period\", freq=freq, copy=copy)\n\n    def to_string(\n        self,\n        buf=None,\n        na_rep=\"NaN\",\n        float_format=None,\n        header=True,\n        index=True,\n        
length=False,\n        dtype=False,\n        name=False,\n        max_rows=None,\n        min_rows=None,\n    ) -> Union[str, None]:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Render a string representation of the Series.\n        \"\"\"\n        return self._default_to_pandas(\n            pandas.Series.to_string,\n            buf=buf,\n            na_rep=na_rep,\n            float_format=float_format,\n            header=header,\n            index=index,\n            length=length,\n            dtype=dtype,\n            name=name,\n            max_rows=max_rows,\n        )\n\n    # TODO(williamma12): When we implement to_timestamp, have this call the version\n    # in base.py\n    def to_timestamp(\n        self, freq=None, how=\"start\", copy=None\n    ) -> Series:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Cast to DatetimeIndex of Timestamps, at beginning of period.\n        \"\"\"\n        return self._default_to_pandas(\"to_timestamp\", freq=freq, how=how, copy=copy)\n\n    def transpose(self, *args, **kwargs) -> Series:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return the transpose, which is by definition `self`.\n        \"\"\"\n        return self\n\n    # To enable dynamic backend switching, we must use a `def` so the lookup of `self.transpose`\n    # is performed dynamically, whereas declaring `T = property(transpose)` makes it always use\n    # the originally-defined version without the switching wrapper.\n    @property\n    def T(self) -> Series:\n        return self.transpose()\n\n    def truediv(\n        self, other, level=None, fill_value=None, axis=0\n    ) -> Series:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return floating division of series and `other`, element-wise (binary operator `truediv`).\n        \"\"\"\n        new_self, new_other = self._prepare_inter_op(other)\n        return super(Series, new_self).truediv(\n            new_other, level=level, fill_value=fill_value, axis=axis\n        )\n\n    div = 
divide = truediv\n\n    def unique(self) -> ArrayLike:  # noqa: RT01, D200\n        \"\"\"\n        Return unique values of Series object.\n        \"\"\"\n        # `values` can't be used here because it performs unnecessary conversion,\n        # after which the result type does not match the pandas\n        return (\n            self.__constructor__(query_compiler=self._query_compiler.unique())\n            .modin.to_pandas()\n            ._values\n        )\n\n    def update(self, other) -> None:  # noqa: PR01, D200\n        \"\"\"\n        Modify Series in place using values from passed Series.\n        \"\"\"\n        if not isinstance(other, Series):\n            other = self.__constructor__(other)\n        query_compiler = self._query_compiler.series_update(other._query_compiler)\n        self._update_inplace(new_query_compiler=query_compiler)\n\n    def value_counts(\n        self, normalize=False, sort=True, ascending=False, bins=None, dropna=True\n    ) -> Series:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return a Series containing counts of unique values.\n        \"\"\"\n        if bins is not None:\n            # Potentially we could implement `cut` function from pandas API, which\n            # bins values into intervals, and then we can just count them as regular values.\n            # TODO #1333: new_self = self.__constructor__(pd.cut(self, bins, include_lowest=True), dtype=\"interval\")\n            return self._default_to_pandas(\n                pandas.Series.value_counts,\n                normalize=normalize,\n                sort=sort,\n                ascending=ascending,\n                bins=bins,\n                dropna=dropna,\n            )\n        counted_values = super(Series, self).value_counts(\n            subset=self,\n            normalize=normalize,\n            sort=sort,\n            ascending=ascending,\n            dropna=dropna,\n        )\n        return counted_values\n\n    def view(self, dtype=None) -> Series:  
# noqa: PR01, RT01, D200\n        \"\"\"\n        Create a new view of the Series.\n        \"\"\"\n        return self.__constructor__(\n            query_compiler=self._query_compiler.series_view(dtype=dtype)\n        )\n\n    def where(\n        self,\n        cond,\n        other=np.nan,\n        *,\n        inplace=False,\n        axis=None,\n        level=None,\n    ) -> Union[Series, None]:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Replace values where the condition is False.\n        \"\"\"\n        # TODO: probably need to remove this conversion to pandas\n        if isinstance(other, Series):\n            other = to_pandas(other)\n        # TODO: add error checking like for dataframe where, then forward to\n        # same query compiler method\n        return self._default_to_pandas(\n            pandas.Series.where,\n            cond,\n            other=other,\n            inplace=inplace,\n            axis=axis,\n            level=level,\n        )\n\n    @property\n    def attrs(self) -> dict:  # noqa: RT01, D200\n        \"\"\"\n        Return dictionary of global attributes of this dataset.\n        \"\"\"\n\n        def attrs(df):\n            return df.attrs\n\n        return self._default_to_pandas(attrs)\n\n    @property\n    def array(self) -> ExtensionArray:  # noqa: RT01, D200\n        \"\"\"\n        Return the ExtensionArray of the data backing this Series or Index.\n        \"\"\"\n\n        def array(df):\n            return df.array\n\n        return self._default_to_pandas(array)\n\n    @property\n    def axes(self) -> list[pandas.Index]:  # noqa: RT01, D200\n        \"\"\"\n        Return a list of the row axis labels.\n        \"\"\"\n        return [self.index]\n\n    @property\n    def dtype(self) -> DtypeObj:  # noqa: RT01, D200\n        \"\"\"\n        Return the dtype object of the underlying data.\n        \"\"\"\n        return self._query_compiler.dtypes.squeeze()\n\n    dtypes = dtype\n\n    @property\n    def 
empty(self) -> bool:  # noqa: RT01, D200\n        \"\"\"\n        Indicate whether Series is empty.\n        \"\"\"\n        return len(self) == 0\n\n    @property\n    def hasnans(self) -> bool:  # noqa: RT01, D200\n        \"\"\"\n        Return True if Series has any nans.\n        \"\"\"\n        return self.isna().sum() > 0\n\n    @property\n    def is_monotonic_increasing(self) -> bool:  # noqa: RT01, D200\n        \"\"\"\n        Return True if values in the Series are monotonic_increasing.\n        \"\"\"\n        return self._reduce_dimension(self._query_compiler.is_monotonic_increasing())\n\n    @property\n    def is_monotonic_decreasing(self) -> bool:  # noqa: RT01, D200\n        \"\"\"\n        Return True if values in the Series are monotonic_decreasing.\n        \"\"\"\n        return self._reduce_dimension(self._query_compiler.is_monotonic_decreasing())\n\n    @property\n    def is_unique(self) -> bool:  # noqa: RT01, D200\n        \"\"\"\n        Return True if values in the Series are unique.\n        \"\"\"\n        return self.nunique(dropna=False) == len(self)\n\n    @property\n    def nbytes(self) -> int:  # noqa: RT01, D200\n        \"\"\"\n        Return the number of bytes in the underlying data.\n        \"\"\"\n        return self.memory_usage(index=False)\n\n    @property\n    def ndim(self) -> int:  # noqa: RT01, D200\n        \"\"\"\n        Return the number of dimensions of the underlying data, by definition 1.\n        \"\"\"\n        return 1\n\n    def nunique(self, dropna=True) -> int:  # noqa: PR01, RT01, D200\n        \"\"\"\n        Return number of unique elements in the object.\n        \"\"\"\n        return super(Series, self).nunique(dropna=dropna)\n\n    @property\n    def shape(self) -> tuple[int]:  # noqa: RT01, D200\n        \"\"\"\n        Return a tuple of the shape of the underlying data.\n        \"\"\"\n        return (len(self),)\n\n    def reindex_like(\n        self,\n        other,\n        method=None,\n      
  copy: Optional[bool] = None,\n        limit=None,\n        tolerance=None,\n    ) -> Series:\n        # docs say \"Same as calling .reindex(index=other.index, columns=other.columns,...).\":\n        # https://pandas.pydata.org/pandas-docs/version/1.4/reference/api/pandas.Series.reindex_like.html\n        return self.reindex(\n            index=other.index,\n            method=method,\n            copy=copy,\n            limit=limit,\n            tolerance=tolerance,\n        )\n\n    def _to_pandas(self) -> pandas.Series:\n        \"\"\"\n        Convert Modin Series to pandas Series.\n\n        Recommended conversion method: `series.modin.to_pandas()`.\n\n        Returns\n        -------\n        pandas.Series\n        \"\"\"\n        df = self._query_compiler.to_pandas()\n        series = df[df.columns[0]]\n        if self._query_compiler.columns[0] == MODIN_UNNAMED_SERIES_LABEL:\n            series.name = None\n        return series\n\n    def _to_datetime(self, **kwargs) -> Series:\n        \"\"\"\n        Convert `self` to datetime.\n\n        Parameters\n        ----------\n        **kwargs : dict\n            Optional arguments to use during query compiler's\n            `to_datetime` invocation.\n\n        Returns\n        -------\n        datetime\n            Series of datetime64 dtype.\n        \"\"\"\n        return self.__constructor__(\n            query_compiler=self._query_compiler.to_datetime(**kwargs)\n        )\n\n    def _to_numeric(self, **kwargs) -> Series:\n        \"\"\"\n        Convert `self` to numeric.\n\n        Parameters\n        ----------\n        **kwargs : dict\n            Optional arguments to use during query compiler's\n            `to_numeric` invocation.\n\n        Returns\n        -------\n        numeric\n            Series of numeric dtype.\n        \"\"\"\n        return self.__constructor__(\n            query_compiler=self._query_compiler.to_numeric(**kwargs)\n        )\n\n    def _qcut(self, q, **kwargs):  # noqa: 
PR01, RT01, D200\n        \"\"\"\n        Quantile-based discretization function.\n        \"\"\"\n        return self._default_to_pandas(pandas.qcut, q, **kwargs)\n\n    def _reduce_dimension(self, query_compiler) -> Series | Scalar:\n        \"\"\"\n        Try to reduce the dimension of data from the `query_compiler`.\n\n        Parameters\n        ----------\n        query_compiler : BaseQueryCompiler\n            Query compiler to retrieve the data.\n\n        Returns\n        -------\n        pandas.Series or scalar.\n        \"\"\"\n        return query_compiler.to_pandas().squeeze()\n\n    def _validate_dtypes_prod_mean(\n        self, axis, numeric_only, ignore_axis=False\n    ) -> Series:\n        \"\"\"\n        Validate data dtype for `prod` and `mean` methods.\n\n        Parameters\n        ----------\n        axis : {0, 1}\n            Axis to validate over.\n        numeric_only : bool\n            Whether or not to allow only numeric data.\n            If True and non-numeric data is found, exception\n            will be raised.\n        ignore_axis : bool, default: False\n            Whether or not to ignore `axis` parameter.\n\n        Returns\n        -------\n        Series\n\n        Notes\n        -----\n        Actually returns unmodified `self` object,\n        added for compatibility with Modin DataFrame.\n        \"\"\"\n        return self\n\n    def _validate_dtypes_min_max(self, axis, numeric_only) -> Series:\n        \"\"\"\n        Validate data dtype for `min` and `max` methods.\n\n        Parameters\n        ----------\n        axis : {0, 1}\n            Axis to validate over.\n        numeric_only : bool\n            Whether or not to allow only numeric data.\n            If True and non-numeric data is found, exception.\n\n        Returns\n        -------\n        Series\n\n        Notes\n        -----\n        Actually returns unmodified `self` object,\n        added for compatibility with Modin DataFrame.\n        \"\"\"\n       
 return self\n\n    def _validate_dtypes(self, numeric_only=False) -> None:\n        \"\"\"\n        Check that all the dtypes are the same.\n\n        Parameters\n        ----------\n        numeric_only : bool, default: False\n            Whether or not to allow only numeric data.\n            If True and non-numeric data is found, exception\n            will be raised.\n\n        Notes\n        -----\n        Actually does nothing, added for compatibility with Modin DataFrame.\n        \"\"\"\n        pass\n\n    def _get_numeric_data(self, axis: int) -> Series:\n        \"\"\"\n        Grab only numeric data from Series.\n\n        Parameters\n        ----------\n        axis : {0, 1}\n            Axis to inspect on having numeric types only.\n\n        Returns\n        -------\n        Series\n\n        Notes\n        -----\n        `numeric_only` parameter is not supported by Series, so this method\n        does not do anything. The method is added for compatibility with Modin DataFrame.\n        \"\"\"\n        return self\n\n    def _update_inplace(self, new_query_compiler) -> None:\n        \"\"\"\n        Update the current Series in-place using `new_query_compiler`.\n\n        Parameters\n        ----------\n        new_query_compiler : BaseQueryCompiler\n            QueryCompiler to use to manage the data.\n        \"\"\"\n        super(Series, self)._update_inplace(new_query_compiler=new_query_compiler)\n        # Propagate changes back to parent so that column in dataframe had the same contents\n        if self._parent is not None:\n            if self._parent_axis == 0:\n                self._parent.loc[self.name] = self\n            else:\n                self._parent[self.name] = self\n\n    def _create_or_update_from_compiler(\n        self, new_query_compiler, inplace=False\n    ) -> Union[Series, None]:\n        \"\"\"\n        Return or update a Series with given `new_query_compiler`.\n\n        Parameters\n        ----------\n        
new_query_compiler : PandasQueryCompiler\n            QueryCompiler to use to manage the data.\n        inplace : bool, default: False\n            Whether or not to perform update or creation inplace.\n\n        Returns\n        -------\n        Series or None\n            None if update was done, Series otherwise.\n        \"\"\"\n        assert (\n            isinstance(new_query_compiler, type(self._query_compiler))\n            or type(new_query_compiler) in self._query_compiler.__class__.__bases__\n        ), \"Invalid Query Compiler object: {}\".format(type(new_query_compiler))\n        if not inplace and new_query_compiler.is_series_like():\n            return self.__constructor__(query_compiler=new_query_compiler)\n        elif not inplace:\n            # This can happen with things like `reset_index` where we can add columns.\n            from .dataframe import DataFrame\n\n            return DataFrame(query_compiler=new_query_compiler)\n        else:\n            self._update_inplace(new_query_compiler=new_query_compiler)\n\n    def _prepare_inter_op(self, other) -> tuple[Series, Series]:\n        \"\"\"\n        Prepare `self` and `other` for further interaction.\n\n        Parameters\n        ----------\n        other : Series or scalar value\n            Another object `self` should interact with.\n\n        Returns\n        -------\n        Series\n            Prepared `self`.\n        Series\n            Prepared `other`.\n        \"\"\"\n        if isinstance(other, Series):\n            names_different = self.name != other.name\n            # NB: if we don't need a rename, do the interaction with shallow\n            # copies so that we preserve obj.index._id. It's fine to work\n            # with shallow copies because we'll discard the copies but keep\n            # the result after the interaction operation. 
We can't do a rename\n            # on shallow copies because we'll mutate the original objects.\n            new_self = self.copy(deep=names_different)\n            new_other = other.copy(deep=names_different)\n            if names_different:\n                new_self.name = new_other.name = MODIN_UNNAMED_SERIES_LABEL\n        else:\n            new_self = self\n            new_other = other\n        return new_self, new_other\n\n    def _getitem(self, key) -> Union[Series, Scalar]:\n        \"\"\"\n        Get the data specified by `key` for this Series.\n\n        Parameters\n        ----------\n        key : Any\n            Column id to retrieve from Series.\n\n        Returns\n        -------\n        Series or scalar\n            Retrieved data.\n        \"\"\"\n        key = apply_if_callable(key, self)\n        if isinstance(key, Series) and key.dtype == np.bool_:\n            # This ends up being significantly faster than looping through and getting\n            # each item individually.\n            key = key._to_pandas()\n        if is_bool_indexer(key):\n            return self.__constructor__(\n                query_compiler=self._query_compiler.getitem_row_array(\n                    pandas.RangeIndex(len(self))[key]\n                )\n            )\n        # TODO: More efficiently handle `tuple` case for `Series.__getitem__`\n        if isinstance(key, tuple):\n            return self._default_to_pandas(pandas.Series.__getitem__, key)\n\n        if not is_list_like(key):\n            reduce_dimension = True\n            key = [key]\n        else:\n            reduce_dimension = False\n        # The check for whether or not `key` is in `keys()` will throw a TypeError\n        # if the object is not hashable. 
When that happens, we just assume the\n        # key is a list-like of row positions.\n        try:\n            is_indexer = all(k in self.keys() for k in key)\n        except TypeError:\n            is_indexer = False\n        row_positions = self.index.get_indexer_for(key) if is_indexer else key\n        if not all(is_integer(x) for x in row_positions):\n            raise KeyError(key[0] if reduce_dimension else key)\n        result = self._query_compiler.getitem_row_array(row_positions)\n\n        if reduce_dimension:\n            return self._reduce_dimension(result)\n        return self.__constructor__(query_compiler=result)\n\n    def _repartition(self) -> Series:\n        \"\"\"\n        Repartitioning Series to get ideal partitions inside.\n\n        Allows to improve performance where the query compiler can't improve\n        yet by doing implicit repartitioning.\n\n        Returns\n        -------\n        Series\n            The repartitioned Series.\n        \"\"\"\n        return super()._repartition(axis=0)\n\n    # Persistence support methods - BEGIN\n    @classmethod\n    def _inflate_light(cls, query_compiler, name, source_pid) -> Series:\n        \"\"\"\n        Re-creates the object from previously-serialized lightweight representation.\n\n        The method is used for faster but not disk-storable persistence.\n\n        Parameters\n        ----------\n        query_compiler : BaseQueryCompiler\n            Query compiler to use for object re-creation.\n        name : str\n            The name to give to the new object.\n        source_pid : int\n            Determines whether a Modin or pandas object needs to be created.\n            Modin objects are created only on the main process.\n\n        Returns\n        -------\n        Series\n            New Series based on the `query_compiler`.\n        \"\"\"\n        if os.getpid() != source_pid:\n            res = query_compiler.to_pandas()\n            # at the query compiler layer, `to_pandas` 
always returns a DataFrame,\n            # even if it stores a Series, as a single-column DataFrame\n            if res.columns == [MODIN_UNNAMED_SERIES_LABEL]:\n                res = res.squeeze(axis=1)\n                res.name = None\n            return res\n        # The current logic does not involve creating Modin objects\n        # and manipulation with them in worker processes\n        return cls(query_compiler=query_compiler, name=name)\n\n    @classmethod\n    def _inflate_full(cls, pandas_series, source_pid) -> Series:\n        \"\"\"\n        Re-creates the object from previously-serialized disk-storable representation.\n\n        Parameters\n        ----------\n        pandas_series : pandas.Series\n            Data to use for object re-creation.\n        source_pid : int\n            Determines whether a Modin or pandas object needs to be created.\n            Modin objects are created only on the main process.\n\n        Returns\n        -------\n        Series\n            New Series based on the `pandas_series`.\n        \"\"\"\n        if os.getpid() != source_pid:\n            return pandas_series\n        # The current logic does not involve creating Modin objects\n        # and manipulation with them in worker processes\n        return cls(data=pandas_series)\n\n    def __reduce__(self):\n        self._query_compiler.finalize()\n        pid = os.getpid()\n        if (\n            PersistentPickle.get()\n            or not self._query_compiler.support_materialization_in_worker_process()\n        ):\n            return self._inflate_full, (self._to_pandas(), pid)\n        return self._inflate_light, (self._query_compiler, self.name, pid)\n\n    # Persistence support methods - END\n\n    @doc(SET_BACKEND_DOC, class_name=__qualname__)\n    def set_backend(\n        self,\n        backend: str,\n        inplace: bool = False,\n        *,\n        switch_operation: Optional[str] = None,\n    ) -> Optional[Self]:\n        # A series which is moved, 
potentially without its parent needs to\n        # have its parent reset. This is aligned with CoW chained assignment\n        # semantics as well, but it is a little different from existing modin\n        # semantics. This is why we only do this for hybrid and inplace\n        # modification.\n        if (\n            inplace\n            and self._parent is not None\n            and backend != self._parent.get_backend()\n        ):\n            self._parent = None\n        return super().set_backend(\n            backend=backend, inplace=inplace, switch_operation=switch_operation\n        )\n\n    move_to = set_backend\n\n    @doc(GET_BACKEND_DOC, class_name=__qualname__)\n    @disable_logging\n    def get_backend(self) -> str:\n        return super().get_backend()\n\n    @disable_logging\n    @_inherit_docstrings(BasePandasDataset._copy_into)\n    def _copy_into(self, other: Series):\n        other._query_compiler = self._query_compiler\n        other._siblings = self._siblings\n        return None\n"
  },
  {
    "path": "modin/pandas/series_utils.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"\nImplement Series's accessors public API as pandas does.\n\nAccessors: `Series.cat`, `Series.str`, `Series.dt`\n\"\"\"\n\nfrom __future__ import annotations\n\nimport re\nfrom functools import cached_property\nfrom typing import TYPE_CHECKING\n\nimport numpy as np\nimport pandas\nfrom pandas._libs import lib\n\nfrom modin.logging import ClassLogger\nfrom modin.utils import _inherit_docstrings\n\nif TYPE_CHECKING:\n    from datetime import tzinfo\n\n    from pandas._typing import npt\n\n    from modin.core.storage_formats import BaseQueryCompiler\n    from modin.pandas import Series\n\n\n@_inherit_docstrings(pandas.core.arrays.arrow.ListAccessor)\nclass ListAccessor(ClassLogger):\n    _series: Series\n    _query_compiler: BaseQueryCompiler\n\n    def __init__(self, data: Series = None):\n        self._series = data\n        self._query_compiler = data._query_compiler\n\n    @cached_property\n    def _Series(self) -> Series:  # noqa: GL08\n        # to avoid cyclic import\n        from .series import Series\n\n        return Series\n\n    def flatten(self):\n        return self._Series(query_compiler=self._query_compiler.list_flatten())\n\n    def len(self):\n 
       return self._Series(query_compiler=self._query_compiler.list_len())\n\n    def __getitem__(self, key):\n        return self._Series(\n            query_compiler=self._query_compiler.list__getitem__(key=key)\n        )\n\n\n@_inherit_docstrings(pandas.core.arrays.arrow.StructAccessor)\nclass StructAccessor(ClassLogger):\n    _series: Series\n    _query_compiler: BaseQueryCompiler\n\n    def __init__(self, data: Series = None):\n        self._series = data\n        self._query_compiler = data._query_compiler\n\n    @cached_property\n    def _Series(self) -> Series:  # noqa: GL08\n        # to avoid cyclic import\n        from modin.pandas.series import Series\n\n        return Series\n\n    @property\n    def dtypes(self):\n        return self._Series(query_compiler=self._query_compiler.struct_dtypes())\n\n    def field(self, name_or_index):\n        return self._Series(\n            query_compiler=self._query_compiler.struct_field(\n                name_or_index=name_or_index\n            )\n        )\n\n    def explode(self):\n        from modin.pandas.dataframe import DataFrame\n\n        return DataFrame(query_compiler=self._query_compiler.struct_explode())\n\n\n@_inherit_docstrings(pandas.core.arrays.categorical.CategoricalAccessor)\nclass CategoryMethods(ClassLogger):\n    _series: Series\n    _query_compiler: BaseQueryCompiler\n\n    def __init__(self, data: Series):\n        self._series = data\n        self._query_compiler = data._query_compiler\n\n    @cached_property\n    def _Series(self) -> Series:  # noqa: GL08\n        # to avoid cyclic import\n        from modin.pandas.series import Series\n\n        return Series\n\n    @property\n    def categories(self):\n        return self._series.dtype.categories\n\n    @categories.setter\n    def categories(self, categories):\n        def set_categories(series, categories):\n            series.cat.categories = categories\n\n        self._series._default_to_pandas(set_categories, 
categories=categories)\n\n    @property\n    def ordered(self):\n        return self._series.dtype.ordered\n\n    @property\n    def codes(self):\n        return self._Series(query_compiler=self._query_compiler.cat_codes())\n\n    def rename_categories(self, new_categories):\n        return self._default_to_pandas(\n            pandas.Series.cat.rename_categories, new_categories\n        )\n\n    def reorder_categories(self, new_categories, ordered=None):\n        return self._default_to_pandas(\n            pandas.Series.cat.reorder_categories,\n            new_categories,\n            ordered=ordered,\n        )\n\n    def add_categories(self, new_categories):\n        return self._default_to_pandas(pandas.Series.cat.add_categories, new_categories)\n\n    def remove_categories(self, removals):\n        return self._default_to_pandas(pandas.Series.cat.remove_categories, removals)\n\n    def remove_unused_categories(self):\n        return self._default_to_pandas(pandas.Series.cat.remove_unused_categories)\n\n    def set_categories(self, new_categories, ordered=None, rename=False):\n        return self._default_to_pandas(\n            pandas.Series.cat.set_categories,\n            new_categories,\n            ordered=ordered,\n            rename=rename,\n        )\n\n    def as_ordered(self):\n        return self._default_to_pandas(pandas.Series.cat.as_ordered)\n\n    def as_unordered(self):\n        return self._default_to_pandas(pandas.Series.cat.as_unordered)\n\n    def _default_to_pandas(self, op, *args, **kwargs):\n        \"\"\"\n        Convert `self` to pandas type and call a pandas cat.`op` on it.\n\n        Parameters\n        ----------\n        op : str\n            Name of pandas function.\n        *args : list\n            Additional positional arguments to be passed in `op`.\n        **kwargs : dict\n            Additional keywords arguments to be passed in `op`.\n\n        Returns\n        -------\n        object\n            Result of operation.\n   
     \"\"\"\n        return self._series._default_to_pandas(\n            lambda series: op(series.cat, *args, **kwargs)\n        )\n\n\n@_inherit_docstrings(pandas.core.strings.accessor.StringMethods)\nclass StringMethods(ClassLogger):\n    _series: Series\n    _query_compiler: BaseQueryCompiler\n\n    def __init__(self, data: Series):\n        # Check if dtypes is objects\n\n        self._series = data\n        self._query_compiler = data._query_compiler\n\n    @cached_property\n    def _Series(self) -> Series:  # noqa: GL08\n        # to avoid cyclic import\n        from .series import Series\n\n        return Series\n\n    def casefold(self):\n        return self._Series(query_compiler=self._query_compiler.str_casefold())\n\n    def cat(self, others=None, sep=None, na_rep=None, join=\"left\"):\n        if isinstance(others, self._Series):\n            others = others._to_pandas()\n        compiler_result = self._query_compiler.str_cat(\n            others=others, sep=sep, na_rep=na_rep, join=join\n        )\n        # if others is None, result is a string. 
otherwise, it's a series.\n        return (\n            compiler_result.to_pandas().squeeze()\n            if others is None\n            else self._Series(query_compiler=compiler_result)\n        )\n\n    def decode(self, encoding, errors=\"strict\", dtype=None):\n        return self._Series(\n            query_compiler=self._query_compiler.str_decode(encoding, errors, dtype)\n        )\n\n    def split(self, pat=None, *, n=-1, expand=False, regex=None):\n        if expand:\n            from .dataframe import DataFrame\n\n            return DataFrame(\n                query_compiler=self._query_compiler.str_split(\n                    pat=pat, n=n, expand=True, regex=regex\n                )\n            )\n        else:\n            return self._Series(\n                query_compiler=self._query_compiler.str_split(\n                    pat=pat, n=n, expand=expand, regex=regex\n                )\n            )\n\n    def rsplit(self, pat=None, *, n=-1, expand=False):\n        if not pat and pat is not None:\n            raise ValueError(\"rsplit() requires a non-empty pattern match.\")\n\n        if expand:\n            from .dataframe import DataFrame\n\n            return DataFrame(\n                query_compiler=self._query_compiler.str_rsplit(\n                    pat=pat, n=n, expand=True\n                )\n            )\n        else:\n            return self._Series(\n                query_compiler=self._query_compiler.str_rsplit(\n                    pat=pat, n=n, expand=expand\n                )\n            )\n\n    def get(self, i):\n        return self._Series(query_compiler=self._query_compiler.str_get(i))\n\n    def join(self, sep):\n        if sep is None:\n            raise AttributeError(\"'NoneType' object has no attribute 'join'\")\n        return self._Series(query_compiler=self._query_compiler.str_join(sep))\n\n    def get_dummies(self, sep=\"|\"):\n        return self._Series(query_compiler=self._query_compiler.str_get_dummies(sep))\n\n   
 def contains(self, pat, case=True, flags=0, na=lib.no_default, regex=True):\n        if pat is None and not case:\n            raise AttributeError(\"'NoneType' object has no attribute 'upper'\")\n        if na is lib.no_default:\n            na = None\n        return self._Series(\n            query_compiler=self._query_compiler.str_contains(\n                pat, case=case, flags=flags, na=na, regex=regex\n            )\n        )\n\n    def replace(self, pat, repl, n=-1, case=None, flags=0, regex=False):\n        if not (isinstance(repl, str) or callable(repl)):\n            raise TypeError(\"repl must be a string or callable\")\n        return self._Series(\n            query_compiler=self._query_compiler.str_replace(\n                pat, repl, n=n, case=case, flags=flags, regex=regex\n            )\n        )\n\n    def pad(self, width, side=\"left\", fillchar=\" \"):\n        if len(fillchar) != 1:\n            raise TypeError(\"fillchar must be a character, not str\")\n        return self._Series(\n            query_compiler=self._query_compiler.str_pad(\n                width, side=side, fillchar=fillchar\n            )\n        )\n\n    def center(self, width, fillchar=\" \"):\n        if len(fillchar) != 1:\n            raise TypeError(\"fillchar must be a character, not str\")\n        return self._Series(\n            query_compiler=self._query_compiler.str_center(width, fillchar=fillchar)\n        )\n\n    def ljust(self, width, fillchar=\" \"):\n        if len(fillchar) != 1:\n            raise TypeError(\"fillchar must be a character, not str\")\n        return self._Series(\n            query_compiler=self._query_compiler.str_ljust(width, fillchar=fillchar)\n        )\n\n    def rjust(self, width, fillchar=\" \"):\n        if len(fillchar) != 1:\n            raise TypeError(\"fillchar must be a character, not str\")\n        return self._Series(\n            query_compiler=self._query_compiler.str_rjust(width, fillchar=fillchar)\n        )\n\n    
def zfill(self, width):\n        return self._Series(query_compiler=self._query_compiler.str_zfill(width))\n\n    def wrap(self, width, **kwargs):\n        if width <= 0:\n            raise ValueError(\"invalid width {} (must be > 0)\".format(width))\n        return self._Series(\n            query_compiler=self._query_compiler.str_wrap(width, **kwargs)\n        )\n\n    def slice(self, start=None, stop=None, step=None):\n        if step == 0:\n            raise ValueError(\"slice step cannot be zero\")\n        return self._Series(\n            query_compiler=self._query_compiler.str_slice(\n                start=start, stop=stop, step=step\n            )\n        )\n\n    def slice_replace(self, start=None, stop=None, repl=None):\n        return self._Series(\n            query_compiler=self._query_compiler.str_slice_replace(\n                start=start, stop=stop, repl=repl\n            )\n        )\n\n    def count(self, pat, flags=0):\n        if not isinstance(pat, (str, re.Pattern)):\n            raise TypeError(\"first argument must be string or compiled pattern\")\n        return self._Series(\n            query_compiler=self._query_compiler.str_count(pat, flags=flags)\n        )\n\n    def startswith(self, pat, na=lib.no_default):\n        if na is lib.no_default:\n            na = None\n        return self._Series(\n            query_compiler=self._query_compiler.str_startswith(pat, na=na)\n        )\n\n    def encode(self, encoding, errors=\"strict\"):\n        return self._Series(\n            query_compiler=self._query_compiler.str_encode(encoding, errors)\n        )\n\n    def endswith(self, pat, na=lib.no_default):\n        if na is lib.no_default:\n            na = None\n        return self._Series(\n            query_compiler=self._query_compiler.str_endswith(pat, na=na)\n        )\n\n    def findall(self, pat, flags=0):\n        if not isinstance(pat, (str, re.Pattern)):\n            raise TypeError(\"first argument must be string or compiled 
pattern\")\n        return self._Series(\n            query_compiler=self._query_compiler.str_findall(pat, flags=flags)\n        )\n\n    def fullmatch(self, pat, case=True, flags=0, na=lib.no_default):\n        if not isinstance(pat, (str, re.Pattern)):\n            raise TypeError(\"first argument must be string or compiled pattern\")\n        if na is lib.no_default:\n            na = None\n        return self._Series(\n            query_compiler=self._query_compiler.str_fullmatch(\n                pat, case=case, flags=flags, na=na\n            )\n        )\n\n    def match(self, pat, case=True, flags=0, na=lib.no_default):\n        if not isinstance(pat, (str, re.Pattern)):\n            raise TypeError(\"first argument must be string or compiled pattern\")\n        if na is lib.no_default:\n            na = None\n        return self._Series(\n            query_compiler=self._query_compiler.str_match(\n                pat, case=case, flags=flags, na=na\n            )\n        )\n\n    def extract(self, pat, flags=0, expand=True):\n        query_compiler = self._query_compiler.str_extract(\n            pat, flags=flags, expand=expand\n        )\n        from .dataframe import DataFrame\n\n        return (\n            DataFrame(query_compiler=query_compiler)\n            if expand or re.compile(pat).groups > 1\n            else self._Series(query_compiler=query_compiler)\n        )\n\n    def extractall(self, pat, flags=0):\n        return self._Series(\n            query_compiler=self._query_compiler.str_extractall(pat, flags)\n        )\n\n    def len(self):\n        return self._Series(query_compiler=self._query_compiler.str_len())\n\n    def strip(self, to_strip=None):\n        return self._Series(\n            query_compiler=self._query_compiler.str_strip(to_strip=to_strip)\n        )\n\n    def rstrip(self, to_strip=None):\n        return self._Series(\n            query_compiler=self._query_compiler.str_rstrip(to_strip=to_strip)\n        )\n\n    def 
lstrip(self, to_strip=None):\n        return self._Series(\n            query_compiler=self._query_compiler.str_lstrip(to_strip=to_strip)\n        )\n\n    def partition(self, sep=\" \", expand=True):\n        if sep is not None and len(sep) == 0:\n            raise ValueError(\"empty separator\")\n\n        from .dataframe import DataFrame\n\n        return (DataFrame if expand else self._Series)(\n            query_compiler=self._query_compiler.str_partition(sep=sep, expand=expand)\n        )\n\n    def removeprefix(self, prefix):\n        return self._Series(\n            query_compiler=self._query_compiler.str_removeprefix(prefix)\n        )\n\n    def removesuffix(self, suffix):\n        return self._Series(\n            query_compiler=self._query_compiler.str_removesuffix(suffix)\n        )\n\n    def repeat(self, repeats):\n        return self._Series(query_compiler=self._query_compiler.str_repeat(repeats))\n\n    def rpartition(self, sep=\" \", expand=True):\n        if sep is not None and len(sep) == 0:\n            raise ValueError(\"empty separator\")\n\n        from .dataframe import DataFrame\n\n        return (DataFrame if expand else self._Series)(\n            query_compiler=self._query_compiler.str_rpartition(sep=sep, expand=expand)\n        )\n\n    def lower(self):\n        return self._Series(query_compiler=self._query_compiler.str_lower())\n\n    def upper(self):\n        return self._Series(query_compiler=self._query_compiler.str_upper())\n\n    def title(self):\n        return self._Series(query_compiler=self._query_compiler.str_title())\n\n    def find(self, sub, start=0, end=None):\n        if not isinstance(sub, str):\n            raise TypeError(\n                \"expected a string object, not {0}\".format(type(sub).__name__)\n            )\n        return self._Series(\n            query_compiler=self._query_compiler.str_find(sub, start=start, end=end)\n        )\n\n    def rfind(self, sub, start=0, end=None):\n        if not 
isinstance(sub, str):\n            raise TypeError(\n                \"expected a string object, not {0}\".format(type(sub).__name__)\n            )\n        return self._Series(\n            query_compiler=self._query_compiler.str_rfind(sub, start=start, end=end)\n        )\n\n    def index(self, sub, start=0, end=None):\n        if not isinstance(sub, str):\n            raise TypeError(\n                \"expected a string object, not {0}\".format(type(sub).__name__)\n            )\n        return self._Series(\n            query_compiler=self._query_compiler.str_index(sub, start=start, end=end)\n        )\n\n    def rindex(self, sub, start=0, end=None):\n        if not isinstance(sub, str):\n            raise TypeError(\n                \"expected a string object, not {0}\".format(type(sub).__name__)\n            )\n        return self._Series(\n            query_compiler=self._query_compiler.str_rindex(sub, start=start, end=end)\n        )\n\n    def capitalize(self):\n        return self._Series(query_compiler=self._query_compiler.str_capitalize())\n\n    def swapcase(self):\n        return self._Series(query_compiler=self._query_compiler.str_swapcase())\n\n    def normalize(self, form):\n        return self._Series(query_compiler=self._query_compiler.str_normalize(form))\n\n    def translate(self, table):\n        return self._Series(query_compiler=self._query_compiler.str_translate(table))\n\n    def isalnum(self):\n        return self._Series(query_compiler=self._query_compiler.str_isalnum())\n\n    def isalpha(self):\n        return self._Series(query_compiler=self._query_compiler.str_isalpha())\n\n    def isdigit(self):\n        return self._Series(query_compiler=self._query_compiler.str_isdigit())\n\n    def isspace(self):\n        return self._Series(query_compiler=self._query_compiler.str_isspace())\n\n    def islower(self):\n        return self._Series(query_compiler=self._query_compiler.str_islower())\n\n    def isupper(self):\n        return 
self._Series(query_compiler=self._query_compiler.str_isupper())\n\n    def istitle(self):\n        return self._Series(query_compiler=self._query_compiler.str_istitle())\n\n    def isnumeric(self):\n        return self._Series(query_compiler=self._query_compiler.str_isnumeric())\n\n    def isdecimal(self):\n        return self._Series(query_compiler=self._query_compiler.str_isdecimal())\n\n    def __getitem__(self, key):  # noqa: GL08\n        return self._Series(query_compiler=self._query_compiler.str___getitem__(key))\n\n    def _default_to_pandas(self, op, *args, **kwargs):\n        \"\"\"\n        Convert `self` to pandas type and call a pandas str.`op` on it.\n\n        Parameters\n        ----------\n        op : str\n            Name of pandas function.\n        *args : list\n            Additional positional arguments to be passed in `op`.\n        **kwargs : dict\n            Additional keywords arguments to be passed in `op`.\n\n        Returns\n        -------\n        object\n            Result of operation.\n        \"\"\"\n        return self._series._default_to_pandas(\n            lambda series: op(series.str, *args, **kwargs)\n        )\n\n\n@_inherit_docstrings(pandas.core.indexes.accessors.CombinedDatetimelikeProperties)\nclass DatetimeProperties(ClassLogger):  # noqa: GL08\n    _series: Series\n    _query_compiler: BaseQueryCompiler\n\n    def __init__(self, data: Series):\n        self._series = data\n        self._query_compiler = data._query_compiler\n\n    @cached_property\n    def _Series(self) -> Series:  # noqa: GL08\n        # to avoid cyclic import\n        from .series import Series\n\n        return Series\n\n    @property\n    def date(self):\n        return self._Series(query_compiler=self._query_compiler.dt_date())\n\n    @property\n    def time(self):\n        return self._Series(query_compiler=self._query_compiler.dt_time())\n\n    @property\n    def timetz(self):\n        return 
self._Series(query_compiler=self._query_compiler.dt_timetz())\n\n    @property\n    def year(self):\n        return self._Series(query_compiler=self._query_compiler.dt_year())\n\n    @property\n    def month(self):\n        return self._Series(query_compiler=self._query_compiler.dt_month())\n\n    @property\n    def day(self):\n        return self._Series(query_compiler=self._query_compiler.dt_day())\n\n    @property\n    def hour(self):\n        return self._Series(query_compiler=self._query_compiler.dt_hour())\n\n    @property\n    def minute(self):\n        return self._Series(query_compiler=self._query_compiler.dt_minute())\n\n    @property\n    def second(self):\n        return self._Series(query_compiler=self._query_compiler.dt_second())\n\n    @property\n    def microsecond(self):\n        return self._Series(query_compiler=self._query_compiler.dt_microsecond())\n\n    @property\n    def nanosecond(self):\n        return self._Series(query_compiler=self._query_compiler.dt_nanosecond())\n\n    @property\n    def dayofweek(self):\n        return self._Series(query_compiler=self._query_compiler.dt_dayofweek())\n\n    day_of_week = dayofweek\n\n    @property\n    def weekday(self):\n        return self._Series(query_compiler=self._query_compiler.dt_weekday())\n\n    @property\n    def dayofyear(self):\n        return self._Series(query_compiler=self._query_compiler.dt_dayofyear())\n\n    day_of_year = dayofyear\n\n    @property\n    def quarter(self):\n        return self._Series(query_compiler=self._query_compiler.dt_quarter())\n\n    @property\n    def is_month_start(self):\n        return self._Series(query_compiler=self._query_compiler.dt_is_month_start())\n\n    @property\n    def is_month_end(self):\n        return self._Series(query_compiler=self._query_compiler.dt_is_month_end())\n\n    @property\n    def is_quarter_start(self):\n        return self._Series(query_compiler=self._query_compiler.dt_is_quarter_start())\n\n    @property\n    def 
is_quarter_end(self):\n        return self._Series(query_compiler=self._query_compiler.dt_is_quarter_end())\n\n    @property\n    def is_year_start(self):\n        return self._Series(query_compiler=self._query_compiler.dt_is_year_start())\n\n    @property\n    def is_year_end(self):\n        return self._Series(query_compiler=self._query_compiler.dt_is_year_end())\n\n    @property\n    def is_leap_year(self):\n        return self._Series(query_compiler=self._query_compiler.dt_is_leap_year())\n\n    @property\n    def daysinmonth(self):\n        return self._Series(query_compiler=self._query_compiler.dt_daysinmonth())\n\n    @property\n    def days_in_month(self):\n        return self._Series(query_compiler=self._query_compiler.dt_days_in_month())\n\n    @property\n    def tz(self) -> \"tzinfo | None\":\n        dtype = self._series.dtype\n        if isinstance(dtype, np.dtype):\n            return None\n        return dtype.tz\n\n    @property\n    def freq(self):  # noqa: GL08\n        return self._query_compiler.dt_freq().to_pandas().squeeze()\n\n    @property\n    def unit(self):  # noqa: GL08\n        # use `iloc[0]` to return scalar\n        return self._Series(query_compiler=self._query_compiler.dt_unit()).iloc[0]\n\n    def as_unit(self, *args, **kwargs):  # noqa: GL08\n        return self._Series(\n            query_compiler=self._query_compiler.dt_as_unit(*args, **kwargs)\n        )\n\n    def to_period(self, *args, **kwargs):\n        return self._Series(\n            query_compiler=self._query_compiler.dt_to_period(*args, **kwargs)\n        )\n\n    def asfreq(self, *args, **kwargs):\n        return self._Series(\n            query_compiler=self._query_compiler.dt_asfreq(*args, **kwargs)\n        )\n\n    def to_pydatetime(self):\n        return self._Series(\n            query_compiler=self._query_compiler.dt_to_pydatetime()\n        ).to_numpy()\n\n    def tz_localize(self, *args, **kwargs):\n        return self._Series(\n            
query_compiler=self._query_compiler.dt_tz_localize(*args, **kwargs)\n        )\n\n    def tz_convert(self, *args, **kwargs):\n        return self._Series(\n            query_compiler=self._query_compiler.dt_tz_convert(*args, **kwargs)\n        )\n\n    def normalize(self, *args, **kwargs):\n        return self._Series(\n            query_compiler=self._query_compiler.dt_normalize(*args, **kwargs)\n        )\n\n    def strftime(self, *args, **kwargs):\n        return self._Series(\n            query_compiler=self._query_compiler.dt_strftime(*args, **kwargs)\n        )\n\n    def round(self, *args, **kwargs):\n        return self._Series(\n            query_compiler=self._query_compiler.dt_round(*args, **kwargs)\n        )\n\n    def floor(self, *args, **kwargs):\n        return self._Series(\n            query_compiler=self._query_compiler.dt_floor(*args, **kwargs)\n        )\n\n    def ceil(self, *args, **kwargs):\n        return self._Series(\n            query_compiler=self._query_compiler.dt_ceil(*args, **kwargs)\n        )\n\n    def month_name(self, *args, **kwargs):\n        return self._Series(\n            query_compiler=self._query_compiler.dt_month_name(*args, **kwargs)\n        )\n\n    def day_name(self, *args, **kwargs):\n        return self._Series(\n            query_compiler=self._query_compiler.dt_day_name(*args, **kwargs)\n        )\n\n    def total_seconds(self, *args, **kwargs):\n        return self._Series(\n            query_compiler=self._query_compiler.dt_total_seconds(*args, **kwargs)\n        )\n\n    def to_pytimedelta(self) -> \"npt.NDArray[np.object_]\":\n        res = self._query_compiler.dt_to_pytimedelta()\n        return res.to_numpy()[:, 0]\n\n    @property\n    def seconds(self):\n        return self._Series(query_compiler=self._query_compiler.dt_seconds())\n\n    @property\n    def days(self):\n        return self._Series(query_compiler=self._query_compiler.dt_days())\n\n    @property\n    def microseconds(self):\n        return 
self._Series(query_compiler=self._query_compiler.dt_microseconds())\n\n    @property\n    def nanoseconds(self):\n        return self._Series(query_compiler=self._query_compiler.dt_nanoseconds())\n\n    @property\n    def components(self):\n        from .dataframe import DataFrame\n\n        return DataFrame(query_compiler=self._query_compiler.dt_components())\n\n    def isocalendar(self):\n        from .dataframe import DataFrame\n\n        return DataFrame(query_compiler=self._query_compiler.dt_isocalendar())\n\n    @property\n    def qyear(self):  # noqa: GL08\n        return self._Series(query_compiler=self._query_compiler.dt_qyear())\n\n    @property\n    def start_time(self):\n        return self._Series(query_compiler=self._query_compiler.dt_start_time())\n\n    @property\n    def end_time(self):\n        return self._Series(query_compiler=self._query_compiler.dt_end_time())\n\n    def to_timestamp(self, *args, **kwargs):\n        return self._Series(\n            query_compiler=self._query_compiler.dt_to_timestamp(*args, **kwargs)\n        )\n"
  },
  {
    "path": "modin/pandas/testing/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"\nPublic testing utility functions.\n\"\"\"\n\nfrom __future__ import annotations\n\nfrom typing import Literal\n\nfrom pandas._libs import lib\nfrom pandas.testing import assert_extension_array_equal\nfrom pandas.testing import assert_frame_equal as pd_assert_frame_equal\nfrom pandas.testing import assert_index_equal\nfrom pandas.testing import assert_series_equal as pd_assert_series_equal\n\nfrom modin.utils import _inherit_docstrings, try_cast_to_pandas\n\n\n@_inherit_docstrings(pd_assert_frame_equal, apilink=\"pandas.testing.assert_frame_equal\")\ndef assert_frame_equal(\n    left,\n    right,\n    check_dtype: bool | Literal[\"equiv\"] = True,\n    check_index_type: bool | Literal[\"equiv\"] = \"equiv\",\n    check_column_type: bool | Literal[\"equiv\"] = \"equiv\",\n    check_frame_type: bool = True,\n    check_names: bool = True,\n    by_blocks: bool = False,\n    check_exact: bool | lib.NoDefault = lib.no_default,\n    check_datetimelike_compat: bool = False,\n    check_categorical: bool = True,\n    check_like: bool = False,\n    check_freq: bool = True,\n    check_flags: bool = True,\n    rtol: float | lib.NoDefault = lib.no_default,\n    atol: 
float | lib.NoDefault = lib.no_default,\n    obj: str = \"DataFrame\",\n) -> None:\n    left = try_cast_to_pandas(left)\n    right = try_cast_to_pandas(right)\n    pd_assert_frame_equal(\n        left,\n        right,\n        check_dtype=check_dtype,\n        check_index_type=check_index_type,\n        check_column_type=check_column_type,\n        check_frame_type=check_frame_type,\n        check_names=check_names,\n        by_blocks=by_blocks,\n        check_exact=check_exact,\n        check_datetimelike_compat=check_datetimelike_compat,\n        check_categorical=check_categorical,\n        check_like=check_like,\n        check_freq=check_freq,\n        check_flags=check_flags,\n        rtol=rtol,\n        atol=atol,\n        obj=obj,\n    )\n\n\n@_inherit_docstrings(\n    pd_assert_series_equal, apilink=\"pandas.testing.assert_series_equal\"\n)\ndef assert_series_equal(\n    left,\n    right,\n    check_dtype: bool | Literal[\"equiv\"] = True,\n    check_index_type: bool | Literal[\"equiv\"] = \"equiv\",\n    check_series_type: bool = True,\n    check_names: bool = True,\n    check_exact: bool | lib.NoDefault = lib.no_default,\n    check_datetimelike_compat: bool = False,\n    check_categorical: bool = True,\n    check_category_order: bool = True,\n    check_freq: bool = True,\n    check_flags: bool = True,\n    rtol: float | lib.NoDefault = lib.no_default,\n    atol: float | lib.NoDefault = lib.no_default,\n    obj: str = \"Series\",\n    *,\n    check_index: bool = True,\n    check_like: bool = False,\n) -> None:\n    left = try_cast_to_pandas(left)\n    right = try_cast_to_pandas(right)\n    pd_assert_series_equal(\n        left,\n        right,\n        check_dtype=check_dtype,\n        check_index_type=check_index_type,\n        check_series_type=check_series_type,\n        check_names=check_names,\n        check_exact=check_exact,\n        check_datetimelike_compat=check_datetimelike_compat,\n        check_categorical=check_categorical,\n        
check_category_order=check_category_order,\n        check_freq=check_freq,\n        check_flags=check_flags,\n        rtol=rtol,\n        atol=atol,\n        obj=obj,\n        check_index=check_index,\n        check_like=check_like,\n    )\n\n\n__all__ = [\n    \"assert_extension_array_equal\",\n    \"assert_frame_equal\",\n    \"assert_series_equal\",\n    \"assert_index_equal\",\n]\n"
  },
  {
    "path": "modin/pandas/utils.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Implement utils for pandas component.\"\"\"\n\nfrom __future__ import annotations\n\nfrom typing import Any, Iterator, Optional, Tuple\n\nimport numpy as np\nimport pandas\nfrom pandas._typing import AggFuncType, AggFuncTypeBase, AggFuncTypeDict, IndexLabel\nfrom pandas.util._decorators import doc\n\nfrom modin.utils import hashable\n\n_doc_binary_operation = \"\"\"\nReturn {operation} of {left} and `{right}` (binary operator `{bin_op}`).\n\nParameters\n----------\n{right} : {right_type}\n    The second operand to perform computation.\n\nReturns\n-------\n{returns}\n\"\"\"\n\nSET_DATAFRAME_ATTRIBUTE_WARNING = (\n    \"Modin doesn't allow columns to be created via a new attribute name - see \"\n    + \"https://pandas.pydata.org/pandas-docs/stable/indexing.html#attribute-access\"\n)\n\n\nGET_BACKEND_DOC = \"\"\"\nGet the backend for this ``{class_name}``.\n\nReturns\n-------\nstr\n    The name of the backend.\n\"\"\"\n\nSET_BACKEND_DOC = \"\"\"\nMove the data in this ``{class_name}`` from its current backend to the given one.\n\nFurther operations on this ``{class_name}`` will use the new backend instead of\nthe current one.\n\nParameters\n----------\nbackend : 
str\n    The name of the backend to set.\ninplace : bool, default: False\n    Whether to modify this ``{class_name}`` in place.\nswitch_operation : Optional[str], default: None\n    The name of the operation that triggered the set_backend call.\n    Internal argument used for displaying progress bar information.\n\nReturns\n-------\n``{class_name}`` or None\n    If ``inplace`` is False, returns a new instance of the ``{class_name}``\n    with the given backend. If ``inplace`` is ``True``, returns None.\n\nNotes\n-----\nThis method will attempt to use the starting and new backend's move_from or move_to\nmethods if the backends implement them. Otherwise, it will\n\n    1) convert the data in this ``{class_name}`` to a pandas DataFrame in this\n       Python process\n    2) load the data from pandas to the new backend.\n\nEither step may be slow and/or memory-intensive, especially if this\n``{class_name}``'s data is large, or one or both of the backends do not store\ntheir data locally.\n\"\"\"\n\n\ndef cast_function_modin2pandas(func):\n    \"\"\"\n    Replace Modin functions with pandas functions if `func` is callable.\n\n    Parameters\n    ----------\n    func : object\n\n    Returns\n    -------\n    object\n    \"\"\"\n    if callable(func) and (module := getattr(func, \"__module__\", None)) is not None:\n        if module == \"modin.pandas.series\":\n            func = getattr(pandas.Series, func.__name__)\n        elif module in (\"modin.pandas.dataframe\", \"modin.pandas.base\"):\n            # FIXME: when the method is defined in `modin.pandas.base` file, then the\n            # type cannot be determined, in general there may be an error, but at the\n            # moment it is better.\n            func = getattr(pandas.DataFrame, func.__name__)\n    return func\n\n\ndef is_scalar(obj):\n    \"\"\"\n    Return True if given object is scalar.\n\n    This method works the same as is_scalar method from pandas but\n    it is optimized for Modin frames. 
For BasePandasDataset objects\n    pandas version of is_scalar tries to access missing attribute\n    causing index scan. This triggers execution for lazy frames and\n    we avoid it by handling BasePandasDataset objects separately.\n\n    Parameters\n    ----------\n    obj : object\n        Object to check.\n\n    Returns\n    -------\n    bool\n        True if given object is scalar and False otherwise.\n    \"\"\"\n    from pandas.api.types import is_scalar as pandas_is_scalar\n\n    from .base import BasePandasDataset\n\n    return not isinstance(obj, BasePandasDataset) and pandas_is_scalar(obj)\n\n\ndef get_pandas_backend(dtypes: pandas.Series) -> str | None:\n    \"\"\"\n    Determine the backend based on the `dtypes`.\n\n    Parameters\n    ----------\n    dtypes : pandas.Series\n        DataFrame dtypes.\n\n    Returns\n    -------\n    str | None\n        Backend name.\n    \"\"\"\n    backend = None\n    if any(isinstance(x, pandas.ArrowDtype) for x in dtypes):\n        backend = \"pyarrow\"\n    return backend\n\n\ndef is_full_grab_slice(slc, sequence_len=None):\n    \"\"\"\n    Check that the passed slice grabs the whole sequence.\n\n    Parameters\n    ----------\n    slc : slice\n        Slice object to check.\n    sequence_len : int, optional\n        Length of the sequence to index with the passed `slc`.\n        If not specified the function won't be able to check whether\n        ``slc.stop`` is equal or greater than the sequence length to\n        consider `slc` to be a full-grab, and so, only slices with\n        ``.stop is None`` are considered to be a full-grab.\n\n    Returns\n    -------\n    bool\n    \"\"\"\n    assert isinstance(slc, slice), \"slice object required\"\n    return (\n        slc.start in (None, 0)\n        and slc.step in (None, 1)\n        and (\n            slc.stop is None or (sequence_len is not None and slc.stop >= sequence_len)\n        )\n    )\n\n\ndef from_modin_frame_to_mi(df, sortorder=None, names=None):\n    
\"\"\"\n    Make a pandas.MultiIndex from a DataFrame.\n\n    Parameters\n    ----------\n    df : DataFrame\n        DataFrame to be converted to pandas.MultiIndex.\n    sortorder : int, default: None\n        Level of sortedness (must be lexicographically sorted by that\n        level).\n    names : list-like, optional\n        If no names are provided, use the column names, or tuple of column\n        names if the columns is a MultiIndex. If a sequence, overwrite\n        names with the given sequence.\n\n    Returns\n    -------\n    pandas.MultiIndex\n        The pandas.MultiIndex representation of the given DataFrame.\n    \"\"\"\n    from .dataframe import DataFrame\n\n    if isinstance(df, DataFrame):\n        from modin.error_message import ErrorMessage\n\n        ErrorMessage.default_to_pandas(\"`MultiIndex.from_frame`\")\n        df = df._to_pandas()\n    return _original_pandas_MultiIndex_from_frame(df, sortorder, names)\n\n\ndef is_label(obj, label, axis=0):\n    \"\"\"\n    Check whether or not 'obj' contain column or index level with name 'label'.\n\n    Parameters\n    ----------\n    obj : modin.pandas.DataFrame, modin.pandas.Series or modin.core.storage_formats.base.BaseQueryCompiler\n        Object to check.\n    label : object\n        Label name to check.\n    axis : {0, 1}, default: 0\n        Axis to search for `label` along.\n\n    Returns\n    -------\n    bool\n        True if check is successful, False otherwise.\n    \"\"\"\n    qc = getattr(obj, \"_query_compiler\", obj)\n    return hashable(label) and (\n        label in qc.get_axis(axis ^ 1) or label in qc.get_index_names(axis)\n    )\n\n\ndef check_both_not_none(option1, option2):\n    \"\"\"\n    Check that both `option1` and `option2` are not None.\n\n    Parameters\n    ----------\n    option1 : Any\n        First object to check if not None.\n    option2 : Any\n        Second object to check if not None.\n\n    Returns\n    -------\n    bool\n        True if both option1 and 
option2 are not None, False otherwise.\n    \"\"\"\n    return not (option1 is None or option2 is None)\n\n\ndef broadcast_item(\n    obj,\n    row_lookup,\n    col_lookup,\n    item,\n    need_columns_reindex: bool = True,\n    sort_lookups_and_item: bool = True,\n):\n    \"\"\"\n    Use NumPy to broadcast or reshape item with reindexing.\n\n    Parameters\n    ----------\n    obj : DataFrame or Series or query compiler\n        The object containing the necessary information about the axes.\n    row_lookup : slice or scalar\n        The global row index to locate inside of `item`.\n    col_lookup : range, array, list, slice or scalar\n        The global col index to locate inside of `item`.\n    item : DataFrame, Series, or query_compiler\n        Value that should be broadcast to a new shape of `to_shape`.\n    need_columns_reindex : bool, default: True\n        In the case of assigning columns to a dataframe (broadcasting is\n        part of the flow), reindexing is not needed.\n    sort_lookups_and_item : bool, default: True\n        If set, sort the lookups in ascending order and the item to match. 
This is necessary to\n        ensure writes across multiple partitions are ordered correctly when the lookups are unsorted.\n\n    Returns\n    -------\n    (np.ndarray, Optional[Series], array-like, array-like)\n        * np.ndarray - `item` after it was broadcasted to `to_shape`.\n        * Series - item's dtypes.\n        * array-like - sorted version of `row_lookup` (may or may not be the same reference)\n        * array-like - sorted version of `col_lookup` (may or may not be the same reference)\n\n    Raises\n    ------\n    ValueError\n        1) If `row_lookup` or `col_lookup` contains values missing in\n        DataFrame/Series index or columns correspondingly.\n        2) If `item` cannot be broadcast from its own shape to `to_shape`.\n\n    Notes\n    -----\n    NumPy is memory efficient, there shouldn't be performance issue.\n    \"\"\"\n    # It is valid to pass a DataFrame or Series to __setitem__ that is larger than\n    # the target the user is trying to overwrite.\n\n    from .dataframe import DataFrame\n    from .series import Series\n\n    new_row_len = (\n        len(obj.index[row_lookup]) if isinstance(row_lookup, slice) else len(row_lookup)\n    )\n    new_col_len = (\n        len(obj.columns[col_lookup])\n        if isinstance(col_lookup, slice)\n        else len(col_lookup)\n    )\n    to_shape = new_row_len, new_col_len\n\n    dtypes = None\n    if isinstance(item, (pandas.Series, pandas.DataFrame, Series, DataFrame)):\n        # convert indices in lookups to names, as pandas reindex expects them to be so\n        axes_to_reindex = {}\n        index_values = obj.index[row_lookup]\n        if not index_values.equals(item.index):\n            axes_to_reindex[\"index\"] = index_values\n        if need_columns_reindex and isinstance(item, (pandas.DataFrame, DataFrame)):\n            column_values = obj.columns[col_lookup]\n            if not column_values.equals(item.columns):\n                axes_to_reindex[\"columns\"] = column_values\n      
  # New value for columns/index make that reindex add NaN values\n        if axes_to_reindex:\n            item = item.reindex(**axes_to_reindex)\n\n        dtypes = item.dtypes\n        if not isinstance(dtypes, pandas.Series):\n            dtypes = pandas.Series([dtypes])\n\n    try:\n        # Cast to numpy drop information about heterogeneous types (cast to common)\n        # TODO: we shouldn't do that, maybe there should be the if branch\n        item = np.array(item)\n\n        def sort_index(lookup: Any) -> np.ndarray:\n            \"\"\"\n            Return the argsort and sorted version of the lookup index.\n\n            Values in the lookup are guaranteed by the indexing frontend to be non-negative.\n\n            The sort operation must be stable to ensure proper behavior for iloc set, which\n            will use the last item encountered if two items share an index.\n            \"\"\"\n            if isinstance(lookup, slice):\n                # Special case for if a descending slice is passed\n                # Directly calling np.array(slice(...)) does not work\n                lookup = range(lookup.start or 0, lookup.stop or 0, lookup.step or 0)\n            argsort_index = np.argsort(lookup, kind=\"stable\")\n            return argsort_index, np.array(lookup)[argsort_index]\n\n        def should_avoid_sort(lookup: Any) -> bool:\n            return (\n                not sort_lookups_and_item\n                or (\n                    isinstance(lookup, (range, pandas.RangeIndex, slice))\n                    and lookup.step is not None\n                    and lookup.step > 0\n                )\n                or (isinstance(lookup, slice) and lookup == slice(None))\n            )\n\n        # Fast path to avoid sorting for range/RangeIndex, which are already sorted, or the empty slice\n        avoid_row_lookup_sort = should_avoid_sort(row_lookup)\n        avoid_col_lookup_sort = should_avoid_sort(col_lookup)\n        # Sort both the columns and 
rows if necessary\n        if item.ndim >= 2:\n            if avoid_row_lookup_sort:\n                if not avoid_col_lookup_sort:\n                    col_argsort, col_lookup = sort_index(col_lookup)\n                    item = item[:, col_argsort]\n            elif avoid_col_lookup_sort:\n                row_argsort, row_lookup = sort_index(row_lookup)\n                item = item[row_argsort, :]\n            else:\n                row_argsort, row_lookup = sort_index(row_lookup)\n                col_argsort, col_lookup = sort_index(col_lookup)\n                # Use np.ix_ to handle broadcasting errors\n                item = item[np.ix_(row_argsort, col_argsort)]\n        elif not avoid_row_lookup_sort:\n            # Item is 1D, so only sort row indexer\n            row_argsort, row_lookup = sort_index(row_lookup)\n            item = item[row_argsort]\n        if dtypes is None:\n            dtypes = pandas.Series([item.dtype] * len(col_lookup))\n        if np.prod(to_shape) == np.prod(item.shape):\n            return item.reshape(to_shape), dtypes, row_lookup, col_lookup\n        else:\n            return np.broadcast_to(item, to_shape), dtypes, row_lookup, col_lookup\n    except ValueError:\n        from_shape = np.array(item).shape\n        raise ValueError(\n            f\"could not broadcast input array from shape {from_shape} into shape \"\n            + f\"{to_shape}\"\n        )\n\n\ndef _walk_aggregation_func(\n    key: IndexLabel, value: AggFuncType, depth: int = 0\n) -> Iterator[Tuple[IndexLabel, AggFuncTypeBase, Optional[str], bool]]:\n    \"\"\"\n    Walk over a function from a dictionary-specified aggregation.\n\n    Note: this function is not supposed to be called directly and\n    is used by ``walk_aggregation_dict``.\n\n    Parameters\n    ----------\n    key : IndexLabel\n        A key in a dictionary-specified aggregation for the passed `value`.\n        This means an index label to apply the `value` functions against.\n    value : 
AggFuncType\n        An aggregation function matching the `key`.\n    depth : int, default: 0\n        Specifies a nesting level for the `value` where ``depth=0`` is when\n        you call the function on a raw dictionary value.\n\n    Yields\n    ------\n    (col: IndexLabel, func: AggFuncTypeBase, func_name: Optional[str], col_renaming_required: bool)\n        Yield an aggregation function with its metadata:\n            - `col`: column name to apply the function.\n            - `func`: aggregation function to apply to the column.\n            - `func_name`: custom function name that was specified in the dict.\n            - `col_renaming_required`: whether it's required to rename the\n                `col` into ``(col, func_name)``.\n    \"\"\"\n    col_renaming_required = bool(depth)\n\n    if isinstance(value, (list, tuple)):\n        if depth == 0:\n            for val in value:\n                yield from _walk_aggregation_func(key, val, depth + 1)\n        elif depth == 1:\n            if len(value) != 2:\n                raise ValueError(\n                    f\"Incorrect rename format. 
Renamer must consist of exactly two elements, got: {len(value)}.\"\n                )\n            func_name, func = value\n            yield key, func, func_name, col_renaming_required\n        else:\n            # pandas doesn't support this as well\n            raise NotImplementedError(\"Nested renaming is not supported.\")\n    else:\n        yield key, value, None, col_renaming_required\n\n\ndef walk_aggregation_dict(\n    agg_dict: AggFuncTypeDict,\n) -> Iterator[Tuple[IndexLabel, AggFuncTypeBase, Optional[str], bool]]:\n    \"\"\"\n    Walk over an aggregation dictionary.\n\n    Parameters\n    ----------\n    agg_dict : AggFuncTypeDict\n\n    Yields\n    ------\n    (col: IndexLabel, func: AggFuncTypeBase, func_name: Optional[str], col_renaming_required: bool)\n        Yield an aggregation function with its metadata:\n            - `col`: column name to apply the function.\n            - `func`: aggregation function to apply to the column.\n            - `func_name`: custom function name that was specified in the dict.\n            - `col_renaming_required`: whether it's required to rename the\n                `col` into ``(col, func_name)``.\n    \"\"\"\n    for key, value in agg_dict.items():\n        yield from _walk_aggregation_func(key, value)\n\n\ndef _doc_binary_op(operation, bin_op, left=\"Series\", right=\"right\", returns=\"Series\"):\n    \"\"\"\n    Return callable documenting `Series` or `DataFrame` binary operator.\n\n    Parameters\n    ----------\n    operation : str\n        Operation name.\n    bin_op : str\n        Binary operation name.\n    left : str, default: 'Series'\n        The left object to document.\n    right : str, default: 'right'\n        The right operand name.\n    returns : str, default: 'Series'\n        Type of returns.\n\n    Returns\n    -------\n    callable\n    \"\"\"\n    if left == \"Series\":\n        right_type = \"Series or scalar value\"\n    elif left == \"DataFrame\":\n        right_type = \"DataFrame, 
Series or scalar value\"\n    elif left == \"BasePandasDataset\":\n        right_type = \"BasePandasDataset or scalar value\"\n    else:\n        raise NotImplementedError(\n            f\"Only 'BasePandasDataset', `DataFrame` and 'Series' `left` are allowed, actually passed: {left}\"\n        )\n    doc_op = doc(\n        _doc_binary_operation,\n        operation=operation,\n        right=right,\n        right_type=right_type,\n        bin_op=bin_op,\n        returns=returns,\n        left=left,\n    )\n\n    return doc_op\n\n\n_original_pandas_MultiIndex_from_frame = pandas.MultiIndex.from_frame\npandas.MultiIndex.from_frame = from_modin_frame_to_mi\n"
  },
  {
    "path": "modin/pandas/window.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Implement Window and Rolling public API.\"\"\"\n\nfrom __future__ import annotations\n\nfrom typing import TYPE_CHECKING, Optional, Union\n\nimport pandas.core.window.rolling\nfrom pandas.core.dtypes.common import is_list_like\n\nfrom modin.error_message import ErrorMessage\nfrom modin.logging import ClassLogger\nfrom modin.pandas.utils import cast_function_modin2pandas\nfrom modin.utils import _inherit_docstrings\n\nif TYPE_CHECKING:\n    from modin.core.storage_formats import BaseQueryCompiler\n    from modin.pandas import DataFrame, Series\n\n\n@_inherit_docstrings(pandas.core.window.rolling.Window)\nclass Window(ClassLogger):\n    _dataframe: Union[DataFrame, Series]\n    _query_compiler: BaseQueryCompiler\n\n    def __init__(\n        self,\n        dataframe: Union[DataFrame, Series],\n        window=None,\n        min_periods=None,\n        center=False,\n        win_type=None,\n        on=None,\n        axis=0,\n        closed=None,\n        step=None,\n        method=\"single\",\n    ):\n        self._dataframe = dataframe\n        self._query_compiler = dataframe._query_compiler\n        self.window_kwargs = {\n            \"window\": window,\n      
      \"min_periods\": min_periods,\n            \"center\": center,\n            \"win_type\": win_type,\n            \"on\": on,\n            \"axis\": axis,\n            \"closed\": closed,\n            \"step\": step,\n            \"method\": method,\n        }\n        self.axis = axis\n\n    def mean(self, *args, **kwargs):\n        return self._dataframe.__constructor__(\n            query_compiler=self._query_compiler.window_mean(\n                self.axis, self.window_kwargs, *args, **kwargs\n            )\n        )\n\n    def sum(self, *args, **kwargs):\n        return self._dataframe.__constructor__(\n            query_compiler=self._query_compiler.window_sum(\n                self.axis, self.window_kwargs, *args, **kwargs\n            )\n        )\n\n    def var(self, ddof=1, *args, **kwargs):\n        return self._dataframe.__constructor__(\n            query_compiler=self._query_compiler.window_var(\n                self.axis, self.window_kwargs, ddof, *args, **kwargs\n            )\n        )\n\n    def std(self, ddof=1, *args, **kwargs):\n        return self._dataframe.__constructor__(\n            query_compiler=self._query_compiler.window_std(\n                self.axis, self.window_kwargs, ddof, *args, **kwargs\n            )\n        )\n\n\n@_inherit_docstrings(\n    pandas.core.window.rolling.Rolling,\n    excluded=[pandas.core.window.rolling.Rolling.__init__],\n)\nclass Rolling(ClassLogger):\n    def __init__(\n        self,\n        dataframe,\n        window=None,\n        min_periods=None,\n        center=False,\n        win_type=None,\n        on=None,\n        axis=0,\n        closed=None,\n        step=None,\n        method=\"single\",\n    ):\n        if step is not None:\n            raise NotImplementedError(\"step parameter is not implemented yet.\")\n        self._dataframe = dataframe\n        self._query_compiler = dataframe._query_compiler\n        self.rolling_kwargs = {\n            \"window\": window,\n            
\"min_periods\": min_periods,\n            \"center\": center,\n            \"win_type\": win_type,\n            \"on\": on,\n            \"axis\": axis,\n            \"closed\": closed,\n            \"step\": step,\n            \"method\": method,\n        }\n        self.axis = axis\n\n    def _call_qc_method(self, method_name, *args, **kwargs):\n        \"\"\"\n        Call a query compiler method for the specified rolling aggregation.\n\n        Parameters\n        ----------\n        method_name : str\n            Name of the aggregation.\n        *args : tuple\n            Positional arguments to pass to the query compiler method.\n        **kwargs : dict\n            Keyword arguments to pass to the query compiler method.\n\n        Returns\n        -------\n        BaseQueryCompiler\n            QueryCompiler holding the result of the aggregation.\n        \"\"\"\n        qc_method = getattr(self._query_compiler, f\"rolling_{method_name}\")\n        return qc_method(self.axis, self.rolling_kwargs, *args, **kwargs)\n\n    def _aggregate(self, method_name, *args, **kwargs):\n        \"\"\"\n        Run the specified rolling aggregation.\n\n        Parameters\n        ----------\n        method_name : str\n            Name of the aggregation.\n        *args : tuple\n            Positional arguments to pass to the aggregation.\n        **kwargs : dict\n            Keyword arguments to pass to the aggregation.\n\n        Returns\n        -------\n        DataFrame or Series\n            Result of the aggregation.\n        \"\"\"\n        qc_result = self._call_qc_method(method_name, *args, **kwargs)\n        return self._dataframe.__constructor__(query_compiler=qc_result)\n\n    def count(self):\n        return self._aggregate(\"count\")\n\n    def sem(self, *args, **kwargs):\n        return self._aggregate(\"sem\", *args, **kwargs)\n\n    def sum(self, *args, **kwargs):\n        return self._aggregate(\"sum\", *args, **kwargs)\n\n    def mean(self, *args, 
**kwargs):\n        return self._aggregate(\"mean\", *args, **kwargs)\n\n    def median(self, **kwargs):\n        return self._aggregate(\"median\", **kwargs)\n\n    def var(self, ddof=1, *args, **kwargs):\n        return self._aggregate(\"var\", ddof, *args, **kwargs)\n\n    def std(self, ddof=1, *args, **kwargs):\n        return self._aggregate(\"std\", ddof, *args, **kwargs)\n\n    def min(self, *args, **kwargs):\n        return self._aggregate(\"min\", *args, **kwargs)\n\n    def max(self, *args, **kwargs):\n        return self._aggregate(\"max\", *args, **kwargs)\n\n    def corr(self, other=None, pairwise=None, *args, **kwargs):\n        from .dataframe import DataFrame\n        from .series import Series\n\n        if isinstance(other, DataFrame):\n            other = other._query_compiler.to_pandas()\n        elif isinstance(other, Series):\n            other = other._query_compiler.to_pandas().squeeze()\n\n        return self._aggregate(\"corr\", other, pairwise, *args, **kwargs)\n\n    def cov(self, other=None, pairwise=None, ddof: Optional[int] = 1, **kwargs):\n        from .dataframe import DataFrame\n        from .series import Series\n\n        if isinstance(other, DataFrame):\n            other = other._query_compiler.to_pandas()\n        elif isinstance(other, Series):\n            other = other._query_compiler.to_pandas().squeeze()\n\n        return self._aggregate(\"cov\", other, pairwise, ddof, **kwargs)\n\n    def skew(self, **kwargs):\n        return self._aggregate(\"skew\", **kwargs)\n\n    def kurt(self, **kwargs):\n        return self._aggregate(\"kurt\", **kwargs)\n\n    def apply(\n        self,\n        func,\n        raw=False,\n        engine=\"cython\",\n        engine_kwargs=None,\n        args=None,\n        kwargs=None,\n    ):\n        func = cast_function_modin2pandas(func)\n        return self._aggregate(\"apply\", func, raw, engine, engine_kwargs, args, kwargs)\n\n    def aggregate(\n        self,\n        func,\n        
*args,\n        **kwargs,\n    ):\n        from .dataframe import DataFrame\n\n        dataframe = DataFrame(\n            query_compiler=self._call_qc_method(\n                \"aggregate\",\n                func,\n                *args,\n                **kwargs,\n            )\n        )\n        if isinstance(self._dataframe, DataFrame):\n            return dataframe\n        elif is_list_like(func) and dataframe.columns.nlevels > 1:\n            dataframe.columns = dataframe.columns.droplevel()\n            return dataframe\n        else:\n            return dataframe.squeeze()\n\n    agg = aggregate\n\n    def quantile(self, q, interpolation=\"linear\", **kwargs):\n        return self._aggregate(\"quantile\", q, interpolation, **kwargs)\n\n    def rank(\n        self, method=\"average\", ascending=True, pct=False, numeric_only=False, **kwargs\n    ):\n        return self._aggregate(\"rank\", method, ascending, pct, numeric_only, **kwargs)\n\n\n@_inherit_docstrings(Rolling)\nclass RollingGroupby(Rolling):\n    def __init__(self, groupby_obj, *args, **kwargs):\n        self._as_index = groupby_obj._kwargs.get(\"as_index\", True)\n        self._groupby_obj = (\n            groupby_obj if self._as_index else groupby_obj._override(as_index=True)\n        )\n        super().__init__(self._groupby_obj._df, *args, **kwargs)\n\n    def sem(self, *args, **kwargs):\n        ErrorMessage.mismatch_with_pandas(\n            operation=\"RollingGroupby.sem() when 'as_index=False'\",\n            message=(\n                \"The group columns won't be involved in the aggregation.\\n\"\n                + \"See this gh-issue for more information: https://github.com/modin-project/modin/issues/6291\"\n            ),\n        )\n        return super().sem(*args, **kwargs)\n\n    def corr(self, other=None, pairwise=None, *args, **kwargs):\n        # pandas behavior is that it always assumes that 'as_index=True' for the '.corr()' method\n        return super().corr(\n            
*args, as_index=True, other=other, pairwise=pairwise, **kwargs\n        )\n\n    def cov(self, other=None, pairwise=None, ddof: Optional[int] = 1, **kwargs):\n        # pandas behavior is that it always assumes that 'as_index=True' for the '.cov()' method\n        return super().cov(as_index=True, other=other, pairwise=pairwise, **kwargs)\n\n    def _aggregate(self, method_name, *args, as_index=None, **kwargs):\n        \"\"\"\n        Run the specified rolling aggregation.\n\n        Parameters\n        ----------\n        method_name : str\n            Name of the aggregation.\n        *args : tuple\n            Positional arguments to pass to the aggregation.\n        as_index : bool, optional\n            Whether the result should have the group labels as index levels or as columns.\n            If not specified the parameter value will be taken from groupby kwargs.\n        **kwargs : dict\n            Keyword arguments to pass to the aggregation.\n\n        Returns\n        -------\n        DataFrame or Series\n            Result of the aggregation.\n        \"\"\"\n        res = self._groupby_obj._wrap_aggregation(\n            qc_method=type(self._query_compiler).groupby_rolling,\n            numeric_only=False,\n            agg_args=args,\n            agg_kwargs=kwargs,\n            agg_func=method_name,\n            rolling_kwargs=self.rolling_kwargs,\n        )\n\n        if as_index is None:\n            as_index = self._as_index\n\n        if not as_index:\n            res = res.reset_index(\n                level=[i for i in range(len(self._groupby_obj._internal_by))],\n                drop=False,\n            )\n\n        return res\n\n    def _call_qc_method(self, method_name, *args, **kwargs):\n        return self._aggregate(method_name, *args, **kwargs)._query_compiler\n\n\n@_inherit_docstrings(\n    pandas.core.window.expanding.Expanding,\n    excluded=[pandas.core.window.expanding.Expanding.__init__],\n)\nclass Expanding(ClassLogger):\n    def 
__init__(self, dataframe, min_periods=1, axis=0, method=\"single\"):\n        self._dataframe = dataframe\n        self._query_compiler = dataframe._query_compiler\n        self.expanding_args = [min_periods, axis, method]\n        self.axis = axis\n\n    def aggregate(self, func, *args, **kwargs):\n        from .dataframe import DataFrame\n\n        dataframe = DataFrame(\n            query_compiler=self._query_compiler.expanding_aggregate(\n                self.axis, self.expanding_args, func, *args, **kwargs\n            )\n        )\n        if isinstance(self._dataframe, DataFrame):\n            return dataframe\n        elif is_list_like(func):\n            dataframe.columns = dataframe.columns.droplevel()\n            return dataframe\n        else:\n            return dataframe.squeeze()\n\n    def sum(self, *args, **kwargs):\n        return self._dataframe.__constructor__(\n            query_compiler=self._query_compiler.expanding_sum(\n                self.axis, self.expanding_args, *args, **kwargs\n            )\n        )\n\n    def min(self, *args, **kwargs):\n        return self._dataframe.__constructor__(\n            query_compiler=self._query_compiler.expanding_min(\n                self.axis, self.expanding_args, *args, **kwargs\n            )\n        )\n\n    def max(self, *args, **kwargs):\n        return self._dataframe.__constructor__(\n            query_compiler=self._query_compiler.expanding_max(\n                self.axis, self.expanding_args, *args, **kwargs\n            )\n        )\n\n    def mean(self, *args, **kwargs):\n        return self._dataframe.__constructor__(\n            query_compiler=self._query_compiler.expanding_mean(\n                self.axis, self.expanding_args, *args, **kwargs\n            )\n        )\n\n    def median(self, numeric_only=False, engine=None, engine_kwargs=None, **kwargs):\n        return self._dataframe.__constructor__(\n            query_compiler=self._query_compiler.expanding_median(\n              
  self.axis,\n                self.expanding_args,\n                numeric_only=numeric_only,\n                engine=engine,\n                engine_kwargs=engine_kwargs,\n                **kwargs,\n            )\n        )\n\n    def var(self, *args, **kwargs):\n        return self._dataframe.__constructor__(\n            query_compiler=self._query_compiler.expanding_var(\n                self.axis, self.expanding_args, *args, **kwargs\n            )\n        )\n\n    def std(self, *args, **kwargs):\n        return self._dataframe.__constructor__(\n            query_compiler=self._query_compiler.expanding_std(\n                self.axis, self.expanding_args, *args, **kwargs\n            )\n        )\n\n    def count(self, *args, **kwargs):\n        return self._dataframe.__constructor__(\n            query_compiler=self._query_compiler.expanding_count(\n                self.axis, self.expanding_args, *args, **kwargs\n            )\n        )\n\n    def cov(self, other=None, pairwise=None, ddof=1, numeric_only=False, **kwargs):\n        from .dataframe import DataFrame\n        from .series import Series\n\n        return self._dataframe.__constructor__(\n            query_compiler=self._query_compiler.expanding_cov(\n                self.axis,\n                self.expanding_args,\n                squeeze_self=isinstance(self._dataframe, Series),\n                squeeze_other=isinstance(other, Series),\n                other=(\n                    other._query_compiler\n                    if isinstance(other, (Series, DataFrame))\n                    else other\n                ),\n                pairwise=pairwise,\n                ddof=ddof,\n                numeric_only=numeric_only,\n                **kwargs,\n            )\n        )\n\n    def corr(self, other=None, pairwise=None, ddof=1, numeric_only=False, **kwargs):\n        from .dataframe import DataFrame\n        from .series import Series\n\n        return self._dataframe.__constructor__(\n        
    query_compiler=self._query_compiler.expanding_corr(\n                self.axis,\n                self.expanding_args,\n                squeeze_self=isinstance(self._dataframe, Series),\n                squeeze_other=isinstance(other, Series),\n                other=(\n                    other._query_compiler\n                    if isinstance(other, (Series, DataFrame))\n                    else other\n                ),\n                pairwise=pairwise,\n                ddof=ddof,\n                numeric_only=numeric_only,\n                **kwargs,\n            )\n        )\n\n    def sem(self, ddof=1, numeric_only=False, *args, **kwargs):\n        return self._dataframe.__constructor__(\n            query_compiler=self._query_compiler.expanding_sem(\n                self.axis,\n                self.expanding_args,\n                ddof=ddof,\n                numeric_only=numeric_only,\n                *args,\n                **kwargs,\n            )\n        )\n\n    def skew(self, numeric_only=False, **kwargs):\n        return self._dataframe.__constructor__(\n            query_compiler=self._query_compiler.expanding_skew(\n                self.axis, self.expanding_args, numeric_only=numeric_only, **kwargs\n            )\n        )\n\n    def kurt(self, **kwargs):\n        return self._dataframe.__constructor__(\n            query_compiler=self._query_compiler.expanding_kurt(\n                self.axis, self.expanding_args, **kwargs\n            )\n        )\n\n    def quantile(self, q, interpolation=\"linear\", **kwargs):\n        return self._dataframe.__constructor__(\n            query_compiler=self._query_compiler.expanding_quantile(\n                self.axis, self.expanding_args, q, interpolation, **kwargs\n            )\n        )\n\n    def rank(\n        self, method=\"average\", ascending=True, pct=False, numeric_only=False, **kwargs\n    ):\n        return self._dataframe.__constructor__(\n            
query_compiler=self._query_compiler.expanding_rank(\n                self.axis,\n                self.expanding_args,\n                method,\n                ascending,\n                pct,\n                numeric_only,\n                **kwargs,\n            )\n        )\n"
  },
  {
    "path": "modin/polars/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nfrom modin.polars.dataframe import DataFrame\nfrom modin.polars.series import Series\n\n__all__ = [\"DataFrame\", \"Series\"]\n"
  },
  {
    "path": "modin/polars/base.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Implement DataFrame/Series public API as polars does.\"\"\"\n\nfrom __future__ import annotations\n\nfrom typing import TYPE_CHECKING, Any, Sequence\n\nimport polars\n\nfrom modin.core.storage_formats import BaseQueryCompiler\n\nif TYPE_CHECKING:\n    import numpy as np\n\n    from modin.polars import DataFrame, Series\n\n\nclass BasePolarsDataset:\n\n    _query_compiler: BaseQueryCompiler\n\n    @property\n    def __constructor__(self):\n        \"\"\"\n        DataFrame constructor.\n\n        Returns:\n            Constructor of the DataFrame\n        \"\"\"\n        return type(self)\n\n    def __eq__(self, other) -> \"BasePolarsDataset\":\n        return self.__constructor__(\n            _query_compiler=self._query_compiler.eq(\n                other._query_compiler if isinstance(other, BasePolarsDataset) else other\n            )\n        )\n\n    def __ne__(self, other) -> \"BasePolarsDataset\":\n        return self.__constructor__(\n            _query_compiler=self._query_compiler.ne(\n                other._query_compiler if isinstance(other, BasePolarsDataset) else other\n            )\n        )\n\n    def __add__(self, other) -> 
\"BasePolarsDataset\":\n        return self.__constructor__(\n            _query_compiler=self._query_compiler.add(\n                other._query_compiler if isinstance(other, BasePolarsDataset) else other\n            )\n        )\n\n    def __sub__(self, other) -> \"BasePolarsDataset\":\n        return self.__constructor__(\n            _query_compiler=self._query_compiler.sub(\n                other._query_compiler if isinstance(other, BasePolarsDataset) else other\n            )\n        )\n\n    def __mul__(self, other) -> \"BasePolarsDataset\":\n        return self.__constructor__(\n            _query_compiler=self._query_compiler.mul(\n                other._query_compiler if isinstance(other, BasePolarsDataset) else other\n            )\n        )\n\n    def __truediv__(self, other) -> \"BasePolarsDataset\":\n        return self.__constructor__(\n            _query_compiler=self._query_compiler.truediv(\n                other._query_compiler if isinstance(other, BasePolarsDataset) else other\n            )\n        )\n\n    def __floordiv__(self, other) -> \"BasePolarsDataset\":\n        return self.__constructor__(\n            _query_compiler=self._query_compiler.floordiv(\n                other._query_compiler if isinstance(other, BasePolarsDataset) else other\n            )\n        )\n\n    def __mod__(self, other) -> \"BasePolarsDataset\":\n        return self.__constructor__(\n            _query_compiler=self._query_compiler.mod(\n                other._query_compiler if isinstance(other, BasePolarsDataset) else other\n            )\n        )\n\n    def __pow__(self, other) -> \"BasePolarsDataset\":\n        return self.__constructor__(\n            _query_compiler=self._query_compiler.pow(\n                other._query_compiler if isinstance(other, BasePolarsDataset) else other\n            )\n        )\n\n    def __and__(self, other) -> \"BasePolarsDataset\":\n        return self.__constructor__(\n            
_query_compiler=self._query_compiler.__and__(\n                other._query_compiler if isinstance(other, BasePolarsDataset) else other\n            )\n        )\n\n    def __or__(self, other) -> \"BasePolarsDataset\":\n        return self.__constructor__(\n            _query_compiler=self._query_compiler.__or__(\n                other._query_compiler if isinstance(other, BasePolarsDataset) else other\n            )\n        )\n\n    def __xor__(self, other) -> \"BasePolarsDataset\":\n        return self.__constructor__(\n            _query_compiler=self._query_compiler.__xor__(\n                other._query_compiler if isinstance(other, BasePolarsDataset) else other\n            )\n        )\n\n    def __lt__(self, other) -> \"BasePolarsDataset\":\n        return self.__constructor__(\n            _query_compiler=self._query_compiler.lt(\n                other._query_compiler if isinstance(other, BasePolarsDataset) else other\n            )\n        )\n\n    def __le__(self, other) -> \"BasePolarsDataset\":\n        return self.__constructor__(\n            _query_compiler=self._query_compiler.le(\n                other._query_compiler if isinstance(other, BasePolarsDataset) else other\n            )\n        )\n\n    def __gt__(self, other) -> \"BasePolarsDataset\":\n        return self.__constructor__(\n            _query_compiler=self._query_compiler.gt(\n                other._query_compiler if isinstance(other, BasePolarsDataset) else other\n            )\n        )\n\n    def __ge__(self, other) -> \"BasePolarsDataset\":\n        return self.__constructor__(\n            _query_compiler=self._query_compiler.ge(\n                other._query_compiler if isinstance(other, BasePolarsDataset) else other\n            )\n        )\n\n    def __invert__(self) -> \"BasePolarsDataset\":\n        return self.__constructor__(_query_compiler=self._query_compiler.invert())\n\n    def __neg__(self) -> \"BasePolarsDataset\":\n        return 
self.__constructor__(_query_compiler=self._query_compiler.negative())\n\n    def __abs__(self) -> \"BasePolarsDataset\":\n        return self.__constructor__(_query_compiler=self._query_compiler.abs())\n\n    def is_duplicated(self):\n        \"\"\"\n        Determine whether each row is a duplicate in the DataFrame.\n\n        Returns:\n            DataFrame with True for each duplicate row, and False for unique rows.\n        \"\"\"\n        return self.__constructor__(\n            _query_compiler=self._query_compiler.duplicated(keep=False)\n        )\n\n    def is_empty(self) -> bool:\n        \"\"\"\n        Determine whether the DataFrame is empty.\n\n        Returns:\n            True if the DataFrame is empty, False otherwise\n        \"\"\"\n        return self.height == 0\n\n    def is_unique(self):\n        \"\"\"\n        Determine whether each row is unique in the DataFrame.\n\n        Returns:\n            DataFrame with True for each unique row, and False for duplicate rows.\n        \"\"\"\n        return self.__constructor__(\n            _query_compiler=self._query_compiler.duplicated(keep=False).invert()\n        )\n\n    def n_chunks(self, strategy: str = \"first\") -> int | list[int]:\n        raise NotImplementedError(\"not yet\")\n\n    def to_arrow(self):\n        \"\"\"\n        Convert the DataFrame to Arrow format.\n\n        Returns:\n            Arrow representation of the DataFrame.\n        \"\"\"\n        return polars.from_pandas(self._query_compiler.to_pandas()).to_arrow()\n\n    def to_jax(self, device=None):\n        \"\"\"\n        Convert the DataFrame to JAX format.\n\n        Args:\n            device: The device to use.\n\n        Returns:\n            JAX representation of the DataFrame.\n        \"\"\"\n        return polars.from_pandas(self._query_compiler.to_pandas()).to_jax(\n            device=device\n        )\n\n    def to_numpy(\n        self,\n        *,\n        writable: bool = False,\n        allow_copy: bool = 
True,\n        use_pyarrow: bool | None = None,\n        zero_copy_only: bool | None = None,\n    ) -> \"np.ndarray\":\n        \"\"\"\n        Convert the DataFrame to a NumPy representation.\n\n        Args:\n            writable: Whether the NumPy array should be writable.\n            allow_copy: Whether to allow copying the data.\n            use_pyarrow: Whether to use PyArrow for conversion.\n            zero_copy_only: Whether to use zero-copy conversion only.\n\n        Returns:\n            NumPy representation of the DataFrame.\n        \"\"\"\n        return polars.from_pandas(self._query_compiler.to_pandas()).to_numpy(\n            writable=writable,\n            allow_copy=allow_copy,\n            use_pyarrow=use_pyarrow,\n            zero_copy_only=zero_copy_only,\n        )\n\n    def to_torch(self):\n        \"\"\"\n        Convert the DataFrame to PyTorch format.\n\n        Returns:\n            PyTorch representation of the DataFrame.\n        \"\"\"\n        return polars.from_pandas(self._query_compiler.to_pandas()).to_torch()\n\n    def bottom_k(\n        self,\n        k: int,\n        *,\n        by,\n        descending: bool | Sequence[bool] = False,\n        nulls_last: bool | Sequence[bool] | None = None,\n        maintain_order: bool | None = None,\n    ) -> \"BasePolarsDataset\":\n        raise NotImplementedError(\"not yet\")\n\n    def cast(self, dtypes, *, strict: bool = True) -> \"BasePolarsDataset\":\n        \"\"\"\n        Cast the DataFrame to the given dtypes.\n\n        Args:\n            dtypes: Dtypes to cast the DataFrame to.\n            strict: Whether to enforce strict casting.\n\n        Returns:\n            DataFrame with the new dtypes.\n        \"\"\"\n        # TODO: support strict\n        return self.__constructor__(_query_compiler=self._query_compiler.astype(dtypes))\n\n    def clone(self) -> \"BasePolarsDataset\":\n        \"\"\"\n        Clone the DataFrame.\n\n        Returns:\n            Cloned DataFrame.\n 
       \"\"\"\n        return self.copy()\n\n    def drop_nulls(self, subset=None):\n        \"\"\"\n        Drop the rows with null values.\n\n        Args:\n            subset: Columns to consider for null values.\n\n        Returns:\n            DataFrame with the rows with null values dropped.\n        \"\"\"\n        return self.__constructor__(\n            _query_compiler=self._query_compiler.dropna(subset=subset, how=\"any\")\n        )\n\n    def explode(self, columns: str, *more_columns: str) -> \"BasePolarsDataset\":\n        \"\"\"\n        Explode the given columns to long format.\n\n        Args:\n            columns: Columns to explode.\n            more_columns: Additional columns to explode.\n\n        Returns:\n            DataFrame with the columns exploded.\n        \"\"\"\n        if len(more_columns) > 0:\n            columns = [columns, *more_columns]\n        return self.__constructor__(\n            _query_compiler=self._query_compiler.explode(columns)\n        )\n\n    def extend(self, other: \"BasePolarsDataset\") -> \"BasePolarsDataset\":\n        \"\"\"\n        Extend the DataFrame with another DataFrame.\n\n        Args:\n            other: DataFrame to extend with.\n\n        Returns:\n            Extended DataFrame for convenience. 
DataFrame is modified in place.\n        \"\"\"\n        self._query_compiler = self._query_compiler.concat(\n            axis=0, other=other._query_compiler\n        )\n        return self\n\n    def fill_nan(self, value):\n        \"\"\"\n        Fill NaN values with the given value.\n\n        Args:\n            value: Value to fill NaN values with.\n\n        Returns:\n            DataFrame with NaN values filled.\n        \"\"\"\n        # TODO: Handle null values differently than nan.\n        return self.__constructor__(_query_compiler=self._query_compiler.fillna(value))\n\n    def fill_null(\n        self,\n        value: Any | None = None,\n        strategy: str | None = None,\n        limit: int | None = None,\n        *,\n        matches_supertype: bool = True,\n    ) -> \"BasePolarsDataset\":\n        \"\"\"\n        Fill null values with the given value or strategy.\n\n        Args:\n            value: Value to fill null values with.\n            strategy: Strategy to fill null values with.\n            limit: Maximum number of null values to fill.\n            matches_supertype: Whether the value matches the supertype.\n\n        Returns:\n            DataFrame with null values filled.\n        \"\"\"\n        if strategy == \"forward\":\n            strategy = \"ffill\"\n        elif strategy == \"backward\":\n            strategy = \"bfill\"\n        elif strategy in [\"min\", \"max\", \"mean\"]:\n            value = getattr(self, strategy)()._query_compiler\n            strategy = None\n        elif strategy == \"zero\":\n            strategy = None\n            value = 0\n        elif strategy == \"one\":\n            strategy = None\n            value = 1\n        else:\n            raise ValueError(f\"Unknown strategy: {strategy}\")\n        return self.__constructor__(\n            _query_compiler=self._query_compiler.fillna(\n                value=value, method=strategy, limit=limit\n            )\n        )\n\n    def filter(self, 
*predicates, **constraints: Any) -> \"BasePolarsDataset\":\n        predicates = predicates[0]\n        for p in predicates[1:]:\n            predicates = predicates & p\n        if constraints:\n            raise NotImplementedError(\"Named constraints are not supported\")\n        return self.__constructor__(\n            _query_compiler=self._query_compiler.getitem_array(\n                predicates._query_compiler\n            )\n        )\n\n    def gather_every(self, n: int, offset: int = 0) -> \"BasePolarsDataset\":\n        \"\"\"\n        Gather every nth row of the DataFrame.\n\n        Args:\n            n: Number of rows to gather.\n            offset: Offset to start gathering from.\n\n        Returns:\n            DataFrame with every nth row gathered.\n        \"\"\"\n        return self.__constructor__(\n            _query_compiler=self._query_compiler.getitem_row_array(\n                slice(offset, None, n)\n            )\n        )\n\n    def head(self, n: int = 5) -> \"BasePolarsDataset\":\n        \"\"\"\n        Get the first n rows of the DataFrame.\n\n        Args:\n            n: Number of rows to get.\n\n        Returns:\n            DataFrame with the first n rows.\n        \"\"\"\n        return self.__constructor__(\n            _query_compiler=self._query_compiler.getitem_row_array(slice(0, n))\n        )\n\n    def limit(self, n: int = 10) -> \"BasePolarsDataset\":\n        \"\"\"\n        Limit the DataFrame to the first n rows.\n\n        Args:\n            n: Number of rows to limit to.\n\n        Returns:\n            DataFrame with the first n rows.\n        \"\"\"\n        return self.head(n)\n\n    def interpolate(self) -> \"BasePolarsDataset\":\n        \"\"\"\n        Interpolate values in the DataFrame using a linear method.\n\n        Returns:\n            DataFrame with the interpolated values.\n        \"\"\"\n        return self.__constructor__(_query_compiler=self._query_compiler.interpolate())\n\n    def sample(\n        
self,\n        n: int | \"Series\" | None = None,\n        *,\n        fraction: float | \"Series\" | None = None,\n        with_replacement: bool = False,\n        shuffle: bool = False,\n        seed: int | None = None,\n    ) -> \"BasePolarsDataset\":\n        \"\"\"\n        Sample the DataFrame.\n\n        Args:\n            n: Number of rows to sample.\n            fraction: Fraction of rows to sample.\n            with_replacement: Whether to sample with replacement.\n            shuffle: Whether to shuffle the rows.\n            seed: Seed for the random number generator.\n\n        Returns:\n            Sampled DataFrame.\n        \"\"\"\n        return self.__constructor__(\n            _query_compiler=self.to_pandas()\n            .sample(n=n, frac=fraction, replace=with_replacement, random_state=seed)\n            ._query_compiler\n        )\n\n    def shift(self, n: int = 1, *, fill_value=None) -> \"DataFrame\":\n        raise NotImplementedError(\"not yet\")\n\n    def shrink_to_fit(self) -> \"DataFrame\":\n        \"\"\"\n        Shrink the DataFrame to fit in memory.\n\n        Returns:\n            A copy of the DataFrame.\n        \"\"\"\n        return self.copy()\n\n    def slice(self, offset: int, length: int) -> \"DataFrame\":\n        \"\"\"\n        Slice the DataFrame.\n\n        Args:\n            offset: Offset to start the slice from.\n            length: Length of the slice.\n\n        Returns:\n            Sliced DataFrame.\n        \"\"\"\n        return self.__constructor__(\n            _query_compiler=self._query_compiler.getitem_row_array(\n                slice(offset, offset + length)\n            )\n        )\n\n    def sort(\n        self,\n        by,\n        *more_by,\n        descending: bool | Sequence[bool] = False,\n        nulls_last: bool | Sequence[bool] | None = None,\n        multithreaded: bool = True,\n        maintain_order: bool = False,\n    ) -> \"DataFrame\":\n        \"\"\"\n        Sort the DataFrame.\n\n  
      Args:\n            by: Column to sort by.\n            more_by: Additional columns to sort by.\n            descending: Whether to sort in descending order.\n            nulls_last: Whether to sort null values last.\n            multithreaded: Whether to use multiple threads.\n            maintain_order: Whether to maintain the order of the DataFrame.\n\n        Returns:\n            Sorted DataFrame.\n        \"\"\"\n        # TODO: support expressions in by\n        if len(more_by) > 0:\n            by = [by, *more_by]\n        return self.__constructor__(\n            _query_compiler=self._query_compiler.sort_rows_by_column_values(\n                by=by,\n                reverse=descending,\n                nulls_first=None if nulls_last is None else not nulls_last,\n            )\n        )\n\n    def tail(self, n: int = 5) -> \"DataFrame\":\n        \"\"\"\n        Get the last n rows of the DataFrame.\n\n        Args:\n            n: Number of rows to get.\n\n        Returns:\n            DataFrame with the last n rows.\n        \"\"\"\n        return self.__constructor__(\n            _query_compiler=self._query_compiler.getitem_row_array(slice(-n, None))\n        )\n\n    def to_dummies(\n        self,\n        columns: str | Sequence[str] | None = None,\n        *,\n        separator: str = \"_\",\n        drop_first: bool = False,\n    ) -> \"DataFrame\":\n        \"\"\"\n        Convert the columns to dummy variables.\n\n        Args:\n            columns: Columns to convert to dummy variables.\n            separator: Separator for the dummy variables.\n            drop_first: Whether to drop the first dummy variable.\n\n        Returns:\n            DataFrame with the columns converted to dummy variables.\n        \"\"\"\n        if columns is not None:\n            if isinstance(columns, str):\n                columns = [columns]\n        else:\n            columns = self.columns\n        result = self.__constructor__(\n            
_query_compiler=self._query_compiler.get_dummies(columns)\n        )\n        if separator != \"_\":\n            result.columns = [\n                c.replace(separator, \"_\") if separator in c else c\n                for c in result.columns\n            ]\n        if drop_first:\n            columns_to_drop = [\n                next(\n                    result_col\n                    for result_col in result.columns\n                    if result_col.startswith(c)\n                )\n                for c in columns\n            ]\n            return result.drop(columns_to_drop)\n        else:\n            return result\n\n    def top_k(\n        self,\n        k: int,\n        *,\n        by,\n        descending: bool | Sequence[bool] = False,\n        nulls_last: bool | Sequence[bool] | None = None,\n        maintain_order: bool | None = None,\n    ) -> \"DataFrame\":\n        raise NotImplementedError(\"not yet\")\n\n    def unique(self, subset=None, *, keep=\"any\", maintain_order: bool = False):\n        \"\"\"\n        Get the unique values in each column.\n\n        Args:\n            subset: Columns to consider for unique values.\n            keep: Strategy to keep unique values.\n            maintain_order: Whether to maintain the order of the unique values.\n\n        Returns:\n            DataFrame with the unique values in each column.\n        \"\"\"\n        if keep == \"none\" or keep == \"last\":\n            # TODO: support keep=\"none\"\n            raise NotImplementedError(\"not yet\")\n        return self.__constructor__(\n            _query_compiler=self._query_compiler.unique(subset=subset)\n        )\n\n    def equals(self, other: \"BasePolarsDataset\", *, null_equal: bool = True) -> bool:\n        \"\"\"\n        Determine whether the DataFrame is equal to another DataFrame.\n\n        Args:\n            other: DataFrame to compare with.\n\n        Returns:\n            True if the DataFrames are equal, False otherwise.\n        
\"\"\"\n        return (\n            isinstance(other, type(self))\n            and self._query_compiler.equals(other._query_compiler)\n            and (\n                null_equal\n                or (\n                    not self.to_pandas().isna().any(axis=None)\n                    and not other.to_pandas().isna().any(axis=None)\n                )\n            )\n        )\n\n    @property\n    def plot(self):\n        return polars.from_pandas(self._query_compiler.to_pandas()).plot\n\n    def count(self):\n        \"\"\"\n        Get the number of non-null values in each column.\n\n        Returns:\n            DataFrame with the counts.\n        \"\"\"\n        return self.__constructor__(_query_compiler=self._query_compiler.count(axis=0))\n"
  },
  {
    "path": "modin/polars/dataframe.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module houses ``DataFrame`` class, that is distributed version of ``polars.DataFrame``.\"\"\"\n\nfrom __future__ import annotations\n\nfrom collections import OrderedDict\nfrom typing import TYPE_CHECKING, Any, Iterable, Iterator, Literal, Sequence\n\nimport numpy as np\nimport pandas\nimport polars\nfrom pandas.core.dtypes.common import is_list_like\n\nfrom modin.core.storage_formats.base.query_compiler import BaseQueryCompiler\nfrom modin.pandas import DataFrame as ModinPandasDataFrame\nfrom modin.pandas import Series as ModinPandasSeries\nfrom modin.pandas.io import from_pandas\nfrom modin.polars.base import BasePolarsDataset\n\nif TYPE_CHECKING:\n    from modin.polars import Series\n    from modin.polars.groupby import GroupBy\n    from modin.polars.lazyframe import LazyFrame\n\n\nclass DataFrame(BasePolarsDataset):\n\n    def __init__(\n        self,\n        data=None,\n        schema=None,\n        *,\n        schema_overrides=None,\n        strict=True,\n        orient=None,\n        infer_schema_length=100,\n        nan_to_null=False,\n        _query_compiler=None,\n    ) -> None:\n        \"\"\"\n        Constructor for DataFrame object.\n\n        
Args:\n            data: Data to be converted to DataFrame.\n            schema: Schema of the data.\n            schema_overrides: Schema overrides.\n            strict: Whether to enforce strict schema.\n            orient: Orientation of the data.\n            infer_schema_length: Length of the data to infer schema.\n            nan_to_null: Whether to convert NaNs to nulls.\n            _query_compiler: Query compiler to use.\n        \"\"\"\n        if _query_compiler is None:\n            if isinstance(data, (ModinPandasDataFrame, ModinPandasSeries)):\n                self._query_compiler: BaseQueryCompiler = data._query_compiler.copy()\n            else:\n                self._query_compiler: BaseQueryCompiler = from_pandas(\n                    polars.DataFrame(\n                        data=data,\n                        schema=schema,\n                        schema_overrides=schema_overrides,\n                        strict=strict,\n                        orient=orient,\n                        infer_schema_length=infer_schema_length,\n                        nan_to_null=nan_to_null,\n                    ).to_pandas()\n                )._query_compiler\n        else:\n            self._query_compiler: BaseQueryCompiler = _query_compiler\n\n    def __getitem__(self, item):\n        \"\"\"\n        Get item from DataFrame.\n\n        Args:\n            item: Column to get.\n\n        Returns:\n            Series or DataFrame with the column.\n        \"\"\"\n        if is_list_like(item):\n            missing = [i for i in item if i not in self.columns]\n            if len(missing) > 0:\n                raise polars.exceptions.ColumnNotFoundError(missing[0])\n            return self.__constructor__(\n                _query_compiler=self._query_compiler.getitem_array(item)\n            )\n        else:\n            if item not in self.columns:\n                raise polars.exceptions.ColumnNotFoundError(item)\n            from .series import Series\n\n     
       return Series(_query_compiler=self._query_compiler.getitem_array([item]))\n\n    def _copy(self):\n        return self.__constructor__(_query_compiler=self._query_compiler.copy())\n\n    def _to_polars(self) -> polars.DataFrame:\n        \"\"\"\n        Convert the DataFrame to Polars format.\n\n        Returns:\n            Polars representation of the DataFrame.\n        \"\"\"\n        return polars.from_pandas(self._query_compiler.to_pandas())\n\n    def _get_columns(self):\n        \"\"\"\n        Get columns of the DataFrame.\n\n        Returns:\n            List of columns.\n        \"\"\"\n        return list(self._query_compiler.columns)\n\n    def _set_columns(self, new_columns):\n        \"\"\"\n        Set columns of the DataFrame.\n\n        Args:\n            new_columns: New columns to set.\n        \"\"\"\n        new_query_compiler = self._query_compiler.copy()\n        new_query_compiler.columns = new_columns\n        self._query_compiler = new_query_compiler\n\n    columns = property(_get_columns, _set_columns)\n\n    _sorted_columns_cache = None\n\n    def _get_sorted_columns(self):\n        if self._sorted_columns_cache is None:\n            self._sorted_columns_cache = [False] * len(self.columns)\n        return self._sorted_columns_cache\n\n    def _set_sorted_columns(self, value):\n        self._sorted_columns_cache = value\n\n    _sorted_columns = property(_get_sorted_columns, _set_sorted_columns)\n\n    @property\n    def dtypes(self):\n        \"\"\"\n        Get dtypes of the DataFrame.\n\n        Returns:\n            List of dtypes.\n        \"\"\"\n        return polars.from_pandas(\n            pandas.DataFrame(columns=self.columns).astype(self._query_compiler.dtypes)\n        ).dtypes\n\n    @property\n    def flags(self):\n        \"\"\"\n        Get flags of the DataFrame.\n\n        Returns:\n            List of flags.\n        \"\"\"\n        # TODO: Add flags support\n        return []\n\n    @property\n    def 
height(self):\n        \"\"\"\n        Get height of the DataFrame.\n\n        Returns:\n            Number of rows in the DataFrame.\n        \"\"\"\n        return len(self._query_compiler.index)\n\n    @property\n    def schema(self):\n        \"\"\"\n        Get schema of the DataFrame.\n\n        Returns:\n            OrderedDict of column names and dtypes.\n        \"\"\"\n        return OrderedDict(zip(self.columns, self.dtypes, strict=True))\n\n    @property\n    def shape(self):\n        \"\"\"\n        Get shape of the DataFrame.\n\n        Returns:\n            Tuple of (height, width).\n        \"\"\"\n        return self.height, self.width\n\n    @property\n    def width(self):\n        \"\"\"\n        Get width of the DataFrame.\n\n        Returns:\n            Number of columns in the DataFrame.\n        \"\"\"\n        return len(self.columns)\n\n    def __repr__(self):\n        \"\"\"\n        Get string representation of the DataFrame.\n\n        Returns:\n            String representation of the DataFrame.\n        \"\"\"\n        return repr(polars.from_pandas(self._query_compiler.to_pandas()))\n\n    def max(self, axis=None):\n        \"\"\"\n        Get the maximum value in each column.\n\n        Args:\n            axis: Axis to get the maximum value on.\n\n        Returns:\n            DataFrame with the maximum values.\n        \"\"\"\n        if axis is None or axis == 0:\n            return self.__constructor__(\n                _query_compiler=self._query_compiler.max(axis=0)\n            )\n        else:\n            return self.max_horizontal()\n\n    def max_horizontal(self):\n        \"\"\"\n        Get the maximum value in each row.\n\n        Returns:\n            DataFrame with the maximum values.\n        \"\"\"\n        return self.__constructor__(_query_compiler=self._query_compiler.max(axis=1))\n\n    def _convert_non_numeric_to_null(self):\n        \"\"\"\n        Convert non-numeric columns to null.\n\n        Returns:\n       
     DataFrame with non-numeric columns converted to null.\n        \"\"\"\n        non_numeric_cols = [\n            c\n            for c, t in zip(self.columns, self.dtypes, strict=True)\n            if not t.is_numeric()\n        ]\n        if len(non_numeric_cols) > 0:\n            return self.__constructor__(\n                _query_compiler=self._query_compiler.write_items(\n                    slice(None),\n                    [self.columns.index(c) for c in non_numeric_cols],\n                    pandas.NA,\n                    need_columns_reindex=False,\n                ).astype({c: self._query_compiler.dtypes[c] for c in non_numeric_cols})\n            )\n        return self._copy()\n\n    def mean(self, *, axis=None, null_strategy=\"ignore\"):\n        \"\"\"\n        Get the mean of each column.\n\n        Args:\n            axis: Axis to get the mean on.\n            null_strategy: Strategy to handle null values.\n\n        Returns:\n            DataFrame with the mean of each column or row.\n        \"\"\"\n        # TODO: this converts non numeric columns to numeric\n        obj = self._convert_non_numeric_to_null()\n        if axis is None or axis == 0:\n            return self.__constructor__(\n                _query_compiler=obj._query_compiler.mean(\n                    axis=0,\n                    skipna=True if null_strategy == \"ignore\" else False,\n                )\n            )\n        else:\n            return obj.mean_horizontal(\n                ignore_nulls=True if null_strategy == \"ignore\" else False\n            )\n\n    def median(self) -> \"DataFrame\":\n        \"\"\"\n        Get the median of each column.\n\n        Returns:\n            DataFrame with the median of each column.\n        \"\"\"\n        return self.__constructor__(\n            _query_compiler=self._convert_non_numeric_to_null()._query_compiler.median(\n                0\n            )\n        )\n\n    def mean_horizontal(self, *, ignore_nulls: bool = 
True):\n        \"\"\"\n        Get the mean of each row.\n\n        Args:\n            ignore_nulls: Whether to ignore null values.\n\n        Returns:\n            DataFrame with the mean of each row.\n        \"\"\"\n        obj = self._convert_non_numeric_to_null()\n        return self.__constructor__(\n            _query_compiler=obj._query_compiler.mean(axis=1, skipna=ignore_nulls)\n        )\n\n    def min(self, axis=None):\n        \"\"\"\n        Get the minimum value in each column.\n\n        Args:\n            axis: Axis to get the minimum value on.\n\n        Returns:\n            DataFrame with the minimum values of each row or column.\n        \"\"\"\n        if axis is None or axis == 0:\n            return self.__constructor__(\n                _query_compiler=self._query_compiler.min(axis=0)\n            )\n        else:\n            return self.max_horizontal()\n\n    def min_horizontal(self):\n        \"\"\"\n        Get the minimum value in each row.\n\n        Returns:\n            DataFrame with the minimum values of each row.\n        \"\"\"\n        return self.__constructor__(_query_compiler=self._query_compiler.min(axis=1))\n\n    def product(self):\n        \"\"\"\n        Get the product of each column.\n\n        Returns:\n            DataFrame with the product of each column.\n        \"\"\"\n        obj = self._convert_non_numeric_to_null()\n        return self.__constructor__(_query_compiler=obj._query_compiler.prod(axis=0))\n\n    def quantile(self, quantile: float, interpolation=\"nearest\"):\n        \"\"\"\n        Get the quantile of each column.\n\n        Args:\n            quantile: Quantile to get.\n            interpolation: Interpolation method.\n\n        Returns:\n            DataFrame with the quantile of each column.\n        \"\"\"\n        obj = self._convert_non_numeric_to_null()\n        # TODO: interpolation support\n        return self.__constructor__(\n            
_query_compiler=obj._query_compiler.quantile_for_single_value(quantile)\n        )\n\n    def std(self, ddof: int = 1):\n        \"\"\"\n        Get the standard deviation of each column.\n\n        Args:\n            ddof: Delta degrees of freedom.\n\n        Returns:\n            DataFrame with the standard deviation of each column\n        \"\"\"\n        obj = self._convert_non_numeric_to_null()\n        return self.__constructor__(_query_compiler=obj._query_compiler.std(ddof=ddof))\n\n    def sum(self, axis: int | None = None, null_strategy=\"ignore\"):\n        \"\"\"\n        Get the sum of each column.\n\n        Args:\n            axis: Axis to get the sum on.\n            null_strategy: Strategy to handle null values.\n\n        Returns:\n            DataFrame with the sum of each column or row.\n        \"\"\"\n        obj = self._convert_non_numeric_to_null()\n        if axis is None or axis == 0:\n            return self.__constructor__(\n                _query_compiler=obj._query_compiler.sum(\n                    axis=0,\n                    skipna=True if null_strategy == \"ignore\" else False,\n                )\n            )\n        else:\n            return obj.sum_horizontal(\n                ignore_nulls=True if null_strategy == \"ignore\" else False\n            )\n\n    def sum_horizontal(self, *, ignore_nulls: bool = True):\n        \"\"\"\n        Get the sum of each row.\n\n        Args:\n            ignore_nulls: Whether to ignore null values.\n\n        Returns:\n            DataFrame with the sum of each row.\n        \"\"\"\n        # TODO: if there are strings in the row, polars will append numeric values\n        # this behavior may not be intended so doing this instead (for now)\n        obj = self._convert_non_numeric_to_null()\n        return self.__constructor__(\n            _query_compiler=obj._query_compiler.sum(axis=1, skipna=ignore_nulls)\n        )\n\n    def var(self, ddof: int = 1):\n        \"\"\"\n        Get the 
variance of each column.\n\n        Args:\n            ddof: Delta degrees of freedom.\n\n        Returns:\n            DataFrame with the variance of each column.\n        \"\"\"\n        obj = self._convert_non_numeric_to_null()\n        return self.__constructor__(_query_compiler=obj._query_compiler.var(ddof=ddof))\n\n    def approx_n_unique(self):\n        \"\"\"\n        Get the approximate number of unique values in each column.\n\n        Returns:\n            DataFrame with the approximate number of unique values in each column.\n        \"\"\"\n        return self.__constructor__(_query_compiler=self._query_compiler.nunique())\n\n    def describe(self, percentiles: Sequence[float] | float = (0.25, 0.5, 0.75)):\n        \"\"\"\n        Get the descriptive statistics of each column.\n\n        Args:\n            percentiles: Percentiles to get.\n\n        Returns:\n            DataFrame with the descriptive statistics of each column.\n        \"\"\"\n        return self.__constructor__(\n            self.__constructor__(\n                _query_compiler=self._query_compiler.describe(\n                    percentiles=np.array(percentiles)\n                ).astype(\n                    {\n                        k: str\n                        for k, v in zip(self.columns, self.dtypes, strict=True)\n                        if v == polars.String\n                    }\n                )\n            )\n            .to_pandas()\n            .loc[\n                [\n                    \"count\",\n                    # \"null_count\",  TODO: support null_count in describe\n                    \"mean\",\n                    \"std\",\n                    \"min\",\n                    \"25%\",\n                    \"50%\",\n                    \"75%\",\n                    \"max\",\n                ]\n            ]\n            .reset_index()\n            .rename({\"index\": \"statistic\"})\n        )\n\n    def estimated_size(self, unit=\"b\"):\n        \"\"\"\n  
      Get the estimated amount of memory used by the DataFrame.\n\n        Args:\n            unit: Unit of the memory size.\n\n        Returns:\n            DataFrame with the estimated memory usage.\n        \"\"\"\n        return self.__constructor__(_query_compiler=self._query_compiler.memory_usage())\n\n    def glimpse(\n        self,\n        *,\n        max_items_per_column: int = 10,\n        max_colname_length: int = 50,\n        return_as_string: bool = False,\n    ) -> str | None:\n        raise NotImplementedError(\"not yet\")\n\n    def n_unique(self, subset=None) -> int:\n        \"\"\"\n        Get the number of unique values in each column.\n\n        Args:\n            subset: Columns to get the number of unique values for.\n\n        Returns:\n            Number of unique values in each column.\n        \"\"\"\n        if subset is not None:\n            raise NotImplementedError(\"not yet\")\n        return (\n            self.is_unique()._query_compiler.sum(axis=0).to_pandas().squeeze(axis=None)\n        )\n\n    def null_count(self) -> \"DataFrame\":\n        \"\"\"\n        Get the number of null values in each column.\n\n        Returns:\n            DataFrame with the number of null values in each column.\n        \"\"\"\n        return self.__constructor__(\n            _query_compiler=self._query_compiler.isna().sum(axis=0)\n        )\n\n    def to_pandas(self):\n        \"\"\"\n        Convert the DataFrame to Pandas format.\n\n        Returns:\n            modin.pandas representation of the DataFrame.\n        \"\"\"\n        return ModinPandasDataFrame(query_compiler=self._query_compiler.copy())\n\n    def group_by(\n        self,\n        *by,\n        maintain_order: bool = False,\n        **named_by,\n    ) -> \"GroupBy\":\n        \"\"\"\n        Group the DataFrame by the given columns.\n\n        Args:\n            by: Columns to group by.\n            maintain_order: Whether to maintain the order of the groups.\n            
named_by: Named columns to group by.\n\n        Returns:\n            GroupBy object.\n        \"\"\"\n        from modin.polars.groupby import GroupBy\n\n        return GroupBy(self, *by, maintain_order=maintain_order, **named_by)\n\n    def drop(self, *columns, strict: bool = True) -> \"DataFrame\":\n        \"\"\"\n        Drop the given columns.\n\n        Args:\n            columns: Columns to drop.\n            strict: Whether to raise an error if a column is not found.\n\n        Returns:\n            DataFrame with the columns dropped.\n        \"\"\"\n        if strict:\n            for c in columns:\n                if c not in self.columns:\n                    raise KeyError(c)\n        columns = list(columns) if not isinstance(columns[0], list) else columns[0]\n        return self.__constructor__(_query_compiler=self._query_compiler.drop(columns))\n\n    def drop_in_place(self, name: str) -> \"DataFrame\":\n        \"\"\"\n        Drop the given column in place and return the dropped column.\n\n        Args:\n            name: Column to drop.\n\n        Returns:\n            The column that was dropped from the DataFrame.\n        \"\"\"\n        col_to_return = self[name]\n        self._query_compiler = self._query_compiler.drop([name])\n        return col_to_return\n\n    def get_column(self, name: str) -> \"Series\":\n        \"\"\"\n        Get the column by name.\n\n        Args:\n            name: Name of the column to get.\n\n        Returns:\n            Series with the column.\n        \"\"\"\n        return self[name]\n\n    def get_column_index(self, name: str) -> int:\n        \"\"\"\n        Find the index of the column by name.\n\n        Args:\n            name: Name of the column to find.\n\n        Returns:\n            Index of the column.\n        \"\"\"\n        return self.columns.index(name)\n\n    def get_columns(self) -> list[\"Series\"]:\n        \"\"\"\n        Get the columns of the DataFrame.\n\n        Returns:\n            
List of Series with the columns.\n        \"\"\"\n        return [self[name] for name in self.columns]\n\n    def group_by_dynamic(\n        self,\n        index_column,\n        *,\n        every,\n        period,\n        offset,\n        truncate,\n        include_boundaries,\n        closed,\n        label,\n        group_by,\n        start_by,\n        check_sorted,\n    ):\n        raise NotImplementedError(\"not yet\")\n\n    def hstack(self, columns, *, inplace: bool = False) -> \"DataFrame\":\n        \"\"\"\n        Stack the given columns horizontally.\n\n        Args:\n            columns: Columns to stack.\n            inplace: Whether to stack the columns in place.\n\n        Returns:\n            DataFrame with the columns stacked horizontally.\n        \"\"\"\n        if isinstance(columns, DataFrame):\n            columns = columns.get_columns()\n        result_query_compiler = self._query_compiler.concat(\n            axis=1, other=[c._query_compiler for c in columns]\n        )\n        if inplace:\n            self._query_compiler = result_query_compiler\n            return self\n        return self.__constructor__(_query_compiler=result_query_compiler)\n\n    def insert_column(self, index: int, column: \"Series\") -> \"DataFrame\":\n        \"\"\"\n        Insert the given column at the given index.\n\n        Args:\n            index: Index to insert the column at.\n            column: Column to insert.\n            name: Name of the column to insert.\n\n        Returns:\n            DataFrame with the column inserted.\n        \"\"\"\n        return self.__constructor__(\n            self._query_compiler.insert(index, column.name, column._query_compiler)\n        )\n\n    def item(self, row: int | None = None, column: str | int | None = None) -> Any:\n        \"\"\"\n        Get the value at the given row and column.\n\n        Args:\n            row: Row to get the value from.\n            column: Column to get the value from.\n\n        
Returns:\n            Value at the given row and column.\n        \"\"\"\n        if row is None:\n            row = 0\n        if column is None:\n            column = 0\n        if isinstance(column, str):\n            column = self.columns.index(column)\n        return (\n            self._query_compiler.take_2d_labels(row, column)\n            .to_pandas()\n            .squeeze(axis=None)\n        )\n\n    def iter_columns(self) -> Iterator[\"Series\"]:\n        \"\"\"\n        Iterate over the columns of the DataFrame.\n\n        Returns:\n            Iterator over the columns.\n        \"\"\"\n        return iter(self.get_columns())\n\n    def iter_rows(\n        self,\n        *,\n        named: bool = False,\n        buffer_size: int = 512,\n    ) -> Iterator[tuple[Any]] | Iterator[dict[str, Any]]:\n        \"\"\"\n        Iterate over the rows of the DataFrame.\n\n        Returns:\n            Iterator over the rows.\n        \"\"\"\n        raise NotImplementedError(\"not yet\")\n\n    def iter_slices(\n        self,\n        n_rows: int = 10000,\n    ) -> Iterator[\"DataFrame\"]:\n        \"\"\"\n        Iterate over the slices of the DataFrame.\n\n        Args:\n            n_rows: Number of rows in each slice.\n\n        Returns:\n            Iterator over the slices.\n        \"\"\"\n        raise NotImplementedError(\"not yet\")\n\n    def join(\n        self,\n        other: \"DataFrame\",\n        on: str | list[str] | None = None,\n        how: str = \"inner\",\n        *,\n        left_on: str | list[str] | None = None,\n        right_on: str | list[str] | None = None,\n        suffix: str = \"_right\",\n        validate=\"m:m\",\n        join_nulls: bool = False,\n        coalesce: bool | None = None,\n    ) -> \"DataFrame\":\n        \"\"\"\n        Join the DataFrame with another DataFrame.\n\n        Args:\n            other: DataFrame to join with.\n            on: Column to join on.\n            how: How to join the DataFrames.\n\n        
Returns:\n            Joined DataFrame.\n        \"\"\"\n        if how == \"full\":\n            how = \"outer\"\n        elif how == \"cross\":\n            raise NotImplementedError(\"not yet\")\n        elif how == \"semi\":\n            how = \"right\"\n        elif how == \"anti\":\n            raise NotImplementedError(\"not yet\")\n        return self.__constructor__(\n            _query_compiler=self._query_compiler.merge(\n                other._query_compiler,\n                on=on,\n                how=how,\n                suffixes=(\"\", suffix),\n                left_on=left_on,\n                right_on=right_on,\n            )\n        )\n\n    def join_asof(\n        self,\n        other: \"DataFrame\",\n        *,\n        left_on: str | None = None,\n        right_on: str | None = None,\n        on: str | None = None,\n        by_left: str | Sequence[str] | None = None,\n        by_right: str | Sequence[str] | None = None,\n        by: str | Sequence[str] | None = None,\n        strategy: str = \"backward\",\n        suffix: str = \"_right\",\n        tolerance: str,\n    ) -> \"DataFrame\":\n        \"\"\"\n        Join the DataFrame with another DataFrame using asof logic.\n\n        Args:\n            other: DataFrame to join with.\n            left_on: Column to join on in the left DataFrame.\n            right_on: Column to join on in the right DataFrame.\n            on: Column to join on in both DataFrames.\n            by_left: Columns to join on in the left DataFrame.\n            by_right: Columns to join on in the right DataFrame.\n            by: Columns to join on in both DataFrames.\n            strategy: Strategy to use for the join.\n            suffix: Suffix to add to the columns.\n            tolerance: Tolerance for the join.\n\n        Returns:\n            Joined DataFrame.\n        \"\"\"\n        if on is not None and left_on is None and right_on is None:\n            left_on = right_on = on\n        if by is not None 
and by_left is None and by_right is None:\n            by_left = by_right = by\n        return self.__constructor__(\n            _query_compiler=self._query_compiler.merge_asof(\n                other._query_compiler,\n                left_on=left_on,\n                right_on=right_on,\n                left_by=by_left,\n                right_by=by_right,\n                direction=strategy,\n                suffixes=(\"\", suffix),\n                tolerance=tolerance,\n            )\n        )\n\n    def melt(\n        self,\n        id_vars=None,\n        value_vars=None,\n        variable_name: str | None = None,\n        value_name: str | None = None,\n    ) -> \"DataFrame\":\n        \"\"\"\n        Melt the DataFrame.\n\n        Args:\n            id_vars: Columns to keep.\n            value_vars: Columns to melt.\n            variable_name: Name of the variable column.\n            value_name: Name of the value column.\n\n        Returns:\n            Melted DataFrame.\n        \"\"\"\n        return self.__constructor__(\n            _query_compiler=self._query_compiler.melt(\n                id_vars=id_vars,\n                value_vars=value_vars,\n                var_name=variable_name,\n                value_name=value_name,\n            )\n        )\n\n    def merge_sorted(self, other: \"DataFrame\", on: str | list[str]) -> \"DataFrame\":\n        # TODO: support natural join + sort\n        raise NotImplementedError(\"not yet\")\n\n    def partition_by(\n        self,\n        by,\n        *more_by,\n        maintain_order: bool = True,\n        include_key: bool = True,\n        as_dict: bool = False,\n    ) -> list[\"DataFrame\"] | dict[Any, \"DataFrame\"]:\n        \"\"\"\n        Partition the DataFrame by the given columns.\n\n        Args:\n            by: Columns to partition by.\n            more_by: Additional columns to partition by.\n            maintain_order: Whether to maintain the order of the partitions.\n            include_key: 
Whether to include the partition key.\n            as_dict: Whether to return the partitions as a dictionary.\n\n        Returns:\n            List of DataFrames or dictionary of DataFrames.\n        \"\"\"\n        if isinstance(by, str):\n            by = [by, *more_by]\n        elif isinstance(by, list):\n            by = [*by, *more_by]\n        if as_dict:\n            return {\n                k: self.__constructor__(v)\n                for k, v in self.to_pandas()\n                .groupby(by, as_index=not include_key)\n                .groups\n            }\n        else:\n            return [\n                self.__constructor__(g)\n                for g in self.to_pandas().groupby(by, as_index=not include_key)\n            ]\n\n    def pipe(self, function, *args, **kwargs) -> Any:\n        return function(self, *args, **kwargs)\n\n    def pivot(\n        self,\n        *,\n        values,\n        index,\n        columns,\n        aggregate_function=None,\n        maintain_order: bool = True,\n        sort_columns: bool = False,\n        separator: str = \"_\",\n    ) -> \"DataFrame\":\n        \"\"\"\n        Pivot the DataFrame.\n\n        Args:\n            values: Values to pivot.\n            index: Index columns.\n            columns: Columns to pivot.\n            aggregate_function: Function to aggregate the values.\n            maintain_order: Whether to maintain the order of the pivot.\n            sort_columns: Whether to sort the columns.\n            separator: Separator for the columns.\n\n        Returns:\n            Pivoted DataFrame.\n        \"\"\"\n        # TODO: handle maintain_order, sort_columns, separator\n        return self.__constructor__(\n            _query_compiler=self._query_compiler.pivot(\n                values=values,\n                index=index,\n                columns=columns,\n                agg=aggregate_function,\n            )\n        )\n\n    def rechunk(self) -> \"DataFrame\":\n        \"\"\"\n        
Rechunk the DataFrame into the given number of partitions.\n\n        Returns:\n            Rechunked DataFrame.\n        \"\"\"\n        return self._copy()\n\n    def rename(self, mapping: dict[str, str] | callable) -> \"DataFrame\":\n        \"\"\"\n        Rename the columns of the DataFrame.\n\n        Args:\n            mapping: Mapping of old names to new names.\n\n        Returns:\n            DataFrame with the columns renamed.\n        \"\"\"\n        if callable(mapping):\n            mapping = {c: mapping(c) for c in self.columns}\n        # TODO: add a query compiler method for `rename`\n        new_columns = {c: mapping.get(c, c) for c in self.columns}\n        new_obj = self._copy()\n        new_obj.columns = new_columns\n        return new_obj\n\n    def replace_column(self, index: int, column: \"Series\") -> \"DataFrame\":\n        \"\"\"\n        Replace the column at the given index with the new column.\n\n        Args:\n            index: Index of the column to replace.\n            column: New column to replace with.\n\n        Returns:\n            DataFrame with the column replaced.\n        \"\"\"\n        self._query_compiler = self._query_compiler.drop([self.columns[index]]).insert(\n            index,\n            column.name,\n            column._query_compiler,\n        )\n        return self\n\n    def reverse(self) -> \"DataFrame\":\n        \"\"\"\n        Reverse the DataFrame.\n\n        Returns:\n            Reversed DataFrame.\n        \"\"\"\n        return self.__constructor__(\n            _query_compiler=self._query_compiler.getitem_row_array(\n                slice(None, None, -1)\n            )\n        )\n\n    def rolling(self, index_column, *, period, offset, closed, group_by, check_sorted):\n        raise NotImplementedError(\"not yet\")\n\n    def row(\n        self, index: int | None = None, *, by_predicate=None, named: bool = False\n    ) -> tuple[Any] | dict[str, Any]:\n        \"\"\"\n        Get the row at the 
given index.\n\n        Args:\n            index: Index of the row to get.\n            by_predicate: Predicate to get the row by.\n            named: Whether to return the row as a dictionary.\n\n        Returns:\n            Row at the given index.\n        \"\"\"\n        if index is not None:\n            if named:\n                return dict(self.to_pandas().iloc[index])\n            else:\n                return tuple(self.to_pandas().iloc[index])\n        else:\n            # TODO: support expressions\n            raise NotImplementedError(\"not yet\")\n\n    def rows(self, *, named: bool = False) -> list[tuple[Any]] | list[dict[str, Any]]:\n        raise NotImplementedError(\"not yet\")\n\n    def rows_by_key(\n        self,\n        key: Any,\n        *,\n        named: bool = False,\n        include_key: bool = False,\n        unique: bool = False,\n    ) -> dict[Any, Iterable[Any]]:\n        raise NotImplementedError(\"not yet\")\n\n    def select(self, *exprs, **named_exprs) -> \"DataFrame\":\n        # TODO: support expressions\n        raise NotImplementedError(\"not yet\")\n\n    def select_seq(self, *exprs, **named_exprs) -> \"DataFrame\":\n        # TODO: support expressions\n        raise NotImplementedError(\"not yet\")\n\n    def set_sorted(\n        self, column: str | Iterable[str], *more_columns: str, descending: bool = False\n    ) -> \"DataFrame\":\n        \"\"\"\n        Set the columns to be sorted.\n\n        Args:\n            column: Column to sort by.\n            more_columns: Additional columns to sort by.\n            descending: Whether to sort in descending order.\n\n        Returns:\n            DataFrame with the columns sorted.\n        \"\"\"\n        if len(more_columns) > 0:\n            if isinstance(column, Iterable):\n                column = [*column, *more_columns]\n            else:\n                column = [column, *more_columns]\n        if isinstance(column, str):\n            column = [column]\n        
new_sorted_columns = [c in column for c in self.columns]\n        obj = self._copy()\n        obj._sorted_columns = new_sorted_columns\n        return obj\n\n    def sql(self, query: str, *, table_name: str = \"self\") -> \"DataFrame\":\n        raise NotImplementedError(\"not yet\")\n\n    def to_series(self, index: int = 0) -> \"Series\":\n        \"\"\"\n        Convert the DataFrame at index provided to a Series.\n\n        Args:\n            index: Index of the column to convert to a Series.\n\n        Returns:\n            Series representation of the DataFrame at index provided.\n        \"\"\"\n        return self[self.columns[index]]\n\n    def transpose(\n        self,\n        *,\n        include_header: bool = False,\n        header_name: str = \"column\",\n        column_names: str | Sequence[str] | None = None,\n    ) -> \"DataFrame\":\n        \"\"\"\n        Transpose the DataFrame.\n\n        Args:\n            include_header: Whether to include a header.\n            header_name: Name of the header.\n            column_names: Names of the columns.\n\n        Returns:\n            Transposed DataFrame.\n        \"\"\"\n        result = self.__constructor__(_query_compiler=self._query_compiler.transpose())\n        if column_names is not None:\n            result.columns = column_names\n        elif include_header:\n            result.columns = [f\"{header_name}_{i}\" for i in range(result.width)]\n        return result\n\n    def unnest(self, columns, *more_columns) -> \"DataFrame\":\n        \"\"\"\n        Unnest the given columns.\n\n        Args:\n            columns: Columns to unnest.\n            more_columns: Additional columns to unnest.\n\n        Returns:\n            DataFrame with the columns unnested.\n        \"\"\"\n        raise NotImplementedError(\"not yet\")\n\n    def unstack(\n        self,\n        step: int,\n        how: str = \"vertical\",\n        columns=None,\n        fill_values: list[Any] | None = None,\n    ):\n      
  \"\"\"\n        Unstack the DataFrame.\n\n        Args:\n            step: Step to unstack by.\n            how: How to unstack the DataFrame.\n            columns: Columns to unstack.\n            fill_values: Values to fill the unstacked DataFrame with.\n\n        Returns:\n            Unstacked DataFrame.\n        \"\"\"\n        raise NotImplementedError(\"not yet\")\n\n    def update(\n        self,\n        other: \"DataFrame\",\n        on: str | Sequence[str] | None = None,\n        how: Literal[\"left\", \"inner\", \"full\"] = \"left\",\n        *,\n        left_on: str | Sequence[str] | None = None,\n        right_on: str | Sequence[str] | None = None,\n        include_nulls: bool = False,\n    ) -> \"DataFrame\":\n        \"\"\"\n        Update the DataFrame with another DataFrame.\n\n        Args:\n            other: DataFrame to update with.\n            on: Column to update on.\n            how: How to update the DataFrame.\n\n        Returns:\n            Updated DataFrame.\n        \"\"\"\n        raise NotImplementedError(\"not yet\")\n\n    def upsample(\n        self,\n        time_column: str,\n        *,\n        every: str,\n        offset: str | None = None,\n        group_by: str | Sequence[str] | None = None,\n        maintain_order: bool = False,\n    ) -> \"DataFrame\":\n        raise NotImplementedError(\"not yet\")\n\n    def vstack(self, other: \"DataFrame\", *, in_place: bool = False) -> \"DataFrame\":\n        \"\"\"\n        Stack the given DataFrame vertically.\n\n        Args:\n            other: DataFrame to stack.\n            in_place: Whether to stack the DataFrames in place.\n\n        Returns:\n            Stacked DataFrame.\n        \"\"\"\n        if in_place:\n            self._query_compiler = self._query_compiler.concat(\n                axis=0, other=other._query_compiler\n            )\n            return self\n        else:\n            return self.__constructor__(\n                
_query_compiler=self._query_compiler.concat(\n                    axis=0, other=other._query_compiler\n                )\n            )\n\n    def with_columns(self, *exprs, **named_exprs) -> \"DataFrame\":\n        # TODO: support expressions\n        raise NotImplementedError(\"not yet\")\n\n    def with_columns_seq(self, *exprs, **named_exprs) -> \"DataFrame\":\n        # TODO: support expressions\n        raise NotImplementedError(\"not yet\")\n\n    def with_row_index(self, name: str = \"index\", offset: int = 0) -> \"DataFrame\":\n        \"\"\"\n        Add a row index to the DataFrame.\n\n        Args:\n            name: Name of the row index.\n            offset: Offset for the row index.\n\n        Returns:\n            DataFrame with the row index added.\n        \"\"\"\n        if offset != 0:\n            obj = self._copy()\n            obj.index = obj.index + offset\n        result = self.__constructor__(\n            _query_compiler=self._query_compiler.reset_index(drop=False)\n        )\n        result.columns = [name, *self.columns]\n        return result\n\n    with_row_count = with_row_index\n\n    def map_rows(\n        self, function: callable, return_dtype=None, *, inference_size: int = 256\n    ) -> \"DataFrame\":\n        \"\"\"\n        Apply the given function to the DataFrame.\n\n        Args:\n            function: Function to apply.\n            return_dtype: Return type of the function.\n            inference_size: Size of the inference.\n\n        Returns:\n            DataFrame with the function applied.\n        \"\"\"\n        return self.__constructor__(\n            _query_compiler=self._query_compiler.apply(function, axis=1)\n        )\n\n    def corr(self, **kwargs: Any) -> \"DataFrame\":\n        \"\"\"\n        Compute the correlation of the DataFrame.\n\n        Returns:\n            DataFrame with the correlation.\n        \"\"\"\n        return self.__constructor__(_query_compiler=self._query_compiler.corr(**kwargs))\n\n   
 def lazy(self) -> \"LazyFrame\":\n        \"\"\"\n        Convert the DataFrame to a lazy DataFrame.\n\n        Returns:\n            Lazy DataFrame.\n        \"\"\"\n        raise NotImplementedError(\"not yet\")\n\n    @classmethod\n    def deserialize(cls, source) -> \"DataFrame\":\n        \"\"\"\n        Deserialize the DataFrame.\n\n        Args:\n            source: Source to deserialize.\n\n        Returns:\n            Deserialized DataFrame.\n        \"\"\"\n        return cls(polars.DataFrame.deserialize(source))\n\n    def serialize(self, file=None) -> str | None:\n        \"\"\"\n        Serialize the DataFrame.\n\n        Args:\n            file: File to serialize to.\n\n        Returns:\n            Serialized DataFrame.\n        \"\"\"\n        return polars.from_pandas(self._query_compiler.to_pandas()).serialize(file)\n\n    @property\n    def style(self):\n        \"\"\"\n        Create a Great Table for styling.\n\n        Returns:\n            GreatTable object.\n        \"\"\"\n        return self._to_polars().style\n\n    def to_dict(\n        self, *, as_series: bool = True\n    ) -> dict[str, \"Series\"] | dict[str, list[Any]]:\n        \"\"\"\n        Convert the DataFrame to a dictionary representation.\n\n        Args:\n            as_series: Whether to convert the columns to Series.\n\n        Returns:\n            Dictionary representation of the DataFrame.\n        \"\"\"\n        if as_series:\n            return {name: self[name] for name in self.columns}\n        else:\n            return polars.from_pandas(self._query_compiler.to_pandas()).to_dict(\n                as_series=as_series\n            )\n\n    def to_dicts(self) -> list[dict[str, Any]]:\n        \"\"\"\n        Convert the DataFrame to a list of dictionaries.\n\n        Returns:\n            List of dictionaries.\n        \"\"\"\n        return self._to_polars().to_dicts()\n\n    def to_init_repr(self, n: int = 1000) -> str:\n        \"\"\"\n        Get the string 
representation of the DataFrame for initialization.\n\n        Returns:\n            String representation of the DataFrame for initialization.\n        \"\"\"\n        return self._to_polars().to_init_repr(n)\n\n    def to_struct(self, name: str = \"\") -> \"Series\":\n        \"\"\"\n        Convert the DataFrame to a struct.\n\n        Args:\n            name: Name of the struct.\n\n        Returns:\n            Series representation of the DataFrame as a struct.\n        \"\"\"\n        raise NotImplementedError(\"not yet\")\n\n    def unpivot(\n        self,\n        on,\n        *,\n        index,\n        variable_name: str | None = None,\n        value_name: str | None = None,\n    ) -> \"DataFrame\":\n        \"\"\"\n        Unpivot a DataFrame from wide to long format.\n\n        Args:\n            on: Columns to unpivot.\n            index: Columns to keep.\n            variable_name: Name of the variable column.\n            value_name: Name of the value column.\n\n        Returns:\n            Unpivoted DataFrame.\n        \"\"\"\n        return self.__constructor__(\n            _query_compiler=self._query_compiler.melt(\n                on=on,\n                index=index,\n                var_name=variable_name,\n                value_name=value_name,\n            )\n        )\n\n    write_avro = write_clipboard = write_csv = write_database = write_delta = (\n        write_excel\n    ) = write_ipc = write_ipc_stream = write_json = write_ndjson = write_parquet = (\n        write_parquet_partitioned\n    ) = lambda *args, **kwargs: (_ for _ in ()).throw(NotImplementedError(\"not yet\"))\n\n    def clear(self, n: int = 0) -> \"DataFrame\":\n        \"\"\"\n        Create an empty (n=0) or null filled (n>0) DataFrame.\n\n        Args:\n            n: Number of rows to create.\n\n        Returns:\n            Empty or null filled DataFrame.\n        \"\"\"\n        return self.__constructor__(polars.DataFrame(schema=self.schema).clear(n=n))\n\n    def 
collect_schema(self) -> dict[str, str]:\n        \"\"\"\n        Collect the schema of the DataFrame.\n\n        Returns:\n            Dictionary of the schema.\n        \"\"\"\n        return self.schema\n\n    def fold(self, operation: callable) -> \"Series\":\n        \"\"\"\n        Fold the DataFrame.\n\n        Args:\n            operation: Operation to fold the DataFrame with.\n\n        Returns:\n            Series with the folded DataFrame.\n        \"\"\"\n        raise NotImplementedError(\"not yet\")\n\n    def hash_rows(\n        self,\n        seed: int = 0,\n        seed_1: int | None = None,\n        seed_2: int | None = None,\n        seed_3: int | None = None,\n    ) -> \"Series\":\n        raise NotImplementedError(\"not yet\")\n"
  },
  {
    "path": "modin/polars/groupby.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Implement GroupBy public API as pandas does.\"\"\"\n\nfrom typing import TYPE_CHECKING\n\nif TYPE_CHECKING:\n    from modin.polars import DataFrame\n\n\nclass GroupBy:\n\n    def __init__(\n        self,\n        df: \"DataFrame\",\n        *by,\n        maintain_order: bool = False,\n        **named_by,\n    ) -> None:\n        self.df = df\n        if len(by) == 1:\n            self.by = by[0]\n        else:\n            if all(isinstance(b, str) and b in self.df.columns for b in by):\n                self.by = self.df[list(by)]._query_compiler\n            elif all(isinstance(b, type(self._df._query_compiler)) for b in by):\n                self.by = by\n            else:\n                raise NotImplementedError(\"not yet\")\n        self.named_by = named_by\n        self.maintain_order = maintain_order\n\n    def agg(self, *aggs, **named_aggs):\n        raise NotImplementedError(\"not yet\")\n\n    def all(self):\n        raise NotImplementedError(\"not yet\")\n\n    def map_groups(self, function) -> \"DataFrame\":\n        raise NotImplementedError(\"not yet\")\n\n    apply = map_groups\n\n    def count(self):\n        return 
self.len(name=\"count\")\n\n    def first(self) -> \"DataFrame\":\n        return self.df.__constructor__(\n            _query_compiler=self.df._query_compiler.groupby_first(\n                self.by,\n                axis=0,\n                groupby_kwargs=dict(\n                    sort=not self.maintain_order,\n                    as_index=True,\n                ),\n                agg_args=(),\n                agg_kwargs={},\n                drop=False,\n            ).reset_index(drop=False)\n        )\n\n    def head(self, n: int = 5):\n        return self.df.__constructor__(\n            _query_compiler=self.df._query_compiler.groupby_head(\n                self.by,\n                axis=0,\n                groupby_kwargs=dict(\n                    sort=not self.maintain_order,\n                    as_index=False,\n                ),\n                agg_args=(),\n                agg_kwargs=dict(n=n),\n                drop=False,\n            )\n        )\n\n    def last(self) -> \"DataFrame\":\n        return self.df.__constructor__(\n            _query_compiler=self.df._query_compiler.groupby_last(\n                self.by,\n                axis=0,\n                groupby_kwargs=dict(\n                    sort=not self.maintain_order,\n                    as_index=True,\n                ),\n                agg_args=(),\n                agg_kwargs={},\n                drop=False,\n            ).reset_index(drop=False)\n        )\n\n    def len(self, name: str | None = None) -> \"DataFrame\":\n        if name is None:\n            name = \"len\"\n        result = self.df.__constructor__(\n            _query_compiler=self.df._query_compiler.groupby_size(\n                self.by,\n                axis=0,\n                groupby_kwargs=dict(\n                    sort=not self.maintain_order,\n                    as_index=False,\n                ),\n                agg_args=(),\n                agg_kwargs={},\n                drop=False,\n            )\n       
 )\n        result._query_compiler.columns = [\n            c if c != \"size\" else name for c in result.columns\n        ]\n        return result\n\n    def max(self) -> \"DataFrame\":\n        return self.df.__constructor__(\n            _query_compiler=self.df._query_compiler.groupby_max(\n                self.by,\n                axis=0,\n                groupby_kwargs=dict(\n                    sort=not self.maintain_order,\n                    as_index=False,\n                ),\n                agg_args=(),\n                agg_kwargs={},\n                drop=False,\n            )\n        )\n\n    def mean(self) -> \"DataFrame\":\n        # TODO: Non numeric columns are dropped, but in Polars they are converted to null\n        return self.df.__constructor__(\n            _query_compiler=self.df._query_compiler.groupby_mean(\n                self.by,\n                axis=0,\n                groupby_kwargs=dict(\n                    sort=not self.maintain_order,\n                    as_index=True,\n                ),\n                agg_args=(),\n                agg_kwargs=dict(numeric_only=True),\n                drop=False,\n            ).reset_index(drop=False)\n        )\n\n    def median(self) -> \"DataFrame\":\n        # TODO: Non numeric columns are dropped, but in Polars they are converted to null\n        return self.df.__constructor__(\n            _query_compiler=self.df._query_compiler.groupby_median(\n                self.by,\n                axis=0,\n                groupby_kwargs=dict(\n                    sort=not self.maintain_order,\n                    as_index=True,\n                ),\n                agg_args=(),\n                agg_kwargs=dict(numeric_only=True),\n                drop=False,\n            ).reset_index(drop=False)\n        )\n\n    def min(self) -> \"DataFrame\":\n        return self.df.__constructor__(\n            _query_compiler=self.df._query_compiler.groupby_min(\n                self.by,\n                
axis=0,\n                groupby_kwargs=dict(\n                    sort=not self.maintain_order,\n                    as_index=False,\n                ),\n                agg_args=(),\n                agg_kwargs={},\n                drop=False,\n            )\n        )\n\n    def n_unique(self) -> \"DataFrame\":\n        return self.df.__constructor__(\n            _query_compiler=self.df._query_compiler.groupby_nunique(\n                self.by,\n                axis=0,\n                groupby_kwargs=dict(\n                    sort=not self.maintain_order,\n                    as_index=False,\n                ),\n                agg_args=(),\n                agg_kwargs={},\n                drop=False,\n            )\n        )\n\n    def quantile(self, quantile: float, interpolation=\"nearest\") -> \"DataFrame\":\n        # TODO: Non numeric columns are dropped, but in Polars they are converted to null\n        # TODO: interpolation types not yet supported\n        return self.df.__constructor__(\n            _query_compiler=self.df._query_compiler.groupby_quantile(\n                self.by,\n                axis=0,\n                groupby_kwargs=dict(\n                    sort=not self.maintain_order,\n                    as_index=True,\n                ),\n                agg_args=(),\n                agg_kwargs=dict(numeric_only=True, q=quantile),\n                drop=False,\n            ).reset_index(drop=False)\n        )\n\n    def sum(self) -> \"DataFrame\":\n        # TODO: Non numeric columns are dropped, but in Polars they are converted to null\n        return self.df.__constructor__(\n            _query_compiler=self.df._query_compiler.groupby_sum(\n                self.by,\n                axis=0,\n                groupby_kwargs=dict(\n                    sort=not self.maintain_order,\n                    as_index=True,\n                ),\n                agg_args=(),\n                agg_kwargs=dict(numeric_only=True),\n                
drop=False,\n            ).reset_index(drop=False)\n        )\n\n    def tail(self, n: int = 5):\n        return self.df.__constructor__(\n            _query_compiler=self.df._query_compiler.groupby_tail(\n                self.by,\n                axis=0,\n                groupby_kwargs=dict(\n                    sort=not self.maintain_order,\n                    as_index=False,\n                ),\n                agg_args=(),\n                agg_kwargs=dict(n=n),\n                drop=False,\n            )\n        )\n"
  },
  {
    "path": "modin/polars/lazyframe.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nfrom modin.polars.base import BasePolarsDataset\n\n\nclass LazyFrame(BasePolarsDataset):\n    \"\"\"\n    Stub for Lazy Frame implementation.\n    \"\"\"\n\n    pass\n"
  },
  {
    "path": "modin/polars/series.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Module houses `Series` class, that is distributed version of `polars.Series`.\"\"\"\n\nfrom __future__ import annotations\n\nfrom typing import TYPE_CHECKING, Any, Sequence\n\nimport numpy as np\nimport pandas\nimport polars\nfrom polars._utils.various import no_default\n\nfrom modin.core.storage_formats.base.query_compiler import BaseQueryCompiler\nfrom modin.error_message import ErrorMessage\nfrom modin.pandas import Series as ModinPandasSeries\nfrom modin.pandas.io import from_pandas\nfrom modin.polars.base import BasePolarsDataset\n\nif TYPE_CHECKING:\n    from numpy.typing import ArrayLike\n    from polars import PolarsDataType\n\n    from modin.polars import DataFrame\n\n\nclass Series(BasePolarsDataset):\n    def __init__(\n        self,\n        name: str | \"ArrayLike\" | None = None,\n        values: \"ArrayLike\" | None = None,\n        dtype: \"PolarsDataType | None\" = None,\n        *,\n        strict: \"bool\" = True,\n        nan_to_null: \"bool\" = False,\n        dtype_if_empty: \"PolarsDataType\" = polars.Null,\n        _query_compiler: BaseQueryCompiler | None = None,\n    ) -> None:\n        if _query_compiler is None:\n            if 
isinstance(values, ModinPandasSeries):\n                self._query_compiler = values._query_compiler.copy()\n            else:\n                self._query_compiler: BaseQueryCompiler = from_pandas(\n                    polars.Series(\n                        name=name,\n                        values=values,\n                        dtype=dtype,\n                        strict=strict,\n                        nan_to_null=nan_to_null,\n                        dtype_if_empty=dtype_if_empty,\n                    )\n                    .to_pandas()\n                    .to_frame()\n                )._query_compiler\n        else:\n            self._query_compiler: BaseQueryCompiler = _query_compiler\n\n    def __repr__(self):\n        return repr(\n            polars.from_pandas(self._query_compiler.to_pandas().squeeze(axis=1))\n        )\n\n    _sorted = False\n    _descending = None\n\n    def to_pandas(self) -> ModinPandasSeries:\n        return ModinPandasSeries(query_compiler=self._query_compiler)\n\n    def arg_max(self) -> int:\n        \"\"\"\n        Get the index of the maximum value.\n\n        Returns:\n            Index of the maximum value.\n        \"\"\"\n        return self.to_pandas().argmax()\n\n    def arg_min(self) -> int:\n        \"\"\"\n        Get the index of the minimum value.\n\n        Returns:\n            Index of the minimum value.\n        \"\"\"\n        return self.to_pandas().argmin()\n\n    def implode(self) -> \"Series\":\n        \"\"\"\n        Aggregate values into a list.\n\n        Returns:\n            Imploded Series.\n        \"\"\"\n        raise NotImplementedError(\"not yet\")\n\n    def max(self) -> Any:\n        \"\"\"\n        Get the maximum value.\n\n        Returns:\n            Maximum value.\n        \"\"\"\n        return self.to_pandas().max()\n\n    def min(self) -> Any:\n        \"\"\"\n        Get the minimum value.\n\n        Returns:\n            Minimum value.\n        \"\"\"\n        return 
self.to_pandas().min()\n\n    def mean(self) -> Any:\n        \"\"\"\n        Get the mean value.\n\n        Returns:\n            Mean value.\n        \"\"\"\n        return self.to_pandas().mean()\n\n    def median(self) -> Any:\n        \"\"\"\n        Get the median value.\n\n        Returns:\n            Median value.\n        \"\"\"\n        return self.to_pandas().median()\n\n    def mode(self) -> Any:\n        \"\"\"\n        Get the mode value.\n\n        Returns:\n            Mode value.\n        \"\"\"\n        return self.to_pandas().mode()\n\n    def nan_max(self) -> Any:\n        \"\"\"\n        Get the maximum value, ignoring NaN values.\n\n        Returns:\n            Maximum value.\n        \"\"\"\n        return self.to_pandas().max(skipna=True)\n\n    def nan_min(self) -> Any:\n        \"\"\"\n        Get the minimum value, ignoring NaN values.\n\n        Returns:\n            Minimum value.\n        \"\"\"\n        return self.to_pandas().min(skipna=True)\n\n    def product(self) -> Any:\n        \"\"\"\n        Get the product of all values.\n\n        Returns:\n            Product of all values.\n        \"\"\"\n        return self.to_pandas().product()\n\n    def quantile(self, quantile: float, interpolation: str = \"nearest\") -> float | None:\n        \"\"\"\n        Get the quantile value.\n\n        Args:\n            quantile: Quantile to calculate.\n            interpolation: Interpolation method.\n\n        Returns:\n            Quantile value.\n        \"\"\"\n        return self.to_pandas().quantile(quantile, interpolation=interpolation)\n\n    def std(self, ddof: int = 1) -> float:\n        \"\"\"\n        Get the standard deviation.\n\n        Args:\n            ddof: Delta Degrees of Freedom.\n\n        Returns:\n            Standard deviation.\n        \"\"\"\n        return self.to_pandas().std(ddof=ddof)\n\n    def sum(self) -> Any:\n        \"\"\"\n        Get the sum of all values.\n\n        Returns:\n            Sum of all 
values.\n        \"\"\"\n        return self.to_pandas().sum()\n\n    def var(self, ddof: int = 1) -> float:\n        \"\"\"\n        Get the variance.\n\n        Args:\n            ddof: Delta Degrees of Freedom.\n\n        Returns:\n            Variance.\n        \"\"\"\n        return self.to_pandas().var(ddof=ddof)\n\n    @property\n    def arr(self) -> polars.series.array.ArrayNameSpace:\n        \"\"\"\n        Get the underlying array.\n\n        Returns:\n            Underlying array.\n        \"\"\"\n        return polars.from_pandas(self._query_compiler.to_pandas().squeeze(axis=1)).arr\n\n    @property\n    def dtype(self) -> polars.datatypes.DataType:\n        \"\"\"\n        Get the data type.\n\n        Returns:\n            Data type.\n        \"\"\"\n        return polars.from_pandas(\n            pandas.Series().astype(self._query_compiler.dtypes.iloc[0])\n        ).dtype\n\n    @property\n    def name(self) -> str:\n        \"\"\"\n        Get the name.\n\n        Returns:\n            Name.\n        \"\"\"\n        return self._query_compiler.columns[0]\n\n    @property\n    def shape(self) -> tuple[int]:\n        \"\"\"\n        Get the shape.\n\n        Returns:\n            Shape.\n        \"\"\"\n        return (len(self._query_compiler.index),)\n\n    flags = []\n\n    @property\n    def bin(self):\n        raise NotImplementedError(\"not yet\")\n\n    def all(self) -> bool:\n        \"\"\"\n        Check if all values are True.\n\n        Returns:\n            True if all values are True, False otherwise.\n        \"\"\"\n        return self.to_pandas().all()\n\n    def any(self) -> bool:\n        \"\"\"\n        Check if any value is True.\n\n        Returns:\n            True if any value is True, False otherwise.\n        \"\"\"\n        return self.to_pandas().any()\n\n    def not_(self) -> \"Series\":\n        \"\"\"\n        Negate the values.\n\n        Returns:\n            Negated Series.\n        \"\"\"\n        return 
self.__constructor__(_query_compiler=self._query_compiler.invert())\n\n    @property\n    def cat(self):\n        raise NotImplementedError(\"not yet\")\n\n    def abs(self) -> \"Series\":\n        \"\"\"\n        Get the absolute values.\n\n        Returns:\n            Absolute values Series.\n        \"\"\"\n        return self.__constructor__(_query_compiler=self._query_compiler.abs())\n\n    def arccos(self) -> \"Series\":\n        \"\"\"\n        Get the arc cosine values.\n\n        Returns:\n            Arc cosine values Series.\n        \"\"\"\n        raise NotImplementedError(\"not yet\")\n\n    def arccosh(self) -> \"Series\":\n        \"\"\"\n        Get the hyperbolic arc cosine values.\n\n        Returns:\n            Hyperbolic arc cosine values Series.\n        \"\"\"\n        raise NotImplementedError(\"not yet\")\n\n    def arcsin(self) -> \"Series\":\n        \"\"\"\n        Get the arc sine values.\n\n        Returns:\n            Arc sine values Series.\n        \"\"\"\n        raise NotImplementedError(\"not yet\")\n\n    def arcsinh(self) -> \"Series\":\n        \"\"\"\n        Get the hyperbolic arc sine values.\n\n        Returns:\n            Hyperbolic arc sine values Series.\n        \"\"\"\n        raise NotImplementedError(\"not yet\")\n\n    def arctan(self) -> \"Series\":\n        \"\"\"\n        Get the arc tangent values.\n\n        Returns:\n            Arc tangent values Series.\n        \"\"\"\n        raise NotImplementedError(\"not yet\")\n\n    def arctanh(self) -> \"Series\":\n        \"\"\"\n        Get the hyperbolic arc tangent values.\n\n        Returns:\n            Hyperbolic arc tangent values Series.\n        \"\"\"\n        raise NotImplementedError(\"not yet\")\n\n    def arg_true(self) -> \"Series\":\n        \"\"\"\n        Get the indices of the True values.\n\n        Returns:\n            Series of the indices where the values are True.\n        \"\"\"\n        return self.__constructor__(\n            
_query_compiler=self._query_compiler.reset_index(drop=False)\n            .getitem_array(self._query_compiler)\n            .getitem_column_array(0, numeric=True)\n        ).rename(self.name)\n\n    def arg_unique(self) -> \"Series\":\n        \"\"\"\n        Get the index of the first unique value.\n\n        Returns:\n            Index of the first unique value.\n        \"\"\"\n        raise NotImplementedError(\"not yet\")\n\n    def cbrt(self) -> \"Series\":\n        \"\"\"\n        Get the cube root values.\n\n        Returns:\n            Cube root values Series.\n        \"\"\"\n        raise NotImplementedError(\"not yet\")\n\n    def cos(self) -> \"Series\":\n        \"\"\"\n        Get the cosine values.\n\n        Returns:\n            Cosine values Series.\n        \"\"\"\n        raise NotImplementedError(\"not yet\")\n\n    def cosh(self) -> \"Series\":\n        \"\"\"\n        Get the hyperbolic cosine values.\n\n        Returns:\n            Hyperbolic cosine values Series.\n        \"\"\"\n        raise NotImplementedError(\"not yet\")\n\n    def cot(self) -> \"Series\":\n        \"\"\"\n        Get the cotangent values.\n\n        Returns:\n            Cotangent values Series.\n        \"\"\"\n        raise NotImplementedError(\"not yet\")\n\n    def cum_count(self) -> \"Series\":\n        \"\"\"\n        Get the cumulative count values.\n\n        Returns:\n            Cumulative count values Series.\n        \"\"\"\n        return self.__constructor__(\n            _query_compiler=self._query_compiler.isna().cumsum()\n        )\n\n    def cum_max(self) -> \"Series\":\n        \"\"\"\n        Get the cumulative maximum values.\n\n        Returns:\n            Cumulative maximum values Series.\n        \"\"\"\n        return self.__constructor__(_query_compiler=self._query_compiler.cummax())\n\n    def cum_min(self) -> \"Series\":\n        \"\"\"\n        Get the cumulative minimum values.\n\n        Returns:\n            Cumulative minimum 
values Series.\n        \"\"\"\n        return self.__constructor__(_query_compiler=self._query_compiler.cummin())\n\n    def cum_prod(self) -> \"Series\":\n        \"\"\"\n        Get the cumulative product values.\n\n        Returns:\n            Cumulative product values Series.\n        \"\"\"\n        return self.__constructor__(_query_compiler=self._query_compiler.cumprod())\n\n    def cum_sum(self) -> \"Series\":\n        \"\"\"\n        Get the cumulative sum values.\n\n        Returns:\n            Cumulative sum values Series.\n        \"\"\"\n        return self.__constructor__(_query_compiler=self._query_compiler.cumsum())\n\n    def cumulative_eval(\n        self, expr, min_periods: int = 1, *, parallel: bool = False\n    ) -> \"Series\":\n        \"\"\"\n        Get the cumulative evaluation values.\n\n        Args:\n            expr: Expression to evaluate.\n            min_periods: Minimum number of periods.\n\n        Returns:\n            Cumulative evaluation values Series.\n        \"\"\"\n        raise NotImplementedError(\"not yet\")\n\n    def diff(self, n: int = 1, null_behavior: str = \"ignore\") -> \"Series\":\n        \"\"\"\n        Calculate the first discrete difference between shifted items.\n\n        Args:\n            n: Number of periods to shift.\n            null_behavior: Null behavior.\n\n        Returns:\n            Difference values Series.\n        \"\"\"\n        raise NotImplementedError(\"not yet\")\n\n    def dot(self, other) -> int | float | None:\n        \"\"\"\n        Calculate the dot product.\n\n        Args:\n            other: Other Series.\n\n        Returns:\n            Dot product.\n        \"\"\"\n        if isinstance(other, Series):\n            other = other.to_pandas()\n        return self.to_pandas().dot(other)\n\n    def entropy(\n        self, base: float = 2.718281828459045, *, normalize: bool = False\n    ) -> float:\n        \"\"\"\n        Calculate the entropy.\n\n        Args:\n            
base: Logarithm base.\n            normalize: Normalize the entropy.\n\n        Returns:\n            Entropy.\n        \"\"\"\n        raise NotImplementedError(\"not yet\")\n\n    def ewm_mean(\n        self,\n        com: int | None = None,\n        span: int | None = None,\n        half_life: int | None = None,\n        alpha: float | None = None,\n        *,\n        adjust: bool = True,\n        min_periods: int = 1,\n        ignore_nulls: bool | None = None,\n    ) -> \"Series\":\n        \"\"\"\n        Calculate the exponential weighted mean.\n\n        Args:\n            com: Center of mass.\n            span: Span.\n\n        Returns:\n            Exponential weighted mean Series.\n        \"\"\"\n        return self.__constructor__(\n            self.to_pandas()\n            .ewm(\n                com=com,\n                span=span,\n                halflife=half_life,\n                alpha=alpha,\n                adjust=adjust,\n                min_periods=min_periods,\n                ignore_na=ignore_nulls,\n            )\n            .mean()\n        )\n\n    def ewm_mean_by(self, by, *, half_life: int | None = None) -> \"Series\":\n        \"\"\"\n        Calculate the exponential weighted mean by group.\n\n        Args:\n            by: Grouping Series.\n\n        Returns:\n            Exponential weighted mean Series.\n        \"\"\"\n        raise NotImplementedError(\"not yet\")\n\n    def ewm_std(\n        self,\n        com: int | None = None,\n        span: int | None = None,\n        half_life: int | None = None,\n        alpha: float | None = None,\n        *,\n        adjust: bool = True,\n        min_periods: int = 1,\n        ignore_nulls: bool | None = None,\n    ) -> \"Series\":\n        \"\"\"\n        Calculate the exponential weighted standard deviation.\n\n        Args:\n            com: Center of mass.\n            span: Span.\n\n        Returns:\n            Exponential weighted standard deviation Series.\n        \"\"\"\n     
   return self.__constructor__(\n            self.to_pandas()\n            .ewm(\n                com=com,\n                span=span,\n                halflife=half_life,\n                alpha=alpha,\n                adjust=adjust,\n                min_periods=min_periods,\n                ignore_na=ignore_nulls,\n            )\n            .std()\n        )\n\n    def ewm_var(\n        self,\n        com: int | None = None,\n        span: int | None = None,\n        half_life: int | None = None,\n        alpha: float | None = None,\n        *,\n        adjust: bool = True,\n        min_periods: int = 1,\n        ignore_nulls: bool | None = None,\n    ) -> \"Series\":\n        \"\"\"\n        Calculate the exponential weighted variance.\n\n        Args:\n            com: Center of mass.\n            span: Span.\n\n        Returns:\n            Exponential weighted variance Series.\n        \"\"\"\n        return self.__constructor__(\n            self.to_pandas()\n            .ewm(\n                com=com,\n                span=span,\n                halflife=half_life,\n                alpha=alpha,\n                adjust=adjust,\n                min_periods=min_periods,\n                ignore_na=ignore_nulls,\n            )\n            .var()\n        )\n\n    def exp(self) -> \"Series\":\n        \"\"\"\n        Calculate the exponential values.\n\n        Returns:\n            Exponential values Series.\n        \"\"\"\n        return self.__constructor__(self.to_pandas().exp())\n\n    def hash(\n        self,\n        seed: int = 0,\n        seed_1: int | None = None,\n        seed_2: int | None = None,\n        seed_3: int | None = None,\n    ) -> \"Series\":\n        \"\"\"\n        Calculate the hash values.\n\n        Args:\n            seed: Seed.\n            seed_1: Seed 1.\n            seed_2: Seed 2.\n            seed_3: Seed 3.\n\n        Returns:\n            Hash values Series.\n        \"\"\"\n        raise NotImplementedError(\"not 
yet\")\n\n    def hist(\n        self,\n        bins: list[float] | None = None,\n        *,\n        bin_count: int | None = None,\n        include_category: bool = True,\n        include_breakpoint: bool = True,\n    ) -> \"Series\":\n        \"\"\"\n        Calculate the histogram.\n\n        Args:\n            bins: Bins.\n            bin_count: Bin count.\n\n        Returns:\n            Histogram Series.\n        \"\"\"\n        raise NotImplementedError(\"not yet\")\n\n    def is_between(self, lower_bound, upper_bound, closed: str = \"both\") -> \"Series\":\n        \"\"\"\n        Check if values are between the bounds.\n\n        Args:\n            lower_bound: Lower bound.\n            upper_bound: Upper bound.\n            closed: Closed bounds.\n\n        Returns:\n            Boolean Series.\n        \"\"\"\n        raise NotImplementedError(\"not yet\")\n\n    def kurtosis(self, *, fisher: bool = True, bias: bool = True) -> float | None:\n        \"\"\"\n        Calculate the kurtosis.\n\n        Args:\n            fisher: Fisher method.\n            bias: Bias method.\n\n        Returns:\n            Kurtosis.\n        \"\"\"\n        return self.to_pandas().kurtosis(fisher=fisher, bias=bias)\n\n    def log(self, base: float = 2.718281828459045) -> \"Series\":\n        \"\"\"\n        Calculate the logarithm values.\n\n        Args:\n            base: Logarithm base.\n\n        Returns:\n            Logarithm values Series.\n        \"\"\"\n        raise NotImplementedError(\"not yet\")\n\n    def log10(self) -> \"Series\":\n        \"\"\"\n        Calculate the base 10 logarithm values.\n\n        Returns:\n            Base 10 logarithm values Series.\n        \"\"\"\n        return self.log(10)\n\n    def log1p(self) -> \"Series\":\n        \"\"\"\n        Calculate the natural logarithm of 1 plus the values.\n\n        Returns:\n            Natural logarithm of 1 plus the values Series.\n        \"\"\"\n        raise NotImplementedError(\"not 
yet\")\n\n    def replace(\n        self,\n        mapping: dict[Any, Any],\n        *,\n        default: Any = None,\n        return_dtype=None,\n    ) -> \"Series\":\n        \"\"\"\n        Map values to other values.\n\n        Args:\n            mapping: Mapping.\n\n        Returns:\n            Mapped Series.\n        \"\"\"\n        return self.__constructor__(\n            self.to_pandas().apply(lambda x: mapping.get(x, default))\n        )\n\n    def pct_change(self, n: int = 1) -> \"Series\":\n        \"\"\"\n        Calculate the percentage change.\n\n        Args:\n            n: Number of periods to shift.\n\n        Returns:\n            Percentage change Series.\n        \"\"\"\n        return self.__constructor__(self.to_pandas().pct_change(n))\n\n    def peak_max(self) -> \"Series\":\n        \"\"\"\n        Get the peak maximum values.\n\n        Returns:\n            Peak maximum values Series.\n        \"\"\"\n        return self.__eq__(self.max())\n\n    def peak_min(self) -> \"Series\":\n        \"\"\"\n        Get the peak minimum values.\n\n        Returns:\n            Peak minimum values Series.\n        \"\"\"\n        return self.__eq__(self.min())\n\n    def rank(\n        self,\n        method: str = \"average\",\n        *,\n        descending: bool = False,\n        seed: int | None = None,\n    ) -> \"Series\":\n        \"\"\"\n        Calculate the rank.\n\n        Args:\n            method: Rank method.\n\n        Returns:\n            Rank Series.\n        \"\"\"\n        # TODO: support seed\n        if method not in [\"average\", \"min\", \"max\", \"first\", \"dense\"]:\n            raise ValueError(f\"method {method} not supported\")\n        return self.__constructor__(\n            self.to_pandas().rank(method=method, ascending=not descending)\n        )\n\n    def rolling_map(\n        self,\n        function: callable,\n        window_size: int,\n        weights: list[float] | None = None,\n        min_periods: int = 1,\n  
      *,\n        center: bool = False,\n    ) -> \"Series\":\n        \"\"\"\n        Apply a rolling function.\n\n        Args:\n            function: Function to apply.\n            window_size: Window size.\n\n        Returns:\n            Applied Series.\n        \"\"\"\n        if weights is not None:\n            raise NotImplementedError(\"not yet\")\n        return self.__constructor__(\n            self.to_pandas()\n            .rolling(window=window_size, min_periods=min_periods, center=center)\n            .apply(function)\n        )\n\n    def rolling_max(\n        self,\n        window_size: int,\n        weights: list[float] | None = None,\n        min_periods: int = 1,\n        *,\n        center: bool = False,\n    ) -> \"Series\":\n        \"\"\"\n        Apply a rolling maximum function.\n\n        Args:\n            window_size: Window size.\n            weights: Not yet supported; must be None.\n\n        Returns:\n            Applied Series.\n        \"\"\"\n        if weights is not None:\n            raise NotImplementedError(\"not yet\")\n        return self.__constructor__(\n            self.to_pandas()\n            .rolling(window=window_size, min_periods=min_periods, center=center)\n            .max()\n        )\n\n    def rolling_mean(\n        self,\n        window_size: int,\n        weights: list[float] | None = None,\n        min_periods: int = 1,\n        *,\n        center: bool = False,\n    ) -> \"Series\":\n        \"\"\"\n        Apply a rolling mean function.\n\n        Args:\n            window_size: Window size.\n            weights: Not yet supported; must be None.\n\n        Returns:\n            Applied Series.\n        \"\"\"\n        if weights is not None:\n            raise NotImplementedError(\"not yet\")\n        return self.__constructor__(\n            self.to_pandas()\n            .rolling(window=window_size, min_periods=min_periods, center=center)\n            .mean()\n        )\n\n    def rolling_median(\n        self,\n        
window_size: int,\n        weights: list[float] | None = None,\n        min_periods: int = 1,\n        *,\n        center: bool = False,\n    ) -> \"Series\":\n        \"\"\"\n        Apply a rolling median function.\n\n        Args:\n            window_size: Window size.\n            weights: Not yet supported; must be None.\n\n        Returns:\n            Applied Series.\n        \"\"\"\n        if weights is not None:\n            raise NotImplementedError(\"not yet\")\n        return self.__constructor__(\n            self.to_pandas()\n            .rolling(window=window_size, min_periods=min_periods, center=center)\n            .median()\n        )\n\n    def rolling_min(\n        self,\n        window_size: int,\n        weights: list[float] | None = None,\n        min_periods: int = 1,\n        *,\n        center: bool = False,\n    ) -> \"Series\":\n        \"\"\"\n        Apply a rolling minimum function.\n\n        Args:\n            window_size: Window size.\n            weights: Not yet supported; must be None.\n\n        Returns:\n            Applied Series.\n        \"\"\"\n        if weights is not None:\n            raise NotImplementedError(\"not yet\")\n        return self.__constructor__(\n            self.to_pandas()\n            .rolling(window=window_size, min_periods=min_periods, center=center)\n            .min()\n        )\n\n    def rolling_quantile(\n        self,\n        window_size: int,\n        quantile: float,\n        interpolation: str = \"nearest\",\n        weights: list[float] | None = None,\n        min_periods: int = 1,\n        *,\n        center: bool = False,\n    ) -> \"Series\":\n        \"\"\"\n        Apply a rolling quantile function.\n\n        Args:\n            window_size: Window size.\n            quantile: Quantile to calculate.\n\n        Returns:\n            Applied Series.\n        \"\"\"\n        if weights is not None:\n            raise NotImplementedError(\"not yet\")\n        return self.__constructor__(\n            
self.to_pandas()\n            .rolling(window=window_size, min_periods=min_periods, center=center)\n            .quantile(quantile, interpolation=interpolation)\n        )\n\n    def rolling_skew(self, window_size: int, *, bias: bool = False) -> \"Series\":\n        \"\"\"\n        Apply a rolling skewness function.\n\n        Args:\n            window_size: Window size.\n            bias: Currently ignored.\n\n        Returns:\n            Applied Series.\n        \"\"\"\n        return self.__constructor__(self.to_pandas().rolling(window=window_size).skew())\n\n    def rolling_std(\n        self,\n        window_size: int,\n        weights: list[float] | None = None,\n        min_periods: int = 1,\n        *,\n        center: bool = False,\n        ddof: int = 1,\n    ) -> \"Series\":\n        \"\"\"\n        Apply a rolling standard deviation function.\n\n        Args:\n            window_size: Window size.\n            ddof: Delta Degrees of Freedom.\n\n        Returns:\n            Applied Series.\n        \"\"\"\n        if weights is not None:\n            raise NotImplementedError(\"not yet\")\n        return self.__constructor__(\n            self.to_pandas()\n            .rolling(window=window_size, min_periods=min_periods, center=center)\n            .std(ddof=ddof)\n        )\n\n    def rolling_sum(\n        self,\n        window_size: int,\n        weights: list[float] | None = None,\n        min_periods: int = 1,\n        *,\n        center: bool = False,\n    ) -> \"Series\":\n        \"\"\"\n        Apply a rolling sum function.\n\n        Args:\n            window_size: Window size.\n            weights: Not yet supported; must be None.\n\n        Returns:\n            Applied Series.\n        \"\"\"\n        if weights is not None:\n            raise NotImplementedError(\"not yet\")\n        return self.__constructor__(\n            self.to_pandas()\n            .rolling(window=window_size, min_periods=min_periods, center=center)\n            .sum()\n        
)\n\n    def rolling_var(\n        self,\n        window_size: int,\n        weights: list[float] | None = None,\n        min_periods: int = 1,\n        *,\n        center: bool = False,\n        ddof: int = 1,\n    ) -> \"Series\":\n        \"\"\"\n        Apply a rolling variance function.\n\n        Args:\n            function: Function to apply.\n            window_size: Window size.\n\n        Returns:\n            Applied Series.\n        \"\"\"\n        if weights is not None:\n            raise NotImplementedError(\"not yet\")\n        return self.__constructor__(\n            self.to_pandas()\n            .rolling(window=window_size, min_periods=min_periods, center=center)\n            .var(ddof=ddof)\n        )\n\n    def search_sorted(self, element, side: str = \"any\") -> int | \"Series\":\n        \"\"\"\n        Search for the element in the sorted Series.\n\n        Args:\n            element: Element to search.\n            side: Side to search.\n\n        Returns:\n            Index of the element.\n        \"\"\"\n        if side == \"any\":\n            side = \"left\"\n        return self.__constructor__(self.to_pandas().searchsorted(element, side=side))\n\n    def sign(self) -> \"Series\":\n        \"\"\"\n        Get the sign values.\n\n        Returns:\n            Sign values Series.\n        \"\"\"\n        return self.__lt__(0).__mul__(-1).__add__(self.__gt__(0))\n\n    def sin(self) -> \"Series\":\n        \"\"\"\n        Get the sine values.\n\n        Returns:\n            Sine values Series.\n        \"\"\"\n        raise NotImplementedError(\"not yet\")\n\n    def sinh(self) -> \"Series\":\n        \"\"\"\n        Get the hyperbolic sine values.\n\n        Returns:\n            Hyperbolic sine values Series.\n        \"\"\"\n        raise NotImplementedError(\"not yet\")\n\n    def skew(self, *, bias: bool = True) -> float:\n        \"\"\"\n        Calculate the skewness.\n\n        Args:\n            bias: Bias method.\n\n        
Returns:\n            Skewness.\n        \"\"\"\n        return self.to_pandas().skew()\n\n    def sqrt(self) -> \"Series\":\n        \"\"\"\n        Get the square root values.\n\n        Returns:\n            Square root values Series.\n        \"\"\"\n        return self.__constructor__(self.to_pandas().sqrt())\n\n    def tan(self) -> \"Series\":\n        \"\"\"\n        Get the tangent values.\n\n        Returns:\n            Tangent values Series.\n        \"\"\"\n        raise NotImplementedError(\"not yet\")\n\n    def tanh(self) -> \"Series\":\n        \"\"\"\n        Get the hyperbolic tangent values.\n\n        Returns:\n            Hyperbolic tangent values Series.\n        \"\"\"\n        raise NotImplementedError(\"not yet\")\n\n    def chunk_lengths(self) -> list[int]:\n        \"\"\"\n        Get the chunk lengths.\n\n        Returns:\n            Chunk lengths.\n        \"\"\"\n        raise NotImplementedError(\"not yet\")\n\n    def describe(\n        self,\n        percentiles: Sequence[float] | float | None = (0.25, 0.5, 0.75),\n        interpolation: str = \"nearest\",\n    ):\n        \"\"\"\n        Generate descriptive statistics.\n\n        Args:\n            percentiles: Percentiles to calculate.\n\n        Returns:\n            Descriptive statistics.\n        \"\"\"\n        return self.to_pandas().describe(percentiles=percentiles)\n\n    def estimated_size(self) -> int:\n        \"\"\"\n        Get the estimated size.\n\n        Returns:\n            Estimated size.\n        \"\"\"\n        return self.to_pandas().memory_usage(index=False)\n\n    def has_nulls(self) -> bool:\n        \"\"\"\n        Check if there are null values.\n\n        Returns:\n            True if there are null values, False otherwise.\n        \"\"\"\n        return self.to_pandas().isnull().any()\n\n    has_validity = has_nulls\n\n    def is_finite(self) -> \"Series\":\n        \"\"\"\n        Check if the values are finite.\n\n        Returns:\n            
True if the values are finite, False otherwise.\n        \"\"\"\n        return self.__ne__(np.inf)\n\n    def is_first_distinct(self) -> \"Series\":\n        \"\"\"\n        Check if the values are the first occurrence.\n\n        Returns:\n            True if the values are the first occurrence, False otherwise.\n        \"\"\"\n        raise NotImplementedError(\"not yet\")\n\n    def is_in(self, other: \"Series\" | list[Any]) -> \"Series\":\n        \"\"\"\n        Check if the values are in the other Series.\n\n        Args:\n            other: Other Series.\n\n        Returns:\n            True if the values are in the other Series, False otherwise.\n        \"\"\"\n        return self.__constructor__(self.to_pandas().isin(other))\n\n    def is_infinite(self) -> \"Series\":\n        \"\"\"\n        Check if the values are infinite.\n\n        Returns:\n            True if the values are infinite, False otherwise.\n        \"\"\"\n        return self.__eq__(np.inf)\n\n    def is_last_distinct(self) -> \"Series\":\n        \"\"\"\n        Check if the values are the last occurrence.\n\n        Returns:\n            True if the values are the last occurrence, False otherwise.\n        \"\"\"\n        raise NotImplementedError(\"not yet\")\n\n    def is_nan(self) -> \"Series\":\n        \"\"\"\n        Check if the values are NaN.\n\n        Returns:\n            True if the values are NaN, False otherwise.\n        \"\"\"\n        return self.__constructor__(_query_compiler=self._query_compiler.isna())\n\n    def is_not_nan(self) -> \"Series\":\n        \"\"\"\n        Check if the values are not NaN.\n\n        Returns:\n            True if the values are not NaN, False otherwise.\n        \"\"\"\n        return self.__constructor__(_query_compiler=self._query_compiler.notna())\n\n    def is_not_null(self) -> \"Series\":\n        \"\"\"\n        Check if the values are not null.\n\n        Returns:\n            True if the values are not null, False 
otherwise.\n        \"\"\"\n        return self.is_not_nan()\n\n    def is_null(self) -> \"Series\":\n        \"\"\"\n        Check if the values are null.\n\n        Returns:\n            True if the values are null, False otherwise.\n        \"\"\"\n        return self.is_nan()\n\n    def is_sorted(\n        self,\n        *,\n        descending: bool = False,\n        nulls_last: bool = False,\n    ) -> bool:\n        \"\"\"\n        Check if the values are sorted.\n\n        Args:\n            descending: Descending order.\n\n        Returns:\n            True if the values are sorted, False otherwise.\n        \"\"\"\n        return (\n            self.to_pandas().is_monotonic_increasing\n            if not descending\n            else self.to_pandas().is_monotonic_decreasing\n        )\n\n    def len(self) -> int:\n        \"\"\"\n        Get the length of the values.\n\n        Returns:\n            Length of the values Series.\n        \"\"\"\n        return len(self.to_pandas())\n\n    def lower_bound(self) -> \"Series\":\n        \"\"\"\n        Get the lower bound values.\n\n        Returns:\n            Lower bound values Series.\n        \"\"\"\n        raise NotImplementedError(\"not yet\")\n\n    def null_count(self) -> int:\n        \"\"\"\n        Get the number of null values.\n\n        Returns:\n            Number of null values.\n        \"\"\"\n        return self.to_pandas().isnull().sum()\n\n    def unique_counts(self) -> \"Series\":\n        \"\"\"\n        Get the unique counts.\n\n        Returns:\n            Unique counts.\n        \"\"\"\n        return self.__constructor__(values=self.to_pandas().value_counts())\n\n    def upper_bound(self) -> \"Series\":\n        \"\"\"\n        Get the upper bound values.\n\n        Returns:\n            Upper bound values Series.\n        \"\"\"\n        raise NotImplementedError(\"not yet\")\n\n    def value_counts(\n        self, *, sort: bool = False, parallel: bool = False, name: str = 
\"count\"\n    ) -> \"DataFrame\":\n        \"\"\"\n        Get the value counts.\n\n        Returns:\n            Value counts.\n        \"\"\"\n        from modin.polars import DataFrame\n\n        return DataFrame(\n            self.to_pandas().value_counts(sort=sort).reset_index(drop=False, names=name)\n        )\n\n    def to_frame(self, name: str | None = None) -> \"DataFrame\":\n        \"\"\"\n        Convert the Series to a DataFrame.\n\n        Args:\n            name: Name of the Series.\n\n        Returns:\n            DataFrame representation of the Series.\n        \"\"\"\n        from modin.polars import DataFrame\n\n        return DataFrame(_query_compiler=self._query_compiler).rename({self.name: name})\n\n    def to_init_repr(self, n: int = 1000) -> str:\n        \"\"\"\n        Convert Series to instantiatable string representation.\n\n        Args:\n            n: First n elements.\n\n        Returns:\n            Instantiatable string representation.\n        \"\"\"\n        return polars.from_pandas(\n            self.slice(0, n)._query_compiler.to_pandas()\n        ).to_init_repr()\n\n    @property\n    def list(self):\n        # TODO: implement list object\n        #  https://docs.pola.rs/api/python/stable/reference/series/list.html\n        raise NotImplementedError(\"not yet\")\n\n    def alias(self, name: str) -> \"Series\":\n        \"\"\"\n        Rename the Series.\n\n        Args:\n            name: New name.\n\n        Returns:\n            Renamed Series.\n        \"\"\"\n        return self.to_frame(name).to_series()\n\n    def append(self, other: \"Series\") -> \"Series\":\n        \"\"\"\n        Append another Series.\n\n        Args:\n            other: Other Series.\n\n        Returns:\n            Appended Series.\n        \"\"\"\n        return self.__constructor__(\n            _query_compiler=self._query_compiler.concat(0, other._query_compiler)\n        )\n\n    def arg_sort(\n        self, *, descending: bool = False, 
nulls_last: bool = False\n    ) -> \"Series\":\n        \"\"\"\n        Get the sorted indices.\n\n        Args:\n            descending: Descending order.\n\n        Returns:\n            Sorted indices Series.\n        \"\"\"\n        # TODO: implement nulls_last\n        result = self.__constructor__(values=self.to_pandas().argsort())\n        if descending:\n            return result.reverse()\n        else:\n            return result\n\n    def ceil(self) -> \"Series\":\n        \"\"\"\n        Get the ceiling values.\n\n        Returns:\n            Ceiling values Series.\n        \"\"\"\n        raise NotImplementedError(\"not yet\")\n\n    def clear(self, n: int = 0) -> \"Series\":\n        \"\"\"\n        Create an empty copy of the current Series, with zero to ‘n’ elements.\n\n        Args:\n            n: Number of elements.\n\n        Returns:\n            Series will n nulls.\n        \"\"\"\n        raise NotImplementedError(\"not yet\")\n\n    def clip(self, lower_bound=None, upper_bound=None) -> \"Series\":\n        \"\"\"\n        Clip the values.\n\n        Args:\n            lower_bound: Lower bound.\n            upper_bound: Upper bound.\n\n        Returns:\n            Clipped values Series.\n        \"\"\"\n        return self.__constructor__(\n            values=self.to_pandas().clip(lower_bound, upper_bound)\n        )\n\n    def cut(\n        self,\n        breaks: Sequence[float],\n        *,\n        labels: list[str] | None = None,\n        break_point_label: str = \"breakpoint\",\n        left_closed: bool = False,\n        include_breaks: bool = False,\n        as_series: bool = True,\n    ) -> \"BasePolarsDataset\":\n        raise NotImplementedError(\"not yet\")\n\n    def extend_constant(self, value) -> \"Series\":\n        \"\"\"\n        Extend the Series with a constant value.\n\n        Args:\n            value: Constant value.\n\n        Returns:\n            Extended Series.\n        \"\"\"\n        raise 
NotImplementedError(\"not yet\")\n\n    def floor(self) -> \"BasePolarsDataset\":\n        return self.__floordiv__(1)\n\n    def gather(self, indices) -> \"Series\":\n        \"\"\"\n        Gather values by indices.\n\n        Args:\n            indices: Indices.\n\n        Returns:\n            Gathered Series.\n        \"\"\"\n        return self.__constructor__(\n            values=self.to_pandas().iloc[\n                (\n                    indices._query_compiler\n                    if hasattr(indices, \"_query_compiler\")\n                    else indices\n                )\n            ]\n        )\n\n    def interpolate_by(self, by) -> \"Series\":\n        \"\"\"\n        Interpolate values by group.\n\n        Args:\n            by: Grouping Series.\n\n        Returns:\n            Interpolated Series.\n        \"\"\"\n        raise NotImplementedError(\"not yet\")\n\n    def item(self, index: int | None = None) -> Any:\n        \"\"\"\n        Get the item at the index.\n\n        Args:\n            index: Index.\n\n        Returns:\n            Item at the index.\n        \"\"\"\n        return self.to_pandas().iloc[index]\n\n    def new_from_index(self, index: int, length: int) -> \"Series\":\n        \"\"\"\n        Create a new Series from the index.\n\n        Args:\n            index: Index.\n            length: Length.\n\n        Returns:\n            New Series.\n        \"\"\"\n        raise NotImplementedError(\"not yet\")\n\n    def qcut(\n        self,\n        quantiles: Sequence[float] | int,\n        *,\n        labels: Sequence[str] | None = None,\n        left_closed: bool = False,\n        allow_duplicates: bool = False,\n        include_breaks: bool = False,\n        break_point_label: str = \"breakpoint\",\n        category_labels: str = \"category\",\n        as_series: bool = True,\n    ) -> \"Series\" | \"DataFrame\":\n        \"\"\"\n        Bin continuous values into discrete categories based on quantiles.\n\n        Args:\n  
          quantiles: Number of quantiles or sequence of quantiles.\n            labels: Labels for the resulting bins.\n            left_closed: Whether the intervals are left-closed.\n            allow_duplicates: Whether to allow duplicate intervals.\n            include_breaks: Whether to include the breaks in the result.\n            break_point_label: Label for the break points.\n            category_labels: Label for the categories.\n            as_series: Whether to return a Series.\n\n        Returns:\n            Binned Series.\n        \"\"\"\n        raise NotImplementedError(\"not yet\")\n\n    def rechunk(self, *, in_place: bool = False) -> \"Series\":\n        \"\"\"\n        Rechunk the Series.\n\n        Args:\n            in_place: In-place operation.\n\n        Returns:\n            Rechunked Series.\n        \"\"\"\n        raise NotImplementedError(\"not yet\")\n\n    rename = alias\n\n    def reshape(self, dimensions, nested_type) -> \"Series\":\n        \"\"\"\n        Reshape the Series.\n\n        Args:\n            dimensions: Dimensions.\n            nested_type: Nested type.\n\n        Returns:\n            Reshaped Series.\n        \"\"\"\n        raise NotImplementedError(\"not yet\")\n\n    def reverse(self) -> \"Series\":\n        \"\"\"\n        Reverse the Series.\n\n        Returns:\n            Reversed Series.\n        \"\"\"\n        return self.__constructor__(values=self.to_pandas().iloc[::-1])\n\n    def rle(self) -> \"Series\":\n        \"\"\"\n        Run-length encode the Series.\n\n        Returns:\n            Run-length encoded Series.\n        \"\"\"\n        raise NotImplementedError(\"not yet\")\n\n    def rle_id(self) -> \"Series\":\n        \"\"\"\n        Run-length encode the Series with IDs.\n\n        Returns:\n            Run-length encoded Series with IDs.\n        \"\"\"\n        raise NotImplementedError(\"not yet\")\n\n    def round(self, decimals: int = 0) -> \"Series\":\n        \"\"\"\n        Round the 
values.\n\n        Args:\n            decimals: Number of decimals.\n\n        Returns:\n            Rounded values Series.\n        \"\"\"\n        return self.__constructor__(values=self.to_pandas().round(decimals))\n\n    def round_sig_figs(self, digits: int) -> \"Series\":\n        \"\"\"\n        Round the values to significant figures.\n\n        Args:\n            digits: Number of significant figures.\n\n        Returns:\n            Rounded values Series.\n        \"\"\"\n        raise NotImplementedError(\"not yet\")\n\n    def scatter(self, indices, values) -> \"Series\":\n        \"\"\"\n        Scatter values by indices.\n\n        Args:\n            indices: Indices.\n            values: Values.\n\n        Returns:\n            Scattered Series.\n        \"\"\"\n        raise NotImplementedError(\"not yet\")\n\n    def set(self, filter: \"Series\", value: int | float | str | bool | None) -> \"Series\":\n        \"\"\"\n        Set values by filter.\n\n        Args:\n            filter: Filter.\n            value: Value.\n\n        Returns:\n            Set Series.\n        \"\"\"\n        raise NotImplementedError(\"not yet\")\n\n    def shrink_dtype(self) -> \"Series\":\n        \"\"\"\n        Shrink the data type.\n\n        Returns:\n            Shrunk Series.\n        \"\"\"\n        raise NotImplementedError(\"not yet\")\n\n    def shuffle(self, seed: int | None = None) -> \"Series\":\n        \"\"\"\n        Shuffle the Series.\n\n        Args:\n            seed: Seed.\n\n        Returns:\n            Shuffled Series.\n        \"\"\"\n        raise NotImplementedError(\"not yet\")\n\n    def zip_with(self, mask: \"Series\", other: \"Series\") -> \"Series\":\n        \"\"\"\n        Zip the Series with another Series.\n\n        Args:\n            mask: Mask Series.\n            other: Other Series.\n\n        Returns:\n            Zipped Series.\n        \"\"\"\n        return self.__constructor__(\n            
_query_compiler=self._query_compiler.where(\n                mask._query_compiler, other._query_compiler\n            )\n        )\n\n    def map_elements(\n        self,\n        function: callable,\n        return_dtype=None,\n        *,\n        skip_nulls: bool = True,\n    ) -> \"Series\":\n        \"\"\"\n        Map the elements.\n\n        Args:\n            function: Function to apply.\n\n        Returns:\n            Mapped Series.\n        \"\"\"\n        if return_dtype is not None or skip_nulls is False:\n            ErrorMessage.warn(\n                \"`return_dtype` and `skip_nulls=False` are not supported yet\"\n            )\n        return self.__constructor__(values=self.to_pandas().apply(function))\n\n    def reinterpret(self, *, signed: bool = True) -> \"Series\":\n        \"\"\"\n        Reinterpret the data type of the series as signed or unsigned.\n\n        Args:\n            signed: If True, reinterpret as signed, otherwise as unsigned.\n\n        Returns:\n            Reinterpreted Series.\n        \"\"\"\n        raise NotImplementedError(\"not yet\")\n\n    def set_sorted(self, *, descending: bool = False) -> \"Series\":\n        \"\"\"\n        Set the Series as sorted.\n\n        Args:\n            descending: Descending order.\n\n        Returns:\n            Sorted Series.\n        \"\"\"\n        self._sorted = True\n        self._descending = descending\n        return self\n\n    def to_physical(self) -> \"Series\":\n        \"\"\"\n        Convert the Series to physical.\n\n        Returns:\n            Physical Series.\n        \"\"\"\n        raise NotImplementedError(\"not yet\")\n\n    def get_chunks(self) -> list[\"Series\"]:\n        \"\"\"\n        Get the chunks.\n\n        Returns:\n            Chunks.\n        \"\"\"\n        raise NotImplementedError(\"not yet\")\n\n    @property\n    def str(self):\n        # TODO: implement str object\n        #  https://docs.pola.rs/api/python/stable/reference/series/string.html\n 
       raise NotImplementedError(\"not yet\")\n\n    @property\n    def struct(self):\n        # TODO: implement struct object\n        #  https://docs.pola.rs/api/python/stable/reference/series/struct.html\n        raise NotImplementedError(\"not yet\")\n\n    @property\n    def dt(self):\n        # TODO: implement dt object\n        #  https://docs.pola.rs/api/python/stable/reference/series/temporal.html\n        raise NotImplementedError(\"not yet\")\n\n    def __len__(self) -> int:\n        \"\"\"\n        Get the length of the Series.\n        \"\"\"\n        return self.len()\n\n    def __matmul__(self, other) -> \"Series\":\n        \"\"\"\n        Matrix multiplication.\n\n        Args:\n            other: Other Series.\n\n        Returns:\n            Matrix multiplication Series.\n        \"\"\"\n        raise NotImplementedError(\"not yet\")\n\n    def __radd__(self, other) -> \"Series\":\n        \"\"\"\n        Right addition.\n\n        Args:\n            other: Other Series.\n\n        Returns:\n            Added Series.\n        \"\"\"\n        return self.__constructor__(\n            _query_compiler=self._query_compiler.radd(other, axis=0)\n        )\n\n    def __rand__(self, other) -> \"Series\":\n        \"\"\"\n        Right and.\n\n        Args:\n            other: Other Series.\n\n        Returns:\n            And Series.\n        \"\"\"\n        return self.__constructor__(\n            _query_compiler=self._query_compiler.__rand__(other, axis=0)\n        )\n\n    def __rfloordiv__(self, other) -> \"Series\":\n        \"\"\"\n        Right floor division.\n\n        Args:\n            other: Other Series.\n\n        Returns:\n            Floored Series.\n        \"\"\"\n        return self.__constructor__(\n            _query_compiler=self._query_compiler.rfloordiv(other, axis=0)\n        )\n\n    def __rmatmul__(self, other) -> \"Series\":\n        \"\"\"\n        Right matrix multiplication.\n\n        Args:\n            other: Other 
Series.\n\n        Returns:\n            Matrix multiplication Series.\n        \"\"\"\n        raise NotImplementedError(\"not yet\")\n\n    def __rmod__(self, other) -> \"Series\":\n        \"\"\"\n        Right modulo.\n\n        Args:\n            other: Other Series.\n\n        Returns:\n            Modulo Series.\n        \"\"\"\n        return self.__constructor__(\n            _query_compiler=self._query_compiler.rmod(other, axis=0)\n        )\n\n    def __rmul__(self, other) -> \"Series\":\n        \"\"\"\n        Right multiplication.\n\n        Args:\n            other: Other Series.\n\n        Returns:\n            Multiplied Series.\n        \"\"\"\n        return self.__constructor__(\n            _query_compiler=self._query_compiler.rmul(other, axis=0)\n        )\n\n    def __ror__(self, other) -> \"Series\":\n        \"\"\"\n        Right or.\n\n        Args:\n            other: Other Series.\n\n        Returns:\n            Or Series.\n        \"\"\"\n        return self.__constructor__(\n            _query_compiler=self._query_compiler.__ror__(other, axis=0)\n        )\n\n    def __rpow__(self, other) -> \"Series\":\n        \"\"\"\n        Right power.\n\n        Args:\n            other: Other Series.\n\n        Returns:\n            Powered Series.\n        \"\"\"\n        return self.__constructor__(\n            _query_compiler=self._query_compiler.rpow(other, axis=0)\n        )\n\n    def __rsub__(self, other) -> \"Series\":\n        \"\"\"\n        Right subtraction.\n\n        Args:\n            other: Other Series.\n\n        Returns:\n            Subtracted Series.\n        \"\"\"\n        return self.__constructor__(\n            _query_compiler=self._query_compiler.rsub(other, axis=0)\n        )\n\n    def __rtruediv__(self, other) -> \"Series\":\n        \"\"\"\n        Right true division.\n\n        Args:\n            other: Other Series.\n\n        Returns:\n            Divided Series.\n        \"\"\"\n        return 
self.__constructor__(\n            _query_compiler=self._query_compiler.rtruediv(other, axis=0)\n        )\n\n    def __rxor__(self, other) -> \"Series\":\n        \"\"\"\n        Right xor.\n\n        Args:\n            other: Other Series.\n\n        Returns:\n            Xor Series.\n        \"\"\"\n        return self.__constructor__(\n            _query_compiler=self._query_compiler.__rxor__(other, axis=0)\n        )\n\n    def eq(self, other) -> \"Series\":\n        \"\"\"\n        Check if the values are equal to the other Series.\n\n        Args:\n            other: Other Series.\n\n        Returns:\n            Boolean Series.\n        \"\"\"\n        return self.__constructor__(\n            _query_compiler=self._query_compiler.eq(other._query_compiler)\n        )\n\n    def eq_missing(self, other) -> \"Series\":\n        \"\"\"\n        Check if the values are equal to the other Series, including missing values.\n\n        Args:\n            other: Other Series.\n\n        Returns:\n            Boolean Series.\n        \"\"\"\n        raise NotImplementedError(\"not yet\")\n\n    def ge(self, other) -> \"Series\":\n        \"\"\"\n        Check if the values are greater than or equal to the other Series.\n\n        Args:\n            other: Other Series.\n\n        Returns:\n            Boolean Series.\n        \"\"\"\n        return self.__constructor__(\n            _query_compiler=self._query_compiler.ge(other._query_compiler)\n        )\n\n    def gt(self, other) -> \"Series\":\n        \"\"\"\n        Check if the values are greater than the other Series.\n\n        Args:\n            other: Other Series.\n\n        Returns:\n            Boolean Series.\n        \"\"\"\n        return self.__constructor__(\n            _query_compiler=self._query_compiler.gt(other._query_compiler)\n        )\n\n    def le(self, other) -> \"Series\":\n        \"\"\"\n        Check if the values are less than or equal to the other Series.\n\n        Args:\n            
other: Other Series.\n\n        Returns:\n            Boolean Series.\n        \"\"\"\n        return self.__constructor__(\n            _query_compiler=self._query_compiler.le(other._query_compiler)\n        )\n\n    def lt(self, other) -> \"Series\":\n        \"\"\"\n        Check if the values are less than the other Series.\n\n        Args:\n            other: Other Series.\n\n        Returns:\n            Boolean Series.\n        \"\"\"\n        return self.__constructor__(\n            _query_compiler=self._query_compiler.lt(other._query_compiler)\n        )\n\n    def n_unique(self) -> int:\n        \"\"\"\n        Get the number of unique values.\n\n        Returns:\n            Number of unique values.\n        \"\"\"\n        return self._query_compiler.nunique().to_pandas().squeeze(axis=None)\n\n    def ne(self, other) -> \"Series\":\n        \"\"\"\n        Check if the values are not equal to the other Series.\n\n        Args:\n            other: Other Series.\n\n        Returns:\n            Boolean Series.\n        \"\"\"\n        return self.__constructor__(\n            _query_compiler=self._query_compiler.ne(other._query_compiler)\n        )\n\n    def ne_missing(self, other) -> \"Series\":\n        \"\"\"\n        Check if the values are not equal to the other Series, including missing values.\n\n        Args:\n            other: Other Series.\n\n        Returns:\n            Boolean Series.\n        \"\"\"\n        raise NotImplementedError(\"not yet\")\n\n    def pow(self, exponent) -> \"Series\":\n        \"\"\"\n        Raise the values to the power of the exponent.\n\n        Args:\n            exponent: Exponent.\n\n        Returns:\n            Powered Series.\n        \"\"\"\n        return self.__constructor__(\n            _query_compiler=self._query_compiler.pow(exponent, axis=0)\n        )\n\n    def replace_strict(\n        self, old, new=no_default, *, default=no_default, return_dtype=None\n    ) -> \"Series\":\n        \"\"\"\n     
   Replace values strictly.\n\n        Args:\n            old: Old values.\n            new: New values.\n            default: Default value.\n\n        Returns:\n            Replaced Series.\n        \"\"\"\n        raise NotImplementedError(\"not yet\")\n\n    def to_list(self) -> list:\n        \"\"\"\n        Convert the Series to a list.\n\n        Returns:\n            List representation of the Series.\n        \"\"\"\n        return self._to_polars().tolist()\n\n    def drop_nans(self) -> \"Series\":\n        \"\"\"\n        Drop NaN values.\n\n        Returns:\n            Series without NaN values.\n        \"\"\"\n        return self.__constructor__(\n            _query_compiler=self._query_compiler.dropna(how=\"any\")\n        )\n"
  },
  {
    "path": "modin/tests/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n"
  },
  {
    "path": "modin/tests/config/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n"
  },
  {
    "path": "modin/tests/config/docs_module/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nfrom .classes import BasePandasDataset, DataFrame, Series\nfrom .functions import read_csv\n\n__all__ = [\"BasePandasDataset\", \"DataFrame\", \"Series\", \"read_csv\"]\n"
  },
  {
    "path": "modin/tests/config/docs_module/classes.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\nclass DataFrame:\n    def apply(self):\n        \"\"\"This is a test of the documentation module for DataFrame.\"\"\"\n        return\n\n\nclass Series:\n    def isna(self):\n        \"\"\"This is a test of the documentation module for Series.\"\"\"\n        return\n\n\nclass BasePandasDataset:\n    \"\"\"This is a test of the documentation module for BasePandasDataSet.\"\"\"\n\n    def apply():\n        \"\"\"This is a test of the documentation module for BasePandasDataSet.apply.\"\"\"\n        return\n\n    def astype():\n        \"\"\"This is a test of the documentation module for BasePandasDataSet.astype.\"\"\"\n"
  },
  {
    "path": "modin/tests/config/docs_module/functions.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\ndef read_csv():\n    \"\"\"Test override for functions on the module.\"\"\"\n    return\n"
  },
  {
    "path": "modin/tests/config/docs_module_with_just_base/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nfrom .classes import BasePandasDataset\n\n__all__ = [\"BasePandasDataset\"]\n"
  },
  {
    "path": "modin/tests/config/docs_module_with_just_base/classes.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\nclass BasePandasDataset:\n    def astype():\n        \"\"\"This is a test of the documentation module for BasePandasDataSet.astype.\"\"\"\n"
  },
  {
    "path": "modin/tests/config/test_envvars.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport itertools\nimport os\nimport re\nimport sys\nimport unittest.mock as mock\nfrom unittest.mock import Mock, patch\n\nimport pandas\nimport pytest\nfrom pytest import param\n\nimport modin.config as cfg\nimport modin.pandas as pd\nfrom modin.config.envvars import _check_vars\nfrom modin.config.pubsub import _UNSET, ExactStr, ValueSource\nfrom modin.pandas.base import BasePandasDataset\nfrom modin.tests.pandas.utils import switch_execution\n\n################# WARNING #####################################################\n# Test cases in this file affect global state, e.g. by setting environment\n# variables. The test cases may produce unexpected results when repeated on run\n# out of the order they are defined in. Be careful when running the test\n# locally or when adding new test cases. 
In particular, note:\n#   - test_ray_cluster_resources() causes us to permanently attach the\n#     `_initialize_engine` subscriber to Engine: https://github.com/modin-project/modin/blob/6252ebde19935bd1f6a6850209bf8a1f5e5ecfb7/modin/core/execution/dispatching/factories/dispatcher.py#L115\n#     Changing to any engine after that test runs will cause Modin to try to\n#     initialize the engine.\n#   - In CI, we only run these tests with Ray execution, in the\n#     `test-internal` job.\n#   - test_wrong_values() permanently messes up some config variables. For more\n#     details see https://github.com/modin-project/modin/issues/7454\n################# WARNING ######################\n\nUNIDIST_SKIP_REASON = (\n    \"Switching to unidist causes an error since we have to execute unidist \"\n    + \"tests differently, with `mpiexec` instead of just `pytest`\"\n)\n\n\n@pytest.fixture\ndef clear_backend_execution_and_storage_format(monkeypatch):\n    \"\"\"\n    Reset environment variables and config classes for backend, execution, and storage format.\n\n    Parameters\n    ----------\n    *vars : tuple[Parameter]\n    \"\"\"\n    for variable in (cfg.Backend, cfg.StorageFormat, cfg.Engine):\n        monkeypatch.setattr(variable, \"_value\", _UNSET)\n        monkeypatch.setattr(variable, \"_value_source\", ValueSource.DEFAULT)\n        monkeypatch.delitem(os.environ, variable.varname, raising=False)\n\n\n@pytest.fixture\ndef make_unknown_env():\n    varname = \"MODIN_UNKNOWN\"\n    os.environ[varname] = \"foo\"\n    yield varname\n    del os.environ[varname]\n\n\n@pytest.fixture(params=[str, ExactStr])\ndef make_custom_envvar(request):\n    class CustomVar(cfg.EnvironmentVariable, type=request.param):\n        \"\"\"custom var\"\"\"\n\n        default = 10\n        varname = \"MODIN_CUSTOM\"\n        choices = (1, 5, 10)\n\n    return CustomVar\n\n\n@pytest.fixture(scope=\"session\")\ndef add_pandas_duplicate_on_ray_execution():\n    \"\"\"\n    Add an execution mode 
with the storage format Test_Pandasduplicate and engine Ray.\n\n    This mode's execution is equivalent to PandasOnRay execution.\n    \"\"\"\n    cfg.StorageFormat.add_option(\"Test_Pandasduplicate\")\n    from modin.core.execution.dispatching.factories import factories\n\n    factories.Test_PandasduplicateOnRayFactory = factories.PandasOnRayFactory\n    cfg.Backend.register_backend(\n        \"Test_Backend_1\",\n        cfg.Execution(\n            storage_format=\"Test_Pandasduplicate\",\n            engine=\"Ray\",\n        ),\n    )\n\n\n@pytest.fixture\ndef set_custom_envvar(make_custom_envvar):\n    os.environ[make_custom_envvar.varname] = \"  custom  \"\n    yield \"Custom\" if make_custom_envvar.type is str else \"  custom  \"\n    del os.environ[make_custom_envvar.varname]\n\n\ndef test_unknown(make_unknown_env):\n    with pytest.warns(UserWarning, match=f\"Found unknown .*{make_unknown_env}.*\"):\n        _check_vars()\n\n\ndef test_custom_default(make_custom_envvar):\n    assert make_custom_envvar.get() == 10\n\n\ndef test_custom_set(make_custom_envvar, set_custom_envvar):\n    assert make_custom_envvar.get() == set_custom_envvar\n\n\ndef test_custom_help(make_custom_envvar):\n    assert \"MODIN_CUSTOM\" in make_custom_envvar.get_help()\n    assert \"custom var\" in make_custom_envvar.get_help()\n\n\nclass TestDocModule:\n    \"\"\"\n    Test using a module to replace default docstrings.\n    \"\"\"\n\n    def test_overrides(self):\n        cfg.DocModule.put(\"modin.tests.config.docs_module\")\n\n        # Test for override\n        assert BasePandasDataset.__doc__ == (\n            \"This is a test of the documentation module for BasePandasDataSet.\"\n        )\n        assert BasePandasDataset.apply.__doc__ == (\n            \"This is a test of the documentation module for BasePandasDataSet.apply.\"\n        )\n        # Test scenario 2 from https://github.com/modin-project/modin/issues/7113:\n        # We can correctly override the docstring for 
BasePandasDataset.astype,\n        # which is the same method (modulo some wrapping that we add to handle\n        # extensions) as Series.astype.\n        assert (\n            pd.Series.astype.__wrapped__.__wrapped__\n            is BasePandasDataset.astype.__wrapped__\n        )\n        assert BasePandasDataset.astype.__doc__ == (\n            \"This is a test of the documentation module for BasePandasDataSet.astype.\"\n        )\n        assert (\n            pd.DataFrame.apply.__doc__\n            == \"This is a test of the documentation module for DataFrame.\"\n        )\n        # Test for pandas doc when method is not defined on the plugin module\n        assert pandas.DataFrame.isna.__doc__ in pd.DataFrame.isna.__doc__\n        assert pandas.DataFrame.isnull.__doc__ in pd.DataFrame.isnull.__doc__\n        assert BasePandasDataset.astype.__doc__ in pd.DataFrame.astype.__doc__\n        # Test for override\n        assert (\n            pd.Series.isna.__doc__\n            == \"This is a test of the documentation module for Series.\"\n        )\n        # Test for pandas doc when method is not defined on the plugin module\n        assert pandas.Series.isnull.__doc__ in pd.Series.isnull.__doc__\n        assert pandas.Series.apply.__doc__ in pd.Series.apply.__doc__\n        # Test for override\n        assert pd.read_csv.__doc__ == \"Test override for functions on the module.\"\n        # Test for pandas doc when function is not defined on module.\n        assert pandas.read_table.__doc__ in pd.read_table.__doc__\n\n    def test_not_redefining_classes_modin_issue_7138(self):\n        original_dataframe_class = pd.DataFrame\n\n        cfg.DocModule.put(\"modin.tests.config.docs_module\")\n\n        # Test for override\n        assert (\n            pd.DataFrame.apply.__doc__\n            == \"This is a test of the documentation module for DataFrame.\"\n        )\n\n        assert pd.DataFrame is original_dataframe_class\n\n    def 
test_base_docstring_override_with_no_dataframe_or_series_class_issue_7113(\n        self,\n    ):\n        # This test case tests scenario 1 from issue 7113.\n        sys.path.append(f\"{os.path.dirname(__file__)}\")\n        cfg.DocModule.put(\"docs_module_with_just_base\")\n        assert BasePandasDataset.astype.__doc__ == (\n            \"This is a test of the documentation module for BasePandasDataSet.astype.\"\n        )\n\n\n@pytest.mark.skipif(cfg.Engine.get() != \"Ray\", reason=\"Ray specific test\")\ndef test_ray_cluster_resources():\n    import ray\n\n    cfg.RayInitCustomResources.put({\"special_hardware\": 1.0})\n    # create a dummy df to initialize Ray engine\n    _ = pd.DataFrame([1, 2, 3])\n    assert ray.cluster_resources()[\"special_hardware\"] == 1.0\n\n\n@pytest.mark.parametrize(\n    \"modify_config\",\n    [{cfg.RangePartitioning: False, cfg.LazyExecution: \"Auto\"}],\n    indirect=True,\n)\ndef test_context_manager_update_config(modify_config):\n    # simple case, 1 parameter\n    assert cfg.RangePartitioning.get() is False\n    with cfg.context(RangePartitioning=True):\n        assert cfg.RangePartitioning.get() is True\n    assert cfg.RangePartitioning.get() is False\n\n    # nested case, 1 parameter\n    assert cfg.RangePartitioning.get() is False\n    with cfg.context(RangePartitioning=True):\n        assert cfg.RangePartitioning.get() is True\n        with cfg.context(RangePartitioning=False):\n            assert cfg.RangePartitioning.get() is False\n            with cfg.context(RangePartitioning=False):\n                assert cfg.RangePartitioning.get() is False\n            assert cfg.RangePartitioning.get() is False\n        assert cfg.RangePartitioning.get() is True\n    assert cfg.RangePartitioning.get() is False\n\n    # simple case, 2 parameters\n    assert cfg.RangePartitioning.get() is False\n    assert cfg.LazyExecution.get() == \"Auto\"\n    with cfg.context(RangePartitioning=True, LazyExecution=\"Off\"):\n        assert 
cfg.RangePartitioning.get() is True\n        assert cfg.LazyExecution.get() == \"Off\"\n    assert cfg.RangePartitioning.get() is False\n    assert cfg.LazyExecution.get() == \"Auto\"\n\n    # nested case, 2 parameters\n    assert cfg.RangePartitioning.get() is False\n    assert cfg.LazyExecution.get() == \"Auto\"\n    with cfg.context(RangePartitioning=True, LazyExecution=\"Off\"):\n        assert cfg.RangePartitioning.get() is True\n        assert cfg.LazyExecution.get() == \"Off\"\n        with cfg.context(RangePartitioning=False):\n            assert cfg.RangePartitioning.get() is False\n            assert cfg.LazyExecution.get() == \"Off\"\n            with cfg.context(LazyExecution=\"On\"):\n                assert cfg.RangePartitioning.get() is False\n                assert cfg.LazyExecution.get() == \"On\"\n                with cfg.context(RangePartitioning=True, LazyExecution=\"Off\"):\n                    assert cfg.RangePartitioning.get() is True\n                    assert cfg.LazyExecution.get() == \"Off\"\n                assert cfg.RangePartitioning.get() is False\n                assert cfg.LazyExecution.get() == \"On\"\n            assert cfg.RangePartitioning.get() is False\n            assert cfg.LazyExecution.get() == \"Off\"\n        assert cfg.RangePartitioning.get() is True\n        assert cfg.LazyExecution.get() == \"Off\"\n    assert cfg.RangePartitioning.get() is False\n    assert cfg.LazyExecution.get() == \"Auto\"\n\n\nclass TestBackend:\n\n    @pytest.mark.parametrize(\n        \"engine, storage_format, expected_backend\",\n        [\n            (\"Python\", \"Pandas\", \"Python_Test\"),\n            (\"Ray\", \"Pandas\", \"Ray\"),\n            param(\n                \"Unidist\",\n                \"Pandas\",\n                \"Unidist\",\n                marks=pytest.mark.skip(reason=UNIDIST_SKIP_REASON),\n            ),\n            (\"Dask\", \"Pandas\", \"Dask\"),\n            (\"Native\", \"Native\", \"Pandas\"),\n        ],\n    
)\n    def test_setting_execution_changes_backend(\n        self, engine, storage_format, expected_backend\n    ):\n        previous_backend = cfg.Backend.get()\n        with switch_execution(engine, storage_format):\n            assert cfg.Backend.get() == expected_backend\n        assert cfg.Backend.get() == previous_backend\n\n    def test_subscribing_to_backend_triggers_callback(self):\n        backend_subscriber = Mock()\n        cfg.Backend.subscribe(backend_subscriber)\n        backend_subscriber.assert_called_once_with(cfg.Backend)\n\n    def test_setting_backend_triggers_all_callbacks(self):\n        # Start with a known backend (rather than the one that we start the\n        # test with).\n        with cfg.context(Backend=\"Pandas\"):\n            backend_subscriber = Mock()\n            cfg.Backend.subscribe(backend_subscriber)\n            backend_subscriber.reset_mock()\n\n            storage_format_subscriber = Mock()\n            cfg.StorageFormat.subscribe(storage_format_subscriber)\n            storage_format_subscriber.reset_mock()\n\n            engine_subscriber = Mock()\n            cfg.Engine.subscribe(engine_subscriber)\n            engine_subscriber.reset_mock()\n\n            with cfg.context(Backend=\"Python_Test\"):\n                backend_subscriber.assert_called_once_with(cfg.Backend)\n                storage_format_subscriber.assert_called_once_with(cfg.StorageFormat)\n                engine_subscriber.assert_called_once_with(cfg.Engine)\n\n    @pytest.mark.parametrize(\n        \"backend, expected_engine, expected_storage_format\",\n        [\n            (\"Python_test\", \"Python\", \"Pandas\"),\n            (\"PYTHON_test\", \"Python\", \"Pandas\"),\n            (\"python_TEST\", \"Python\", \"Pandas\"),\n            (\"Ray\", \"Ray\", \"Pandas\"),\n            param(\n                \"Unidist\",\n                \"Unidist\",\n                \"Pandas\",\n                marks=pytest.mark.skip(reason=UNIDIST_SKIP_REASON),\n       
     ),\n            (\"Dask\", \"Dask\", \"Pandas\"),\n            (\"Pandas\", \"Native\", \"Native\"),\n        ],\n    )\n    def test_setting_backend_changes_execution(\n        self, backend, expected_engine, expected_storage_format\n    ):\n        previous_engine = cfg.Engine.get()\n        previous_storage_format = cfg.StorageFormat.get()\n        with cfg.context(Backend=backend):\n            assert cfg.Engine.get() == expected_engine\n            assert cfg.StorageFormat.get() == expected_storage_format\n        assert cfg.Engine.get() == previous_engine\n        assert cfg.StorageFormat.get() == previous_storage_format\n\n    def test_setting_engine_alone_changes_backend(self):\n        # Start with a known backend (rather than the one that we start the\n        # test with).\n        with switch_execution(storage_format=\"Pandas\", engine=\"Ray\"):\n            current_backend = cfg.Backend.get()\n            assert current_backend == \"Ray\"\n            with cfg.context(Engine=\"Python\"):\n                assert cfg.Backend.get() == \"Python_Test\"\n            assert cfg.Backend.get() == current_backend\n\n    def test_setting_engine_triggers_callbacks(self):\n        # Start with a known backend (rather than the one that we start the\n        # test with).\n        with switch_execution(storage_format=\"Pandas\", engine=\"Ray\"):\n            engine_subscriber = Mock()\n            cfg.Engine.subscribe(engine_subscriber)\n            engine_subscriber.reset_mock()\n\n            backend_subscriber = Mock()\n            cfg.Backend.subscribe(backend_subscriber)\n            backend_subscriber.reset_mock()\n\n            storage_format_subscriber = Mock()\n            cfg.StorageFormat.subscribe(storage_format_subscriber)\n            storage_format_subscriber.reset_mock()\n\n            with cfg.context(Engine=\"Dask\"):\n                engine_subscriber.assert_called_once_with(cfg.Engine)\n                
backend_subscriber.assert_called_once_with(cfg.Backend)\n                # StorageFormat stayed the same, so we don't call its callback.\n                storage_format_subscriber.assert_not_called()\n\n    def test_setting_storage_format_triggers_callbacks(self):\n        # There's only one built-in storage format, pandas, so we add a new one\n        # here.\n        cfg.StorageFormat.add_option(\"Pandasduplicate\")\n        from modin.core.execution.dispatching.factories import factories\n\n        factories.PandasduplicateOnRayFactory = factories.PandasOnRayFactory\n        cfg.Backend.register_backend(\n            \"NewBackend\",\n            cfg.Execution(\n                storage_format=\"Pandasduplicate\",\n                engine=\"Ray\",\n            ),\n        )\n\n        with switch_execution(storage_format=\"Pandas\", engine=\"Ray\"):\n            engine_subscriber = Mock()\n            cfg.Engine.subscribe(engine_subscriber)\n            engine_subscriber.reset_mock()\n            backend_subscriber = Mock()\n            cfg.Backend.subscribe(backend_subscriber)\n            backend_subscriber.reset_mock()\n            storage_format_subscriber = Mock()\n            cfg.StorageFormat.subscribe(storage_format_subscriber)\n            storage_format_subscriber.reset_mock()\n            with cfg.context(StorageFormat=\"PANDASDUPLICATE\"):\n                storage_format_subscriber.assert_called_once_with(cfg.StorageFormat)\n                backend_subscriber.assert_called_once_with(cfg.Backend)\n                # Engine stayed the same, so we don't call its callback.\n                engine_subscriber.assert_not_called()\n\n    @pytest.mark.parametrize(\"name\", [\"Python_Test\", \"python_Test\"])\n    def test_register_existing_backend(self, name):\n        with pytest.raises(\n            ValueError,\n            match=re.escape(\n                \"Backend 'Python_Test' is already registered with the execution \"\n                + 
\"Execution(storage_format='Pandas', engine='Python')\"\n            ),\n        ):\n            cfg.Backend.register_backend(\n                name,\n                cfg.Execution(\n                    storage_format=\"Pandas\",\n                    engine=\"Python\",\n                ),\n            )\n\n    def test_register_existing_execution(self):\n        with pytest.raises(\n            ValueError,\n            match=re.escape(\n                \"Execution(storage_format='Pandas', engine='Python') is already registered with the backend Python_Test.\"\n            ),\n        ):\n            cfg.Backend.register_backend(\n                \"NewBackend2\",\n                cfg.Execution(\n                    storage_format=\"Pandas\",\n                    engine=\"Python\",\n                ),\n            )\n\n    def test_set_invalid_backend(self):\n        with pytest.raises(ValueError, match=re.escape(\"Unknown backend 'Unknown'\")):\n            cfg.Backend.put(\"Unknown\")\n\n    def test_switch_to_unregistered_backend_with_switch_execution(self):\n        cfg.StorageFormat.add_option(\"Pandas2\")\n        from modin.core.execution.dispatching.factories import factories\n\n        factories.Pandas2OnRayFactory = factories.PandasOnRayFactory\n        with pytest.raises(\n            ValueError,\n            match=re.escape(\n                \"Execution(storage_format='Pandas2', engine='Ray') \"\n                + \"has no known backend. 
Please register a backend for it with \"\n                + \"Backend.register_backend()\"\n            ),\n        ), switch_execution(engine=\"Ray\", storage_format=\"Pandas2\"):\n            pass\n\n    def test_switch_to_unregistered_backend_with_switch_storage_format(self):\n        cfg.StorageFormat.add_option(\"Pandas3\")\n        from modin.core.execution.dispatching.factories import factories\n\n        factories.Pandas2OnRayFactory = factories.PandasOnPythonFactory\n        with cfg.context(StorageFormat=\"Pandas\", Engine=\"Python\"):\n            with pytest.raises(\n                ValueError,\n                match=re.escape(\n                    \"Execution(storage_format='Pandas3', engine='Python') \"\n                    + \"has no known backend. Please register a backend for it with \"\n                    + \"Backend.register_backend()\"\n                ),\n            ):\n                cfg.StorageFormat.put(\"Pandas3\")\n\n    def test_switch_to_unregistered_backend_with_switch_engine(self):\n        cfg.Engine.add_option(\"Python2\")\n        from modin.core.execution.dispatching.factories import factories\n\n        factories.PandasOnPython2Factory = factories.PandasOnPythonFactory\n        with cfg.context(StorageFormat=\"Pandas\", Engine=\"Python\"):\n            with pytest.raises(\n                ValueError,\n                match=re.escape(\n                    \"Execution(storage_format='Pandas', engine='Python2') \"\n                    + \"has no known backend. 
Please register a backend for it with \"\n                    + \"Backend.register_backend()\"\n                ),\n            ):\n                cfg.Engine.put(\"Python2\")\n\n    # The default engine and storage format, and hence the default backend,\n    # will depend on which engines are available in the current environment.\n    # For simplicity, patch the defaults.\n    @patch(\n        target=\"modin.config.StorageFormat._get_default\",\n    )\n    @patch(\n        target=\"modin.config.Engine._get_default\",\n    )\n    def test_backend_default(\n        self,\n        mocked_get_default,\n        mocked_get_default2,\n    ):\n        mocked_get_default.return_value = \"Native\"\n        mocked_get_default2.return_value = \"Native\"\n        assert cfg.Backend._get_default() == \"Pandas\"\n\n    def test_add_backend_option(self):\n        with pytest.raises(\n            ValueError,\n            match=re.escape(\n                \"Cannot add an option to Backend directly. Use Backend.register_backend instead.\"\n            ),\n        ):\n            cfg.Backend.add_option(\"NewBackend\")\n\n    @pytest.mark.parametrize(\n        \"order_to_get_in\",\n        itertools.permutations(\n            [\n                cfg.Backend,\n                cfg.Engine,\n                cfg.StorageFormat,\n            ]\n        ),\n        ids=lambda permutation: \"_\".join(x.__name__ for x in permutation),\n    )\n    @pytest.mark.parametrize(\n        \"storage_environment_variable, engine_environment_variable, variable_to_expected_value\",\n        [\n            (\n                \"Native\",\n                \"Native\",\n                {\n                    cfg.Backend: \"Pandas\",\n                    cfg.Engine: \"Native\",\n                    cfg.StorageFormat: \"Native\",\n                },\n            ),\n            (\n                \"NATIVE\",\n                \"NATIVE\",\n                {\n                    cfg.Backend: \"Pandas\",\n             
       cfg.Engine: \"Native\",\n                    cfg.StorageFormat: \"Native\",\n                },\n            ),\n            (\n                \"Pandas\",\n                \"Dask\",\n                {\n                    cfg.Backend: \"Dask\",\n                    cfg.Engine: \"Dask\",\n                    cfg.StorageFormat: \"Pandas\",\n                },\n            ),\n        ],\n    )\n    def test_storage_format_and_engine_come_from_environment(\n        self,\n        monkeypatch,\n        clear_backend_execution_and_storage_format,\n        order_to_get_in,\n        storage_environment_variable,\n        engine_environment_variable,\n        variable_to_expected_value,\n    ):\n        with mock.patch.dict(\n            os.environ,\n            {\n                cfg.StorageFormat.varname: storage_environment_variable,\n                cfg.Engine.varname: engine_environment_variable,\n            },\n        ):\n            for variable in order_to_get_in:\n                expected_value = variable_to_expected_value[variable]\n                assert (\n                    variable.get() == expected_value\n                ), f\"{variable.__name__} was {variable.get()} instead of {expected_value}\"\n\n    @pytest.mark.parametrize(\n        \"order_to_get_in\",\n        itertools.permutations(\n            [\n                cfg.Backend,\n                cfg.Engine,\n                cfg.StorageFormat,\n            ]\n        ),\n        ids=lambda permutation: \"_\".join(x.__name__ for x in permutation),\n    )\n    @pytest.mark.parametrize(\n        \"engine_environment_variable, variable_to_expected_value\",\n        [\n            (\n                \"Dask\",\n                {cfg.Backend: \"Dask\", cfg.StorageFormat: \"Pandas\", cfg.Engine: \"Dask\"},\n            ),\n            (\n                \"DASK\",\n                {cfg.Backend: \"Dask\", cfg.StorageFormat: \"Pandas\", cfg.Engine: \"Dask\"},\n            ),\n            (\n              
  \"python\",\n                {\n                    cfg.Backend: \"Python_Test\",\n                    cfg.StorageFormat: \"Pandas\",\n                    cfg.Engine: \"Python\",\n                },\n            ),\n            (\n                \"ray\",\n                {cfg.Backend: \"Ray\", cfg.StorageFormat: \"Pandas\", cfg.Engine: \"Ray\"},\n            ),\n            # note that we can't test Native here because it's not valid to use\n            # \"Native\" engine with the default storage format of \"Pandas.\"\n        ],\n    )\n    def test_only_engine_comes_from_environment(\n        self,\n        clear_backend_execution_and_storage_format,\n        order_to_get_in,\n        engine_environment_variable,\n        variable_to_expected_value,\n    ):\n        with mock.patch.dict(\n            os.environ,\n            {cfg.Engine.varname: engine_environment_variable},\n        ):\n            for var in order_to_get_in:\n                expected_value = variable_to_expected_value[var]\n                assert (\n                    var.get() == expected_value\n                ), f\"{var.__name__} was {var.get()} instead of {expected_value}\"\n\n    @pytest.mark.parametrize(\n        \"order_to_get_in\",\n        itertools.permutations(\n            [\n                cfg.Backend,\n                cfg.Engine,\n                cfg.StorageFormat,\n            ]\n        ),\n        ids=lambda permutation: \"_\".join(x.__name__ for x in permutation),\n    )\n    def test_only_storage_format_comes_from_environment(\n        self,\n        clear_backend_execution_and_storage_format,\n        order_to_get_in,\n        add_pandas_duplicate_on_ray_execution,\n    ):\n        # To test switching StorageFormat alone, we have to add a new backend\n        # that works with the default \"Pandas\" execution.\n        with mock.patch.dict(\n            os.environ,\n            {\n                cfg.StorageFormat.varname: \"Test_Pandasduplicate\",\n            },\n    
    ):\n            cfg.Engine.put(\"Ray\")\n            for variable in order_to_get_in:\n                expected_value = {\n                    cfg.Backend: \"Test_Backend_1\",\n                    cfg.Engine: \"Ray\",\n                    cfg.StorageFormat: \"Test_Pandasduplicate\",\n                }[variable]\n                assert (\n                    variable.get() == expected_value\n                ), f\"{variable.__name__} was {variable.get()} instead of {expected_value}\"\n\n    @pytest.mark.parametrize(\n        \"order_to_get_in\",\n        itertools.permutations(\n            [\n                cfg.Backend,\n                cfg.Engine,\n                cfg.StorageFormat,\n            ]\n        ),\n        ids=lambda permutation: \"_\".join(x.__name__ for x in permutation),\n    )\n    @pytest.mark.parametrize(\n        \"backend_environment_variable, variable_to_expected_value\",\n        [\n            (\n                \"Pandas\",\n                {\n                    cfg.Backend: \"Pandas\",\n                    cfg.Engine: \"Native\",\n                    cfg.StorageFormat: \"Native\",\n                },\n            ),\n            (\n                \"Ray\",\n                {cfg.Backend: \"Ray\", cfg.Engine: \"Ray\", cfg.StorageFormat: \"Pandas\"},\n            ),\n            (\n                \"Dask\",\n                {cfg.Backend: \"Dask\", cfg.Engine: \"Dask\", cfg.StorageFormat: \"Pandas\"},\n            ),\n            (\n                \"python_test\",\n                {\n                    cfg.Backend: \"Python_Test\",\n                    cfg.Engine: \"Python\",\n                    cfg.StorageFormat: \"Pandas\",\n                },\n            ),\n        ],\n    )\n    def test_backend_comes_from_environment(\n        self,\n        monkeypatch,\n        clear_backend_execution_and_storage_format,\n        order_to_get_in,\n        backend_environment_variable,\n        variable_to_expected_value,\n    ):\n        with 
mock.patch.dict(\n            os.environ,\n            {\n                cfg.Backend.varname: backend_environment_variable,\n            },\n        ):\n            for variable in order_to_get_in:\n                expected_value = variable_to_expected_value[variable]\n                assert (\n                    variable.get() == expected_value\n                ), f\"{variable.__name__} was {variable.get()} instead of {expected_value}\"\n\n    @pytest.mark.parametrize(\n        \"order_to_get_in\",\n        itertools.permutations(\n            [cfg.Backend, cfg.Engine, cfg.StorageFormat],\n        ),\n        ids=lambda permutation: \"_\".join(x.__name__ for x in permutation),\n    )\n    def test_environment_not_set_and_pick_up_default_engine(\n        self, clear_backend_execution_and_storage_format, order_to_get_in\n    ):\n        for variable in order_to_get_in:\n            assert variable.get() == variable._get_default()\n\n    @pytest.mark.parametrize(\n        \"execution_variable, value\",\n        [(cfg.Engine, \"Python\"), (cfg.StorageFormat, \"Pandas\")],\n    )\n    @pytest.mark.parametrize(\n        \"variable_to_get\",\n        [cfg.Backend, cfg.Engine, cfg.StorageFormat],\n    )\n    def test_conflicting_execution_and_backend_in_environment(\n        self,\n        monkeypatch,\n        clear_backend_execution_and_storage_format,\n        execution_variable,\n        value,\n        variable_to_get,\n    ):\n        monkeypatch.setitem(os.environ, cfg.Backend.varname, \"Ray\")\n        monkeypatch.setitem(os.environ, execution_variable.varname, value)\n        with pytest.raises(\n            ValueError,\n            match=re.escape(\"Can't specify both execution and backend in environment\"),\n        ):\n            variable_to_get.get()\n\n    def test_get_execution_for_unknown_backend(self):\n        backend_choice_string = \", \".join(\n            f\"'{choice}'\" for choice in cfg.Backend.choices\n        )\n        with pytest.raises(\n   
         ValueError,\n            match=re.escape(\n                f\"Unknown backend 'Unknown'. Available backends are: {backend_choice_string}\"\n            ),\n        ):\n            cfg.Backend.get_execution_for_backend(\"Unknown\")\n\n\n@pytest.mark.parametrize(\n    \"config_name\",\n    [\n        \"NPartitions\",\n        \"CpuCount\",\n        \"LogMemoryInterval\",\n        \"LogFileSize\",\n        \"MinRowPartitionSize\",\n        \"MinColumnPartitionSize\",\n    ],\n)\ndef test_wrong_values(config_name):\n    config: cfg.EnvironmentVariable = getattr(cfg, config_name)\n    new_value = -1\n    with pytest.raises(ValueError):\n        with cfg.context(**{config_name: new_value}):\n            _ = config.get()\n"
  },
  {
    "path": "modin/tests/config/test_parameter.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nfrom collections import defaultdict\n\nimport pytest\n\nfrom modin.config import Parameter\nfrom modin.config.pubsub import _TYPE_PARAMS\n\n\ndef make_prefilled(vartype, varinit):\n    class Prefilled(Parameter, type=vartype):\n        @classmethod\n        def _get_value_from_config(cls):\n            if not _TYPE_PARAMS[cls.type].verify(varinit):\n                raise ValueError(f\"Unsupported raw value: {varinit}\")\n            return _TYPE_PARAMS[cls.type].decode(varinit)\n\n    return Prefilled\n\n\n@pytest.fixture\ndef prefilled_parameter():\n    return make_prefilled(str, \"init\")\n\n\ndef test_equals(prefilled_parameter):\n    assert prefilled_parameter.get() == \"Init\"\n\n    prefilled_parameter.put(\"value2\")\n    assert prefilled_parameter.get() == \"Value2\"\n\n\ndef test_triggers(prefilled_parameter):\n    results = defaultdict(int)\n    callbacks = []\n\n    def make_callback(name, res=results):\n        def callback(p: Parameter):\n            res[name] += 1\n\n        # keep reference to callbacks so they won't be removed by GC\n        callbacks.append(callback)\n        return callback\n\n    prefilled_parameter.once(\"init\", 
make_callback(\"init\"))\n    assert results[\"init\"] == 1\n\n    prefilled_parameter.once(\"never\", make_callback(\"never\"))\n    prefilled_parameter.once(\"once\", make_callback(\"once\"))\n    prefilled_parameter.subscribe(make_callback(\"subscribe\"))\n\n    prefilled_parameter.put(\"multi\")\n    prefilled_parameter.put(\"once\")\n    prefilled_parameter.put(\"multi\")\n    prefilled_parameter.put(\"once\")\n\n    expected = [(\"init\", 1), (\"never\", 0), (\"once\", 1), (\"subscribe\", 5)]\n    for name, val in expected:\n        assert results[name] == val, \"{} has wrong count\".format(name)\n\n\n@pytest.mark.parametrize(\n    \"parameter,good,bad\",\n    [\n        (make_prefilled(bool, \"false\"), {\"1\": True, False: False}, [\"nope\", 2]),\n        (make_prefilled(int, \"10\"), {\" 15\\t\": 15, 25: 25}, [\"-10\", 1.0, \"foo\"]),\n        (\n            make_prefilled(dict, \"key = value\"),\n            {\n                \"KEY1 = VALUE1, KEY2=VALUE2=VALUE3,KEY3=0\": {\n                    \"KEY1\": \"VALUE1\",\n                    \"KEY2\": \"VALUE2=VALUE3\",\n                    \"KEY3\": 0,\n                },\n                \"KEY=1\": {\"KEY\": 1},\n            },\n            [\"key1=some,string\", \"key1=value1,key2=\", \"random string\"],\n        ),\n    ],\n)\ndef test_validation(parameter, good, bad):\n    for inval, outval in good.items():\n        parameter.put(inval)\n        assert parameter.get() == outval\n    for inval in bad:\n        with pytest.raises(ValueError):\n            parameter.put(inval)\n\n\n@pytest.mark.parametrize(\"vartype\", [bool, int, dict])\ndef test_init_validation(vartype):\n    parameter = make_prefilled(vartype, \"bad value\")\n    with pytest.raises(ValueError):\n        parameter.get()\n"
  },
  {
    "path": "modin/tests/core/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n"
  },
  {
    "path": "modin/tests/core/storage_formats/base/test_internals.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport pandas\nimport pytest\n\nimport modin.pandas as pd\nfrom modin.config import NPartitions\nfrom modin.tests.pandas.utils import create_test_dfs, df_equals, test_data_values\n\nNPartitions.put(4)\n\n\n@pytest.mark.parametrize(\"axis\", [0, 1])\n@pytest.mark.parametrize(\"item_length\", [0, 1, 2])\n@pytest.mark.parametrize(\"loc\", [\"first\", \"first + 1\", \"middle\", \"penult\", \"last\"])\n@pytest.mark.parametrize(\"replace\", [True, False])\ndef test_insert_item(axis, item_length, loc, replace):\n    data = test_data_values[0]\n\n    def post_fn(df):\n        return (\n            (df.iloc[:, :-item_length], df.iloc[:, -item_length:])\n            if axis\n            else (df.iloc[:-item_length, :], df.iloc[-item_length:, :])\n        )\n\n    def get_loc(frame, loc):\n        locs_dict = {\n            \"first\": 0,\n            \"first + 1\": 1,\n            \"middle\": len(frame.axes[axis]) // 2,\n            \"penult\": len(frame.axes[axis]) - 1,\n            \"last\": len(frame.axes[axis]),\n        }\n        return locs_dict[loc]\n\n    def get_reference(df, value, loc):\n        if axis == 0:\n            first_mask = df.iloc[:loc]\n            if 
replace:\n                loc += 1\n            second_mask = df.iloc[loc:]\n        else:\n            first_mask = df.iloc[:, :loc]\n            if replace:\n                loc += 1\n            second_mask = df.iloc[:, loc:]\n        return pandas.concat([first_mask, value, second_mask], axis=axis)\n\n    md_frames, pd_frames = create_test_dfs(data, post_fn=post_fn)\n    md_item1, md_item2 = md_frames\n    pd_item1, pd_item2 = pd_frames\n\n    index_loc = get_loc(pd_item1, loc)\n\n    pd_res = get_reference(pd_item1, loc=index_loc, value=pd_item2)\n    md_res = md_item1._query_compiler.insert_item(\n        axis=axis, loc=index_loc, value=md_item2._query_compiler, replace=replace\n    ).to_pandas()\n    df_equals(\n        md_res,\n        pd_res,\n        # This test causes an empty slice to be generated thus triggering:\n        # https://github.com/modin-project/modin/issues/5974\n        check_dtypes=axis != 0,\n    )\n\n    index_loc = get_loc(pd_item2, loc)\n\n    pd_res = get_reference(pd_item2, loc=index_loc, value=pd_item1)\n    md_res = md_item2._query_compiler.insert_item(\n        axis=axis, loc=index_loc, value=md_item1._query_compiler, replace=replace\n    ).to_pandas()\n\n    df_equals(\n        md_res,\n        pd_res,\n        # This test causes an empty slice to be generated thus triggering:\n        # https://github.com/modin-project/modin/issues/5974\n        check_dtypes=axis != 0,\n    )\n\n\n@pytest.mark.parametrize(\"num_rows\", list(range(1, 5)), ids=lambda x: f\"num_rows={x}\")\n@pytest.mark.parametrize(\"num_cols\", list(range(1, 5)), ids=lambda x: f\"num_cols={x}\")\ndef test_repr_size_issue_6104(num_rows, num_cols):\n    # this tests an edge case where we used to select exactly num_cols / 2 + 1 columns\n    # from both the front and the back of the dataframe, but the dataframe is such a\n    # length that the front and back columns overlap at one column. 
The result is that\n    # we convert one column twice to pandas, although we would never see the duplicate\n    # column in the output because pandas would also only represent the num_cols / 2\n    # columns from the front and back.\n    df = pd.DataFrame([list(range(4)) for _ in range(4)])\n    pandas_repr_df = df._build_repr_df(num_rows, num_cols)\n    assert pandas_repr_df.columns.is_unique\n    assert pandas_repr_df.index.is_unique\n"
  },
  {
    "path": "modin/tests/core/storage_formats/cudf/test_gpu_managers.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n"
  },
  {
    "path": "modin/tests/core/storage_formats/cudf/test_internals.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n"
  },
  {
    "path": "modin/tests/core/storage_formats/pandas/test_internals.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport functools\nimport sys\nimport unittest.mock as mock\n\nimport numpy as np\nimport pandas\nimport pytest\n\nimport modin.pandas as pd\nfrom modin.config import (\n    CpuCount,\n    Engine,\n    MinColumnPartitionSize,\n    MinRowPartitionSize,\n    NPartitions,\n    RangePartitioning,\n    context,\n)\nfrom modin.core.dataframe.algebra import Fold\nfrom modin.core.dataframe.algebra.default2pandas import DataFrameDefault\nfrom modin.core.dataframe.pandas.dataframe.dataframe import PandasDataframe\nfrom modin.core.dataframe.pandas.dataframe.utils import ColumnInfo, ShuffleSortFunctions\nfrom modin.core.dataframe.pandas.metadata import (\n    DtypesDescriptor,\n    LazyProxyCategoricalDtype,\n    ModinDtypes,\n)\nfrom modin.core.execution.utils import remote_function\nfrom modin.core.storage_formats import PandasQueryCompiler\nfrom modin.core.storage_formats.pandas.utils import split_result_of_axis_func_pandas\nfrom modin.distributed.dataframe.pandas import from_partitions\nfrom modin.tests.pandas.utils import (\n    create_test_dfs,\n    df_equals,\n    eval_general,\n    test_data_values,\n)\nfrom modin.utils import 
try_cast_to_pandas\n\nNPartitions.put(4)\n\nif Engine.get() == \"Ray\":\n    import ray\n\n    from modin.core.execution.ray.common import RayWrapper\n    from modin.core.execution.ray.common.deferred_execution import MetaList\n    from modin.core.execution.ray.implementations.pandas_on_ray.partitioning import (\n        PandasOnRayDataframeColumnPartition,\n        PandasOnRayDataframePartition,\n        PandasOnRayDataframeRowPartition,\n    )\n\n    block_partition_class = PandasOnRayDataframePartition\n    virtual_column_partition_class = PandasOnRayDataframeColumnPartition\n    virtual_row_partition_class = PandasOnRayDataframeRowPartition\n    put = RayWrapper.put\n    deploy = RayWrapper.deploy\n    materialize = RayWrapper.materialize\nelif Engine.get() == \"Dask\":\n    from modin.core.execution.dask.common import DaskWrapper\n    from modin.core.execution.dask.implementations.pandas_on_dask.partitioning import (\n        PandasOnDaskDataframeColumnPartition,\n        PandasOnDaskDataframePartition,\n        PandasOnDaskDataframeRowPartition,\n    )\n\n    # initialize modin dataframe to initialize dask\n    pd.DataFrame()\n\n    def put(x):\n        return DaskWrapper.put(x, hash=False)\n\n    block_partition_class = PandasOnDaskDataframePartition\n    virtual_column_partition_class = PandasOnDaskDataframeColumnPartition\n    virtual_row_partition_class = PandasOnDaskDataframeRowPartition\n    deploy = DaskWrapper.deploy\n    materialize = DaskWrapper.materialize\nelif Engine.get() == \"Unidist\":\n    from modin.core.execution.unidist.common import UnidistWrapper\n    from modin.core.execution.unidist.implementations.pandas_on_unidist.partitioning import (\n        PandasOnUnidistDataframeColumnPartition,\n        PandasOnUnidistDataframePartition,\n        PandasOnUnidistDataframeRowPartition,\n    )\n\n    block_partition_class = PandasOnUnidistDataframePartition\n    virtual_column_partition_class = PandasOnUnidistDataframeColumnPartition\n    
virtual_row_partition_class = PandasOnUnidistDataframeRowPartition\n    put = UnidistWrapper.put\nelif Engine.get() == \"Python\":\n    from modin.core.execution.python.common import PythonWrapper\n    from modin.core.execution.python.implementations.pandas_on_python.partitioning import (\n        PandasOnPythonDataframeColumnPartition,\n        PandasOnPythonDataframePartition,\n        PandasOnPythonDataframeRowPartition,\n    )\n\n    def put(x):\n        return PythonWrapper.put(x, hash=False)\n\n    def deploy(func, args=tuple()):\n        return func(*args)\n\n    def materialize(arg):\n        return arg\n\n    block_partition_class = PandasOnPythonDataframePartition\n    virtual_column_partition_class = PandasOnPythonDataframeColumnPartition\n    virtual_row_partition_class = PandasOnPythonDataframeRowPartition\nelse:\n    raise NotImplementedError(\n        f\"These test suites are not implemented for the '{Engine.get()}' engine\"\n    )\n\n\ndef construct_modin_df_by_scheme(pandas_df, partitioning_scheme):\n    \"\"\"\n    Build ``modin.pandas.DataFrame`` from ``pandas.DataFrame`` according the `partitioning_scheme`.\n\n    Parameters\n    ----------\n    pandas_df : pandas.DataFrame\n    partitioning_scheme : dict[{\"row_lengths\", \"column_widths\"}] -> list of ints\n\n    Returns\n    -------\n    modin.pandas.DataFrame\n    \"\"\"\n    index = pandas_df.index\n    columns = pandas_df.columns\n    row_lengths = partitioning_scheme[\"row_lengths\"]\n    column_widths = partitioning_scheme[\"column_widths\"]\n    new_length = sum(row_lengths)\n    new_width = sum(column_widths)\n    new_index = index if len(index) == new_length else index[:new_length]\n    new_columns = columns if len(columns) == new_width else columns[:new_width]\n\n    row_partitions = split_result_of_axis_func_pandas(\n        axis=0,\n        num_splits=len(row_lengths),\n        result=pandas_df,\n        min_block_size=MinRowPartitionSize.get(),\n        length_list=row_lengths,\n  
  )\n    partitions = [\n        split_result_of_axis_func_pandas(\n            axis=1,\n            num_splits=len(column_widths),\n            result=row_part,\n            min_block_size=MinColumnPartitionSize.get(),\n            length_list=column_widths,\n        )\n        for row_part in row_partitions\n    ]\n\n    md_df = from_partitions(\n        [[put(part) for part in row_parts] for row_parts in partitions],\n        axis=None,\n        index=new_index,\n        columns=new_columns,\n        row_lengths=row_lengths,\n        column_widths=column_widths,\n    )\n    return md_df\n\n\ndef validate_partitions_cache(df, axis=None):\n    \"\"\"\n    Assert that the ``PandasDataframe`` shape caches correspond to the actual partition's shapes.\n\n    Parameters\n    ----------\n    df : PandasDataframe\n    axis : int, optional\n        An axis to verify the cache for. If not specified, verify cache for both of the axes.\n    \"\"\"\n    axis = [0, 1] if axis is None else [axis]\n\n    axis_lengths = [df._row_lengths_cache, df._column_widths_cache]\n\n    for ax in axis:\n        assert axis_lengths[ax] is not None\n        assert df._partitions.shape[ax] == len(axis_lengths[ax])\n\n    for i in range(df._partitions.shape[0]):\n        for j in range(df._partitions.shape[1]):\n            if 0 in axis:\n                assert df._partitions[i, j].length() == axis_lengths[0][i]\n            if 1 in axis:\n                assert df._partitions[i, j].width() == axis_lengths[1][j]\n\n\ndef assert_has_no_cache(df, axis=0):\n    \"\"\"\n    Assert that the passed dataframe has no labels and no lengths cache along the specified axis.\n\n    Parameters\n    ----------\n    df : modin.pandas.DataFrame\n    axis : int, default: 0\n    \"\"\"\n    mf = df._query_compiler._modin_frame\n    if axis == 0:\n        assert not mf.has_materialized_index and mf._row_lengths_cache is None\n    else:\n        assert not mf.has_materialized_columns and mf._column_widths_cache is 
None\n\n\ndef remove_axis_cache(df, axis=0, remove_lengths=True):\n    \"\"\"\n    Remove index/columns cache for the passed dataframe.\n\n    Parameters\n    ----------\n    df : modin.pandas.DataFrame\n    axis : int, default: 0\n        0 - remove index cache, 1 - remove columns cache.\n    remove_lengths : bool, default: True\n        Whether to remove row lengths/column widths cache.\n    \"\"\"\n    mf = df._query_compiler._modin_frame\n    if axis == 0:\n        mf.set_index_cache(None)\n        if remove_lengths:\n            mf._row_lengths_cache = None\n    else:\n        mf.set_columns_cache(None)\n        if remove_lengths:\n            mf._column_widths_cache = None\n\n\ndef test_aligning_blocks():\n    # Test problem when modin frames have the same number of rows, but different\n    # blocks (partition.list_of_blocks). See #2322 for details\n    accm = pd.DataFrame([\"-22\\n\"] * 162)\n    accm = accm.iloc[2:, :]\n    accm.reset_index(drop=True, inplace=True)\n    accm[\"T\"] = pd.Series([\"24.67\\n\"] * 145)\n\n    # see #2322 for details\n    try_cast_to_pandas(accm)  # force materialization\n\n\ndef test_aligning_blocks_with_duplicated_index():\n    # Same problem as in `test_aligning_blocks` but with duplicated values in index.\n    data11 = [0, 1]\n    data12 = [2, 3]\n\n    data21 = [0]\n    data22 = [1, 2, 3]\n\n    df1 = pd.concat((pd.DataFrame(data11), pd.DataFrame(data12)))\n    df2 = pd.concat((pd.DataFrame(data21), pd.DataFrame(data22)))\n\n    try_cast_to_pandas(df1 - df2)  # force materialization\n\n\ndef test_aligning_partitions():\n    data = [0, 1, 2, 3, 4, 5]\n    modin_df1, _ = create_test_dfs({\"a\": data, \"b\": data})\n    modin_df = modin_df1.loc[:2]\n\n    modin_df2 = pd.concat((modin_df, modin_df))\n\n    modin_df2[\"c\"] = modin_df1[\"b\"]\n    try_cast_to_pandas(modin_df2)  # force materialization\n\n\n@pytest.mark.parametrize(\"row_labels\", [None, [(\"a\", \"\")], [\"a\"]])\n@pytest.mark.parametrize(\"col_labels\", [None, 
[\"a1\"], [(\"c1\", \"z\")]])\ndef test_take_2d_labels_or_positional(row_labels, col_labels):\n    kwargs = {\n        \"index\": [[\"a\", \"b\", \"c\", \"d\"], [\"\", \"\", \"x\", \"y\"]],\n        \"columns\": [[\"a1\", \"b1\", \"c1\", \"d1\"], [\"\", \"\", \"z\", \"x\"]],\n    }\n    md_df, pd_df = create_test_dfs(np.random.rand(4, 4), **kwargs)\n\n    _row_labels = slice(None) if row_labels is None else row_labels\n    _col_labels = slice(None) if col_labels is None else col_labels\n    pd_df = pd_df.loc[_row_labels, _col_labels]\n    modin_frame = md_df._query_compiler._modin_frame\n    new_modin_frame = modin_frame.take_2d_labels_or_positional(\n        row_labels=row_labels, col_labels=col_labels\n    )\n    md_df._query_compiler._modin_frame = new_modin_frame\n\n    df_equals(md_df, pd_df)\n\n\n@pytest.mark.parametrize(\"has_partitions_shape_cache\", [True, False])\n@pytest.mark.parametrize(\"has_frame_shape_cache\", [True, False])\ndef test_apply_func_to_both_axis(has_partitions_shape_cache, has_frame_shape_cache):\n    \"\"\"\n    Test ``modin.core.dataframe.pandas.dataframe.dataframe.PandasDataframe.apply_select_indices`` functionality of broadcasting non-distributed items.\n    \"\"\"\n    data = test_data_values[0]\n\n    md_df, pd_df = create_test_dfs(data)\n    values = pd_df.values + 1\n\n    pd_df.iloc[:, :] = values\n\n    modin_frame = md_df._query_compiler._modin_frame\n\n    if has_frame_shape_cache:\n        # Explicitly compute rows & columns shapes to store this info in frame's cache\n        modin_frame.row_lengths\n        modin_frame.column_widths\n    else:\n        # Explicitly reset frame's cache\n        modin_frame._row_lengths_cache = None\n        modin_frame._column_widths_cache = None\n\n    for row in modin_frame._partitions:\n        for part in row:\n            if has_partitions_shape_cache:\n                # Explicitly compute partition shape to store this info in its cache\n                part.length()\n                
part.width()\n            else:\n                # Explicitly reset partition's shape cache\n                part._length_cache = None\n                part._width_cache = None\n\n    def func_to_apply(partition, row_internal_indices, col_internal_indices, item):\n        partition.iloc[row_internal_indices, col_internal_indices] = item\n        return partition\n\n    new_modin_frame = modin_frame.apply_select_indices(\n        axis=None,\n        func=func_to_apply,\n        # Passing none-slices does not trigger shapes recomputation and so the cache is untouched.\n        row_labels=slice(None),\n        col_labels=slice(None),\n        keep_remaining=True,\n        new_index=pd_df.index,\n        new_columns=pd_df.columns,\n        item_to_distribute=values,\n    )\n    md_df._query_compiler._modin_frame = new_modin_frame\n\n    df_equals(md_df, pd_df)\n\n\n@pytest.mark.parametrize(\n    \"test_type\",\n    [\n        \"many_small_dfs\",\n        \"concatted_df_with_small_dfs\",\n        \"large_df_plus_small_dfs\",\n    ],\n)\n@pytest.mark.parametrize(\n    \"set_num_partitions\",\n    [1, 4],\n    indirect=True,\n)\ndef test_rebalance_partitions(test_type, set_num_partitions):\n    num_partitions = NPartitions.get()\n    if test_type == \"many_small_dfs\":\n        small_dfs = [\n            pd.DataFrame(\n                [[i + j for j in range(0, 1000)]],\n                columns=[f\"col{j}\" for j in range(0, 1000)],\n                index=pd.Index([i]),\n            )\n            for i in range(1, 100001, 1000)\n        ]\n        large_df = pd.concat(small_dfs)\n        col_length = 100\n    elif test_type == \"concatted_df_with_small_dfs\":\n        small_dfs = [\n            pd.DataFrame(\n                [[i + j for j in range(0, 1000)]],\n                columns=[f\"col{j}\" for j in range(0, 1000)],\n                index=pd.Index([i]),\n            )\n            for i in range(1, 100001, 1000)\n        ]\n        large_df = 
pd.concat([pd.concat(small_dfs)] + small_dfs[:3])\n        col_length = 103\n    else:\n        large_df = pd.DataFrame(\n            [[i + j for j in range(1, 1000)] for i in range(0, 100000, 1000)],\n            columns=[f\"col{j}\" for j in range(1, 1000)],\n            index=pd.Index(list(range(0, 100000, 1000))),\n        )\n        small_dfs = [\n            pd.DataFrame(\n                [[i + j for j in range(0, 1000)]],\n                columns=[f\"col{j}\" for j in range(0, 1000)],\n                index=pd.Index([i]),\n            )\n            for i in range(1, 4001, 1000)\n        ]\n        large_df = pd.concat([large_df] + small_dfs[:3])\n        col_length = 103\n    large_modin_frame = large_df._query_compiler._modin_frame\n    assert large_modin_frame._partitions.shape == (\n        num_partitions,\n        num_partitions,\n    ), \"Partitions were not rebalanced after concat.\"\n    assert all(\n        isinstance(ptn, large_modin_frame._partition_mgr_cls._column_partitions_class)\n        for ptn in large_modin_frame._partitions.flatten()\n    )\n    # The following check tests that we can correctly form full-axis virtual partitions\n    # over the orthogonal axis from non-full-axis virtual partitions.\n\n    def col_apply_func(col):\n        assert len(col) == col_length, \"Partial axis partition detected.\"\n        return col + 1\n\n    large_apply_result = large_df.apply(col_apply_func)\n    large_apply_result_frame = large_apply_result._query_compiler._modin_frame\n    assert large_apply_result_frame._partitions.shape == (\n        num_partitions,\n        num_partitions,\n    ), \"Partitions list shape is incorrect.\"\n    assert all(\n        isinstance(ptn, large_apply_result_frame._partition_mgr_cls._partition_class)\n        for ptn in large_apply_result_frame._partitions.flatten()\n    ), \"Partitions are not block partitioned after column-wise apply.\"\n    large_df = pd.DataFrame(\n        
query_compiler=large_df._query_compiler.__constructor__(large_modin_frame)\n    )\n    # The following check tests that we can correctly form full-axis virtual partitions\n    # over the same axis from non-full-axis virtual partitions.\n\n    def row_apply_func(row):\n        assert len(row) == 1000, \"Partial axis partition detected.\"\n        return row + 1\n\n    large_apply_result = large_df.apply(row_apply_func, axis=1)\n    large_apply_result_frame = large_apply_result._query_compiler._modin_frame\n    assert large_apply_result_frame._partitions.shape == (\n        num_partitions,\n        num_partitions,\n    ), \"Partitions list shape is incorrect.\"\n    assert all(\n        isinstance(ptn, large_apply_result_frame._partition_mgr_cls._partition_class)\n        for ptn in large_apply_result_frame._partitions.flatten()\n    ), \"Partitions are not block partitioned after row-wise apply.\"\n\n    large_apply_result = large_df.applymap(lambda x: x)\n    large_apply_result_frame = large_apply_result._query_compiler._modin_frame\n    assert large_apply_result_frame._partitions.shape == (\n        num_partitions,\n        num_partitions,\n    ), \"Partitions list shape is incorrect.\"\n    assert all(\n        isinstance(ptn, large_apply_result_frame._partition_mgr_cls._partition_class)\n        for ptn in large_apply_result_frame._partitions.flatten()\n    ), \"Partitions are not block partitioned after element-wise apply.\"\n\n\n@pytest.mark.parametrize(\n    \"axis,virtual_partition_class\",\n    ((0, virtual_column_partition_class), (1, virtual_row_partition_class)),\n    ids=[\"partitions_spanning_all_columns\", \"partitions_spanning_all_rows\"],\n)\nclass TestDrainVirtualPartitionCallQueue:\n    \"\"\"Test draining virtual partition call queues.\n\n    Test creating a virtual partition made of block partitions and/or one or\n    more layers of virtual partitions, draining the top-level partition's\n    call queue, and getting the result.\n\n    In all 
these test cases, the full_axis argument doesn't matter for\n    correctness because it only affects `apply`, which is not used here.\n    Still, virtual partition users are not supposed to create full-axis\n    virtual partitions out of other full-axis virtual partitions, so\n    set full_axis to False everywhere.\n    \"\"\"\n\n    def test_from_virtual_partitions_with_call_queues(\n        self,\n        axis,\n        virtual_partition_class,\n    ):\n        # reverse the dataframe along the virtual partition axis.\n        def reverse(df):\n            return df.iloc[::-1, :] if axis == 0 else df.iloc[:, ::-1]\n\n        level_zero_blocks_first = [\n            block_partition_class(put(pandas.DataFrame([0]))),\n            block_partition_class(put(pandas.DataFrame([1]))),\n        ]\n        level_one_virtual_first = virtual_partition_class(\n            level_zero_blocks_first, full_axis=False\n        )\n        level_one_virtual_first = level_one_virtual_first.add_to_apply_calls(reverse)\n        level_zero_blocks_second = [\n            block_partition_class(put(pandas.DataFrame([2]))),\n            block_partition_class(put(pandas.DataFrame([3]))),\n        ]\n        level_one_virtual_second = virtual_partition_class(\n            level_zero_blocks_second, full_axis=False\n        )\n        level_one_virtual_second = level_one_virtual_second.add_to_apply_calls(reverse)\n        level_two_virtual = virtual_partition_class(\n            [level_one_virtual_first, level_one_virtual_second], full_axis=False\n        )\n        level_two_virtual.drain_call_queue()\n        if axis == 0:\n            expected_df = pandas.DataFrame([1, 0, 3, 2], index=[0, 0, 0, 0])\n        else:\n            expected_df = pandas.DataFrame([[1, 0, 3, 2]], columns=[0, 0, 0, 0])\n        df_equals(\n            level_two_virtual.to_pandas(),\n            expected_df,\n        )\n\n    def test_from_block_and_virtual_partition_with_call_queues(\n        self, axis, 
virtual_partition_class\n    ):\n        # make a function that reverses the dataframe along the virtual\n        # partition axis.\n        # for testing axis == 0, start with two 2-rows-by-1-column blocks. for\n        # axis == 1, start with two 1-rows-by-2-column blocks.\n        def reverse(df):\n            return df.iloc[::-1, :] if axis == 0 else df.iloc[:, ::-1]\n\n        block_data = [[0, 1], [2, 3]] if axis == 0 else [[[0, 1]], [[2, 3]]]\n        level_zero_blocks = [\n            block_partition_class(put(pandas.DataFrame(block_data[0]))),\n            block_partition_class(put(pandas.DataFrame(block_data[1]))),\n        ]\n        level_zero_blocks[0] = level_zero_blocks[0].add_to_apply_calls(reverse)\n        level_one_virtual = virtual_partition_class(\n            level_zero_blocks[1], full_axis=False\n        )\n        level_one_virtual = level_one_virtual.add_to_apply_calls(reverse)\n        level_two_virtual = virtual_partition_class(\n            [level_zero_blocks[0], level_one_virtual], full_axis=False\n        )\n        level_two_virtual.drain_call_queue()\n        if axis == 0:\n            expected_df = pandas.DataFrame([1, 0, 3, 2], index=[1, 0, 1, 0])\n        else:\n            expected_df = pandas.DataFrame([[1, 0, 3, 2]], columns=[1, 0, 1, 0])\n        df_equals(level_two_virtual.to_pandas(), expected_df)\n\n    def test_virtual_partition_call_queues_at_three_levels(\n        self, axis, virtual_partition_class\n    ):\n        block = block_partition_class(put(pandas.DataFrame([1])))\n        level_one_virtual = virtual_partition_class([block], full_axis=False)\n        level_one_virtual = level_one_virtual.add_to_apply_calls(\n            lambda df: pandas.concat([df, pandas.DataFrame([2])])\n        )\n        level_two_virtual = virtual_partition_class(\n            [level_one_virtual], full_axis=False\n        )\n        level_two_virtual = level_two_virtual.add_to_apply_calls(\n            lambda df: pandas.concat([df, 
pandas.DataFrame([3])])\n        )\n        level_three_virtual = virtual_partition_class(\n            [level_two_virtual], full_axis=False\n        )\n        level_three_virtual = level_three_virtual.add_to_apply_calls(\n            lambda df: pandas.concat([df, pandas.DataFrame([4])])\n        )\n        level_three_virtual.drain_call_queue()\n        df_equals(\n            level_three_virtual.to_pandas(),\n            pd.DataFrame([1, 2, 3, 4], index=[0, 0, 0, 0]),\n        )\n\n\n@pytest.mark.parametrize(\n    \"virtual_partition_class\",\n    (virtual_column_partition_class, virtual_row_partition_class),\n    ids=[\"partitions_spanning_all_columns\", \"partitions_spanning_all_rows\"],\n)\ndef test_virtual_partition_apply_not_returning_pandas_dataframe(\n    virtual_partition_class,\n):\n    # see https://github.com/modin-project/modin/issues/4811\n\n    partition = virtual_partition_class(\n        block_partition_class(put(pandas.DataFrame())), full_axis=False\n    )\n\n    apply_result = partition.apply(lambda df: 1).get()\n    assert apply_result == 1\n\n\n@pytest.mark.skipif(\n    Engine.get() != \"Ray\",\n    reason=\"Only ray.wait() does not take duplicate object refs.\",\n)\ndef test_virtual_partition_dup_object_ref():\n    # See https://github.com/modin-project/modin/issues/5045\n    frame_c = pd.DataFrame(np.zeros((100, 20), dtype=np.float32, order=\"C\"))\n    frame_c = [frame_c] * 20\n    df = pd.concat(frame_c)\n    partition = df._query_compiler._modin_frame._partitions.flatten()[0]\n    obj_refs = partition.list_of_blocks\n    assert len(obj_refs) != len(\n        set(obj_refs)\n    ), \"Test setup did not contain duplicate objects\"\n    # The below call to wait() should not crash\n    partition.wait()\n\n\n__test_reorder_labels_cache_axis_positions = [\n    pytest.param(lambda index: None, id=\"no_reordering\"),\n    pytest.param(lambda index: np.arange(len(index) - 1, -1, -1), id=\"reordering_only\"),\n    pytest.param(\n        lambda 
index: [0, 1, 2, len(index) - 3, len(index) - 2, len(index) - 1],\n        id=\"projection_only\",\n    ),\n    pytest.param(\n        lambda index: np.repeat(np.arange(len(index)), repeats=3), id=\"size_grow\"\n    ),\n]\n\n\n@pytest.mark.parametrize(\"row_positions\", __test_reorder_labels_cache_axis_positions)\n@pytest.mark.parametrize(\"col_positions\", __test_reorder_labels_cache_axis_positions)\n@pytest.mark.parametrize(\n    \"partitioning_scheme\",\n    [\n        pytest.param(\n            lambda df: {\n                \"row_lengths\": [df.shape[0]],\n                \"column_widths\": [df.shape[1]],\n            },\n            id=\"single_partition\",\n        ),\n        pytest.param(\n            lambda df: {\n                \"row_lengths\": [32, max(0, df.shape[0] - 32)],\n                \"column_widths\": [32, max(0, df.shape[1] - 32)],\n            },\n            id=\"two_unbalanced_partitions\",\n        ),\n        pytest.param(\n            lambda df: {\n                \"row_lengths\": [df.shape[0] // NPartitions.get()] * NPartitions.get(),\n                \"column_widths\": [df.shape[1] // NPartitions.get()] * NPartitions.get(),\n            },\n            id=\"perfect_partitioning\",\n        ),\n        pytest.param(\n            lambda df: {\n                \"row_lengths\": [2**i for i in range(NPartitions.get())],\n                \"column_widths\": [2**i for i in range(NPartitions.get())],\n            },\n            id=\"unbalanced_partitioning_equals_npartition\",\n        ),\n        pytest.param(\n            lambda df: {\n                \"row_lengths\": [2] * (df.shape[0] // 2),\n                \"column_widths\": [2] * (df.shape[1] // 2),\n            },\n            id=\"unbalanced_partitioning\",\n        ),\n    ],\n)\ndef test_reorder_labels_cache(\n    row_positions,\n    col_positions,\n    partitioning_scheme,\n):\n    pandas_df = pandas.DataFrame(test_data_values[0])\n\n    md_df = 
construct_modin_df_by_scheme(pandas_df, partitioning_scheme(pandas_df))\n    md_df = md_df._query_compiler._modin_frame\n\n    result = md_df._reorder_labels(\n        row_positions(md_df.index), col_positions(md_df.columns)\n    )\n    validate_partitions_cache(result)\n\n\ndef test_reorder_labels_dtypes():\n    pandas_df = pandas.DataFrame(\n        {\n            \"a\": [1, 2, 3, 4],\n            \"b\": [1.0, 2.4, 3.4, 4.5],\n            \"c\": [\"a\", \"b\", \"c\", \"d\"],\n            \"d\": pd.to_datetime([1, 2, 3, 4], unit=\"D\"),\n        }\n    )\n\n    md_df = construct_modin_df_by_scheme(\n        pandas_df,\n        partitioning_scheme={\n            \"row_lengths\": [len(pandas_df)],\n            \"column_widths\": [\n                len(pandas_df) // 2,\n                len(pandas_df) // 2 + len(pandas_df) % 2,\n            ],\n        },\n    )\n    md_df = md_df._query_compiler._modin_frame\n\n    result = md_df._reorder_labels(\n        row_positions=None, col_positions=np.arange(len(md_df.columns) - 1, -1, -1)\n    )\n    df_equals(result.dtypes, result.to_pandas().dtypes)\n\n\n@pytest.mark.parametrize(\n    \"left_partitioning, right_partitioning, ref_with_cache_available, ref_with_no_cache\",\n    # Note: this test takes into consideration that `MinRowPartitionSize == 32`,\n    # `MinColumnPartitionSize == 32` and `NPartitions == 4`\n    [\n        (\n            [2],\n            [2],\n            1,  # the num_splits is computed like (2 + 2 = 4 / chunk_size = 1 split)\n            2,  # the num_splits is just splits sum (1 + 1 == 2)\n        ),\n        (\n            [24],\n            [54],\n            3,  # the num_splits is computed like (24 + 54 = 78 / chunk_size = 3 splits)\n            2,  # the num_splits is just splits sum (1 + 1 == 2)\n        ),\n        (\n            [2],\n            [299],\n            4,  # the num_splits is bounded by NPartitions (2 + 299 = 301 / chunk_size = 10 splits -> bound by 4)\n            2,  # the 
num_splits is just splits sum (1 + 1 == 2)\n        ),\n        (\n            [32, 32],\n            [128],\n            4,  # the num_splits is bounded by NPartitions (32 + 32 + 128 = 192 / chunk_size = 6 splits -> bound by 4)\n            3,  # the num_splits is just splits sum (2 + 1 == 3)\n        ),\n        (\n            [128] * 7,\n            [128] * 6,\n            4,  # the num_splits is bounded by NPartitions (128 * 7 + 128 * 6 = 1664 / chunk_size = 52 splits -> bound by 4)\n            4,  # the num_splits is just splits sum bound by NPartitions (7 + 6 = 13 splits -> 4 splits)\n        ),\n    ],\n)\n@pytest.mark.parametrize(\n    \"modify_config\",\n    [{NPartitions: 4, MinRowPartitionSize: 32, MinColumnPartitionSize: 32}],\n    indirect=True,\n)\ndef test_merge_partitioning(\n    left_partitioning,\n    right_partitioning,\n    ref_with_cache_available,\n    ref_with_no_cache,\n    modify_config,\n):\n    from modin.core.storage_formats.pandas.utils import merge_partitioning\n\n    left_df = pandas.DataFrame(\n        [np.arange(sum(left_partitioning)) for _ in range(sum(left_partitioning))]\n    )\n    right_df = pandas.DataFrame(\n        [np.arange(sum(right_partitioning)) for _ in range(sum(right_partitioning))]\n    )\n\n    left = construct_modin_df_by_scheme(\n        left_df, {\"row_lengths\": left_partitioning, \"column_widths\": left_partitioning}\n    )._query_compiler._modin_frame\n    right = construct_modin_df_by_scheme(\n        right_df,\n        {\"row_lengths\": right_partitioning, \"column_widths\": right_partitioning},\n    )._query_compiler._modin_frame\n\n    assert left.row_lengths == left.column_widths == left_partitioning\n    assert right.row_lengths == right.column_widths == right_partitioning\n\n    res = merge_partitioning(left, right, axis=0)\n    assert res == ref_with_cache_available\n\n    res = merge_partitioning(left, right, axis=1)\n    assert res == ref_with_cache_available\n\n    (\n        
left._row_lengths_cache,\n        left._column_widths_cache,\n        right._row_lengths_cache,\n        right._column_widths_cache,\n    ) = [None] * 4\n\n    res = merge_partitioning(left, right, axis=0)\n    assert res == ref_with_no_cache\n    # Verifying that no computations are being triggered\n    assert all(\n        cache is None\n        for cache in (\n            left._row_lengths_cache,\n            left._column_widths_cache,\n            right._row_lengths_cache,\n            right._column_widths_cache,\n        )\n    )\n\n    res = merge_partitioning(left, right, axis=1)\n    assert res == ref_with_no_cache\n    # Verifying that no computations are being triggered\n    assert all(\n        cache is None\n        for cache in (\n            left._row_lengths_cache,\n            left._column_widths_cache,\n            right._row_lengths_cache,\n            right._column_widths_cache,\n        )\n    )\n\n\ndef test_merge_with_bad_partitioning():\n    # https://github.com/modin-project/modin/pull/7229\n\n    left_partitioning = [256]\n    right_partitioning = [32, 32, 32, 32]\n\n    left_df = pandas.DataFrame(\n        [np.arange(sum(left_partitioning)) for _ in range(sum(left_partitioning))]\n    )\n    right_df = pandas.DataFrame(\n        [np.arange(sum(right_partitioning)) for _ in range(sum(right_partitioning))]\n    )\n\n    left = construct_modin_df_by_scheme(\n        left_df, {\"row_lengths\": left_partitioning, \"column_widths\": left_partitioning}\n    )\n    right = construct_modin_df_by_scheme(\n        right_df,\n        {\"row_lengths\": right_partitioning, \"column_widths\": right_partitioning},\n    )\n\n    left_frame = left._query_compiler._modin_frame\n    right_frame = right._query_compiler._modin_frame\n    assert left_frame.row_lengths == left_frame.column_widths == left_partitioning\n    assert right_frame.row_lengths == right_frame.column_widths == right_partitioning\n\n    # just a dummy value\n    return_value = 
pd.DataFrame([1, 2, 3, 4])._query_compiler\n    with mock.patch.object(\n        left._query_compiler, \"repartition\", return_value=return_value\n    ) as repartition:\n        _ = left.merge(right)\n        repartition.assert_called_once_with(axis=0)\n\n\ndef test_groupby_with_empty_partition():\n    # see #5461 for details\n    md_df = construct_modin_df_by_scheme(\n        pandas_df=pandas.DataFrame({\"a\": [1, 1, 2, 2], \"b\": [3, 4, 5, 6]}),\n        partitioning_scheme={\"row_lengths\": [2, 2], \"column_widths\": [2]},\n    )\n    md_res = md_df.query(\"a > 1\", engine=\"python\")\n    grp_obj = md_res.groupby(\"a\")\n    # check index error due to partitioning mismatching\n    grp_obj.count()\n\n    md_df = construct_modin_df_by_scheme(\n        pandas_df=pandas.DataFrame({\"a\": [1, 1, 2, 2], \"b\": [3, 4, 5, 6]}),\n        partitioning_scheme={\"row_lengths\": [2, 2], \"column_widths\": [2]},\n    )\n    md_res = md_df.query(\"a > 1\", engine=\"python\")\n    grp_obj = md_res.groupby(md_res[\"a\"])\n    grp_obj.count()\n\n\n@pytest.mark.parametrize(\"set_num_partitions\", [2], indirect=True)\ndef test_repartitioning(set_num_partitions):\n    \"\"\"\n    This test verifies that 'keep_partitioning=False' doesn't actually preserve partitioning.\n\n    For more details see: https://github.com/modin-project/modin/issues/5621\n    \"\"\"\n    assert NPartitions.get() == 2\n\n    pandas_df = pandas.DataFrame(\n        {\"a\": [1, 1, 2, 2], \"b\": [3, 4, 5, 6], \"c\": [1, 2, 3, 4], \"d\": [4, 5, 6, 7]}\n    )\n\n    modin_df = construct_modin_df_by_scheme(\n        pandas_df=pandas.DataFrame(\n            {\"a\": [1, 1, 2, 2], \"b\": [3, 4, 5, 6], \"c\": [1, 2, 3, 4], \"d\": [4, 5, 6, 7]}\n        ),\n        partitioning_scheme={\"row_lengths\": [4], \"column_widths\": [2, 2]},\n    )\n\n    modin_frame = modin_df._query_compiler._modin_frame\n\n    assert modin_frame._partitions.shape == (1, 2)\n    assert modin_frame.column_widths == [2, 2]\n\n    res = 
modin_frame.apply_full_axis(\n        axis=1,\n        func=lambda df: df,\n        keep_partitioning=False,\n        new_index=[0, 1, 2, 3],\n        new_columns=[\"a\", \"b\", \"c\", \"d\"],\n    )\n\n    assert res._partitions.shape == (1, 1)\n    assert res.column_widths == [4]\n    df_equals(res._partitions[0, 0].to_pandas(), pandas_df)\n    df_equals(res.to_pandas(), pandas_df)\n\n\n@pytest.mark.parametrize(\"col_name\", [\"numeric_col\", \"non_numeric_col\"])\n@pytest.mark.parametrize(\"ascending\", [True, False])\n@pytest.mark.parametrize(\"num_pivots\", [3, 2, 1])\n@pytest.mark.parametrize(\"all_pivots_are_unique\", [True, False])\ndef test_split_partitions_kernel(\n    col_name, ascending, num_pivots, all_pivots_are_unique\n):\n    \"\"\"\n    This test verifies proper work of the `split_partitions_using_pivots_for_sort` function\n    used in partitions reshuffling.\n\n    The function being tested splits the passed dataframe into parts according\n    to the 'pivots' indicating boundary values for the parts.\n\n    Parameters\n    ----------\n    col_name : {\"numeric_col\", \"non_numeric_col\"}\n        The tested function takes a key column name to which the pivot values belong.\n        The function may behave differently depending on the type of that column.\n    ascending : {True, False}\n        The split parts are returned either in ascending or descending order.\n        This parameter helps us to test both of the cases.\n    num_pivots : {3, 2, 1}\n        The function's behavior may depend on the number of boundary values being passed.\n    all_pivots_are_unique : {True, False}\n        Duplicate pivot values cause empty partitions to be produced. 
This parameter helps\n        to verify that the function still behaves correctly in such cases.\n    \"\"\"\n    random_state = np.random.RandomState(42)\n\n    df = pandas.DataFrame(\n        {\n            \"numeric_col\": range(9),\n            \"non_numeric_col\": list(\"abcdefghi\"),\n        }\n    )\n    min_val, max_val = df[col_name].iloc[0], df[col_name].iloc[-1]\n\n    # Selecting random boundary values for the key column\n    pivots = random_state.choice(df[col_name], num_pivots, replace=False)\n    if not all_pivots_are_unique:\n        # Making the 'pivots' contain only duplicate values\n        pivots = np.repeat(pivots[0], num_pivots)\n    # The tested function assumes that we pass pivots in the ascending order\n    pivots = np.sort(pivots)\n\n    # Randomly reordering rows in the dataframe\n    df = df.reindex(random_state.permutation(df.index))\n    bins = ShuffleSortFunctions.split_partitions_using_pivots_for_sort(\n        df,\n        [\n            ColumnInfo(\n                name=col_name,\n                is_numeric=pandas.api.types.is_numeric_dtype(df.dtypes[col_name]),\n                pivots=pivots,\n            )\n        ],\n        ascending=ascending,\n    )\n\n    # Building reference bounds to make the result verification simpler\n    bounds = np.concatenate([[min_val], pivots, [max_val]])\n    if not ascending:\n        # If the order is descending we want bounds to be in the descending order as well:\n        # Ex: bounds = [0, 2, 5, 10] for ascending and [10, 5, 2, 0] for descending.\n        bounds = bounds[::-1]\n\n    for idx, part in enumerate(bins):\n        if ascending:\n            # Check that each part is in the range of 'bound[i] <= part <= bound[i + 1]'\n            # Example, if the `pivots` were [2, 5] and the min/max values for the colum are min=0, max=10\n            # Then each part satisfies: 0 <= part[0] <= 2; 2 <= part[1] <= 5; 5 <= part[2] <= 10\n            assert (\n                (bounds[idx] <= 
part[col_name]) & (part[col_name] <= bounds[idx + 1])\n            ).all()\n        else:\n            # Check that each part is in the range of 'bound[i + 1] <= part <= bound[i]'\n            # Example, if the `pivots` were [2, 5] and the min/max values for the colum are min=0, max=10\n            # Then each part satisfies: 5 <= part[0] <= 10; 2 <= part[1] <= 5; 0 <= part[2] <= 2\n            assert (\n                (bounds[idx + 1] <= part[col_name]) & (part[col_name] <= bounds[idx])\n            ).all()\n\n\n@pytest.mark.parametrize(\"col_name\", [\"numeric_col\", \"non_numeric_col\"])\n@pytest.mark.parametrize(\"ascending\", [True, False])\ndef test_split_partitions_with_empty_pivots(col_name, ascending):\n    \"\"\"\n    This test verifies that the splitting function performs correctly when an empty pivots list is passed.\n    The expected behavior is to return a single split consisting of the exact copy of the input dataframe.\n    \"\"\"\n    df = pandas.DataFrame(\n        {\n            \"numeric_col\": range(9),\n            \"non_numeric_col\": list(\"abcdefghi\"),\n        }\n    )\n\n    result = ShuffleSortFunctions.split_partitions_using_pivots_for_sort(\n        df,\n        [\n            ColumnInfo(\n                name=col_name,\n                is_numeric=pandas.api.types.is_numeric_dtype(df.dtypes[col_name]),\n                pivots=[],\n            )\n        ],\n        ascending=ascending,\n    )\n    # We're expecting to recieve a single split here\n    assert isinstance(result, tuple)\n    assert len(result) == 1\n    assert result[0].equals(df)\n\n\n@pytest.mark.parametrize(\"ascending\", [True, False])\ndef test_shuffle_partitions_with_empty_pivots(ascending):\n    \"\"\"\n    This test verifies that the `PartitionMgr.shuffle_partitions` method can handle empty pivots list.\n    \"\"\"\n    modin_frame = pd.DataFrame(\n        np.array([[\"hello\", \"goodbye\"], [\"hello\", \"Hello\"]])\n    )._query_compiler._modin_frame\n\n    
assert modin_frame._partitions.shape == (1, 1)\n\n    column_name = modin_frame.columns[1]\n\n    shuffle_functions = ShuffleSortFunctions(\n        # These are the parameters we pass in the `.sort_by()` implementation\n        modin_frame,\n        columns=column_name,\n        ascending=ascending,\n        ideal_num_new_partitions=1,\n    )\n\n    new_partitions = modin_frame._partition_mgr_cls.shuffle_partitions(\n        modin_frame._partitions,\n        index=0,\n        shuffle_functions=shuffle_functions,\n        final_shuffle_func=lambda df: df.sort_values(column_name),\n    )\n    ref = modin_frame.to_pandas().sort_values(column_name)\n    res = new_partitions[0, 0].get()\n\n    assert new_partitions.shape == (1, 1)\n    assert ref.equals(res)\n\n\n@pytest.mark.parametrize(\"ascending\", [True, False])\ndef test_split_partition_preserve_names(ascending):\n    \"\"\"\n    This test verifies that the dataframes being split by ``split_partitions_using_pivots_for_sort``\n    preserve their index/column names.\n    \"\"\"\n    df = pandas.DataFrame(\n        {\n            \"numeric_col\": range(9),\n            \"non_numeric_col\": list(\"abcdefghi\"),\n        }\n    )\n    index_name = \"custom_name\"\n    df.index.name = index_name\n    df.columns.name = index_name\n\n    # Pivots that contain empty bins\n    pivots = [2, 2, 5, 7]\n    splits = ShuffleSortFunctions.split_partitions_using_pivots_for_sort(\n        df,\n        [ColumnInfo(name=\"numeric_col\", is_numeric=True, pivots=pivots)],\n        ascending=ascending,\n    )\n\n    for part in splits:\n        assert part.index.name == index_name\n        assert part.columns.name == index_name\n\n\n@pytest.mark.parametrize(\"has_cols_metadata\", [True, False])\n@pytest.mark.parametrize(\"has_dtypes_metadata\", [True, False])\ndef test_merge_preserves_metadata(has_cols_metadata, has_dtypes_metadata):\n    df1 = pd.DataFrame({\"a\": [1, 1, 2, 2], \"b\": list(\"abcd\")})\n    df2 = pd.DataFrame({\"a\": 
[4, 2, 1, 3], \"b\": list(\"bcaf\"), \"c\": [3, 2, 1, 0]})\n\n    modin_frame = df1._query_compiler._modin_frame\n\n    if has_cols_metadata:\n        # Verify that there were initially materialized metadata\n        assert modin_frame.has_materialized_columns\n    else:\n        modin_frame._columns_cache = None\n\n    if has_dtypes_metadata:\n        # Verify that there were initially materialized metadata\n        assert modin_frame.has_materialized_dtypes\n    else:\n        modin_frame.set_dtypes_cache(None)\n\n    res = df1.merge(df2, on=\"b\")._query_compiler._modin_frame\n\n    if has_cols_metadata:\n        assert res.has_materialized_columns\n        if has_dtypes_metadata:\n            assert res.has_materialized_dtypes\n        else:\n            # Verify that no materialization was triggered\n            assert not res.has_materialized_dtypes\n            assert not modin_frame.has_materialized_dtypes\n    else:\n        # Verify that no materialization was triggered\n        assert not res.has_materialized_columns\n        assert not res.has_materialized_dtypes\n        assert not modin_frame.has_materialized_columns\n        if not has_dtypes_metadata:\n            assert not modin_frame.has_materialized_dtypes\n\n\ndef test_binary_op_preserve_dtypes():\n    df = pd.DataFrame({\"a\": [1, 2, 3], \"b\": [4.0, 5.0, 6.0]})\n\n    def setup_cache(df, has_cache=True):\n        if has_cache:\n            _ = df.dtypes\n            assert df._query_compiler.frame_has_materialized_dtypes\n        else:\n            df._query_compiler.set_frame_dtypes_cache(None)\n            assert not df._query_compiler.frame_has_materialized_dtypes\n        return df\n\n    def assert_cache(df, has_cache=True):\n        assert not (has_cache ^ df._query_compiler.frame_has_materialized_dtypes)\n\n    # Check when `other` is a non-distributed object\n    assert_cache(setup_cache(df) + 2.0)\n    assert_cache(setup_cache(df) + {\"a\": 2.0, \"b\": 4})\n    
assert_cache(setup_cache(df) + [2.0, 4])\n    assert_cache(setup_cache(df) + np.array([2.0, 4]))\n\n    # Check when `other` is a dataframe\n    other = pd.DataFrame({\"b\": [3, 4, 5], \"c\": [4.0, 5.0, 6.0]})\n    assert_cache(setup_cache(df) + setup_cache(other, has_cache=True))\n    assert_cache(setup_cache(df) + setup_cache(other, has_cache=False), has_cache=False)\n\n    # Check when `other` is a series\n    other = pd.Series({\"b\": 3.0, \"c\": 4.0})\n    assert_cache(setup_cache(df) + setup_cache(other, has_cache=True))\n    assert_cache(setup_cache(df) + setup_cache(other, has_cache=False), has_cache=False)\n\n\n@pytest.mark.parametrize(\"axis\", [0, 1])\ndef test_concat_dont_materialize_opposite_axis(axis):\n    data = {\"a\": [1, 2, 3], \"b\": [4.0, 5.0, 6.0]}\n    df1, df2 = pd.DataFrame(data), pd.DataFrame(data)\n\n    def assert_no_cache(df, axis):\n        if axis:\n            assert not df._query_compiler.frame_has_materialized_columns\n        else:\n            assert not df._query_compiler.frame_has_materialized_index\n\n    def remove_cache(df, axis):\n        if axis:\n            df._query_compiler.set_frame_columns_cache(None)\n        else:\n            df._query_compiler.set_frame_index_cache(None)\n        assert_no_cache(df, axis)\n        return df\n\n    df1, df2 = remove_cache(df1, axis), remove_cache(df2, axis)\n\n    df_concated = pd.concat((df1, df2), axis=axis)\n    assert_no_cache(df1, axis)\n    assert_no_cache(df2, axis)\n    assert_no_cache(df_concated, axis)\n\n\ndef test_setitem_bool_preserve_dtypes():\n    df = pd.DataFrame({\"a\": [1, 1, 2, 2], \"b\": [3, 4, 5, 6]})\n    indexer = pd.Series([True, False, True, False])\n\n    assert df._query_compiler.frame_has_materialized_dtypes\n\n    # slice(None) as a col_loc\n    df.loc[indexer] = 2.0\n    assert df._query_compiler.frame_has_materialized_dtypes\n\n    # list as a col_loc\n    df.loc[indexer, [\"a\", \"b\"]] = 2.0\n    assert 
df._query_compiler.frame_has_materialized_dtypes\n\n    # scalar as a col_loc\n    df.loc[indexer, \"a\"] = 2.0\n    assert df._query_compiler.frame_has_materialized_dtypes\n\n\ndef test_setitem_unhashable_preserve_dtypes():\n    df = pd.DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]])\n    assert df._query_compiler.frame_has_materialized_dtypes\n\n    df2 = pd.DataFrame([[9, 9], [5, 5]])\n    assert df2._query_compiler.frame_has_materialized_dtypes\n\n    df[[1, 2]] = df2\n    assert df._query_compiler.frame_has_materialized_dtypes\n\n\n@pytest.mark.parametrize(\"modify_config\", [{RangePartitioning: True}], indirect=True)\ndef test_groupby_size_shuffling(modify_config):\n    # verifies that 'groupby.size()' works with reshuffling implementation\n    # https://github.com/modin-project/modin/issues/6367\n    df = pd.DataFrame({\"a\": [1, 1, 2, 2], \"b\": [3, 4, 5, 6]})\n    modin_frame = df._query_compiler._modin_frame\n\n    with mock.patch.object(\n        modin_frame,\n        \"_apply_func_to_range_partitioning\",\n        wraps=modin_frame._apply_func_to_range_partitioning,\n    ) as shuffling_method:\n        try_cast_to_pandas(df.groupby(\"a\").size())\n\n    shuffling_method.assert_called()\n\n\n@pytest.mark.parametrize(\n    \"kwargs\",\n    [dict(axis=0, labels=[]), dict(axis=1, labels=[\"a\"]), dict(axis=1, labels=[])],\n)\ndef test_reindex_preserve_dtypes(kwargs):\n    df = pd.DataFrame({\"a\": [1, 1, 2, 2], \"b\": [3, 4, 5, 6]})\n\n    reindexed_df = df.reindex(**kwargs)\n    assert reindexed_df._query_compiler.frame_has_materialized_dtypes\n\n\nclass TestModinIndexIds:\n    @staticmethod\n    def _patch_get_index(df, axis=0):\n        \"\"\"Patch the ``.index``/``.columns`` attribute of the passed dataframe.\"\"\"\n        if axis == 0:\n            return mock.patch.object(\n                type(df),\n                \"index\",\n                new_callable=mock.PropertyMock,\n                wraps=functools.partial(type(df).index.__get__, df),\n            
)\n        else:\n            return mock.patch.object(\n                type(df),\n                \"columns\",\n                new_callable=mock.PropertyMock,\n                wraps=functools.partial(type(df).columns.__get__, df),\n            )\n\n    def test_setitem_without_copartition(self):\n        \"\"\"Test that setitem for identical indices works without materializing the axis.\"\"\"\n        # simple insertion\n        df = pd.DataFrame({f\"col{i}\": np.arange(256) for i in range(64)})\n        remove_axis_cache(df)\n\n        col = df[\"col0\"]\n        assert_has_no_cache(col)\n        assert_has_no_cache(df)\n\n        # insert the column back and check that no index computation were triggered\n        with self._patch_get_index(df) as get_index_patch:\n            df[\"col0\"] = col\n            # check that no cache computation was triggered\n            assert_has_no_cache(df)\n            assert_has_no_cache(col)\n        get_index_patch.assert_not_called()\n\n        # insertion with few map operations\n        df = pd.DataFrame({f\"col{i}\": np.arange(256) for i in range(64)})\n        remove_axis_cache(df)\n\n        col = df[\"col0\"]\n        # perform some operations that doesn't modify index labels and partitioning\n        col = col * 2 + 10\n        assert_has_no_cache(col)\n        assert_has_no_cache(df)\n\n        # insert the modified column back and check that no index computation were triggered\n        with self._patch_get_index(df) as get_index_patch:\n            df[\"col0\"] = col\n            # check that no cache computation was triggered\n            assert_has_no_cache(df)\n            assert_has_no_cache(col)\n        get_index_patch.assert_not_called()\n\n    @pytest.mark.parametrize(\"axis\", [0, 1])\n    def test_concat_without_copartition(self, axis):\n        \"\"\"Test that concatenation for frames with identical indices works without materializing the axis.\"\"\"\n        df1 = pd.DataFrame({f\"col{i}\": 
np.arange(256) for i in range(64)})\n        remove_axis_cache(df1, axis)\n\n        # perform some operations that doesn't modify index labels and partitioning\n        df2 = df1.abs().applymap(lambda df: df * 2)\n\n        with self._patch_get_index(df1, axis) as get_index_patch:\n            res = pd.concat([df1, df2], axis=axis ^ 1)\n            # check that no cache computation was triggered\n            assert_has_no_cache(df1, axis)\n            assert_has_no_cache(df2, axis)\n            assert_has_no_cache(res, axis)\n        get_index_patch.assert_not_called()\n\n    def test_index_updates_ref(self):\n        \"\"\"Test that copying the default ModinIndex to a new frame updates frame reference with the new one.\"\"\"\n        df1 = pd.DataFrame({\"a\": [1, 2, 3], \"b\": [4, 5, 6]})\n        remove_axis_cache(df1)\n\n        modin_frame1 = df1._query_compiler._modin_frame\n        # verify that index cache is 'default' and so holds a reference to the `modin_frame`\n        assert modin_frame1._index_cache._is_default_callable\n\n        ref_count_before = sys.getrefcount(modin_frame1)\n\n        df2 = df1 + 1\n        modin_frame2 = df2._query_compiler._modin_frame\n        # verify that new index cache is also the 'default' one\n        assert modin_frame2._index_cache._is_default_callable\n        # verify that there's no new references being created to the old frame\n        assert sys.getrefcount(modin_frame1) == ref_count_before\n\n    def test_index_updates_axis(self):\n        \"\"\"Verify that the ModinIndex `axis` attribute is updated when copied to a new frame but for an opposit axis.\"\"\"\n        df1 = pd.DataFrame({\"a\": [1, 2, 3], \"b\": [4, 5, 6]})\n        remove_axis_cache(df1)\n\n        # now index becomes columns and vice-versa, this means that the 'default callable'\n        # of the ModinIndex now has to update its axis\n        df2 = df1.T\n\n        idx1 = df1._query_compiler._modin_frame._index_cache\n        idx2 = 
df2._query_compiler._modin_frame._index_cache\n\n        cols1 = df1._query_compiler._modin_frame._columns_cache\n        cols2 = df2._query_compiler._modin_frame._columns_cache\n\n        # check that we can compare df.index == df.T.columns & df.columns == df.T.index\n        # without triggering any axis materialization\n        assert (\n            idx1._index_id == cols2._index_id and idx1._lengths_id == cols2._lengths_id\n        )\n        assert (\n            cols1._index_id == idx2._index_id and cols1._lengths_id == idx2._lengths_id\n        )\n\n        # check that when the materialization is triggered for the transposed frame it produces proper labels\n        assert df2.index.equals(pandas.Index([\"a\", \"b\"]))\n        assert df2.columns.equals(pandas.Index([0, 1, 2]))\n\n    def test_filter_empties_resets_lengths(self):\n        \"\"\"Verify that filtering out empty partitions affects ``ModinIndex._lengths_id`` field.\"\"\"\n        # case1: partitioning is modified by '._filter_empties()', meaning that '._lengths_id' should be changed\n        md_df = construct_modin_df_by_scheme(\n            pandas.DataFrame({\"a\": [1, 1, 2, 2]}),\n            {\"row_lengths\": [2, 2], \"column_widths\": [1]},\n        )\n        mf = md_df.query(\"a < 2\")._query_compiler._modin_frame\n        mf.index  # trigger index materialization\n\n        old_cache = mf._index_cache\n        assert mf._partitions.shape == (2, 1)\n\n        mf._filter_empties()\n        new_cache = mf._index_cache\n\n        assert new_cache._index_id == old_cache._index_id\n        assert new_cache._lengths_id != old_cache._lengths_id\n        assert new_cache._lengths_cache != old_cache._lengths_cache\n\n        # case2: partitioning is NOT modified by '._filter_empties()', meaning that '._lengths_id' should stay the same\n        md_df = construct_modin_df_by_scheme(\n            pandas.DataFrame({\"a\": [1, 1, 2, 2]}),\n            {\"row_lengths\": [2, 2], \"column_widths\": [1]},\n 
       )\n        mf = md_df._query_compiler._modin_frame\n\n        old_cache = mf._index_cache\n        assert mf._partitions.shape == (2, 1)\n\n        mf._filter_empties()\n        new_cache = mf._index_cache\n\n        assert new_cache._index_id == old_cache._index_id\n        assert new_cache._lengths_id == old_cache._lengths_id\n        assert new_cache._lengths_cache == old_cache._lengths_cache\n\n    def test_binops_without_repartitioning(self):\n        \"\"\"Test that binary operations for identical indices works without materializing the axis.\"\"\"\n        df = pd.DataFrame({f\"col{i}\": np.arange(256) for i in range(64)})\n        remove_axis_cache(df)\n\n        col1 = df[\"col1\"]\n        assert_has_no_cache(col1)\n        assert_has_no_cache(df)\n\n        col2 = df[\"col2\"]\n        assert_has_no_cache(col2)\n        assert_has_no_cache(df)\n\n        # perform a binary op and insert the result back then check that no index computation were triggered\n        with self._patch_get_index(df) as get_index_df:\n            df[\"result\"] = col1 + col2\n            # check that no cache computation was triggered\n            assert_has_no_cache(df)\n            assert_has_no_cache(col1)\n            assert_has_no_cache(col2)\n        get_index_df.assert_not_called()\n\n\ndef test_skip_set_columns():\n    \"\"\"\n    Verifies that the mechanism of skipping the actual ``._set_columns()`` call in case\n    the new columns are identical to the previous ones works properly.\n\n    In this test, we rely on the ``modin_frame._deferred_column`` attribute.\n    The new indices propagation is done lazily, and the ``deferred_column`` attribute\n    indicates whether there's a new indices propagation pending.\n    \"\"\"\n    df = pd.DataFrame({\"col1\": [1, 2, 3], \"col2\": [3, 4, 5]})\n    df.columns = [\"col1\", \"col10\"]\n    # Verifies that the new columns were successfully set in case they're actually new\n    assert 
df._query_compiler._modin_frame._deferred_column\n    assert np.all(df.columns.values == [\"col1\", \"col10\"])\n\n    df = pd.DataFrame({\"col1\": [1, 2, 3], \"col2\": [3, 4, 5]})\n    df.columns = [\"col1\", \"col2\"]\n    # Verifies that the new columns weren't set if they're equal to the previous ones\n    assert not df._query_compiler._modin_frame._deferred_column\n\n    df = pd.DataFrame({\"col1\": [1, 2, 3], \"col2\": [3, 4, 5]})\n    df.columns = pandas.Index([\"col1\", \"col2\"], name=\"new name\")\n    # Verifies that the new columns were successfully set in case they carry new metadata\n    assert df.columns.name == \"new name\"\n\n    df = pd.DataFrame(\n        {(\"a\", \"col1\"): [1, 2, 3], (\"a\", \"col2\"): [3, 4, 5], (\"b\", \"col1\"): [6, 7, 8]}\n    )\n    df.columns = df.columns.copy()\n    # Verifies that the new columns weren't set if they're equal to the previous ones\n    assert not df._query_compiler._modin_frame._deferred_column\n\n    df = pd.DataFrame(\n        {(\"a\", \"col1\"): [1, 2, 3], (\"a\", \"col2\"): [3, 4, 5], (\"b\", \"col1\"): [6, 7, 8]}\n    )\n    new_cols = df.columns[::-1]\n    df.columns = new_cols\n    # Verifies that the new columns were successfully set in case they're actually new\n    assert df._query_compiler._modin_frame._deferred_column\n    assert df.columns.equals(new_cols)\n\n    df = pd.DataFrame({\"col1\": [1, 2, 3], \"col2\": [3, 4, 5]})\n    remove_axis_cache(df, axis=1)\n    df.columns = [\"col1\", \"col2\"]\n    # Verifies that the computation of the old columns wasn't triggered for the sake\n    # of equality comparison, in this case the new columns should be set unconditionally,\n    # meaning that the '_deferred_column' has to be True\n    assert df._query_compiler._modin_frame._deferred_column\n\n\ndef test_query_dispatching():\n    \"\"\"\n    Test whether the logic of determining whether the passed query\n    can be performed row-wise works correctly in ``PandasQueryCompiler.rowwise_query()``.\n\n    
The tested method raises a ``NotImplementedError`` if the query cannot be performed row-wise\n    and raises nothing if it can.\n    \"\"\"\n    qc = pd.DataFrame(\n        {\"a\": [1], \"b\": [2], \"c\": [3], \"d\": [4], \"e\": [5]}\n    )._query_compiler\n\n    local_var = 10  # noqa: F841 (unused variable)\n\n    # these queries should be performed row-wise (so no exception)\n    qc.rowwise_query(\"a < 1\")\n    qc.rowwise_query(\"a < b\")\n    qc.rowwise_query(\"a < (b + @local_var) * c > 10\")\n\n    # these queries cannot be performed row-wise (so they must raise an exception)\n    with pytest.raises(NotImplementedError):\n        qc.rowwise_query(\"a < b[0]\")\n    with pytest.raises(NotImplementedError):\n        qc.rowwise_query(\"a < b.min()\")\n    with pytest.raises(NotImplementedError):\n        qc.rowwise_query(\"a < (b + @local_var + (b - e.min())) * c > 10\")\n    with pytest.raises(NotImplementedError):\n        qc.rowwise_query(\"a < b.size\")\n\n\ndef test_sort_values_cache():\n    \"\"\"\n    Test that the column widths cache after ``.sort_values()`` is valid:\n    https://github.com/modin-project/modin/issues/6607\n    \"\"\"\n    # 1 row partition and 2 column partitions, in this case '.sort_values()' will use\n    # row-wise implementation and so the column widths WILL NOT be changed\n    modin_df = construct_modin_df_by_scheme(\n        pandas.DataFrame({f\"col{i}\": range(100) for i in range(64)}),\n        partitioning_scheme={\"row_lengths\": [100], \"column_widths\": [32, 32]},\n    )\n    mf_initial = modin_df._query_compiler._modin_frame\n\n    mf_res = modin_df.sort_values(\"col0\")._query_compiler._modin_frame\n    # check that row-wise implementation was indeed used (col widths were not changed)\n    assert mf_res._column_widths_cache == [32, 32]\n    # check that the cache and actual col widths match\n    validate_partitions_cache(mf_res, axis=1)\n    # check that the initial frame's cache wasn't changed\n    assert 
mf_initial._column_widths_cache == [32, 32]\n    validate_partitions_cache(mf_initial, axis=1)\n\n    # 2 row partition and 2 column partitions, in this case '.sort_values()' will use\n    # range-partitioning implementation and so the column widths WILL be changed\n    modin_df = construct_modin_df_by_scheme(\n        pandas.DataFrame({f\"col{i}\": range(100) for i in range(64)}),\n        partitioning_scheme={\"row_lengths\": [50, 50], \"column_widths\": [32, 32]},\n    )\n    mf_initial = modin_df._query_compiler._modin_frame\n\n    mf_res = modin_df.sort_values(\"col0\")._query_compiler._modin_frame\n    # check that range-partitioning implementation was indeed used (col widths were changed)\n    assert mf_res._column_widths_cache == [64]\n    # check that the cache and actual col widths match\n    validate_partitions_cache(mf_res, axis=1)\n    # check that the initial frame's cache wasn't changed\n    assert mf_initial._column_widths_cache == [32, 32]\n    validate_partitions_cache(mf_initial, axis=1)\n\n\ndef test_apply_full_axis_preserve_widths():\n    md_df = construct_modin_df_by_scheme(\n        pandas.DataFrame(\n            {\"a\": [1, 2, 3, 4], \"b\": [3, 4, 5, 6], \"c\": [6, 7, 8, 9], \"d\": [0, 1, 2, 3]}\n        ),\n        {\"row_lengths\": [2, 2], \"column_widths\": [2, 2]},\n    )._query_compiler._modin_frame\n\n    assert md_df._row_lengths_cache == [2, 2]\n    assert md_df._column_widths_cache == [2, 2]\n\n    def func(df):\n        if df.iloc[0, 0] == 1:\n            return pandas.DataFrame(\n                {\"a\": [1, 2, 3], \"b\": [3, 4, 5], \"c\": [6, 7, 8], \"d\": [0, 1, 2]}\n            )\n        else:\n            return pandas.DataFrame({\"a\": [4], \"b\": [6], \"c\": [9], \"d\": [3]})\n\n    res = md_df.apply_full_axis(\n        func=func,\n        axis=1,\n        new_index=[0, 1, 2, 3],\n        new_columns=[\"a\", \"b\", \"c\", \"d\"],\n        keep_partitioning=True,\n    )\n    col_widths_cache = res._column_widths_cache\n    
actual_column_widths = [part.width() for part in res._partitions[0]]\n\n    assert col_widths_cache == actual_column_widths\n    assert res._row_lengths_cache is None\n\n\ndef test_apply_full_axis_preserve_lengths():\n    md_df = construct_modin_df_by_scheme(\n        pandas.DataFrame(\n            {\"a\": [1, 2, 3, 4], \"b\": [3, 4, 5, 6], \"c\": [6, 7, 8, 9], \"d\": [0, 1, 2, 3]}\n        ),\n        {\"row_lengths\": [2, 2], \"column_widths\": [2, 2]},\n    )._query_compiler._modin_frame\n\n    assert md_df._row_lengths_cache == [2, 2]\n    assert md_df._column_widths_cache == [2, 2]\n\n    def func(df):\n        if df.iloc[0, 0] == 1:\n            return pandas.DataFrame({\"a\": [3, 2, 3, 4], \"b\": [3, 4, 5, 6]})\n        else:\n            return pandas.DataFrame({\"c\": [9, 5, 6, 7]})\n\n    res = md_df.apply_full_axis(\n        func=func,\n        axis=0,\n        new_index=[0, 1, 2, 3],\n        new_columns=[\"a\", \"b\", \"c\"],\n        keep_partitioning=True,\n    )\n\n    row_lengths_cache = res._row_lengths_cache\n    actual_row_lengths = [part.length() for part in res._partitions[:, 0]]\n\n    assert row_lengths_cache == actual_row_lengths\n    assert res._column_widths_cache is None\n\n\nclass DummyFuture:\n    \"\"\"\n    A dummy object emulating future's behaviour, this class is used in ``test_call_queue_serialization``.\n\n    It stores a random numeric value representing its data and `was_materialized` state.\n    Initially this object is considered to be serialized, the state can be changed by calling\n    the ``.materialize()`` method.\n    \"\"\"\n\n    def __init__(self):\n        self._value = np.random.randint(0, 1_000_000)\n        self._was_materialized = False\n\n    def materialize(self):\n        self._was_materialized = True\n        return self\n\n    def __eq__(self, other):\n        if isinstance(other, type(self)) and self._value == other._value:\n            return True\n        return False\n\n\nclass TestModinDtypes:\n    
\"\"\"Test ``ModinDtypes`` and ``DtypesDescriptor`` classes.\"\"\"\n\n    schema = pandas.Series(\n        {\n            \"a\": np.dtype(\"int64\"),\n            \"b\": np.dtype(float),\n            \"c\": np.dtype(bool),\n            \"d\": np.dtype(bool),\n            \"e\": np.dtype(\"object\"),\n        }\n    )\n\n    def get_columns_order(self, cols):\n        \"\"\"Return a value to be passed as ``DtypesDescriptor(columns_order=...)`` parameter.\"\"\"\n        return {i: col for i, col in enumerate(cols)}\n\n    class DummyDf:\n        def __init__(self, schema):\n            self._schema = pandas.Series(schema)\n            # record calls to verify that we haven't materialized more than needed\n            self.history = []\n\n        def _compute_dtypes(self, subset=None):\n            self.history.append((\"_compute_dtypes\", subset))\n            return self._schema if subset is None else self._schema[subset]\n\n        @property\n        def columns(self):\n            self.history.append((\"columns\",))\n            return self._schema.index\n\n        @property\n        def has_materialized_columns(self):\n            # False, to make descriptor avoid materialization at all cost\n            return False\n\n    def test_get_dtypes_set_modin_dtypes(self):\n        \"\"\"Test that ``ModinDtypes.get_dtypes_set()`` correctly propagates this request to the underlying value.\"\"\"\n        res = ModinDtypes(lambda: self.schema).get_dtypes_set()\n        exp = set(self.schema.values)\n        assert res == exp\n\n        res = ModinDtypes(self.schema).get_dtypes_set()\n        exp = set(self.schema.values)\n        assert res == exp\n\n        res = ModinDtypes(\n            DtypesDescriptor(\n                self.schema[[\"a\", \"b\", \"e\"]], remaining_dtype=np.dtype(bool)\n            )\n        ).get_dtypes_set()\n        exp = set(self.schema.values)\n        assert res == exp\n\n    def test_get_dtypes_set_desc(self):\n        \"\"\"\n        Test 
that ``DtypesDescriptor.get_dtypes_set()`` returns valid values and doesn't\n        trigger unnecessary computations.\n        \"\"\"\n        df = self.DummyDf(self.schema)\n        desc = DtypesDescriptor(\n            self.schema[[\"a\", \"b\"]], know_all_names=False, parent_df=df\n        )\n        res = desc.get_dtypes_set()\n        exp = self.schema.values\n        assert res == set(exp)\n        # since 'know_all_names=False', we first have to retrieve columns\n        # in order to determine missing dtypes and then call '._compute_dtypes()'\n        # only on a subset\n        assert len(df.history) == 2 and df.history == [\n            (\"columns\",),\n            (\"_compute_dtypes\", [\"c\", \"d\", \"e\"]),\n        ]\n\n        df = self.DummyDf(self.schema)\n        desc = DtypesDescriptor(\n            self.schema[[\"a\", \"b\"]],\n            cols_with_unknown_dtypes=[\"c\", \"d\", \"e\"],\n            parent_df=df,\n        )\n        res = desc.get_dtypes_set()\n        exp = self.schema.values\n        assert res == set(exp)\n        # here we already know names for cols with unknown dtypes, so only\n        # calling '._compute_dtypes()' on a subset\n        assert len(df.history) == 1 and df.history[0] == (\n            \"_compute_dtypes\",\n            [\"c\", \"d\", \"e\"],\n        )\n\n        df = self.DummyDf(self.schema[[\"a\", \"b\", \"c\", \"d\"]])\n        desc = DtypesDescriptor(\n            self.schema[[\"a\", \"b\"]], remaining_dtype=np.dtype(bool), parent_df=df\n        )\n        res = desc.get_dtypes_set()\n        exp = self.schema[[\"a\", \"b\", \"c\", \"d\"]].values\n        assert res == set(exp)\n        # we don't need to access 'parent_df' in order to get dtypes set, as we\n        # can infer it from 'known_dtypes' and 'remaining_dtype'\n        assert len(df.history) == 0\n\n        df = self.DummyDf(self.schema)\n        desc = DtypesDescriptor(know_all_names=False, parent_df=df)\n        res = 
desc.get_dtypes_set()\n        exp = self.schema.values\n        assert res == set(exp)\n        # compute dtypes for all columns\n        assert len(df.history) == 1 and df.history[0] == (\"_compute_dtypes\", None)\n\n        df = self.DummyDf(self.schema)\n        desc = DtypesDescriptor(\n            cols_with_unknown_dtypes=self.schema.index.tolist(), parent_df=df\n        )\n        res = desc.get_dtypes_set()\n        exp = self.schema.values\n        assert res == set(exp)\n        # compute dtypes for all columns\n        assert len(df.history) == 1 and df.history[0] == (\n            \"_compute_dtypes\",\n            self.schema.index.tolist(),\n        )\n\n        df = self.DummyDf(self.schema)\n        desc = DtypesDescriptor(\n            cols_with_unknown_dtypes=[\"a\", \"b\", \"e\"],\n            remaining_dtype=np.dtype(bool),\n            parent_df=df,\n        )\n        res = desc.get_dtypes_set()\n        exp = self.schema.values\n        assert res == set(exp)\n        # here we already know names for cols with unknown dtypes, so only\n        # calling '._compute_dtypes()' on a subset\n        assert len(df.history) == 1 and df.history[0] == (\n            \"_compute_dtypes\",\n            [\"a\", \"b\", \"e\"],\n        )\n\n    def test_lazy_get_modin_dtypes(self):\n        \"\"\"Test that ``ModinDtypes.lazy_get()`` correctly propagates this request to the underlying value.\"\"\"\n        res = ModinDtypes(self.schema).lazy_get([\"b\", \"c\", \"a\"])\n        exp = self.schema[[\"b\", \"c\", \"a\"]]\n        assert res._value.equals(exp)\n\n        res = ModinDtypes(lambda: self.schema).lazy_get([\"b\", \"c\", \"a\"])\n        exp = self.schema[[\"b\", \"c\", \"a\"]]\n        assert callable(res._value)\n        assert res._value().equals(exp)\n\n        res = ModinDtypes(\n            DtypesDescriptor(\n                self.schema[[\"a\", \"b\"]], cols_with_unknown_dtypes=[\"c\", \"d\", \"e\"]\n            )\n        ).lazy_get([\"b\", 
\"c\", \"a\"])\n        exp = DtypesDescriptor(\n            self.schema[[\"a\", \"b\"]],\n            cols_with_unknown_dtypes=[\"c\"],\n            columns_order={0: \"b\", 1: \"c\", 2: \"a\"},\n        )\n        assert res._value.equals(exp)\n\n    def test_lazy_get_desc(self):\n        \"\"\"\n        Test that ``DtypesDescriptor.lazy_get()`` works properly.\n\n        In this test we never specify `parent_df` for a descriptor, verifying that\n        ``.lazy_get()`` never triggers any computations.\n        \"\"\"\n        desc = DtypesDescriptor(self.schema[[\"a\", \"b\"]])\n        subset = [\"a\", \"c\", \"e\"]\n        res = desc.lazy_get(subset)\n        exp = DtypesDescriptor(\n            self.schema[subset[:1]],\n            cols_with_unknown_dtypes=subset[1:],\n            columns_order=self.get_columns_order(subset),\n        )\n        assert res.equals(exp)\n\n        desc = DtypesDescriptor(self.schema[[\"a\", \"b\"]], remaining_dtype=np.dtype(bool))\n        subset = [\"a\", \"c\", \"d\"]\n        res = desc.lazy_get(subset)\n        exp = DtypesDescriptor(\n            # dtypes for 'c' and 'd' were inferred from 'remaining_dtype' parameter\n            self.schema[subset],\n            columns_order=self.get_columns_order(subset),\n            _schema_is_known=True,\n        )\n        assert res.equals(exp)\n\n        desc = DtypesDescriptor()\n        subset = [\"a\", \"c\", \"d\"]\n        res = desc.lazy_get(subset)\n        exp = DtypesDescriptor(\n            cols_with_unknown_dtypes=subset,\n            columns_order=self.get_columns_order(subset),\n        )\n        assert res.equals(exp)\n\n        desc = DtypesDescriptor(remaining_dtype=np.dtype(bool))\n        subset = [\"c\", \"d\"]\n        res = desc.lazy_get(subset)\n        exp = DtypesDescriptor(\n            # dtypes for 'c' and 'd' were inferred from 'remaining_dtype' parameter\n            self.schema[subset],\n            columns_order=self.get_columns_order(subset),\n        
    _schema_is_known=True,\n        )\n        assert res.equals(exp)\n\n    def test_concat_axis_0(self):\n        res = DtypesDescriptor.concat(\n            [\n                DtypesDescriptor(self.schema[[\"a\", \"b\"]]),\n                DtypesDescriptor(self.schema[[\"c\", \"d\"]]),\n            ]\n        )\n        # simply concat known schemas\n        exp = DtypesDescriptor(self.schema[[\"a\", \"b\", \"c\", \"d\"]])\n        assert res.equals(exp)\n\n        res = DtypesDescriptor.concat(\n            [\n                DtypesDescriptor(self.schema[[\"a\", \"b\"]]),\n                DtypesDescriptor(remaining_dtype=np.dtype(bool)),\n            ]\n        )\n        # none of the descriptors had missing column names, so we can preserve 'remaining_dtype'\n        exp = DtypesDescriptor(self.schema[[\"a\", \"b\"]], remaining_dtype=np.dtype(bool))\n        assert res.equals(exp)\n\n        res = DtypesDescriptor.concat(\n            [\n                DtypesDescriptor(self.schema[[\"a\", \"b\"]], know_all_names=False),\n                DtypesDescriptor(remaining_dtype=np.dtype(bool)),\n            ]\n        )\n        # can't preserve 'remaining_dtype' since first descriptor has unknown column names\n        exp = DtypesDescriptor(self.schema[[\"a\", \"b\"]], know_all_names=False)\n        assert res.equals(exp)\n\n        res = DtypesDescriptor.concat(\n            [\n                DtypesDescriptor(self.schema[[\"a\", \"b\"]]),\n                DtypesDescriptor(\n                    cols_with_unknown_dtypes=[\"d\", \"e\"], know_all_names=False\n                ),\n                DtypesDescriptor(remaining_dtype=np.dtype(bool)),\n            ]\n        )\n        # can't preserve 'remaining_dtype' since second descriptor has unknown column names\n        exp = DtypesDescriptor(\n            self.schema[[\"a\", \"b\"]],\n            cols_with_unknown_dtypes=[\"d\", \"e\"],\n            know_all_names=False,\n        )\n        assert res.equals(exp)\n\n   
     res = DtypesDescriptor.concat(\n            [\n                DtypesDescriptor(\n                    self.schema[[\"a\", \"b\"]],\n                ),\n                DtypesDescriptor(\n                    cols_with_unknown_dtypes=[\"d\", \"e\"],\n                ),\n                DtypesDescriptor(remaining_dtype=np.dtype(bool)),\n            ]\n        )\n        # none of the descriptors had missing column names, so we can preserve 'remaining_dtype'\n        exp = DtypesDescriptor(\n            self.schema[[\"a\", \"b\"]],\n            cols_with_unknown_dtypes=[\"d\", \"e\"],\n            remaining_dtype=np.dtype(bool),\n        )\n        assert res.equals(exp)\n\n        res = DtypesDescriptor.concat(\n            [\n                DtypesDescriptor(\n                    self.schema[[\"a\", \"b\"]], remaining_dtype=np.dtype(bool)\n                ),\n                DtypesDescriptor(\n                    cols_with_unknown_dtypes=[\"d\", \"e\"], remaining_dtype=np.dtype(float)\n                ),\n                DtypesDescriptor(remaining_dtype=np.dtype(bool)),\n            ]\n        )\n        # remaining dtypes don't match, so we drop them and set 'know_all_names=False'\n        exp = DtypesDescriptor(\n            self.schema[[\"a\", \"b\"]],\n            cols_with_unknown_dtypes=[\"d\", \"e\"],\n            know_all_names=False,\n        )\n        assert res.equals(exp)\n\n    @pytest.mark.parametrize(\n        \"initial_dtypes, result_cols_with_known_dtypes, result_cols_with_unknown_dtypes\",\n        [\n            [\n                # initial dtypes (cols_with_known_dtypes, cols_with_unknown_dtypes, remaining_dtype):\n                # dtypes for all columns are known\n                [\n                    ([\"a\", \"b\", \"c\", \"d\"], [], None),\n                    ([\"a\", \"b\", \"e\", \"d\"], [], None),\n                    ([\"a\", \"b\"], [], None),\n                ],\n                # result_cols_with_known_dtypes:\n                
# all dtypes were known in the beginning, expecting the same\n                # for the result\n                [\"a\", \"b\", \"c\", \"d\", \"e\"],\n                # result_cols_with_unknown_dtypes\n                [],\n            ],\n            [\n                # initial dtypes (cols_with_known_dtypes, cols_with_unknown_dtypes, remaining_dtype)\n                [\n                    ([\"a\", \"b\"], [\"c\", \"d\"], None),\n                    ([\"a\", \"b\", \"d\"], [\"e\"], None),\n                    ([\"a\", \"b\"], [], None),\n                ],\n                # result_cols_with_known_dtypes:\n                # across all dataframes, dtypes were only known for 'a' and 'b' columns\n                [\"a\", \"b\"],\n                # result_cols_with_unknown_dtypes\n                [\"c\", \"d\", \"e\"],\n            ],\n            [\n                # initial dtypes (cols_with_known_dtypes, cols_with_unknown_dtypes, remaining_dtype):\n                # the 'e' column in the second frame is missing here, emulating 'know_all_names=False' case\n                [\n                    ([\"a\", \"b\"], [\"c\", \"d\"], None),\n                    ([\"a\", \"b\", \"d\"], [], None),\n                    ([\"a\", \"b\"], [], None),\n                ],\n                # result_cols_with_known_dtypes\n                [\"a\", \"b\"],\n                # result_cols_with_unknown_dtypes:\n                # the missing 'e' column will be deduced from the resulting frame after '.concat()'\n                [\"c\", \"d\", \"e\"],\n            ],\n            [\n                # initial dtypes (cols_with_known_dtypes, cols_with_unknown_dtypes, remaining_dtype)\n                # the 'c' column in the first frame is described using 'remaining_dtype'\n                [\n                    ([\"a\", \"b\", \"d\"], [], np.dtype(bool)),\n                    ([\"a\", \"b\", \"e\", \"d\"], [], None),\n                    ([\"a\", \"b\"], [], None),\n                ],\n         
       # result_cols_with_known_dtypes:\n                # remaining dtypes are not supported by 'concat(axis=0)', so dtype for the 'c'\n                # column is missing here\n                [\"a\", \"b\", \"e\", \"d\"],\n                # result_cols_with_unknown_dtypes:\n                [\"c\"],\n            ],\n        ],\n    )\n    def test_concat_axis_1(\n        self,\n        initial_dtypes,\n        result_cols_with_known_dtypes,\n        result_cols_with_unknown_dtypes,\n    ):\n        \"\"\"\n        Test that ``DtypesDescriptor.concat(axis=1)`` works as expected.\n\n        Parameters\n        ----------\n        initial_dtypes : list of tuples: (cols_with_known_dtypes, cols_with_unknown_dtypes, remaining_dtype)\n            Describe how to build ``DtypesDescriptor`` for each of the three dataframes.\n        result_cols_with_known_dtypes : list of labels\n            Column names for which dtypes has to be determined after ``.concat()``.\n        result_cols_with_unknown_dtypes : list of labels\n            Column names for which dtypes has to be unknown after ``.concat()``.\n        \"\"\"\n        md_df1, pd_df1 = create_test_dfs(\n            {\n                \"a\": [1, 2, 3],\n                \"b\": [3.5, 4.5, 5.5],\n                \"c\": [True, False, True],\n                \"d\": [\"a\", \"b\", \"c\"],\n            }\n        )\n        md_df2, pd_df2 = create_test_dfs(\n            {\n                \"a\": [1.5, 2.5, 3.5],\n                \"b\": [3.5, 4.5, 5.5],\n                \"e\": [True, False, True],\n                \"d\": [\"a\", \"b\", \"c\"],\n            }\n        )\n        md_df3, pd_df3 = create_test_dfs({\"a\": [1, 2, 3], \"b\": [3.5, 4.5, 5.5]})\n\n        for md_df, (known_cols, unknown_cols, remaining_dtype) in zip(\n            [md_df1, md_df2, md_df3], initial_dtypes\n        ):\n            known_dtypes = {col: md_df.dtypes[col] for col in known_cols}\n            know_all_names = (\n                
len(known_cols) + len(unknown_cols) == len(md_df.columns)\n                or remaining_dtype is not None\n            )\n            # setting columns cache to 'None', in order to prevent completing 'dtypes' with the materialized columns\n            md_df._query_compiler.set_frame_columns_cache(None)\n            md_df._query_compiler.set_frame_dtypes_cache(\n                ModinDtypes(\n                    DtypesDescriptor(\n                        known_dtypes,\n                        unknown_cols,\n                        remaining_dtype,\n                        know_all_names=know_all_names,\n                    )\n                )\n            )\n        md_dtypes = pd.concat(\n            [md_df1, md_df2, md_df3]\n        )._query_compiler._modin_frame._dtypes\n        pd_dtypes = pandas.concat([pd_df1, pd_df2, pd_df3]).dtypes\n        if len(result_cols_with_known_dtypes) == len(pd_dtypes):\n            md_dtypes = (\n                md_dtypes if isinstance(md_dtypes, pandas.Series) else md_dtypes._value\n            )\n            assert isinstance(md_dtypes, pandas.Series)\n            assert md_dtypes.equals(pd_dtypes)\n        else:\n            assert set(md_dtypes._value._known_dtypes.keys()) == set(\n                result_cols_with_known_dtypes\n            )\n            # reindexing to ensure proper order\n            md_known_dtypes = pandas.Series(md_dtypes._value._known_dtypes).reindex(\n                result_cols_with_known_dtypes\n            )\n            assert md_known_dtypes.equals(pd_dtypes[result_cols_with_known_dtypes])\n            assert set(md_dtypes._value._cols_with_unknown_dtypes) == set(\n                result_cols_with_unknown_dtypes\n            )\n\n    def test_ModinDtypes_duplicated_concat(self):\n        # test that 'ModinDtypes' is able to perform dtypes concatenation on duplicated labels\n        # if all of them are Serieses\n        res = ModinDtypes.concat([pandas.Series([np.dtype(\"int64\")], index=[\"a\"])] 
* 2)\n        assert isinstance(res._value, pandas.Series)\n        assert res._value.equals(\n            pandas.Series([np.dtype(\"int64\"), np.dtype(\"int64\")], index=[\"a\", \"a\"])\n        )\n\n        # test that 'ModinDtypes.concat' with duplicated labels raises when not all dtypes are materialized\n        with pytest.raises(NotImplementedError):\n            res = ModinDtypes.concat(\n                [\n                    pandas.Series([np.dtype(\"int64\")], index=[\"a\"]),\n                    DtypesDescriptor(cols_with_unknown_dtypes=[\"a\"]),\n                ]\n            )\n\n    def test_update_parent(self):\n        \"\"\"\n        Test that updating parents in ``DtypesDescriptor`` also propagates to stored lazy categoricals.\n        \"\"\"\n        # 'df1' will have a materialized 'pandas.Series' as dtypes cache\n        df1 = pd.DataFrame({\"a\": [1, 1, 2], \"b\": [3, 4, 5]}).astype({\"a\": \"category\"})\n        assert isinstance(df1.dtypes[\"a\"], LazyProxyCategoricalDtype)\n\n        # 'df2' will have a 'DtypesDescriptor' with unknown dtypes for a column 'c'\n        df2 = pd.DataFrame({\"c\": [2, 3, 4]})\n        df2._query_compiler.set_frame_dtypes_cache(None)\n        dtypes_cache = df2._query_compiler._modin_frame._dtypes\n        assert isinstance(\n            dtypes_cache._value, DtypesDescriptor\n        ) and dtypes_cache._value._cols_with_unknown_dtypes == [\"c\"]\n\n        # concatenating 'df1' and 'df2' to get a 'DtypesDescriptor' storing lazy categories\n        # in its 'known_dtypes' field\n        res = pd.concat([df1, df2], axis=1)\n        old_parent = df1._query_compiler._modin_frame\n        new_parent = res._query_compiler._modin_frame\n        dtypes_cache = new_parent._dtypes._value\n\n        # verifying that the reference for lazy categories to a new parent was updated\n        assert dtypes_cache._parent_df is new_parent\n        assert dtypes_cache._known_dtypes[\"a\"]._parent is new_parent\n        assert 
old_parent._dtypes[\"a\"]._parent is old_parent\n\n    @pytest.mark.parametrize(\n        \"initial_dtypes, result_dtypes\",\n        [\n            [\n                DtypesDescriptor(\n                    {\"a\": np.dtype(\"int64\"), \"b\": np.dtype(float), \"c\": np.dtype(float)}\n                ),\n                DtypesDescriptor(\n                    cols_with_unknown_dtypes=[\"col1\", \"col2\", \"col3\"],\n                    columns_order={0: \"col1\", 1: \"col2\", 2: \"col3\"},\n                ),\n            ],\n            [\n                DtypesDescriptor(\n                    {\n                        \"a\": np.dtype(\"int64\"),\n                        \"b\": np.dtype(float),\n                        \"c\": np.dtype(float),\n                    },\n                    columns_order={0: \"a\", 1: \"b\", 2: \"c\"},\n                ),\n                DtypesDescriptor(\n                    {\n                        \"col1\": np.dtype(\"int64\"),\n                        \"col2\": np.dtype(float),\n                        \"col3\": np.dtype(float),\n                    },\n                    columns_order={0: \"col1\", 1: \"col2\", 2: \"col3\"},\n                ),\n            ],\n            [\n                DtypesDescriptor(\n                    {\"a\": np.dtype(\"int64\"), \"b\": np.dtype(float)},\n                    cols_with_unknown_dtypes=[\"c\"],\n                    columns_order={0: \"a\", 1: \"b\", 2: \"c\"},\n                ),\n                DtypesDescriptor(\n                    {\"col1\": np.dtype(\"int64\"), \"col2\": np.dtype(float)},\n                    cols_with_unknown_dtypes=[\"col3\"],\n                    columns_order={0: \"col1\", 1: \"col2\", 2: \"col3\"},\n                ),\n            ],\n            [\n                DtypesDescriptor(\n                    {\"a\": np.dtype(\"int64\")},\n                    cols_with_unknown_dtypes=[\"c\"],\n                    know_all_names=False,\n                ),\n         
       DtypesDescriptor(\n                    cols_with_unknown_dtypes=[\"col1\", \"col2\", \"col3\"],\n                    columns_order={0: \"col1\", 1: \"col2\", 2: \"col3\"},\n                ),\n            ],\n            [\n                DtypesDescriptor(\n                    {\"a\": np.dtype(\"int64\")}, remaining_dtype=np.dtype(float)\n                ),\n                DtypesDescriptor(\n                    cols_with_unknown_dtypes=[\"col1\", \"col2\", \"col3\"],\n                    columns_order={0: \"col1\", 1: \"col2\", 2: \"col3\"},\n                ),\n            ],\n            [\n                lambda: pandas.Series(\n                    [np.dtype(\"int64\"), np.dtype(float), np.dtype(float)],\n                    index=[\"a\", \"b\", \"c\"],\n                ),\n                lambda: pandas.Series(\n                    [np.dtype(\"int64\"), np.dtype(float), np.dtype(float)],\n                    index=[\"col1\", \"col2\", \"col3\"],\n                ),\n            ],\n            [\n                pandas.Series(\n                    [np.dtype(\"int64\"), np.dtype(float), np.dtype(float)],\n                    index=[\"a\", \"b\", \"c\"],\n                ),\n                pandas.Series(\n                    [np.dtype(\"int64\"), np.dtype(float), np.dtype(float)],\n                    index=[\"col1\", \"col2\", \"col3\"],\n                ),\n            ],\n        ],\n    )\n    def test_set_index_dataframe(self, initial_dtypes, result_dtypes):\n        \"\"\"Test that changing labels for a dataframe also updates labels of dtypes.\"\"\"\n        df = pd.DataFrame(\n            {\"a\": [1, 2, 3], \"b\": [3.0, 4.0, 5.0], \"c\": [3.2, 4.5, 5.4]}\n        )._query_compiler._modin_frame\n        df.set_columns_cache(None)\n        if isinstance(initial_dtypes, DtypesDescriptor):\n            initial_dtypes = ModinDtypes(initial_dtypes)\n\n        df.set_dtypes_cache(initial_dtypes)\n        df.columns = [\"col1\", \"col2\", \"col3\"]\n\n   
     if result_dtypes is not None:\n            if callable(result_dtypes):\n                assert callable(df._dtypes._value)\n                assert df._dtypes._value().equals(result_dtypes())\n            else:\n                assert df._dtypes._value.equals(result_dtypes)\n        assert df.dtypes.index.equals(pandas.Index([\"col1\", \"col2\", \"col3\"]))\n\n    def test_set_index_with_dupl_labels(self):\n        \"\"\"Verify that setting duplicated columns doesn't propagate any errors to a user.\"\"\"\n        df = pd.DataFrame({\"a\": [1, 2, 3, 4], \"b\": [3.5, 4.4, 5.5, 6.6]})\n        # making sure that dtypes are represented by an unmaterialized dtypes-descriptor\n        df._query_compiler.set_frame_dtypes_cache(None)\n\n        df.columns = [\"a\", \"a\"]\n        assert df.dtypes.equals(\n            pandas.Series([np.dtype(int), np.dtype(\"float64\")], index=[\"a\", \"a\"])\n        )\n\n    def test_reset_index_mi_columns(self):\n        # reproducer from: https://github.com/modin-project/modin/issues/6904\n        md_df, pd_df = create_test_dfs({\"a\": [1, 1, 2, 2], \"b\": [3, 3, 4, 4]})\n        eval_general(\n            md_df,\n            pd_df,\n            lambda df: df.groupby(\"a\").agg({\"b\": [\"min\", \"std\"]}).reset_index().dtypes,\n        )\n\n    def test_concat_mi(self):\n        \"\"\"\n        Verify that concatenating dfs with non-MultiIndex and MultiIndex columns results into valid indices for lazy dtypes.\n        \"\"\"\n        md_df1, pd_df1 = create_test_dfs({\"a\": [1, 1, 2, 2], \"b\": [3, 3, 4, 4]})\n        md_df2, pd_df2 = create_test_dfs(\n            {(\"l1\", \"v1\"): [1, 1, 2, 2], (\"l1\", \"v2\"): [3, 3, 4, 4]}\n        )\n\n        # Drop actual dtypes in order to use partially-known dtypes\n        md_df1._query_compiler.set_frame_dtypes_cache(None)\n        md_df2._query_compiler.set_frame_dtypes_cache(None)\n\n        md_res = pd.concat([md_df1, md_df2], axis=1)\n        pd_res = pandas.concat([pd_df1, 
pd_df2], axis=1)\n        df_equals(md_res.dtypes, pd_res.dtypes)\n\n\nclass TestZeroComputationDtypes:\n    \"\"\"\n    Test cases that shouldn't trigger dtypes computation during their execution.\n    \"\"\"\n\n    @pytest.mark.parametrize(\"self_dtype\", [\"materialized\", \"partial\", \"unknown\"])\n    @pytest.mark.parametrize(\n        \"value, value_dtype\",\n        [\n            [3.5, np.dtype(float)],\n            [[3.5, 2.4], np.dtype(float)],\n            [np.array([3.5, 2.4]), np.dtype(float)],\n            [pd.Series([3.5, 2.4]), np.dtype(float)],\n        ],\n    )\n    def test_preserve_dtypes_setitem(self, self_dtype, value, value_dtype):\n        \"\"\"\n        Test that ``df[single_existing_column] = value`` preserves dtypes cache.\n        \"\"\"\n        with mock.patch.object(PandasDataframe, \"_compute_dtypes\") as patch:\n            df = pd.DataFrame({\"a\": [1, 2], \"b\": [3, 4], \"c\": [3, 4]})\n            if self_dtype == \"materialized\":\n                assert df._query_compiler.frame_has_materialized_dtypes\n            elif self_dtype == \"partial\":\n                df._query_compiler.set_frame_dtypes_cache(\n                    ModinDtypes(\n                        DtypesDescriptor(\n                            {\"a\": np.dtype(\"int64\")},\n                            cols_with_unknown_dtypes=[\"b\", \"c\"],\n                        )\n                    )\n                )\n            elif self_dtype == \"unknown\":\n                df._query_compiler.set_frame_dtypes_cache(None)\n            else:\n                raise NotImplementedError(self_dtype)\n\n            df[\"b\"] = value\n\n            if self_dtype == \"materialized\":\n                result_dtype = pandas.Series(\n                    [np.dtype(\"int64\"), value_dtype, np.dtype(\"int64\")],\n                    index=[\"a\", \"b\", \"c\"],\n                )\n                assert df._query_compiler.frame_has_materialized_dtypes\n                assert 
df.dtypes.equals(result_dtype)\n            elif self_dtype == \"partial\":\n                result_dtype = DtypesDescriptor(\n                    {\"a\": np.dtype(\"int64\"), \"b\": value_dtype},\n                    cols_with_unknown_dtypes=[\"c\"],\n                    columns_order={0: \"a\", 1: \"b\", 2: \"c\"},\n                )\n                df._query_compiler._modin_frame._dtypes._value.equals(result_dtype)\n            elif self_dtype == \"unknown\":\n                result_dtype = DtypesDescriptor(\n                    {\"b\": value_dtype},\n                    cols_with_unknown_dtypes=[\"a\", \"b\"],\n                    columns_order={0: \"a\", 1: \"b\", 2: \"c\"},\n                )\n                df._query_compiler._modin_frame._dtypes._value.equals(result_dtype)\n            else:\n                raise NotImplementedError(self_dtype)\n\n        patch.assert_not_called()\n\n    @pytest.mark.parametrize(\"self_dtype\", [\"materialized\", \"partial\", \"unknown\"])\n    @pytest.mark.parametrize(\n        \"value, value_dtype\",\n        [\n            [3.5, np.dtype(float)],\n            [[3.5, 2.4], np.dtype(float)],\n            [np.array([3.5, 2.4]), np.dtype(float)],\n            [pd.Series([3.5, 2.4]), np.dtype(float)],\n        ],\n    )\n    def test_preserve_dtypes_insert(self, self_dtype, value, value_dtype):\n        with mock.patch.object(PandasDataframe, \"_compute_dtypes\") as patch:\n            df = pd.DataFrame({\"a\": [1, 2], \"b\": [3, 4]})\n            if self_dtype == \"materialized\":\n                assert df._query_compiler.frame_has_materialized_dtypes\n            elif self_dtype == \"partial\":\n                df._query_compiler.set_frame_dtypes_cache(\n                    ModinDtypes(\n                        DtypesDescriptor(\n                            {\"a\": np.dtype(\"int64\")}, cols_with_unknown_dtypes=[\"b\"]\n                        )\n                    )\n                )\n            elif self_dtype == 
\"unknown\":\n                df._query_compiler.set_frame_dtypes_cache(None)\n            else:\n                raise NotImplementedError(self_dtype)\n\n            df.insert(loc=0, column=\"c\", value=value)\n\n            if self_dtype == \"materialized\":\n                result_dtype = pandas.Series(\n                    [value_dtype, np.dtype(\"int64\"), np.dtype(\"int64\")],\n                    index=[\"c\", \"a\", \"b\"],\n                )\n                assert df._query_compiler.frame_has_materialized_dtypes\n                assert df.dtypes.equals(result_dtype)\n            elif self_dtype == \"partial\":\n                result_dtype = DtypesDescriptor(\n                    {\"a\": np.dtype(\"int64\"), \"c\": value_dtype},\n                    cols_with_unknown_dtypes=[\"b\"],\n                    columns_order={0: \"c\", 1: \"a\", 2: \"b\"},\n                )\n                df._query_compiler._modin_frame._dtypes._value.equals(result_dtype)\n            elif self_dtype == \"unknown\":\n                result_dtype = DtypesDescriptor(\n                    {\"c\": value_dtype},\n                    cols_with_unknown_dtypes=[\"a\", \"b\"],\n                    columns_order={0: \"c\", 1: \"a\", 2: \"b\"},\n                )\n                df._query_compiler._modin_frame._dtypes._value.equals(result_dtype)\n            else:\n                raise NotImplementedError(self_dtype)\n\n        patch.assert_not_called()\n\n    def test_get_dummies_case(self):\n        with mock.patch.object(PandasDataframe, \"_compute_dtypes\") as patch:\n            df = pd.DataFrame(\n                {\"items\": [1, 2, 3, 4], \"b\": [3, 3, 4, 4], \"c\": [1, 0, 0, 1]}\n            )\n            res = pd.get_dummies(df, columns=[\"b\", \"c\"])\n            cols = [col for col in res.columns if col != \"items\"]\n            res[cols] = res[cols] / res[cols].mean()\n\n            assert res._query_compiler.frame_has_materialized_dtypes\n\n        
patch.assert_not_called()\n\n    @pytest.mark.parametrize(\"has_materialized_index\", [True, False])\n    @pytest.mark.parametrize(\"drop\", [True, False])\n    def test_preserve_dtypes_reset_index(self, drop, has_materialized_index):\n        with mock.patch.object(PandasDataframe, \"_compute_dtypes\") as patch:\n            # case 1: 'df' has complete dtype by default\n            df = pd.DataFrame({\"a\": [1, 2, 3]})\n            if has_materialized_index:\n                assert df._query_compiler.frame_has_materialized_index\n            else:\n                df._query_compiler.set_frame_index_cache(None)\n                assert not df._query_compiler.frame_has_materialized_index\n            assert df._query_compiler.frame_has_materialized_dtypes\n\n            res = df.reset_index(drop=drop)\n            if drop:\n                # we droped the index, so columns and dtypes shouldn't change\n                assert res._query_compiler.frame_has_materialized_dtypes\n                assert res.dtypes.equals(df.dtypes)\n            else:\n                if has_materialized_index:\n                    # we should have inserted index dtype into the descriptor,\n                    # and since both of them are materialized, the result should be\n                    # materialized too\n                    assert res._query_compiler.frame_has_materialized_dtypes\n                    assert res.dtypes.equals(\n                        pandas.Series(\n                            [np.dtype(\"int64\"), np.dtype(\"int64\")], index=[\"index\", \"a\"]\n                        )\n                    )\n                else:\n                    # we now know that there are cols with unknown name and dtype in our dataframe,\n                    # so the resulting dtypes should contain information only about original column\n                    expected_dtypes = DtypesDescriptor(\n                        {\"a\": np.dtype(\"int64\")},\n                        
know_all_names=False,\n                    )\n                    assert res._query_compiler._modin_frame._dtypes._value.equals(\n                        expected_dtypes\n                    )\n\n            # case 2: 'df' has partial dtype by default\n            df = pd.DataFrame({\"a\": [1, 2, 3], \"b\": [3, 4, 5]})\n            df._query_compiler.set_frame_dtypes_cache(\n                ModinDtypes(\n                    DtypesDescriptor(\n                        {\"a\": np.dtype(\"int64\")}, cols_with_unknown_dtypes=[\"b\"]\n                    )\n                )\n            )\n            if has_materialized_index:\n                assert df._query_compiler.frame_has_materialized_index\n            else:\n                df._query_compiler.set_frame_index_cache(None)\n                assert not df._query_compiler.frame_has_materialized_index\n\n            res = df.reset_index(drop=drop)\n            if drop:\n                # we droped the index, so columns and dtypes shouldn't change\n                assert res._query_compiler._modin_frame._dtypes._value.equals(\n                    df._query_compiler._modin_frame._dtypes._value\n                )\n            else:\n                if has_materialized_index:\n                    # we should have inserted index dtype into the descriptor,\n                    # the resulted dtype should have information about 'index' and 'a' columns,\n                    # and miss dtype info for 'b' column\n                    expected_dtypes = DtypesDescriptor(\n                        {\"index\": np.dtype(\"int64\"), \"a\": np.dtype(\"int64\")},\n                        cols_with_unknown_dtypes=[\"b\"],\n                        columns_order={0: \"index\", 1: \"a\", 2: \"b\"},\n                    )\n                    assert res._query_compiler._modin_frame._dtypes._value.equals(\n                        expected_dtypes\n                    )\n                else:\n                    # we miss info about the 
'index' column since it wasn't materialized at\n                    # the time of 'reset_index()' and we're still missing dtype info for 'b' column\n                    expected_dtypes = DtypesDescriptor(\n                        {\"a\": np.dtype(\"int64\")},\n                        cols_with_unknown_dtypes=[\"b\"],\n                        know_all_names=False,\n                    )\n                    assert res._query_compiler._modin_frame._dtypes._value.equals(\n                        expected_dtypes\n                    )\n\n        patch.assert_not_called()\n\n    def test_groupby_index_dtype(self):\n        with mock.patch.object(PandasDataframe, \"_compute_dtypes\") as patch:\n            # case 1: MapReduce impl, Series as an output of groupby\n            df = pd.DataFrame({\"a\": [1, 2, 2], \"b\": [3, 4, 5]})\n            res = df.groupby(\"a\").size().reset_index(name=\"new_name\")\n            res_dtypes = res._query_compiler._modin_frame._dtypes._value\n            assert \"a\" in res_dtypes._known_dtypes\n            assert res_dtypes._known_dtypes[\"a\"] == np.dtype(\"int64\")\n\n            # case 2: ExperimentalImpl impl, Series as an output of groupby\n            RangePartitioning.put(True)\n            try:\n                df = pd.DataFrame({\"a\": [1, 2, 2], \"b\": [3, 4, 5]})\n                res = df.groupby(\"a\").size().reset_index(name=\"new_name\")\n                res_dtypes = res._query_compiler._modin_frame._dtypes._value\n                assert \"a\" in res_dtypes._known_dtypes\n                assert res_dtypes._known_dtypes[\"a\"] == np.dtype(\"int64\")\n            finally:\n                RangePartitioning.put(False)\n\n            # case 3: MapReduce impl, DataFrame as an output of groupby\n            df = pd.DataFrame({\"a\": [1, 2, 2], \"b\": [3, 4, 5]})\n            res = df.groupby(\"a\").sum().reset_index()\n            res_dtypes = res._query_compiler._modin_frame._dtypes._value\n            assert \"a\" in 
res_dtypes._known_dtypes\n            assert res_dtypes._known_dtypes[\"a\"] == np.dtype(\"int64\")\n\n            # case 4: ExperimentalImpl impl, DataFrame as an output of groupby\n            RangePartitioning.put(True)\n            try:\n                df = pd.DataFrame({\"a\": [1, 2, 2], \"b\": [3, 4, 5]})\n                res = df.groupby(\"a\").sum().reset_index()\n                res_dtypes = res._query_compiler._modin_frame._dtypes._value\n                assert \"a\" in res_dtypes._known_dtypes\n                assert res_dtypes._known_dtypes[\"a\"] == np.dtype(\"int64\")\n            finally:\n                RangePartitioning.put(False)\n\n            # case 5: FullAxis impl, DataFrame as an output of groupby\n            df = pd.DataFrame({\"a\": [1, 2, 2], \"b\": [3, 4, 5]})\n            res = df.groupby(\"a\").quantile().reset_index()\n            res_dtypes = res._query_compiler._modin_frame._dtypes._value\n            assert \"a\" in res_dtypes._known_dtypes\n            assert res_dtypes._known_dtypes[\"a\"] == np.dtype(\"int64\")\n\n        patch.assert_not_called()\n\n\n@pytest.mark.skipif(Engine.get() != \"Ray\", reason=\"Ray specific\")\n@pytest.mark.parametrize(\"mode\", [None, \"Auto\", \"On\", \"Off\"])\ndef test_ray_lazy_exec_mode(mode):\n    import ray\n\n    from modin.config import LazyExecution\n    from modin.core.execution.ray.common.deferred_execution import DeferredExecution\n    from modin.core.execution.ray.common.utils import ObjectIDType\n    from modin.core.execution.ray.implementations.pandas_on_ray.partitioning import (\n        PandasOnRayDataframePartition,\n    )\n\n    orig_mode = LazyExecution.get()\n    try:\n        if mode is None:\n            mode = LazyExecution.get()\n        else:\n            LazyExecution.put(mode)\n            assert mode == LazyExecution.get()\n\n        df = pandas.DataFrame({\"A\": [1, 2, 3]})\n        part = PandasOnRayDataframePartition(ray.put(df))\n\n        def func(df):\n            
return len(df)\n\n        ray_func = ray.put(func)\n\n        if mode == \"Auto\":\n            assert isinstance(part.apply(ray_func)._data_ref, ObjectIDType)\n            assert isinstance(\n                part.add_to_apply_calls(ray_func)._data_ref, DeferredExecution\n            )\n        elif mode == \"On\":\n            assert isinstance(part.apply(ray_func)._data_ref, DeferredExecution)\n            assert isinstance(\n                part.add_to_apply_calls(ray_func)._data_ref, DeferredExecution\n            )\n        elif mode == \"Off\":\n            assert isinstance(part.apply(ray_func)._data_ref, ObjectIDType)\n            assert isinstance(part.add_to_apply_calls(ray_func)._data_ref, ObjectIDType)\n        else:\n            pytest.fail(f\"Invalid value: {mode}\")\n    finally:\n        LazyExecution.put(orig_mode)\n\n\n@pytest.mark.skipif(Engine.get() != \"Ray\", reason=\"Ray specific\")\ndef test_materialization_hook_serialization():\n    @ray.remote(num_returns=1)\n    def f1():\n        return [1, 2, 3]\n\n    @ray.remote(num_returns=1)\n    def f2(i):\n        return i\n\n    hook = MetaList(f1.remote())[2]\n    assert ray.get(f2.remote(hook)) == 3\n\n\ndef test_remote_function():\n    def get_func():\n        @remote_function\n        def remote_func(arg):\n            return arg\n\n        return remote_func\n\n    def get_capturing_func(arg):\n        @remote_function\n        def remote_func():\n            return arg\n\n        return remote_func\n\n    if Engine.get() in (\"Ray\", \"Unidist\"):\n        from modin.core.execution.utils import _remote_function_cache\n\n        cache_len = len(_remote_function_cache)\n        assert get_func() is get_func()\n        assert get_func() in _remote_function_cache.values()\n        assert get_capturing_func(1) not in _remote_function_cache.values()\n        assert len(_remote_function_cache) == cache_len + 1\n\n    assert materialize(deploy(get_func(), [123])) == 123\n    assert 
get_capturing_func(1) is not get_capturing_func(2)\n    assert (\n        materialize(deploy(get_capturing_func(1)))\n        + materialize(deploy(get_capturing_func(2)))\n        == 3\n    )\n\n\n@pytest.mark.parametrize(\n    \"partitioning_scheme,expected_map_approach\",\n    [\n        pytest.param(\n            lambda df: {\n                \"row_lengths\": [df.shape[0] // CpuCount.get()] * CpuCount.get(),\n                \"column_widths\": [df.shape[1]],\n            },\n            \"map_partitions\",\n            id=\"one_column_partition\",\n        ),\n        pytest.param(\n            lambda df: {\n                \"row_lengths\": [df.shape[0] // (CpuCount.get() * 2)]\n                * (CpuCount.get() * 2),\n                \"column_widths\": [df.shape[1]],\n            },\n            \"map_partitions_joined_by_column\",\n            id=\"very_long_column_partition\",\n        ),\n        pytest.param(\n            lambda df: {\n                \"row_lengths\": [df.shape[0] // CpuCount.get()] * CpuCount.get(),\n                \"column_widths\": [df.shape[1] // CpuCount.get()] * CpuCount.get(),\n            },\n            \"map_axis_partitions\",\n            id=\"perfect_partitioning\",\n        ),\n    ],\n)\ndef test_dynamic_partitioning(partitioning_scheme, expected_map_approach):\n    data_size = MinRowPartitionSize.get() * CpuCount.get()\n    data = {f\"col{i}\": np.ones(data_size) for i in range(data_size)}\n    df = pandas.DataFrame(data)\n\n    modin_df = construct_modin_df_by_scheme(df, partitioning_scheme(df))\n    partitions = modin_df._query_compiler._modin_frame._partitions\n    partition_mgr_cls = modin_df._query_compiler._modin_frame._partition_mgr_cls\n\n    with mock.patch.object(\n        partition_mgr_cls,\n        expected_map_approach,\n        wraps=getattr(partition_mgr_cls, expected_map_approach),\n    ) as expected_method:\n        with context(DynamicPartitioning=True):\n            
partition_mgr_cls.map_partitions(partitions, lambda x: x * 2)\n            expected_method.assert_called()\n\n\n@pytest.mark.parametrize(\"npartitions\", [7, CpuCount.get() * 2])\ndef test_map_partitions_joined_by_column(npartitions):\n    with context(NPartitions=npartitions):\n        ncols = MinColumnPartitionSize.get()\n        nrows = MinRowPartitionSize.get() * CpuCount.get() * 2\n        data = {f\"col{i}\": np.ones(nrows) for i in range(ncols)}\n        df = pd.DataFrame(data)\n        partitions = df._query_compiler._modin_frame._partitions\n        partition_mgr_cls = df._query_compiler._modin_frame._partition_mgr_cls\n\n        def map_func(df, first_arg, extra_arg=0):\n            return df.map(lambda x: (x * first_arg) + extra_arg)\n\n        column_splits = 2\n        map_func_args = (2,)\n        map_func_kwargs = {\"extra_arg\": 1}\n\n        # this approach doesn't work if column_splits == 0\n        with pytest.raises(ValueError):\n            partition_mgr_cls.map_partitions_joined_by_column(\n                partitions, 0, map_func, map_func_args, map_func_kwargs\n            )\n\n        result_partitions = partition_mgr_cls.map_partitions_joined_by_column(\n            partitions,\n            column_splits,\n            map_func,\n            map_func_args,\n            map_func_kwargs,\n        )\n        assert (\n            result_partitions.shape == partitions.shape\n        ), \"The result has a different split than the original.\"\n        for i in range(result_partitions.shape[0]):\n            assert np.all(\n                result_partitions[i][0].to_numpy() == 3\n            ), \"Invalid map function result.\"\n\n\ndef test_fold_operator():\n    new_index = list(range(500, 1000))\n    new_columns = [\"b\"]\n\n    initial_df = pandas.DataFrame({\"a\": range(0, 1000)})\n    modin_df = pd.DataFrame(initial_df)\n    expected_df = pandas.DataFrame(\n        list(range(0, 1000, 2)), index=new_index, columns=new_columns\n    )\n\n    def 
filter_func(df):\n        result = df[df.index % 2 == 0]\n        result.index = new_index\n        result.columns = new_columns\n        return result\n\n    PandasQueryCompiler.filter_func = Fold.register(filter_func)\n\n    def filter_modin_dataframe1(df):\n        return df.__constructor__(\n            query_compiler=df._query_compiler.filter_func(\n                fold_axis=0,\n                new_index=new_index,\n                new_columns=new_columns,\n            )\n        )\n\n    pd.DataFrame.filter_dataframe1 = filter_modin_dataframe1\n\n    filtered_df = modin_df.filter_dataframe1()\n\n    df_equals(filtered_df, expected_df)\n\n    def filter_modin_dataframe2(df):\n        return df.__constructor__(\n            query_compiler=df._query_compiler.filter_func(fold_axis=0)\n        )\n\n    pd.DataFrame.filter_dataframe2 = filter_modin_dataframe2\n\n    filtered_df = modin_df.filter_dataframe2()\n\n    df_equals(filtered_df, expected_df)\n\n\ndef test_default_property_warning_name():\n    # Test that when a property defaults to pandas, the raised warning mentions the full name of\n    # the pandas property rather than a hex address\n\n    @property\n    def _test_default_property(df):\n        return \"suspicious sentinel value\"\n\n    @property\n    def qc_test_default_property(qc):\n        return DataFrameDefault.register(_test_default_property)(qc)\n\n    PandasQueryCompiler.qc_test_default_property = qc_test_default_property\n\n    @property\n    def dataframe_test_default_property(df):\n        return df._query_compiler.qc_test_default_property\n\n    pd.DataFrame.dataframe_test_default_property = dataframe_test_default_property\n\n    with pytest.warns(\n        UserWarning,\n        match=\"<function DataFrame.<property fget:_test_default_property>> is not currently supported\",\n    ):\n        pd.DataFrame([[1]]).dataframe_test_default_property\n\n\n@pytest.mark.parametrize(\n    \"modify_config\",\n    [\n        {Engine: \"Ray\"},\n        
{Engine: \"Dask\"},\n    ],\n    indirect=True,\n)\ndef test_daemonic_worker_protection(modify_config):\n    # Test for issue #7346, wherein some operations on Dask cause a second submission of a task to\n    # the Dask client from the worker scope, which should not cause a new client to be created\n\n    def submission_triggering_row_operation(row):\n        row_to_dict = row.to_dict()\n        dict_to_row = pd.Series(row_to_dict)\n        return dict_to_row\n\n    df = pd.DataFrame(\n        {\n            \"A\": [\"a\", \"b\", \"c\", \"d\"],\n            \"B\": [1, 2, 3, 4],\n            \"C\": [1, 2, 3, 4],\n            \"D\": [1, 2, 3, 4],\n        }\n    )\n\n    df.apply(submission_triggering_row_operation, axis=1)\n"
  },
  {
    "path": "modin/tests/core/test_dispatcher.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nfrom contextlib import contextmanager\n\nimport pytest\n\nimport modin.pandas as pd\nfrom modin.config import Backend, Engine, Execution, Parameter, StorageFormat\nfrom modin.core.execution.dispatching.factories import factories\nfrom modin.core.execution.dispatching.factories.dispatcher import (\n    FactoryDispatcher,\n    FactoryNotFoundError,\n)\nfrom modin.core.execution.python.implementations.pandas_on_python.io import (\n    PandasOnPythonIO,\n)\nfrom modin.core.storage_formats.pandas.query_compiler import PandasQueryCompiler\nfrom modin.tests.pandas.utils import switch_execution\n\n\n@contextmanager\ndef _switch_value(config: Parameter, value: str):\n    old_value = config.get()\n    try:\n        yield config.put(value)\n    finally:\n        config.put(old_value)\n\n\nclass PandasOnTestFactory(factories.BaseFactory):\n    \"\"\"\n    Stub factory to ensure we can switch execution engine to 'Test'\n    \"\"\"\n\n    @classmethod\n    def prepare(cls):\n        \"\"\"\n        Fills in .io_cls class attribute lazily\n        \"\"\"\n        cls.io_cls = \"Foo\"\n\n\nclass TestOnPythonFactory(factories.BaseFactory):\n    \"\"\"\n    Stub factory to ensure we 
can switch partition format to 'Test'\n    \"\"\"\n\n    @classmethod\n    def prepare(cls):\n        \"\"\"\n        Fills in .io_cls class attribute lazily\n        \"\"\"\n        cls.io_cls = \"Bar\"\n\n\nclass FooOnBarFactory(factories.BaseFactory):\n    \"\"\"\n    Stub factory to ensure we can switch engine and partition to 'Foo' and 'Bar'\n    \"\"\"\n\n    @classmethod\n    def prepare(cls):\n        \"\"\"\n        Fills in .io_cls class attribute lazily\n        \"\"\"\n        cls.io_cls = \"Zug-zug\"\n\n\n# inject the stubs\nfactories.PandasOnTestFactory = PandasOnTestFactory\nfactories.TestOnPythonFactory = TestOnPythonFactory\nfactories.FooOnBarFactory = FooOnBarFactory\n\nBackend.register_backend(\n    \"Test1\",\n    Execution(\n        engine=\"Test\",\n        storage_format=\"Pandas\",\n    ),\n)\n\nBackend.register_backend(\n    \"Test2\",\n    Execution(\n        engine=\"Python\",\n        storage_format=\"Test\",\n    ),\n)\nBackend.register_backend(\n    \"Test3\",\n    Execution(\n        engine=\"Bar\",\n        storage_format=\"Foo\",\n    ),\n)\nBackend.register_backend(\n    \"Test4\",\n    Execution(\n        engine=\"Dask\",\n        storage_format=\"Pyarrow\",\n    ),\n)\n\n# register them as known \"no init\" engines for modin.pandas\nEngine.NOINIT_ENGINES |= {\"Test\", \"Bar\"}\n\n\ndef test_default_factory():\n    assert issubclass(FactoryDispatcher.get_factory(), factories.BaseFactory)\n    assert FactoryDispatcher.get_factory().io_cls\n\n\ndef test_factory_switch():\n    with switch_execution(\"Python\", \"Pandas\"):\n        with _switch_value(Engine, \"Test\"):\n            assert FactoryDispatcher.get_factory() == PandasOnTestFactory\n            assert FactoryDispatcher.get_factory().io_cls == \"Foo\"\n\n        with _switch_value(StorageFormat, \"Test\"):\n            assert FactoryDispatcher.get_factory() == TestOnPythonFactory\n            assert FactoryDispatcher.get_factory().io_cls == \"Bar\"\n\n\ndef 
test_engine_wrong_factory():\n    with pytest.raises(FactoryNotFoundError):\n        with _switch_value(Engine, \"Dask\"):\n            with _switch_value(StorageFormat, \"Pyarrow\"):\n                pass\n\n\ndef test_set_execution():\n    with switch_execution(\"Bar\", \"Foo\"):\n        assert FactoryDispatcher.get_factory() == FooOnBarFactory\n\n\ndef test_add_option():\n    class DifferentlyNamedFactory(factories.BaseFactory):\n        @classmethod\n        def prepare(cls):\n            cls.io_cls = PandasOnPythonIO\n\n    factories.StorageOnExecFactory = DifferentlyNamedFactory\n    StorageFormat.add_option(\"sToragE\")\n    Engine.add_option(\"Exec\")\n    Backend.register_backend(\n        name=\"Test5\",\n        execution=Execution(\n            engine=\"Exec\",\n            storage_format=\"Storage\",\n        ),\n    )\n\n    with switch_execution(\"Exec\", \"Storage\"):\n        df = pd.DataFrame([[1, 2, 3], [3, 4, 5], [5, 6, 7]])\n        assert isinstance(df._query_compiler, PandasQueryCompiler)\n"
  },
  {
    "path": "modin/tests/experimental/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n"
  },
  {
    "path": "modin/tests/experimental/spreadsheet/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n"
  },
  {
    "path": "modin/tests/experimental/spreadsheet/test_general.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport numpy as np\nimport pandas\nimport pytest\nfrom modin_spreadsheet import SpreadsheetWidget\n\nimport modin.experimental.spreadsheet as mss\nimport modin.pandas as pd\n\n\ndef get_test_data():\n    return {\n        \"A\": 1.0,\n        \"B\": pd.Timestamp(\"20130102\"),\n        \"C\": pd.Series(1, index=list(range(4)), dtype=\"float32\"),\n        \"D\": np.array([5, 2, 3, 1], dtype=\"int32\"),\n        \"E\": pd.Categorical([\"test\", \"train\", \"foo\", \"bar\"]),\n        \"F\": [\"foo\", \"bar\", \"buzz\", \"fox\"],\n    }\n\n\ndef test_from_dataframe():\n    data = get_test_data()\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n\n    modin_result = mss.from_dataframe(modin_df)\n    assert isinstance(modin_result, SpreadsheetWidget)\n\n    with pytest.raises(TypeError):\n        mss.from_dataframe(pandas_df)\n\n    # Check parameters don't error\n    def can_edit_row(row):\n        return row[\"D\"] > 2\n\n    modin_result = mss.from_dataframe(\n        modin_df,\n        show_toolbar=True,\n        show_history=True,\n        precision=1,\n        grid_options={\"forceFitColumns\": False, \"filterable\": False},\n        
column_options={\"D\": {\"editable\": True}},\n        column_definitions={\"editable\": False},\n        row_edit_callback=can_edit_row,\n    )\n    assert isinstance(modin_result, SpreadsheetWidget)\n\n\ndef test_to_dataframe():\n    data = get_test_data()\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n\n    spreadsheet = mss.from_dataframe(modin_df)\n    modin_result = mss.to_dataframe(spreadsheet)\n\n    assert modin_result.equals(modin_df)\n\n    with pytest.raises(TypeError):\n        mss.to_dataframe(\"Not a SpreadsheetWidget\")\n    with pytest.raises(TypeError):\n        mss.to_dataframe(pandas_df)\n"
  },
  {
    "path": "modin/tests/experimental/test_fuzzydata.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport glob\nimport os\nimport shutil\nimport uuid\n\nfrom fuzzydata.clients.modin import ModinWorkflow\nfrom fuzzydata.core.generator import generate_workflow\n\nfrom modin.config import Engine\n\n\ndef test_fuzzydata_sample_workflow():\n    # Workflow Generation Options\n    wf_name = str(uuid.uuid4())[:8]  # Unique name for the generated workflow\n    num_versions = 10  # Number of unique CSV files to generate\n    cols = 33  # Columns in Base Artifact\n    rows = 1000  # Rows in Base Artifact\n    bfactor = 1.0  # Branching Factor - 0.1 is linear, 10.0 is star-like\n    exclude_ops = [\"groupby\"]  # In-Memory groupby operations cause issue #4287\n    matfreq = 2  # How many operations to chain before materialization\n\n    engine = Engine.get().lower()\n\n    # Create Output Directory for Workflow Data\n    base_out_directory = (\n        f\"/tmp/fuzzydata-test-wf-{engine}/\"  # Must match corresponding github-action\n    )\n    if os.path.exists(base_out_directory):\n        shutil.rmtree(base_out_directory)\n    output_directory = f\"{base_out_directory}/{wf_name}/\"\n    os.makedirs(output_directory, exist_ok=True)\n\n    # Start Workflow Generation\n    
workflow = generate_workflow(\n        workflow_class=ModinWorkflow,\n        name=wf_name,\n        num_versions=num_versions,\n        base_shape=(cols, rows),\n        out_directory=output_directory,\n        bfactor=bfactor,\n        exclude_ops=exclude_ops,\n        matfreq=matfreq,\n        wf_options={\"modin_engine\": engine},\n    )\n\n    # Assertions that the workflow generation worked correctly\n    assert len(workflow) == num_versions\n    assert len(list(glob.glob(f\"{output_directory}/artifacts/*.csv\"))) == len(\n        workflow.artifact_dict\n    )\n    assert os.path.exists(f\"{output_directory}/{workflow.name}_operations.json\")\n    assert os.path.getsize(f\"{output_directory}/{workflow.name}_operations.json\") > 0\n    assert os.path.exists(f\"{output_directory}/{workflow.name}_gt_graph.csv\")\n"
  },
  {
    "path": "modin/tests/experimental/test_io_exp.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\nimport json\nimport platform\nfrom pathlib import Path\n\nimport numpy as np\nimport pandas\nimport pytest\nfrom pandas._testing import ensure_clean\n\nimport modin.experimental.pandas as pd\nfrom modin.config import AsyncReadMode, Engine\nfrom modin.tests.pandas.utils import (\n    df_equals,\n    eval_general,\n    parse_dates_values_by_id,\n    test_data,\n    time_parsing_csv_path,\n)\nfrom modin.tests.test_utils import (\n    current_execution_is_native,\n    warns_that_defaulting_to_pandas_if,\n)\nfrom modin.utils import try_cast_to_pandas\n\n\n@pytest.mark.skipif(\n    Engine.get() not in (\"Ray\", \"Unidist\", \"Dask\"),\n    reason=f\"{Engine.get()} does not have experimental API\",\n)\ndef test_from_sql_distributed(tmp_path, make_sql_connection):\n    filename = \"test_from_sql_distributed.db\"\n    table = \"test_from_sql_distributed\"\n    conn = make_sql_connection(str(tmp_path / filename), table)\n    query = \"select * from {0}\".format(table)\n\n    pandas_df = pandas.read_sql(query, conn)\n    modin_df_from_query = pd.read_sql(\n        query,\n        conn,\n        partition_column=\"col1\",\n        lower_bound=0,\n        upper_bound=6,\n        
max_sessions=2,\n    )\n    modin_df_from_table = pd.read_sql(\n        table,\n        conn,\n        partition_column=\"col1\",\n        lower_bound=0,\n        upper_bound=6,\n        max_sessions=2,\n    )\n\n    df_equals(modin_df_from_query, pandas_df)\n    df_equals(modin_df_from_table, pandas_df)\n\n\n@pytest.mark.skipif(\n    Engine.get() not in (\"Ray\", \"Unidist\", \"Dask\"),\n    reason=f\"{Engine.get()} does not have experimental API\",\n)\ndef test_from_sql_defaults(tmp_path, make_sql_connection):\n    filename = \"test_from_sql_distributed.db\"\n    table = \"test_from_sql_distributed\"\n    conn = make_sql_connection(str(tmp_path / filename), table)\n    query = \"select * from {0}\".format(table)\n\n    pandas_df = pandas.read_sql(query, conn)\n    with pytest.warns(UserWarning):\n        modin_df_from_query = pd.read_sql(query, conn)\n    with pytest.warns(UserWarning):\n        modin_df_from_table = pd.read_sql(table, conn)\n\n    df_equals(modin_df_from_query, pandas_df)\n    df_equals(modin_df_from_table, pandas_df)\n\n\n@pytest.mark.usefixtures(\"TestReadGlobCSVFixture\")\n@pytest.mark.skipif(\n    Engine.get() not in (\"Ray\", \"Unidist\", \"Dask\"),\n    reason=f\"{Engine.get()} does not have experimental glob API\",\n)\nclass TestCsvGlob:\n    def test_read_multiple_small_csv(self):\n        pandas_df = pandas.concat([pandas.read_csv(fname) for fname in pytest.files])\n        modin_df = pd.read_csv_glob(pytest.glob_path)\n\n        # Indexes get messed up when concatting so we reset both.\n        pandas_df = pandas_df.reset_index(drop=True)\n        modin_df = modin_df.reset_index(drop=True)\n\n        df_equals(modin_df, pandas_df)\n\n    @pytest.mark.parametrize(\"nrows\", [35, 100])\n    def test_read_multiple_csv_nrows(self, request, nrows):\n        pandas_df = pandas.concat([pandas.read_csv(fname) for fname in pytest.files])\n        pandas_df = pandas_df.iloc[:nrows, :]\n\n        modin_df = pd.read_csv_glob(pytest.glob_path, 
nrows=nrows)\n\n        # Indexes get messed up when concatting so we reset both.\n        pandas_df = pandas_df.reset_index(drop=True)\n        modin_df = modin_df.reset_index(drop=True)\n\n        df_equals(modin_df, pandas_df)\n\n    def test_read_csv_empty_frame(self):\n        kwargs = {\n            \"usecols\": [0],\n            \"index_col\": 0,\n        }\n\n        modin_df = pd.read_csv_glob(pytest.files[0], **kwargs)\n        pandas_df = pandas.read_csv(pytest.files[0], **kwargs)\n\n        df_equals(modin_df, pandas_df)\n\n    def test_read_csv_without_glob(self):\n        with pytest.raises(FileNotFoundError):\n            with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):\n                pd.read_csv_glob(\n                    \"s3://dask-data/nyc-taxi/2015/yellow_tripdata_2015-\",\n                    storage_options={\"anon\": True},\n                )\n\n    def test_read_csv_glob_4373(self, tmp_path):\n        columns, filename = [\"col0\"], str(tmp_path / \"1x1.csv\")\n        df = pd.DataFrame([[1]], columns=columns)\n        with warns_that_defaulting_to_pandas_if(df._query_compiler.engine == \"Dask\"):\n            df.to_csv(filename)\n\n        kwargs = {\"filepath_or_buffer\": filename, \"usecols\": columns}\n        modin_df = pd.read_csv_glob(**kwargs)\n        pandas_df = pandas.read_csv(**kwargs)\n        df_equals(modin_df, pandas_df)\n\n    @pytest.mark.parametrize(\n        \"parse_dates\",\n        [pytest.param(value, id=id) for id, value in parse_dates_values_by_id.items()],\n    )\n    def test_read_single_csv_with_parse_dates(self, parse_dates):\n        try:\n            pandas_df = pandas.read_csv(time_parsing_csv_path, parse_dates=parse_dates)\n        except Exception as pandas_exception:\n            with pytest.raises(Exception) as modin_exception:\n                modin_df = pd.read_csv_glob(\n                    time_parsing_csv_path, parse_dates=parse_dates\n                )\n                
try_cast_to_pandas(modin_df)  # force materialization\n            assert isinstance(\n                modin_exception.value, type(pandas_exception)\n            ), \"Got Modin Exception type {}, but pandas Exception type {} was expected\".format(\n                type(modin_exception.value), type(pandas_exception)\n            )\n        else:\n            modin_df = pd.read_csv_glob(time_parsing_csv_path, parse_dates=parse_dates)\n            df_equals(modin_df, pandas_df)\n\n\n@pytest.mark.skipif(\n    Engine.get() not in (\"Ray\", \"Unidist\", \"Dask\"),\n    reason=f\"{Engine.get()} does not have experimental glob API\",\n)\n@pytest.mark.parametrize(\n    \"path\",\n    [\n        \"s3://modin-test/modin-bugs/multiple_csv/test_data*.csv\",\n    ],\n)\ndef test_read_multiple_csv_cloud_store(path, s3_resource, s3_storage_options):\n    def _pandas_read_csv_glob(path, storage_options):\n        pandas_dfs = [\n            pandas.read_csv(\n                f\"{path.lower().split('*')[0]}{i}.csv\", storage_options=storage_options\n            )\n            for i in range(2)\n        ]\n        return pandas.concat(pandas_dfs).reset_index(drop=True)\n\n    eval_general(\n        pd,\n        pandas,\n        lambda module, **kwargs: (\n            pd.read_csv_glob(path, **kwargs).reset_index(drop=True)\n            if hasattr(module, \"read_csv_glob\")\n            else _pandas_read_csv_glob(path, **kwargs)\n        ),\n        storage_options=s3_storage_options,\n    )\n\n\n@pytest.mark.skipif(\n    Engine.get() not in (\"Ray\", \"Unidist\", \"Dask\"),\n    reason=f\"{Engine.get()} does not have experimental API\",\n)\n@pytest.mark.parametrize(\n    \"storage_options_extra\",\n    [{\"anon\": False}, {\"anon\": True}, {\"key\": \"123\", \"secret\": \"123\"}],\n)\ndef test_read_multiple_csv_s3_storage_opts(\n    s3_resource, s3_storage_options, storage_options_extra\n):\n    s3_path = \"s3://modin-test/modin-bugs/multiple_csv/\"\n\n    def 
_pandas_read_csv_glob(path, storage_options):\n        pandas_df = pandas.concat(\n            [\n                pandas.read_csv(\n                    f\"{s3_path}test_data{i}.csv\",\n                    storage_options=storage_options,\n                )\n                for i in range(2)\n            ],\n        ).reset_index(drop=True)\n        return pandas_df\n\n    expected_exception = None\n    if \"anon\" in storage_options_extra:\n        expected_exception = PermissionError(\"Forbidden\")\n    eval_general(\n        pd,\n        pandas,\n        lambda module, **kwargs: (\n            pd.read_csv_glob(s3_path, **kwargs)\n            if hasattr(module, \"read_csv_glob\")\n            else _pandas_read_csv_glob(s3_path, **kwargs)\n        ),\n        storage_options=s3_storage_options | storage_options_extra,\n        expected_exception=expected_exception,\n    )\n\n\n@pytest.mark.skipif(\n    Engine.get() not in (\"Ray\", \"Unidist\", \"Dask\"),\n    reason=f\"{Engine.get()} does not have experimental API\",\n)\n@pytest.mark.parametrize(\"pathlike\", [False, True])\n@pytest.mark.parametrize(\"compression\", [None, \"gzip\"])\n@pytest.mark.parametrize(\n    \"filename\", [\"test_default_to_pickle.pkl\", \"test_to_pickle*.pkl\"]\n)\n@pytest.mark.parametrize(\"read_func\", [\"read_pickle_glob\"])\n@pytest.mark.parametrize(\"to_func\", [\"to_pickle_glob\"])\ndef test_distributed_pickling(\n    tmp_path, filename, compression, pathlike, read_func, to_func\n):\n    data = test_data[\"int_data\"]\n    df = pd.DataFrame(data)\n\n    filename_param = filename\n    if compression:\n        filename = f\"{filename}.gz\"\n\n    filename = Path(filename) if pathlike else filename\n\n    with warns_that_defaulting_to_pandas_if(\n        filename_param == \"test_default_to_pickle.pkl\"\n    ):\n        getattr(df.modin, to_func)(str(tmp_path / filename), compression=compression)\n        pickled_df = getattr(pd, read_func)(\n            str(tmp_path / filename), 
compression=compression\n        )\n    df_equals(pickled_df, df)\n\n\n@pytest.mark.skipif(\n    Engine.get() not in (\"Ray\", \"Unidist\", \"Dask\"),\n    reason=f\"{Engine.get()} does not have experimental API\",\n)\n@pytest.mark.parametrize(\n    \"filename\",\n    [\"test_parquet_glob.parquet\", \"test_parquet_glob*.parquet\"],\n)\ndef test_parquet_glob(tmp_path, filename):\n    data = test_data[\"int_data\"]\n    df = pd.DataFrame(data)\n\n    filename_param = filename\n\n    with warns_that_defaulting_to_pandas_if(\n        filename_param == \"test_parquet_glob.parquet\"\n    ):\n        df.modin.to_parquet_glob(str(tmp_path / filename))\n        read_df = pd.read_parquet_glob(str(tmp_path / filename))\n    df_equals(read_df, df)\n\n\n@pytest.mark.skipif(\n    Engine.get() not in (\"Ray\", \"Unidist\", \"Dask\"),\n    reason=f\"{Engine.get()} does not have experimental API\",\n)\n@pytest.mark.parametrize(\n    \"filename\",\n    [\"test_json_glob.json\", \"test_json_glob*.json\"],\n)\ndef test_json_glob(tmp_path, filename):\n    data = test_data[\"int_data\"]\n    df = pd.DataFrame(data)\n\n    filename_param = filename\n\n    with warns_that_defaulting_to_pandas_if(filename_param == \"test_json_glob.json\"):\n        df.modin.to_json_glob(str(tmp_path / filename))\n        read_df = pd.read_json_glob(str(tmp_path / filename))\n    df_equals(read_df, df)\n\n\n@pytest.mark.skipif(\n    Engine.get() not in (\"Ray\", \"Unidist\", \"Dask\"),\n    reason=f\"{Engine.get()} does not have experimental API\",\n)\n@pytest.mark.parametrize(\n    \"filename\",\n    [\"test_xml_glob.xml\", \"test_xml_glob*.xml\"],\n)\n@pytest.mark.skipif(\n    platform.system() == \"Windows\",\n    reason=\"https://github.com/modin-project/modin/issues/7497\",\n)\ndef test_xml_glob(tmp_path, filename):\n    data = test_data[\"int_data\"]\n    df = pd.DataFrame(data)\n\n    filename_param = filename\n\n    with warns_that_defaulting_to_pandas_if(filename_param == \"test_xml_glob.xml\"):\n  
      df.modin.to_xml_glob(str(tmp_path / filename), index=False)\n        read_df = pd.read_xml_glob(str(tmp_path / filename))\n\n    # The index gets messed up when concatenating, so we reset it.\n    read_df = read_df.reset_index(drop=True)\n    df_equals(read_df, df)\n\n\n@pytest.mark.skipif(\n    Engine.get() not in (\"Ray\", \"Unidist\", \"Dask\"),\n    reason=f\"{Engine.get()} does not have experimental read_custom_text API\",\n)\n@pytest.mark.parametrize(\"set_async_read_mode\", [False, True], indirect=True)\ndef test_read_custom_json_text(set_async_read_mode):\n    def _generate_json(file_name, nrows, ncols):\n        data = np.random.rand(nrows, ncols)\n        df = pandas.DataFrame(data, columns=[f\"col{x}\" for x in range(ncols)])\n        df.to_json(file_name, lines=True, orient=\"records\")\n\n    # Custom parser allows us to add some specifics to reading files,\n    # which is not available through the ready-made API.\n    # For example, the parser allows us to reduce the amount of RAM\n    # required for reading by selecting a subset of columns.\n    def _custom_parser(io_input, **kwargs):\n        result = {\"col0\": [], \"col1\": [], \"col3\": []}\n        for line in io_input:\n            # for example, simjson can be used here\n            obj = json.loads(line)\n            for key in result:\n                result[key].append(obj[key])\n        return pandas.DataFrame(result).rename(columns={\"col0\": \"testID\"})\n\n    with ensure_clean() as filename:\n        _generate_json(filename, 64, 8)\n\n        df1 = pd.read_custom_text(\n            filename,\n            columns=[\"testID\", \"col1\", \"col3\"],\n            custom_parser=_custom_parser,\n            is_quoting=False,\n        )\n        df2 = pd.read_json(filename, lines=True)[[\"col0\", \"col1\", \"col3\"]].rename(\n            columns={\"col0\": \"testID\"}\n        )\n        if AsyncReadMode.get():\n            # If read operations are asynchronous, then the dataframes\n            # 
check should be inside `ensure_clean` context\n            # because the file may be deleted before actual reading starts\n            df_equals(df1, df2)\n    if not AsyncReadMode.get():\n        df_equals(df1, df2)\n\n\n@pytest.mark.skipif(\n    Engine.get() not in (\"Ray\", \"Unidist\", \"Dask\"),\n    reason=f\"{Engine.get()} does not have experimental API\",\n)\n@pytest.mark.parametrize(\"set_async_read_mode\", [False, True], indirect=True)\ndef test_read_evaluated_dict(set_async_read_mode):\n    def _generate_evaluated_dict(file_name, nrows, ncols):\n        result = {}\n        keys = [f\"col{x}\" for x in range(ncols)]\n\n        with open(file_name, mode=\"w\") as _file:\n            for i in range(nrows):\n                data = np.random.rand(ncols)\n                for idx, key in enumerate(keys):\n                    result[key] = data[idx]\n                _file.write(str(result))\n                _file.write(\"\\n\")\n\n    # This parser allows us to read a format not supported by other reading functions\n    def _custom_parser(io_input, **kwargs):\n        cat_list = []\n        asin_list = []\n        for line in io_input:\n            obj = eval(line)\n            cat_list.append(obj[\"col1\"])\n            asin_list.append(obj[\"col2\"])\n        return pandas.DataFrame({\"col1\": asin_list, \"col2\": cat_list})\n\n    def columns_callback(io_input, **kwargs):\n        columns = None\n        for line in io_input:\n            columns = list(eval(line).keys())[1:3]\n            break\n        return columns\n\n    with ensure_clean() as filename:\n        _generate_evaluated_dict(filename, 64, 8)\n\n        df1 = pd.read_custom_text(\n            filename,\n            columns=[\"col1\", \"col2\"],\n            custom_parser=_custom_parser,\n        )\n        assert df1.shape == (64, 2)\n\n        df2 = pd.read_custom_text(\n            filename, columns=columns_callback, custom_parser=_custom_parser\n        )\n        if AsyncReadMode.get():\n 
           # If read operations are asynchronous, then the dataframes\n            # check should be inside `ensure_clean` context\n            # because the file may be deleted before actual reading starts\n            df_equals(df1, df2)\n    if not AsyncReadMode.get():\n        df_equals(df1, df2)\n"
  },
  {
    "path": "modin/tests/experimental/test_pipeline.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport numpy as np\nimport pandas\nimport pytest\n\nimport modin.pandas as pd\nfrom modin.config import Engine, NPartitions\nfrom modin.core.execution.ray.common import RayWrapper\nfrom modin.distributed.dataframe.pandas.partitions import from_partitions\nfrom modin.experimental.batch.pipeline import PandasQueryPipeline\nfrom modin.tests.pandas.utils import df_equals\n\n\n@pytest.mark.skipif(\n    Engine.get() != \"Ray\",\n    reason=\"Only Ray supports the Batch Pipeline API\",\n)\nclass TestPipelineRayEngine:\n    def test_warnings(self):\n        \"\"\"Ensure that creating a Pipeline object raises the correct warnings.\"\"\"\n        arr = np.random.randint(0, 1000, (1000, 1000))\n        df = pd.DataFrame(arr)\n        # Ensure that building a pipeline warns users that it is an experimental feature\n        with pytest.warns(\n            UserWarning,\n            match=\"The Batch Pipeline API is an experimental feature and still under development in Modin.\",\n        ):\n            pipeline = PandasQueryPipeline(df)\n        with pytest.warns(\n            UserWarning,\n            match=\"No outputs to compute. Returning an empty list. 
Please specify outputs by calling `add_query` with `is_output=True`.\",\n        ):\n            output = pipeline.compute_batch()\n        assert output == [], \"Empty pipeline did not return an empty list.\"\n\n    def test_pipeline_simple(self):\n        \"\"\"Create a simple pipeline and ensure that it runs end to end correctly.\"\"\"\n        arr = np.random.randint(0, 1000, (1000, 1000))\n        df = pd.DataFrame(arr)\n\n        def add_col(df):\n            df[\"new_col\"] = df.sum(axis=1)\n            return df\n\n        # Build pipeline\n        pipeline = PandasQueryPipeline(df)\n        pipeline.add_query(add_col)\n        pipeline.add_query(lambda df: df * -30)\n        pipeline.add_query(\n            lambda df: df.rename(columns={i: f\"col {i}\" for i in range(1000)})\n        )\n\n        def add_row_to_partition(df):\n            return pandas.concat([df, df.iloc[[-1]]])\n\n        pipeline.add_query(add_row_to_partition, is_output=True)\n        new_df = pipeline.compute_batch()[0]\n        # Build df without pipelining to ensure correctness\n        correct_df = add_col(pd.DataFrame(arr))\n        correct_df *= -30\n        correct_df = pd.DataFrame(\n            correct_df.rename(columns={i: f\"col {i}\" for i in range(1000)})._to_pandas()\n        )\n        correct_modin_frame = correct_df._query_compiler._modin_frame\n        partitions = correct_modin_frame._partition_mgr_cls.row_partitions(\n            correct_modin_frame._partitions\n        )\n        partitions = [\n            partition.add_to_apply_calls(add_row_to_partition)\n            for partition in partitions\n        ]\n        [partition.drain_call_queue() for partition in partitions]\n        partitions = [partition.list_of_blocks for partition in partitions]\n        correct_df = from_partitions(partitions, axis=None)\n        # Compare pipelined and non-pipelined df\n        df_equals(correct_df, new_df)\n        # Ensure that setting `num_partitions` when creating a 
pipeline does not change `NPartitions`\n        num_partitions = NPartitions.get()\n        PandasQueryPipeline(df, num_partitions=(num_partitions - 1))\n        assert (\n            NPartitions.get() == num_partitions\n        ), \"Pipeline did not change NPartitions.get()\"\n\n    def test_update_df(self):\n        \"\"\"Ensure that `update_df` updates the df that the pipeline runs on.\"\"\"\n        df = pd.DataFrame([[1, 2, 3], [4, 5, 6]])\n        pipeline = PandasQueryPipeline(df)\n        pipeline.add_query(lambda df: df + 3, is_output=True)\n        new_df = df * -1\n        pipeline.update_df(new_df)\n        output_df = pipeline.compute_batch()[0]\n        df_equals((df * -1) + 3, output_df)\n\n    def test_multiple_outputs(self):\n        \"\"\"Create a pipeline with multiple outputs, and check that all are computed correctly.\"\"\"\n        arr = np.random.randint(0, 1000, (1000, 1000))\n        df = pd.DataFrame(arr)\n        pipeline = PandasQueryPipeline(df)\n        pipeline.add_query(lambda df: df * -30, is_output=True)\n        pipeline.add_query(\n            lambda df: df.rename(columns={i: f\"col {i}\" for i in range(1000)}),\n            is_output=True,\n        )\n        pipeline.add_query(lambda df: df + 30, is_output=True)\n        new_dfs = pipeline.compute_batch()\n        assert len(new_dfs) == 3, \"Pipeline did not return all outputs\"\n        correct_df = pd.DataFrame(arr) * -30\n        df_equals(correct_df, new_dfs[0])  # First output computed correctly\n        correct_df = correct_df.rename(columns={i: f\"col {i}\" for i in range(1000)})\n        df_equals(correct_df, new_dfs[1])  # Second output computed correctly\n        correct_df += 30\n        df_equals(correct_df, new_dfs[2])  # Third output computed correctly\n\n    def test_output_id(self):\n        \"\"\"Ensure `output_id` is handled correctly when passed.\"\"\"\n        arr = np.random.randint(0, 1000, (1000, 1000))\n        df = pd.DataFrame(arr)\n        pipeline = 
PandasQueryPipeline(df, 0)\n        pipeline.add_query(lambda df: df * -30, is_output=True, output_id=20)\n        with pytest.raises(\n            ValueError, match=\"Output ID must be specified for all nodes.\"\n        ):\n            pipeline.add_query(\n                lambda df: df.rename(columns={i: f\"col {i}\" for i in range(1000)}),\n                is_output=True,\n            )\n        assert (\n            len(pipeline.query_list) == 0 and len(pipeline.outputs) == 1\n        ), \"Invalid `add_query` incorrectly added a node to the pipeline.\"\n        pipeline = PandasQueryPipeline(df)\n        pipeline.add_query(lambda df: df * -30, is_output=True)\n        with pytest.raises(\n            ValueError, match=\"Output ID must be specified for all nodes.\"\n        ):\n            pipeline.add_query(\n                lambda df: df.rename(columns={i: f\"col {i}\" for i in range(1000)}),\n                is_output=True,\n                output_id=20,\n            )\n        assert (\n            len(pipeline.query_list) == 0 and len(pipeline.outputs) == 1\n        ), \"Invalid `add_query` incorrectly added a node to the pipeline.\"\n        pipeline = PandasQueryPipeline(df)\n        pipeline.add_query(lambda df: df, is_output=True)\n        with pytest.raises(\n            ValueError,\n            match=(\n                \"`pass_output_id` is set to True, but output ids have not been specified. 
\"\n                + \"To pass output ids, please specify them using the `output_id` kwarg with pipeline.add_query\"\n            ),\n        ):\n            pipeline.compute_batch(postprocessor=lambda df: df, pass_output_id=True)\n        with pytest.raises(\n            ValueError,\n            match=\"Output ID cannot be specified for non-output node.\",\n        ):\n            pipeline.add_query(lambda df: df, output_id=22)\n        assert (\n            len(pipeline.query_list) == 0 and len(pipeline.outputs) == 1\n        ), \"Invalid `add_query` incorrectly added a node to the pipeline.\"\n\n    def test_output_id_multiple_outputs(self):\n        \"\"\"Ensure `output_id` is handled correctly when multiple outputs are computed.\"\"\"\n        arr = np.random.randint(0, 1000, (1000, 1000))\n        df = pd.DataFrame(arr)\n        pipeline = PandasQueryPipeline(df)\n        pipeline.add_query(lambda df: df * -30, is_output=True, output_id=20)\n        pipeline.add_query(\n            lambda df: df.rename(columns={i: f\"col {i}\" for i in range(1000)}),\n            is_output=True,\n            output_id=21,\n        )\n        pipeline.add_query(lambda df: df + 30, is_output=True, output_id=22)\n        new_dfs = pipeline.compute_batch()\n        assert isinstance(\n            new_dfs, dict\n        ), \"Pipeline did not return a dictionary mapping output_ids to dfs\"\n        assert 20 in new_dfs, \"Output ID 1 not cached correctly\"\n        assert 21 in new_dfs, \"Output ID 2 not cached correctly\"\n        assert 22 in new_dfs, \"Output ID 3 not cached correctly\"\n        assert len(new_dfs) == 3, \"Pipeline did not return all outputs\"\n        correct_df = pd.DataFrame(arr) * -30\n        df_equals(correct_df, new_dfs[20])  # First output computed correctly\n        correct_df = correct_df.rename(columns={i: f\"col {i}\" for i in range(1000)})\n        df_equals(correct_df, new_dfs[21])  # Second output computed correctly\n        correct_df += 30\n    
    df_equals(correct_df, new_dfs[22])  # Third output computed correctly\n\n    def test_postprocessing(self):\n        \"\"\"Check that the `postprocessor` argument to `compute_batch` is handled correctly.\"\"\"\n        arr = np.random.randint(0, 1000, (1000, 1000))\n        df = pd.DataFrame(arr)\n        pipeline = PandasQueryPipeline(df)\n        pipeline.add_query(lambda df: df * -30, is_output=True)\n        pipeline.add_query(\n            lambda df: df.rename(columns={i: f\"col {i}\" for i in range(1000)}),\n            is_output=True,\n        )\n        pipeline.add_query(lambda df: df + 30, is_output=True)\n\n        def new_col_adder(df):\n            df[\"new_col\"] = df.iloc[:, -1]\n            return df\n\n        new_dfs = pipeline.compute_batch(postprocessor=new_col_adder)\n        assert len(new_dfs) == 3, \"Pipeline did not return all outputs\"\n        correct_df = pd.DataFrame(arr) * -30\n        correct_df[\"new_col\"] = correct_df.iloc[:, -1]\n        df_equals(correct_df, new_dfs[0])\n        correct_df = correct_df.drop(columns=[\"new_col\"])\n        correct_df = correct_df.rename(columns={i: f\"col {i}\" for i in range(1000)})\n        correct_df[\"new_col\"] = correct_df.iloc[:, -1]\n        df_equals(correct_df, new_dfs[1])\n        correct_df = correct_df.drop(columns=[\"new_col\"])\n        correct_df += 30\n        correct_df[\"new_col\"] = correct_df.iloc[:, -1]\n        df_equals(correct_df, new_dfs[2])\n\n    def test_postprocessing_with_output_id(self):\n        \"\"\"Check that the `postprocessor` argument is correctly handled when `output_id` is specified.\"\"\"\n\n        def new_col_adder(df):\n            df[\"new_col\"] = df.iloc[:, -1]\n            return df\n\n        arr = np.random.randint(0, 1000, (1000, 1000))\n        df = pd.DataFrame(arr)\n        pipeline = PandasQueryPipeline(df)\n        pipeline.add_query(lambda df: df * -30, is_output=True, output_id=20)\n        pipeline.add_query(\n            lambda df: 
df.rename(columns={i: f\"col {i}\" for i in range(1000)}),\n            is_output=True,\n            output_id=21,\n        )\n        pipeline.add_query(lambda df: df + 30, is_output=True, output_id=22)\n        new_dfs = pipeline.compute_batch(postprocessor=new_col_adder)\n        assert len(new_dfs) == 3, \"Pipeline did not return all outputs\"\n\n    def test_postprocessing_with_output_id_passed(self):\n        \"\"\"Check that the `postprocessor` argument is correctly passed `output_id` when `pass_output_id` is `True`.\"\"\"\n        arr = np.random.randint(0, 1000, (1000, 1000))\n\n        def new_col_adder(df, o_id):\n            df[\"new_col\"] = o_id\n            return df\n\n        df = pd.DataFrame(arr)\n        pipeline = PandasQueryPipeline(df)\n        pipeline.add_query(lambda df: df * -30, is_output=True, output_id=20)\n        pipeline.add_query(\n            lambda df: df.rename(columns={i: f\"col {i}\" for i in range(1000)}),\n            is_output=True,\n            output_id=21,\n        )\n        pipeline.add_query(lambda df: df + 30, is_output=True, output_id=22)\n        new_dfs = pipeline.compute_batch(\n            postprocessor=new_col_adder, pass_output_id=True\n        )\n        correct_df = pd.DataFrame(arr) * -30\n        correct_df[\"new_col\"] = 20\n        df_equals(correct_df, new_dfs[20])\n        correct_df = correct_df.drop(columns=[\"new_col\"])\n        correct_df = correct_df.rename(columns={i: f\"col {i}\" for i in range(1000)})\n        correct_df[\"new_col\"] = 21\n        df_equals(correct_df, new_dfs[21])\n        correct_df = correct_df.drop(columns=[\"new_col\"])\n        correct_df += 30\n        correct_df[\"new_col\"] = 22\n        df_equals(correct_df, new_dfs[22])\n\n    def test_postprocessing_with_partition_id(self):\n        \"\"\"Check that the postprocessing is correctly handled when `partition_id` is passed.\"\"\"\n        arr = np.random.randint(0, 1000, (1000, 1000))\n\n        def new_col_adder(df, 
partition_id):\n            df[\"new_col\"] = partition_id\n            return df\n\n        df = pd.DataFrame(arr)\n        pipeline = PandasQueryPipeline(df)\n        pipeline.add_query(lambda df: df * -30, is_output=True, output_id=20)\n        pipeline.add_query(\n            lambda df: df.rename(columns={i: f\"col {i}\" for i in range(1000)}),\n            is_output=True,\n            output_id=21,\n        )\n        new_dfs = pipeline.compute_batch(\n            postprocessor=new_col_adder, pass_partition_id=True\n        )\n        correct_df = pd.DataFrame(arr) * -30\n        correct_modin_frame = correct_df._query_compiler._modin_frame\n        partitions = correct_modin_frame._partition_mgr_cls.row_partitions(\n            correct_modin_frame._partitions\n        )\n        partitions = [\n            partition.add_to_apply_calls(new_col_adder, i)\n            for i, partition in enumerate(partitions)\n        ]\n        [partition.drain_call_queue() for partition in partitions]\n        partitions = [partition.list_of_blocks for partition in partitions]\n        correct_df = from_partitions(partitions, axis=None)\n        df_equals(correct_df, new_dfs[20])\n        correct_df = correct_df.drop(columns=[\"new_col\"])\n        correct_df = pd.DataFrame(\n            correct_df.rename(columns={i: f\"col {i}\" for i in range(1000)})._to_pandas()\n        )\n        correct_modin_frame = correct_df._query_compiler._modin_frame\n        partitions = correct_modin_frame._partition_mgr_cls.row_partitions(\n            correct_modin_frame._partitions\n        )\n        partitions = [\n            partition.add_to_apply_calls(new_col_adder, i)\n            for i, partition in enumerate(partitions)\n        ]\n        [partition.drain_call_queue() for partition in partitions]\n        partitions = [partition.list_of_blocks for partition in partitions]\n        correct_df = from_partitions(partitions, axis=None)\n        df_equals(correct_df, new_dfs[21])\n\n    
def test_postprocessing_with_all_metadata(self):\n        \"\"\"Check that postprocessing is correctly handled when `partition_id` and `output_id` are passed.\"\"\"\n        arr = np.random.randint(0, 1000, (1000, 1000))\n\n        def new_col_adder(df, o_id, partition_id):\n            df[\"new_col\"] = f\"{o_id} {partition_id}\"\n            return df\n\n        df = pd.DataFrame(arr)\n        pipeline = PandasQueryPipeline(df)\n        pipeline.add_query(lambda df: df * -30, is_output=True, output_id=20)\n        pipeline.add_query(\n            lambda df: df.rename(columns={i: f\"col {i}\" for i in range(1000)}),\n            is_output=True,\n            output_id=21,\n        )\n        new_dfs = pipeline.compute_batch(\n            postprocessor=new_col_adder, pass_partition_id=True, pass_output_id=True\n        )\n        correct_df = pd.DataFrame(arr) * -30\n        correct_modin_frame = correct_df._query_compiler._modin_frame\n        partitions = correct_modin_frame._partition_mgr_cls.row_partitions(\n            correct_modin_frame._partitions\n        )\n        partitions = [\n            partition.add_to_apply_calls(new_col_adder, 20, i)\n            for i, partition in enumerate(partitions)\n        ]\n        [partition.drain_call_queue() for partition in partitions]\n        partitions = [partition.list_of_blocks for partition in partitions]\n        correct_df = from_partitions(partitions, axis=None)\n        df_equals(correct_df, new_dfs[20])\n        correct_df = correct_df.drop(columns=[\"new_col\"])\n        correct_df = pd.DataFrame(\n            correct_df.rename(columns={i: f\"col {i}\" for i in range(1000)})._to_pandas()\n        )\n        correct_modin_frame = correct_df._query_compiler._modin_frame\n        partitions = correct_modin_frame._partition_mgr_cls.row_partitions(\n            correct_modin_frame._partitions\n        )\n        partitions = [\n            partition.add_to_apply_calls(new_col_adder, 21, i)\n            for i, 
partition in enumerate(partitions)\n        ]\n        [partition.drain_call_queue() for partition in partitions]\n        partitions = [partition.list_of_blocks for partition in partitions]\n        correct_df = from_partitions(partitions, axis=None)\n        df_equals(correct_df, new_dfs[21])\n\n    def test_repartition_after(self):\n        \"\"\"Check that the `repartition_after` argument is appropriately handled.\"\"\"\n        df = pd.DataFrame([list(range(1000))])\n        pipeline = PandasQueryPipeline(df)\n        pipeline.add_query(\n            lambda df: pandas.concat([df] * 1000), repartition_after=True\n        )\n\n        def new_col_adder(df, partition_id):\n            df[\"new_col\"] = partition_id\n            return df\n\n        pipeline.add_query(new_col_adder, is_output=True, pass_partition_id=True)\n        new_dfs = pipeline.compute_batch()\n        # new_col_adder should set `new_col` to the partition ID\n        # throughout the dataframe. We expect there to be\n        # NPartitions.get() partitions by the time new_col_adder runs,\n        # because the previous step has repartitioned.\n        assert len(new_dfs[0][\"new_col\"].unique()) == NPartitions.get()\n        # Test that `repartition_after=True` raises an error when the result has more than\n        # one partition.\n        partition1 = RayWrapper.put(pandas.DataFrame([[0, 1, 2]]))\n        partition2 = RayWrapper.put(pandas.DataFrame([[3, 4, 5]]))\n        df = from_partitions([partition1, partition2], 0)\n        pipeline = PandasQueryPipeline(df, 0)\n        pipeline.add_query(lambda df: df, repartition_after=True, is_output=True)\n\n        with pytest.raises(\n            NotImplementedError,\n            match=\"Dynamic repartitioning is currently only supported for DataFrames with 1 partition.\",\n        ):\n            pipeline.compute_batch()\n\n    def test_fan_out(self):\n        \"\"\"Check that the fan_out argument is appropriately handled.\"\"\"\n        df = 
pd.DataFrame([[0, 1, 2]])\n\n        def new_col_adder(df, partition_id):\n            df[\"new_col\"] = partition_id\n            return df\n\n        def reducer(dfs):\n            new_cols = \"\".join([str(df[\"new_col\"].values[0]) for df in dfs])\n            dfs[0][\"new_col1\"] = new_cols\n            return dfs[0]\n\n        pipeline = PandasQueryPipeline(df)\n        pipeline.add_query(\n            new_col_adder,\n            fan_out=True,\n            reduce_fn=reducer,\n            pass_partition_id=True,\n            is_output=True,\n        )\n        new_df = pipeline.compute_batch()[0]\n        correct_df = pd.DataFrame([[0, 1, 2]])\n        correct_df[\"new_col\"] = 0\n        correct_df[\"new_col1\"] = \"\".join([str(i) for i in range(NPartitions.get())])\n        df_equals(correct_df, new_df)\n        # Test that `fan_out=True` raises an error when the input has more than\n        # one partition.\n        partition1 = RayWrapper.put(pandas.DataFrame([[0, 1, 2]]))\n        partition2 = RayWrapper.put(pandas.DataFrame([[3, 4, 5]]))\n        df = from_partitions([partition1, partition2], 0)\n        pipeline = PandasQueryPipeline(df)\n        pipeline.add_query(\n            new_col_adder,\n            fan_out=True,\n            reduce_fn=reducer,\n            pass_partition_id=True,\n            is_output=True,\n        )\n        with pytest.raises(\n            NotImplementedError,\n            match=\"Fan out is only supported with DataFrames with 1 partition.\",\n        ):\n            pipeline.compute_batch()[0]\n\n    def test_pipeline_complex(self):\n        \"\"\"Create a complex pipeline with both `fan_out`, `repartition_after` and postprocessing and ensure that it runs end to end correctly.\"\"\"\n        from os import remove\n        from os.path import exists\n        from time import sleep\n\n        df = pd.DataFrame([[0, 1, 2]])\n\n        def new_col_adder(df, partition_id):\n            sleep(60)\n            df[\"new_col\"] = 
partition_id\n            return df\n\n        def reducer(dfs):\n            new_cols = \"\".join([str(df[\"new_col\"].values[0]) for df in dfs])\n            dfs[0][\"new_col1\"] = new_cols\n            return dfs[0]\n\n        desired_num_partitions = 24\n        pipeline = PandasQueryPipeline(df, num_partitions=desired_num_partitions)\n        pipeline.add_query(\n            new_col_adder,\n            fan_out=True,\n            reduce_fn=reducer,\n            pass_partition_id=True,\n            is_output=True,\n            output_id=20,\n        )\n        pipeline.add_query(\n            lambda df: pandas.concat([df] * 1000),\n            repartition_after=True,\n        )\n\n        def to_csv(df, partition_id):\n            df = df.drop(columns=[\"new_col\"])\n            df.to_csv(f\"{partition_id}.csv\")\n            return df\n\n        pipeline.add_query(to_csv, is_output=True, output_id=21, pass_partition_id=True)\n\n        def post_proc(df, o_id, partition_id):\n            df[\"new_col_proc\"] = f\"{o_id} {partition_id}\"\n            return df\n\n        new_dfs = pipeline.compute_batch(\n            postprocessor=post_proc,\n            pass_partition_id=True,\n            pass_output_id=True,\n        )\n        correct_df = pd.DataFrame([[0, 1, 2]])\n        correct_df[\"new_col\"] = 0\n        correct_df[\"new_col1\"] = \"\".join(\n            [str(i) for i in range(desired_num_partitions)]\n        )\n        correct_df[\"new_col_proc\"] = \"20 0\"\n        df_equals(correct_df, new_dfs[20])\n        correct_df = pd.concat([correct_df] * 1000)\n        correct_df = correct_df.drop(columns=[\"new_col\"])\n        correct_df[\"new_col_proc\"] = \"21 0\"\n        new_length = len(correct_df.index) // desired_num_partitions\n        for i in range(desired_num_partitions):\n            if i == desired_num_partitions - 1:\n                correct_df.iloc[i * new_length :, -1] = f\"21 {i}\"\n            else:\n                correct_df.iloc[i * 
new_length : (i + 1) * new_length, -1] = f\"21 {i}\"\n        df_equals(correct_df, new_dfs[21])\n        correct_df = correct_df.drop(columns=[\"new_col_proc\"])\n        for i in range(desired_num_partitions):\n            if i == desired_num_partitions - 1:\n                correct_partition = correct_df.iloc[i * new_length :]\n            else:\n                correct_partition = correct_df.iloc[\n                    i * new_length : (i + 1) * new_length\n                ]\n            assert exists(\n                f\"{i}.csv\"\n            ), \"CSV File for Partition {i} does not exist, even though dataframe should have been repartitioned.\"\n            df_equals(\n                correct_partition,\n                pd.read_csv(f\"{i}.csv\", index_col=\"Unnamed: 0\").rename(\n                    columns={\"0\": 0, \"1\": 1, \"2\": 2}\n                ),\n            )\n            remove(f\"{i}.csv\")\n\n\n@pytest.mark.skipif(\n    Engine.get() == \"Ray\",\n    reason=\"Ray supports the Batch Pipeline API\",\n)\ndef test_pipeline_unsupported_engine():\n    \"\"\"Ensure that trying to use the Pipeline API with an unsupported Engine raises errors.\"\"\"\n    # Check that pipeline does not allow `Engine` to not be Ray.\n    df = pd.DataFrame([[1]])\n    with pytest.raises(\n        NotImplementedError,\n        match=\"Batch Pipeline API is only implemented for `PandasOnRay` execution.\",\n    ):\n        PandasQueryPipeline(df)\n\n    eng = Engine.get()\n    Engine.put(\"Ray\")\n    # Check that even if Engine is Ray, if the df is not backed by Ray, the Pipeline does not allow initialization.\n    with pytest.raises(\n        NotImplementedError,\n        match=\"Batch Pipeline API is only implemented for `PandasOnRay` execution.\",\n    ):\n        PandasQueryPipeline(df, 0)\n    df_on_ray_engine = pd.DataFrame([[1]])\n    pipeline = PandasQueryPipeline(df_on_ray_engine)\n    # Check that even if Engine is Ray, if the new df is not backed by Ray, the 
Pipeline does not allow an update.\n    with pytest.raises(\n        NotImplementedError,\n        match=\"Batch Pipeline API is only implemented for `PandasOnRay` execution.\",\n    ):\n        pipeline.update_df(df)\n    Engine.put(eng)\n    # Check that pipeline does not allow an update when `Engine` is not Ray.\n    with pytest.raises(\n        NotImplementedError,\n        match=\"Batch Pipeline API is only implemented for `PandasOnRay` execution.\",\n    ):\n        pipeline.update_df(df)\n"
  },
  {
    "path": "modin/tests/experimental/torch/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n"
  },
  {
    "path": "modin/tests/experimental/torch/test_dataloader.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\nfrom __future__ import annotations\n\nfrom types import ModuleType\nfrom typing import Type\n\nimport numpy as np\nimport pandas\nimport pytest\nimport ray\nimport torch\nfrom torch.utils.data import RandomSampler, Sampler, SequentialSampler\n\nimport modin.pandas as pd\nfrom modin.experimental.torch.datasets import ModinDataLoader\n\n\n@pytest.fixture(scope=\"module\", autouse=True)\ndef ray_fix():\n    ray.init(num_cpus=1)\n    yield None\n    ray.shutdown()\n\n\ndef _load_test_dataframe(lib: ModuleType):\n    df = lib.read_csv(\n        \"https://raw.githubusercontent.com/ponder-org/ponder-datasets/main/USA_Housing.csv\"\n    )\n    return df\n\n\n@pytest.mark.parametrize(\"lib\", [pandas, pd])\n@pytest.mark.parametrize(\"sampler_cls\", [RandomSampler, SequentialSampler])\n@pytest.mark.parametrize(\"batch_size\", [16, 37])\ndef test_torch_dataloader(lib: ModuleType, sampler_cls: Type[Sampler], batch_size: int):\n    df = _load_test_dataframe(lib)\n    np.random.seed(42)\n    torch.manual_seed(42)\n    loader = ModinDataLoader(\n        df,\n        batch_size=batch_size,\n        features=[\n            \"AVG_AREA_INCOME\",\n            \"AVG_AREA_HOUSE_AGE\",\n   
         \"AVG_AREA_NUM_ROOMS\",\n            \"AVG_AREA_NUM_BEDROOMS\",\n            \"POPULATION\",\n            \"PRICE\",\n        ],\n        sampler=sampler_cls,\n    )\n\n    outputs = []\n    for batch in loader:\n        assert batch.shape[0] <= batch_size, batch.shape\n        assert batch.shape[1] == 6, batch.shape\n\n        outputs.append(batch)\n\n    return outputs\n\n\n@pytest.mark.parametrize(\"sampler_cls\", [RandomSampler, SequentialSampler])\n@pytest.mark.parametrize(\"batch_size\", [16, 37])\ndef test_compare_dataloaders(sampler_cls: Type[Sampler], batch_size: int):\n    by_modin = test_torch_dataloader(pd, sampler_cls, batch_size=batch_size)\n    by_pandas = test_torch_dataloader(pandas, sampler_cls, batch_size=batch_size)\n\n    assert len(by_modin) == len(by_pandas)\n    for tensor_by_modin, tensor_by_pandas in zip(by_modin, by_pandas):\n        assert np.allclose(tensor_by_modin, tensor_by_pandas), (\n            tensor_by_modin - tensor_by_pandas\n        )\n"
  },
  {
    "path": "modin/tests/experimental/xgboost/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n"
  },
  {
    "path": "modin/tests/experimental/xgboost/test_default.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\nimport pytest\n\nimport modin.experimental.xgboost as xgb\nimport modin.pandas as pd\nfrom modin.config import Engine\n\n\n@pytest.mark.skipif(\n    Engine.get() == \"Ray\",\n    reason=\"This test doesn't make sense on Ray engine.\",\n)\n@pytest.mark.skipif(\n    Engine.get() == \"Python\",\n    reason=\"This test doesn't make sense on non-distributed engine (see issue #2938).\",\n)\ndef test_engine():\n    try:\n        xgb.train({}, xgb.DMatrix(pd.DataFrame([0]), pd.DataFrame([0])))\n    except ValueError:\n        pass\n"
  },
  {
    "path": "modin/tests/experimental/xgboost/test_dmatrix.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport numpy as np\nimport pandas\nimport pytest\nimport xgboost as xgb\nfrom sklearn.datasets import load_breast_cancer\nfrom sklearn.metrics import accuracy_score\n\nimport modin.experimental.xgboost as mxgb\nimport modin.pandas as pd\nfrom modin.config import Engine\nfrom modin.utils import try_cast_to_pandas\n\nif Engine.get() != \"Ray\":\n    pytest.skip(\n        \"Modin' xgboost extension works only with Ray engine.\",\n        allow_module_level=True,\n    )\n\n\nrng = np.random.RandomState(1994)\n\n\ndef check_dmatrix(data, label=None, **kwargs):\n    modin_data = pd.DataFrame(data)\n    modin_label = label if label is None else pd.Series(label)\n    try:\n        dm = xgb.DMatrix(data, label=label, **kwargs)\n    except Exception as xgb_exception:\n        with pytest.raises(Exception) as mxgb_exception:\n            mxgb.DMatrix(modin_data, label=modin_label, **kwargs)\n        # Thrown exceptions are `XGBoostError`, which is a descendant of `ValueError`, and `ValueError`\n        # for XGBoost and Modin, respectively,  so we intentionally use `xgb_exception`\n        # as a first parameter of `isinstance` to pass the assertion\n        assert 
isinstance(\n            xgb_exception, type(mxgb_exception.value)\n        ), \"Got Modin Exception type {}, but xgboost Exception type {} was expected\".format(\n            type(mxgb_exception.value), type(xgb_exception)\n        )\n    else:\n        md_dm = mxgb.DMatrix(modin_data, label=modin_label, **kwargs)\n        assert md_dm.num_row() == dm.num_row()\n        assert md_dm.num_col() == dm.num_col()\n        assert md_dm.feature_names == dm.feature_names\n        assert md_dm.feature_types == dm.feature_types\n\n\n@pytest.mark.parametrize(\n    \"data\",\n    [\n        np.random.randn(5, 5),\n        np.array([[1, 2], [3, 4]]),\n        np.array([[\"a\", \"b\"], [\"c\", \"d\"]]),\n        [[1, 2], [3, 4]],\n        [[\"a\", \"b\"], [\"c\", \"d\"]],\n    ],\n)\n@pytest.mark.parametrize(\n    \"feature_names\",\n    [\n        list(\"abcdef\"),\n        [\"a\", \"b\", \"c\", \"d\", \"d\"],\n        [\"a\", \"b\", \"c\", \"d\", \"e<1\"],\n        list(\"abcde\"),\n    ],\n)\n@pytest.mark.parametrize(\n    \"feature_types\",\n    [None, \"q\", list(\"qiqiq\")],\n)\ndef test_dmatrix_feature_names_and_feature_types(data, feature_names, feature_types):\n    check_dmatrix(data, feature_names=feature_names, feature_types=feature_types)\n\n\n@pytest.mark.skipif(\n    Engine.get() != \"Ray\",\n    reason=\"implemented only for Ray engine.\",\n)\ndef test_feature_names():\n    dataset = load_breast_cancer()\n    X = dataset.data\n    y = dataset.target\n    feature_names = [f\"feat{i}\" for i in range(X.shape[1])]\n\n    check_dmatrix(\n        X,\n        y,\n        feature_names=feature_names,\n    )\n\n    dmatrix = xgb.DMatrix(X, label=y, feature_names=feature_names)\n    md_dmatrix = mxgb.DMatrix(\n        pd.DataFrame(X), label=pd.Series(y), feature_names=feature_names\n    )\n\n    params = {\n        \"objective\": \"binary:logistic\",\n        \"eval_metric\": \"mlogloss\",\n    }\n\n    booster = xgb.train(params, dmatrix, num_boost_round=10)\n    
md_booster = mxgb.train(params, md_dmatrix, num_boost_round=10)\n\n    predictions = booster.predict(dmatrix)\n    modin_predictions = md_booster.predict(md_dmatrix)\n\n    preds = pandas.DataFrame(predictions).apply(np.round, axis=0)\n    modin_preds = modin_predictions.apply(np.round, axis=0)\n\n    accuracy = accuracy_score(y, preds)\n    md_accuracy = accuracy_score(y, modin_preds)\n\n    np.testing.assert_allclose(accuracy, md_accuracy, atol=0.005, rtol=0.002)\n\n    # Different feature_names (default) must raise error in this case\n    dm = xgb.DMatrix(X)\n    md_dm = mxgb.DMatrix(pd.DataFrame(X))\n    with pytest.raises(ValueError):\n        booster.predict(dm)\n    with pytest.raises(ValueError):\n        try_cast_to_pandas(md_booster.predict(md_dm))  # force materialization\n\n\ndef test_feature_weights():\n    n_rows = 10\n    n_cols = 50\n    fw = rng.uniform(size=n_cols)\n    X = rng.randn(n_rows, n_cols)\n    dm = xgb.DMatrix(X)\n    md_dm = mxgb.DMatrix(pd.DataFrame(X))\n    dm.set_info(feature_weights=fw)\n    md_dm.set_info(feature_weights=fw)\n    np.testing.assert_allclose(\n        dm.get_float_info(\"feature_weights\"), md_dm.get_float_info(\"feature_weights\")\n    )\n    # Handle empty\n    dm.set_info(feature_weights=np.empty((0,)))\n    md_dm.set_info(feature_weights=np.empty((0,)))\n\n    assert (\n        dm.get_float_info(\"feature_weights\").shape[0]\n        == md_dm.get_float_info(\"feature_weights\").shape[0]\n        == 0\n    )\n"
  },
  {
    "path": "modin/tests/experimental/xgboost/test_xgboost.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\nimport multiprocessing as mp\n\nimport numpy as np\nimport pytest\nimport ray\nimport xgboost\nfrom sklearn.datasets import (\n    load_breast_cancer,\n    load_diabetes,\n    load_digits,\n    load_iris,\n    load_wine,\n)\nfrom sklearn.metrics import accuracy_score, mean_squared_error\n\nimport modin\nimport modin.experimental.xgboost as xgb\nimport modin.pandas as pd\nfrom modin.config import Engine\nfrom modin.experimental.sklearn.model_selection.train_test_split import train_test_split\n\nif Engine.get() != \"Ray\":\n    pytest.skip(\"Implemented only for Ray engine.\", allow_module_level=True)\n\nray.init(log_to_driver=False)\n\nnum_cpus = mp.cpu_count()\n\n\n@pytest.mark.parametrize(\n    \"modin_type_y\",\n    [pd.DataFrame, pd.Series],\n)\n@pytest.mark.parametrize(\n    \"num_actors\",\n    [1, num_cpus, None, modin.config.NPartitions.get() + 1],\n)\n@pytest.mark.parametrize(\n    \"data\",\n    [\n        (\n            load_breast_cancer(),\n            {\"objective\": \"binary:logistic\", \"eval_metric\": [\"logloss\", \"error\"]},\n        ),\n    ],\n    ids=[\"load_breast_cancer\"],\n)\ndef test_xgb_with_binary_classification_datasets(data, 
num_actors, modin_type_y):\n    dataset, param = data\n    num_round = 10\n\n    X = dataset.data\n    y = dataset.target\n    xgb_dmatrix = xgboost.DMatrix(X, label=y)\n\n    modin_X = pd.DataFrame(X)\n    modin_y = modin_type_y(y)\n    mxgb_dmatrix = xgb.DMatrix(modin_X, label=modin_y)\n\n    evals_result_xgb = {}\n    evals_result_mxgb = {}\n    verbose_eval = False\n    bst = xgboost.train(\n        param,\n        xgb_dmatrix,\n        num_round,\n        evals_result=evals_result_xgb,\n        evals=[(xgb_dmatrix, \"train\")],\n        verbose_eval=verbose_eval,\n    )\n    modin_bst = xgb.train(\n        param,\n        mxgb_dmatrix,\n        num_round,\n        evals_result=evals_result_mxgb,\n        evals=[(mxgb_dmatrix, \"train\")],\n        num_actors=num_actors,\n        verbose_eval=verbose_eval,\n    )\n\n    for par in param[\"eval_metric\"]:\n        assert len(evals_result_xgb[\"train\"][par]) == len(\n            evals_result_xgb[\"train\"][par]\n        )\n        for i in range(len(evals_result_xgb[\"train\"][par])):\n            np.testing.assert_allclose(\n                evals_result_xgb[\"train\"][par][i],\n                evals_result_mxgb[\"train\"][par][i],\n                atol=0.011,\n            )\n\n    predictions = bst.predict(xgb_dmatrix)\n    modin_predictions = modin_bst.predict(mxgb_dmatrix)\n\n    preds = pd.DataFrame(predictions).apply(round)\n    modin_preds = modin_predictions.apply(round)\n\n    val = accuracy_score(y, preds)\n    modin_val = accuracy_score(modin_y, modin_preds)\n\n    np.testing.assert_allclose(val, modin_val, atol=0.002, rtol=0.002)\n\n\n@pytest.mark.parametrize(\n    \"modin_type_y\",\n    [pd.DataFrame, pd.Series],\n)\n@pytest.mark.parametrize(\n    \"num_actors\",\n    [1, num_cpus, None, modin.config.NPartitions.get() + 1],\n)\n@pytest.mark.parametrize(\n    \"data\",\n    [\n        (\n            load_iris(),\n            {\"num_class\": 3},\n        ),\n        (\n            load_digits(),\n      
      {\"num_class\": 10},\n        ),\n        (\n            load_wine(),\n            {\"num_class\": 3},\n        ),\n    ],\n    ids=[\"load_iris\", \"load_digits\", \"load_wine\"],\n)\ndef test_xgb_with_multiclass_classification_datasets(data, num_actors, modin_type_y):\n    dataset, param_ = data\n    num_round = 10\n    part_param = {\"objective\": \"multi:softprob\", \"eval_metric\": \"mlogloss\"}\n    param = {**param_, **part_param}\n\n    X = dataset.data\n    y = dataset.target\n    xgb_dmatrix = xgboost.DMatrix(X, label=y)\n\n    modin_X = pd.DataFrame(X)\n    modin_y = modin_type_y(y)\n    mxgb_dmatrix = xgb.DMatrix(modin_X, label=modin_y)\n\n    evals_result_xgb = {}\n    evals_result_mxgb = {}\n    verbose_eval = False\n    bst = xgboost.train(\n        param,\n        xgb_dmatrix,\n        num_round,\n        evals_result=evals_result_xgb,\n        evals=[(xgb_dmatrix, \"train\")],\n        verbose_eval=verbose_eval,\n    )\n    modin_bst = xgb.train(\n        param,\n        mxgb_dmatrix,\n        num_round,\n        evals_result=evals_result_mxgb,\n        evals=[(mxgb_dmatrix, \"train\")],\n        num_actors=num_actors,\n        verbose_eval=verbose_eval,\n    )\n\n    assert len(evals_result_xgb[\"train\"][\"mlogloss\"]) == len(\n        evals_result_mxgb[\"train\"][\"mlogloss\"]\n    )\n    for i in range(len(evals_result_xgb[\"train\"][\"mlogloss\"])):\n        np.testing.assert_allclose(\n            evals_result_xgb[\"train\"][\"mlogloss\"][i],\n            evals_result_mxgb[\"train\"][\"mlogloss\"][i],\n            atol=0.009,\n        )\n\n    predictions = bst.predict(xgb_dmatrix)\n    modin_predictions = modin_bst.predict(mxgb_dmatrix)\n\n    array_preds = np.asarray([np.argmax(line) for line in predictions])\n    modin_array_preds = np.asarray(\n        [np.argmax(line) for line in modin_predictions.to_numpy()]\n    )\n\n    val = accuracy_score(y, array_preds)\n    modin_val = accuracy_score(modin_y, modin_array_preds)\n\n    
np.testing.assert_allclose(val, modin_val)\n\n\n@pytest.mark.parametrize(\n    \"modin_type_y\",\n    [pd.DataFrame, pd.Series],\n)\n@pytest.mark.parametrize(\n    \"num_actors\",\n    [1, num_cpus, None, modin.config.NPartitions.get() + 1],\n)\n@pytest.mark.parametrize(\n    \"data\",\n    [(load_diabetes(), {\"eta\": 0.01})],\n    ids=[\"load_diabetes\"],\n)\ndef test_xgb_with_regression_datasets(data, num_actors, modin_type_y):\n    dataset, param = data\n    num_round = 10\n\n    X_df = pd.DataFrame(dataset.data)\n    y_df = modin_type_y(dataset.target)\n    X_train, X_test = train_test_split(X_df)\n    y_train, y_test = train_test_split(y_df)\n\n    train_xgb_dmatrix = xgboost.DMatrix(X_train, label=y_train)\n    test_xgb_dmatrix = xgboost.DMatrix(X_test, label=y_test)\n\n    train_mxgb_dmatrix = xgb.DMatrix(X_train, label=y_train)\n    test_mxgb_dmatrix = xgb.DMatrix(X_test, label=y_test)\n\n    evals_result_xgb = {}\n    evals_result_mxgb = {}\n    verbose_eval = False\n    bst = xgboost.train(\n        param,\n        train_xgb_dmatrix,\n        num_round,\n        evals_result=evals_result_xgb,\n        evals=[(train_xgb_dmatrix, \"train\"), (test_xgb_dmatrix, \"test\")],\n        verbose_eval=verbose_eval,\n    )\n    modin_bst = xgb.train(\n        param,\n        train_mxgb_dmatrix,\n        num_round,\n        evals_result=evals_result_mxgb,\n        evals=[(train_mxgb_dmatrix, \"train\"), (test_mxgb_dmatrix, \"test\")],\n        num_actors=num_actors,\n        verbose_eval=verbose_eval,\n    )\n\n    for param in [\"train\", \"test\"]:\n        assert len(evals_result_xgb[param][\"rmse\"]) == len(\n            evals_result_mxgb[param][\"rmse\"]\n        )\n        for i in range(len(evals_result_xgb[param][\"rmse\"])):\n            np.testing.assert_allclose(\n                evals_result_xgb[param][\"rmse\"][i],\n                evals_result_mxgb[param][\"rmse\"][i],\n                rtol=0.0007,\n            )\n\n    predictions = 
bst.predict(train_xgb_dmatrix)\n    modin_predictions = modin_bst.predict(train_mxgb_dmatrix)\n\n    val = mean_squared_error(y_train, predictions)\n    modin_val = mean_squared_error(y_train, modin_predictions)\n\n    np.testing.assert_allclose(val, modin_val, rtol=1.25e-05)\n\n\ndef test_invalid_input():\n    list_df = [[1, 2.0, True], [2, 3.0, False]]\n    with pytest.raises(AssertionError):\n        # Check that DMatrix uses only DataFrame\n        xgb.DMatrix(list_df, label=pd.Series([1, 2]))\n\n    param = {}\n    num_round = 2\n    with pytest.raises(AssertionError):\n        # Check that train uses only DMatrix\n        xgb.train(param, list_df, num_round)\n\n    df = pd.DataFrame([[1, 2.0, True], [2, 3.0, False]], columns=[\"a\", \"b\", \"c\"])\n    modin_dtrain = xgb.DMatrix(df, label=pd.Series([1, 2]))\n\n    modin_bst = xgb.train(param, modin_dtrain, num_round)\n\n    dt = [[1, 2.0, 3.3], [2, 3.0, 4.4]]\n\n    with pytest.raises(AssertionError):\n        # Check that predict uses only DMatrix\n        modin_bst.predict(dt)\n"
  },
  {
    "path": "modin/tests/interchange/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n"
  },
  {
    "path": "modin/tests/interchange/dataframe_protocol/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n"
  },
  {
    "path": "modin/tests/interchange/dataframe_protocol/base/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n"
  },
  {
    "path": "modin/tests/interchange/dataframe_protocol/base/test_sanity.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Basic sanity checks for the DataFrame exchange protocol.\"\"\"\n\nimport pytest\n\nimport modin.pandas as pd\nfrom modin.tests.pandas.utils import default_to_pandas_ignore_string\n\n\ndef test_sanity():\n    \"\"\"Test that the DataFrame protocol module is valid and could be imported correctly.\"\"\"\n    from modin.core.dataframe.base.interchange.dataframe_protocol.dataframe import (  # noqa\n        ProtocolDataframe,\n    )\n\n\n@pytest.mark.filterwarnings(default_to_pandas_ignore_string)\ndef test_basic_io(get_unique_base_execution):\n    \"\"\"Test that the protocol IO functions actually reach their implementation with no errors.\"\"\"\n\n    class TestPassed(BaseException):\n        pass\n\n    def dummy_io_method(*args, **kwargs):\n        \"\"\"Dummy method emulating that the code path reached the exchange protocol implementation.\"\"\"\n        raise TestPassed\n\n    query_compiler_cls = get_unique_base_execution\n    query_compiler_cls.from_interchange_dataframe = dummy_io_method\n    query_compiler_cls.to_interchange_dataframe = dummy_io_method\n\n    from modin.pandas.io import from_dataframe\n\n    with pytest.raises(TestPassed):\n        
from_dataframe(None)\n\n    with pytest.raises(TestPassed):\n        pd.DataFrame([[1]]).__dataframe__()\n"
  },
  {
    "path": "modin/tests/interchange/dataframe_protocol/base/test_utils.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Tests for common utility functions of the DataFrame exchange protocol.\"\"\"\n\nimport numpy as np\nimport pandas\nimport pytest\n\nfrom modin.core.dataframe.base.interchange.dataframe_protocol.utils import (\n    pandas_dtype_to_arrow_c,\n)\n\n\n# TODO: use ArrowSchema to get reference C-string.\n# At the time, there is no way to access ArrowSchema holding a type format string from python.\n# The only way to 'touch' it is to export the structure to a C-pointer:\n# https://github.com/apache/arrow/blob/5680d209fd870f99134e2d7299b47acd90fabb8e/python/pyarrow/types.pxi#L230-L239\n@pytest.mark.parametrize(\n    \"pandas_dtype, c_string\",\n    [\n        (np.dtype(\"bool\"), \"b\"),\n        (np.dtype(\"int8\"), \"c\"),\n        (np.dtype(\"uint8\"), \"C\"),\n        (np.dtype(\"int16\"), \"s\"),\n        (np.dtype(\"uint16\"), \"S\"),\n        (np.dtype(\"int32\"), \"i\"),\n        (np.dtype(\"uint32\"), \"I\"),\n        (np.dtype(\"int64\"), \"l\"),\n        (np.dtype(\"uint64\"), \"L\"),\n        (np.dtype(\"float16\"), \"e\"),\n        (np.dtype(\"float32\"), \"f\"),\n        (np.dtype(\"float64\"), \"g\"),\n        (pandas.Series([\"a\"]).dtype, \"u\"),\n    
    (\n            pandas.Series([0]).astype(\"datetime64[ns]\").dtype,\n            \"tsn:\",\n        ),\n    ],\n)\ndef test_dtype_to_arrow_c(pandas_dtype, c_string):  # noqa PR01\n    \"\"\"Test ``pandas_dtype_to_arrow_c`` utility function.\"\"\"\n    assert pandas_dtype_to_arrow_c(pandas_dtype) == c_string\n"
  },
  {
    "path": "modin/tests/interchange/dataframe_protocol/pandas/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n"
  },
  {
    "path": "modin/tests/interchange/dataframe_protocol/pandas/test_protocol.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Dataframe exchange protocol tests that are specific for pandas storage format implementation.\"\"\"\n\nimport pandas\n\nimport modin.pandas as pd\nfrom modin.pandas.io import from_dataframe\nfrom modin.tests.pandas.utils import df_equals, test_data\nfrom modin.tests.test_utils import (\n    df_or_series_using_native_execution,\n    warns_that_defaulting_to_pandas_if,\n)\n\n\ndef eval_df_protocol(modin_df_producer):\n    internal_modin_df_producer = modin_df_producer.__dataframe__()\n    # Our configuration in pytest.ini requires that we explicitly catch all\n    # instances of defaulting to pandas, this one raises a warning on `.from_dataframe`\n    with warns_that_defaulting_to_pandas_if(\n        not df_or_series_using_native_execution(modin_df_producer)\n    ):\n        modin_df_consumer = from_dataframe(modin_df_producer)\n        internal_modin_df_consumer = from_dataframe(internal_modin_df_producer)\n\n    # TODO: the following assertions verify that `from_dataframe` doesn't return\n    # the same object untouched due to optimization branching, it actually should\n    # do so but the logic is not implemented yet, so the assertions are passing\n    # for 
now. When another implementation of the protocol becomes available, the\n    # producer's type must be replaced with one that differs from the consumer's,\n    # as these assertions may otherwise start failing.\n    assert modin_df_producer is not modin_df_consumer\n    assert internal_modin_df_producer is not internal_modin_df_consumer\n    assert (\n        modin_df_producer._query_compiler._modin_frame\n        is not modin_df_consumer._query_compiler._modin_frame\n    )\n\n    df_equals(modin_df_producer, modin_df_consumer)\n    df_equals(modin_df_producer, internal_modin_df_consumer)\n\n\ndef test_simple_import():\n    modin_df = pd.DataFrame(test_data[\"int_data\"])\n    eval_df_protocol(modin_df)\n\n\ndef test_categorical_from_dataframe():\n    modin_df = pd.DataFrame(\n        {\"foo\": pd.Series([\"0\", \"1\", \"2\", \"3\", \"0\", \"3\", \"2\", \"3\"], dtype=\"category\")}\n    )\n    eval_df_protocol(modin_df)\n\n\ndef test_from_dataframe_with_empty_dataframe():\n    modin_df = pd.DataFrame({\"foo_col\": pd.Series([], dtype=\"int64\")})\n    with warns_that_defaulting_to_pandas_if(\n        not df_or_series_using_native_execution(modin_df)\n    ):\n        eval_df_protocol(modin_df)\n\n\ndef test_interchange_with_pandas_string():\n    modin_df = pd.DataFrame({\"fips\": [\"01001\"]})\n    pandas_df = pandas.api.interchange.from_dataframe(modin_df.__dataframe__())\n    df_equals(modin_df, pandas_df)\n\n\ndef test_interchange_with_datetime():\n    date_range = pd.date_range(\n        start=pd.Timestamp(\"2024-01-01\", unit=\"ns\"),\n        end=pd.Timestamp(\"2024-03-01\", unit=\"ns\"),\n        freq=\"D\",\n    )\n    modin_df = pd.DataFrame(\n        {\n            \"datetime_s\": date_range.astype(\"datetime64[s]\"),\n            \"datetime_ns\": date_range.astype(\"datetime64[ns]\"),\n        }\n    )\n    eval_df_protocol(modin_df)\n"
  },
  {
    "path": "modin/tests/interchange/dataframe_protocol/test_general.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Dataframe exchange protocol tests that are common for every implementation.\"\"\"\n\nimport ctypes\nimport math\n\nimport pytest\n\nimport modin.pandas as pd\n\n\n@pytest.fixture\ndef df_from_dict():\n    def maker(dct, is_categorical=False):\n        df = pd.DataFrame(dct, dtype=(\"category\" if is_categorical else None))\n        return df\n\n    return maker\n\n\n@pytest.mark.parametrize(\n    \"test_data\",\n    [\n        {\"a\": [\"foo\", \"bar\"], \"b\": [\"baz\", \"qux\"]},\n        {\"a\": [1.5, 2.5, 3.5], \"b\": [9.2, 10.5, 11.8]},\n        {\"A\": [1, 2, 3, 4], \"B\": [1, 2, 3, 4]},\n    ],\n    ids=[\"str_data\", \"float_data\", \"int_data\"],\n)\ndef test_only_one_dtype(test_data, df_from_dict):\n    columns = list(test_data.keys())\n    df = df_from_dict(test_data)\n    dfX = df.__dataframe__()\n\n    column_size = len(test_data[columns[0]])\n    for column in columns:\n        assert dfX.get_column_by_name(column).null_count == 0\n        assert dfX.get_column_by_name(column).size() == column_size\n        assert dfX.get_column_by_name(column).offset == 0\n\n\ndef test_float_int(df_from_dict):\n    df = df_from_dict(\n        {\n            
\"a\": [1, 2, 3],\n            \"b\": [3, 4, 5],\n            \"c\": [1.5, 2.5, 3.5],\n            \"d\": [9, 10, 11],\n            \"e\": [True, False, True],\n            \"f\": [\"a\", \"\", \"c\"],\n        }\n    )\n    dfX = df.__dataframe__()\n    columns = {\"a\": 0, \"b\": 0, \"c\": 2, \"d\": 0, \"e\": 20, \"f\": 21}\n\n    for column, kind in columns.items():\n        colX = dfX.get_column_by_name(column)\n        assert colX.null_count == 0\n        assert colX.size() == 3\n        assert colX.offset == 0\n\n        assert colX.dtype[0] == kind\n\n    assert dfX.get_column_by_name(\"c\").dtype[1] == 64\n\n\ndef test_na_float(df_from_dict):\n    df = df_from_dict({\"a\": [1.0, math.nan, 2.0]})\n    dfX = df.__dataframe__()\n    colX = dfX.get_column_by_name(\"a\")\n    assert colX.null_count == 1\n\n\ndef test_null_count(df_from_dict):\n    df = df_from_dict({\"foo\": [42]})\n    dfX = df.__dataframe__()\n    colX = dfX.get_column_by_name(\"foo\")\n    null_count = colX.null_count\n    assert null_count == 0 and type(null_count) is int\n\n\ndef test_noncategorical(df_from_dict):\n    df = df_from_dict({\"a\": [1, 2, 3]})\n    dfX = df.__dataframe__()\n    colX = dfX.get_column_by_name(\"a\")\n    with pytest.raises(TypeError):\n        colX.describe_categorical\n\n\ndef test_categorical(df_from_dict):\n    df = df_from_dict(\n        {\"weekday\": [\"Mon\", \"Tue\", \"Mon\", \"Wed\", \"Mon\", \"Thu\", \"Fri\", \"Sat\", \"Sun\"]},\n        is_categorical=True,\n    )\n\n    colX = df.__dataframe__().get_column_by_name(\"weekday\")\n    is_ordered, is_dictionary, _ = colX.describe_categorical.values()\n    assert isinstance(is_ordered, bool)\n    assert isinstance(is_dictionary, bool)\n\n\ndef test_dataframe(df_from_dict):\n    df = df_from_dict(\n        {\"x\": [True, True, False], \"y\": [1, 2, 0], \"z\": [9.2, 10.5, 11.8]}\n    )\n    dfX = df.__dataframe__()\n\n    assert dfX.num_columns() == 3\n    assert dfX.num_rows() == 3\n    assert 
dfX.num_chunks() == 1\n    assert list(dfX.column_names()) == [\"x\", \"y\", \"z\"]\n    assert list(dfX.select_columns((0, 2)).column_names()) == list(\n        dfX.select_columns_by_name((\"x\", \"z\")).column_names()\n    )\n\n\n@pytest.mark.parametrize([\"size\", \"n_chunks\"], [(10, 3), (12, 3), (12, 5)])\ndef test_df_get_chunks(size, n_chunks, df_from_dict):\n    df = df_from_dict({\"x\": list(range(size))})\n    dfX = df.__dataframe__()\n    chunks = list(dfX.get_chunks(n_chunks))\n    assert len(chunks) == n_chunks\n    assert sum(chunk.num_rows() for chunk in chunks) == size\n\n\n@pytest.mark.parametrize([\"size\", \"n_chunks\"], [(10, 3), (12, 3), (12, 5)])\ndef test_column_get_chunks(size, n_chunks, df_from_dict):\n    df = df_from_dict({\"x\": list(range(size))})\n    dfX = df.__dataframe__()\n    chunks = list(dfX.get_column(0).get_chunks(n_chunks))\n    assert len(chunks) == n_chunks\n    assert sum(chunk.size() for chunk in chunks) == size\n\n\ndef test_get_columns(df_from_dict):\n    df = df_from_dict({\"a\": [0, 1], \"b\": [2.5, 3.5]})\n    dfX = df.__dataframe__()\n    for colX in dfX.get_columns():\n        assert colX.size() == 2\n        assert colX.num_chunks() == 1\n    assert dfX.get_column(0).dtype[0] == 0\n    assert dfX.get_column(1).dtype[0] == 2\n\n\ndef test_buffer(df_from_dict):\n    arr = [0, 1, -1]\n    df = df_from_dict({\"a\": arr})\n    dfX = df.__dataframe__()\n    colX = dfX.get_column(0)\n    bufX = colX.get_buffers()\n\n    dataBuf, dataDtype = bufX[\"data\"]\n    assert dataBuf.bufsize > 0\n    assert dataBuf.ptr != 0\n    device, _ = dataBuf.__dlpack_device__()\n\n    assert dataDtype[0] == 0\n\n    if device == 1:  # CPU-only as we're going to directly read memory here\n        bitwidth = dataDtype[1]\n        ctype = {\n            8: ctypes.c_int8,\n            16: ctypes.c_int16,\n            32: ctypes.c_int32,\n            64: ctypes.c_int64,\n        }[bitwidth]\n\n        for idx, truth in enumerate(arr):\n          
  val = ctype.from_address(dataBuf.ptr + idx * (bitwidth // 8)).value\n            assert val == truth, f\"Buffer at index {idx} mismatch\"\n"
  },
  {
    "path": "modin/tests/numpy/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n"
  },
  {
    "path": "modin/tests/numpy/test_array.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport warnings\n\nimport numpy\nimport pytest\n\nimport modin.numpy as np\n\nfrom .utils import assert_scalar_or_array_equal\n\n\n@pytest.fixture\ndef change_numpy_print_threshold():\n    prev_threshold = numpy.get_printoptions()[\"threshold\"]\n    numpy.set_printoptions(threshold=50)\n    yield prev_threshold\n    numpy.set_printoptions(threshold=prev_threshold)\n\n\n@pytest.mark.parametrize(\n    \"size\",\n    [\n        100,\n        (2, 100),\n        (100, 2),\n        (1, 100),\n        (100, 1),\n        (100, 100),\n        (6, 100),\n        (100, 6),\n        (100, 7),\n        (7, 100),\n    ],\n)\ndef test_repr(size, change_numpy_print_threshold):\n    numpy_arr = numpy.random.randint(-100, 100, size=size)\n    modin_arr = np.array(numpy_arr)\n    assert repr(modin_arr) == repr(numpy_arr)\n\n\n@pytest.mark.parametrize(\"size\", [100, (2, 100), (100, 2), (1, 100), (100, 1)])\ndef test_shape(size):\n    numpy_arr = numpy.random.randint(-100, 100, size=size)\n    modin_arr = np.array(numpy_arr)\n    assert modin_arr.shape == numpy_arr.shape\n\n\ndef test_dtype():\n    numpy_arr = numpy.array([[1, \"2\"], [3, \"4\"]])\n    modin_arr = np.array([[1, 
\"2\"], [3, \"4\"]])\n    assert modin_arr.dtype == numpy_arr.dtype\n    modin_arr = modin_arr == modin_arr.T\n    numpy_arr = numpy_arr == numpy_arr.T\n    assert modin_arr.dtype == numpy_arr.dtype\n\n\ndef test_conversion():\n    import modin.pandas as pd\n    from modin.numpy.utils import try_convert_from_interoperable_type\n\n    df = pd.DataFrame(numpy.random.randint(0, 100, size=(100, 100)))\n    series = df.iloc[0]\n    df_converted = try_convert_from_interoperable_type(df)\n    assert isinstance(df_converted, np.array)\n    series_converted = try_convert_from_interoperable_type(series)\n    assert isinstance(series_converted, np.array)\n    assert_scalar_or_array_equal(df_converted, df)\n    assert_scalar_or_array_equal(series_converted, series)\n    pandas_df = df._to_pandas()\n    pandas_series = series._to_pandas()\n    pandas_converted = try_convert_from_interoperable_type(pandas_df)\n    assert isinstance(pandas_converted, type(pandas_df))\n    assert pandas_converted.equals(pandas_df)\n    pandas_converted = try_convert_from_interoperable_type(pandas_series)\n    assert isinstance(pandas_converted, type(pandas_series))\n    assert pandas_converted.equals(pandas_series)\n\n\ndef test_to_df():\n    import pandas\n\n    import modin.pandas as pd\n    from modin.tests.pandas.utils import df_equals\n\n    modin_df = pd.DataFrame(np.array([1, 2, 3]))\n    pandas_df = pandas.DataFrame(numpy.array([1, 2, 3]))\n    df_equals(pandas_df, modin_df)\n    modin_df = pd.DataFrame(np.array([[1, 2, 3], [4, 5, 6]]))\n    pandas_df = pandas.DataFrame(numpy.array([[1, 2, 3], [4, 5, 6]]))\n    df_equals(pandas_df, modin_df)\n    for kw in [{}, {\"dtype\": str}]:\n        modin_df, pandas_df = [\n            lib[0].DataFrame(\n                lib[1].array([[1, 2, 3], [4, 5, 6]]),\n                columns=[\"col 0\", \"col 1\", \"col 2\"],\n                index=pd.Index([4, 6]),\n                **kw\n            )\n            for lib in ((pd, np), (pandas, numpy))\n      
  ]\n        df_equals(pandas_df, modin_df)\n    df_equals(pandas_df, modin_df)\n\n\ndef test_to_series():\n    import pandas\n\n    import modin.pandas as pd\n    from modin.tests.pandas.utils import df_equals\n\n    with pytest.raises(ValueError, match=\"Data must be 1-dimensional\"):\n        pd.Series(np.array([[1, 2, 3], [4, 5, 6]]))\n    modin_series = pd.Series(np.array([1, 2, 3]), index=pd.Index([-1, -2, -3]))\n    pandas_series = pandas.Series(\n        numpy.array([1, 2, 3]), index=pandas.Index([-1, -2, -3])\n    )\n    df_equals(modin_series, pandas_series)\n    modin_series = pd.Series(\n        np.array([1, 2, 3]), index=pd.Index([-1, -2, -3]), dtype=str\n    )\n    pandas_series = pandas.Series(\n        numpy.array([1, 2, 3]), index=pandas.Index([-1, -2, -3]), dtype=str\n    )\n    df_equals(modin_series, pandas_series)\n\n\ndef test_update_inplace():\n    out = np.array([1, 2, 3])\n    arr1 = np.array([1, 2, 3])\n    arr2 = np.array(out, copy=False)\n    np.add(arr1, arr1, out=out)\n    assert_scalar_or_array_equal(out, arr2)\n    out = np.array([1, 2, 3])\n    arr2 = np.array(out, copy=False)\n    np.add(arr1, arr1, out=out, where=False)\n    assert_scalar_or_array_equal(out, arr2)\n\n\n@pytest.mark.parametrize(\n    \"data_out\",\n    [\n        numpy.zeros((1, 3)),\n        numpy.zeros((2, 3)),\n    ],\n)\ndef test_out_broadcast(data_out):\n    if data_out.shape == (2, 3):\n        pytest.xfail(\"broadcasting would require duplicating row: see GH#5819\")\n    data1 = [[1, 2, 3]]\n    data2 = [7, 8, 9]\n    modin_out, numpy_out = np.array(data_out), numpy.array(data_out)\n    numpy.add(numpy.array(data1), numpy.array(data2), out=numpy_out)\n    np.add(np.array(data1), np.array(data2), out=modin_out)\n    assert_scalar_or_array_equal(modin_out, numpy_out)\n\n\ndef test_out_broadcast_error():\n    with pytest.raises(ValueError):\n        # Incompatible dimensions between inputs\n        np.add(np.array([1, 2, 3]), np.array([[1, 2], [3, 4]]))\n\n    
with pytest.raises(ValueError):\n        # Compatible input broadcast dimensions, but output array dimensions are wrong\n        out = np.array([0])\n        np.add(np.array([[1, 2], [3, 4]]), np.array([1, 2]), out=out)\n\n    with pytest.raises(ValueError):\n        # Compatible input broadcast dimensions, but output array dimensions are wrong\n        # (cannot broadcast a 2x2 result into a 1x2 array)\n        out = np.array([0, 0])\n        np.add(np.array([[1, 2], [3, 4]]), np.array([1, 2]), out=out)\n\n    with pytest.raises(ValueError):\n        # Compatible input broadcast dimensions, but output array dimensions are wrong\n        # (cannot broadcast 1x2 into 1D 2-element array)\n        out = np.array([0, 0])\n        np.add(np.array([[1, 2]]), np.array([1, 2]), out=out)\n\n    with pytest.raises(ValueError):\n        # Compatible input broadcast dimensions, but output array dimensions are wrong\n        # (cannot broadcast a 2x2 result into a 3x2 array)\n        # Technically, our error message here does not match numpy's exactly, as the\n        # numpy message will specify both input shapes, whereas we only specify the\n        # shape of the default broadcast between the two inputs\n        out = np.array([[0, 0], [0, 0], [0, 0]])\n        np.add(np.array([[1, 2], [3, 4]]), np.array([1, 2]), out=out)\n\n\n@pytest.mark.parametrize(\"size\", [100, (2, 100), (100, 2), (1, 100), (100, 1)])\ndef test_array_ufunc(size):\n    # Test ufunc.__call__\n    numpy_arr = numpy.random.randint(-100, 100, size=size)\n    modin_arr = np.array(numpy_arr)\n    modin_result = numpy.sign(modin_arr)\n    numpy_result = numpy.sign(numpy_arr)\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n    # Test ufunc that we have support for.\n    modin_result = numpy.add(modin_arr, modin_arr)\n    numpy_result = numpy.add(numpy_arr, numpy_arr)\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n    # Test ufunc that we have support for, but method that we do not 
implement.\n    modin_result = numpy.add.reduce(modin_arr)\n    numpy_result = numpy.add.reduce(numpy_arr)\n    assert numpy_result == modin_result\n    # We do not test ufunc.reduce and ufunc.accumulate, since these require a binary reduce\n    # operation that Modin does not currently support.\n\n\n@pytest.mark.parametrize(\"size\", [100, (2, 100), (100, 2), (1, 100), (100, 1)])\ndef test_array_function(size):\n    numpy_arr = numpy.random.randint(-100, 100, size=size)\n    modin_arr = np.array(numpy_arr)\n    # Test from array shaping\n    modin_result = numpy.ravel(modin_arr)\n    numpy_result = numpy.ravel(numpy_arr)\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n    # Test from array creation\n    modin_result = numpy.zeros_like(modin_arr)\n    numpy_result = numpy.zeros_like(numpy_arr)\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n    # Test from math\n    modin_result = numpy.sum(modin_arr)\n    numpy_result = numpy.sum(numpy_arr)\n    assert numpy_result == modin_result\n\n\ndef test_array_where():\n    numpy_flat_arr = numpy.random.randint(-100, 100, size=100)\n    modin_flat_arr = np.array(numpy_flat_arr)\n    with pytest.warns(\n        UserWarning, match=\"np.where method with only condition specified\"\n    ):\n        warnings.filterwarnings(\"ignore\", message=\"Distributing\")\n        (modin_flat_arr <= 0).where()\n    with pytest.raises(ValueError, match=\"np.where requires x and y\"):\n        (modin_flat_arr <= 0).where(x=[\"Should Fail.\"])\n    with pytest.warns(UserWarning, match=\"np.where not supported when both x and y\"):\n        warnings.filterwarnings(\"ignore\", message=\"Distributing\")\n        modin_result = (modin_flat_arr <= 0).where(x=4, y=5)\n    numpy_result = numpy.where(numpy_flat_arr <= 0, 4, 5)\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n    modin_flat_bool_arr = modin_flat_arr <= 0\n    numpy_flat_bool_arr = numpy_flat_arr <= 0\n    modin_result = 
modin_flat_bool_arr.where(x=5, y=modin_flat_arr)\n    numpy_result = numpy.where(numpy_flat_bool_arr, 5, numpy_flat_arr)\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n    modin_result = modin_flat_bool_arr.where(x=modin_flat_arr, y=5)\n    numpy_result = numpy.where(numpy_flat_bool_arr, numpy_flat_arr, 5)\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n    modin_result = modin_flat_bool_arr.where(x=modin_flat_arr, y=(-1 * modin_flat_arr))\n    numpy_result = numpy.where(\n        numpy_flat_bool_arr, numpy_flat_arr, (-1 * numpy_flat_arr)\n    )\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n    numpy_arr = numpy_flat_arr.reshape((10, 10))\n    modin_arr = np.array(numpy_arr)\n    modin_bool_arr = modin_arr > 0\n    numpy_bool_arr = numpy_arr > 0\n    modin_result = modin_bool_arr.where(modin_arr, 10 * modin_arr)\n    numpy_result = numpy.where(numpy_bool_arr, numpy_arr, 10 * numpy_arr)\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n\n\n@pytest.mark.parametrize(\"method\", [\"argmax\", \"argmin\"])\ndef test_argmax_argmin(method):\n    numpy_arr = numpy.array([[1, 2, 3], [4, 5, np.nan]])\n    modin_arr = np.array(numpy_arr)\n    assert_scalar_or_array_equal(\n        getattr(np, method)(modin_arr, axis=1),\n        getattr(numpy, method)(numpy_arr, axis=1),\n    )\n\n\ndef test_flatten():\n    numpy_flat_arr = numpy.random.randint(-100, 100, size=100)\n    modin_flat_arr = np.array(numpy_flat_arr)\n    assert_scalar_or_array_equal(modin_flat_arr.flatten(), numpy_flat_arr.flatten())\n    numpy_arr = numpy_flat_arr.reshape((10, 10))\n    modin_arr = np.array(numpy_arr)\n    assert_scalar_or_array_equal(modin_arr.flatten(), numpy_arr.flatten())\n\n\ndef test_transpose():\n    numpy_flat_arr = numpy.random.randint(-100, 100, size=100)\n    modin_flat_arr = np.array(numpy_flat_arr)\n    assert_scalar_or_array_equal(modin_flat_arr.transpose(), numpy_flat_arr.transpose())\n    numpy_arr = 
numpy_flat_arr.reshape((10, 10))\n    modin_arr = np.array(numpy_arr)\n    assert_scalar_or_array_equal(modin_arr.transpose(), numpy_arr.transpose())\n    assert_scalar_or_array_equal(modin_arr.T, numpy_arr.T)\n\n\ndef test_astype():\n    numpy_arr = numpy.array([[1, 2], [3, 4]])\n    modin_arr = np.array([[1, 2], [3, 4]])\n    modin_result = modin_arr.astype(numpy.float64)\n    numpy_result = numpy_arr.astype(numpy.float64)\n    assert modin_result.dtype == numpy_result.dtype\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n    modin_result = modin_arr.astype(str)\n    numpy_result = numpy_arr.astype(str)\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n    assert_scalar_or_array_equal(modin_arr, numpy_arr)\n    modin_result = modin_arr.astype(str, copy=False)\n    numpy_result = numpy_arr.astype(str, copy=False)\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n    assert_scalar_or_array_equal(modin_arr, numpy_arr)\n    modin_result = modin_arr.astype(numpy.float64, copy=False)\n    numpy_result = numpy_arr.astype(numpy.float64, copy=False)\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n    assert_scalar_or_array_equal(modin_arr, numpy_arr)\n\n\ndef test_set_shape():\n    numpy_arr = numpy.array([[1, 2, 3], [4, 5, 6]])\n    numpy_arr.shape = (6,)\n    modin_arr = np.array([[1, 2, 3], [4, 5, 6]])\n    modin_arr.shape = (6,)\n    assert_scalar_or_array_equal(modin_arr, numpy_arr)\n    modin_arr.shape = 6  # Same as using (6,)\n    assert_scalar_or_array_equal(modin_arr, numpy_arr)\n    with pytest.raises(ValueError, match=\"cannot reshape\"):\n        modin_arr.shape = (4,)\n\n\ndef test__array__():\n    numpy_arr = numpy.array([[1, 2, 3], [4, 5, 6]])\n    modin_arr = np.array(numpy_arr)\n    # this implicitly calls `__array__`\n    converted_array = numpy.array(modin_arr)\n    assert type(converted_array) is type(numpy_arr)\n    assert_scalar_or_array_equal(converted_array, numpy_arr)\n"
  },
  {
    "path": "modin/tests/numpy/test_array_arithmetic.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport numpy\nimport pytest\n\nimport modin.numpy as np\n\nfrom .utils import assert_scalar_or_array_equal\n\n\n@pytest.mark.parametrize(\n    \"operand1_shape\",\n    [\n        100,\n        (1, 100),\n        (3, 100),\n    ],\n)\n@pytest.mark.parametrize(\n    \"operand2_shape\",\n    [\n        100,\n        (1, 100),\n        (3, 100),\n        1,\n    ],\n)\n@pytest.mark.parametrize(\n    \"operator\",\n    [\n        \"__add__\",\n        \"__sub__\",\n        \"__truediv__\",\n        \"__mul__\",\n        \"__rtruediv__\",\n        \"__rmul__\",\n        \"__radd__\",\n        \"__rsub__\",\n        \"__ge__\",\n        \"__gt__\",\n        \"__lt__\",\n        \"__le__\",\n        \"__eq__\",\n        \"__ne__\",\n    ],\n)\ndef test_basic_arithmetic_with_broadcast(operand1_shape, operand2_shape, operator):\n    \"\"\"Test of operators that support broadcasting.\"\"\"\n    if operand1_shape == (1, 100) or operand2_shape == (1, 100):\n        # For some reason, marking the param with xfail leads to [XPASS(strict)] and a reported failure\n        pytest.xfail(reason=\"broadcasting is broken: see GH#5894\")\n    operand1 = numpy.random.randint(-100, 100, 
size=operand1_shape)\n    operand2 = numpy.random.randint(-100, 100, size=operand2_shape)\n    numpy_result = getattr(operand1, operator)(operand2)\n    if operand2_shape == 1:\n        # Tests binary ops with a scalar\n        modin_result = getattr(np.array(operand1), operator)(operand2[0])\n    else:\n        modin_result = getattr(np.array(operand1), operator)(np.array(operand2))\n    if operator not in [\"__truediv__\", \"__rtruediv__\"]:\n        assert_scalar_or_array_equal(\n            modin_result,\n            numpy_result,\n            err_msg=f\"Binary Op {operator} failed.\",\n        )\n    else:\n        # Truediv can have precision issues, where thanks to floating point error, the numbers\n        # aren't exactly the same across both, but are functionally equivalent, since the difference\n        # is less than 1e-12.\n        numpy.testing.assert_array_almost_equal(\n            modin_result._to_numpy(),\n            numpy_result,\n            decimal=12,\n            err_msg=\"Binary Op __truediv__ failed.\",\n        )\n\n\n@pytest.mark.parametrize(\"matched_axis\", [0, 1])\n@pytest.mark.parametrize(\n    \"operator\",\n    [\n        \"__add__\",\n        \"__sub__\",\n        \"__truediv__\",\n        \"__mul__\",\n        \"__rtruediv__\",\n        \"__rmul__\",\n        \"__radd__\",\n        \"__rsub__\",\n        \"__ge__\",\n        \"__gt__\",\n        \"__lt__\",\n        \"__le__\",\n        \"__eq__\",\n        \"__ne__\",\n    ],\n)\ndef test_binary_bad_broadcast(matched_axis, operator):\n    \"\"\"Tests broadcasts between 2d arrays that should fail.\"\"\"\n    if matched_axis == 0:\n        operand1 = numpy.random.randint(-100, 100, size=(3, 100))\n        operand2 = numpy.random.randint(-100, 100, size=(3, 200))\n    else:\n        operand1 = numpy.random.randint(-100, 100, size=(100, 3))\n        operand2 = numpy.random.randint(-100, 100, size=(200, 3))\n    with pytest.raises(ValueError):\n        getattr(operand1, 
operator)(operand2)\n    with pytest.raises(ValueError):\n        getattr(np.array(operand1), operator)(np.array(operand2))\n\n\n@pytest.mark.parametrize(\"operator\", [\"__pow__\", \"__floordiv__\", \"__mod__\"])\ndef test_arithmetic(operator):\n    \"\"\"Test of operators that do not yet support broadcasting.\"\"\"\n    for size, textdim in ((100, \"1D\"), ((10, 10), \"2D\")):\n        operand1 = numpy.random.randint(-100, 100, size=size)\n        lower_bound = -100 if operator != \"__pow__\" else 0\n        operand2 = numpy.random.randint(lower_bound, 100, size=size)\n        modin_result = getattr(np.array(operand1), operator)(np.array(operand2))\n        numpy_result = getattr(operand1, operator)(operand2)\n        numpy.testing.assert_array_almost_equal(\n            modin_result._to_numpy(),\n            numpy_result,\n            decimal=12,\n            err_msg=f\"Binary Op {operator} failed on {textdim} arrays.\",\n        )\n\n\ndef test_arithmetic_nans_and_zeros():\n    numpy_arr1 = numpy.array([[1, 0, 3], [numpy.nan, 0, numpy.nan]])\n    numpy_arr2 = numpy.array([1, 0, 0])\n    assert_scalar_or_array_equal(\n        (np.array(numpy_arr1) // np.array(numpy_arr2)),\n        numpy_arr1 // numpy_arr2,\n    )\n    assert_scalar_or_array_equal(\n        (np.array([0]) // 0),\n        numpy.array([0]) // 0,\n    )\n    assert_scalar_or_array_equal(\n        (np.array([0], dtype=numpy.float64) // 0),\n        numpy.array([0], dtype=numpy.float64) // 0,\n    )\n\n\n@pytest.mark.parametrize(\"size\", [100, (2, 100), (100, 2), (1, 100), (100, 1)])\ndef test_scalar_arithmetic(size):\n    numpy_arr = numpy.random.randint(-100, 100, size=size)\n    modin_arr = np.array(numpy_arr)\n    scalar = numpy.random.randint(1, 100)\n    assert_scalar_or_array_equal(\n        (scalar * modin_arr), scalar * numpy_arr, err_msg=\"__mul__ failed.\"\n    )\n    assert_scalar_or_array_equal(\n        (modin_arr * scalar),\n        scalar * numpy_arr,\n        err_msg=\"__rmul__ 
failed.\",\n    )\n    assert_scalar_or_array_equal(\n        (scalar / modin_arr),\n        scalar / numpy_arr,\n        err_msg=\"__rtruediv__ failed.\",\n    )\n    assert_scalar_or_array_equal(\n        (modin_arr / scalar),\n        numpy_arr / scalar,\n        err_msg=\"__truediv__ failed.\",\n    )\n    assert_scalar_or_array_equal(\n        (scalar + modin_arr),\n        scalar + numpy_arr,\n        err_msg=\"__radd__ failed.\",\n    )\n    assert_scalar_or_array_equal(\n        (modin_arr + scalar), scalar + numpy_arr, err_msg=\"__add__ failed.\"\n    )\n    assert_scalar_or_array_equal(\n        (scalar - modin_arr),\n        scalar - numpy_arr,\n        err_msg=\"__rsub__ failed.\",\n    )\n    assert_scalar_or_array_equal(\n        (modin_arr - scalar), numpy_arr - scalar, err_msg=\"__sub__ failed.\"\n    )\n\n\n@pytest.mark.parametrize(\"op_name\", [\"abs\", \"exp\", \"sqrt\", \"tanh\"])\ndef test_unary_arithmetic(op_name):\n    numpy_flat_arr = numpy.random.randint(-100, 100, size=100)\n    modin_flat_arr = np.array(numpy_flat_arr)\n    assert_scalar_or_array_equal(\n        getattr(np, op_name)(modin_flat_arr),\n        getattr(numpy, op_name)(numpy_flat_arr),\n    )\n    numpy_arr = numpy_flat_arr.reshape((10, 10))\n    modin_arr = np.array(numpy_arr)\n    assert_scalar_or_array_equal(\n        getattr(np, op_name)(modin_arr), getattr(numpy, op_name)(numpy_arr)\n    )\n\n\ndef test_invert():\n    numpy_flat_arr = numpy.random.randint(-100, 100, size=100)\n    modin_flat_arr = np.array(numpy_flat_arr)\n    assert_scalar_or_array_equal(~modin_flat_arr, ~numpy_flat_arr)\n    numpy_arr = numpy_flat_arr.reshape((10, 10))\n    modin_arr = np.array(numpy_arr)\n    assert_scalar_or_array_equal(~modin_arr, ~numpy_arr)\n    numpy_flat_arr = numpy.random.randint(-100, 100, size=100) < 0\n    modin_flat_arr = np.array(numpy_flat_arr)\n    assert_scalar_or_array_equal(~modin_flat_arr, ~numpy_flat_arr)\n    numpy_arr = numpy_flat_arr.reshape((10, 10))\n    
modin_arr = np.array(numpy_arr)\n    assert_scalar_or_array_equal(~modin_arr, ~numpy_arr)\n"
  },
  {
    "path": "modin/tests/numpy/test_array_axis_functions.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport numpy\nimport pytest\n\nimport modin.numpy as np\n\nfrom .utils import assert_scalar_or_array_equal\n\n\ndef test_max():\n    # Test 1D\n    numpy_arr = numpy.random.randint(-100, 100, size=100)\n    modin_arr = np.array(numpy_arr)\n    assert modin_arr.max() == numpy_arr.max()\n    modin_result = modin_arr.max(axis=0)\n    numpy_result = modin_arr.max(axis=0)\n    assert modin_result == numpy_result\n    modin_result = modin_arr.max(initial=200)\n    numpy_result = numpy_arr.max(initial=200)\n    assert modin_result == numpy_result\n    modin_result = modin_arr.max(initial=0, where=False)\n    numpy_result = numpy_arr.max(initial=0, where=False)\n    assert modin_result == numpy_result\n    modin_result = modin_arr.max(keepdims=True)\n    numpy_result = numpy_arr.max(keepdims=True)\n    assert modin_result.shape == numpy_result.shape\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n    numpy_arr = numpy.array([1, 10000, 2, 3, 4, 5])\n    modin_arr = np.array(numpy_arr)\n    numpy_mask = numpy.array([True, False, True, True, True, True])\n    modin_mask = np.array(numpy_mask)\n    assert numpy_arr.max(where=numpy_mask, initial=5) == 
modin_arr.max(\n        where=modin_mask, initial=5\n    )\n    # Test 2D\n    numpy_arr = numpy.random.randint(-100, 100, size=(20, 20))\n    modin_arr = np.array(numpy_arr)\n    assert modin_arr.max() == numpy_arr.max()\n    modin_result = modin_arr.max(axis=0)\n    numpy_result = numpy_arr.max(axis=0)\n    assert modin_result.shape == numpy_result.shape\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n    modin_result = modin_arr.max(axis=0, keepdims=True)\n    numpy_result = numpy_arr.max(axis=0, keepdims=True)\n    assert modin_result.shape == numpy_result.shape\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n    modin_result = modin_arr.max(axis=1)\n    numpy_result = numpy_arr.max(axis=1)\n    assert modin_result.shape == numpy_result.shape\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n    modin_result = modin_arr.max(axis=1, keepdims=True)\n    numpy_result = numpy_arr.max(axis=1, keepdims=True)\n    assert modin_result.shape == numpy_result.shape\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n    modin_result = modin_arr.max(initial=200)\n    numpy_result = numpy_arr.max(initial=200)\n    assert modin_result == numpy_result\n    modin_result = modin_arr.max(initial=0, where=False)\n    numpy_result = numpy_arr.max(initial=0, where=False)\n    assert modin_result == numpy_result\n    with pytest.raises(ValueError):\n        modin_arr.max(out=modin_arr, keepdims=True)\n    modin_out = np.array([[1]])\n    numpy_out = modin_out._to_numpy()\n    modin_result = modin_arr.max(out=modin_out, keepdims=True)\n    numpy_result = numpy_arr.max(out=numpy_out, keepdims=True)\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n    assert_scalar_or_array_equal(modin_out, numpy_out)\n    numpy_arr = numpy.random.randint(-100, 100, size=(20, 20))\n    modin_arr = np.array(numpy_arr)\n    modin_result = modin_arr.max(axis=0, where=False, initial=4)\n    numpy_result = numpy_arr.max(axis=0, where=False, 
initial=4)\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n    numpy_out = numpy.ones(20)\n    modin_out = np.array(numpy_out)\n    modin_result = modin_arr.max(axis=0, where=False, initial=4, out=modin_out)\n    numpy_result = numpy_arr.max(axis=0, where=False, initial=4, out=numpy_out)\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n    assert_scalar_or_array_equal(modin_out, numpy_out)\n    numpy_out = numpy.ones(20)\n    modin_out = np.array(numpy_out)\n    modin_result = modin_arr.max(axis=0, initial=4, out=modin_out)\n    numpy_result = numpy_arr.max(axis=0, initial=4, out=numpy_out)\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n    assert_scalar_or_array_equal(modin_out, numpy_out)\n    numpy_out = numpy.ones(20)\n    modin_out = np.array(numpy_out)\n    modin_result = modin_arr.max(axis=1, initial=4, out=modin_out)\n    numpy_result = numpy_arr.max(axis=1, initial=4, out=numpy_out)\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n    assert_scalar_or_array_equal(modin_out, numpy_out)\n    numpy_out = numpy.ones(20)\n    modin_out = np.array(numpy_out)\n    numpy_where = numpy.full(20, False)\n    numpy_where[:10] = True\n    numpy.random.shuffle(numpy_where)\n    modin_where = np.array(numpy_where)\n    modin_result = modin_arr.max(axis=0, initial=4, out=modin_out, where=modin_where)\n    numpy_result = numpy_arr.max(axis=0, initial=4, out=numpy_out, where=numpy_where)\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n    assert_scalar_or_array_equal(modin_out, numpy_out)\n    numpy_arr = numpy.array([[1, 10000, 2], [3, 4, 5]])\n    modin_arr = np.array(numpy_arr)\n    numpy_mask = numpy.array([[True, False, True], [True, True, True]])\n    modin_mask = np.array(numpy_mask)\n    assert_scalar_or_array_equal(\n        modin_arr.max(where=modin_mask, initial=5),\n        numpy_arr.max(where=numpy_mask, initial=5),\n    )\n\n\ndef test_min():\n    # Test 1D\n    numpy_arr = 
numpy.random.randint(-100, 100, size=100)\n    modin_arr = np.array(numpy_arr)\n    assert modin_arr.min() == numpy_arr.min()\n    modin_result = modin_arr.min(axis=0)\n    numpy_result = numpy_arr.min(axis=0)\n    assert modin_result == numpy_result\n    modin_result = modin_arr.min(initial=-200)\n    numpy_result = numpy_arr.min(initial=-200)\n    assert modin_result == numpy_result\n    modin_result = modin_arr.min(initial=0, where=False)\n    numpy_result = numpy_arr.min(initial=0, where=False)\n    assert modin_result == numpy_result\n    modin_result = modin_arr.min(keepdims=True)\n    numpy_result = numpy_arr.min(keepdims=True)\n    assert modin_result.shape == numpy_result.shape\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n    numpy_arr = numpy.array([1, -10000, 2, 3, 4, 5])\n    modin_arr = np.array(numpy_arr)\n    numpy_mask = numpy.array([True, False, True, True, True, True])\n    modin_mask = np.array(numpy_mask)\n    assert numpy_arr.min(where=numpy_mask, initial=5) == modin_arr.min(\n        where=modin_mask, initial=5\n    )\n    # Test 2D\n    numpy_arr = numpy.random.randint(-100, 100, size=(20, 20))\n    modin_arr = np.array(numpy_arr)\n    assert modin_arr.min() == numpy_arr.min()\n    modin_result = modin_arr.min(axis=0)\n    numpy_result = numpy_arr.min(axis=0)\n    assert modin_result.shape == numpy_result.shape\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n    modin_result = modin_arr.min(axis=0, keepdims=True)\n    numpy_result = numpy_arr.min(axis=0, keepdims=True)\n    assert modin_result.shape == numpy_result.shape\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n    modin_result = modin_arr.min(axis=1)\n    numpy_result = numpy_arr.min(axis=1)\n    assert modin_result.shape == numpy_result.shape\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n    modin_result = modin_arr.min(axis=1, keepdims=True)\n    numpy_result = numpy_arr.min(axis=1, keepdims=True)\n    assert 
modin_result.shape == numpy_result.shape\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n    modin_result = modin_arr.min(initial=-200)\n    numpy_result = numpy_arr.min(initial=-200)\n    assert modin_result == numpy_result\n    modin_result = modin_arr.min(initial=0, where=False)\n    numpy_result = numpy_arr.min(initial=0, where=False)\n    assert modin_result == numpy_result\n    with pytest.raises(ValueError):\n        modin_arr.min(out=modin_arr, keepdims=True)\n    modin_out = np.array([[1]])\n    numpy_out = modin_out._to_numpy()\n    modin_result = modin_arr.min(out=modin_out, keepdims=True)\n    numpy_result = numpy_arr.min(out=numpy_out, keepdims=True)\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n    assert_scalar_or_array_equal(modin_out, numpy_out)\n    numpy_arr = numpy.random.randint(-100, 100, size=(20, 20))\n    modin_arr = np.array(numpy_arr)\n    modin_result = modin_arr.min(axis=0, where=False, initial=4)\n    numpy_result = numpy_arr.min(axis=0, where=False, initial=4)\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n    numpy_out = numpy.ones(20)\n    modin_out = np.array(numpy_out)\n    modin_result = modin_arr.min(axis=0, where=False, initial=4, out=modin_out)\n    numpy_result = numpy_arr.min(axis=0, where=False, initial=4, out=numpy_out)\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n    assert_scalar_or_array_equal(modin_out, numpy_out)\n    numpy_out = numpy.ones(20)\n    modin_out = np.array(numpy_out)\n    modin_result = modin_arr.min(axis=0, initial=4, out=modin_out)\n    numpy_result = numpy_arr.min(axis=0, initial=4, out=numpy_out)\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n    assert_scalar_or_array_equal(modin_out, numpy_out)\n    numpy_out = numpy.ones(20)\n    modin_out = np.array(numpy_out)\n    modin_result = modin_arr.min(axis=1, initial=4, out=modin_out)\n    numpy_result = numpy_arr.min(axis=1, initial=4, out=numpy_out)\n    
assert_scalar_or_array_equal(modin_result, numpy_result)\n    assert_scalar_or_array_equal(modin_out, numpy_out)\n    numpy_out = numpy.ones(20)\n    modin_out = np.array(numpy_out)\n    numpy_where = numpy.full(20, False)\n    numpy_where[:10] = True\n    numpy.random.shuffle(numpy_where)\n    modin_where = np.array(numpy_where)\n    modin_result = modin_arr.min(axis=0, initial=4, out=modin_out, where=modin_where)\n    numpy_result = numpy_arr.min(axis=0, initial=4, out=numpy_out, where=numpy_where)\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n    assert_scalar_or_array_equal(modin_out, numpy_out)\n    numpy_arr = numpy.array([[1, -10000, 2], [3, 4, 5]])\n    modin_arr = np.array(numpy_arr)\n    numpy_mask = numpy.array([[True, False, True], [True, True, True]])\n    modin_mask = np.array(numpy_mask)\n    assert_scalar_or_array_equal(\n        modin_arr.min(where=modin_mask, initial=5),\n        numpy_arr.min(where=numpy_mask, initial=5),\n    )\n\n\ndef test_sum():\n    # Test 1D\n    numpy_arr = numpy.random.randint(-100, 100, size=100)\n    modin_arr = np.array(numpy_arr)\n    assert modin_arr.sum() == numpy_arr.sum()\n    modin_result = modin_arr.sum(axis=0)\n    numpy_result = numpy_arr.sum(axis=0)\n    assert modin_result == numpy_result\n    modin_result = modin_arr.sum(initial=-200)\n    numpy_result = numpy_arr.sum(initial=-200)\n    assert modin_result == numpy_result\n    modin_result = modin_arr.sum(initial=0, where=False)\n    numpy_result = numpy_arr.sum(initial=0, where=False)\n    assert modin_result == numpy_result\n    modin_result = modin_arr.sum(keepdims=True)\n    numpy_result = numpy_arr.sum(keepdims=True)\n    assert modin_result.shape == numpy_result.shape\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n    numpy_arr = numpy.array([1, 10000, 2, 3, 4, 5])\n    modin_arr = np.array(numpy_arr)\n    numpy_mask = numpy.array([True, False, True, True, True, True])\n    modin_mask = np.array(numpy_mask)\n    assert 
numpy_arr.sum(where=numpy_mask) == modin_arr.sum(where=modin_mask)\n    # Test 2D\n    numpy_arr = numpy.random.randint(-100, 100, size=(20, 20))\n    modin_arr = np.array(numpy_arr)\n    assert modin_arr.sum() == numpy_arr.sum()\n    modin_result = modin_arr.sum(axis=0)\n    numpy_result = numpy_arr.sum(axis=0)\n    assert modin_result.shape == numpy_result.shape\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n    modin_result = modin_arr.sum(axis=0, keepdims=True)\n    numpy_result = numpy_arr.sum(axis=0, keepdims=True)\n    assert modin_result.shape == numpy_result.shape\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n    modin_result = modin_arr.sum(axis=1)\n    numpy_result = numpy_arr.sum(axis=1)\n    assert modin_result.shape == numpy_result.shape\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n    modin_result = modin_arr.sum(axis=1, keepdims=True)\n    numpy_result = numpy_arr.sum(axis=1, keepdims=True)\n    assert modin_result.shape == numpy_result.shape\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n    modin_result = modin_arr.sum(initial=-200)\n    numpy_result = numpy_arr.sum(initial=-200)\n    assert modin_result == numpy_result\n    modin_result = modin_arr.sum(initial=0, where=False)\n    numpy_result = numpy_arr.sum(initial=0, where=False)\n    assert modin_result == numpy_result\n    with pytest.raises(ValueError):\n        modin_arr.sum(out=modin_arr, keepdims=True)\n    modin_out = np.array([[1]])\n    numpy_out = modin_out._to_numpy()\n    modin_result = modin_arr.sum(out=modin_out, keepdims=True)\n    numpy_result = numpy_arr.sum(out=numpy_out, keepdims=True)\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n    assert_scalar_or_array_equal(modin_out, numpy_out)\n    numpy_arr = numpy.random.randint(-100, 100, size=(20, 20))\n    modin_arr = np.array(numpy_arr)\n    modin_result = modin_arr.sum(axis=0, where=False, initial=4)\n    numpy_result = numpy_arr.sum(axis=0, 
where=False, initial=4)\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n    numpy_out = numpy.ones(20)\n    modin_out = np.array(numpy_out)\n    modin_result = modin_arr.sum(axis=0, where=False, initial=4, out=modin_out)\n    numpy_result = numpy_arr.sum(axis=0, where=False, initial=4, out=numpy_out)\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n    assert_scalar_or_array_equal(modin_out, numpy_out)\n    numpy_out = numpy.ones(20)\n    modin_out = np.array(numpy_out)\n    modin_result = modin_arr.sum(axis=0, initial=4, out=modin_out)\n    numpy_result = numpy_arr.sum(axis=0, initial=4, out=numpy_out)\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n    assert_scalar_or_array_equal(modin_out, numpy_out)\n    numpy_out = numpy.ones(20)\n    modin_out = np.array(numpy_out)\n    modin_result = modin_arr.sum(axis=1, initial=4, out=modin_out)\n    numpy_result = numpy_arr.sum(axis=1, initial=4, out=numpy_out)\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n    assert_scalar_or_array_equal(modin_out, numpy_out)\n    numpy_out = numpy.ones(20)\n    modin_out = np.array(numpy_out)\n    numpy_where = numpy.full(20, False)\n    numpy_where[:10] = True\n    numpy.random.shuffle(numpy_where)\n    modin_where = np.array(numpy_where)\n    modin_result = modin_arr.sum(axis=0, initial=4, out=modin_out, where=modin_where)\n    numpy_result = numpy_arr.sum(axis=0, initial=4, out=numpy_out, where=numpy_where)\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n    assert_scalar_or_array_equal(modin_out, numpy_out)\n    numpy_where = numpy.full(400, False)\n    numpy_where[:200] = True\n    numpy.random.shuffle(numpy_where)\n    numpy_where = numpy_where.reshape((20, 20))\n    modin_where = np.array(numpy_where)\n    modin_result = modin_arr.sum(where=modin_where)\n    numpy_result = numpy_arr.sum(where=numpy_where)\n    assert modin_result == numpy_result\n    # Test NA propagation\n    numpy_arr = 
numpy.array([[1, 2], [3, 4], [5, numpy.nan]])\n    modin_arr = np.array([[1, 2], [3, 4], [5, np.nan]])\n    assert numpy.isnan(modin_arr.sum())\n    assert_scalar_or_array_equal(\n        modin_arr.sum(axis=1),\n        numpy_arr.sum(axis=1),\n    )\n    assert_scalar_or_array_equal(\n        modin_arr.sum(axis=0),\n        numpy_arr.sum(axis=0),\n    )\n\n\ndef test_mean():\n    # Test 1D\n    numpy_arr = numpy.random.randint(-100, 100, size=100)\n    modin_arr = np.array(numpy_arr)\n    assert modin_arr.mean() == numpy_arr.mean()\n    modin_result = modin_arr.mean(axis=0)\n    numpy_result = numpy_arr.mean(axis=0)\n    assert modin_result == numpy_result\n    modin_result = modin_arr.mean()\n    numpy_result = numpy_arr.mean()\n    assert modin_result == numpy_result\n    modin_result = modin_arr.mean(keepdims=True)\n    numpy_result = numpy_arr.mean(keepdims=True)\n    assert modin_result.shape == numpy_result.shape\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n    numpy_arr = numpy.array([1, 10000, 2, 3, 4, 5])\n    modin_arr = np.array(numpy_arr)\n    numpy_mask = numpy.array([True, False, True, True, True, True])\n    modin_mask = np.array(numpy_mask)\n    assert numpy_arr.mean(where=numpy_mask) == modin_arr.mean(where=modin_mask)\n    # Test 2D\n    numpy_arr = numpy.random.randint(-100, 100, size=(20, 20))\n    modin_arr = np.array(numpy_arr)\n    assert modin_arr.mean() == numpy_arr.mean()\n    modin_result = modin_arr.mean(axis=0)\n    numpy_result = numpy_arr.mean(axis=0)\n    assert modin_result.shape == numpy_result.shape\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n    modin_result = modin_arr.mean(axis=0, keepdims=True)\n    numpy_result = numpy_arr.mean(axis=0, keepdims=True)\n    assert modin_result.shape == numpy_result.shape\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n    modin_result = modin_arr.mean(axis=1)\n    numpy_result = numpy_arr.mean(axis=1)\n    assert modin_result.shape == 
numpy_result.shape\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n    modin_result = modin_arr.mean(axis=1, keepdims=True)\n    numpy_result = numpy_arr.mean(axis=1, keepdims=True)\n    assert modin_result.shape == numpy_result.shape\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n    modin_result = modin_arr.mean()\n    numpy_result = numpy_arr.mean()\n    assert modin_result == numpy_result\n    with pytest.raises(ValueError):\n        modin_arr.mean(out=modin_arr, keepdims=True)\n    modin_out = np.array([[1]])\n    numpy_out = modin_out._to_numpy()\n    modin_result = modin_arr.mean(out=modin_out, keepdims=True)\n    numpy_result = numpy_arr.mean(out=numpy_out, keepdims=True)\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n    assert_scalar_or_array_equal(modin_out, numpy_out)\n    numpy_arr = numpy.random.randint(-100, 100, size=(20, 20))\n    modin_arr = np.array(numpy_arr)\n    numpy_out = numpy.ones(20)\n    modin_out = np.array(numpy_out)\n    modin_result = modin_arr.mean(axis=0, where=False, out=modin_out)\n    numpy_result = numpy_arr.mean(axis=0, where=False, out=numpy_out)\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n    assert_scalar_or_array_equal(modin_out, numpy_out)\n    numpy_out = numpy.ones(20)\n    modin_out = np.array(numpy_out)\n    modin_result = modin_arr.mean(axis=0, out=modin_out)\n    numpy_result = numpy_arr.mean(axis=0, out=numpy_out)\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n    assert_scalar_or_array_equal(modin_out, numpy_out)\n    numpy_out = numpy.ones(20)\n    modin_out = np.array(numpy_out)\n    modin_result = modin_arr.mean(axis=1, out=modin_out)\n    numpy_result = numpy_arr.mean(axis=1, out=numpy_out)\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n    assert_scalar_or_array_equal(modin_out, numpy_out)\n    numpy_out = numpy.ones(20)\n    modin_out = np.array(numpy_out)\n    numpy_where = numpy.full(20, False)\n    
numpy_where[:10] = True\n    numpy.random.shuffle(numpy_where)\n    modin_where = np.array(numpy_where)\n    modin_result = modin_arr.mean(axis=0, out=modin_out, where=modin_where)\n    numpy_result = numpy_arr.mean(axis=0, out=numpy_out, where=numpy_where)\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n    assert_scalar_or_array_equal(modin_out, numpy_out)\n    numpy_where = numpy.full(400, False)\n    numpy_where[:200] = True\n    numpy.random.shuffle(numpy_where)\n    numpy_where = numpy_where.reshape((20, 20))\n    modin_where = np.array(numpy_where)\n    modin_result = modin_arr.mean(where=modin_where)\n    numpy_result = numpy_arr.mean(where=numpy_where)\n    assert modin_result == numpy_result\n    # Test NA propagation\n    numpy_arr = numpy.array([[1, 2], [3, 4], [5, numpy.nan]])\n    modin_arr = np.array([[1, 2], [3, 4], [5, np.nan]])\n    assert numpy.isnan(modin_arr.mean())\n    assert_scalar_or_array_equal(\n        modin_arr.mean(axis=1),\n        numpy_arr.mean(axis=1),\n    )\n    assert_scalar_or_array_equal(\n        modin_arr.mean(axis=0),\n        numpy_arr.mean(axis=0),\n    )\n    numpy_where = numpy.array([[True, True], [True, True], [True, False]])\n    modin_where = np.array(numpy_where)\n    assert modin_arr.mean(where=modin_where) == numpy_arr.mean(where=numpy_where)\n\n\ndef test_prod():\n    # Test 1D\n    numpy_arr = numpy.random.randint(-100, 100, size=100)\n    modin_arr = np.array(numpy_arr)\n    assert modin_arr.prod() == numpy_arr.prod()\n    modin_result = modin_arr.prod(axis=0)\n    numpy_result = numpy_arr.prod(axis=0)\n    assert modin_result == numpy_result\n    modin_result = modin_arr.prod(initial=-200)\n    numpy_result = numpy_arr.prod(initial=-200)\n    assert modin_result == numpy_result\n    modin_result = modin_arr.prod(initial=0, where=False)\n    numpy_result = numpy_arr.prod(initial=0, where=False)\n    assert modin_result == numpy_result\n    modin_result = modin_arr.prod(keepdims=True)\n    
numpy_result = numpy_arr.prod(keepdims=True)\n    assert modin_result.shape == numpy_result.shape\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n    numpy_arr = numpy.array([1, 10000, 2, 3, 4, 5])\n    modin_arr = np.array(numpy_arr)\n    numpy_mask = numpy.array([True, False, True, True, True, True])\n    modin_mask = np.array(numpy_mask)\n    assert numpy_arr.prod(where=numpy_mask) == modin_arr.prod(where=modin_mask)\n    # Test 2D\n    numpy_arr = numpy.random.randint(-100, 100, size=(20, 20))\n    modin_arr = np.array(numpy_arr)\n    assert modin_arr.prod() == numpy_arr.prod()\n    modin_result = modin_arr.prod(axis=0)\n    numpy_result = numpy_arr.prod(axis=0)\n    assert modin_result.shape == numpy_result.shape\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n    modin_result = modin_arr.prod(axis=0, keepdims=True)\n    numpy_result = numpy_arr.prod(axis=0, keepdims=True)\n    assert modin_result.shape == numpy_result.shape\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n    modin_result = modin_arr.prod(axis=1)\n    numpy_result = numpy_arr.prod(axis=1)\n    assert modin_result.shape == numpy_result.shape\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n    modin_result = modin_arr.prod(axis=1, keepdims=True)\n    numpy_result = numpy_arr.prod(axis=1, keepdims=True)\n    assert modin_result.shape == numpy_result.shape\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n    modin_result = modin_arr.prod(initial=-200)\n    numpy_result = numpy_arr.prod(initial=-200)\n    assert modin_result == numpy_result\n    modin_result = modin_arr.prod(initial=0, where=False)\n    numpy_result = numpy_arr.prod(initial=0, where=False)\n    assert modin_result == numpy_result\n    with pytest.raises(ValueError):\n        modin_arr.prod(out=modin_arr, keepdims=True)\n    modin_out = np.array([[1]])\n    numpy_out = modin_out._to_numpy()\n    modin_result = modin_arr.prod(out=modin_out, keepdims=True)\n    
numpy_result = numpy_arr.prod(out=numpy_out, keepdims=True)\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n    assert_scalar_or_array_equal(modin_out, numpy_out)\n    numpy_arr = numpy.random.randint(-100, 100, size=(20, 20))\n    modin_arr = np.array(numpy_arr)\n    modin_result = modin_arr.prod(axis=0, where=False, initial=4)\n    numpy_result = numpy_arr.prod(axis=0, where=False, initial=4)\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n    numpy_out = numpy.ones(20)\n    modin_out = np.array(numpy_out)\n    modin_result = modin_arr.prod(axis=0, where=False, initial=4, out=modin_out)\n    numpy_result = numpy_arr.prod(axis=0, where=False, initial=4, out=numpy_out)\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n    assert_scalar_or_array_equal(modin_out, numpy_out)\n    numpy_arr = numpy.random.randint(-100, 100, size=(20, 20))\n    modin_arr = np.array(numpy_arr)\n    numpy_out = numpy.ones(20)\n    modin_out = np.array(numpy_out)\n    modin_result = modin_arr.prod(axis=0, initial=4, out=modin_out)\n    numpy_result = numpy_arr.prod(axis=0, initial=4, out=numpy_out)\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n    assert_scalar_or_array_equal(modin_out, numpy_out)\n    numpy_out = numpy.ones(20)\n    modin_out = np.array(numpy_out)\n    modin_result = modin_arr.prod(axis=1, initial=4, out=modin_out)\n    numpy_result = numpy_arr.prod(axis=1, initial=4, out=numpy_out)\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n    assert_scalar_or_array_equal(modin_out, numpy_out)\n    numpy_out = numpy.ones(20)\n    modin_out = np.array(numpy_out)\n    numpy_where = numpy.full(20, False)\n    numpy_where[:10] = True\n    numpy.random.shuffle(numpy_where)\n    modin_where = np.array(numpy_where)\n    modin_result = modin_arr.prod(axis=0, initial=4, out=modin_out, where=modin_where)\n    numpy_result = numpy_arr.prod(axis=0, initial=4, out=numpy_out, where=numpy_where)\n    
assert_scalar_or_array_equal(modin_result, numpy_result)\n    assert_scalar_or_array_equal(modin_out, numpy_out)\n    numpy_where = numpy.full(400, False)\n    numpy_where[:200] = True\n    numpy.random.shuffle(numpy_where)\n    numpy_where = numpy_where.reshape((20, 20))\n    modin_where = np.array(numpy_where)\n    modin_result = modin_arr.prod(where=modin_where)\n    numpy_result = numpy_arr.prod(where=numpy_where)\n    assert modin_result == numpy_result\n    # Test NA propagation\n    numpy_arr = numpy.array([[1, 2], [3, 4], [5, numpy.nan]])\n    modin_arr = np.array([[1, 2], [3, 4], [5, np.nan]])\n    assert numpy.isnan(modin_arr.prod())\n    assert_scalar_or_array_equal(\n        modin_arr.prod(axis=1),\n        numpy_arr.prod(axis=1),\n    )\n    assert_scalar_or_array_equal(\n        modin_arr.prod(axis=0),\n        numpy_arr.prod(axis=0),\n    )\n"
  },
  {
    "path": "modin/tests/numpy/test_array_creation.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport numpy\n\nimport modin.numpy as np\n\nfrom .utils import assert_scalar_or_array_equal\n\n\ndef test_zeros_like():\n    modin_arr = np.array([[1.0, 2.0], [3.0, 4.0]])\n    numpy_arr = modin_arr._to_numpy()\n    assert_scalar_or_array_equal(np.zeros_like(modin_arr), numpy.zeros_like(numpy_arr))\n    assert_scalar_or_array_equal(\n        np.zeros_like(modin_arr, dtype=numpy.int8),\n        numpy.zeros_like(numpy_arr, dtype=numpy.int8),\n    )\n    assert_scalar_or_array_equal(\n        np.zeros_like(modin_arr, shape=(10, 10)),\n        numpy.zeros_like(numpy_arr, shape=(10, 10)),\n    )\n    modin_arr = np.array([[1, 2], [3, 4]])\n    numpy_arr = modin_arr._to_numpy()\n    assert_scalar_or_array_equal(\n        np.zeros_like(modin_arr),\n        numpy.zeros_like(numpy_arr),\n    )\n\n\ndef test_ones_like():\n    modin_arr = np.array([[1.0, 2.0], [3.0, 4.0]])\n    numpy_arr = modin_arr._to_numpy()\n    assert_scalar_or_array_equal(\n        np.ones_like(modin_arr),\n        numpy.ones_like(numpy_arr),\n    )\n    assert_scalar_or_array_equal(\n        np.ones_like(modin_arr, dtype=numpy.int8),\n        numpy.ones_like(numpy_arr, dtype=numpy.int8),\n    )\n    
assert_scalar_or_array_equal(\n        np.ones_like(modin_arr, shape=(10, 10)),\n        numpy.ones_like(numpy_arr, shape=(10, 10)),\n    )\n    modin_arr = np.array([[1, 2], [3, 4]])\n    numpy_arr = modin_arr._to_numpy()\n    assert_scalar_or_array_equal(\n        np.ones_like(modin_arr),\n        numpy.ones_like(numpy_arr),\n    )\n"
  },
  {
    "path": "modin/tests/numpy/test_array_indexing.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport numpy\nimport pytest\nfrom pandas.core.dtypes.common import is_list_like\n\nimport modin.numpy as np\n\nfrom .utils import assert_scalar_or_array_equal\n\n\n@pytest.mark.parametrize(\n    \"index\",\n    (\n        0,\n        1,\n        -1,  # Scalar indices\n        slice(0, 1, 1),\n        slice(1, -1, 1),  # Slices\n        [0, 2],\n        [1, -1],  # Lists\n    ),\n    ids=lambda i: f\"index={i}\",\n)\ndef test_getitem_1d(index):\n    data = [1, 2, 3, 4, 5]\n    numpy_result = numpy.array(data)[index]\n    modin_result = np.array(data)[index]\n    if is_list_like(numpy_result):\n        assert_scalar_or_array_equal(modin_result, numpy_result)\n        assert modin_result.shape == numpy_result.shape\n    else:\n        assert modin_result == numpy_result\n\n\n@pytest.mark.parametrize(\n    \"index\",\n    (\n        0,\n        1,\n        -1,  # Scalar indices\n        slice(0, 1, 1),\n        slice(1, -1, 1),  # Slices\n        slice(None, None, None),\n        slice(None, 1, None),\n        slice(0, 1, None),\n        slice(0, None, None),\n        [0, 2],\n        [2, 0],\n        [1, -1],  # Lists\n    ),\n    ids=lambda i: f\"index={i}\",\n)\ndef 
test_getitem_2d(index):\n    data = [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]]\n    numpy_result = numpy.array(data)[index]\n    modin_result = np.array(data)[index]\n    if is_list_like(numpy_result):\n        assert_scalar_or_array_equal(modin_result, numpy_result)\n        assert modin_result.shape == numpy_result.shape\n    else:\n        assert modin_result == numpy_result\n\n\ndef test_getitem_nested():\n    # Index into the result of slicing a 1D array\n    data = [1, 2, 3, 4, 5]\n    numpy_result = numpy.array(data)[1:3][1]\n    modin_result = np.array(data)[1:3][1]\n    if is_list_like(numpy_result):\n        assert_scalar_or_array_equal(modin_result, numpy_result)\n        assert modin_result.shape == numpy_result.shape\n    else:\n        assert (\n            modin_result == numpy_result\n        )  # Index into the result of indexing a 2D array\n    data = [[1, 2, 3], [4, 5, 6]]\n    numpy_result = numpy.array(data)[1][1]\n    modin_result = np.array(data)[1][1]\n    if is_list_like(numpy_result):\n        assert_scalar_or_array_equal(modin_result, numpy_result)\n        assert modin_result.shape == numpy_result.shape\n    else:\n        assert modin_result == numpy_result\n\n\n@pytest.mark.parametrize(\n    (\"index\", \"value\"),\n    [\n        (0, 1),\n        (1, 1),\n        (-1, 1),  # Scalar indices\n        (slice(0, 1, 1), [7]),\n        (slice(1, -1, 1), [7, 8, 9]),  # Slices\n        (slice(0, 4, 1), 7),  # Slice with broadcast\n        ([0, 2], [7, 8]),\n        ([1, -1], [7, 8]),  # Lists\n    ],\n    ids=lambda i: f\"{i}\",\n)\ndef test_setitem_1d(index, value):\n    data = [1, 2, 3, 4, 5]\n    modin_arr, numpy_arr = np.array(data), numpy.array(data)\n    numpy_arr[index] = value\n    modin_arr[index] = value\n    assert_scalar_or_array_equal(modin_arr, numpy_arr)\n\n\ndef test_setitem_1d_error():\n    arr = np.array([1, 2, 3, 4, 5])\n    with pytest.raises(ValueError, match=\"could not broadcast\"):\n        arr[0:5] = [1, 
2]\n\n\n@pytest.mark.parametrize(\n    (\"index\", \"value\"),\n    [\n        (0, 1),\n        (1, 1),\n        (-1, 1),  # Scalar indices\n        (slice(0, 1, 1), [13]),  # arr[0:1:1] = [13]\n        (slice(1, -1, 1), [13]),  # arr[1:-1:1] = [13]\n        (slice(None, None, None), [7]),  # arr[:] = [7]\n        (slice(None, 1, None), [7]),  # arr[:1] = [7]\n        (slice(0, 1, None), [7]),  # arr[0:1] = [7]\n        (slice(0, None, None), [7]),  # arr[0:] = [7]\n        ([0, 2], [[13, 14, 15], [16, 17, 18]]),\n        ([2, 0], [[13, 14, 15], [16, 17, 18]]),\n        ([1, -1], [[13, 14, 15], [16, 17, 18]]),  # Lists\n    ],\n    ids=lambda i: f\"{i}\",\n)\ndef test_setitem_2d(index, value):\n    if index == [2, 0]:\n        pytest.xfail(\"indexing with unsorted list would fail: see GH#5886\")\n    data = [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]]\n    modin_arr, numpy_arr = np.array(data), numpy.array(data)\n    numpy_arr[index] = value\n    modin_arr[index] = value\n    assert_scalar_or_array_equal(modin_arr, numpy_arr)\n"
  },
  {
    "path": "modin/tests/numpy/test_array_linalg.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\nimport numpy\nimport numpy.linalg as NLA\nimport pytest\n\nimport modin.numpy as np\nimport modin.numpy.linalg as LA\nimport modin.pandas as pd\n\nfrom .utils import assert_scalar_or_array_equal\n\n\ndef test_dot_from_pandas_reindex():\n    # Reindexing the dataframe does not change the output of dot\n    # https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.dot.html\n    df = pd.DataFrame([[0, 1, -2, -1], [1, 1, 1, 1]])\n    s = pd.Series([1, 1, 2, 1])\n    result1 = np.dot(df, s)\n    s2 = s.reindex([1, 0, 2, 3])\n    result2 = np.dot(df, s2)\n    assert_scalar_or_array_equal(result1, result2)\n\n\ndef test_dot_1d():\n    x1 = numpy.random.randint(-100, 100, size=100)\n    x2 = numpy.random.randint(-100, 100, size=100)\n    numpy_result = numpy.dot(x1, x2)\n    x1, x2 = np.array(x1), np.array(x2)\n    modin_result = np.dot(x1, x2)\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n\n\ndef test_dot_2d():\n    x1 = numpy.random.randint(-100, 100, size=(100, 3))\n    x2 = numpy.random.randint(-100, 100, size=(3, 50))\n    numpy_result = numpy.dot(x1, x2)\n    x1, x2 = np.array(x1), np.array(x2)\n    modin_result = np.dot(x1, x2)\n    
assert_scalar_or_array_equal(modin_result, numpy_result)\n\n\ndef test_dot_scalar():\n    x1 = numpy.random.randint(-100, 100, size=(100, 3))\n    x2 = numpy.random.randint(-100, 100)\n    numpy_result = numpy.dot(x1, x2)\n    x1 = np.array(x1)\n    modin_result = np.dot(x1, x2)\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n\n\ndef test_matmul_scalar():\n    x1 = numpy.random.randint(-100, 100, size=(100, 3))\n    x2 = numpy.random.randint(-100, 100)\n    x1 = np.array(x1)\n    # Modin error message differs from numpy for readability; the original numpy error is:\n    # ValueError: matmul: Input operand 1 does not have enough dimensions (has 0, gufunc\n    # core with signature (n?,k),(k,m?)->(n?,m?) requires 1)\n    with pytest.raises(ValueError):\n        x1 @ x2\n\n\ndef test_dot_broadcast():\n    # 2D @ 1D\n    x1 = numpy.random.randint(-100, 100, size=(100, 3))\n    x2 = numpy.random.randint(-100, 100, size=(3,))\n    numpy_result = numpy.dot(x1, x2)\n    x1, x2 = np.array(x1), np.array(x2)\n    modin_result = np.dot(x1, x2)\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n\n    # 1D @ 2D\n    x1 = numpy.random.randint(-100, 100, size=(100,))\n    x2 = numpy.random.randint(-100, 100, size=(100, 3))\n    numpy_result = numpy.dot(x1, x2)\n    x1, x2 = np.array(x1), np.array(x2)\n    modin_result = np.dot(x1, x2)\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n\n\n@pytest.mark.parametrize(\"axis\", [None, 0, 1], ids=[\"axis=None\", \"axis=0\", \"axis=1\"])\ndef test_norm_fro_2d(axis):\n    x1 = numpy.random.randint(-10, 10, size=(100, 3))\n    numpy_result = NLA.norm(x1, axis=axis)\n    x1 = np.array(x1)\n    modin_result = LA.norm(x1, axis=axis)\n    # Result may be a scalar\n    if isinstance(modin_result, np.array):\n        modin_result = modin_result._to_numpy()\n    numpy.testing.assert_allclose(modin_result, numpy_result, rtol=1e-12)\n\n\ndef test_norm_fro_1d():\n    x1 = numpy.random.randint(-10, 10, 
size=100)\n    numpy_result = NLA.norm(x1)\n    x1 = np.array(x1)\n    modin_result = LA.norm(x1)\n    numpy.testing.assert_allclose(modin_result, numpy_result, rtol=1e-12)\n"
  },
  {
    "path": "modin/tests/numpy/test_array_logic.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport numpy\nimport pytest\n\nimport modin.numpy as np\n\nfrom .utils import assert_scalar_or_array_equal\n\nsmall_arr_c_2d = numpy.array(\n    [\n        [1j, 1, 0, -numpy.inf, numpy.inf, 0.5],\n        [1 + 1.1j, numpy.nan, 0, numpy.nan, 2, 0.3],\n    ]\n)\nsmall_arr_c_1d = numpy.array([numpy.nan, 0, -numpy.inf, numpy.inf, 5, -0.1, 1 + 1.1j])\n\nsmall_arr_r_2d = numpy.array(\n    [[1, 0, -numpy.inf, numpy.inf, 0.5], [numpy.nan, 0, numpy.nan, 2, 0.3]]\n)\nsmall_arr_r_1d = numpy.array([numpy.nan, 0, -numpy.inf, numpy.inf, 5, -0.1])\n\n\n@pytest.mark.parametrize(\"operand_shape\", [100, (3, 100)])\n@pytest.mark.parametrize(\"operator\", [\"any\", \"all\"])\n@pytest.mark.parametrize(\"axis\", [None, 0, 1], ids=[\"axis=None\", \"axis=0\", \"axis=1\"])\ndef test_unary_with_axis(operand_shape, operator, axis):\n    if isinstance(operand_shape, int) and axis == 1:\n        pytest.skip(\"cannot use axis=1 on 1D arrays\")\n    x1 = numpy.random.randint(-100, 100, size=operand_shape)\n    numpy_result = getattr(numpy, operator)(x1, axis=axis)\n    x1 = np.array(x1)\n    modin_result = getattr(np, operator)(x1, axis=axis)\n    assert_scalar_or_array_equal(\n        
modin_result, numpy_result, err_msg=f\"Unary operator {operator} failed.\"\n    )\n\n\ndef test_all_any_where():\n    arr = np.array([[0, 1], [1, 0]])\n    where = np.array([[False, True], [True, False]])\n    result = arr.all(where=where)\n    # Result should be np.bool_ True, since where mask isolates the non-zero elements\n    assert result\n\n    where = np.array([[True, False], [False, False]])\n    result = arr.all(where=where, axis=1)\n    assert_scalar_or_array_equal(result, numpy.array([False, True]))\n\n    # Results should contain vacuous Trues in the relevant shape\n    result = arr.all(where=False, axis=1)\n    assert_scalar_or_array_equal(result, numpy.array([True, True]))\n    result = arr.all(where=False, axis=0)\n    assert_scalar_or_array_equal(result, numpy.array([True, True]))\n    assert bool(arr.all(where=False, axis=None))\n\n    where = np.array([[True, False], [False, True]])\n    result = arr.any(where=where)\n    # Result should be np.bool_ False, since mask isolates only zero elements\n    assert not result\n\n    where = np.array([[False, True], [False, False]])\n    result = arr.any(where=where, axis=1)\n    assert_scalar_or_array_equal(result, numpy.array([True, False]))\n\n    # Results should contain vacuous Falses in the relevant shape\n    result = arr.any(where=False, axis=1)\n    assert_scalar_or_array_equal(result, numpy.array([False, False]))\n    result = arr.any(where=False, axis=0)\n    assert_scalar_or_array_equal(result, numpy.array([False, False]))\n    assert not bool(arr.any(where=False, axis=None))\n\n\n@pytest.mark.parametrize(\"data\", [small_arr_c_2d, small_arr_c_1d], ids=[\"2D\", \"1D\"])\n@pytest.mark.parametrize(\n    \"operator\", [\"isfinite\", \"isinf\", \"isnan\", \"iscomplex\", \"isreal\"]\n)\ndef test_unary_with_complex(data, operator):\n    x1 = data\n    numpy_result = getattr(numpy, operator)(x1)\n    x1 = np.array(x1)\n    modin_result = getattr(np, operator)(x1)\n    
assert_scalar_or_array_equal(modin_result, numpy_result)\n\n\ndef test_isnat():\n    x1 = numpy.array([numpy.datetime64(\"2016-01-01\"), numpy.datetime64(\"NaT\")])\n    numpy_result = numpy.isnat(x1)\n    x1 = np.array(x1)\n    modin_result = np.isnat(x1)\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n\n\n@pytest.mark.parametrize(\"data\", [small_arr_r_2d, small_arr_r_1d], ids=[\"2D\", \"1D\"])\n@pytest.mark.parametrize(\"operator\", [\"isneginf\", \"isposinf\"])\ndef test_unary_without_complex(data, operator):\n    x1 = data\n    numpy_result = getattr(numpy, operator)(x1)\n    x1 = np.array(x1)\n    modin_result = getattr(np, operator)(x1)\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n\n\n@pytest.mark.parametrize(\"data\", [small_arr_r_2d, small_arr_r_1d], ids=[\"2D\", \"1D\"])\ndef test_logical_not(data):\n    x1 = data\n    numpy_result = numpy.logical_not(x1)\n    x1 = np.array(x1)\n    modin_result = np.logical_not(x1)\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n\n\n@pytest.mark.parametrize(\"operand1_shape\", [100, (3, 100)])\n@pytest.mark.parametrize(\"operand2_shape\", [100, (3, 100)])\n@pytest.mark.parametrize(\"operator\", [\"logical_and\", \"logical_or\", \"logical_xor\"])\ndef test_logical_binops(operand1_shape, operand2_shape, operator):\n    if operand1_shape != operand2_shape:\n        pytest.xfail(\"TODO fix broadcasting behavior for binary logic operators\")\n    x1 = numpy.random.randint(-100, 100, size=operand1_shape)\n    x2 = numpy.random.randint(-100, 100, size=operand2_shape)\n    numpy_result = getattr(numpy, operator)(x1, x2)\n    x1, x2 = np.array(x1), np.array(x2)\n    modin_result = getattr(np, operator)(x1, x2)\n    assert_scalar_or_array_equal(\n        modin_result, numpy_result, err_msg=f\"Logic binary operator {operator} failed.\"\n    )\n"
  },
  {
    "path": "modin/tests/numpy/test_array_math.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport numpy\nimport pytest\n\nimport modin.numpy as np\n\nfrom .utils import assert_scalar_or_array_equal\n\n\n@pytest.mark.parametrize(\n    \"data\",\n    [\n        [3, 2, 1, 1],\n        [-87.434, -90.908, -87.152, -84.903],\n        [-87.434, -90.908, np.nan, -87.152, -84.903],\n    ],\n    ids=[\"ints\", \"floats\", \"floats with nan\"],\n)\n@pytest.mark.parametrize(\"op\", [\"argmin\", \"argmax\"])\ndef test_argmax_argmin(data, op):\n    numpy_result = getattr(numpy, op)(numpy.array(data))\n    modin_result = getattr(np, op)(np.array(data))\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n\n\ndef test_rem_mod():\n    \"\"\"Tests remainder and mod, which, unlike the C/matlab equivalents, are identical in numpy.\"\"\"\n    a = numpy.array([[2, -1], [10, -3]])\n    b = numpy.array(([-3, 3], [3, -7]))\n    numpy_result = numpy.remainder(a, b)\n    modin_result = np.remainder(np.array(a), np.array(b))\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n\n    numpy_result = numpy.mod(a, b)\n    modin_result = np.mod(np.array(a), np.array(b))\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n"
  },
  {
    "path": "modin/tests/numpy/test_array_shaping.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport numpy\nimport pytest\n\nimport modin.numpy as np\n\nfrom .utils import assert_scalar_or_array_equal\n\n\n@pytest.mark.parametrize(\"operand_shape\", [100, (100, 3), (3, 100)])\ndef test_ravel(operand_shape):\n    x = numpy.random.randint(-100, 100, size=operand_shape)\n    numpy_result = numpy.ravel(x)\n    modin_result = np.ravel(np.array(x))\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n\n\n@pytest.mark.parametrize(\"operand_shape\", [100, (100, 3), (3, 100)])\ndef test_shape(operand_shape):\n    x = numpy.random.randint(-100, 100, size=operand_shape)\n    numpy_result = numpy.shape(x)\n    modin_result = np.shape(np.array(x))\n    assert modin_result == numpy_result\n\n\n@pytest.mark.parametrize(\"operand_shape\", [100, (100, 3), (3, 100)])\ndef test_transpose(operand_shape):\n    x = numpy.random.randint(-100, 100, size=operand_shape)\n    numpy_result = numpy.transpose(x)\n    modin_result = np.transpose(np.array(x))\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n\n\n@pytest.mark.parametrize(\"axis\", [0, 1])\ndef test_split_2d(axis):\n    x = numpy.random.randint(-100, 100, size=(6, 4))\n    # Integer argument: split 
into N equal arrays along axis\n    numpy_result = numpy.split(x, 2, axis=axis)\n    modin_result = np.split(np.array(x), 2, axis=axis)\n    for modin_entry, numpy_entry in zip(modin_result, numpy_result):\n        assert_scalar_or_array_equal(modin_entry, numpy_entry)\n    # List argument: split at specified indices\n    idxs = [2, 3]\n    numpy_result = numpy.split(x, idxs, axis=axis)\n    modin_result = np.split(np.array(x), idxs, axis=axis)\n    for modin_entry, numpy_entry in zip(modin_result, numpy_result):\n        assert_scalar_or_array_equal(modin_entry, numpy_entry)\n\n\ndef test_split_2d_oob():\n    # Supplying an index out of bounds results in an empty sub-array, for which modin\n    # would return a numpy array by default\n    x = numpy.random.randint(-100, 100, size=(6, 4))\n    idxs = [2, 3, 6]\n    numpy_result = numpy.split(x, idxs)\n    modin_result = np.split(np.array(x), idxs)\n    for modin_entry, numpy_entry in zip(modin_result, numpy_result):\n        assert_scalar_or_array_equal(modin_entry, numpy_entry)\n\n\ndef test_split_2d_uneven():\n    x = np.array(numpy.random.randint(-100, 100, size=(3, 2)))\n    with pytest.raises(\n        ValueError, match=\"array split does not result in an equal division\"\n    ):\n        np.split(x, 2)\n\n\ndef test_hstack():\n    # 2D arrays\n    a = numpy.random.randint(-100, 100, size=(5, 3))\n    b = numpy.random.randint(-100, 100, size=(5, 2))\n    numpy_result = numpy.hstack((a, b))\n    modin_result = np.hstack((np.array(a), np.array(b)))\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n    # 1D arrays\n    a = numpy.random.randint(-100, 100, size=(5,))\n    b = numpy.random.randint(-100, 100, size=(3,))\n    numpy_result = numpy.hstack((a, b))\n    modin_result = np.hstack((np.array(a), np.array(b)))\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n\n\ndef test_append():\n    # Examples taken from numpy docs\n    xs = [[1, 2, 3], [[4, 5, 6], [7, 8, 9]]]\n    numpy_result = 
numpy.append(*xs)\n    modin_result = np.append(*[np.array(x) for x in xs])\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n\n    numpy_result = numpy.append([[1, 2, 3], [4, 5, 6]], [[7, 8, 9]], axis=0)\n    modin_result = np.append(np.array([[1, 2, 3], [4, 5, 6]]), [[7, 8, 9]], axis=0)\n    assert_scalar_or_array_equal(modin_result, numpy_result)\n\n\n@pytest.mark.xfail(reason=\"append error checking is incorrect: see GH#5896\")\ndef test_append_error():\n    with pytest.raises(ValueError):\n        np.append(np.array([[1, 2, 3], [4, 5, 6]]), np.array([7, 8, 9]), axis=0)\n"
  },
  {
    "path": "modin/tests/numpy/utils.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport numpy\n\nimport modin.numpy as np\n\n\ndef assert_scalar_or_array_equal(x1, x2, err_msg=\"\"):\n    \"\"\"\n    Assert that the results of the numpy and modin computations are the same.\n\n    If either argument is a modin array object, then `_to_numpy()` is called on it.\n    The arguments are compared with `numpy.testing.assert_array_equal`.\n    \"\"\"\n    if isinstance(x1, np.array):\n        x1 = x1._to_numpy()\n    if isinstance(x2, np.array):\n        x2 = x2._to_numpy()\n    numpy.testing.assert_array_equal(x1, x2, err_msg=err_msg)\n"
  },
  {
    "path": "modin/tests/pandas/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n"
  },
  {
    "path": "modin/tests/pandas/conftest.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport pytest\n\nfrom modin.config import Engine, StorageFormat\n\n\ndef pytest_collection_modifyitems(items):\n    try:\n        if (\n            Engine.get() in (\"Ray\", \"Unidist\", \"Dask\", \"Python\")\n            and StorageFormat.get() != \"Base\"\n        ):\n            for item in items:\n                if item.name in (\n                    \"test_dataframe_dt_index[3s-both-DateCol-_NoDefault.no_default]\",\n                    \"test_dataframe_dt_index[3s-right-DateCol-_NoDefault.no_default]\",\n                ):\n                    item.add_marker(\n                        pytest.mark.xfail(\n                            reason=\"https://github.com/modin-project/modin/issues/6399\"\n                        )\n                    )\n    except ImportError:\n        # No engine\n        ...\n"
  },
  {
    "path": "modin/tests/pandas/data/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n"
  },
  {
    "path": "modin/tests/pandas/data/blah.csv",
    "content": ",Presidents,Presidents,Presidents,Presidents,Presidents,Presidents,Presidents,Presidents,Presidents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Subcontinents,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,T
hemes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes
,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Them
es,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Th
emes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes,Themes\n,Pure mentions,Pure mentions,Pure mentions,Pure tags,Pure tags,Pure tags,Mentions 
+ Tags,Mentions + Tags,Mentions + Tags,Subcontinent Tags,Subcontinent Tags,Subcontinent Tags,Subcontinent Tags,Subcontinent Tags,Subcontinent Tags,Subcontinent Tags,Subcontinent Tags,Subcontinent Tags,Subcontinent Tags,Subcontinent Tags,Subcontinent Tags,Subcontinent Tags,Subcontinent Tags,Subcontinent Tags,Subcontinent Tags,Subcontinent Tags,Subcontinent Tags,Subcontinent Tags,Subcontinent Tags,Subcontinent Tags,Subcontinent Tags,Subcontinent Tags,Subcontinent Tags,\"Subcontintents, No POTUS\",\"Subcontintents, No POTUS\",\"Subcontintents, No POTUS\",\"Subcontintents, No POTUS\",\"Subcontintents, No POTUS\",\"Subcontintents, No POTUS\",\"Subcontintents, No POTUS\",\"Subcontintents, No POTUS\",\"Subcontintents, No POTUS\",\"Subcontintents, No POTUS\",\"Subcontintents, No POTUS\",\"Subcontintents, No POTUS\",\"Subcontintents, No POTUS\",\"Subcontintents, No POTUS\",\"Subcontintents, No POTUS\",\"Subcontintents, No POTUS\",\"Subcontintents, No POTUS\",\"Subcontintents, No POTUS\",\"Subcontintents, No POTUS\",\"Subcontintents, No POTUS\",\"Subcontintents, No POTUS\",\"Subcontintents, No POTUS\",\"Subcontintents, No POTUS\",\"Subcontintents, No POTUS\",Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme 
Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme 
Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme 
Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme 
Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,Theme Tags,\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No 
POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No 
POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No 
POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No 
POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No 
POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No 
POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No 
POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\",\"Themes, No POTUS\"\n,IND,DEP,DEP,IND,DEP,DEP,IND,DEP,DEP,IND,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,IND,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,IND,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DE
P,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,IND,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DE
P,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP,DEP\n,all,obama_mention,trump_mention,pr_tags,obama_tag,trump_tag,all,obama_m+t,trump_m+t,pr_tags,Caribbean,Southern Asia,Middle Africa,Northern Europe,Southern Europe,Western Asia,South America,Polynesia,Antarctica,Eastern Africa,Australia and New Zealand,Western Europe,Western Africa,Eastern Europe,Central America,Northern America,South-eastern Asia,Southern Africa,Eastern Asia,Northern Africa,Melanesia,Micronesia,Central Asia,pr_tags_np,Caribbean_np,Southern 
Asia_np,Middle Africa_np,Northern Europe_np,Southern Europe_np,Western Asia_np,South America_np,Polynesia_np,Antarctica_np,Eastern Africa_np,Australia and New Zealand_np,Western Europe_np,Western Africa_np,Eastern Europe_np,Central America_np,Northern America_np,South-eastern Asia_np,Southern Africa_np,Eastern Asia_np,Northern Africa_np,Melanesia_np,Micronesia_np,Central Asia_np,pr_themes,Top/News,Top/News/Sports,Top/Features/Travel/Guides/Activities and Interests/Golf,Top/News/Sports/Golf,Top/News/Business,Top/Opinion/Opinion,Top/Opinion,Top/News/Education,Top/Classifieds/Job Market/Job Categories/Education,Top/Features/Travel/Guides/Destinations/North America/United States/New York/New York City,Top/News/U.S./Mid-Atlantic,Top/Opinion/Opinion/Op-Ed,Top/Features/Travel/Guides/Destinations/North America/United States,Top/Features/Travel/Guides/Destinations/North America,Top/News/Technology,Top/News/U.S.,Top/News/New York and Region,\"Top/News/U.S./U.S. States, Territories and Possessions/New York\",Top/Features/Travel/Guides/Destinations/North America/United States/California,Top/Features/Books,\"Top/News/U.S./U.S. States, Territories and Possessions/California\",Top/Opinion/Opinion/Editorials,Top/Features/Travel/Guides/Activities and Interests/Family,Top/Opinion/Opinion/Op-Ed/Contributors,Top/Features/Travel/Guides/Destinations/Europe,Top/Features/Movies/News and Features,Top/Features/Arts/Music,Top/Features/Travel/Guides/Activities and Interests/Music,Top/Features/Arts,Top/Classifieds/Paid Death Notices,Top/Features/Movies,Top/Features/Travel/Guides/Destinations/Asia/China,\"Top/Classifieds/Job Market/Job Categories/Marketing, Advertising and PR\",Top/Features/Travel/Guides/Destinations/Asia,\"Top/News/U.S./U.S. 
States, Territories and Possessions/Arizona\",Top/Features/Travel/Guides/Destinations/North America/United States/Arizona,Top/News/U.S./Rockies,Top/Features/Travel/Guides/Destinations/North America/United States/New Jersey,Top/Features/Books/Book Reviews,Top/Features/Travel/Guides/Destinations/Asia/Pakistan,Top/News/World/Asia Pacific,Top/News/World/Countries and Territories/Pakistan,Top/News/World,Top/News/World/Countries and Territories/Afghanistan,Top/Features/Travel/Guides/Destinations/North America/United States/South Carolina,Top/Features/Travel/Guides/Destinations/Middle East/Israel,Top/Features/Travel/Guides/Destinations/Middle East,Top/News/World/Middle East,Top/Features/Travel/Guides/Destinations/Middle East/Iran,Top/News/World/Countries and Territories/Israel,Top/Features/Travel/Guides/Destinations/North America/United States/Colorado,Top/News/New York and Region/New Jersey,Top/Features/Travel/Guides/Destinations/Central and South America,Top/Features/Travel/Guides/Destinations/Central and South America/Colombia,Top/Features/Travel/Guides/Destinations/Africa/Kenya,Top/Features/Travel/Guides/Activities and Interests/Food and Wine,\"Top/News/U.S./U.S. 
States, Territories and Possessions/Massachusetts\",Top/News/Sports/Pro Football/National Football League/Washington Redskins,education and schools,teachers and school employees,privacy,politics and government,law and legislation,tests and testing,computers and the internet,finances,abortion,no index terms from nytimes,privatization,books and literature,motion pictures,united states politics and government,christians and christianity,religion and churches,advertising and marketing,budgets and budgeting,elections,medicine and health,presidents and presidency (us),presidential elections (us),minorities (us),recordings (audio),handicapped,homosexuality,labor,suits and litigation,colleges and universities,recordings (video),blacks,public opinion,primaries,lobbying and lobbyists,hispanic-americans,\"armament, defense and military forces\",appointments and executive changes,copyrights,philanthropy,mathematics,recession and depression,reading and writing skills,writing and writers,ratings and rating systems,jews,language and languages,television,computer software,police,taxation,governors (us),oil (petroleum) and gasoline,news and news media,global warming,environment,islam,presidential election of 1988,drug abuse and traffic,marijuana,women,church-state relations,editorials,gun control,election issues,immigration and refugees,sex,\"awards, decorations and honors\",terrorism,nazi policies toward jews and minorities,weather,electronic mail,quotation of the day,decisions and verdicts,equal educational opportunities,libraries and librarians,advertising,baseball,illegal aliens,media,crime and criminals,roads and traffic,automobiles,ethics,art,property taxes,speech,freedom of speech and expression,political advertising,reviews,sex crimes,prostitution,insurance,hurricanes and tropical storms,hurricane katrina,floods,election results,strikes,united states armament and defense,basketball,horse racing,united states international relations,international relations,firearms,health 
insurance and managed care,health insurance,discrimination,music,airlines and airplanes,drugs (pharmaceuticals),diseases and conditions,banks and banking,college athletics,football,impeachment,frauds and swindling,new year,correction stories,trees and shrubs,home repairs,olympic games,apparel,home furnishings,earthquakes,home repairs and improvements,world trade center (nyc),fish and other marine life,office buildings and commercial properties,noise,legislatures and parliaments,tuition,presidential election of 2004,mayors,soccer,restaurants,unemployment,biographical information,radio,\"conventions, national (us)\",computer and video games,presidential election of 2008,super bowl,demonstrations and riots,marriages,deaths (obituaries),accidents and safety,standards and standardization,referendums,exercise,children and youth,murders and attempted murders,international trade and world market,wages and salaries,coaches and managers,archaeology and anthropology,palestinians,birth control and family planning,economic conditions and trends,united states economy,telephones and telecommunications,restoration and rehabilitation,dairy products,animals,sales,\"prices (fares, fees and rates)\",energy and power,atomic weapons,holidays and special occasions,medicaid,medicare,christmas,\"war crimes, genocide and crimes against humanity\",presidential election of 2000,fires and firefighters,fires and firemen,air pollution,robberies and thefts,conventions and conferences,food,diet and nutrition,stocks and bonds,electric light and power,light,blackouts and brownouts (electrical),theater,hijacking,pentagon building,\"suspensions, dismissals and resignations\",scholarships and fellowships,newspapers,travel and vacations,building (construction),games,torture,cellular telephones,sentences (criminal),bridges and tunnels,affirmative action,credit,birds,space,postal service,pornography and obscenity,steroids,embargoes and economic sanctions,smoking and tobacco,social security (us),child 
care,inventions and patents,vaccination and immunization,prisons and prisoners,retirement,currency,transit systems,subways,snow and snowstorms,housing,priests,company reports,corporations,layoffs and job reductions,magazines,aged,viruses,biological and chemical warfare,opera,parades,states (us),constitutional amendments,cancer,pensions and retirement plans,child abuse and neglect,government employees,culture,blacks (in us),radiation,documentary films and programs,retail stores and trade,spanish language,\"mergers, acquisitions and divestitures\",small business,poetry and poets,rock music,identification devices,space shuttle,atomic energy,interest rates,police brutality and misconduct,science and technology,running,marathon running,research,weight,homeless persons,cocaine and crack cocaine,suicides and suicide attempts,bicycles and bicycling,buses,pregnancy and obstetrics,contests and prizes,vetoes (us),jewels and jewelry,academy awards (oscars),parties (social),festivals,\"divorce, separations and annulments\",gas (fuel),photography,comedy and humor,world series,hotels and motels,serial murders,textiles,gambling,cooking and cookbooks,recipes,beverages,tennis,shoes and boots,dogs,\"hockey, ice\",extradition,boxing,\"indians, american\",violence,chemicals,sports of the times (times column),arson,vietnam war,boycotts,toys,cruises,ships and shipping,trade shows and fairs,mental health and disorders,wines,alcoholic beverages,dancing,golf,auctions,mutual funds,swimming,historic buildings and sites,weddings and engagements,freedom and human rights,athletics and sports,draft and recruitment (sports),hospitals,genetics and heredity,foreign aid,anthrax,acquired immune deficiency syndrome,insects,consumer protection,mines and mining,blood,doctors,nursing and nurses,airports,water,death and dying,dna (deoxyribonucleic acid),third world and developing countries,food contamination and poisoning,agriculture,livestock,acquired immune deficiency syndrome (aids),no index 
terms,regulation and deregulation of industry,taxicabs and taxicab drivers,meat,babies,shortages,nasdaq composite index,government bonds,security and warning systems,grain,transplants,freedom of the press,metals and minerals,computer security,bombs and explosives,population,mortgages,customs (tariff),farmers,automobile racing,biology and biochemistry,anatomy and physiology,production,factories and industrial plants,track and field,summer games (olympics),foreign investments,stadiums and arenas,foreign service,ncaa basketball tournament,waste materials and disposal,hunting and trapping,deportation,casinos,world cup (soccer),reproduction (biological),surgery and surgeons,kidnapping,heart,alcohol abuse,domestic violence,capital punishment,hostages,world war ii (1939-45),war and revolution,civil war and guerrilla warfare,jury system,entertainment and amusements,child abuse,sexual harassment,war crimes and criminals,censorship,railroads,asylum (political),legal profession,courts,political prisoners,prisoners of war,men,dow jones stock average,probation and parole,drunken and reckless driving,organized crime,futures and options trading,securities and commodities violations,assaults,physics,bribery,debating,recalls and bans of products,credit and money cards,drought,fines (penalties),perjury,bars,trades (sports),skiing,attacks on police,race,bankruptcies,bridge (card game),renting and leasing,condominiums,commuting,stations and terminals (passenger),shutdowns (institutional),beaches,families and family life,brain,book trade,futures trading,federal taxes (us),tax credits,assassinations and attempted assassinations,recycling of waste materials,automobile insurance and liability,delays (transportation),transportation,flowers and plants,steel and iron,chemistry,anti-semitism,soft drinks,consumer behavior,parks and other recreation areas,leisure,gardens and gardening,figure skating,ice skating,\"fishing, sport\",executives and management,coups d'etat and attempted coups 
d'etat,supermarkets,rescues,accounting and accountants,rain,judges,water pollution,satellites,trucks and trucking,playoff games,\"fishing, commercial\",antitrust actions and laws,royal family,personal finances,parking,utility vehicles and other light trucks,racketeering and racketeers,interscholastic athletics,chess,jazz,breast,explosions,foster care,classical music,intelligence,navies,architecture,organized labor,bakeries and baked products,espionage,local government,interior design,guards,computer chips,boats and boating,forests and forestry,zoning,hiring and promotion,area planning and renewal,marketing and merchandising,embezzlement,boards of directors,smuggling,land use policies,monuments and memorials,endangered and extinct species,\"age, chronological\",welfare (us),child custody and support,wiretapping and other eavesdropping devices and methods,ferries,history,collectors and collections,missiles and missile defense systems,arms control and limitation and disarmament,constitutions,shopping centers,tax evasion,design,free agents (sports),genetic engineering,pilots,military aircraft,liability for products,real estate,veterans,antiques,missiles,furniture,drug traffic,discount selling,savings,fruit,deaths,united states open (tennis),english language,records and achievements,united states foreign service,recording equipment,nightclubs and cabarets,beer,brokers and brokerage firms,buildings (structures),\"arbitration, conciliation and mediation\",hazardous and toxic substances,defense contracts,arms sales abroad,military personnel,missing persons,concerts and recitals,drug addiction and abuse,geographic profiles,geography,fast food industry,labeling and labels,military bases and installations,vice presidents and vice presidency (us),volunteers,layoffs (labor),income,gifts,treaties,shootings,city councils,social conditions and trends,urban areas,shows (exhibits),data processing (computers),stock prices and trading volume,office buildings,immigration and 
emigration,kurds,income tax,software products,personal computers,summit conferences,prices,bombs and bomb plots,racial relations,search and seizure,museums,\"health, personal\",contracts,industry profiles,refugees and expatriates,trials,disclosure of information,unemployment and job market,suburbs,special sections,reform and reorganization,cooperatives,federal aid (us),relocation of business,cable television,attorneys general,electronics,book reviews,\"names, organizational\",\"minorities (ethnic, racial, religious)\",\"new models, design and products\",terms not available,account changes,surveys and series,military action,whitewater case,company and organization profiles,savings and loan associations,art shows,independence movements,life styles,suits and claims against government,presidential election of 1996,forecasts,threats and threatening messages,persian gulf war,pr_themes_np,Top/News_np,Top/News/Sports_np,Top/Features/Travel/Guides/Activities and Interests/Golf_np,Top/News/Sports/Golf_np,Top/News/Business_np,Top/Opinion/Opinion_np,Top/Opinion_np,Top/News/Education_np,Top/Classifieds/Job Market/Job Categories/Education_np,Top/Features/Travel/Guides/Destinations/North America/United States/New York/New York City_np,Top/News/U.S./Mid-Atlantic_np,Top/Opinion/Opinion/Op-Ed_np,Top/Features/Travel/Guides/Destinations/North America/United States_np,Top/Features/Travel/Guides/Destinations/North America_np,Top/News/Technology_np,Top/News/U.S._np,Top/News/New York and Region_np,\"Top/News/U.S./U.S. States, Territories and Possessions/New York_np\",Top/Features/Travel/Guides/Destinations/North America/United States/California_np,Top/Features/Books_np,\"Top/News/U.S./U.S. 
States, Territories and Possessions/California_np\",Top/Opinion/Opinion/Editorials_np,Top/Features/Travel/Guides/Activities and Interests/Family_np,Top/Opinion/Opinion/Op-Ed/Contributors_np,Top/Features/Travel/Guides/Destinations/Europe_np,Top/Features/Movies/News and Features_np,Top/Features/Arts/Music_np,Top/Features/Travel/Guides/Activities and Interests/Music_np,Top/Features/Arts_np,Top/Classifieds/Paid Death Notices_np,Top/Features/Movies_np,Top/Features/Travel/Guides/Destinations/Asia/China_np,\"Top/Classifieds/Job Market/Job Categories/Marketing, Advertising and PR_np\",Top/Features/Travel/Guides/Destinations/Asia_np,\"Top/News/U.S./U.S. States, Territories and Possessions/Arizona_np\",Top/Features/Travel/Guides/Destinations/North America/United States/Arizona_np,Top/News/U.S./Rockies_np,Top/Features/Travel/Guides/Destinations/North America/United States/New Jersey_np,Top/Features/Books/Book Reviews_np,Top/Features/Travel/Guides/Destinations/Asia/Pakistan_np,Top/News/World/Asia Pacific_np,Top/News/World/Countries and Territories/Pakistan_np,Top/News/World_np,Top/News/World/Countries and Territories/Afghanistan_np,Top/Features/Travel/Guides/Destinations/North America/United States/South Carolina_np,Top/Features/Travel/Guides/Destinations/Middle East/Israel_np,Top/Features/Travel/Guides/Destinations/Middle East_np,Top/News/World/Middle East_np,Top/Features/Travel/Guides/Destinations/Middle East/Iran_np,Top/News/World/Countries and Territories/Israel_np,Top/Features/Travel/Guides/Destinations/North America/United States/Colorado_np,Top/News/New York and Region/New Jersey_np,Top/Features/Travel/Guides/Destinations/Central and South America_np,Top/Features/Travel/Guides/Destinations/Central and South America/Colombia_np,Top/Features/Travel/Guides/Destinations/Africa/Kenya_np,Top/Features/Travel/Guides/Activities and Interests/Food and Wine_np,\"Top/News/U.S./U.S. 
States, Territories and Possessions/Massachusetts_np\",Top/News/Sports/Pro Football/National Football League/Washington Redskins_np,education and schools_np,teachers and school employees_np,privacy_np,politics and government_np,law and legislation_np,tests and testing_np,computers and the internet_np,finances_np,abortion_np,no index terms from nytimes_np,privatization_np,books and literature_np,motion pictures_np,united states politics and government_np,christians and christianity_np,religion and churches_np,advertising and marketing_np,budgets and budgeting_np,elections_np,medicine and health_np,presidents and presidency (us)_np,presidential elections (us)_np,minorities (us)_np,recordings (audio)_np,handicapped_np,homosexuality_np,labor_np,suits and litigation_np,colleges and universities_np,recordings (video)_np,blacks_np,public opinion_np,primaries_np,lobbying and lobbyists_np,hispanic-americans_np,\"armament, defense and military forces_np\",appointments and executive changes_np,copyrights_np,philanthropy_np,mathematics_np,recession and depression_np,reading and writing skills_np,writing and writers_np,ratings and rating systems_np,jews_np,language and languages_np,television_np,computer software_np,police_np,taxation_np,governors (us)_np,oil (petroleum) and gasoline_np,news and news media_np,global warming_np,environment_np,islam_np,presidential election of 1988_np,drug abuse and traffic_np,marijuana_np,women_np,church-state relations_np,editorials_np,gun control_np,election issues_np,immigration and refugees_np,sex_np,\"awards, decorations and honors_np\",terrorism_np,nazi policies toward jews and minorities_np,weather_np,electronic mail_np,quotation of the day_np,decisions and verdicts_np,equal educational opportunities_np,libraries and librarians_np,advertising_np,baseball_np,illegal aliens_np,media_np,crime and criminals_np,roads and traffic_np,automobiles_np,ethics_np,art_np,property taxes_np,speech_np,freedom of speech and expression_np,political 
advertising_np,reviews_np,sex crimes_np,prostitution_np,insurance_np,hurricanes and tropical storms_np,hurricane katrina_np,floods_np,election results_np,strikes_np,united states armament and defense_np,basketball_np,horse racing_np,united states international relations_np,international relations_np,firearms_np,health insurance and managed care_np,health insurance_np,discrimination_np,music_np,airlines and airplanes_np,drugs (pharmaceuticals)_np,diseases and conditions_np,banks and banking_np,college athletics_np,football_np,impeachment_np,frauds and swindling_np,new year_np,correction stories_np,trees and shrubs_np,home repairs_np,olympic games_np,apparel_np,home furnishings_np,earthquakes_np,home repairs and improvements_np,world trade center (nyc)_np,fish and other marine life_np,office buildings and commercial properties_np,noise_np,legislatures and parliaments_np,tuition_np,presidential election of 2004_np,mayors_np,soccer_np,restaurants_np,unemployment_np,biographical information_np,radio_np,\"conventions, national (us)_np\",computer and video games_np,presidential election of 2008_np,super bowl_np,demonstrations and riots_np,marriages_np,deaths (obituaries)_np,accidents and safety_np,standards and standardization_np,referendums_np,exercise_np,children and youth_np,murders and attempted murders_np,international trade and world market_np,wages and salaries_np,coaches and managers_np,archaeology and anthropology_np,palestinians_np,birth control and family planning_np,economic conditions and trends_np,united states economy_np,telephones and telecommunications_np,restoration and rehabilitation_np,dairy products_np,animals_np,sales_np,\"prices (fares, fees and rates)_np\",energy and power_np,atomic weapons_np,holidays and special occasions_np,medicaid_np,medicare_np,christmas_np,\"war crimes, genocide and crimes against humanity_np\",presidential election of 2000_np,fires and firefighters_np,fires and firemen_np,air pollution_np,robberies and thefts_np,conventions 
and conferences_np,food_np,diet and nutrition_np,stocks and bonds_np,electric light and power_np,light_np,blackouts and brownouts (electrical)_np,theater_np,hijacking_np,pentagon building_np,\"suspensions, dismissals and resignations_np\",scholarships and fellowships_np,newspapers_np,travel and vacations_np,building (construction)_np,games_np,torture_np,cellular telephones_np,sentences (criminal)_np,bridges and tunnels_np,affirmative action_np,credit_np,birds_np,space_np,postal service_np,pornography and obscenity_np,steroids_np,embargoes and economic sanctions_np,smoking and tobacco_np,social security (us)_np,child care_np,inventions and patents_np,vaccination and immunization_np,prisons and prisoners_np,retirement_np,currency_np,transit systems_np,subways_np,snow and snowstorms_np,housing_np,priests_np,company reports_np,corporations_np,layoffs and job reductions_np,magazines_np,aged_np,viruses_np,biological and chemical warfare_np,opera_np,parades_np,states (us)_np,constitutional amendments_np,cancer_np,pensions and retirement plans_np,child abuse and neglect_np,government employees_np,culture_np,blacks (in us)_np,radiation_np,documentary films and programs_np,retail stores and trade_np,spanish language_np,\"mergers, acquisitions and divestitures_np\",small business_np,poetry and poets_np,rock music_np,identification devices_np,space shuttle_np,atomic energy_np,interest rates_np,police brutality and misconduct_np,science and technology_np,running_np,marathon running_np,research_np,weight_np,homeless persons_np,cocaine and crack cocaine_np,suicides and suicide attempts_np,bicycles and bicycling_np,buses_np,pregnancy and obstetrics_np,contests and prizes_np,vetoes (us)_np,jewels and jewelry_np,academy awards (oscars)_np,parties (social)_np,festivals_np,\"divorce, separations and annulments_np\",gas (fuel)_np,photography_np,comedy and humor_np,world series_np,hotels and motels_np,serial murders_np,textiles_np,gambling_np,cooking and 
cookbooks_np,recipes_np,beverages_np,tennis_np,shoes and boots_np,dogs_np,\"hockey, ice_np\",extradition_np,boxing_np,\"indians, american_np\",violence_np,chemicals_np,sports of the times (times column)_np,arson_np,vietnam war_np,boycotts_np,toys_np,cruises_np,ships and shipping_np,trade shows and fairs_np,mental health and disorders_np,wines_np,alcoholic beverages_np,dancing_np,golf_np,auctions_np,mutual funds_np,swimming_np,historic buildings and sites_np,weddings and engagements_np,freedom and human rights_np,athletics and sports_np,draft and recruitment (sports)_np,hospitals_np,genetics and heredity_np,foreign aid_np,anthrax_np,acquired immune deficiency syndrome_np,insects_np,consumer protection_np,mines and mining_np,blood_np,doctors_np,nursing and nurses_np,airports_np,water_np,death and dying_np,dna (deoxyribonucleic acid)_np,third world and developing countries_np,food contamination and poisoning_np,agriculture_np,livestock_np,acquired immune deficiency syndrome (aids)_np,no index terms_np,regulation and deregulation of industry_np,taxicabs and taxicab drivers_np,meat_np,babies_np,shortages_np,nasdaq composite index_np,government bonds_np,security and warning systems_np,grain_np,transplants_np,freedom of the press_np,metals and minerals_np,computer security_np,bombs and explosives_np,population_np,mortgages_np,customs (tariff)_np,farmers_np,automobile racing_np,biology and biochemistry_np,anatomy and physiology_np,production_np,factories and industrial plants_np,track and field_np,summer games (olympics)_np,foreign investments_np,stadiums and arenas_np,foreign service_np,ncaa basketball tournament_np,waste materials and disposal_np,hunting and trapping_np,deportation_np,casinos_np,world cup (soccer)_np,reproduction (biological)_np,surgery and surgeons_np,kidnapping_np,heart_np,alcohol abuse_np,domestic violence_np,capital punishment_np,hostages_np,world war ii (1939-45)_np,war and revolution_np,civil war and guerrilla warfare_np,jury 
system_np,entertainment and amusements_np,child abuse_np,sexual harassment_np,war crimes and criminals_np,censorship_np,railroads_np,asylum (political)_np,legal profession_np,courts_np,political prisoners_np,prisoners of war_np,men_np,dow jones stock average_np,probation and parole_np,drunken and reckless driving_np,organized crime_np,futures and options trading_np,securities and commodities violations_np,assaults_np,physics_np,bribery_np,debating_np,recalls and bans of products_np,credit and money cards_np,drought_np,fines (penalties)_np,perjury_np,bars_np,trades (sports)_np,skiing_np,attacks on police_np,race_np,bankruptcies_np,bridge (card game)_np,renting and leasing_np,condominiums_np,commuting_np,stations and terminals (passenger)_np,shutdowns (institutional)_np,beaches_np,families and family life_np,brain_np,book trade_np,futures trading_np,federal taxes (us)_np,tax credits_np,assassinations and attempted assassinations_np,recycling of waste materials_np,automobile insurance and liability_np,delays (transportation)_np,transportation_np,flowers and plants_np,steel and iron_np,chemistry_np,anti-semitism_np,soft drinks_np,consumer behavior_np,parks and other recreation areas_np,leisure_np,gardens and gardening_np,figure skating_np,ice skating_np,\"fishing, sport_np\",executives and management_np,coups d'etat and attempted coups d'etat_np,supermarkets_np,rescues_np,accounting and accountants_np,rain_np,judges_np,water pollution_np,satellites_np,trucks and trucking_np,playoff games_np,\"fishing, commercial_np\",antitrust actions and laws_np,royal family_np,personal finances_np,parking_np,utility vehicles and other light trucks_np,racketeering and racketeers_np,interscholastic athletics_np,chess_np,jazz_np,breast_np,explosions_np,foster care_np,classical music_np,intelligence_np,navies_np,architecture_np,organized labor_np,bakeries and baked products_np,espionage_np,local government_np,interior design_np,guards_np,computer chips_np,boats and boating_np,forests and 
forestry_np,zoning_np,hiring and promotion_np,area planning and renewal_np,marketing and merchandising_np,embezzlement_np,boards of directors_np,smuggling_np,land use policies_np,monuments and memorials_np,endangered and extinct species_np,\"age, chronological_np\",welfare (us)_np,child custody and support_np,wiretapping and other eavesdropping devices and methods_np,ferries_np,history_np,collectors and collections_np,missiles and missile defense systems_np,arms control and limitation and disarmament_np,constitutions_np,shopping centers_np,tax evasion_np,design_np,free agents (sports)_np,genetic engineering_np,pilots_np,military aircraft_np,liability for products_np,real estate_np,veterans_np,antiques_np,missiles_np,furniture_np,drug traffic_np,discount selling_np,savings_np,fruit_np,deaths_np,united states open (tennis)_np,english language_np,records and achievements_np,united states foreign service_np,recording equipment_np,nightclubs and cabarets_np,beer_np,brokers and brokerage firms_np,buildings (structures)_np,\"arbitration, conciliation and mediation_np\",hazardous and toxic substances_np,defense contracts_np,arms sales abroad_np,military personnel_np,missing persons_np,concerts and recitals_np,drug addiction and abuse_np,geographic profiles_np,geography_np,fast food industry_np,labeling and labels_np,military bases and installations_np,vice presidents and vice presidency (us)_np,volunteers_np,layoffs (labor)_np,income_np,gifts_np,treaties_np,shootings_np,city councils_np,social conditions and trends_np,urban areas_np,shows (exhibits)_np,data processing (computers)_np,stock prices and trading volume_np,office buildings_np,immigration and emigration_np,kurds_np,income tax_np,software products_np,personal computers_np,summit conferences_np,prices_np,bombs and bomb plots_np,racial relations_np,search and seizure_np,museums_np,\"health, personal_np\",contracts_np,industry profiles_np,refugees and expatriates_np,trials_np,disclosure of information_np,unemployment 
and job market_np,suburbs_np,special sections_np,reform and reorganization_np,cooperatives_np,federal aid (us)_np,relocation of business_np,cable television_np,attorneys general_np,electronics_np,book reviews_np,\"names, organizational_np\",\"minorities (ethnic, racial, religious)_np\",\"new models, design and products_np\",terms not available_np,account changes_np,surveys and series_np,military action_np,whitewater case_np,company and organization profiles_np,savings and loan associations_np,art shows_np,independence movements_np,life styles_np,suits and claims against government_np,presidential election of 1996_np,forecasts_np,threats and threatening messages_np,persian gulf war_np\n1,blah,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,\n2,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,\n3,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,\n4,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,\n5,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,\n6,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,\n"
  },
  {
    "path": "modin/tests/pandas/data/issue_1930.csv",
    "content": ",col1,col2,col3,col4,col5\n0,0,4,8,12,0\n1,1,5,9,13,0\n2,2,6,10,14,0\n3,3,7,11,15,0\n"
  },
  {
    "path": "modin/tests/pandas/data/issue_2074.csv",
    "content": "one,two, three, five, six, seven, eight\nthree,three, five, six, seven, eight, nine\none,four, three, five, six, seven, eight\none,two, three, five, six, seven, eight\none,two, three, five, six, seven, eight\none,two, three, five, six, seven, eight\nthree,four, five, six, seven, eight, nine\none,two, three, five, six, seven, eight\nthree,four, five, six, seven, eight, nine\nthree,four, five, six, seven, eight, nine\nthree,four, five, six, seven, eight, nine\nthree,four, five, six, seven, eight, nine\n"
  },
  {
    "path": "modin/tests/pandas/data/issue_2239.csv",
    "content": "1585542839.000000, 1585542839.000000, 1585542839.000000\n32.000000, 32.000000, 32.000000\n-38,-14,51\n-38,-13,51\n-38,-14,51\n-38,-14,50\n-38,-13,51\n-38,-14,50\n-38,-14,51\n-38,-13,51\n-38,-14,51\n-38,-13,51\n-38,-14,51\n-38,-14,50\n-38,-13,51\n-38,-14,50\n-38,-14,51\n-38,-13,51\n-38,-14,51\n-38,-13,51\n-38,-14,51\n-38,-14,50\n-38,-13,51\n-38,-14,50\n-38,-14,51\n-38,-13,51\n-38,-14,51\n-38,-13,51\n-38,-14,51\n-38,-14,50\n-38,-13,51\n-38,-14,50\n-38,-14,51\n-38,-13,51\n-38,-14,51\n-38,-13,51\n-38,-14,51\n-38,-14,50\n-38,-13,51\n-38,-14,50\n-38,-14,51\n-38,-13,51\n-38,-14,51\n-38,-13,51\n-38,-14,51\n-38,-14,50\n-38,-13,51\n-38,-14,50\n-38,-14,51\n-38,-13,51\n-38,-14,51\n-38,-13,51\n-38,-14,51\n-38,-14,50\n-38,-13,51\n-38,-14,50\n-38,-14,51\n-38,-13,51\n-38,-14,51\n-38,-13,51\n-38,-14,51\n-38,-14,50\n-38,-13,51\n-38,-14,50\n-38,-14,51\n-38,-13,51\n-38,-14,51\n-38,-13,51\n-38,-14,51\n-38,-14,50\n-38,-13,51\n-38,-14,50\n-38,-14,51\n-38,-13,51\n-38,-14,51\n-38,-13,51\n-38,-14,51\n-38,-14,50\n-38,-13,51\n-38,-14,50\n-38,-14,51\n-38,-13,51\n-38,-14,51\n-38,-13,51\n-38,-14,51\n-38,-14,50\n-38,-13,51\n-38,-14,50\n-38,-14,51\n-38,-13,51\n-38,-14,51\n-38,-13,51\n-38,-14,51\n-38,-14,50\n-38,-13,51\n-38,-14,50\n-38,-14,51\n-38,-13,51\n-38,-14,51\n-38,-13,51\n-38,-14,51\n-38,-14,50\n-38,-13,51\n-38,-14,50\n-38,-14,51\n-38,-13,51\n-38,-14,51\n-38,-13,51\n-38,-14,51\n-38,-14,50\n-38,-13,51\n-38,-14,50\n-38,-14,51\n-38,-13,51\n-38,-14,51\n-38,-13,51\n-38,-14,51\n-38,-14,50\n-38,-13,51\n-38,-14,50\n-38,-14,51\n-38,-13,51\n-38,-14,51\n-38,-13,51\n-38,-14,51\n-38,-14,50\n-38,-13,51\n-38,-14,50\n-38,-14,51\n-38,-13,51\n-38,-14,51\n-38,-13,51\n-38,-14,51\n-38,-14,50\n-38,-13,51\n-38,-14,50\n-38,-14,51\n-38,-13,51\n-38,-14,51\n-38,-13,51\n-38,-14,51\n-38,-14,50\n-38,-13,51\n-38,-14,50\n-38,-14,51\n-38,-13,51\n"
  },
  {
    "path": "modin/tests/pandas/data/issue_3119.csv",
    "content": ",a,b,c\ni1,0,1,2\ni2,3,4,5\ni3,6,7,8\ni4,9,10,11\n"
  },
  {
    "path": "modin/tests/pandas/data/issue_4543.csv",
    "content": "str_data,float_data,country\nfanta,3.14,usa\ncocacola,9.8,france\nsprite,89.2,china\n"
  },
  {
    "path": "modin/tests/pandas/data/issue_976.csv",
    "content": "1;11800000560005;11800000560005;  ;;-;. ;. i; ; ;105.6000\n1;10200007400477;10200007400477;  ;;-;. ;;³ ; ;696.6400\n1;11100008540930;11100008540930;  ;2;9;. ;.; ; ;124.4800\n1;12300000051493;12300000051493;  ;;50;. ;.;'- ; ;-0.4700\n1;12300000117460;12300000117460; ³ ;;60;. ;;'- ; ;221.0400\n"
  },
  {
    "path": "modin/tests/pandas/data/multiple_csv/test_data0.csv",
    "content": "a,b,c\n0,True,x\n1,False,y\n2,True,z\n3,False,w\n"
  },
  {
    "path": "modin/tests/pandas/data/multiple_csv/test_data1.csv",
    "content": "a,b,c\n4,True,m\n5,False,n\n6,True,t\n7,True,l\n"
  },
  {
    "path": "modin/tests/pandas/data/newlines.csv",
    "content": "col1,col2,col3,col4\n\"This is a very long\nstring with several\nnewline characters\nthat will probably cause some\nproblem for Modin\nand I suspect that\nwe\nwill hopefully\nreproduce the issue\",2,3,4\n\"H\",2,3,4\n\"I\",2,3,4\n\"J\",2,3,4\n\"And there is another\nstring with several\nnewline characters\nthat will probably cause some\nproblem for Modin\nand I suspect that\nwe\nwill hopefully\nreproduce the issue\",2,3,4\n\"I\",2,3,4\n\"J\",2,3,4\n\"H\",2,3,4\n\"I\",2,3,4\n\"J\",2,3,4\n\"H\",2,3,4\n\"I\",2,3,4\n\"And there is another\nstring with several\nnewline characters\nthat will probably cause some\nproblem for Modin\nand I suspect that\nwe\nwill hopefully\nreproduce the issue\",2,\"And\nthere is another\nstring with several\nnewline characters\nthat will probably cause some\nproblem for Modin\nand I suspect that\nwe\nwill hopefully\nreproduce the issue\",4\n\"I\",2,3,4\n\"J\",2,3,4\n\"H\",2,3,4\n\"I\",2,3,4\n\"J\",2,3,4\n\"H\",2,3,4\n\"I\",2,3,4\n\"And there is another\nstring with several\nnewline characters\nthat will probably cause some\nproblem for Modin\nand I suspect that\nwe\nwill hopefully\nreproduce the issue\",2,3,4\n"
  },
  {
    "path": "modin/tests/pandas/data/test_categories.csv",
    "content": "111,AAA\n222,BBB\n333,CCC\n"
  },
  {
    "path": "modin/tests/pandas/data/test_categories.json",
    "content": "{\"one\":{\"0\":111,\"1\":222,\"2\":333},\"two\":{\"0\":\"AAA\",\"1\":\"BBB\",\"2\":\"CCC\"}}"
  },
  {
    "path": "modin/tests/pandas/data/test_data.fwf",
    "content": "ACW000116041961TAVG -142  k  183  k  419  k  720  k 1075  k 1546  k 1517  k 1428  k 1360  k 1121  k  457  k  -92  k\nACW000116041962TAVG   60  k   32  k -207  k  582  k  855  k 1328  k 1457  k 1340  k 1110  k  941  k  270  k -179  k\nACW000116041963TAVG -766  k -606  k -152  k  488  k 1171  k 1574  k 1567  k 1543  k 1279  k  887  k  513  k -161  k\nACW000116041964TAVG    9  k -138  k    2  k  685  k 1166  k 1389  k 1453  k 1504  k 1168  k  735  k  493  k   59  k\nACW000116041965TAVG   -9  k -158  k  -15  k  537  k  934  k 1447  k 1434  k 1424  k 1324  k  921  k  -22  k -231  k\nACW000116041966TAVG -490  k -614  k  108  k  246  k 1082  k 1642  k 1620  k 1471  k 1195  k  803  k  329  k    2  k\nACW000116041967TAVG -270  k   36  k  397  k  481  k 1052  k 1373  k 1655  k 1598  k 1318  k  997  k  559  k  -96  k\nACW000116041968TAVG -306  k -183  k  220  k  714  k  935  k 1635  k 1572  k 1718  k 1331  k  781  k  180  k  -56  k\nACW000116041969TAVG -134  k -494  k -185  k  497  k  962  k 1634  k 1687  k 1773  k 1379  k  932  k  321  k -275  k\nACW000116041970TAVG -483  k -704  k  -75  k  261  k 1093  k 1724  k 1470  k 1609  k 1163  k  836  k  300  k   73  k\nACW000116041971TAVG   -6  k   83  k  -40  k  472  k 1180  k 1411  k 1700  k 1600  k 1165  k  908  k  361  k  383  k\nACW000116041972TAVG -377  k   -4  k  250  k  556  k 1117  k 1444  k 1778  k 1545  k 1073  k  797  k  481  k  404  k\nACW000116041973TAVG   61  k  169  k  453  k  472  k 1075  k 1545  k 1866  k 1579  k 1199  k  563  k  154  k   11  k\nACW000116041974TAVG  191  k  209  k  339  k  748  k 1094  k 1463  k 1498  k 1541  k 1319  k  585  k  428  k  335  k\nACW000116041975TAVG  346  k   88  k  198  k  488  k 1165  k 1483  k 1756  k 1906  k 1374  k  845  k  406  k  387  k\nACW000116041976TAVG -163  k  -62  k -135  k  502  k 1128  k 1461  k 1822  k 1759  k 1136  k  715  k  458  k -205  k\nACW000116041977TAVG -192  k -279  k  234  k  332  k 1128  k 1566  k 1565  k 1556  k 1126  k  949  k  421  k  
162  k\nACW000116041978TAVG   55  k -354  k   66  k  493  k 1155  k 1552  k 1564  k 1555  k 1061  k  932  k  688  k -464  k\nACW000116041979TAVG -618  k -632  k   35  k  474  k  993  k 1566  k 1484  k 1483  k 1229  k  647  k  412  k  -40  k\nACW000116041980TAVG -340  k -500  k  -35  k  524  k 1071  k 1534  k 1655  k 1502  k 1269  k  660  k  138  k  125  k"
  },
  {
    "path": "modin/tests/pandas/data/test_data.json",
    "content": "{\"Duration\":60,\"Pulse\":110,\"Maxpulse\":130,\"Calories\":409}\n{\"Duration\":60,\"Pulse\":117,\"Maxpulse\":145,\"Calories\":479}\n{\"Duration\":60,\"Pulse\":103,\"Maxpulse\":135,\"Calories\":340}\n{\"Duration\":45,\"Pulse\":109,\"Maxpulse\":175,\"Calories\":282}\n{\"Duration\":45,\"Pulse\":117,\"Maxpulse\":148,\"Calories\":406}\n{\"Duration\":60,\"Pulse\":102,\"Maxpulse\":127,\"Calories\":300}\n"
  },
  {
    "path": "modin/tests/pandas/data/test_delim.csv",
    "content": "a|b|c|d|e\n1|2|3|4|5\n2|3|4|5|6\n3|4|5|6|7\n4|5|6|7|8\n5|6|7|8|9\n6|7|8|9|0\n"
  },
  {
    "path": "modin/tests/pandas/data/test_different_columns_in_rows.json",
    "content": "{\"a1\": 1}\n{\"a2\": 1}\n{\"a3\": 2}\n{\"a4\": 1}\n{\"a5\": 2}\n{\"a6\": 1}\n{\"a7\": 2}\n{\"a8\": 1}\n{\"a9\": 2}\n{\"a10\": 1}\n{\"a11\": 2}\n{\"a12\": 1}\n{\"a13\": 2}\n{\"a14\": 1}\n{\"a15\": 2}\n{\"a16\": 2}\n"
  },
  {
    "path": "modin/tests/pandas/data/test_null_col.csv",
    "content": "a,b,c\n1,1,\n2,2,\n3,3,\n"
  },
  {
    "path": "modin/tests/pandas/data/test_time_parsing.csv",
    "content": "timestamp,year,month,date,symbol,high,low,open,close,spread,volume\n2010-04-01 00:00:00,2010,04,01,USD/JPY,93.52600,93.36100,93.51800,93.38200,0.00500,3049\n2010-04-01 00:30:00,2010,04,01,USD/JPY,93.47500,93.35200,93.38500,93.39100,0.00600,2251\n2010-04-01 01:00:00,2010,04,01,USD/JPY,93.42100,93.32600,93.39100,93.38400,0.00600,1577"
  },
  {
    "path": "modin/tests/pandas/data/test_usecols.csv",
    "content": "a,b,c,d,e\n1,2,3,4,5\n2,3,4,5,6\n3,4,5,6,7\n4,5,6,7,8\n5,6,7,8,9\n6,7,8,9,0\n"
  },
  {
    "path": "modin/tests/pandas/dataframe/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n"
  },
  {
    "path": "modin/tests/pandas/dataframe/test_binary.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport matplotlib\nimport numpy as np\nimport pandas\nimport pytest\n\nimport modin.pandas as pd\nfrom modin.config import NPartitions, StorageFormat\nfrom modin.core.dataframe.pandas.partitioning.axis_partition import (\n    PandasDataframeAxisPartition,\n)\nfrom modin.core.storage_formats.pandas.query_compiler_caster import (\n    _assert_casting_functions_wrap_same_implementation,\n)\nfrom modin.tests.pandas.utils import (\n    CustomIntegerForAddition,\n    NonCommutativeMultiplyInteger,\n    create_test_dfs,\n    default_to_pandas_ignore_string,\n    df_equals,\n    eval_general,\n    test_data,\n    test_data_keys,\n    test_data_values,\n)\nfrom modin.tests.test_utils import (\n    df_or_series_using_native_execution,\n    warns_that_defaulting_to_pandas_if,\n)\nfrom modin.utils import get_current_execution\n\nNPartitions.put(4)\n\n# Force matplotlib to not use any Xwindows backend.\nmatplotlib.use(\"Agg\")\n\n# Our configuration in pytest.ini requires that we explicitly catch all\n# instances of defaulting to pandas, but some test modules, like this one,\n# have too many such instances.\npytestmark = 
pytest.mark.filterwarnings(default_to_pandas_ignore_string)\n\n\n@pytest.mark.parametrize(\n    \"other\",\n    [\n        lambda df, axis: 4,\n        lambda df, axis: df.iloc[0] if axis == \"columns\" else list(df[df.columns[0]]),\n        lambda df, axis: {\n            label: idx + 1\n            for idx, label in enumerate(df.axes[0 if axis == \"rows\" else 1])\n        },\n        lambda df, axis: {\n            label if idx % 2 else f\"random_key{idx}\": idx + 1\n            for idx, label in enumerate(df.axes[0 if axis == \"rows\" else 1][::-1])\n        },\n    ],\n    ids=[\n        \"scalar\",\n        \"series_or_list\",\n        \"dictionary_keys_equal_columns\",\n        \"dictionary_keys_unequal_columns\",\n    ],\n)\n@pytest.mark.parametrize(\"axis\", [\"rows\", \"columns\"])\n@pytest.mark.parametrize(\n    \"op\",\n    [\n        *(\"add\", \"radd\", \"sub\", \"rsub\", \"mod\", \"rmod\", \"pow\", \"rpow\"),\n        *(\"truediv\", \"rtruediv\", \"mul\", \"rmul\", \"floordiv\", \"rfloordiv\"),\n    ],\n)\n@pytest.mark.parametrize(\"backend\", [None, \"pyarrow\"])\ndef test_math_functions(other, axis, op, backend):\n    data = test_data[\"float_nan_data\"]\n    if (op == \"floordiv\" or op == \"rfloordiv\") and axis == \"rows\":\n        # lambda == \"series_or_list\"\n        pytest.xfail(reason=\"different behavior\")\n\n    if op == \"rmod\" and axis == \"rows\":\n        # lambda == \"series_or_list\"\n        pytest.xfail(reason=\"different behavior\")\n\n    if op in (\"mod\", \"rmod\") and backend == \"pyarrow\":\n        pytest.skip(reason=\"These functions are not implemented in pandas itself\")\n    eval_general(\n        *create_test_dfs(data, backend=backend),\n        lambda df: getattr(df, op)(other(df, axis), axis=axis),\n    )\n\n\n@pytest.mark.parametrize(\"other\", [lambda df: 2, lambda df: df])\ndef test___divmod__(other):\n    data = test_data[\"float_nan_data\"]\n    eval_general(*create_test_dfs(data), lambda df: divmod(df, 
other(df)))\n\n\ndef test___rdivmod__():\n    data = test_data[\"float_nan_data\"]\n    eval_general(*create_test_dfs(data), lambda df: divmod(2, df))\n\n\n@pytest.mark.parametrize(\n    \"other\",\n    [lambda df: df[: -(2**4)], lambda df: df[df.columns[0]].reset_index(drop=True)],\n    ids=[\"check_missing_value\", \"check_different_index\"],\n)\n@pytest.mark.parametrize(\"fill_value\", [None, 3.0])\n@pytest.mark.parametrize(\n    \"op\",\n    [\n        *(\"add\", \"radd\", \"sub\", \"rsub\", \"mod\", \"rmod\", \"pow\", \"rpow\"),\n        *(\"truediv\", \"rtruediv\", \"mul\", \"rmul\", \"floordiv\", \"rfloordiv\"),\n    ],\n)\ndef test_math_functions_fill_value(other, fill_value, op, request):\n    data = test_data[\"int_data\"]\n    modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data)\n\n    expected_exception = None\n    if \"check_different_index\" in request.node.callspec.id and fill_value == 3.0:\n        expected_exception = NotImplementedError(\"fill_value 3.0 not supported.\")\n\n    eval_general(\n        modin_df,\n        pandas_df,\n        lambda df: getattr(df, op)(other(df), axis=0, fill_value=fill_value),\n        expected_exception=expected_exception,\n        # This test causes an empty slice to be generated thus triggering:\n        # https://github.com/modin-project/modin/issues/5974\n        comparator_kwargs={\"check_dtypes\": get_current_execution() != \"BaseOnPython\"},\n    )\n\n\n@pytest.mark.parametrize(\n    \"op\",\n    [\n        *(\"add\", \"radd\", \"sub\", \"rsub\", \"mod\", \"rmod\", \"pow\", \"rpow\"),\n        *(\"truediv\", \"rtruediv\", \"mul\", \"rmul\", \"floordiv\", \"rfloordiv\"),\n    ],\n)\ndef test_math_functions_level(op):\n    modin_df = pd.DataFrame(test_data[\"int_data\"])\n    modin_df.index = pandas.MultiIndex.from_tuples(\n        [(i // 4, i // 2, i) for i in modin_df.index]\n    )\n\n    # Defaults to pandas\n    with warns_that_defaulting_to_pandas_if(\n        not 
df_or_series_using_native_execution(modin_df)\n    ):\n        # Operation against self for sanity check\n        getattr(modin_df, op)(modin_df, axis=0, level=1)\n\n\n@pytest.mark.parametrize(\n    \"math_op, alias\",\n    [\n        (\"truediv\", \"divide\"),\n        (\"truediv\", \"div\"),\n        (\"rtruediv\", \"rdiv\"),\n        (\"mul\", \"multiply\"),\n        (\"sub\", \"subtract\"),\n        (\"add\", \"__add__\"),\n        (\"radd\", \"__radd__\"),\n        (\"truediv\", \"__truediv__\"),\n        (\"rtruediv\", \"__rtruediv__\"),\n        (\"floordiv\", \"__floordiv__\"),\n        (\"rfloordiv\", \"__rfloordiv__\"),\n        (\"mod\", \"__mod__\"),\n        (\"rmod\", \"__rmod__\"),\n        (\"mul\", \"__mul__\"),\n        (\"rmul\", \"__rmul__\"),\n        (\"pow\", \"__pow__\"),\n        (\"rpow\", \"__rpow__\"),\n        (\"sub\", \"__sub__\"),\n        (\"rsub\", \"__rsub__\"),\n    ],\n)\ndef test_math_alias(math_op, alias):\n    _assert_casting_functions_wrap_same_implementation(\n        getattr(pd.DataFrame, math_op), getattr(pd.DataFrame, alias)\n    )\n\n\n@pytest.mark.parametrize(\"other\", [\"as_left\", 4, 4.0, \"a\"])\n@pytest.mark.parametrize(\"op\", [\"eq\", \"ge\", \"gt\", \"le\", \"lt\", \"ne\"])\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_comparison(data, op, other, request):\n    def operation(df):\n        return getattr(df, op)(df if other == \"as_left\" else other)\n\n    expected_exception = None\n    if \"int_data\" in request.node.callspec.id and other == \"a\":\n        pytest.xfail(reason=\"https://github.com/modin-project/modin/issues/7019\")\n    elif \"float_nan_data\" in request.node.callspec.id and other == \"a\":\n        expected_exception = TypeError(\n            \"Invalid comparison between dtype=float64 and str\"\n        )\n\n    eval_general(\n        *create_test_dfs(data),\n        operation=operation,\n        expected_exception=expected_exception,\n    
)\n\n\n@pytest.mark.skipif(\n    StorageFormat.get() != \"Pandas\",\n    reason=\"Modin on this engine doesn't create virtual partitions.\",\n)\n@pytest.mark.parametrize(\n    \"left_virtual,right_virtual\", [(True, False), (False, True), (True, True)]\n)\ndef test_virtual_partitions(left_virtual: bool, right_virtual: bool):\n    # This test covers https://github.com/modin-project/modin/issues/4691\n    n: int = 1000\n    pd_df = pandas.DataFrame(list(range(n)))\n\n    def modin_df(is_virtual):\n        if not is_virtual:\n            return pd.DataFrame(pd_df)\n        result = pd.concat([pd.DataFrame([i]) for i in range(n)], ignore_index=True)\n        # Modin should rebalance the partitions after the concat, producing virtual partitions.\n        assert isinstance(\n            result._query_compiler._modin_frame._partitions[0][0],\n            PandasDataframeAxisPartition,\n        )\n        return result\n\n    df_equals(modin_df(left_virtual) + modin_df(right_virtual), pd_df + pd_df)\n\n\n@pytest.mark.parametrize(\"op\", [\"eq\", \"ge\", \"gt\", \"le\", \"lt\", \"ne\"])\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_multi_level_comparison(data, op):\n    modin_df_multi_level = pd.DataFrame(data)\n\n    new_idx = pandas.MultiIndex.from_tuples(\n        [(i // 4, i // 2, i) for i in modin_df_multi_level.index]\n    )\n    modin_df_multi_level.index = new_idx\n\n    # Defaults to pandas\n    with warns_that_defaulting_to_pandas_if(\n        not df_or_series_using_native_execution(modin_df_multi_level)\n    ):\n        # Operation against self for sanity check\n        getattr(modin_df_multi_level, op)(modin_df_multi_level, axis=0, level=1)\n\n\n@pytest.mark.parametrize(\n    \"frame1_data,frame2_data,expected_pandas_equals\",\n    [\n        pytest.param({}, {}, True, id=\"two_empty_dataframes\"),\n        pytest.param([[1]], [[0]], False, id=\"single_unequal_values\"),\n        pytest.param([[None]], [[None]], True, 
id=\"single_none_values\"),\n        pytest.param([[np.nan]], [[np.nan]], True, id=\"single_nan_values\"),\n        pytest.param({1: [10]}, {1.0: [10]}, True, id=\"different_column_types\"),\n        pytest.param({1: [10]}, {2: [10]}, False, id=\"different_columns\"),\n        pytest.param(\n            pandas.DataFrame({1: [10]}, index=[1]),\n            pandas.DataFrame({1: [10]}, index=[1.0]),\n            True,\n            id=\"different_index_types\",\n        ),\n        pytest.param(\n            pandas.DataFrame({1: [10]}, index=[1]),\n            pandas.DataFrame({1: [10]}, index=[2]),\n            False,\n            id=\"different_indexes\",\n        ),\n        pytest.param({1: [10]}, {1: [10.0]}, False, id=\"different_value_types\"),\n        pytest.param(\n            [[1, 2], [3, 4]],\n            [[1, 2], [3, 4]],\n            True,\n            id=\"equal_two_by_two_dataframes\",\n        ),\n        pytest.param(\n            [[1, 2], [3, 4]],\n            [[5, 2], [3, 4]],\n            False,\n            id=\"unequal_two_by_two_dataframes\",\n        ),\n        pytest.param(\n            [[1, 1]],\n            [[1]],\n            False,\n            id=\"different_row_lengths\",\n        ),\n        pytest.param(\n            [[1], [1]],\n            [[1]],\n            False,\n            id=\"different_column_lengths\",\n        ),\n    ],\n)\ndef test_equals(frame1_data, frame2_data, expected_pandas_equals):\n    modin_df1 = pd.DataFrame(frame1_data)\n    pandas_df1 = pandas.DataFrame(frame1_data)\n    modin_df2 = pd.DataFrame(frame2_data)\n    pandas_df2 = pandas.DataFrame(frame2_data)\n\n    pandas_equals = pandas_df1.equals(pandas_df2)\n    assert pandas_equals == expected_pandas_equals, (\n        \"Test expected pandas to say the dataframes were\"\n        + f\"{'' if expected_pandas_equals else ' not'} equal, but they were\"\n        + f\"{' not' if expected_pandas_equals else ''} equal.\"\n    )\n\n    assert 
modin_df1.equals(modin_df2) == pandas_equals\n    assert modin_df1.equals(pandas_df2) == pandas_equals\n\n\ndef test_equals_several_partitions():\n    modin_series1 = pd.concat([pd.DataFrame([0, 1]), pd.DataFrame([None, 1])])\n    modin_series2 = pd.concat([pd.DataFrame([0, 1]), pd.DataFrame([1, None])])\n    assert not modin_series1.equals(modin_series2)\n\n\ndef test_equals_with_nans():\n    df1 = pd.DataFrame([0, 1, None], dtype=\"uint8[pyarrow]\")\n    df2 = pd.DataFrame([None, None, None], dtype=\"uint8[pyarrow]\")\n    assert not df1.equals(df2)\n\n\n@pytest.mark.parametrize(\"is_more_other_partitions\", [True, False])\n@pytest.mark.parametrize(\n    \"op_type\", [\"df_ser\", \"df_df\", \"ser_ser_same_name\", \"ser_ser_different_name\"]\n)\n@pytest.mark.parametrize(\n    \"is_idx_aligned\", [True, False], ids=[\"idx_aligned\", \"idx_not_aligned\"]\n)\ndef test_mismatched_row_partitions(is_idx_aligned, op_type, is_more_other_partitions):\n    data = [0, 1, 2, 3, 4, 5]\n    modin_df1, pandas_df1 = create_test_dfs({\"a\": data, \"b\": data})\n    modin_df, pandas_df = modin_df1.loc[:2], pandas_df1.loc[:2]\n\n    modin_df2 = pd.concat((modin_df, modin_df))\n    pandas_df2 = pandas.concat((pandas_df, pandas_df))\n    if is_more_other_partitions:\n        modin_df2, modin_df1 = modin_df1, modin_df2\n        pandas_df2, pandas_df1 = pandas_df1, pandas_df2\n\n    if is_idx_aligned:\n        if is_more_other_partitions:\n            modin_df1.index = pandas_df1.index = pandas_df2.index\n        else:\n            modin_df2.index = pandas_df2.index = pandas_df1.index\n\n    # Pandas don't support this case because result will contain duplicate values by col axis.\n    if op_type == \"df_ser\" and not is_idx_aligned and is_more_other_partitions:\n        eval_general(\n            modin_df2,\n            pandas_df2,\n            lambda df: (\n                df / modin_df1.a if isinstance(df, pd.DataFrame) else df / pandas_df1.a\n            ),\n            
expected_exception=ValueError(\n                \"cannot reindex on an axis with duplicate labels\"\n            ),\n        )\n        return\n\n    if op_type == \"df_ser\":\n        modin_res = modin_df2 / modin_df1.a\n        pandas_res = pandas_df2 / pandas_df1.a\n    elif op_type == \"df_df\":\n        modin_res = modin_df2 / modin_df1\n        pandas_res = pandas_df2 / pandas_df1\n    elif op_type == \"ser_ser_same_name\":\n        modin_res = modin_df2.a / modin_df1.a\n        pandas_res = pandas_df2.a / pandas_df1.a\n    elif op_type == \"ser_ser_different_name\":\n        modin_res = modin_df2.a / modin_df1.b\n        pandas_res = pandas_df2.a / pandas_df1.b\n    else:\n        raise Exception(f\"op_type: {op_type} not supported in test\")\n    df_equals(modin_res, pandas_res)\n\n\ndef test_duplicate_indexes():\n    data = [0, 1, 2, 3, 4, 5]\n    modin_df1, pandas_df1 = create_test_dfs(\n        {\"a\": data, \"b\": data}, index=[0, 1, 2, 0, 1, 2]\n    )\n    modin_df2, pandas_df2 = create_test_dfs({\"a\": data, \"b\": data})\n    df_equals(modin_df1 / modin_df2, pandas_df1 / pandas_df2)\n    df_equals(modin_df1 / modin_df1, pandas_df1 / pandas_df1)\n\n\n@pytest.mark.parametrize(\"subset_operand\", [\"left\", \"right\"])\ndef test_mismatched_col_partitions(subset_operand):\n    data = [0, 1, 2, 3]\n    modin_df1, pandas_df1 = create_test_dfs({\"a\": data, \"b\": data})\n    modin_df_tmp, pandas_df_tmp = create_test_dfs({\"c\": data})\n\n    modin_df2 = pd.concat([modin_df1, modin_df_tmp], axis=1)\n    pandas_df2 = pandas.concat([pandas_df1, pandas_df_tmp], axis=1)\n\n    if subset_operand == \"right\":\n        modin_res = modin_df2 + modin_df1\n        pandas_res = pandas_df2 + pandas_df1\n    else:\n        modin_res = modin_df1 + modin_df2\n        pandas_res = pandas_df1 + pandas_df2\n\n    df_equals(modin_res, pandas_res)\n\n\n@pytest.mark.parametrize(\"empty_operand\", [\"right\", \"left\", \"both\"])\ndef test_empty_df(empty_operand):\n    
modin_df, pandas_df = create_test_dfs([0, 1, 2, 0, 1, 2])\n    modin_df_empty, pandas_df_empty = create_test_dfs()\n\n    if empty_operand == \"right\":\n        modin_res = modin_df + modin_df_empty\n        pandas_res = pandas_df + pandas_df_empty\n    elif empty_operand == \"left\":\n        modin_res = modin_df_empty + modin_df\n        pandas_res = pandas_df_empty + pandas_df\n    else:\n        modin_res = modin_df_empty + modin_df_empty\n        pandas_res = pandas_df_empty + pandas_df_empty\n\n    df_equals(modin_res, pandas_res)\n\n\ndef test_add_string_to_df():\n    modin_df, pandas_df = create_test_dfs([\"a\", \"b\"])\n    eval_general(modin_df, pandas_df, lambda df: \"string\" + df)\n    eval_general(modin_df, pandas_df, lambda df: df + \"string\")\n\n\ndef test_add_custom_class():\n    # see https://github.com/modin-project/modin/issues/5236\n    # Test that we can add any object that is addable to pandas object data\n    # via \"+\".\n    eval_general(\n        *create_test_dfs(test_data[\"int_data\"]),\n        lambda df: df + CustomIntegerForAddition(4),\n    )\n\n\ndef test_non_commutative_multiply_pandas():\n    # The non commutative integer class implementation is tricky. Check that\n    # multiplying such an integer with a pandas dataframe is really not\n    # commutative.\n    pandas_df = pandas.DataFrame([[1]], dtype=int)\n    integer = NonCommutativeMultiplyInteger(2)\n    assert not (integer * pandas_df).equals(pandas_df * integer)\n\n\ndef test_non_commutative_multiply():\n    # This test checks that mul and rmul do different things when\n    # multiplication is not commutative, e.g. 
for adding a string to a string.\n    # For context see https://github.com/modin-project/modin/issues/5238\n    modin_df, pandas_df = create_test_dfs([1], dtype=int)\n    integer = NonCommutativeMultiplyInteger(2)\n    eval_general(modin_df, pandas_df, lambda s: integer * s)\n    eval_general(modin_df, pandas_df, lambda s: s * integer)\n\n\n@pytest.mark.parametrize(\n    \"op\",\n    [\n        *(\"add\", \"radd\", \"sub\", \"rsub\", \"mod\", \"rmod\", \"pow\", \"rpow\"),\n        *(\"truediv\", \"rtruediv\", \"mul\", \"rmul\", \"floordiv\", \"rfloordiv\"),\n    ],\n)\n@pytest.mark.parametrize(\n    \"val1\",\n    [\n        pytest.param([10, 20], id=\"int\"),\n        pytest.param([10, True], id=\"obj\"),\n        pytest.param([True, True], id=\"bool\"),\n        pytest.param([3.5, 4.5], id=\"float\"),\n    ],\n)\n@pytest.mark.parametrize(\n    \"val2\",\n    [\n        pytest.param([10, 20], id=\"int\"),\n        pytest.param([10, True], id=\"obj\"),\n        pytest.param([True, True], id=\"bool\"),\n        pytest.param([3.5, 4.5], id=\"float\"),\n        pytest.param(2, id=\"int scalar\"),\n        pytest.param(True, id=\"bool scalar\"),\n        pytest.param(3.5, id=\"float scalar\"),\n    ],\n)\ndef test_arithmetic_with_tricky_dtypes(val1, val2, op, request):\n    modin_df1, pandas_df1 = create_test_dfs(val1)\n    modin_df2, pandas_df2 = (\n        create_test_dfs(val2) if isinstance(val2, list) else (val2, val2)\n    )\n\n    expected_exception = None\n    if (\n        \"bool-bool\" in request.node.callspec.id\n        or \"bool scalar-bool\" in request.node.callspec.id\n    ) and op in [\n        \"pow\",\n        \"rpow\",\n        \"truediv\",\n        \"rtruediv\",\n        \"floordiv\",\n        \"rfloordiv\",\n    ]:\n        op_name = op[1:] if op.startswith(\"r\") else op\n        expected_exception = NotImplementedError(\n            f\"operator '{op_name}' not implemented for bool dtypes\"\n        )\n    elif (\n        \"bool-bool\" in 
request.node.callspec.id\n        or \"bool scalar-bool\" in request.node.callspec.id\n    ) and op in [\"sub\", \"rsub\"]:\n        expected_exception = TypeError(\n            \"numpy boolean subtract, the `-` operator, is not supported, \"\n            + \"use the bitwise_xor, the `^` operator, or the logical_xor function instead.\"\n        )\n\n    eval_general(\n        (modin_df1, modin_df2),\n        (pandas_df1, pandas_df2),\n        lambda dfs: getattr(dfs[0], op)(dfs[1]),\n        expected_exception=expected_exception,\n    )\n\n\n@pytest.mark.parametrize(\n    \"data, other_data\",\n    [\n        ({\"A\": [1, 2, 3], \"B\": [400, 500, 600]}, {\"B\": [4, 5, 6], \"C\": [7, 8, 9]}),\n        ({\"C\": [1, 2, 3], \"B\": [400, 500, 600]}, {\"B\": [4, 5, 6], \"A\": [7, 8, 9]}),\n    ],\n)\n@pytest.mark.parametrize(\"axis\", [0, 1])\n@pytest.mark.parametrize(\"match_index\", [True, False])\ndef test_bin_op_mismatched_columns(data, other_data, axis, match_index):\n    modin_df, pandas_df = create_test_dfs(data)\n    other_modin_df, other_pandas_df = create_test_dfs(other_data)\n    if axis == 0:\n        if not match_index:\n            modin_df.index = pandas_df.index = [\"1\", \"2\", \"3\"]\n            other_modin_df.index = other_pandas_df.index = [\"2\", \"1\", \"3\"]\n    eval_general(\n        modin_df,\n        pandas_df,\n        lambda df: (\n            df.add(other_modin_df, axis=axis)\n            if isinstance(df, pd.DataFrame)\n            else df.add(other_pandas_df, axis=axis)\n        ),\n    )\n"
  },
  {
    "path": "modin/tests/pandas/dataframe/test_default.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport io\nimport warnings\n\nimport matplotlib\nimport numpy as np\nimport pandas\nimport pandas._libs.lib as lib\nimport pyarrow as pa\nimport pytest\nfrom numpy.testing import assert_array_equal\nfrom packaging.version import Version\n\nimport modin.pandas as pd\nfrom modin.config import Backend, Engine, NPartitions, StorageFormat\nfrom modin.pandas.io import to_pandas\nfrom modin.tests.pandas.utils import (\n    axis_keys,\n    axis_values,\n    create_test_dfs,\n    create_test_series,\n    default_to_pandas_ignore_string,\n    df_equals,\n    eval_general,\n    generate_multiindex,\n    modin_df_almost_equals_pandas,\n    name_contains,\n    numeric_dfs,\n    test_data,\n    test_data_diff_dtype,\n    test_data_keys,\n    test_data_large_categorical_dataframe,\n    test_data_resample,\n    test_data_values,\n)\nfrom modin.tests.test_utils import (\n    current_execution_is_native,\n    df_or_series_using_native_execution,\n    warns_that_defaulting_to_pandas_if,\n)\nfrom modin.utils import get_current_execution\n\nNPartitions.put(4)\n\n# Force matplotlib to not use any Xwindows backend.\nmatplotlib.use(\"Agg\")\n\n# Our configuration in pytest.ini requires 
that we explicitly catch all\n# instances of defaulting to pandas, but some test modules, like this one,\n# have too many such instances.\npytestmark = [\n    pytest.mark.filterwarnings(default_to_pandas_ignore_string),\n    # IGNORE FUTUREWARNINGS MARKS TO CLEANUP OUTPUT\n    pytest.mark.filterwarnings(\n        \"ignore:.*bool is now deprecated and will be removed:FutureWarning\"\n    ),\n    pytest.mark.filterwarnings(\n        \"ignore:first is deprecated and will be removed:FutureWarning\"\n    ),\n    pytest.mark.filterwarnings(\n        \"ignore:last is deprecated and will be removed:FutureWarning\"\n    ),\n]\n\n\n@pytest.mark.parametrize(\n    \"op, make_args\",\n    [\n        (\"align\", lambda df: {\"other\": df}),\n        (\"corrwith\", lambda df: {\"other\": df}),\n        (\"ewm\", lambda df: {\"com\": 0.5}),\n        (\"from_dict\", lambda df: {\"data\": None}),\n        (\"from_records\", lambda df: {\"data\": to_pandas(df)}),\n        (\"hist\", lambda df: {\"column\": \"int_col\"}),\n        (\"interpolate\", None),\n        (\"mask\", lambda df: {\"cond\": df != 0}),\n        (\"pct_change\", None),\n        (\"to_xarray\", None),\n        (\"flags\", None),\n        (\"set_flags\", lambda df: {\"allows_duplicate_labels\": False}),\n    ],\n)\ndef test_ops_defaulting_to_pandas(op, make_args):\n    modin_df = pd.DataFrame(test_data_diff_dtype).drop([\"str_col\", \"bool_col\"], axis=1)\n    with warns_that_defaulting_to_pandas_if(\n        not df_or_series_using_native_execution(modin_df)\n    ):\n        operation = getattr(modin_df, op)\n        if make_args is not None:\n            operation(**make_args(modin_df))\n        else:\n            try:\n                operation()\n            # `except` for non callable attributes\n            except TypeError:\n                pass\n\n\ndef test_style():\n    data = test_data_values[0]\n    with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):\n        
pd.DataFrame(data).style\n\n\ndef test_to_timestamp():\n    idx = pd.date_range(\"1/1/2012\", periods=5, freq=\"M\")\n    df = pd.DataFrame(np.random.randint(0, 100, size=(len(idx), 4)), index=idx)\n\n    with warns_that_defaulting_to_pandas_if(\n        not df_or_series_using_native_execution(df)\n    ):\n        df.to_period().to_timestamp()\n\n\n@pytest.mark.parametrize(\n    \"data\",\n    test_data_values + [test_data_large_categorical_dataframe],\n    ids=test_data_keys + [\"categorical_ints\"],\n)\ndef test_to_numpy(data):\n    modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data)\n    assert_array_equal(modin_df.values, pandas_df.values)\n\n\n@pytest.mark.skipif(\n    StorageFormat.get() != \"Pandas\",\n    reason=\"NativeQueryCompiler does not contain partitions.\",\n)\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_partition_to_numpy(data):\n    frame = pd.DataFrame(data)\n    for partition in frame._query_compiler._modin_frame._partitions.flatten().tolist():\n        assert_array_equal(partition.to_pandas().values, partition.to_numpy())\n\n\ndef test_asfreq():\n    index = pd.date_range(\"1/1/2000\", periods=4, freq=\"min\")\n    series = pd.Series([0.0, None, 2.0, 3.0], index=index)\n    df = pd.DataFrame({\"s\": series})\n    with warns_that_defaulting_to_pandas_if(\n        not df_or_series_using_native_execution(df)\n    ):\n        # We are only testing that this defaults to pandas, so we will just check for\n        # the warning\n        df.asfreq(freq=\"30S\")\n\n\ndef test_assign():\n    data = test_data_values[0]\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n    modin_result = modin_df.assign(new_column=pd.Series(modin_df.iloc[:, 0]))\n    pandas_result = pandas_df.assign(new_column=pandas.Series(pandas_df.iloc[:, 0]))\n    df_equals(modin_result, pandas_result)\n    modin_result = modin_df.assign(\n        new_column=pd.Series(modin_df.iloc[:, 0]),\n        
new_column2=pd.Series(modin_df.iloc[:, 1]),\n    )\n    pandas_result = pandas_df.assign(\n        new_column=pandas.Series(pandas_df.iloc[:, 0]),\n        new_column2=pandas.Series(pandas_df.iloc[:, 1]),\n    )\n    df_equals(modin_result, pandas_result)\n\n\ndef test_at_time():\n    i = pd.date_range(\"2008-01-01\", periods=1000, freq=\"12H\")\n    modin_df = pd.DataFrame({\"A\": list(range(1000)), \"B\": list(range(1000))}, index=i)\n    pandas_df = pandas.DataFrame(\n        {\"A\": list(range(1000)), \"B\": list(range(1000))}, index=i\n    )\n    df_equals(modin_df.at_time(\"12:00\"), pandas_df.at_time(\"12:00\"))\n    df_equals(modin_df.at_time(\"3:00\"), pandas_df.at_time(\"3:00\"))\n    df_equals(modin_df.T.at_time(\"12:00\", axis=1), pandas_df.T.at_time(\"12:00\", axis=1))\n\n\ndef test_between_time():\n    i = pd.date_range(\"2008-01-01\", periods=1000, freq=\"12H\")\n    modin_df = pd.DataFrame({\"A\": list(range(1000)), \"B\": list(range(1000))}, index=i)\n    pandas_df = pandas.DataFrame(\n        {\"A\": list(range(1000)), \"B\": list(range(1000))}, index=i\n    )\n    df_equals(\n        modin_df.between_time(\"12:00\", \"17:00\"),\n        pandas_df.between_time(\"12:00\", \"17:00\"),\n    )\n    df_equals(\n        modin_df.between_time(\"3:00\", \"4:00\"),\n        pandas_df.between_time(\"3:00\", \"4:00\"),\n    )\n    df_equals(\n        modin_df.T.between_time(\"12:00\", \"17:00\", axis=1),\n        pandas_df.T.between_time(\"12:00\", \"17:00\", axis=1),\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_bfill(data):\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n    df_equals(modin_df.bfill(), pandas_df.bfill())\n\n\n@pytest.mark.parametrize(\"limit_area\", [None, \"inside\", \"outside\"])\n@pytest.mark.parametrize(\"method\", [\"ffill\", \"bfill\"])\ndef test_ffill_bfill_limit_area(method, limit_area):\n    modin_df, pandas_df = create_test_dfs([1, None, 2, None])\n    
eval_general(\n        modin_df, pandas_df, lambda df: getattr(df, method)(limit_area=limit_area)\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_bool(data):\n    modin_df = pd.DataFrame(data)\n\n    with pytest.warns(\n        FutureWarning, match=\"bool is now deprecated and will be removed\"\n    ):\n        with pytest.raises(ValueError):\n            modin_df.bool()\n            modin_df.__bool__()\n\n    single_bool_pandas_df = pandas.DataFrame([True])\n    single_bool_modin_df = pd.DataFrame([True])\n\n    assert single_bool_pandas_df.bool() == single_bool_modin_df.bool()\n\n    with pytest.raises(ValueError):\n        # __bool__ always raises this error for DataFrames\n        single_bool_modin_df.__bool__()\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_boxplot(data):\n    modin_df = pd.DataFrame(data)\n\n    assert modin_df.boxplot() == to_pandas(modin_df).boxplot()\n\n\ndef test_combine_first():\n    data1 = {\"A\": [None, 0], \"B\": [None, 4]}\n    modin_df1 = pd.DataFrame(data1)\n    pandas_df1 = pandas.DataFrame(data1)\n    data2 = {\"A\": [1, 1], \"B\": [3, 3]}\n    modin_df2 = pd.DataFrame(data2)\n    pandas_df2 = pandas.DataFrame(data2)\n    df_equals(\n        modin_df1.combine_first(modin_df2),\n        pandas_df1.combine_first(pandas_df2),\n        # https://github.com/modin-project/modin/issues/5959\n        check_dtypes=False,\n    )\n\n\nclass TestCorr:\n    @pytest.mark.parametrize(\"method\", [\"pearson\", \"kendall\", \"spearman\"])\n    @pytest.mark.parametrize(\"backend\", [None, \"pyarrow\"])\n    def test_corr(self, method, backend):\n        eval_general(\n            *create_test_dfs(test_data[\"int_data\"], backend=backend),\n            lambda df: df.corr(method=method),\n        )\n        # Modin result may slightly differ from pandas result\n        # due to floating pointing arithmetic.\n        eval_general(\n            
*create_test_dfs(test_data[\"float_nan_data\"], backend=backend),\n            lambda df: df.corr(method=method),\n            comparator=modin_df_almost_equals_pandas,\n        )\n\n    @pytest.mark.parametrize(\"min_periods\", [1, 3, 5, 6])\n    def test_corr_min_periods(self, min_periods):\n        # only 3 valid values (a valid value is considered a row with no NaNs)\n        eval_general(\n            *create_test_dfs({\"a\": [1, 2, 3], \"b\": [3, 1, 5]}),\n            lambda df: df.corr(min_periods=min_periods),\n        )\n\n        # only 5 valid values (a valid value is considered a row with no NaNs)\n        eval_general(\n            *create_test_dfs(\n                {\"a\": [1, 2, 3, 4, 5, np.nan], \"b\": [1, 2, 1, 4, 5, np.nan]}\n            ),\n            lambda df: df.corr(min_periods=min_periods),\n        )\n\n        # only 4 valid values (a valid value is considered a row with no NaNs)\n        eval_general(\n            *create_test_dfs(\n                {\"a\": [1, np.nan, 3, 4, 5, 6], \"b\": [1, 2, 1, 4, 5, np.nan]}\n            ),\n            lambda df: df.corr(min_periods=min_periods),\n        )\n\n        if StorageFormat.get() == \"Pandas\":\n            # only 4 valid values located in different partitions (a valid value is considered a row with no NaNs)\n            modin_df, pandas_df = create_test_dfs(\n                {\"a\": [1, np.nan, 3, 4, 5, 6], \"b\": [1, 2, 1, 4, 5, np.nan]}\n            )\n            modin_df = pd.concat([modin_df.iloc[:3], modin_df.iloc[3:]])\n            assert modin_df._query_compiler._modin_frame._partitions.shape == (2, 1)\n            eval_general(\n                modin_df, pandas_df, lambda df: df.corr(min_periods=min_periods)\n            )\n\n    @pytest.mark.parametrize(\"numeric_only\", [True, False])\n    def test_corr_non_numeric(self, numeric_only):\n        if not numeric_only:\n            pytest.xfail(reason=\"https://github.com/modin-project/modin/issues/7023\")\n        eval_general(\n 
           *create_test_dfs({\"a\": [1, 2, 3], \"b\": [3, 2, 5], \"c\": [\"a\", \"b\", \"c\"]}),\n            lambda df: df.corr(numeric_only=numeric_only),\n        )\n\n    @pytest.mark.skipif(\n        StorageFormat.get() != \"Pandas\",\n        reason=\"doesn't make sense for non-partitioned executions\",\n    )\n    def test_corr_nans_in_different_partitions(self):\n        # NaN in the first partition\n        modin_df, pandas_df = create_test_dfs(\n            {\"a\": [np.nan, 2, 3, 4, 5, 6], \"b\": [3, 4, 2, 0, 7, 8]}\n        )\n        modin_df = pd.concat([modin_df.iloc[:2], modin_df.iloc[2:4], modin_df.iloc[4:]])\n\n        assert modin_df._query_compiler._modin_frame._partitions.shape == (3, 1)\n        eval_general(modin_df, pandas_df, lambda df: df.corr())\n\n        # NaN in the last partition\n        modin_df, pandas_df = create_test_dfs(\n            {\"a\": [1, 2, 3, 4, 5, np.nan], \"b\": [3, 4, 2, 0, 7, 8]}\n        )\n        modin_df = pd.concat([modin_df.iloc[:2], modin_df.iloc[2:4], modin_df.iloc[4:]])\n\n        assert modin_df._query_compiler._modin_frame._partitions.shape == (3, 1)\n        eval_general(modin_df, pandas_df, lambda df: df.corr())\n\n        # NaN in two partitions\n        modin_df, pandas_df = create_test_dfs(\n            {\"a\": [np.nan, 2, 3, 4, 5, 6], \"b\": [3, 4, 2, 0, 7, np.nan]}\n        )\n        modin_df = pd.concat([modin_df.iloc[:2], modin_df.iloc[2:4], modin_df.iloc[4:]])\n\n        assert modin_df._query_compiler._modin_frame._partitions.shape == (3, 1)\n        eval_general(modin_df, pandas_df, lambda df: df.corr())\n\n        # NaN in all partitions\n        modin_df, pandas_df = create_test_dfs(\n            {\"a\": [np.nan, 2, 3, np.nan, 5, 6], \"b\": [3, 4, 2, 0, 7, np.nan]}\n        )\n        modin_df = pd.concat([modin_df.iloc[:2], modin_df.iloc[2:4], modin_df.iloc[4:]])\n\n        assert modin_df._query_compiler._modin_frame._partitions.shape == (3, 1)\n        eval_general(modin_df, pandas_df, 
lambda df: df.corr())\n\n\n@pytest.mark.parametrize(\"min_periods\", [1, 3, 5], ids=lambda x: f\"min_periods={x}\")\n@pytest.mark.parametrize(\"ddof\", [1, 2, 4], ids=lambda x: f\"ddof={x}\")\n@pytest.mark.parametrize(\"backend\", [None, \"pyarrow\"])\ndef test_cov(min_periods, ddof, backend):\n    eval_general(\n        *create_test_dfs(test_data[\"int_data\"], backend=backend),\n        lambda df: df.cov(min_periods=min_periods, ddof=ddof),\n        comparator=df_equals,\n    )\n    # Modin result may slightly differ from pandas result\n    # due to floating pointing arithmetic. That's why we use `modin_df_almost_equals_pandas`.\n    eval_general(\n        *create_test_dfs(test_data[\"float_nan_data\"], backend=backend),\n        lambda df: df.cov(min_periods=min_periods),\n        comparator=modin_df_almost_equals_pandas,\n    )\n\n\n@pytest.mark.parametrize(\"numeric_only\", [True, False])\ndef test_cov_numeric_only(numeric_only):\n    if not numeric_only:\n        pytest.xfail(reason=\"https://github.com/modin-project/modin/issues/7023\")\n    eval_general(\n        *create_test_dfs({\"a\": [1, 2, 3], \"b\": [3, 2, 5], \"c\": [\"a\", \"b\", \"c\"]}),\n        lambda df: df.cov(numeric_only=numeric_only),\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_dot(data):\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n    col_len = len(modin_df.columns)\n\n    # Test list input\n    arr = np.arange(col_len)\n    modin_result = modin_df.dot(arr)\n    pandas_result = pandas_df.dot(arr)\n    df_equals(modin_result, pandas_result)\n\n    # Test bad dimensions\n    with pytest.raises(ValueError):\n        modin_df.dot(np.arange(col_len + 10))\n\n    # Test series input\n    modin_series = pd.Series(np.arange(col_len), index=modin_df.columns)\n    pandas_series = pandas.Series(np.arange(col_len), index=pandas_df.columns)\n    modin_result = modin_df.dot(modin_series)\n    pandas_result = 
pandas_df.dot(pandas_series)\n    df_equals(modin_result, pandas_result)\n\n    # Test dataframe input\n    modin_result = modin_df.dot(modin_df.T)\n    pandas_result = pandas_df.dot(pandas_df.T)\n    df_equals(modin_result, pandas_result)\n\n    # Test when input series index doesn't line up with columns\n    with pytest.raises(ValueError):\n        modin_df.dot(pd.Series(np.arange(col_len)))\n\n    # Test case when left dataframe has size (n x 1)\n    # and right dataframe has size (1 x n)\n    modin_df = pd.DataFrame(modin_series)\n    pandas_df = pandas.DataFrame(pandas_series)\n    modin_result = modin_df.dot(modin_df.T)\n    pandas_result = pandas_df.dot(pandas_df.T)\n    df_equals(modin_result, pandas_result)\n\n    # Test case when left dataframe has size (1 x 1)\n    # and right dataframe has size (1 x n)\n    modin_result = pd.DataFrame([1]).dot(modin_df.T)\n    pandas_result = pandas.DataFrame([1]).dot(pandas_df.T)\n    df_equals(modin_result, pandas_result)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_matmul(data):\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n    col_len = len(modin_df.columns)\n\n    # Test list input\n    arr = np.arange(col_len)\n    modin_result = modin_df @ arr\n    pandas_result = pandas_df @ arr\n    df_equals(modin_result, pandas_result)\n\n    # Test bad dimensions\n    with pytest.raises(ValueError):\n        modin_df @ np.arange(col_len + 10)\n\n    # Test series input\n    modin_series = pd.Series(np.arange(col_len), index=modin_df.columns)\n    pandas_series = pandas.Series(np.arange(col_len), index=pandas_df.columns)\n    modin_result = modin_df @ modin_series\n    pandas_result = pandas_df @ pandas_series\n    df_equals(modin_result, pandas_result)\n\n    # Test dataframe input\n    modin_result = modin_df @ modin_df.T\n    pandas_result = pandas_df @ pandas_df.T\n    df_equals(modin_result, pandas_result)\n\n    # Test when input series index 
doesn't line up with columns\n    with pytest.raises(ValueError):\n        modin_df @ pd.Series(np.arange(col_len))\n\n\ndef test_first():\n    i = pd.date_range(\"2010-04-09\", periods=400, freq=\"2D\")\n    modin_df = pd.DataFrame({\"A\": list(range(400)), \"B\": list(range(400))}, index=i)\n    pandas_df = pandas.DataFrame(\n        {\"A\": list(range(400)), \"B\": list(range(400))}, index=i\n    )\n    with pytest.warns(FutureWarning, match=\"first is deprecated and will be removed\"):\n        modin_result = modin_df.first(\"3D\")\n    df_equals(modin_result, pandas_df.first(\"3D\"))\n    df_equals(modin_df.first(\"20D\"), pandas_df.first(\"20D\"))\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_info_default_param(data):\n    with io.StringIO() as first, io.StringIO() as second:\n        eval_general(\n            pd.DataFrame(data),\n            pandas.DataFrame(data),\n            verbose=None,\n            max_cols=None,\n            memory_usage=None,\n            operation=lambda df, **kwargs: df.info(**kwargs),\n            buf=lambda df: second if isinstance(df, pandas.DataFrame) else first,\n        )\n        modin_info = first.getvalue().splitlines()\n        pandas_info = second.getvalue().splitlines()\n\n        assert modin_info[0] == str(pd.DataFrame)\n        assert pandas_info[0] == str(pandas.DataFrame)\n        assert modin_info[1:] == pandas_info[1:]\n\n\n# randint data covers https://github.com/modin-project/modin/issues/5137\n@pytest.mark.parametrize(\n    \"data\", [test_data_values[0], np.random.randint(0, 100, (10, 10))]\n)\n@pytest.mark.parametrize(\"verbose\", [True, False])\n@pytest.mark.parametrize(\"max_cols\", [10, 99999999])\n@pytest.mark.parametrize(\"memory_usage\", [True, False, \"deep\"])\n@pytest.mark.parametrize(\"show_counts\", [True, False])\ndef test_info(data, verbose, max_cols, memory_usage, show_counts):\n    with io.StringIO() as first, io.StringIO() as second:\n        
eval_general(\n            pd.DataFrame(data),\n            pandas.DataFrame(data),\n            operation=lambda df, **kwargs: df.info(**kwargs),\n            verbose=verbose,\n            max_cols=max_cols,\n            memory_usage=memory_usage,\n            show_counts=show_counts,\n            buf=lambda df: second if isinstance(df, pandas.DataFrame) else first,\n        )\n        modin_info = first.getvalue().splitlines()\n        pandas_info = second.getvalue().splitlines()\n\n        assert modin_info[0] == str(pd.DataFrame)\n        assert pandas_info[0] == str(pandas.DataFrame)\n        assert modin_info[1:] == pandas_info[1:]\n\n\n@pytest.mark.parametrize(\"axis\", axis_values, ids=axis_keys)\n@pytest.mark.parametrize(\"skipna\", [False, True])\n@pytest.mark.parametrize(\"numeric_only\", [False, True])\n@pytest.mark.parametrize(\"method\", [\"kurtosis\", \"kurt\"])\ndef test_kurt_kurtosis(axis, skipna, numeric_only, method):\n    data = test_data[\"float_nan_data\"]\n\n    eval_general(\n        *create_test_dfs(data),\n        lambda df: getattr(df, method)(\n            axis=axis, skipna=skipna, numeric_only=numeric_only\n        ),\n    )\n\n\ndef test_last():\n    modin_index = pd.date_range(\"2010-04-09\", periods=400, freq=\"2D\")\n    pandas_index = pandas.date_range(\"2010-04-09\", periods=400, freq=\"2D\")\n    modin_df = pd.DataFrame(\n        {\"A\": list(range(400)), \"B\": list(range(400))}, index=modin_index\n    )\n    pandas_df = pandas.DataFrame(\n        {\"A\": list(range(400)), \"B\": list(range(400))}, index=pandas_index\n    )\n    with pytest.warns(FutureWarning, match=\"last is deprecated and will be removed\"):\n        modin_result = modin_df.last(\"3D\")\n    df_equals(modin_result, pandas_df.last(\"3D\"))\n    df_equals(modin_df.last(\"20D\"), pandas_df.last(\"20D\"))\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\n    \"id_vars\", [lambda df: df.columns[0], lambda df: 
df.columns[:4], None]\n)\n@pytest.mark.parametrize(\n    \"value_vars\", [lambda df: df.columns[-1], lambda df: df.columns[-4:], None]\n)\ndef test_melt(data, id_vars, value_vars):\n    def melt(df, *args, **kwargs):\n        return df.melt(*args, **kwargs).sort_values([\"variable\", \"value\"])\n\n    eval_general(\n        *create_test_dfs(data),\n        lambda df, *args, **kwargs: melt(df, *args, **kwargs).reset_index(drop=True),\n        id_vars=id_vars,\n        value_vars=value_vars,\n    )\n\n\n# Functional test for BUG:7206\ndef test_melt_duplicate_col_names():\n    data = {\"data\": [[1, 2], [3, 4]], \"columns\": [\"dupe\", \"dupe\"]}\n\n    def melt(df, *args, **kwargs):\n        return df.melt(*args, **kwargs).sort_values([\"variable\", \"value\"])\n\n    eval_general(\n        *create_test_dfs(**data),\n        lambda df, *args, **kwargs: melt(df, *args, **kwargs).reset_index(drop=True),\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\n    \"index\",\n    [lambda df: df.columns[0], lambda df: df.columns[:2], lib.no_default],\n    ids=[\"one_column_index\", \"several_columns_index\", \"default\"],\n)\n@pytest.mark.parametrize(\n    \"columns\", [lambda df: df.columns[len(df.columns) // 2]], ids=[\"one_column\"]\n)\n@pytest.mark.parametrize(\n    \"values\",\n    [lambda df: df.columns[-1], lambda df: df.columns[-2:], lib.no_default],\n    ids=[\"one_column_values\", \"several_columns_values\", \"default\"],\n)\ndef test_pivot(data, index, columns, values, request):\n    current_execution = get_current_execution()\n    if (\n        \"one_column_values-one_column-default-float_nan_data\"\n        in request.node.callspec.id\n        or \"default-one_column-several_columns_index\" in request.node.callspec.id\n        or \"default-one_column-one_column_index\" in request.node.callspec.id\n        or (\n            (current_execution == \"BaseOnPython\" or current_execution_is_native())\n       
     and index is lib.no_default\n        )\n    ):\n        pytest.xfail(reason=\"https://github.com/modin-project/modin/issues/7010\")\n\n    expected_exception = None\n    if index is not lib.no_default:\n        expected_exception = ValueError(\n            \"Index contains duplicate entries, cannot reshape\"\n        )\n    eval_general(\n        *create_test_dfs(data),\n        lambda df, *args, **kwargs: df.pivot(*args, **kwargs),\n        index=index,\n        columns=columns,\n        values=values,\n        expected_exception=expected_exception,\n    )\n\n\n@pytest.mark.parametrize(\"data\", [test_data[\"int_data\"]], ids=[\"int_data\"])\n@pytest.mark.parametrize(\n    \"index\",\n    [\n        pytest.param(lambda df: df.columns[0], id=\"single_index_col\"),\n        pytest.param(\n            lambda df: [*df.columns[0:2], *df.columns[-7:-4]], id=\"multiple_index_cols\"\n        ),\n        pytest.param(None, id=\"default_index\"),\n    ],\n)\n@pytest.mark.parametrize(\n    \"columns\",\n    [\n        pytest.param(lambda df: df.columns[len(df.columns) // 2], id=\"single_col\"),\n        pytest.param(\n            lambda df: [\n                *df.columns[(len(df.columns) // 2) : (len(df.columns) // 2 + 4)],\n                df.columns[-7],\n            ],\n            id=\"multiple_cols\",\n        ),\n        pytest.param(None, id=\"default_columns\"),\n    ],\n)\n@pytest.mark.parametrize(\n    \"values\",\n    [\n        pytest.param(lambda df: df.columns[-1], id=\"single_value_col\"),\n        pytest.param(lambda df: df.columns[-4:-1], id=\"multiple_value_cols\"),\n        pytest.param(None, id=\"default_values\"),\n    ],\n)\n@pytest.mark.parametrize(\n    \"aggfunc\",\n    [\n        pytest.param(np.mean, id=\"callable_tree_reduce_func\"),\n        pytest.param(\"mean\", id=\"tree_reduce_func\"),\n        pytest.param(\"nunique\", id=\"full_axis_func\"),\n    ],\n)\ndef test_pivot_table_data(data, index, columns, values, aggfunc, request):\n    if 
(\n        \"callable_tree_reduce_func-single_value_col-multiple_cols-multiple_index_cols\"\n        in request.node.callspec.id\n        or \"callable_tree_reduce_func-multiple_value_cols-multiple_cols-multiple_index_cols\"\n        in request.node.callspec.id\n        or \"tree_reduce_func-single_value_col-multiple_cols-multiple_index_cols\"\n        in request.node.callspec.id\n        or \"tree_reduce_func-multiple_value_cols-multiple_cols-multiple_index_cols\"\n        in request.node.callspec.id\n        or \"full_axis_func-single_value_col-multiple_cols-multiple_index_cols\"\n        in request.node.callspec.id\n        or \"full_axis_func-multiple_value_cols-multiple_cols-multiple_index_cols\"\n        in request.node.callspec.id\n    ):\n        pytest.xfail(reason=\"https://github.com/modin-project/modin/issues/7011\")\n    md_df, pd_df = create_test_dfs(data)\n\n    # when values is None the output will be huge-dimensional,\n    # so reducing dimension of testing data at that case\n    if values is None:\n        md_df, pd_df = md_df.iloc[:42, :42], pd_df.iloc[:42, :42]\n\n    expected_exception = None\n    if \"default_columns-default_index\" in request.node.callspec.id:\n        expected_exception = ValueError(\"No group keys passed!\")\n    elif (\n        \"callable_tree_reduce_func\" in request.node.callspec.id\n        and \"int_data\" in request.node.callspec.id\n    ):\n        expected_exception = TypeError(\"'numpy.float64' object is not callable\")\n\n    eval_general(\n        md_df,\n        pd_df,\n        operation=lambda df, *args, **kwargs: df.pivot_table(\n            *args, **kwargs\n        ).sort_index(axis=int(index is not None)),\n        index=index,\n        columns=columns,\n        values=values,\n        aggfunc=aggfunc,\n        expected_exception=expected_exception,\n    )\n\n\n@pytest.mark.parametrize(\"data\", [test_data[\"int_data\"]], ids=[\"int_data\"])\n@pytest.mark.parametrize(\n    \"index\",\n    [\n        
pytest.param([], id=\"no_index_cols\"),\n        pytest.param(lambda df: df.columns[0], id=\"single_index_column\"),\n        pytest.param(\n            lambda df: [df.columns[0], df.columns[len(df.columns) // 2 - 1]],\n            id=\"multiple_index_cols\",\n        ),\n    ],\n)\n@pytest.mark.parametrize(\n    \"columns\",\n    [\n        pytest.param(lambda df: df.columns[len(df.columns) // 2], id=\"single_column\"),\n        pytest.param(\n            lambda df: [\n                *df.columns[(len(df.columns) // 2) : (len(df.columns) // 2 + 4)],\n                df.columns[-7],\n            ],\n            id=\"multiple_cols\",\n        ),\n    ],\n)\n@pytest.mark.parametrize(\n    \"values\",\n    [\n        pytest.param(lambda df: df.columns[-1], id=\"single_value\"),\n        pytest.param(lambda df: df.columns[-4:-1], id=\"multiple_values\"),\n    ],\n)\n@pytest.mark.parametrize(\n    \"aggfunc\",\n    [\n        pytest.param([\"mean\", \"sum\"], id=\"list_func\"),\n        pytest.param(\n            lambda df: {df.columns[5]: \"mean\", df.columns[-5]: \"sum\"}, id=\"dict_func\"\n        ),\n    ],\n)\n@pytest.mark.parametrize(\n    \"margins_name\",\n    [pytest.param(\"Custom name\", id=\"str_name\")],\n)\n@pytest.mark.parametrize(\"fill_value\", [None, 0])\n@pytest.mark.parametrize(\"backend\", [None, \"pyarrow\"])\ndef test_pivot_table_margins(\n    data,\n    index,\n    columns,\n    values,\n    aggfunc,\n    margins_name,\n    fill_value,\n    backend,\n    request,\n):\n    expected_exception = None\n    if \"dict_func\" in request.node.callspec.id:\n        expected_exception = KeyError(\"Column(s) ['col28', 'col38'] do not exist\")\n    eval_general(\n        *create_test_dfs(data, backend=backend),\n        operation=lambda df, *args, **kwargs: df.pivot_table(*args, **kwargs),\n        index=index,\n        columns=columns,\n        values=values,\n        aggfunc=aggfunc,\n        margins=True,\n        margins_name=margins_name,\n        
fill_value=fill_value,\n        expected_exception=expected_exception,\n    )\n\n\n@pytest.mark.parametrize(\n    \"aggfunc\",\n    [\n        pytest.param(\"sum\", id=\"MapReduce_func\"),\n        pytest.param(\"nunique\", id=\"FullAxis_func\"),\n    ],\n)\n@pytest.mark.parametrize(\"margins\", [True, False])\ndef test_pivot_table_fill_value(aggfunc, margins):\n    md_df, pd_df = create_test_dfs(test_data[\"int_data\"])\n    eval_general(\n        md_df,\n        pd_df,\n        operation=lambda df, *args, **kwargs: df.pivot_table(*args, **kwargs),\n        index=md_df.columns[0],\n        columns=md_df.columns[1],\n        values=md_df.columns[2],\n        aggfunc=aggfunc,\n        margins=margins,\n        fill_value=10,\n    )\n\n\n@pytest.mark.parametrize(\"data\", [test_data[\"int_data\"]], ids=[\"int_data\"])\ndef test_pivot_table_dropna(data):\n    eval_general(\n        *create_test_dfs(data),\n        operation=lambda df, *args, **kwargs: df.pivot_table(*args, **kwargs),\n        index=lambda df: df.columns[0],\n        columns=lambda df: df.columns[1],\n        values=lambda df: df.columns[-1],\n        dropna=False,\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_plot(request, data):\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n\n    if name_contains(request.node.name, numeric_dfs):\n        # We have to test this way because equality in plots means same object.\n        zipped_plot_lines = zip(modin_df.plot().lines, pandas_df.plot().lines)\n        for left, right in zipped_plot_lines:\n            if isinstance(left.get_xdata(), np.ma.core.MaskedArray) and isinstance(\n                right.get_xdata(), np.ma.core.MaskedArray\n            ):\n                assert all((left.get_xdata() == right.get_xdata()).data)\n            else:\n                assert np.array_equal(left.get_xdata(), right.get_xdata())\n            if isinstance(left.get_ydata(), np.ma.core.MaskedArray) 
and isinstance(\n                right.get_ydata(), np.ma.core.MaskedArray\n            ):\n                assert all((left.get_ydata() == right.get_ydata()).data)\n            else:\n                assert np.array_equal(left.get_xdata(), right.get_xdata())\n\n\ndef test_replace():\n    modin_df = pd.DataFrame(\n        {\"A\": [0, 1, 2, 3, 4], \"B\": [5, 6, 7, 8, 9], \"C\": [\"a\", \"b\", \"c\", \"d\", \"e\"]}\n    )\n    pandas_df = pandas.DataFrame(\n        {\"A\": [0, 1, 2, 3, 4], \"B\": [5, 6, 7, 8, 9], \"C\": [\"a\", \"b\", \"c\", \"d\", \"e\"]}\n    )\n    modin_result = modin_df.replace({\"A\": 0, \"B\": 5}, 100)\n    pandas_result = pandas_df.replace({\"A\": 0, \"B\": 5}, 100)\n    df_equals(modin_result, pandas_result)\n\n    modin_result = modin_df.replace({\"A\": {0: 100, 4: 400}})\n    pandas_result = pandas_df.replace({\"A\": {0: 100, 4: 400}})\n    df_equals(modin_result, pandas_result)\n\n    modin_df = pd.DataFrame({\"A\": [\"bat\", \"foo\", \"bait\"], \"B\": [\"abc\", \"bar\", \"xyz\"]})\n    pandas_df = pandas.DataFrame(\n        {\"A\": [\"bat\", \"foo\", \"bait\"], \"B\": [\"abc\", \"bar\", \"xyz\"]}\n    )\n    modin_result = modin_df.replace(regex={r\"^ba.$\": \"new\", \"foo\": \"xyz\"})\n    pandas_result = pandas_df.replace(regex={r\"^ba.$\": \"new\", \"foo\": \"xyz\"})\n    df_equals(modin_result, pandas_result)\n\n    modin_result = modin_df.replace(regex=[r\"^ba.$\", \"foo\"], value=\"new\")\n    pandas_result = pandas_df.replace(regex=[r\"^ba.$\", \"foo\"], value=\"new\")\n    df_equals(modin_result, pandas_result)\n\n    modin_df.replace(regex=[r\"^ba.$\", \"foo\"], value=\"new\", inplace=True)\n    pandas_df.replace(regex=[r\"^ba.$\", \"foo\"], value=\"new\", inplace=True)\n    df_equals(modin_df, pandas_df)\n\n\n@pytest.mark.parametrize(\"rule\", [\"5min\", pandas.offsets.Hour()])\n@pytest.mark.parametrize(\"axis\", [0])\ndef test_resampler(rule, axis):\n    data, index = (\n        test_data_resample[\"data\"],\n        
test_data_resample[\"index\"],\n    )\n    modin_resampler = pd.DataFrame(data, index=index).resample(rule, axis=axis)\n    pandas_resampler = pandas.DataFrame(data, index=index).resample(rule, axis=axis)\n\n    assert pandas_resampler.indices == modin_resampler.indices\n    assert pandas_resampler.groups == modin_resampler.groups\n\n    df_equals(\n        modin_resampler.get_group(name=list(modin_resampler.groups)[0]),\n        pandas_resampler.get_group(name=list(pandas_resampler.groups)[0]),\n    )\n\n\n@pytest.mark.parametrize(\"rule\", [\"5min\"])\n@pytest.mark.parametrize(\"axis\", [\"index\", \"columns\"])\n@pytest.mark.parametrize(\n    \"method\",\n    [\n        *(\"count\", \"sum\", \"std\", \"sem\", \"size\", \"prod\", \"ohlc\", \"quantile\"),\n        *(\"min\", \"median\", \"mean\", \"max\", \"last\", \"first\", \"nunique\", \"var\"),\n        *(\"interpolate\", \"asfreq\", \"nearest\", \"bfill\", \"ffill\"),\n    ],\n)\ndef test_resampler_functions(rule, axis, method):\n    data, index = (\n        test_data_resample[\"data\"],\n        test_data_resample[\"index\"],\n    )\n    modin_df = pd.DataFrame(data, index=index)\n    pandas_df = pandas.DataFrame(data, index=index)\n    if axis == \"columns\":\n        columns = pandas.date_range(\n            \"31/12/2000\", periods=len(pandas_df.columns), freq=\"min\"\n        )\n        modin_df.columns = columns\n        pandas_df.columns = columns\n\n    expected_exception = None\n    if method in (\"interpolate\", \"asfreq\", \"nearest\", \"bfill\", \"ffill\"):\n        # It looks like pandas is preparing to completely\n        # remove `axis` parameter for `resample` function.\n        expected_exception = AssertionError(\"axis must be 0\")\n\n    eval_general(\n        modin_df,\n        pandas_df,\n        lambda df: getattr(df.resample(rule, axis=axis), method)(),\n        expected_exception=expected_exception,\n    )\n\n\n@pytest.mark.parametrize(\"rule\", 
[\"5min\"])\n@pytest.mark.parametrize(\"axis\", [\"index\", \"columns\"])\n@pytest.mark.parametrize(\n    \"method_arg\",\n    [\n        (\"pipe\", lambda x: x.max() - x.min()),\n        (\"transform\", lambda x: (x - x.mean()) / x.std()),\n        (\"apply\", [\"sum\", \"mean\", \"max\"]),\n        (\"aggregate\", [\"sum\", \"mean\", \"max\"]),\n    ],\n)\ndef test_resampler_functions_with_arg(rule, axis, method_arg):\n    data, index = (\n        test_data_resample[\"data\"],\n        test_data_resample[\"index\"],\n    )\n    modin_df = pd.DataFrame(data, index=index)\n    pandas_df = pandas.DataFrame(data, index=index)\n    if axis == \"columns\":\n        columns = pandas.date_range(\n            \"31/12/2000\", periods=len(pandas_df.columns), freq=\"min\"\n        )\n        modin_df.columns = columns\n        pandas_df.columns = columns\n\n    method, arg = method_arg[0], method_arg[1]\n\n    expected_exception = None\n    if method in (\"apply\", \"aggregate\"):\n        expected_exception = NotImplementedError(\"axis other than 0 is not supported\")\n\n    eval_general(\n        modin_df,\n        pandas_df,\n        lambda df: getattr(df.resample(rule, axis=axis), method)(arg),\n        expected_exception=expected_exception,\n    )\n\n\n@pytest.mark.parametrize(\"rule\", [\"5min\"])\n@pytest.mark.parametrize(\"closed\", [\"left\", \"right\"])\n@pytest.mark.parametrize(\"label\", [\"right\", \"left\"])\n@pytest.mark.parametrize(\n    \"on\",\n    [\n        None,\n        pytest.param(\n            \"DateColumn\",\n            marks=pytest.mark.xfail(\n                condition=Engine.get() in (\"Ray\", \"Unidist\", \"Dask\", \"Python\")\n                and StorageFormat.get() != \"Base\",\n                reason=\"https://github.com/modin-project/modin/issues/6399\",\n            ),\n        ),\n    ],\n)\n@pytest.mark.parametrize(\"level\", [None, 1])\ndef test_resample_specific(rule, closed, label, on, level):\n    data, index = (\n        
test_data_resample[\"data\"],\n        test_data_resample[\"index\"],\n    )\n    modin_df = pd.DataFrame(data, index=index)\n    pandas_df = pandas.DataFrame(data, index=index)\n\n    if on is None and level is not None:\n        index = pandas.MultiIndex.from_product(\n            [\n                [\"a\", \"b\", \"c\", \"d\"],\n                pandas.date_range(\"31/12/2000\", periods=len(pandas_df) // 4, freq=\"h\"),\n            ]\n        )\n        pandas_df.index = index\n        modin_df.index = index\n    else:\n        level = None\n\n    if on is not None:\n        pandas_df[on] = pandas.date_range(\n            \"22/06/1941\", periods=len(pandas_df), freq=\"min\"\n        )\n        modin_df[on] = pandas.date_range(\n            \"22/06/1941\", periods=len(modin_df), freq=\"min\"\n        )\n\n    pandas_resampler = pandas_df.resample(\n        rule,\n        closed=closed,\n        label=label,\n        on=on,\n        level=level,\n    )\n    modin_resampler = modin_df.resample(\n        rule,\n        closed=closed,\n        label=label,\n        on=on,\n        level=level,\n    )\n    df_equals(modin_resampler.var(0), pandas_resampler.var(0))\n    if on is None and level is None:\n        df_equals(\n            modin_resampler.fillna(method=\"nearest\"),\n            pandas_resampler.fillna(method=\"nearest\"),\n        )\n\n\n@pytest.mark.parametrize(\n    \"columns\",\n    [\n        \"volume\",\n        \"date\",\n        [\"volume\"],\n        (\"volume\",),\n        pandas.Series([\"volume\"]),\n        pandas.Index([\"volume\"]),\n        [\"volume\", \"volume\", \"volume\"],\n        [\"volume\", \"price\", \"date\"],\n    ],\n    ids=[\n        \"column\",\n        \"only_missed_column\",\n        \"list\",\n        \"tuple\",\n        \"series\",\n        \"index\",\n        \"duplicate_column\",\n        \"missed_column\",\n    ],\n)\ndef test_resample_getitem(columns, request):\n    index = pandas.date_range(\"1/1/2013\", periods=9, 
freq=\"min\")\n    data = {\n        \"price\": range(9),\n        \"volume\": range(10, 19),\n    }\n    expected_exception = None\n    if \"only_missed_column\" in request.node.callspec.id:\n        expected_exception = KeyError(\"Column not found: date\")\n    elif \"missed_column\" in request.node.callspec.id:\n        expected_exception = KeyError(\"Columns not found: 'date'\")\n    eval_general(\n        *create_test_dfs(data, index=index),\n        lambda df: df.resample(\"3min\")[columns].mean(),\n        expected_exception=expected_exception,\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"index\", [\"default\", \"ndarray\", \"has_duplicates\"])\n@pytest.mark.parametrize(\"axis\", [0, 1])\n@pytest.mark.parametrize(\"periods\", [0, 1, -1, 10, -10, 1000000000, -1000000000])\ndef test_shift(data, index, axis, periods):\n    modin_df, pandas_df = create_test_dfs(data)\n    if index == \"ndarray\":\n        data_column_length = len(data[next(iter(data))])\n        modin_df.index = pandas_df.index = np.arange(2, data_column_length + 2)\n    elif index == \"has_duplicates\":\n        modin_df.index = pandas_df.index = list(modin_df.index[:-3]) + [0, 1, 2]\n\n    df_equals(\n        modin_df.shift(periods=periods, axis=axis),\n        pandas_df.shift(periods=periods, axis=axis),\n    )\n    df_equals(\n        modin_df.shift(periods=periods, axis=axis, fill_value=777),\n        pandas_df.shift(periods=periods, axis=axis, fill_value=777),\n    )\n\n\n@pytest.mark.parametrize(\"is_multi_idx\", [True, False], ids=[\"idx_multi\", \"idx_index\"])\n@pytest.mark.parametrize(\"is_multi_col\", [True, False], ids=[\"col_multi\", \"col_index\"])\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_stack(data, is_multi_idx, is_multi_col):\n    pandas_df = pandas.DataFrame(data)\n    modin_df = pd.DataFrame(data)\n\n    if is_multi_idx:\n        if len(pandas_df.index) == 256:\n     
       index = pd.MultiIndex.from_product(\n                [\n                    [\"a\", \"b\", \"c\", \"d\"],\n                    [\"x\", \"y\", \"z\", \"last\"],\n                    [\"i\", \"j\", \"k\", \"index\"],\n                    [1, 2, 3, 4],\n                ]\n            )\n        elif len(pandas_df.index) == 100:\n            index = pd.MultiIndex.from_product(\n                [\n                    [\"x\", \"y\", \"z\", \"last\"],\n                    [\"a\", \"b\", \"c\", \"d\", \"f\"],\n                    [\"i\", \"j\", \"k\", \"l\", \"index\"],\n                ]\n            )\n        else:\n            index = pd.MultiIndex.from_tuples(\n                [(i, i * 2, i * 3) for i in range(len(pandas_df.index))]\n            )\n    else:\n        index = pandas_df.index\n\n    if is_multi_col:\n        if len(pandas_df.columns) == 64:\n            columns = pd.MultiIndex.from_product(\n                [[\"A\", \"B\", \"C\", \"D\"], [\"xx\", \"yy\", \"zz\", \"LAST\"], [10, 20, 30, 40]]\n            )\n        elif len(pandas_df.columns) == 100:\n            columns = pd.MultiIndex.from_product(\n                [\n                    [\"xx\", \"yy\", \"zz\", \"LAST\"],\n                    [\"A\", \"B\", \"C\", \"D\", \"F\"],\n                    [\"I\", \"J\", \"K\", \"L\", \"INDEX\"],\n                ]\n            )\n        else:\n            columns = pd.MultiIndex.from_tuples(\n                [(i, i * 2, i * 3) for i in range(len(pandas_df.columns))]\n            )\n    else:\n        columns = pandas_df.columns\n\n    pandas_df.columns = columns\n    pandas_df.index = index\n\n    modin_df.columns = columns\n    modin_df.index = index\n\n    df_equals(modin_df.stack(), pandas_df.stack())\n\n    if is_multi_col:\n        df_equals(modin_df.stack(level=0), pandas_df.stack(level=0))\n        df_equals(modin_df.stack(level=[0, 1]), pandas_df.stack(level=[0, 1]))\n        df_equals(modin_df.stack(level=[0, 1, 2]), 
pandas_df.stack(level=[0, 1, 2]))\n\n\n@pytest.mark.parametrize(\"sort\", [True, False])\ndef test_stack_sort(sort):\n    # Example frame slightly modified from pandas docs to be unsorted\n    cols = pd.MultiIndex.from_tuples([(\"weight\", \"pounds\"), (\"weight\", \"kg\")])\n    modin_df, pandas_df = create_test_dfs(\n        [[1, 2], [2, 4]], index=[\"cat\", \"dog\"], columns=cols\n    )\n    df_equals(modin_df.stack(sort=sort), pandas_df.stack(sort=sort))\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"axis1\", [0, 1])\n@pytest.mark.parametrize(\"axis2\", [0, 1])\ndef test_swapaxes(data, axis1, axis2):\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n\n    pandas_result = pandas_df.swapaxes(axis1, axis2)\n    modin_result = modin_df.swapaxes(axis1, axis2)\n    df_equals(modin_result, pandas_result)\n\n\ndef test_swapaxes_axes_names():\n    modin_df = pd.DataFrame(test_data_values[0])\n    modin_result1 = modin_df.swapaxes(0, 1)\n    modin_result2 = modin_df.swapaxes(\"columns\", \"index\")\n    df_equals(modin_result1, modin_result2)\n\n\ndef test_swaplevel():\n    data = np.random.randint(1, 100, 12)\n    modin_df = pd.DataFrame(\n        data,\n        index=pd.MultiIndex.from_tuples(\n            [\n                (num, letter, color)\n                for num in range(1, 3)\n                for letter in [\"a\", \"b\", \"c\"]\n                for color in [\"Red\", \"Green\"]\n            ],\n            names=[\"Number\", \"Letter\", \"Color\"],\n        ),\n    )\n    pandas_df = pandas.DataFrame(\n        data,\n        index=pandas.MultiIndex.from_tuples(\n            [\n                (num, letter, color)\n                for num in range(1, 3)\n                for letter in [\"a\", \"b\", \"c\"]\n                for color in [\"Red\", \"Green\"]\n            ],\n            names=[\"Number\", \"Letter\", \"Color\"],\n        ),\n    )\n    df_equals(\n        
modin_df.swaplevel(\"Number\", \"Color\"),\n        pandas_df.swaplevel(\"Number\", \"Color\"),\n    )\n    df_equals(modin_df.swaplevel(), pandas_df.swaplevel())\n    df_equals(modin_df.swaplevel(0, 1), pandas_df.swaplevel(0, 1))\n\n\ndef test_take():\n    modin_df = pd.DataFrame(\n        [\n            (\"falcon\", \"bird\", 389.0),\n            (\"parrot\", \"bird\", 24.0),\n            (\"lion\", \"mammal\", 80.5),\n            (\"monkey\", \"mammal\", np.nan),\n        ],\n        columns=[\"name\", \"class\", \"max_speed\"],\n        index=[0, 2, 3, 1],\n    )\n    pandas_df = pandas.DataFrame(\n        [\n            (\"falcon\", \"bird\", 389.0),\n            (\"parrot\", \"bird\", 24.0),\n            (\"lion\", \"mammal\", 80.5),\n            (\"monkey\", \"mammal\", np.nan),\n        ],\n        columns=[\"name\", \"class\", \"max_speed\"],\n        index=[0, 2, 3, 1],\n    )\n    df_equals(modin_df.take([0, 3]), pandas_df.take([0, 3]))\n    df_equals(modin_df.take([2], axis=1), pandas_df.take([2], axis=1))\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_to_records(data):\n    # `to_records` doesn't work when `index` is among column names\n    eval_general(\n        *create_test_dfs(data),\n        lambda df: (\n            df.dropna().drop(\"index\", axis=1) if \"index\" in df.columns else df.dropna()\n        ).to_records(),\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_to_string(data):\n    eval_general(\n        *create_test_dfs(data),\n        lambda df: df.to_string(),\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_truncate(data):\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n\n    before = 1\n    after = len(modin_df - 3)\n    df_equals(modin_df.truncate(before, after), pandas_df.truncate(before, after))\n\n    before = 1\n    after = 3\n    df_equals(modin_df.truncate(before, 
after), pandas_df.truncate(before, after))\n\n    before = modin_df.columns[1]\n    after = modin_df.columns[-3]\n    try:\n        pandas_result = pandas_df.truncate(before, after, axis=1)\n    except Exception as err:\n        with pytest.raises(type(err)):\n            modin_df.truncate(before, after, axis=1)\n    else:\n        modin_result = modin_df.truncate(before, after, axis=1)\n        df_equals(modin_result, pandas_result)\n\n    before = modin_df.columns[1]\n    after = modin_df.columns[3]\n    try:\n        pandas_result = pandas_df.truncate(before, after, axis=1)\n    except Exception as err:\n        with pytest.raises(type(err)):\n            modin_df.truncate(before, after, axis=1)\n    else:\n        modin_result = modin_df.truncate(before, after, axis=1)\n        df_equals(modin_result, pandas_result)\n\n    before = None\n    after = None\n    df_equals(modin_df.truncate(before, after), pandas_df.truncate(before, after))\n    try:\n        pandas_result = pandas_df.truncate(before, after, axis=1)\n    except Exception as err:\n        with pytest.raises(type(err)):\n            modin_df.truncate(before, after, axis=1)\n    else:\n        modin_result = modin_df.truncate(before, after, axis=1)\n        df_equals(modin_result, pandas_result)\n\n\ndef test_truncate_before_greater_than_after():\n    df = pd.DataFrame([[1, 2, 3]])\n    with pytest.raises(ValueError, match=\"Truncate: 1 must be after 2\"):\n        df.truncate(before=2, after=1)\n\n\ndef test_tz_convert():\n    modin_idx = pd.date_range(\n        \"1/1/2012\", periods=500, freq=\"2D\", tz=\"America/Los_Angeles\"\n    )\n    pandas_idx = pandas.date_range(\n        \"1/1/2012\", periods=500, freq=\"2D\", tz=\"America/Los_Angeles\"\n    )\n    data = np.random.randint(0, 100, size=(len(modin_idx), 4))\n    modin_df = pd.DataFrame(data, index=modin_idx)\n    pandas_df = pandas.DataFrame(data, index=pandas_idx)\n    modin_result = modin_df.tz_convert(\"UTC\", axis=0)\n    pandas_result = 
pandas_df.tz_convert(\"UTC\", axis=0)\n    df_equals(modin_result, pandas_result)\n\n    modin_multi = pd.MultiIndex.from_arrays([modin_idx, range(len(modin_idx))])\n    pandas_multi = pandas.MultiIndex.from_arrays([pandas_idx, range(len(modin_idx))])\n    modin_series = pd.DataFrame(data, index=modin_multi)\n    pandas_series = pandas.DataFrame(data, index=pandas_multi)\n    df_equals(\n        modin_series.tz_convert(\"UTC\", axis=0, level=0),\n        pandas_series.tz_convert(\"UTC\", axis=0, level=0),\n    )\n\n\ndef test_tz_localize():\n    idx = pd.date_range(\"1/1/2012\", periods=400, freq=\"2D\")\n    data = np.random.randint(0, 100, size=(len(idx), 4))\n    modin_df = pd.DataFrame(data, index=idx)\n    pandas_df = pandas.DataFrame(data, index=idx)\n    df_equals(modin_df.tz_localize(\"UTC\", axis=0), pandas_df.tz_localize(\"UTC\", axis=0))\n    df_equals(\n        modin_df.tz_localize(\"America/Los_Angeles\", axis=0),\n        pandas_df.tz_localize(\"America/Los_Angeles\", axis=0),\n    )\n\n\n@pytest.mark.parametrize(\"is_multi_idx\", [True, False], ids=[\"idx_multi\", \"idx_index\"])\n@pytest.mark.parametrize(\"is_multi_col\", [True, False], ids=[\"col_multi\", \"col_index\"])\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_unstack(data, is_multi_idx, is_multi_col):\n    modin_df, pandas_df = create_test_dfs(data)\n\n    if is_multi_idx:\n        index = generate_multiindex(len(pandas_df), nlevels=4, is_tree_like=True)\n    else:\n        index = pandas_df.index\n\n    if is_multi_col:\n        columns = generate_multiindex(\n            len(pandas_df.columns), nlevels=3, is_tree_like=True\n        )\n    else:\n        columns = pandas_df.columns\n\n    pandas_df.columns = modin_df.columns = columns\n    pandas_df.index = modin_df.index = index\n\n    df_equals(modin_df.unstack(), pandas_df.unstack())\n    df_equals(modin_df.unstack(level=1), pandas_df.unstack(level=1))\n    if is_multi_idx:\n        
df_equals(modin_df.unstack(level=[0, 1]), pandas_df.unstack(level=[0, 1]))\n        df_equals(modin_df.unstack(level=[0, 1, 2]), pandas_df.unstack(level=[0, 1, 2]))\n        df_equals(\n            modin_df.unstack(level=[0, 1, 2, 3]), pandas_df.unstack(level=[0, 1, 2, 3])\n        )\n\n\n@pytest.mark.parametrize(\n    \"multi_col\", [\"col_multi_tree\", \"col_multi_not_tree\", \"col_index\"]\n)\n@pytest.mark.parametrize(\n    \"multi_idx\", [\"idx_multi_tree\", \"idx_multi_not_tree\", \"idx_index\"]\n)\ndef test_unstack_multiindex_types(multi_col, multi_idx):\n    MAX_NROWS = MAX_NCOLS = 36\n\n    pandas_df = pandas.DataFrame(test_data[\"int_data\"]).iloc[:MAX_NROWS, :MAX_NCOLS]\n    modin_df = pd.DataFrame(test_data[\"int_data\"]).iloc[:MAX_NROWS, :MAX_NCOLS]\n\n    def get_new_index(index, cond):\n        if cond == \"col_multi_tree\" or cond == \"idx_multi_tree\":\n            return generate_multiindex(len(index), nlevels=3, is_tree_like=True)\n        elif cond == \"col_multi_not_tree\" or cond == \"idx_multi_not_tree\":\n            return generate_multiindex(len(index), nlevels=3)\n        else:\n            return index\n\n    pandas_df.columns = modin_df.columns = get_new_index(pandas_df.columns, multi_col)\n    pandas_df.index = modin_df.index = get_new_index(pandas_df.index, multi_idx)\n\n    df_equals(modin_df.unstack(), pandas_df.unstack())\n    df_equals(modin_df.unstack(level=1), pandas_df.unstack(level=1))\n    if multi_idx != \"idx_index\":\n        df_equals(modin_df.unstack(level=[0, 1]), pandas_df.unstack(level=[0, 1]))\n        df_equals(modin_df.unstack(level=[0, 1, 2]), pandas_df.unstack(level=[0, 1, 2]))\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"copy_kwargs\", ({\"copy\": True}, {\"copy\": None}, {}))\n@pytest.mark.parametrize(\n    \"get_array, get_array_name\",\n    (\n        (lambda df, copy_kwargs: df.__array__(**copy_kwargs), \"__array__\"),\n        (lambda df, 
copy_kwargs: np.array(df, **copy_kwargs), \"np.array\"),\n    ),\n)\ndef test___array__(data, copy_kwargs, get_array, get_array_name):\n    if (\n        get_array_name == \"np.array\"\n        and Version(np.__version__) < Version(\"2\")\n        and \"copy\" in copy_kwargs\n        and copy_kwargs[\"copy\"] is None\n    ):\n        pytest.skip(reason=\"np.array does not support copy=None before numpy 2.0\")\n    assert_array_equal(*(get_array(df, copy_kwargs) for df in create_test_dfs(data)))\n\n\n@pytest.mark.xfail(\n    condition=Backend.get() != \"Pandas\",\n    raises=AssertionError,\n    reason=\"https://github.com/modin-project/modin/issues/4650\",\n)\ndef test___array__copy_false_creates_view():\n    def do_in_place_update_via_copy(df):\n        array = np.array(df, copy=False)\n        array[0, 0] += 1\n\n    eval_general(\n        *create_test_dfs([[11]]), do_in_place_update_via_copy, __inplace__=True\n    )\n\n\n@pytest.mark.parametrize(\"data\", [[False], [True], [1, 2]])\ndef test___bool__(data):\n    eval_general(\n        *create_test_dfs(data),\n        lambda df: df.__bool__(),\n        expected_exception=ValueError(\n            \"The truth value of a DataFrame is ambiguous. 
Use a.empty, a.bool(), a.item(), a.any() or a.all().\"\n        ),\n    )\n\n\n@pytest.mark.parametrize(\n    \"is_sparse_data\", [True, False], ids=[\"is_sparse\", \"is_not_sparse\"]\n)\ndef test_hasattr_sparse(is_sparse_data):\n    modin_df, pandas_df = (\n        create_test_dfs(pandas.arrays.SparseArray(test_data[\"float_nan_data\"].values()))\n        if is_sparse_data\n        else create_test_dfs(test_data[\"float_nan_data\"])\n    )\n    eval_general(modin_df, pandas_df, lambda df: hasattr(df, \"sparse\"))\n\n\ndef test_setattr_axes():\n    # Test that setting .index or .columns does not warn\n    df = pd.DataFrame([[1, 2], [3, 4]])\n    with warnings.catch_warnings():\n        if get_current_execution() != \"BaseOnPython\":\n            # In BaseOnPython, setting columns raises a warning because get_axis\n            #  defaults to pandas.\n            warnings.simplefilter(\"error\")\n        df.index = [\"foo\", \"bar\"]\n        # Check that ensure_index was called\n        pd.testing.assert_index_equal(df.index, pandas.Index([\"foo\", \"bar\"]))\n\n        df.columns = [9, 10]\n        pd.testing.assert_index_equal(df.columns, pandas.Index([9, 10]))\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_attrs(data):\n    modin_df, pandas_df = create_test_dfs(data)\n    eval_general(modin_df, pandas_df, lambda df: df.attrs)\n\n\ndef test_df_from_series_with_tuple_name():\n    # Tests that creating a DataFrame from a series with a tuple name results in\n    # a DataFrame with MultiIndex columns.\n    pandas_result = pandas.DataFrame(pandas.Series(name=(\"a\", 1)))\n    # 1. Creating a Modin DF from native pandas Series\n    df_equals(pd.DataFrame(pandas.Series(name=(\"a\", 1))), pandas_result)\n    # 2. 
Creating a Modin DF from Modin Series\n    df_equals(pd.DataFrame(pd.Series(name=(\"a\", 1))), pandas_result)\n\n\ndef test_large_df_warns_distributing_takes_time():\n    # https://github.com/modin-project/modin/issues/6574\n\n    regex = r\"Distributing (.*) object\\. This may take some time\\.\"\n    with pytest.warns(UserWarning, match=regex):\n        pd.DataFrame(np.random.randint(1_000_000, size=(100_000, 10)))\n\n\ndef test_large_series_warns_distributing_takes_time():\n    # https://github.com/modin-project/modin/issues/6574\n\n    regex = r\"Distributing (.*) object\\. This may take some time\\.\"\n    with pytest.warns(UserWarning, match=regex):\n        pd.Series(np.random.randint(1_000_000, size=(2_500_000)))\n\n\ndef test_df_does_not_warn_distributing_takes_time():\n    # https://github.com/modin-project/modin/issues/6574\n\n    regex = r\"Distributing (.*) object\\. This may take some time\\.\"\n    with warnings.catch_warnings():\n        warnings.filterwarnings(\"error\", regex, UserWarning)\n        pd.DataFrame(np.random.randint(1_000_000, size=(100_000, 9)))\n\n\ndef test_series_does_not_warn_distributing_takes_time():\n    # https://github.com/modin-project/modin/issues/6574\n\n    regex = r\"Distributing (.*) object\\. This may take some time\\.\"\n    with warnings.catch_warnings():\n        warnings.filterwarnings(\"error\", regex, UserWarning)\n        pd.Series(np.random.randint(1_000_000, size=(2_400_000)))\n\n\n@pytest.mark.parametrize(\"dtype\", [np.int64, pd.ArrowDtype(pa.int64())])\ndef test_empty_df_dtypes(dtype):\n    df = pd.DataFrame({\"A\": []}, dtype=dtype)\n    assert df.dtypes[\"A\"] == dtype\n\n\ndef test_array_ufunc():\n    modin_df, pandas_df = create_test_dfs([[1, 2], [3, 4]])\n    eval_general(modin_df, pandas_df, np.sqrt)\n    modin_ser, pandas_ser = create_test_series([1, 2, 3, 4, 9])\n    eval_general(modin_ser, pandas_ser, np.sqrt)\n"
  },
  {
    "path": "modin/tests/pandas/dataframe/test_indexing.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport os\nimport sys\n\nimport matplotlib\nimport numpy as np\nimport pandas\nimport pytest\nfrom pandas._testing import ensure_clean\n\nimport modin.pandas as pd\nfrom modin.config import MinRowPartitionSize, NPartitions\nfrom modin.pandas.indexing import is_range_like\nfrom modin.pandas.testing import assert_index_equal\nfrom modin.tests.pandas.utils import (\n    NROWS,\n    RAND_HIGH,\n    RAND_LOW,\n    arg_keys,\n    assert_dtypes_equal,\n    axis_keys,\n    axis_values,\n    create_test_dfs,\n    default_to_pandas_ignore_string,\n    df_equals,\n    eval_general,\n    generate_multiindex,\n    int_arg_keys,\n    int_arg_values,\n    name_contains,\n    test_data,\n    test_data_keys,\n    test_data_values,\n)\nfrom modin.utils import get_current_execution\n\nNPartitions.put(4)\n\n# Force matplotlib to not use any Xwindows backend.\nmatplotlib.use(\"Agg\")\n\n# Our configuration in pytest.ini requires that we explicitly catch all\n# instances of defaulting to pandas, but some test modules, like this one,\n# have too many such instances.\n# TODO(https://github.com/modin-project/modin/issues/3655): catch all instances\n# of defaulting to pandas.\npytestmark = 
pytest.mark.filterwarnings(default_to_pandas_ignore_string)\n\n\ndef eval_setitem(md_df, pd_df, value, col=None, loc=None, expected_exception=None):\n    if loc is not None:\n        col = pd_df.columns[loc]\n\n    value_getter = value if callable(value) else (lambda *args, **kwargs: value)\n\n    eval_general(\n        md_df,\n        pd_df,\n        lambda df: df.__setitem__(col, value_getter(df)),\n        __inplace__=True,\n        expected_exception=expected_exception,\n    )\n\n\ndef eval_loc(md_df, pd_df, value, key):\n    if isinstance(value, tuple):\n        assert len(value) == 2\n        # case when value for pandas different\n        md_value, pd_value = value\n    else:\n        md_value, pd_value = value, value\n\n    eval_general(\n        md_df,\n        pd_df,\n        lambda df: df.loc.__setitem__(\n            key, pd_value if isinstance(df, pandas.DataFrame) else md_value\n        ),\n        __inplace__=True,\n    )\n\n\n@pytest.mark.parametrize(\n    \"dates\",\n    [\n        [\"2018-02-27 09:03:30\", \"2018-02-27 09:04:30\"],\n        [\"2018-02-27 09:03:00\", \"2018-02-27 09:05:00\"],\n    ],\n)\n@pytest.mark.parametrize(\"subset\", [\"a\", \"b\", [\"a\", \"b\"], None])\ndef test_asof_with_nan(dates, subset):\n    data = {\"a\": [10, 20, 30, 40, 50], \"b\": [None, None, None, None, 500]}\n    index = pd.DatetimeIndex(\n        [\n            \"2018-02-27 09:01:00\",\n            \"2018-02-27 09:02:00\",\n            \"2018-02-27 09:03:00\",\n            \"2018-02-27 09:04:00\",\n            \"2018-02-27 09:05:00\",\n        ]\n    )\n    modin_where = pd.DatetimeIndex(dates)\n    pandas_where = pandas.DatetimeIndex(dates)\n    compare_asof(data, index, modin_where, pandas_where, subset)\n\n\n@pytest.mark.parametrize(\n    \"dates\",\n    [\n        [\"2018-02-27 09:03:30\", \"2018-02-27 09:04:30\"],\n        [\"2018-02-27 09:03:00\", \"2018-02-27 09:05:00\"],\n    ],\n)\n@pytest.mark.parametrize(\"subset\", [\"a\", \"b\", [\"a\", \"b\"], 
None])\ndef test_asof_without_nan(dates, subset):\n    data = {\"a\": [10, 20, 30, 40, 50], \"b\": [70, 600, 30, -200, 500]}\n    index = pd.DatetimeIndex(\n        [\n            \"2018-02-27 09:01:00\",\n            \"2018-02-27 09:02:00\",\n            \"2018-02-27 09:03:00\",\n            \"2018-02-27 09:04:00\",\n            \"2018-02-27 09:05:00\",\n        ]\n    )\n    modin_where = pd.DatetimeIndex(dates)\n    pandas_where = pandas.DatetimeIndex(dates)\n    compare_asof(data, index, modin_where, pandas_where, subset)\n\n\n@pytest.mark.parametrize(\n    \"lookup\",\n    [[60, 70, 90], [60.5, 70.5, 100]],\n)\n@pytest.mark.parametrize(\"subset\", [\"col2\", \"col1\", [\"col1\", \"col2\"], None])\ndef test_asof_large(lookup, subset):\n    data = test_data[\"float_nan_data\"]\n    index = list(range(NROWS))\n    modin_where = pd.Index(lookup)\n    pandas_where = pandas.Index(lookup)\n    compare_asof(data, index, modin_where, pandas_where, subset)\n\n\ndef compare_asof(\n    data, index, modin_where: pd.Index, pandas_where: pandas.Index, subset\n):\n    modin_df = pd.DataFrame(data, index=index)\n    pandas_df = pandas.DataFrame(data, index=index)\n    df_equals(\n        modin_df.asof(modin_where, subset=subset),\n        pandas_df.asof(pandas_where, subset=subset),\n    )\n    df_equals(\n        modin_df.asof(modin_where.values, subset=subset),\n        pandas_df.asof(pandas_where.values, subset=subset),\n    )\n    df_equals(\n        modin_df.asof(list(modin_where.values), subset=subset),\n        pandas_df.asof(list(pandas_where.values), subset=subset),\n    )\n    df_equals(\n        modin_df.asof(modin_where.values[0], subset=subset),\n        pandas_df.asof(pandas_where.values[0], subset=subset),\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_first_valid_index(data):\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n\n    assert modin_df.first_valid_index() == 
(pandas_df.first_valid_index())\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"n\", int_arg_values, ids=arg_keys(\"n\", int_arg_keys))\ndef test_head(data, n):\n    # Test normal dataframe head\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n    df_equals(modin_df.head(n), pandas_df.head(n))\n    df_equals(modin_df.head(len(modin_df) + 1), pandas_df.head(len(pandas_df) + 1))\n\n    # Test head when we call it from a QueryCompilerView\n    modin_result = modin_df.loc[:, [\"col1\", \"col3\", \"col3\"]].head(n)\n    pandas_result = pandas_df.loc[:, [\"col1\", \"col3\", \"col3\"]].head(n)\n    df_equals(modin_result, pandas_result)\n\n\n@pytest.mark.skip(reason=\"Defaulting to Pandas\")\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_iat(data):\n    modin_df = pd.DataFrame(data)\n\n    with pytest.raises(NotImplementedError):\n        modin_df.iat()\n\n\n@pytest.mark.gpu\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_iloc(request, data):\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n\n    if not name_contains(request.node.name, [\"empty_data\"]):\n        # Scalar\n        np.testing.assert_equal(modin_df.iloc[0, 1], pandas_df.iloc[0, 1])\n\n        # Series\n        df_equals(modin_df.iloc[0], pandas_df.iloc[0])\n        df_equals(modin_df.iloc[1:, 0], pandas_df.iloc[1:, 0])\n        df_equals(modin_df.iloc[1:2, 0], pandas_df.iloc[1:2, 0])\n\n        # DataFrame\n        df_equals(modin_df.iloc[[1, 2]], pandas_df.iloc[[1, 2]])\n        # See issue #80\n        # df_equals(modin_df.iloc[[1, 2], [1, 0]], pandas_df.iloc[[1, 2], [1, 0]])\n        df_equals(modin_df.iloc[1:2, 0:2], pandas_df.iloc[1:2, 0:2])\n\n        # Issue #43\n        modin_df.iloc[0:3, :]\n\n        # Write Item\n        modin_df.iloc[[1, 2]] = 42\n        pandas_df.iloc[[1, 2]] = 42\n        df_equals(modin_df, 
pandas_df)\n\n        modin_df = pd.DataFrame(data)\n        pandas_df = pandas.DataFrame(data)\n        modin_df.iloc[0] = modin_df.iloc[1]\n        pandas_df.iloc[0] = pandas_df.iloc[1]\n        df_equals(modin_df, pandas_df)\n\n        modin_df = pd.DataFrame(data)\n        pandas_df = pandas.DataFrame(data)\n        modin_df.iloc[:, 0] = modin_df.iloc[:, 1]\n        pandas_df.iloc[:, 0] = pandas_df.iloc[:, 1]\n        df_equals(modin_df, pandas_df)\n\n        # From issue #1775\n        df_equals(\n            modin_df.iloc[lambda df: df.index.get_indexer_for(df.index[:5])],\n            pandas_df.iloc[lambda df: df.index.get_indexer_for(df.index[:5])],\n        )\n\n        # Read values, selecting rows with callable and a column with a scalar.\n        df_equals(\n            pandas_df.iloc[lambda df: df.index.get_indexer_for(df.index[:5]), 0],\n            modin_df.iloc[lambda df: df.index.get_indexer_for(df.index[:5]), 0],\n        )\n    else:\n        with pytest.raises(IndexError):\n            modin_df.iloc[0, 1]\n\n\n@pytest.mark.gpu\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_index(data):\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n\n    df_equals(modin_df.index, pandas_df.index)\n    modin_df_cp = modin_df.copy()\n    pandas_df_cp = pandas_df.copy()\n\n    modin_df_cp.index = [str(i) for i in modin_df_cp.index]\n    pandas_df_cp.index = [str(i) for i in pandas_df_cp.index]\n    df_equals(modin_df_cp.index, pandas_df_cp.index)\n\n\n@pytest.mark.gpu\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_indexing_duplicate_axis(data):\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n    modin_df.index = pandas_df.index = [i // 3 for i in range(len(modin_df))]\n    assert any(modin_df.index.duplicated())\n    assert any(pandas_df.index.duplicated())\n\n    df_equals(modin_df.iloc[0], pandas_df.iloc[0])\n    
df_equals(modin_df.loc[0], pandas_df.loc[0])\n    df_equals(modin_df.iloc[0, 0:4], pandas_df.iloc[0, 0:4])\n    df_equals(\n        modin_df.loc[0, modin_df.columns[0:4]],\n        pandas_df.loc[0, pandas_df.columns[0:4]],\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\n    \"key_func\",\n    [\n        # test for the case from https://github.com/modin-project/modin/issues/4308\n        lambda df: \"non_existing_column\",\n        lambda df: df.columns[0],\n        lambda df: df.index,\n        lambda df: [df.index, df.columns[0]],\n        lambda df: (\n            pandas.Series(list(range(len(df.index))))\n            if isinstance(df, pandas.DataFrame)\n            else pd.Series(list(range(len(df))))\n        ),\n    ],\n    ids=[\n        \"non_existing_column\",\n        \"first_column_name\",\n        \"original_index\",\n        \"list_of_index_and_first_column_name\",\n        \"series_of_integers\",\n    ],\n)\n@pytest.mark.parametrize(\n    \"drop_kwargs\",\n    [{\"drop\": True}, {\"drop\": False}, {}],\n    ids=[\"drop_True\", \"drop_False\", \"no_drop_param\"],\n)\ndef test_set_index(data, key_func, drop_kwargs, request):\n    if (\n        \"list_of_index_and_first_column_name\" in request.node.name\n        and \"drop_False\" in request.node.name\n    ):\n        pytest.xfail(\n            reason=\"KeyError: https://github.com/modin-project/modin/issues/5636\"\n        )\n    expected_exception = None\n    if \"non_existing_column\" in request.node.callspec.id:\n        expected_exception = KeyError(\n            \"None of ['non_existing_column'] are in the columns\"\n        )\n    eval_general(\n        *create_test_dfs(data),\n        lambda df: df.set_index(key_func(df), **drop_kwargs),\n        expected_exception=expected_exception,\n    )\n\n\n@pytest.mark.parametrize(\"index\", [\"a\", [\"a\", (\"b\", \"\")]])\ndef test_set_index_with_multiindex(index):\n    # see #5186 for 
details\n    kwargs = {\"columns\": [[\"a\", \"b\", \"c\", \"d\"], [\"\", \"\", \"x\", \"y\"]]}\n    modin_df, pandas_df = create_test_dfs(np.random.rand(2, 4), **kwargs)\n    eval_general(modin_df, pandas_df, lambda df: df.set_index(index))\n\n\n@pytest.mark.gpu\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_keys(data):\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n\n    df_equals(modin_df.keys(), pandas_df.keys())\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_loc(data):\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n\n    key1 = modin_df.columns[0]\n    key2 = modin_df.columns[1]\n    # Scalar\n    df_equals(modin_df.loc[0, key1], pandas_df.loc[0, key1])\n\n    # Series\n    df_equals(modin_df.loc[0], pandas_df.loc[0])\n    df_equals(modin_df.loc[1:, key1], pandas_df.loc[1:, key1])\n    df_equals(modin_df.loc[1:2, key1], pandas_df.loc[1:2, key1])\n    df_equals(modin_df.loc[:, key1], pandas_df.loc[:, key1])\n\n    # DataFrame\n    df_equals(modin_df.loc[[1, 2]], pandas_df.loc[[1, 2]])\n\n    indices = [i % 3 == 0 for i in range(len(modin_df.index))]\n    columns = [i % 5 == 0 for i in range(len(modin_df.columns))]\n\n    # Key is a list of booleans\n    modin_result = modin_df.loc[indices, columns]\n    pandas_result = pandas_df.loc[indices, columns]\n    df_equals(modin_result, pandas_result)\n\n    # Key is a Modin or pandas series of booleans\n    df_equals(\n        modin_df.loc[pd.Series(indices), pd.Series(columns, index=modin_df.columns)],\n        pandas_df.loc[\n            pandas.Series(indices), pandas.Series(columns, index=modin_df.columns)\n        ],\n    )\n\n    modin_result = modin_df.loc[:, columns]\n    pandas_result = pandas_df.loc[:, columns]\n    df_equals(modin_result, pandas_result)\n\n    modin_result = modin_df.loc[indices]\n    pandas_result = pandas_df.loc[indices]\n    df_equals(modin_result, 
pandas_result)\n\n    # See issue #80\n    # df_equals(modin_df.loc[[1, 2], ['col1']], pandas_df.loc[[1, 2], ['col1']])\n    df_equals(modin_df.loc[1:2, key1:key2], pandas_df.loc[1:2, key1:key2])\n\n    # From issue #421\n    df_equals(modin_df.loc[:, [key2, key1]], pandas_df.loc[:, [key2, key1]])\n    df_equals(modin_df.loc[[2, 1], :], pandas_df.loc[[2, 1], :])\n\n    # From issue #1023\n    key1 = modin_df.columns[0]\n    key2 = modin_df.columns[-2]\n    df_equals(modin_df.loc[:, key1:key2], pandas_df.loc[:, key1:key2])\n\n    # Write Item\n    modin_df_copy = modin_df.copy()\n    pandas_df_copy = pandas_df.copy()\n    modin_df_copy.loc[[1, 2]] = 42\n    pandas_df_copy.loc[[1, 2]] = 42\n    df_equals(modin_df_copy, pandas_df_copy)\n\n    # Write an item, selecting rows with a callable.\n    modin_df_copy2 = modin_df.copy()\n    pandas_df_copy2 = pandas_df.copy()\n    modin_df_copy2.loc[lambda df: df[key1].isin(list(range(1000)))] = 42\n    pandas_df_copy2.loc[lambda df: df[key1].isin(list(range(1000)))] = 42\n    df_equals(modin_df_copy2, pandas_df_copy2)\n\n    # Write an item, selecting rows with a callable and a column with a scalar.\n    modin_df_copy3 = modin_df.copy()\n    pandas_df_copy3 = pandas_df.copy()\n    modin_df_copy3.loc[lambda df: df[key1].isin(list(range(1000))), key1] = 42\n    pandas_df_copy3.loc[lambda df: df[key1].isin(list(range(1000))), key1] = 42\n    df_equals(modin_df_copy3, pandas_df_copy3)\n\n    # Disabled for `BaseOnPython` because of the issue with `getitem_array`:\n    # https://github.com/modin-project/modin/issues/3701\n    if get_current_execution() != \"BaseOnPython\":\n        # From issue #1775\n        df_equals(\n            modin_df.loc[lambda df: df.iloc[:, 0].isin(list(range(1000)))],\n            pandas_df.loc[lambda df: df.iloc[:, 0].isin(list(range(1000)))],\n        )\n\n        # Read values, selecting rows with a callable and a column with a scalar.\n        df_equals(\n            pandas_df.loc[lambda df: 
df[key1].isin(list(range(1000))), key1],\n            modin_df.loc[lambda df: df[key1].isin(list(range(1000))), key1],\n        )\n\n    # From issue #1374\n    with pytest.raises(KeyError):\n        modin_df.loc[\"NO_EXIST\"]\n\n\n@pytest.mark.parametrize(\n    \"key_getter, value_getter\",\n    [\n        pytest.param(\n            lambda df, axis: (\n                (slice(None), df.axes[axis][:2])\n                if axis\n                else (df.axes[axis][:2], slice(None))\n            ),\n            lambda df, axis: df.iloc[:, :1] if axis else df.iloc[:1, :],\n            id=\"len(key)_>_len(value)\",\n        ),\n        pytest.param(\n            lambda df, axis: (\n                (slice(None), df.axes[axis][:2])\n                if axis\n                else (df.axes[axis][:2], slice(None))\n            ),\n            lambda df, axis: df.iloc[:, :3] if axis else df.iloc[:3, :],\n            id=\"len(key)_<_len(value)\",\n        ),\n        pytest.param(\n            lambda df, axis: (\n                (slice(None), df.axes[axis][:2])\n                if axis\n                else (df.axes[axis][:2], slice(None))\n            ),\n            lambda df, axis: df.iloc[:, :2] if axis else df.iloc[:2, :],\n            id=\"len(key)_==_len(value)\",\n        ),\n    ],\n)\n@pytest.mark.parametrize(\"key_axis\", [0, 1])\n@pytest.mark.parametrize(\"reverse_value_index\", [True, False])\n@pytest.mark.parametrize(\"reverse_value_columns\", [True, False])\ndef test_loc_4456(\n    key_getter, value_getter, key_axis, reverse_value_index, reverse_value_columns\n):\n    data = test_data[\"float_nan_data\"]\n    modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data)\n\n    key = key_getter(pandas_df, key_axis)\n\n    # `df.loc` doesn't work right for range-like indexers. 
Converting them to a list.\n    # https://github.com/modin-project/modin/issues/4497\n    if is_range_like(key[0]):\n        key = (list(key[0]), key[1])\n    if is_range_like(key[1]):\n        key = (key[0], list(key[1]))\n\n    value = pandas.DataFrame(\n        np.random.randint(0, 100, size=pandas_df.shape),\n        index=pandas_df.index,\n        columns=pandas_df.columns,\n    )\n    pdf_value = value_getter(value, key_axis)\n    mdf_value = value_getter(pd.DataFrame(value), key_axis)\n\n    if reverse_value_index:\n        pdf_value = pdf_value.reindex(index=pdf_value.index[::-1])\n        mdf_value = mdf_value.reindex(index=mdf_value.index[::-1])\n    if reverse_value_columns:\n        pdf_value = pdf_value.reindex(columns=pdf_value.columns[::-1])\n        mdf_value = mdf_value.reindex(columns=mdf_value.columns[::-1])\n\n    eval_loc(modin_df, pandas_df, pdf_value, key)\n    eval_loc(modin_df, pandas_df, (mdf_value, pdf_value), key)\n\n\ndef test_loc_6774():\n    modin_df, pandas_df = create_test_dfs(\n        {\"a\": [1, 2, 3, 4, 5], \"b\": [10, 20, 30, 40, 50]}\n    )\n    pandas_df.loc[:, \"c\"] = [10, 20, 30, 40, 51]\n    modin_df.loc[:, \"c\"] = [10, 20, 30, 40, 51]\n    df_equals(modin_df, pandas_df)\n\n    pandas_df.loc[2:, \"y\"] = [30, 40, 51]\n    modin_df.loc[2:, \"y\"] = [30, 40, 51]\n    df_equals(modin_df, pandas_df)\n\n    pandas_df.loc[:, [\"b\", \"c\", \"d\"]] = (\n        pd.DataFrame([[10, 20, 30, 40, 50], [10, 20, 30, 40], [10, 20, 30]])\n        .transpose()\n        .values\n    )\n    modin_df.loc[:, [\"b\", \"c\", \"d\"]] = (\n        pd.DataFrame([[10, 20, 30, 40, 50], [10, 20, 30, 40], [10, 20, 30]])\n        .transpose()\n        .values\n    )\n    df_equals(modin_df, pandas_df)\n\n\ndef test_loc_5829():\n    data = {\"a\": [1, 2, 3, 4, 5], \"b\": [11, 12, 13, 14, 15]}\n    modin_df = pd.DataFrame(data, dtype=object)\n    pandas_df = pandas.DataFrame(data, dtype=object)\n    eval_loc(\n        modin_df,\n        pandas_df,\n     
   value=np.array([[24, 34, 44], [25, 35, 45]]),\n        key=([3, 4], [\"c\", \"d\", \"e\"]),\n    )\n\n\ndef test_loc_7135():\n    data = np.random.randint(0, 100, size=(2**16, 2**8))\n    modin_df, pandas_df = create_test_dfs(data)\n    key = len(pandas_df)\n    eval_loc(\n        modin_df,\n        pandas_df,\n        value=list(range(2**8)),\n        key=key,\n    )\n\n\n# This tests the bug from https://github.com/modin-project/modin/issues/3736\ndef test_loc_setting_single_categorical_column():\n    modin_df = pd.DataFrame({\"status\": [\"a\", \"b\", \"c\"]}, dtype=\"category\")\n    pandas_df = pandas.DataFrame({\"status\": [\"a\", \"b\", \"c\"]}, dtype=\"category\")\n    modin_df.loc[1:3, \"status\"] = \"a\"\n    pandas_df.loc[1:3, \"status\"] = \"a\"\n    df_equals(modin_df, pandas_df)\n\n\ndef test_loc_multi_index():\n    modin_df = pd.read_csv(\n        \"modin/tests/pandas/data/blah.csv\", header=[0, 1, 2, 3], index_col=0\n    )\n    pandas_df = pandas.read_csv(\n        \"modin/tests/pandas/data/blah.csv\", header=[0, 1, 2, 3], index_col=0\n    )\n\n    df_equals(modin_df.loc[1], pandas_df.loc[1])\n    df_equals(modin_df.loc[1, \"Presidents\"], pandas_df.loc[1, \"Presidents\"])\n    df_equals(\n        modin_df.loc[1, (\"Presidents\", \"Pure mentions\")],\n        pandas_df.loc[1, (\"Presidents\", \"Pure mentions\")],\n    )\n    assert (\n        modin_df.loc[1, (\"Presidents\", \"Pure mentions\", \"IND\", \"all\")]\n        == pandas_df.loc[1, (\"Presidents\", \"Pure mentions\", \"IND\", \"all\")]\n    )\n    df_equals(modin_df.loc[(1, 2), \"Presidents\"], pandas_df.loc[(1, 2), \"Presidents\"])\n\n    tuples = [\n        (\"bar\", \"one\"),\n        (\"bar\", \"two\"),\n        (\"bar\", \"three\"),\n        (\"bar\", \"four\"),\n        (\"baz\", \"one\"),\n        (\"baz\", \"two\"),\n        (\"baz\", \"three\"),\n        (\"baz\", \"four\"),\n        (\"foo\", \"one\"),\n        (\"foo\", \"two\"),\n        (\"foo\", \"three\"),\n        
(\"foo\", \"four\"),\n        (\"qux\", \"one\"),\n        (\"qux\", \"two\"),\n        (\"qux\", \"three\"),\n        (\"qux\", \"four\"),\n    ]\n\n    modin_index = pd.MultiIndex.from_tuples(tuples, names=[\"first\", \"second\"])\n    pandas_index = pandas.MultiIndex.from_tuples(tuples, names=[\"first\", \"second\"])\n    frame_data = np.random.randint(0, 100, size=(16, 100))\n    modin_df = pd.DataFrame(\n        frame_data,\n        index=modin_index,\n        columns=[\"col{}\".format(i) for i in range(100)],\n    )\n    pandas_df = pandas.DataFrame(\n        frame_data,\n        index=pandas_index,\n        columns=[\"col{}\".format(i) for i in range(100)],\n    )\n    df_equals(modin_df.loc[\"bar\", \"col1\"], pandas_df.loc[\"bar\", \"col1\"])\n    assert modin_df.loc[(\"bar\", \"one\"), \"col1\"] == pandas_df.loc[(\"bar\", \"one\"), \"col1\"]\n    df_equals(\n        modin_df.loc[\"bar\", (\"col1\", \"col2\")],\n        pandas_df.loc[\"bar\", (\"col1\", \"col2\")],\n    )\n\n    # From issue #1456\n    transposed_modin = modin_df.T\n    transposed_pandas = pandas_df.T\n    df_equals(\n        transposed_modin.loc[transposed_modin.index[:-2], :],\n        transposed_pandas.loc[transposed_pandas.index[:-2], :],\n    )\n\n    # From issue #1610\n    df_equals(modin_df.loc[modin_df.index], pandas_df.loc[pandas_df.index])\n    df_equals(modin_df.loc[modin_df.index[:7]], pandas_df.loc[pandas_df.index[:7]])\n\n\ndef test_loc_multi_index_with_tuples():\n    arrays = [\n        [\"bar\", \"bar\", \"baz\", \"baz\"],\n        [\"one\", \"two\", \"one\", \"two\"],\n    ]\n    nrows = 5\n    columns = pd.MultiIndex.from_tuples(zip(*arrays), names=[\"a\", \"b\"])\n    data = np.arange(0, nrows * len(columns)).reshape(nrows, len(columns))\n    modin_df, pandas_df = create_test_dfs(data, columns=columns)\n    eval_general(modin_df, pandas_df, lambda df: df.loc[:, (\"bar\", \"two\")])\n\n\ndef test_loc_multi_index_rows_with_tuples_5721():\n    arrays = [\n        [\"bar\", 
\"bar\", \"baz\", \"baz\"],\n        [\"one\", \"two\", \"one\", \"two\"],\n    ]\n    ncols = 5\n    index = pd.MultiIndex.from_tuples(zip(*arrays), names=[\"a\", \"b\"])\n    data = np.arange(0, ncols * len(index)).reshape(len(index), ncols)\n    modin_df, pandas_df = create_test_dfs(data, index=index)\n    eval_general(modin_df, pandas_df, lambda df: df.loc[(\"bar\",)])\n    eval_general(modin_df, pandas_df, lambda df: df.loc[(\"bar\", \"two\")])\n\n\ndef test_loc_multi_index_level_two_has_same_name_as_column():\n    eval_general(\n        *create_test_dfs(\n            pandas.DataFrame(\n                [[0]], index=[pd.Index([\"foo\"]), pd.Index([\"bar\"])], columns=[\"bar\"]\n            )\n        ),\n        lambda df: df.loc[(\"foo\", \"bar\")],\n    )\n\n\ndef test_loc_multi_index_duplicate_keys():\n    modin_df, pandas_df = create_test_dfs([1, 2], index=[[\"a\", \"a\"], [\"b\", \"b\"]])\n    eval_general(modin_df, pandas_df, lambda df: df.loc[(\"a\", \"b\"), 0])\n    eval_general(modin_df, pandas_df, lambda df: df.loc[(\"a\", \"b\"), :])\n\n\ndef test_loc_multi_index_both_axes():\n    multi_index = pd.MultiIndex.from_tuples(\n        [(\"r0\", \"rA\"), (\"r1\", \"rB\")], names=[\"Courses\", \"Fee\"]\n    )\n    cols = pd.MultiIndex.from_tuples(\n        [\n            (\"Gasoline\", \"Toyota\"),\n            (\"Gasoline\", \"Ford\"),\n            (\"Electric\", \"Tesla\"),\n            (\"Electric\", \"Nio\"),\n        ]\n    )\n    data = [[100, 300, 900, 400], [200, 500, 300, 600]]\n    modin_df, pandas_df = create_test_dfs(data, columns=cols, index=multi_index)\n    eval_general(modin_df, pandas_df, lambda df: df.loc[(\"r0\", \"rA\"), :])\n    eval_general(modin_df, pandas_df, lambda df: df.loc[:, (\"Gasoline\", \"Toyota\")])\n\n\ndef test_loc_empty():\n    pandas_df = pandas.DataFrame(index=range(5))\n    modin_df = pd.DataFrame(index=range(5))\n\n    df_equals(pandas_df.loc[1], modin_df.loc[1])\n    pandas_df.loc[1] = 3\n    modin_df.loc[1] = 3\n    
df_equals(pandas_df, modin_df)\n\n\n@pytest.mark.parametrize(\"locator_name\", [\"iloc\", \"loc\"])\ndef test_loc_iloc_2064(locator_name):\n    modin_df, pandas_df = create_test_dfs(columns=[\"col1\", \"col2\"])\n    if locator_name == \"iloc\":\n        expected_exception = IndexError(\n            \"index 1 is out of bounds for axis 0 with size 0\"\n        )\n    else:\n        _type = \"int32\" if os.name == \"nt\" else \"int64\"\n        expected_exception = KeyError(\n            f\"None of [Index([1], dtype='{_type}')] are in the [index]\"\n        )\n    eval_general(\n        modin_df,\n        pandas_df,\n        lambda df: getattr(df, locator_name).__setitem__([1], [11, 22]),\n        __inplace__=True,\n        expected_exception=expected_exception,\n    )\n\n\n@pytest.mark.parametrize(\"index\", [[\"row1\", \"row2\", \"row3\"]])\n@pytest.mark.parametrize(\"columns\", [[\"col1\", \"col2\"]])\ndef test_loc_assignment(index, columns):\n    md_df, pd_df = create_test_dfs(index=index, columns=columns)\n    for i, ind in enumerate(index):\n        for j, col in enumerate(columns):\n            value_to_assign = int(str(i) + str(j))\n            md_df.loc[ind][col] = value_to_assign\n            pd_df.loc[ind][col] = value_to_assign\n    df_equals(md_df, pd_df)\n\n\n@pytest.mark.parametrize(\"left, right\", [(2, 1), (6, 1), (lambda df: 70, 1), (90, 70)])\ndef test_loc_insert_row(left, right):\n    # This test case comes from\n    # https://github.com/modin-project/modin/issues/3764\n    pandas_df = pandas.DataFrame([[1, 2, 3], [4, 5, 6]])\n    modin_df = pd.DataFrame([[1, 2, 3], [4, 5, 6]])\n\n    def _test_loc_rows(df):\n        df.loc[left] = df.loc[right]\n        return df\n\n    expected_exception = None\n    if right == 70:\n        pytest.xfail(reason=\"https://github.com/modin-project/modin/issues/7024\")\n    eval_general(\n        modin_df, pandas_df, _test_loc_rows, expected_exception=expected_exception\n    )\n\n\n@pytest.mark.parametrize(\n    
\"columns\", [10, (100, 102), (2, 6), [10, 11, 12], \"a\", [\"b\", \"c\", \"d\"]]\n)\ndef test_loc_insert_col(columns):\n    # This test case comes from\n    # https://github.com/modin-project/modin/issues/3764\n    pandas_df = pandas.DataFrame([[1, 2, 3], [4, 5, 6]])\n    modin_df = pd.DataFrame([[1, 2, 3], [4, 5, 6]])\n\n    if isinstance(columns, tuple) and len(columns) == 2:\n\n        def _test_loc_cols(df):\n            df.loc[:, columns[0] : columns[1]] = 1\n\n    else:\n\n        def _test_loc_cols(df):\n            df.loc[:, columns] = 1\n\n    eval_general(modin_df, pandas_df, _test_loc_cols)\n\n\n@pytest.fixture\ndef loc_iter_dfs():\n    columns = [\"col1\", \"col2\", \"col3\"]\n    index = [\"row1\", \"row2\", \"row3\"]\n    return create_test_dfs(\n        {col: ([idx] * len(index)) for idx, col in enumerate(columns)},\n        columns=columns,\n        index=index,\n    )\n\n\n@pytest.mark.parametrize(\"reverse_order\", [False, True])\n@pytest.mark.parametrize(\"axis\", [0, 1])\ndef test_loc_iter_assignment(loc_iter_dfs, reverse_order, axis):\n    if reverse_order and axis:\n        pytest.xfail(\n            \"Due to internal sorting of lookup values assignment order is lost, see GH-#2552\"\n        )\n\n    md_df, pd_df = loc_iter_dfs\n\n    select = [slice(None), slice(None)]\n    select[axis] = sorted(pd_df.axes[axis][:-1], reverse=reverse_order)\n    select = tuple(select)\n\n    pd_df.loc[select] = pd_df.loc[select] + pd_df.loc[select]\n    md_df.loc[select] = md_df.loc[select] + md_df.loc[select]\n    df_equals(md_df, pd_df)\n\n\n@pytest.mark.parametrize(\"reverse_order\", [False, True])\n@pytest.mark.parametrize(\"axis\", [0, 1])\ndef test_loc_order(loc_iter_dfs, reverse_order, axis):\n    md_df, pd_df = loc_iter_dfs\n\n    select = [slice(None), slice(None)]\n    select[axis] = sorted(pd_df.axes[axis][:-1], reverse=reverse_order)\n    select = tuple(select)\n\n    df_equals(pd_df.loc[select], 
md_df.loc[select])\n\n\n@pytest.mark.gpu\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_loc_nested_assignment(data):\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n    key1 = modin_df.columns[0]\n    key2 = modin_df.columns[1]\n\n    modin_df[key1].loc[0] = 500\n    pandas_df[key1].loc[0] = 500\n    df_equals(modin_df, pandas_df)\n\n    modin_df[key2].loc[0] = None\n    pandas_df[key2].loc[0] = None\n    df_equals(modin_df, pandas_df)\n\n\ndef test_iloc_assignment():\n    modin_df = pd.DataFrame(index=[\"row1\", \"row2\", \"row3\"], columns=[\"col1\", \"col2\"])\n    pandas_df = pandas.DataFrame(\n        index=[\"row1\", \"row2\", \"row3\"], columns=[\"col1\", \"col2\"]\n    )\n    modin_df.iloc[0][\"col1\"] = 11\n    modin_df.iloc[1][\"col1\"] = 21\n    modin_df.iloc[2][\"col1\"] = 31\n    modin_df.iloc[lambda df: 0][\"col2\"] = 12\n    modin_df.iloc[1][lambda df: [\"col2\"]] = 22\n    modin_df.iloc[lambda df: 2][lambda df: [\"col2\"]] = 32\n    pandas_df.iloc[0][\"col1\"] = 11\n    pandas_df.iloc[1][\"col1\"] = 21\n    pandas_df.iloc[2][\"col1\"] = 31\n    pandas_df.iloc[lambda df: 0][\"col2\"] = 12\n    pandas_df.iloc[1][lambda df: [\"col2\"]] = 22\n    pandas_df.iloc[lambda df: 2][lambda df: [\"col2\"]] = 32\n\n    df_equals(modin_df, pandas_df)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_iloc_nested_assignment(data):\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n    key1 = modin_df.columns[0]\n    key2 = modin_df.columns[1]\n\n    modin_df[key1].iloc[0] = 500\n    pandas_df[key1].iloc[0] = 500\n    df_equals(modin_df, pandas_df)\n\n    modin_df[key2].iloc[0] = None\n    pandas_df[key2].iloc[0] = None\n    df_equals(modin_df, pandas_df)\n\n\ndef test_iloc_empty():\n    pandas_df = pandas.DataFrame(index=range(5))\n    modin_df = pd.DataFrame(index=range(5))\n\n    df_equals(pandas_df.iloc[1], modin_df.iloc[1])\n    
pandas_df.iloc[1] = 3\n    modin_df.iloc[1] = 3\n    df_equals(pandas_df, modin_df)\n\n\ndef test_iloc_loc_key_length_except():\n    modin_ser, pandas_ser = pd.Series(0), pandas.Series(0)\n    eval_general(\n        modin_ser,\n        pandas_ser,\n        lambda ser: ser.iloc[0, 0],\n        expected_exception=pandas.errors.IndexingError(\"Too many indexers\"),\n    )\n    eval_general(\n        modin_ser,\n        pandas_ser,\n        lambda ser: ser.loc[0, 0],\n        expected_exception=pandas.errors.IndexingError(\"Too many indexers\"),\n    )\n\n\ndef test_loc_series():\n    md_df, pd_df = create_test_dfs({\"a\": [1, 2], \"b\": [3, 4]})\n\n    pd_df.loc[pd_df[\"a\"] > 1, \"b\"] = np.log(pd_df[\"b\"])\n    md_df.loc[md_df[\"a\"] > 1, \"b\"] = np.log(md_df[\"b\"])\n\n    df_equals(pd_df, md_df)\n\n\n@pytest.mark.parametrize(\"locator_name\", [\"loc\", \"iloc\"])\n@pytest.mark.parametrize(\n    \"slice_indexer\",\n    [\n        slice(None, None, -2),\n        slice(1, 10, None),\n        slice(None, 10, None),\n        slice(10, None, None),\n        slice(10, None, -2),\n        slice(-10, None, -2),\n        slice(None, 1_000_000_000, None),\n    ],\n)\ndef test_loc_iloc_slice_indexer(locator_name, slice_indexer):\n    md_df, pd_df = create_test_dfs(test_data_values[0])\n    # Shifting the index, so labels won't match its position\n    shifted_index = pandas.RangeIndex(1, len(md_df) + 1)\n    md_df.index = shifted_index\n    pd_df.index = shifted_index\n\n    eval_general(md_df, pd_df, lambda df: getattr(df, locator_name)[slice_indexer])\n\n\n@pytest.mark.parametrize(\n    \"indexer_size\",\n    [\n        1,\n        2,\n        NROWS,\n        pytest.param(\n            NROWS + 1,\n            marks=pytest.mark.xfail(\n                reason=\"https://github.com/modin-project/modin/issues/5739\", strict=True\n            ),\n        ),\n    ],\n)\nclass TestLocRangeLikeIndexer:\n    \"\"\"Test cases related to 
https://github.com/modin-project/modin/issues/5702\"\"\"\n\n    def test_range_index_getitem_single_value(self, indexer_size):\n        eval_general(\n            *create_test_dfs(test_data[\"int_data\"]),\n            lambda df: df.loc[pd.RangeIndex(indexer_size)],\n        )\n\n    def test_range_index_getitem_two_values(self, indexer_size):\n        eval_general(\n            *create_test_dfs(test_data[\"int_data\"]),\n            lambda df: df.loc[pd.RangeIndex(indexer_size), :],\n        )\n\n    def test_range_getitem_single_value(self, indexer_size):\n        eval_general(\n            *create_test_dfs(test_data[\"int_data\"]),\n            lambda df: df.loc[range(indexer_size)],\n        )\n\n    def test_range_getitem_two_values_5702(self, indexer_size):\n        eval_general(\n            *create_test_dfs(test_data[\"int_data\"]),\n            lambda df: df.loc[range(indexer_size), :],\n        )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_pop(request, data):\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n\n    if \"empty_data\" not in request.node.name:\n        key = modin_df.columns[0]\n        temp_modin_df = modin_df.copy()\n        temp_pandas_df = pandas_df.copy()\n        modin_popped = temp_modin_df.pop(key)\n        pandas_popped = temp_pandas_df.pop(key)\n        df_equals(modin_popped, pandas_popped)\n        df_equals(temp_modin_df, temp_pandas_df)\n\n\ndef test_reindex():\n    frame_data = {\n        \"col1\": [0, 1, 2, 3],\n        \"col2\": [4, 5, 6, 7],\n        \"col3\": [8, 9, 10, 11],\n        \"col4\": [12, 13, 14, 15],\n        \"col5\": [0, 0, 0, 0],\n    }\n    pandas_df = pandas.DataFrame(frame_data)\n    modin_df = pd.DataFrame(frame_data)\n\n    df_equals(modin_df.reindex([0, 3, 2, 1]), pandas_df.reindex([0, 3, 2, 1]))\n    df_equals(modin_df.reindex([0, 6, 2]), pandas_df.reindex([0, 6, 2]))\n    df_equals(\n        modin_df.reindex([\"col1\", \"col3\", 
\"col4\", \"col2\"], axis=1),\n        pandas_df.reindex([\"col1\", \"col3\", \"col4\", \"col2\"], axis=1),\n    )\n    df_equals(\n        modin_df.reindex([\"col1\", \"col7\", \"col4\", \"col8\"], axis=1),\n        pandas_df.reindex([\"col1\", \"col7\", \"col4\", \"col8\"], axis=1),\n    )\n    df_equals(\n        modin_df.reindex(index=[0, 1, 5], columns=[\"col1\", \"col7\", \"col4\", \"col8\"]),\n        pandas_df.reindex(index=[0, 1, 5], columns=[\"col1\", \"col7\", \"col4\", \"col8\"]),\n    )\n    df_equals(\n        modin_df.T.reindex([\"col1\", \"col7\", \"col4\", \"col8\"], axis=0),\n        pandas_df.T.reindex([\"col1\", \"col7\", \"col4\", \"col8\"], axis=0),\n    )\n\n\ndef test_reindex_4438():\n    index = pd.date_range(end=\"1/1/2018\", periods=3, freq=\"h\", name=\"some meta\")\n    new_index = list(reversed(index))\n\n    # index case\n    modin_df = pd.DataFrame([1, 2, 3], index=index)\n    pandas_df = pandas.DataFrame([1, 2, 3], index=index)\n    new_modin_df = modin_df.reindex(new_index)\n    new_pandas_df = pandas_df.reindex(new_index)\n    df_equals(new_modin_df, new_pandas_df)\n\n    # column case\n    modin_df = pd.DataFrame(np.array([[1], [2], [3]]).T, columns=index)\n    pandas_df = pandas.DataFrame(np.array([[1], [2], [3]]).T, columns=index)\n    new_modin_df = modin_df.reindex(columns=new_index)\n    new_pandas_df = pandas_df.reindex(columns=new_index)\n    df_equals(new_modin_df, new_pandas_df)\n\n    # multiindex case\n    multi_index = pandas.MultiIndex.from_arrays(\n        [(\"a\", \"b\", \"c\"), (\"a\", \"b\", \"c\")], names=[\"first\", \"second\"]\n    )\n    new_multi_index = list(reversed(multi_index))\n\n    modin_df = pd.DataFrame([1, 2, 3], index=multi_index)\n    pandas_df = pandas.DataFrame([1, 2, 3], index=multi_index)\n    new_modin_df = modin_df.reindex(new_multi_index)\n    new_pandas_df = pandas_df.reindex(new_multi_index)\n    df_equals(new_modin_df, new_pandas_df)\n\n    # multicolumn case\n    modin_df = 
pd.DataFrame(np.array([[1], [2], [3]]).T, columns=multi_index)\n    pandas_df = pandas.DataFrame(np.array([[1], [2], [3]]).T, columns=multi_index)\n    new_modin_df = modin_df.reindex(columns=new_multi_index)\n    new_pandas_df = pandas_df.reindex(columns=new_multi_index)\n    df_equals(new_modin_df, new_pandas_df)\n\n    # index + multiindex case\n    modin_df = pd.DataFrame([1, 2, 3], index=index)\n    pandas_df = pandas.DataFrame([1, 2, 3], index=index)\n    new_modin_df = modin_df.reindex(new_multi_index)\n    new_pandas_df = pandas_df.reindex(new_multi_index)\n    df_equals(new_modin_df, new_pandas_df)\n\n\ndef test_reindex_like():\n    o_data = [\n        [24.3, 75.7, \"high\"],\n        [31, 87.8, \"high\"],\n        [22, 71.6, \"medium\"],\n        [35, 95, \"medium\"],\n    ]\n    o_columns = [\"temp_celsius\", \"temp_fahrenheit\", \"windspeed\"]\n    o_index = pd.date_range(start=\"2014-02-12\", end=\"2014-02-15\", freq=\"D\")\n    new_data = [[28, \"low\"], [30, \"low\"], [35.1, \"medium\"]]\n    new_columns = [\"temp_celsius\", \"windspeed\"]\n    new_index = pd.DatetimeIndex([\"2014-02-12\", \"2014-02-13\", \"2014-02-15\"])\n    modin_df1 = pd.DataFrame(o_data, columns=o_columns, index=o_index)\n    modin_df2 = pd.DataFrame(new_data, columns=new_columns, index=new_index)\n    modin_result = modin_df2.reindex_like(modin_df1)\n\n    pandas_df1 = pandas.DataFrame(o_data, columns=o_columns, index=o_index)\n    pandas_df2 = pandas.DataFrame(new_data, columns=new_columns, index=new_index)\n    pandas_result = pandas_df2.reindex_like(pandas_df1)\n    df_equals(modin_result, pandas_result)\n\n\ndef test_rename_sanity():\n    source_df = pandas.DataFrame(test_data[\"int_data\"])[\n        [\"col1\", \"index\", \"col3\", \"col4\"]\n    ]\n    mapping = {\"col1\": \"a\", \"index\": \"b\", \"col3\": \"c\", \"col4\": \"d\"}\n\n    modin_df = pd.DataFrame(source_df)\n    df_equals(modin_df.rename(columns=mapping), source_df.rename(columns=mapping))\n\n    renamed2 = 
source_df.rename(columns=str.lower)\n    df_equals(modin_df.rename(columns=str.lower), renamed2)\n\n    modin_df = pd.DataFrame(renamed2)\n    df_equals(modin_df.rename(columns=str.upper), renamed2.rename(columns=str.upper))\n\n    # index\n    data = {\"A\": {\"foo\": 0, \"bar\": 1}}\n\n    # gets sorted alphabetical\n    df = pandas.DataFrame(data)\n    modin_df = pd.DataFrame(data)\n    assert_index_equal(\n        modin_df.rename(index={\"foo\": \"bar\", \"bar\": \"foo\"}).index,\n        df.rename(index={\"foo\": \"bar\", \"bar\": \"foo\"}).index,\n    )\n\n    assert_index_equal(\n        modin_df.rename(index=str.upper).index, df.rename(index=str.upper).index\n    )\n\n    # Using the `mapper` functionality with `axis`\n    assert_index_equal(\n        modin_df.rename(str.upper, axis=0).index, df.rename(str.upper, axis=0).index\n    )\n    assert_index_equal(\n        modin_df.rename(str.upper, axis=1).columns,\n        df.rename(str.upper, axis=1).columns,\n    )\n    assert_index_equal(modin_df.rename(str.upper).index, df.rename(str.upper).index)\n\n    # have to pass something\n    with pytest.raises(TypeError):\n        modin_df.rename()\n\n    # partial columns\n    source_df.rename(columns={\"col3\": \"foo\", \"col4\": \"bar\"})\n    modin_df = pd.DataFrame(source_df)\n    assert_index_equal(\n        modin_df.rename(columns={\"col3\": \"foo\", \"col4\": \"bar\"}).index,\n        source_df.rename(columns={\"col3\": \"foo\", \"col4\": \"bar\"}).index,\n    )\n\n    # other axis\n    source_df.T.rename(index={\"col3\": \"foo\", \"col4\": \"bar\"})\n    assert_index_equal(\n        source_df.T.rename(index={\"col3\": \"foo\", \"col4\": \"bar\"}).index,\n        modin_df.T.rename(index={\"col3\": \"foo\", \"col4\": \"bar\"}).index,\n    )\n\n    # index with name\n    index = pandas.Index([\"foo\", \"bar\"], name=\"name\")\n    renamer = pandas.DataFrame(data, index=index)\n    modin_df = pd.DataFrame(data, index=index)\n\n    renamed = 
renamer.rename(index={\"foo\": \"bar\", \"bar\": \"foo\"})\n    modin_renamed = modin_df.rename(index={\"foo\": \"bar\", \"bar\": \"foo\"})\n    assert_index_equal(renamed.index, modin_renamed.index)\n\n    assert renamed.index.name == modin_renamed.index.name\n\n\ndef test_rename_multiindex():\n    tuples_index = [(\"foo1\", \"bar1\"), (\"foo2\", \"bar2\")]\n    tuples_columns = [(\"fizz1\", \"buzz1\"), (\"fizz2\", \"buzz2\")]\n    index = pandas.MultiIndex.from_tuples(tuples_index, names=[\"foo\", \"bar\"])\n    columns = pandas.MultiIndex.from_tuples(tuples_columns, names=[\"fizz\", \"buzz\"])\n\n    frame_data = [(0, 0), (1, 1)]\n    df = pandas.DataFrame(frame_data, index=index, columns=columns)\n    modin_df = pd.DataFrame(frame_data, index=index, columns=columns)\n\n    #\n    # without specifying level -> accross all levels\n    renamed = df.rename(\n        index={\"foo1\": \"foo3\", \"bar2\": \"bar3\"},\n        columns={\"fizz1\": \"fizz3\", \"buzz2\": \"buzz3\"},\n    )\n    modin_renamed = modin_df.rename(\n        index={\"foo1\": \"foo3\", \"bar2\": \"bar3\"},\n        columns={\"fizz1\": \"fizz3\", \"buzz2\": \"buzz3\"},\n    )\n    assert_index_equal(renamed.index, modin_renamed.index)\n\n    renamed = df.rename(\n        index={\"foo1\": \"foo3\", \"bar2\": \"bar3\"},\n        columns={\"fizz1\": \"fizz3\", \"buzz2\": \"buzz3\"},\n    )\n    assert_index_equal(renamed.columns, modin_renamed.columns)\n    assert renamed.index.names == modin_renamed.index.names\n    assert renamed.columns.names == modin_renamed.columns.names\n\n    #\n    # with specifying a level\n\n    # dict\n    renamed = df.rename(columns={\"fizz1\": \"fizz3\", \"buzz2\": \"buzz3\"}, level=0)\n    modin_renamed = modin_df.rename(\n        columns={\"fizz1\": \"fizz3\", \"buzz2\": \"buzz3\"}, level=0\n    )\n    assert_index_equal(renamed.columns, modin_renamed.columns)\n    renamed = df.rename(columns={\"fizz1\": \"fizz3\", \"buzz2\": \"buzz3\"}, level=\"fizz\")\n    
modin_renamed = modin_df.rename(\n        columns={\"fizz1\": \"fizz3\", \"buzz2\": \"buzz3\"}, level=\"fizz\"\n    )\n    assert_index_equal(renamed.columns, modin_renamed.columns)\n\n    renamed = df.rename(columns={\"fizz1\": \"fizz3\", \"buzz2\": \"buzz3\"}, level=1)\n    modin_renamed = modin_df.rename(\n        columns={\"fizz1\": \"fizz3\", \"buzz2\": \"buzz3\"}, level=1\n    )\n    assert_index_equal(renamed.columns, modin_renamed.columns)\n    renamed = df.rename(columns={\"fizz1\": \"fizz3\", \"buzz2\": \"buzz3\"}, level=\"buzz\")\n    modin_renamed = modin_df.rename(\n        columns={\"fizz1\": \"fizz3\", \"buzz2\": \"buzz3\"}, level=\"buzz\"\n    )\n    assert_index_equal(renamed.columns, modin_renamed.columns)\n\n    # function\n    func = str.upper\n    renamed = df.rename(columns=func, level=0)\n    modin_renamed = modin_df.rename(columns=func, level=0)\n    assert_index_equal(renamed.columns, modin_renamed.columns)\n    renamed = df.rename(columns=func, level=\"fizz\")\n    modin_renamed = modin_df.rename(columns=func, level=\"fizz\")\n    assert_index_equal(renamed.columns, modin_renamed.columns)\n\n    renamed = df.rename(columns=func, level=1)\n    modin_renamed = modin_df.rename(columns=func, level=1)\n    assert_index_equal(renamed.columns, modin_renamed.columns)\n    renamed = df.rename(columns=func, level=\"buzz\")\n    modin_renamed = modin_df.rename(columns=func, level=\"buzz\")\n    assert_index_equal(renamed.columns, modin_renamed.columns)\n\n    # index\n    renamed = df.rename(index={\"foo1\": \"foo3\", \"bar2\": \"bar3\"}, level=0)\n    modin_renamed = modin_df.rename(index={\"foo1\": \"foo3\", \"bar2\": \"bar3\"}, level=0)\n    assert_index_equal(modin_renamed.index, renamed.index)\n\n\n@pytest.mark.xfail(reason=\"Pandas does not pass this test\")\ndef test_rename_nocopy():\n    source_df = pandas.DataFrame(test_data[\"int_data\"])[\n        [\"col1\", \"index\", \"col3\", \"col4\"]\n    ]\n    modin_df = pd.DataFrame(source_df)\n    
modin_renamed = modin_df.rename(columns={\"col3\": \"foo\"}, copy=False)\n    modin_renamed[\"foo\"] = 1\n    assert (modin_df[\"col3\"] == 1).all()\n\n\ndef test_rename_inplace():\n    source_df = pandas.DataFrame(test_data[\"int_data\"])[\n        [\"col1\", \"index\", \"col3\", \"col4\"]\n    ]\n    modin_df = pd.DataFrame(source_df)\n\n    df_equals(\n        modin_df.rename(columns={\"col3\": \"foo\"}),\n        source_df.rename(columns={\"col3\": \"foo\"}),\n    )\n\n    frame = source_df.copy()\n    modin_frame = modin_df.copy()\n    frame.rename(columns={\"col3\": \"foo\"}, inplace=True)\n    modin_frame.rename(columns={\"col3\": \"foo\"}, inplace=True)\n\n    df_equals(modin_frame, frame)\n\n\ndef test_rename_bug():\n    # rename set ref_locs, and set_index was not resetting\n    frame_data = {0: [\"foo\", \"bar\"], 1: [\"bah\", \"bas\"], 2: [1, 2]}\n    df = pandas.DataFrame(frame_data)\n    modin_df = pd.DataFrame(frame_data)\n    df = df.rename(columns={0: \"a\"})\n    df = df.rename(columns={1: \"b\"})\n    df = df.set_index([\"a\", \"b\"])\n    df.columns = [\"2001-01-01\"]\n\n    modin_df = modin_df.rename(columns={0: \"a\"})\n    modin_df = modin_df.rename(columns={1: \"b\"})\n    modin_df = modin_df.set_index([\"a\", \"b\"])\n    modin_df.columns = [\"2001-01-01\"]\n\n    df_equals(modin_df, df)\n\n\ndef test_index_to_datetime_using_set_index():\n    data = {\"YEAR\": [\"1992\", \"1993\", \"1994\"], \"ALIENS\": [1, 99, 1]}\n    modin_df_years = pd.DataFrame(data=data)\n    df_years = pandas.DataFrame(data=data)\n    modin_df_years = modin_df_years.set_index(\"YEAR\")\n    df_years = df_years.set_index(\"YEAR\")\n    modin_datetime_index = pd.to_datetime(modin_df_years.index, format=\"%Y\")\n    pandas_datetime_index = pandas.to_datetime(df_years.index, format=\"%Y\")\n\n    modin_df_years.index = modin_datetime_index\n    df_years.index = pandas_datetime_index\n\n    modin_df_years.set_index(modin_datetime_index)\n    
df_years.set_index(pandas_datetime_index)\n    df_equals(modin_df_years, df_years)\n\n\ndef test_rename_axis():\n    data = {\"num_legs\": [4, 4, 2], \"num_arms\": [0, 0, 2]}\n    index = [\"dog\", \"cat\", \"monkey\"]\n    modin_df = pd.DataFrame(data, index)\n    pandas_df = pandas.DataFrame(data, index)\n    df_equals(modin_df.rename_axis(\"animal\"), pandas_df.rename_axis(\"animal\"))\n    df_equals(\n        modin_df.rename_axis(\"limbs\", axis=\"columns\"),\n        pandas_df.rename_axis(\"limbs\", axis=\"columns\"),\n    )\n\n    modin_df.rename_axis(\"limbs\", axis=\"columns\", inplace=True)\n    pandas_df.rename_axis(\"limbs\", axis=\"columns\", inplace=True)\n    df_equals(modin_df, pandas_df)\n\n    new_index = pd.MultiIndex.from_product(\n        [[\"mammal\"], [\"dog\", \"cat\", \"monkey\"]], names=[\"type\", \"name\"]\n    )\n    modin_df.index = new_index\n    pandas_df.index = new_index\n\n    df_equals(\n        modin_df.rename_axis(index={\"type\": \"class\"}),\n        pandas_df.rename_axis(index={\"type\": \"class\"}),\n    )\n    df_equals(\n        modin_df.rename_axis(columns=str.upper),\n        pandas_df.rename_axis(columns=str.upper),\n    )\n    df_equals(\n        modin_df.rename_axis(columns=[str.upper(o) for o in modin_df.columns.names]),\n        pandas_df.rename_axis(columns=[str.upper(o) for o in pandas_df.columns.names]),\n    )\n\n    with pytest.raises(ValueError):\n        df_equals(\n            modin_df.rename_axis(str.upper, axis=1),\n            pandas_df.rename_axis(str.upper, axis=1),\n        )\n\n\ndef test_rename_axis_inplace():\n    test_frame = pandas.DataFrame(test_data[\"int_data\"])\n    modin_df = pd.DataFrame(test_frame)\n\n    result = test_frame.copy()\n    modin_result = modin_df.copy()\n    no_return = result.rename_axis(\"foo\", inplace=True)\n    modin_no_return = modin_result.rename_axis(\"foo\", inplace=True)\n\n    assert no_return is modin_no_return\n    df_equals(modin_result, result)\n\n    result = 
test_frame.copy()\n    modin_result = modin_df.copy()\n    no_return = result.rename_axis(\"bar\", axis=1, inplace=True)\n    modin_no_return = modin_result.rename_axis(\"bar\", axis=1, inplace=True)\n\n    assert no_return is modin_no_return\n    df_equals(modin_result, result)\n\n\ndef test_rename_issue5600():\n    # Check the issue for more details\n    # https://github.com/modin-project/modin/issues/5600\n    df = pd.DataFrame({\"a\": [1, 2]})\n    df_renamed = df.rename(columns={\"a\": \"new_a\"}, copy=True, inplace=False)\n\n    # Check that the source frame was untouched\n    assert df.dtypes.keys().tolist() == [\"a\"]\n    assert df.columns.tolist() == [\"a\"]\n\n    assert df_renamed.dtypes.keys().tolist() == [\"new_a\"]\n    assert df_renamed.columns.tolist() == [\"new_a\"]\n\n\ndef test_reorder_levels():\n    data = np.random.randint(1, 100, 12)\n    modin_df = pd.DataFrame(\n        data,\n        index=pd.MultiIndex.from_tuples(\n            [\n                (num, letter, color)\n                for num in range(1, 3)\n                for letter in [\"a\", \"b\", \"c\"]\n                for color in [\"Red\", \"Green\"]\n            ],\n            names=[\"Number\", \"Letter\", \"Color\"],\n        ),\n    )\n    pandas_df = pandas.DataFrame(\n        data,\n        index=pandas.MultiIndex.from_tuples(\n            [\n                (num, letter, color)\n                for num in range(1, 3)\n                for letter in [\"a\", \"b\", \"c\"]\n                for color in [\"Red\", \"Green\"]\n            ],\n            names=[\"Number\", \"Letter\", \"Color\"],\n        ),\n    )\n    df_equals(\n        modin_df.reorder_levels([\"Letter\", \"Color\", \"Number\"]),\n        pandas_df.reorder_levels([\"Letter\", \"Color\", \"Number\"]),\n    )\n\n\ndef test_reindex_multiindex():\n    data1, data2 = np.random.randint(1, 20, (5, 5)), np.random.randint(10, 25, 6)\n    index = np.array([\"AUD\", \"BRL\", \"CAD\", \"EUR\", \"INR\"])\n    modin_midx = 
pd.MultiIndex.from_product(\n        [[\"Bank_1\", \"Bank_2\"], [\"AUD\", \"CAD\", \"EUR\"]], names=[\"Bank\", \"Curency\"]\n    )\n    pandas_midx = pandas.MultiIndex.from_product(\n        [[\"Bank_1\", \"Bank_2\"], [\"AUD\", \"CAD\", \"EUR\"]], names=[\"Bank\", \"Curency\"]\n    )\n    modin_df1, modin_df2 = (\n        pd.DataFrame(data=data1, index=index, columns=index),\n        pd.DataFrame(data2, modin_midx),\n    )\n    pandas_df1, pandas_df2 = (\n        pandas.DataFrame(data=data1, index=index, columns=index),\n        pandas.DataFrame(data2, pandas_midx),\n    )\n    modin_df2.columns, pandas_df2.columns = [\"Notional\"], [\"Notional\"]\n    md_midx = pd.MultiIndex.from_product([modin_df2.index.levels[0], modin_df1.index])\n    pd_midx = pandas.MultiIndex.from_product(\n        [pandas_df2.index.levels[0], pandas_df1.index]\n    )\n    # reindex without axis, index, or columns\n    modin_result = modin_df1.reindex(md_midx, fill_value=0)\n    pandas_result = pandas_df1.reindex(pd_midx, fill_value=0)\n    df_equals(modin_result, pandas_result)\n    # reindex with only axis\n    modin_result = modin_df1.reindex(md_midx, fill_value=0, axis=0)\n    pandas_result = pandas_df1.reindex(pd_midx, fill_value=0, axis=0)\n    df_equals(modin_result, pandas_result)\n    # reindex with axis and level\n    modin_result = modin_df1.reindex(md_midx, fill_value=0, axis=0, level=0)\n    pandas_result = pandas_df1.reindex(pd_midx, fill_value=0, axis=0, level=0)\n    df_equals(modin_result, pandas_result)\n\n\n@pytest.mark.parametrize(\"test_async_reset_index\", [False, True])\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_reset_index(data, test_async_reset_index):\n    modin_df, pandas_df = create_test_dfs(data)\n    if test_async_reset_index:\n        modin_df._query_compiler.set_frame_index_cache(None)\n    modin_result = modin_df.reset_index(inplace=False)\n    pandas_result = pandas_df.reset_index(inplace=False)\n    
df_equals(modin_result, pandas_result)\n\n    modin_df_cp = modin_df.copy()\n    pd_df_cp = pandas_df.copy()\n    if test_async_reset_index:\n        modin_df._query_compiler.set_frame_index_cache(None)\n    modin_df_cp.reset_index(inplace=True)\n    pd_df_cp.reset_index(inplace=True)\n    df_equals(modin_df_cp, pd_df_cp)\n\n\n@pytest.mark.parametrize(\n    \"data\",\n    [\n        test_data[\"int_data\"],\n        test_data[\"float_nan_data\"],\n    ],\n)\ndef test_reset_index_multiindex_groupby(data):\n    # GH#4394\n    modin_df, pandas_df = create_test_dfs(data)\n    modin_df.index = pd.MultiIndex.from_tuples(\n        [(i // 10, i // 5, i) for i in range(len(modin_df))]\n    )\n    pandas_df.index = pandas.MultiIndex.from_tuples(\n        [(i // 10, i // 5, i) for i in range(len(pandas_df))]\n    )\n    eval_general(\n        modin_df,\n        pandas_df,\n        lambda df: df.reset_index().groupby(list(df.columns[:2])).count(),\n    )\n\n\n@pytest.mark.parametrize(\"test_async_reset_index\", [False, True])\n@pytest.mark.parametrize(\n    \"data\",\n    [\n        pytest.param(\n            test_data[\"int_data\"],\n            marks=pytest.mark.exclude_by_default,\n        ),\n        test_data[\"float_nan_data\"],\n    ],\n    ids=[\"int_data\", \"float_nan_data\"],\n)\n@pytest.mark.parametrize(\"nlevels\", [3])\n@pytest.mark.parametrize(\"columns_multiindex\", [True, False])\n@pytest.mark.parametrize(\n    \"level\",\n    [\n        \"no_level\",\n        None,\n        0,\n        1,\n        2,\n        [2, 0],\n        [2, 1],\n        [1, 0],\n        pytest.param(\n            [2, 1, 2],\n            marks=pytest.mark.exclude_by_default,\n        ),\n        pytest.param(\n            [0, 0, 0, 0],\n            marks=pytest.mark.exclude_by_default,\n        ),\n        pytest.param(\n            [\"level_name_1\"],\n            marks=pytest.mark.exclude_by_default,\n        ),\n        pytest.param(\n            [\"level_name_2\", 
\"level_name_1\"],\n            marks=pytest.mark.exclude_by_default,\n        ),\n        pytest.param(\n            [2, \"level_name_0\"],\n            marks=pytest.mark.exclude_by_default,\n        ),\n    ],\n)\n@pytest.mark.parametrize(\"col_level\", [\"no_col_level\", 0, 1, 2])\n@pytest.mark.parametrize(\"col_fill\", [\"no_col_fill\", None, 0, \"new\"])\n@pytest.mark.parametrize(\"drop\", [False])\n@pytest.mark.parametrize(\n    \"multiindex_levels_names_max_levels\",\n    [\n        0,\n        1,\n        2,\n        pytest.param(3, marks=pytest.mark.exclude_by_default),\n        pytest.param(4, marks=pytest.mark.exclude_by_default),\n    ],\n)\n@pytest.mark.parametrize(\n    \"none_in_index_names\",\n    [\n        pytest.param(\n            False,\n            marks=pytest.mark.exclude_by_default,\n        ),\n        True,\n        \"mixed_1st_None\",\n        pytest.param(\n            \"mixed_2nd_None\",\n            marks=pytest.mark.exclude_by_default,\n        ),\n    ],\n)\ndef test_reset_index_with_multi_index_no_drop(\n    data,\n    nlevels,\n    columns_multiindex,\n    level,\n    col_level,\n    col_fill,\n    drop,\n    multiindex_levels_names_max_levels,\n    none_in_index_names,\n    test_async_reset_index,\n):\n    data_rows = len(data[list(data.keys())[0]])\n    index = generate_multiindex(data_rows, nlevels=nlevels)\n    data_columns = len(data.keys())\n    columns = (\n        generate_multiindex(data_columns, nlevels=nlevels)\n        if columns_multiindex\n        else pandas.RangeIndex(0, data_columns)\n    )\n    # Replace original data columns with generated\n    data = {columns[ind]: data[key] for ind, key in enumerate(data)}\n    index.names = (\n        [f\"level_{i}\" for i in range(index.nlevels)]\n        if multiindex_levels_names_max_levels == 0\n        else [\n            (\n                tuple(\n                    [\n                        f\"level_{i}_name_{j}\"\n                        for j in range(\n            
                0,\n                            max(\n                                multiindex_levels_names_max_levels + 1 - index.nlevels,\n                                0,\n                            )\n                            + i,\n                        )\n                    ]\n                )\n                if max(multiindex_levels_names_max_levels + 1 - index.nlevels, 0) + i\n                > 0\n                else f\"level_{i}\"\n            )\n            for i in range(index.nlevels)\n        ]\n    )\n\n    if none_in_index_names is True:\n        index.names = [None] * len(index.names)\n    elif none_in_index_names:\n        names_list = list(index.names)\n        start_index = 0 if none_in_index_names == \"mixed_1st_None\" else 1\n        names_list[start_index::2] = [None] * len(names_list[start_index::2])\n        index.names = names_list\n\n    modin_df = pd.DataFrame(data, index=index, columns=columns)\n    pandas_df = pandas.DataFrame(data, index=index, columns=columns)\n\n    if isinstance(level, list):\n        level = [\n            (\n                index.names[int(x[len(\"level_name_\") :])]\n                if isinstance(x, str) and x.startswith(\"level_name_\")\n                else x\n            )\n            for x in level\n        ]\n\n    kwargs = {\"drop\": drop}\n    if level != \"no_level\":\n        kwargs[\"level\"] = level\n    if col_level != \"no_col_level\":\n        kwargs[\"col_level\"] = col_level\n    if col_fill != \"no_col_fill\":\n        kwargs[\"col_fill\"] = col_fill\n    if test_async_reset_index:\n        modin_df._query_compiler.set_frame_index_cache(None)\n    eval_general(\n        modin_df,\n        pandas_df,\n        lambda df: df.reset_index(**kwargs),\n        # https://github.com/modin-project/modin/issues/5960\n        comparator_kwargs={\"check_dtypes\": False},\n    )\n\n\n@pytest.mark.parametrize(\"test_async_reset_index\", [False, True])\n@pytest.mark.parametrize(\n    \"data\",\n    
[\n        pytest.param(\n            test_data[\"int_data\"],\n            marks=pytest.mark.exclude_by_default,\n        ),\n        test_data[\"float_nan_data\"],\n    ],\n    ids=[\"int_data\", \"float_nan_data\"],\n)\n@pytest.mark.parametrize(\"nlevels\", [3])\n@pytest.mark.parametrize(\n    \"level\",\n    [\n        \"no_level\",\n        None,\n        0,\n        1,\n        2,\n        [2, 0],\n        [2, 1],\n        [1, 0],\n        pytest.param(\n            [2, 1, 2],\n            marks=pytest.mark.exclude_by_default,\n        ),\n        pytest.param(\n            [0, 0, 0, 0],\n            marks=pytest.mark.exclude_by_default,\n        ),\n        pytest.param(\n            [\"level_name_1\"],\n            marks=pytest.mark.exclude_by_default,\n        ),\n        pytest.param(\n            [\"level_name_2\", \"level_name_1\"],\n            marks=pytest.mark.exclude_by_default,\n        ),\n        pytest.param(\n            [2, \"level_name_0\"],\n            marks=pytest.mark.exclude_by_default,\n        ),\n    ],\n)\n@pytest.mark.parametrize(\n    \"multiindex_levels_names_max_levels\",\n    [\n        0,\n        1,\n        2,\n        pytest.param(3, marks=pytest.mark.exclude_by_default),\n        pytest.param(4, marks=pytest.mark.exclude_by_default),\n    ],\n)\n@pytest.mark.parametrize(\n    \"none_in_index_names\",\n    [\n        pytest.param(\n            False,\n            marks=pytest.mark.exclude_by_default,\n        ),\n        True,\n        \"mixed_1st_None\",\n        pytest.param(\n            \"mixed_2nd_None\",\n            marks=pytest.mark.exclude_by_default,\n        ),\n    ],\n)\ndef test_reset_index_with_multi_index_drop(\n    data,\n    nlevels,\n    level,\n    multiindex_levels_names_max_levels,\n    none_in_index_names,\n    test_async_reset_index,\n):\n    test_reset_index_with_multi_index_no_drop(\n        data,\n        nlevels,\n        True,\n        level,\n        \"no_col_level\",\n        \"no_col_fill\",\n 
       True,\n        multiindex_levels_names_max_levels,\n        none_in_index_names,\n        test_async_reset_index,\n    )\n\n\n@pytest.mark.parametrize(\"test_async_reset_index\", [False, True])\n@pytest.mark.parametrize(\"index_levels_names_max_levels\", [0, 1, 2])\ndef test_reset_index_with_named_index(\n    index_levels_names_max_levels, test_async_reset_index\n):\n    modin_df = pd.DataFrame(test_data_values[0])\n    pandas_df = pandas.DataFrame(test_data_values[0])\n\n    index_name = (\n        tuple([f\"name_{j}\" for j in range(0, index_levels_names_max_levels)])\n        if index_levels_names_max_levels > 0\n        else \"NAME_OF_INDEX\"\n    )\n    modin_df.index.name = pandas_df.index.name = index_name\n    df_equals(modin_df, pandas_df)\n    if test_async_reset_index:\n        # The change in index is not automatically handled by Modin. See #3941.\n        modin_df.index = modin_df.index\n        modin_df.modin.to_pandas()\n\n        modin_df._query_compiler.set_frame_index_cache(None)\n    df_equals(modin_df.reset_index(drop=False), pandas_df.reset_index(drop=False))\n\n    if test_async_reset_index:\n        # The change in index is not automatically handled by Modin. See #3941.\n        modin_df.index = modin_df.index\n        modin_df.modin.to_pandas()\n\n        modin_df._query_compiler.set_frame_index_cache(None)\n    modin_df.reset_index(drop=True, inplace=True)\n    pandas_df.reset_index(drop=True, inplace=True)\n    df_equals(modin_df, pandas_df)\n\n    modin_df = pd.DataFrame(test_data_values[0])\n    pandas_df = pandas.DataFrame(test_data_values[0])\n    modin_df.index.name = pandas_df.index.name = index_name\n    if test_async_reset_index:\n        # The change in index is not automatically handled by Modin. 
See #3941.\n        modin_df.index = modin_df.index\n        modin_df._to_pandas()\n\n        modin_df._query_compiler.set_frame_index_cache(None)\n    df_equals(modin_df.reset_index(drop=False), pandas_df.reset_index(drop=False))\n\n\n@pytest.mark.parametrize(\"test_async_reset_index\", [False, True])\n@pytest.mark.parametrize(\n    \"index\",\n    [\n        pandas.Index([11, 22, 33, 44], name=\"col0\"),\n        pandas.MultiIndex.from_product(\n            [[100, 200], [300, 400]], names=[\"level1\", \"col0\"]\n        ),\n    ],\n    ids=[\"index\", \"multiindex\"],\n)\ndef test_reset_index_metadata_update(index, test_async_reset_index):\n    modin_df, pandas_df = create_test_dfs({\"col0\": [0, 1, 2, 3]}, index=index)\n    modin_df.columns = pandas_df.columns = [\"col1\"]\n    if test_async_reset_index:\n        # The change in index is not automatically handled by Modin. See #3941.\n        modin_df.index = modin_df.index\n        modin_df._to_pandas()\n\n        modin_df._query_compiler.set_frame_index_cache(None)\n    eval_general(modin_df, pandas_df, lambda df: df.reset_index())\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"axis\", axis_values, ids=axis_keys)\ndef test_sample(data, axis):\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n\n    with pytest.raises(ValueError):\n        modin_df.sample(n=3, frac=0.4, axis=axis)\n\n    with pytest.raises(KeyError):\n        modin_df.sample(frac=0.5, weights=\"CoLuMn_No_ExIsT\", axis=0)\n\n    with pytest.raises(ValueError):\n        modin_df.sample(frac=0.5, weights=modin_df.columns[0], axis=1)\n\n    with pytest.raises(ValueError):\n        modin_df.sample(\n            frac=0.5, weights=[0.5 for _ in range(len(modin_df.index[:-1]))], axis=0\n        )\n\n    with pytest.raises(ValueError):\n        modin_df.sample(\n            frac=0.5,\n            weights=[0.5 for _ in range(len(modin_df.columns[:-1]))],\n            
axis=1,\n        )\n\n    with pytest.raises(ValueError):\n        modin_df.sample(n=-3, axis=axis)\n\n    with pytest.raises(ValueError):\n        modin_df.sample(frac=0.2, weights=pandas.Series(), axis=axis)\n\n    if isinstance(axis, str):\n        num_axis = pandas.DataFrame()._get_axis_number(axis)\n    else:\n        num_axis = axis\n\n    # weights that sum to 1\n    sums = sum(i % 2 for i in range(len(modin_df.axes[num_axis])))\n    weights = [i % 2 / sums for i in range(len(modin_df.axes[num_axis]))]\n\n    modin_result = modin_df.sample(\n        frac=0.5, random_state=42, weights=weights, axis=axis\n    )\n    pandas_result = pandas_df.sample(\n        frac=0.5, random_state=42, weights=weights, axis=axis\n    )\n    df_equals(modin_result, pandas_result)\n\n    # weights that don't sum to 1\n    weights = [i % 2 for i in range(len(modin_df.axes[num_axis]))]\n    modin_result = modin_df.sample(\n        frac=0.5, random_state=42, weights=weights, axis=axis\n    )\n    pandas_result = pandas_df.sample(\n        frac=0.5, random_state=42, weights=weights, axis=axis\n    )\n    df_equals(modin_result, pandas_result)\n\n    modin_result = modin_df.sample(n=0, axis=axis)\n    pandas_result = pandas_df.sample(n=0, axis=axis)\n    df_equals(modin_result, pandas_result)\n\n    modin_result = modin_df.sample(frac=0.5, random_state=42, axis=axis)\n    pandas_result = pandas_df.sample(frac=0.5, random_state=42, axis=axis)\n    df_equals(modin_result, pandas_result)\n\n    modin_result = modin_df.sample(n=2, random_state=42, axis=axis)\n    pandas_result = pandas_df.sample(n=2, random_state=42, axis=axis)\n    df_equals(modin_result, pandas_result)\n\n    # issue #1692, numpy RandomState object\n    # We must create a new random state for each iteration because the values that\n    # are selected will be impacted if the object has already been used.\n    random_state = np.random.RandomState(42)\n    modin_result = modin_df.sample(frac=0.5, random_state=random_state, 
axis=axis)\n\n    random_state = np.random.RandomState(42)\n    pandas_result = pandas_df.sample(frac=0.5, random_state=random_state, axis=axis)\n    df_equals(modin_result, pandas_result)\n\n\ndef test_empty_sample():\n    modin_df, pandas_df = create_test_dfs([1])\n    # issue #4983\n    # If we have a fraction of the dataset that results in n=0, we should\n    # make sure that we don't pass in both n and frac to sample internally.\n    eval_general(modin_df, pandas_df, lambda df: df.sample(frac=0.12))\n\n\ndef test_select_dtypes():\n    frame_data = {\n        \"test1\": list(\"abc\"),\n        \"test2\": np.arange(3, 6).astype(\"u1\"),\n        \"test3\": np.arange(8.0, 11.0, dtype=\"float64\"),\n        \"test4\": [True, False, True],\n        \"test5\": pandas.date_range(\"now\", periods=3).values,\n        \"test6\": list(range(5, 8)),\n    }\n    df = pandas.DataFrame(frame_data)\n    rd = pd.DataFrame(frame_data)\n\n    include = np.float64, \"integer\"\n    exclude = (np.bool_,)\n    r = rd.select_dtypes(include=include, exclude=exclude)\n\n    e = df[[\"test2\", \"test3\", \"test6\"]]\n    df_equals(r, e)\n\n    r = rd.select_dtypes(include=np.bool_)\n    e = df[[\"test4\"]]\n    df_equals(r, e)\n\n    r = rd.select_dtypes(exclude=np.bool_)\n    e = df[[\"test1\", \"test2\", \"test3\", \"test5\", \"test6\"]]\n    df_equals(r, e)\n\n    try:\n        pd.DataFrame().select_dtypes()\n        assert False\n    except ValueError:\n        assert True\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"n\", int_arg_values, ids=arg_keys(\"n\", int_arg_keys))\ndef test_tail(data, n):\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n\n    df_equals(modin_df.tail(n), pandas_df.tail(n))\n    df_equals(modin_df.tail(len(modin_df)), pandas_df.tail(len(pandas_df)))\n\n\ndef test_xs():\n    # example is based on the doctest in the upstream pandas docstring\n    data = {\n        
\"num_legs\": [4, 4, 2, 2],\n        \"num_wings\": [0, 0, 2, 2],\n        \"class\": [\"mammal\", \"mammal\", \"mammal\", \"bird\"],\n        \"animal\": [\"cat\", \"dog\", \"bat\", \"penguin\"],\n        \"locomotion\": [\"walks\", \"walks\", \"flies\", \"walks\"],\n    }\n    modin_df, pandas_df = create_test_dfs(data)\n\n    def prepare_dataframes(df):\n        # to make several partitions (only for Modin dataframe)\n        df = (pd if isinstance(df, pd.DataFrame) else pandas).concat([df, df], axis=0)\n        # looks like pandas is sorting the index whereas modin is not, performing a join operation.\n        df = df.reset_index(drop=True)\n        df = df.join(df, rsuffix=\"_y\")\n        return df.set_index([\"class\", \"animal\", \"locomotion\"])\n\n    modin_df = prepare_dataframes(modin_df)\n    pandas_df = prepare_dataframes(pandas_df)\n    eval_general(modin_df, pandas_df, lambda df: df.xs(\"mammal\"))\n    eval_general(modin_df, pandas_df, lambda df: df.xs(\"cat\", level=1))\n    eval_general(modin_df, pandas_df, lambda df: df.xs(\"num_legs\", axis=1))\n    eval_general(\n        modin_df, pandas_df, lambda df: df.xs(\"cat\", level=1, drop_level=False)\n    )\n    eval_general(modin_df, pandas_df, lambda df: df.xs((\"mammal\", \"cat\")))\n    eval_general(\n        modin_df, pandas_df, lambda df: df.xs((\"mammal\", \"cat\"), drop_level=False)\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test___getitem__(data):\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n\n    key = modin_df.columns[0]\n    modin_col = modin_df.__getitem__(key)\n    assert isinstance(modin_col, pd.Series)\n\n    pd_col = pandas_df[key]\n    df_equals(pd_col, modin_col)\n\n    slices = [\n        (None, -1),\n        (-1, None),\n        (1, 2),\n        (1, None),\n        (None, 1),\n        (1, -1),\n        (-3, -1),\n        (1, -1, 2),\n        (-1, 1, -1),\n        (None, None, 2),\n    ]\n\n    # slice 
test\n    for slice_param in slices:\n        s = slice(*slice_param)\n        df_equals(modin_df[s], pandas_df[s])\n\n    # Test empty\n    df_equals(pd.DataFrame([])[:10], pandas.DataFrame([])[:10])\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test___getitem_bool_indexers(data):\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n\n    indices = [i % 3 == 0 for i in range(len(modin_df.index))]\n    columns = [i % 5 == 0 for i in range(len(modin_df.columns))]\n\n    # Key is a list of booleans\n    modin_result = modin_df.loc[indices, columns]\n    pandas_result = pandas_df.loc[indices, columns]\n    df_equals(modin_result, pandas_result)\n\n    # Key is a Modin or pandas series of booleans\n    df_equals(\n        modin_df.loc[pd.Series(indices), pd.Series(columns, index=modin_df.columns)],\n        pandas_df.loc[\n            pandas.Series(indices), pandas.Series(columns, index=modin_df.columns)\n        ],\n    )\n\n\ndef test_getitem_empty_mask():\n    # modin-project/modin#517\n    modin_frames = []\n    pandas_frames = []\n    data1 = np.random.randint(0, 100, size=(100, 4))\n    mdf1 = pd.DataFrame(data1, columns=list(\"ABCD\"))\n    pdf1 = pandas.DataFrame(data1, columns=list(\"ABCD\"))\n    modin_frames.append(mdf1)\n    pandas_frames.append(pdf1)\n\n    data2 = np.random.randint(0, 100, size=(100, 4))\n    mdf2 = pd.DataFrame(data2, columns=list(\"ABCD\"))\n    pdf2 = pandas.DataFrame(data2, columns=list(\"ABCD\"))\n    modin_frames.append(mdf2)\n    pandas_frames.append(pdf2)\n\n    data3 = np.random.randint(0, 100, size=(100, 4))\n    mdf3 = pd.DataFrame(data3, columns=list(\"ABCD\"))\n    pdf3 = pandas.DataFrame(data3, columns=list(\"ABCD\"))\n    modin_frames.append(mdf3)\n    pandas_frames.append(pdf3)\n\n    modin_data = pd.concat(modin_frames)\n    pandas_data = pandas.concat(pandas_frames)\n    df_equals(\n        modin_data[[False for _ in modin_data.index]],\n        
pandas_data[[False for _ in modin_data.index]],\n    )\n\n\ndef test_getitem_datetime_slice():\n    data = {\"data\": range(1000)}\n    index = pd.date_range(\"2017/1/4\", periods=1000)\n    modin_df = pd.DataFrame(data=data, index=index)\n    pandas_df = pandas.DataFrame(data=data, index=index)\n\n    s = slice(\"2017-01-06\", \"2017-01-09\")\n    df_equals(modin_df[s], pandas_df[s])\n\n\ndef test_getitem_same_name():\n    data = [\n        [1, 2, 3, 4],\n        [5, 6, 7, 8],\n        [9, 10, 11, 12],\n        [13, 14, 15, 16],\n        [17, 18, 19, 20],\n    ]\n    columns = [\"c1\", \"c2\", \"c1\", \"c3\"]\n    modin_df = pd.DataFrame(data, columns=columns)\n    pandas_df = pandas.DataFrame(data, columns=columns)\n    df_equals(modin_df[\"c1\"], pandas_df[\"c1\"])\n    df_equals(modin_df[\"c2\"], pandas_df[\"c2\"])\n    df_equals(modin_df[[\"c1\", \"c2\"]], pandas_df[[\"c1\", \"c2\"]])\n    df_equals(modin_df[\"c3\"], pandas_df[\"c3\"])\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test___getattr__(request, data):\n    modin_df = pd.DataFrame(data)\n\n    if \"empty_data\" not in request.node.name:\n        key = modin_df.columns[0]\n        modin_df.__getattr__(key)\n\n        col = modin_df.__getattr__(\"col1\")\n        assert isinstance(col, pd.Series)\n\n        col = getattr(modin_df, \"col1\")\n        assert isinstance(col, pd.Series)\n\n        # Check that lookup in column doesn't override other attributes\n        df2 = modin_df.rename(index=str, columns={key: \"columns\"})\n        assert isinstance(df2.columns, pandas.Index)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test___setitem__(data):\n    eval_setitem(*create_test_dfs(data), loc=-1, value=1)\n    eval_setitem(\n        *create_test_dfs(data), loc=-1, value=lambda df: type(df)(df[df.columns[0]])\n    )\n\n    nrows = len(data[list(data.keys())[0]])\n    arr = np.arange(nrows * 2).reshape(-1, 2)\n\n    
eval_setitem(*create_test_dfs(data), loc=-1, value=arr)\n    eval_setitem(*create_test_dfs(data), col=\"___NON EXISTENT COLUMN\", value=arr.T[0])\n    eval_setitem(*create_test_dfs(data), loc=0, value=np.arange(nrows))\n\n    modin_df = pd.DataFrame(columns=data.keys())\n    pandas_df = pandas.DataFrame(columns=data.keys())\n\n    for col in modin_df.columns:\n        modin_df[col] = np.arange(1000)\n\n    for col in pandas_df.columns:\n        pandas_df[col] = np.arange(1000)\n\n    df_equals(modin_df, pandas_df)\n\n    # Test df assignment to a columns selection\n    modin_df[modin_df.columns[[0, -1]]] = modin_df[modin_df.columns[[0, -1]]]\n    pandas_df[pandas_df.columns[[0, -1]]] = pandas_df[pandas_df.columns[[0, -1]]]\n    df_equals(modin_df, pandas_df)\n\n    # Test series assignment to column\n    modin_df = pd.DataFrame(columns=modin_df.columns)\n    pandas_df = pandas.DataFrame(columns=pandas_df.columns)\n    modin_df[modin_df.columns[-1]] = modin_df[modin_df.columns[0]]\n    pandas_df[pandas_df.columns[-1]] = pandas_df[pandas_df.columns[0]]\n    df_equals(modin_df, pandas_df)\n\n    if not sys.version_info.major == 3 and sys.version_info.minor > 6:\n        # This test doesn't work correctly on Python 3.6\n        # Test 2d ndarray assignment to column\n        modin_df = pd.DataFrame(data)\n        pandas_df = pandas.DataFrame(data)\n        modin_df[\"new_col\"] = modin_df[[modin_df.columns[0]]].values\n        pandas_df[\"new_col\"] = pandas_df[[pandas_df.columns[0]]].values\n        df_equals(modin_df, pandas_df)\n        assert isinstance(modin_df[\"new_col\"][0], type(pandas_df[\"new_col\"][0]))\n\n    modin_df[1:5] = 10\n    pandas_df[1:5] = 10\n    df_equals(modin_df, pandas_df)\n\n    # Transpose test\n    modin_df = pd.DataFrame(data).T\n    pandas_df = pandas.DataFrame(data).T\n\n    modin_df[modin_df.columns[0]] = 0\n    pandas_df[pandas_df.columns[0]] = 0\n    df_equals(modin_df, pandas_df)\n\n    modin_df.columns = [str(i) for i in 
modin_df.columns]\n    pandas_df.columns = [str(i) for i in pandas_df.columns]\n\n    modin_df[modin_df.columns[0]] = 0\n    pandas_df[pandas_df.columns[0]] = 0\n\n    df_equals(modin_df, pandas_df)\n\n    modin_df[modin_df.columns[0]][modin_df.index[0]] = 12345\n    pandas_df[pandas_df.columns[0]][pandas_df.index[0]] = 12345\n    df_equals(modin_df, pandas_df)\n\n    modin_df[1:5] = 10\n    pandas_df[1:5] = 10\n    df_equals(modin_df, pandas_df)\n\n\ndef test___setitem__partitions_aligning():\n    # from issue #2390\n    modin_df = pd.DataFrame({\"a\": [1, 2, 3]})\n    pandas_df = pandas.DataFrame({\"a\": [1, 2, 3]})\n    modin_df[\"b\"] = pd.Series([4, 5, 6, 7, 8])\n    pandas_df[\"b\"] = pandas.Series([4, 5, 6, 7, 8])\n    df_equals(modin_df, pandas_df)\n\n    # from issue #2442\n    data = {\"a\": [1, 2, 3, 4]}\n    # Index with duplicated timestamp\n    index = pandas.to_datetime([\"2020-02-06\", \"2020-02-06\", \"2020-02-22\", \"2020-03-26\"])\n\n    md_df, pd_df = create_test_dfs(data, index=index)\n    # Setting new column\n    pd_df[\"b\"] = pandas.Series(np.arange(4))\n    md_df[\"b\"] = pd.Series(np.arange(4))\n    df_equals(md_df, pd_df)\n\n    # Setting existing column\n    pd_df[\"b\"] = pandas.Series(np.arange(4))\n    md_df[\"b\"] = pd.Series(np.arange(4))\n    df_equals(md_df, pd_df)\n\n    pd_df[\"a\"] = pandas.Series(np.arange(4))\n    md_df[\"a\"] = pd.Series(np.arange(4))\n    df_equals(md_df, pd_df)\n\n\ndef test___setitem__with_mismatched_partitions():\n    with ensure_clean(\".csv\") as fname:\n        np.savetxt(fname, np.random.randint(0, 100, size=(200_000, 99)), delimiter=\",\")\n        modin_df = pd.read_csv(fname)\n        pandas_df = pandas.read_csv(fname)\n        modin_df[\"new\"] = pd.Series(list(range(len(modin_df))))\n        pandas_df[\"new\"] = pandas.Series(list(range(len(pandas_df))))\n        df_equals(modin_df, pandas_df)\n\n\ndef test___setitem__mask():\n    # DataFrame mask:\n    data = test_data[\"int_data\"]\n    
modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n\n    mean = int((RAND_HIGH + RAND_LOW) / 2)\n    pandas_df[pandas_df > mean] = -50\n    modin_df[modin_df > mean] = -50\n\n    df_equals(modin_df, pandas_df)\n\n    # Array mask:\n    pandas_df = pandas.DataFrame(data)\n    modin_df = pd.DataFrame(data)\n    array = (pandas_df > mean).to_numpy()\n\n    modin_df[array] = -50\n    pandas_df[array] = -50\n\n    df_equals(modin_df, pandas_df)\n\n    # Array mask of wrong size:\n    with pytest.raises(ValueError):\n        array = np.array([[1, 2], [3, 4]])\n        modin_df[array] = 20\n\n\n@pytest.mark.parametrize(\n    \"data\",\n    [\n        {},\n        {\"id\": [], \"max_speed\": [], \"health\": []},\n        {\"id\": [1], \"max_speed\": [2], \"health\": [3]},\n        {\"id\": [4, 40, 400], \"max_speed\": [111, 222, 333], \"health\": [33, 22, 11]},\n    ],\n    ids=[\"empty_frame\", \"empty_cols\", \"1_length_cols\", \"2_length_cols\"],\n)\n@pytest.mark.parametrize(\n    \"value\",\n    [[11, 22], [11, 22, 33]],\n    ids=[\"2_length_val\", \"3_length_val\"],\n)\n@pytest.mark.parametrize(\"convert_to_series\", [False, True])\n@pytest.mark.parametrize(\"new_col_id\", [123, \"new_col\"], ids=[\"integer\", \"string\"])\ndef test_setitem_on_empty_df(data, value, convert_to_series, new_col_id):\n    pandas_df = pandas.DataFrame(data)\n    modin_df = pd.DataFrame(data)\n\n    def applyier(df):\n        if convert_to_series:\n            converted_value = (\n                pandas.Series(value)\n                if isinstance(df, pandas.DataFrame)\n                else pd.Series(value)\n            )\n        else:\n            converted_value = value\n        df[new_col_id] = converted_value\n        return df\n\n    expected_exception = None\n    if not convert_to_series:\n        values_length = len(value)\n        index_length = len(pandas_df.index)\n        expected_exception = ValueError(\n            f\"Length of values ({values_length}) does 
not match length of index ({index_length})\"\n        )\n\n    eval_general(\n        modin_df,\n        pandas_df,\n        applyier,\n        expected_exception=expected_exception,\n        __inplace__=True,\n    )\n    # Because of https://github.com/modin-project/modin/issues/7600,\n    # df_equals does not check dtypes equality for empty frames.\n    assert_dtypes_equal(modin_df, pandas_df)\n\n\ndef test_setitem_on_empty_df_4407():\n    data = {}\n    index = pd.date_range(end=\"1/1/2018\", periods=0, freq=\"D\")\n    column = pd.date_range(end=\"1/1/2018\", periods=1, freq=\"h\")[0]\n    modin_df = pd.DataFrame(data, columns=index)\n    pandas_df = pandas.DataFrame(data, columns=index)\n\n    modin_df[column] = pd.Series([1])\n    pandas_df[column] = pandas.Series([1])\n\n    df_equals(modin_df, pandas_df)\n    assert modin_df.columns.freq == pandas_df.columns.freq\n\n\ndef test_setitem_on_empty_df_does_not_change_other_dtypes_5961():\n    def _do_setitem(df):\n        df[\"col0\"] = df[\"col0\"].astype(float)\n\n    modin_df, pandas_df = create_test_dfs(pandas.DataFrame(columns=[\"col0\", \"col1\"]))\n\n    _do_setitem(modin_df)\n    _do_setitem(pandas_df)\n    # Because of  https://github.com/modin-project/modin/issues/7600, we cannot\n    # use df_equals to check dtypes equality.\n    assert_dtypes_equal(modin_df, pandas_df)\n\n\ndef test___setitem__unhashable_list():\n    # from #3258 and #3291\n    cols = [\"a\", \"b\"]\n    modin_df = pd.DataFrame([[0, 0]], columns=cols)\n    modin_df[cols] = modin_df[cols]\n    pandas_df = pandas.DataFrame([[0, 0]], columns=cols)\n    pandas_df[cols] = pandas_df[cols]\n    df_equals(modin_df, pandas_df)\n\n\ndef test_setitem_unhashable_key():\n    source_modin_df, source_pandas_df = create_test_dfs(test_data[\"float_nan_data\"])\n    row_count = source_modin_df.shape[0]\n\n    def _make_copy(df1, df2):\n        return df1.copy(deep=True), df2.copy(deep=True)\n\n    for key in ([\"col1\", \"col2\"], [\"new_col1\", 
\"new_col2\"]):\n        # 1d list case\n        value = [1, 2]\n        modin_df, pandas_df = _make_copy(source_modin_df, source_pandas_df)\n        eval_setitem(modin_df, pandas_df, value, key)\n\n        # 2d list case\n        value = [[1, 2]] * row_count\n        modin_df, pandas_df = _make_copy(source_modin_df, source_pandas_df)\n        eval_setitem(modin_df, pandas_df, value, key)\n\n        # pandas DataFrame case\n        df_value = pandas.DataFrame(value, columns=[\"value_col1\", \"value_col2\"])\n        modin_df, pandas_df = _make_copy(source_modin_df, source_pandas_df)\n        eval_setitem(modin_df, pandas_df, df_value, key)\n\n        # numpy array case\n        value = df_value.to_numpy()\n        modin_df, pandas_df = _make_copy(source_modin_df, source_pandas_df)\n        eval_setitem(modin_df, pandas_df, value, key)\n\n        # pandas Series case\n        value = df_value[\"value_col1\"]\n        modin_df, pandas_df = _make_copy(source_modin_df, source_pandas_df)\n        eval_setitem(\n            modin_df,\n            pandas_df,\n            value,\n            key[:1],\n            expected_exception=ValueError(\"Columns must be same length as key\"),\n        )\n\n        # pandas Index case\n        value = df_value.index\n        modin_df, pandas_df = _make_copy(source_modin_df, source_pandas_df)\n        eval_setitem(\n            modin_df,\n            pandas_df,\n            value,\n            key[:1],\n            expected_exception=ValueError(\"Columns must be same length as key\"),\n        )\n\n        # scalar case\n        value = 3\n        modin_df, pandas_df = _make_copy(source_modin_df, source_pandas_df)\n        eval_setitem(modin_df, pandas_df, value, key)\n\n        # test failed case: ValueError('Columns must be same length as key')\n        eval_setitem(\n            modin_df,\n            pandas_df,\n            df_value[[\"value_col1\"]],\n            key,\n            expected_exception=ValueError(\"Columns must be 
same length as key\"),\n        )\n\n\ndef test_setitem_2d_insertion():\n    def build_value_picker(modin_value, pandas_value):\n        \"\"\"Build a function that returns either Modin or pandas DataFrame depending on the passed frame.\"\"\"\n        return lambda source_df, *args, **kwargs: (\n            modin_value\n            if isinstance(source_df, (pd.DataFrame, pd.Series))\n            else pandas_value\n        )\n\n    modin_df, pandas_df = create_test_dfs(test_data[\"int_data\"])\n\n    # Easy case - key and value.columns are equal\n    modin_value, pandas_value = create_test_dfs(\n        {\"new_value1\": np.arange(len(modin_df)), \"new_value2\": np.arange(len(modin_df))}\n    )\n    eval_setitem(\n        modin_df,\n        pandas_df,\n        build_value_picker(modin_value, pandas_value),\n        col=[\"new_value1\", \"new_value2\"],\n    )\n\n    # Key and value.columns have equal values but in different order\n    new_columns = [\"new_value3\", \"new_value4\"]\n    modin_value.columns, pandas_value.columns = new_columns, new_columns\n    eval_setitem(\n        modin_df,\n        pandas_df,\n        build_value_picker(modin_value, pandas_value),\n        col=[\"new_value4\", \"new_value3\"],\n    )\n\n    # Key and value.columns have different values\n    new_columns = [\"new_value5\", \"new_value6\"]\n    modin_value.columns, pandas_value.columns = new_columns, new_columns\n    eval_setitem(\n        modin_df,\n        pandas_df,\n        build_value_picker(modin_value, pandas_value),\n        col=[\"__new_value5\", \"__new_value6\"],\n    )\n\n    # Key and value.columns have different lengths, testing that both raise the same exception\n    eval_setitem(\n        modin_df,\n        pandas_df,\n        build_value_picker(modin_value.iloc[:, [0]], pandas_value.iloc[:, [0]]),\n        col=[\"new_value7\", \"new_value8\"],\n        expected_exception=ValueError(\"Columns must be same length as key\"),\n    
)\n\n\n@pytest.mark.parametrize(\"does_value_have_different_columns\", [True, False])\ndef test_setitem_2d_update(does_value_have_different_columns):\n    def test(dfs, iloc):\n        \"\"\"Update columns on the given numeric indices.\"\"\"\n        df1, df2 = dfs\n        cols1 = df1.columns[iloc].tolist()\n        cols2 = df2.columns[iloc].tolist()\n        df1[cols1] = df2[cols2]\n        return df1\n\n    modin_df, pandas_df = create_test_dfs(test_data[\"int_data\"])\n    modin_df2, pandas_df2 = create_test_dfs(test_data[\"int_data\"])\n    modin_df2 *= 10\n    pandas_df2 *= 10\n\n    if does_value_have_different_columns:\n        new_columns = [f\"{col}_new\" for col in modin_df.columns]\n        modin_df2.columns = new_columns\n        pandas_df2.columns = new_columns\n\n    modin_dfs = (modin_df, modin_df2)\n    pandas_dfs = (pandas_df, pandas_df2)\n\n    eval_general(modin_dfs, pandas_dfs, test, iloc=[0, 1, 2])\n    eval_general(modin_dfs, pandas_dfs, test, iloc=[0, -1])\n    eval_general(\n        modin_dfs, pandas_dfs, test, iloc=slice(1, None)\n    )  # (start=1, stop=None)\n    eval_general(\n        modin_dfs, pandas_dfs, test, iloc=slice(None, -2)\n    )  # (start=None, stop=-2)\n    eval_general(\n        modin_dfs,\n        pandas_dfs,\n        test,\n        iloc=[0, 1, 5, 6, 9, 10, -2, -1],\n    )\n    eval_general(\n        modin_dfs,\n        pandas_dfs,\n        test,\n        iloc=[5, 4, 0, 10, 1, -1],\n    )\n    eval_general(\n        modin_dfs, pandas_dfs, test, iloc=slice(None, None, 2)\n    )  # (start=None, stop=None, step=2)\n\n\ndef test___setitem__single_item_in_series():\n    # Test assigning a single item in a Series for issue\n    # https://github.com/modin-project/modin/issues/3860\n    modin_series = pd.Series(99)\n    pandas_series = pandas.Series(99)\n    modin_series[:1] = pd.Series(100)\n    pandas_series[:1] = pandas.Series(100)\n    df_equals(modin_series, pandas_series)\n\n\ndef 
test___setitem__assigning_single_categorical_sets_correct_dtypes():\n    # This test case comes from\n    # https://github.com/modin-project/modin/issues/3895\n    modin_df = pd.DataFrame({\"categories\": [\"A\"]})\n    modin_df[\"categories\"] = pd.Categorical([\"A\"])\n    pandas_df = pandas.DataFrame({\"categories\": [\"A\"]})\n    pandas_df[\"categories\"] = pandas.Categorical([\"A\"])\n    df_equals(modin_df, pandas_df)\n\n\ndef test_iloc_assigning_scalar_none_to_string_frame():\n    # This test case comes from\n    # https://github.com/modin-project/modin/issues/3981\n    data = [[\"A\"]]\n    modin_df = pd.DataFrame(data, dtype=\"string\")\n    modin_df.iloc[0, 0] = None\n    pandas_df = pandas.DataFrame(data, dtype=\"string\")\n    pandas_df.iloc[0, 0] = None\n    df_equals(modin_df, pandas_df)\n\n\n@pytest.mark.parametrize(\n    \"value\",\n    [\n        1,\n        np.int32(1),\n        1.0,\n        \"str val\",\n        pandas.Timestamp(\"1/4/2018\"),\n        np.datetime64(0, \"ms\"),\n        True,\n    ],\n)\ndef test_loc_boolean_assignment_scalar_dtypes(value):\n    modin_df, pandas_df = create_test_dfs(\n        {\n            \"a\": [1, 2, 3],\n            \"b\": [3.0, 5.0, 6.0],\n            \"c\": [\"a\", \"b\", \"c\"],\n            \"d\": [1.0, \"c\", 2.0],\n            \"e\": pandas.to_datetime([\"1/1/2018\", \"1/2/2018\", \"1/3/2018\"]),\n            \"f\": [True, False, True],\n        }\n    )\n    modin_idx, pandas_idx = pd.Series([False, True, True]), pandas.Series(\n        [False, True, True]\n    )\n\n    modin_df.loc[modin_idx] = value\n    pandas_df.loc[pandas_idx] = value\n    df_equals(modin_df, pandas_df)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test___len__(data):\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n\n    assert len(modin_df) == len(pandas_df)\n\n\ndef test_index_order():\n    # see #1708 and #1869 for details\n    df_modin, df_pandas = (\n        
pd.DataFrame(test_data[\"float_nan_data\"]),\n        pandas.DataFrame(test_data[\"float_nan_data\"]),\n    )\n    rows_number = len(df_modin.index)\n    level_0 = np.random.choice([x for x in range(10)], rows_number)\n    level_1 = np.random.choice([x for x in range(10)], rows_number)\n    index = pandas.MultiIndex.from_arrays([level_0, level_1])\n\n    df_modin.index = index\n    df_pandas.index = index\n\n    for func in [\"all\", \"any\", \"count\"]:\n        df_equals(\n            getattr(df_modin, func)().index,\n            getattr(df_pandas, func)().index,\n        )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"sortorder\", [0, 3, 5])\ndef test_multiindex_from_frame(data, sortorder):\n    modin_df, pandas_df = create_test_dfs(data)\n\n    def call_from_frame(df):\n        if type(df).__module__.startswith(\"pandas\"):\n            return pandas.MultiIndex.from_frame(df, sortorder)\n        else:\n            return pd.MultiIndex.from_frame(df, sortorder)\n\n    eval_general(modin_df, pandas_df, call_from_frame, comparator=assert_index_equal)\n\n\ndef test__getitem_bool_single_row_dataframe():\n    # This test case comes from\n    # https://github.com/modin-project/modin/issues/4845\n    eval_general(pd, pandas, lambda lib: lib.DataFrame([1])[lib.Series([True])])\n\n\ndef test__getitem_bool_with_empty_partition():\n    # This test case comes from\n    # https://github.com/modin-project/modin/issues/5188\n\n    size = MinRowPartitionSize.get()\n\n    pandas_series = pandas.Series([True if i % 2 else False for i in range(size)])\n    modin_series = pd.Series(pandas_series)\n\n    pandas_df = pandas.DataFrame([i for i in range(size + 1)])\n    pandas_df.iloc[size] = np.nan\n    modin_df = pd.DataFrame(pandas_df)\n\n    pandas_tmp_result = pandas_df.dropna()\n    modin_tmp_result = modin_df.dropna()\n\n    eval_general(\n        modin_tmp_result,\n        pandas_tmp_result,\n        lambda df: (\n     
       df[modin_series] if isinstance(df, pd.DataFrame) else df[pandas_series]\n        ),\n    )\n\n\n# This is a very subtle bug that comes from:\n# https://github.com/modin-project/modin/issues/4945\ndef test_lazy_eval_index():\n    modin_df, pandas_df = create_test_dfs({\"col0\": [0, 1]})\n\n    def func(df):\n        df_copy = df[df[\"col0\"] < 6].copy()\n        # The problem here is that the index is not copied over so it needs\n        # to get recomputed at some point. Our implementation of __setitem__\n        # requires us to build a mask and insert the value from the right\n        # handside into the new DataFrame. However, it's possible that we\n        # won't have any new partitions, so we will end up computing an empty\n        # index.\n        df_copy[\"col0\"] = df_copy[\"col0\"].apply(lambda x: x + 1)\n        return df_copy\n\n    eval_general(modin_df, pandas_df, func)\n\n\ndef test_index_of_empty_frame():\n    # Test on an empty frame created by user\n    md_df, pd_df = create_test_dfs(\n        {}, index=pandas.Index([], name=\"index name\"), columns=[\"a\", \"b\"]\n    )\n    assert md_df.empty and pd_df.empty\n    df_equals(md_df.index, pd_df.index)\n\n    # Test on an empty frame produced by Modin's logic\n    data = test_data_values[0]\n    md_df, pd_df = create_test_dfs(\n        data, index=pandas.RangeIndex(len(next(iter(data.values()))), name=\"index name\")\n    )\n\n    md_res = md_df.query(f\"{md_df.columns[0]} > {RAND_HIGH}\")\n    pd_res = pd_df.query(f\"{pd_df.columns[0]} > {RAND_HIGH}\")\n\n    assert md_res.empty and pd_res.empty\n    df_equals(md_res.index, pd_res.index)\n\n\n# https://github.com/modin-project/modin/issues/7405\n@pytest.mark.parametrize(\"indexer\", [\"loc\", \"iloc\"])\ndef test_loc_and_iloc_set_order(indexer):\n    rng = np.random.default_rng(seed=0)\n    is_loc = indexer == \"loc\"\n    data = {\"col\": rng.integers(0, 100, size=100)}\n    set_count = 20\n    # Pick a bunch of unsorted row indices; may 
contain repeat values.\n    row_indexer = rng.integers(0, 100, size=set_count)\n    col_indexer = \"col\" if is_loc else 0\n    set_data = range(100, 100 + set_count)\n    md_df, pd_df = create_test_dfs(data)\n\n    def get_helper(df):\n        if is_loc:\n            return df.loc[row_indexer, col_indexer]\n        else:\n            return df.iloc[row_indexer, col_indexer]\n\n    # First, ensure loc/iloc read succeeds.\n    eval_general(md_df, pd_df, get_helper)\n\n    def set_helper(df):\n        if is_loc:\n            df.loc[row_indexer, col_indexer] = set_data\n        else:\n            df.iloc[row_indexer, col_indexer] = set_data\n\n    # Second, check results of loc/iloc write.\n    eval_general(\n        md_df,\n        pd_df,\n        set_helper,\n        __inplace__=True,\n    )\n    # Finally, check the result of a loc/iloc read again.\n    eval_general(md_df, pd_df, get_helper)\n\n\ndef test_iloc_set_negative_index():\n    rng = np.random.default_rng(seed=0)\n    row_count = 50\n    col_count = 80\n    data = {f\"col_{i}\": rng.integers(0, 100, size=row_count) for i in range(col_count)}\n    row_set_count = 20\n    col_set_count = 30\n    # Pick a bunch of unsorted row indices; may contain repeat values and negative numbers.\n    row_indexer = rng.integers(-row_count, row_count, size=row_set_count)\n    col_indexer = rng.integers(-col_count, col_count, size=col_set_count)\n    set_data = np.reshape(\n        range(100, 100 + row_set_count * col_set_count), (row_set_count, col_set_count)\n    )\n    md_df, pd_df = create_test_dfs(data)\n\n    def get_helper(df):\n        return df.iloc[row_indexer, col_indexer]\n\n    # First, ensure loc/iloc read succeeds.\n    eval_general(md_df, pd_df, get_helper)\n\n    def set_helper(df):\n        df.iloc[row_indexer, col_indexer] = set_data\n\n    # Second, check results of loc/iloc write.\n    eval_general(\n        md_df,\n        pd_df,\n        set_helper,\n        __inplace__=True,\n    )\n    # Finally, 
check the result of a loc/iloc read again.\n    eval_general(md_df, pd_df, get_helper)\n"
  },
  {
    "path": "modin/tests/pandas/dataframe/test_iter.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport io\nimport warnings\n\nimport matplotlib\nimport numpy as np\nimport pandas\nimport pytest\n\nimport modin.pandas as pd\nfrom modin.config import NPartitions\nfrom modin.pandas.utils import SET_DATAFRAME_ATTRIBUTE_WARNING\nfrom modin.tests.pandas.utils import (\n    RAND_HIGH,\n    RAND_LOW,\n    create_test_dfs,\n    df_equals,\n    eval_general,\n    random_state,\n    test_data,\n    test_data_keys,\n    test_data_values,\n)\nfrom modin.tests.test_utils import (\n    current_execution_is_native,\n    warns_that_defaulting_to_pandas_if,\n)\n\nNPartitions.put(4)\n\n# Force matplotlib to not use any Xwindows backend.\nmatplotlib.use(\"Agg\")\n\n\n@pytest.mark.parametrize(\"method\", [\"items\", \"iterrows\"])\ndef test_items_iterrows(method):\n    data = test_data[\"float_nan_data\"]\n    modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data)\n\n    for modin_item, pandas_item in zip(\n        getattr(modin_df, method)(), getattr(pandas_df, method)()\n    ):\n        modin_index, modin_series = modin_item\n        pandas_index, pandas_series = pandas_item\n        df_equals(pandas_series, modin_series)\n        assert pandas_index == 
modin_index\n\n\n@pytest.mark.parametrize(\"name\", [None, \"NotPandas\"])\ndef test_itertuples_name(name):\n    data = test_data[\"float_nan_data\"]\n    modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data)\n\n    modin_it_custom = modin_df.itertuples(name=name)\n    pandas_it_custom = pandas_df.itertuples(name=name)\n    for modin_row, pandas_row in zip(modin_it_custom, pandas_it_custom):\n        np.testing.assert_equal(modin_row, pandas_row)\n\n\ndef test_itertuples_multiindex():\n    data = test_data[\"int_data\"]\n    modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data)\n\n    new_idx = pd.MultiIndex.from_tuples(\n        [(i // 4, i // 2, i) for i in range(len(modin_df.columns))]\n    )\n    modin_df.columns = new_idx\n    pandas_df.columns = new_idx\n    modin_it_custom = modin_df.itertuples()\n    pandas_it_custom = pandas_df.itertuples()\n    for modin_row, pandas_row in zip(modin_it_custom, pandas_it_custom):\n        np.testing.assert_equal(modin_row, pandas_row)\n\n\ndef test___iter__():\n    modin_df = pd.DataFrame(test_data_values[0])\n    pandas_df = pandas.DataFrame(test_data_values[0])\n\n    modin_iterator = modin_df.__iter__()\n\n    # Check that modin_iterator implements the iterator interface\n    assert hasattr(modin_iterator, \"__iter__\")\n    assert hasattr(modin_iterator, \"next\") or hasattr(modin_iterator, \"__next__\")\n\n    pd_iterator = pandas_df.__iter__()\n    assert list(modin_iterator) == list(pd_iterator)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test___contains__(request, data):\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n\n    result = False\n    key = \"Not Exist\"\n    assert result == modin_df.__contains__(key)\n    assert result == (key in modin_df)\n\n    if \"empty_data\" not in request.node.name:\n        result = True\n        key = pandas_df.columns[0]\n        assert result == modin_df.__contains__(key)\n        assert 
result == (key in modin_df)\n\n\n@pytest.mark.parametrize(\"expand_frame_repr\", [False, True])\n@pytest.mark.parametrize(\n    \"max_rows_columns\",\n    [(5, 5), (10, 10), (50, 50), (51, 51), (52, 52), (75, 75), (None, None)],\n)\n@pytest.mark.parametrize(\"frame_size\", [101, 102])\ndef test_display_options_for___repr__(max_rows_columns, expand_frame_repr, frame_size):\n    frame_data = random_state.randint(\n        RAND_LOW, RAND_HIGH, size=(frame_size, frame_size)\n    )\n    pandas_df = pandas.DataFrame(frame_data)\n    modin_df = pd.DataFrame(frame_data)\n\n    context_arg = [\n        \"display.max_rows\",\n        max_rows_columns[0],\n        \"display.max_columns\",\n        max_rows_columns[1],\n        \"display.expand_frame_repr\",\n        expand_frame_repr,\n    ]\n    with pd.option_context(*context_arg):\n        modin_df_repr = repr(modin_df)\n    with pandas.option_context(*context_arg):\n        pandas_df_repr = repr(pandas_df)\n    assert modin_df_repr == pandas_df_repr\n\n\ndef test___finalize__():\n    data = test_data_values[0]\n    # NOTE: __finalize__() defaults to pandas at the API layer.\n    with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):\n        pd.DataFrame(data).__finalize__(None)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test___copy__(data):\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n\n    modin_df_copy, pandas_df_copy = modin_df.__copy__(), pandas_df.__copy__()\n    df_equals(modin_df_copy, pandas_df_copy)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test___deepcopy__(data):\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n\n    modin_df_copy, pandas_df_copy = (\n        modin_df.__deepcopy__(),\n        pandas_df.__deepcopy__(),\n    )\n    df_equals(modin_df_copy, pandas_df_copy)\n\n\ndef test___repr__():\n    frame_data = random_state.randint(RAND_LOW, 
RAND_HIGH, size=(1000, 100))\n    pandas_df = pandas.DataFrame(frame_data)\n    modin_df = pd.DataFrame(frame_data)\n    assert repr(pandas_df) == repr(modin_df)\n\n    frame_data = random_state.randint(RAND_LOW, RAND_HIGH, size=(1000, 99))\n    pandas_df = pandas.DataFrame(frame_data)\n    modin_df = pd.DataFrame(frame_data)\n    assert repr(pandas_df) == repr(modin_df)\n\n    frame_data = random_state.randint(RAND_LOW, RAND_HIGH, size=(1000, 101))\n    pandas_df = pandas.DataFrame(frame_data)\n    modin_df = pd.DataFrame(frame_data)\n    assert repr(pandas_df) == repr(modin_df)\n\n    frame_data = random_state.randint(RAND_LOW, RAND_HIGH, size=(1000, 102))\n    pandas_df = pandas.DataFrame(frame_data)\n    modin_df = pd.DataFrame(frame_data)\n    assert repr(pandas_df) == repr(modin_df)\n\n    # __repr__ method has a different code path depending on\n    # whether the number of rows is >60; and a different code path\n    # depending on whether the number of columns is >20.\n    # Previous test cases already check the case when cols>20\n    # and rows>60. 
The cases that follow exercise the other three\n    # combinations.\n    # rows <= 60, cols > 20\n    frame_data = random_state.randint(RAND_LOW, RAND_HIGH, size=(10, 100))\n    pandas_df = pandas.DataFrame(frame_data)\n    modin_df = pd.DataFrame(frame_data)\n\n    assert repr(pandas_df) == repr(modin_df)\n\n    # rows <= 60, cols <= 20\n    frame_data = random_state.randint(RAND_LOW, RAND_HIGH, size=(10, 10))\n    pandas_df = pandas.DataFrame(frame_data)\n    modin_df = pd.DataFrame(frame_data)\n\n    assert repr(pandas_df) == repr(modin_df)\n\n    # rows > 60, cols <= 20\n    frame_data = random_state.randint(RAND_LOW, RAND_HIGH, size=(100, 10))\n    pandas_df = pandas.DataFrame(frame_data)\n    modin_df = pd.DataFrame(frame_data)\n\n    assert repr(pandas_df) == repr(modin_df)\n\n    # Empty\n    pandas_df = pandas.DataFrame(columns=[\"col{}\".format(i) for i in range(100)])\n    modin_df = pd.DataFrame(columns=[\"col{}\".format(i) for i in range(100)])\n\n    assert repr(pandas_df) == repr(modin_df)\n\n    # From Issue #1705\n    string_data = \"\"\"\"time\",\"device_id\",\"lat\",\"lng\",\"accuracy\",\"activity_1\",\"activity_1_conf\",\"activity_2\",\"activity_2_conf\",\"activity_3\",\"activity_3_conf\"\n\"2016-08-26 09:00:00.206\",2,60.186805,24.821049,33.6080017089844,\"STILL\",75,\"IN_VEHICLE\",5,\"ON_BICYCLE\",5\n\"2016-08-26 09:00:05.428\",5,60.192928,24.767222,5,\"WALKING\",62,\"ON_BICYCLE\",29,\"RUNNING\",6\n\"2016-08-26 09:00:05.818\",1,60.166382,24.700443,3,\"WALKING\",75,\"IN_VEHICLE\",5,\"ON_BICYCLE\",5\n\"2016-08-26 09:00:15.816\",1,60.166254,24.700671,3,\"WALKING\",75,\"IN_VEHICLE\",5,\"ON_BICYCLE\",5\n\"2016-08-26 09:00:16.413\",5,60.193055,24.767427,5,\"WALKING\",85,\"ON_BICYCLE\",15,\"UNKNOWN\",0\n\"2016-08-26 09:00:20.578\",3,60.152996,24.745216,3.90000009536743,\"STILL\",69,\"IN_VEHICLE\",31,\"UNKNOWN\",0\"\"\"\n    pandas_df = pandas.read_csv(io.StringIO(string_data))\n    with warns_that_defaulting_to_pandas_if(not 
current_execution_is_native()):\n        modin_df = pd.read_csv(io.StringIO(string_data))\n    assert repr(pandas_df) == repr(modin_df)\n\n\ndef test___repr__does_not_raise_attribute_column_warning():\n    # See https://github.com/modin-project/modin/issues/5380\n    df = pd.DataFrame([1])\n    with warnings.catch_warnings():\n        warnings.filterwarnings(action=\"error\", message=SET_DATAFRAME_ATTRIBUTE_WARNING)\n        repr(df)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_inplace_series_ops(data):\n    pandas_df = pandas.DataFrame(data)\n    modin_df = pd.DataFrame(data)\n\n    if len(modin_df.columns) > len(pandas_df.columns):\n        col0 = modin_df.columns[0]\n        col1 = modin_df.columns[1]\n        pandas_df[col1].dropna(inplace=True)\n        modin_df[col1].dropna(inplace=True)\n        df_equals(modin_df, pandas_df)\n\n        pandas_df[col0].fillna(0, inplace=True)\n        modin_df[col0].fillna(0, inplace=True)\n        df_equals(modin_df, pandas_df)\n\n\n# Note: Tests setting an attribute that is not an existing column label\ndef test___setattr__not_column():\n    pandas_df = pandas.DataFrame([1, 2, 3])\n    modin_df = pd.DataFrame([1, 2, 3])\n\n    pandas_df.new_col = [4, 5, 6]\n    modin_df.new_col = [4, 5, 6]\n\n    df_equals(modin_df, pandas_df)\n\n    # While `new_col` is not a column of the dataframe,\n    # it should be accessible with __getattr__.\n    assert modin_df.new_col == pandas_df.new_col\n\n\ndef test___setattr__mutating_column():\n    # Use case from issue #4577\n    pandas_df = pandas.DataFrame([[1]], columns=[\"col0\"])\n    modin_df = pd.DataFrame([[1]], columns=[\"col0\"])\n\n    # Replacing a column with a list should mutate the column in place.\n    pandas_df.col0 = [3]\n    modin_df.col0 = [3]\n\n    df_equals(modin_df, pandas_df)\n    # Check that the col0 attribute reflects the value update.\n    df_equals(modin_df.col0, pandas_df.col0)\n\n    pandas_df.col0 = 
pandas.Series([5])\n    modin_df.col0 = pd.Series([5])\n\n    # Check that the col0 attribute reflects this update\n    df_equals(modin_df, pandas_df)\n\n    pandas_df.loc[0, \"col0\"] = 4\n    modin_df.loc[0, \"col0\"] = 4\n\n    # Check that the col0 attribute reflects update via loc\n    df_equals(modin_df, pandas_df)\n    assert modin_df.col0.equals(modin_df[\"col0\"])\n\n    # Check that attempting to add a new col via attributes raises warning\n    # and adds the provided list as a new attribute and not a column.\n    with pytest.warns(\n        UserWarning,\n        match=SET_DATAFRAME_ATTRIBUTE_WARNING,\n    ):\n        modin_df.col1 = [4]\n\n    with warnings.catch_warnings():\n        warnings.filterwarnings(\n            action=\"error\",\n            message=SET_DATAFRAME_ATTRIBUTE_WARNING,\n        )\n        modin_df.col1 = [5]\n        modin_df.new_attr = 6\n        modin_df.col0 = 7\n\n    assert \"new_attr\" in dir(\n        modin_df\n    ), \"Modin attribute was not correctly added to the df.\"\n    assert (\n        \"new_attr\" not in modin_df\n    ), \"New attribute was not correctly added to columns.\"\n    assert modin_df.new_attr == 6, \"Modin attribute value was set incorrectly.\"\n    assert isinstance(\n        modin_df.col0, pd.Series\n    ), \"Scalar was not broadcasted properly to an existing column.\"\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_isin(data):\n    pandas_df = pandas.DataFrame(data)\n    modin_df = pd.DataFrame(data)\n\n    val = [1, 2, 3, 4]\n    pandas_result = pandas_df.isin(val)\n    modin_result = modin_df.isin(val)\n\n    df_equals(modin_result, pandas_result)\n\n\ndef test_isin_with_modin_objects():\n    modin_df1, pandas_df1 = create_test_dfs({\"a\": [1, 2], \"b\": [3, 4]})\n    modin_series, pandas_series = pd.Series([1, 4, 5, 6]), pandas.Series([1, 4, 5, 6])\n\n    eval_general(\n        (modin_df1, modin_series),\n        (pandas_df1, pandas_series),\n        lambda 
srs: srs[0].isin(srs[1]),\n    )\n\n    modin_df2 = modin_series.to_frame(\"a\")\n    pandas_df2 = pandas_series.to_frame(\"a\")\n\n    eval_general(\n        (modin_df1, modin_df2),\n        (pandas_df1, pandas_df2),\n        lambda srs: srs[0].isin(srs[1]),\n    )\n\n    # Check case when indices are not matching\n    modin_df1, pandas_df1 = create_test_dfs({\"a\": [1, 2], \"b\": [3, 4]}, index=[10, 11])\n\n    eval_general(\n        (modin_df1, modin_series),\n        (pandas_df1, pandas_series),\n        lambda srs: srs[0].isin(srs[1]),\n    )\n    eval_general(\n        (modin_df1, modin_df2),\n        (pandas_df1, pandas_df2),\n        lambda srs: srs[0].isin(srs[1]),\n    )\n"
  },
  {
    "path": "modin/tests/pandas/dataframe/test_join_sort.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport warnings\n\nimport matplotlib\nimport numpy as np\nimport pandas\nimport pytest\n\nimport modin.pandas as pd\nfrom modin.config import Engine, NPartitions, StorageFormat\nfrom modin.pandas.io import to_pandas\nfrom modin.tests.pandas.utils import (\n    arg_keys,\n    axis_keys,\n    axis_values,\n    bool_arg_keys,\n    bool_arg_values,\n    create_test_dfs,\n    default_to_pandas_ignore_string,\n    df_equals,\n    eval_general,\n    generate_multiindex,\n    random_state,\n    rotate_decimal_digits_or_symbols,\n    test_data,\n    test_data_keys,\n    test_data_values,\n)\nfrom modin.tests.test_utils import (\n    df_or_series_using_native_execution,\n    warns_that_defaulting_to_pandas_if,\n)\n\nNPartitions.put(4)\n\n# Force matplotlib to not use any Xwindows backend.\nmatplotlib.use(\"Agg\")\n\n# Our configuration in pytest.ini requires that we explicitly catch all\n# instances of defaulting to pandas, but some test modules, like this one,\n# have too many such instances.\npytestmark = pytest.mark.filterwarnings(default_to_pandas_ignore_string)\n\n# Initialize env for storage format detection in @pytest.mark.*\npd.DataFrame()\n\n\ndef 
df_equals_and_sort(df1, df2):\n    \"\"\"Sort dataframe's rows and run ``df_equals()`` for them.\"\"\"\n    df1 = df1.sort_values(by=df1.columns.tolist(), ignore_index=True)\n    df2 = df2.sort_values(by=df2.columns.tolist(), ignore_index=True)\n    df_equals(df1, df2)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_combine(data):\n    pandas_df = pandas.DataFrame(data)\n    modin_df = pd.DataFrame(data)\n\n    modin_df.combine(modin_df + 1, lambda s1, s2: s1 if s1.count() < s2.count() else s2)\n    pandas_df.combine(\n        pandas_df + 1, lambda s1, s2: s1 if s1.count() < s2.count() else s2\n    )\n\n\n@pytest.mark.parametrize(\n    \"test_data, test_data2\",\n    [\n        (\n            np.random.randint(0, 100, size=(64, 64)),\n            np.random.randint(0, 100, size=(128, 64)),\n        ),\n        (\n            np.random.randint(0, 100, size=(128, 64)),\n            np.random.randint(0, 100, size=(64, 64)),\n        ),\n        (\n            np.random.randint(0, 100, size=(64, 64)),\n            np.random.randint(0, 100, size=(64, 128)),\n        ),\n        (\n            np.random.randint(0, 100, size=(64, 128)),\n            np.random.randint(0, 100, size=(64, 64)),\n        ),\n    ],\n)\ndef test_join(test_data, test_data2):\n    modin_df = pd.DataFrame(\n        test_data,\n        columns=[\"col{}\".format(i) for i in range(test_data.shape[1])],\n        index=pd.Index([i for i in range(1, test_data.shape[0] + 1)], name=\"key\"),\n    )\n    pandas_df = pandas.DataFrame(\n        test_data,\n        columns=[\"col{}\".format(i) for i in range(test_data.shape[1])],\n        index=pandas.Index([i for i in range(1, test_data.shape[0] + 1)], name=\"key\"),\n    )\n    modin_df2 = pd.DataFrame(\n        test_data2,\n        columns=[\"col{}\".format(i) for i in range(test_data2.shape[1])],\n        index=pd.Index([i for i in range(1, test_data2.shape[0] + 1)], name=\"key\"),\n    )\n    pandas_df2 = 
pandas.DataFrame(\n        test_data2,\n        columns=[\"col{}\".format(i) for i in range(test_data2.shape[1])],\n        index=pandas.Index([i for i in range(1, test_data2.shape[0] + 1)], name=\"key\"),\n    )\n\n    hows = [\"inner\", \"left\", \"right\", \"outer\"]\n    ons = [\"col33\", \"col34\"]\n    sorts = [False, True]\n    assert len(ons) == len(sorts), \"the loop below is designed for this condition\"\n    for i in range(len(hows)):\n        for j in range(len(ons)):\n            modin_result = modin_df.join(\n                modin_df2,\n                how=hows[i],\n                on=ons[j],\n                sort=sorts[j],\n                lsuffix=\"_caller\",\n                rsuffix=\"_other\",\n            )\n            pandas_result = pandas_df.join(\n                pandas_df2,\n                how=hows[i],\n                on=ons[j],\n                sort=sorts[j],\n                lsuffix=\"_caller\",\n                rsuffix=\"_other\",\n            )\n            if sorts[j]:\n                # sorting in `join` is implemented through range partitioning technique\n                # therefore the order of the rows after it does not match the pandas,\n                # so additional sorting is needed in order to get the same result as for pandas\n                df_equals_and_sort(modin_result, pandas_result)\n            else:\n                df_equals(modin_result, pandas_result)\n\n    frame_data = {\n        \"col1\": [0, 1, 2, 3],\n        \"col2\": [4, 5, 6, 7],\n        \"col3\": [8, 9, 0, 1],\n        \"col4\": [2, 4, 5, 6],\n    }\n\n    modin_df = pd.DataFrame(frame_data)\n    pandas_df = pandas.DataFrame(frame_data)\n\n    frame_data2 = {\"col5\": [0], \"col6\": [1]}\n    modin_df2 = pd.DataFrame(frame_data2)\n    pandas_df2 = pandas.DataFrame(frame_data2)\n\n    join_types = [\"left\", \"right\", \"outer\", \"inner\"]\n    for how in join_types:\n        modin_join = modin_df.join(modin_df2, how=how)\n        pandas_join = 
pandas_df.join(pandas_df2, how=how)\n        df_equals(modin_join, pandas_join)\n\n    frame_data3 = {\"col7\": [1, 2, 3, 5, 6, 7, 8]}\n\n    modin_df3 = pd.DataFrame(frame_data3)\n    pandas_df3 = pandas.DataFrame(frame_data3)\n\n    join_types = [\"left\", \"outer\", \"inner\"]\n    for how in join_types:\n        modin_join = modin_df.join([modin_df2, modin_df3], how=how)\n        pandas_join = pandas_df.join([pandas_df2, pandas_df3], how=how)\n        df_equals(modin_join, pandas_join)\n\n\n@pytest.mark.parametrize(\"how\", [\"left\", \"inner\", \"right\"])\ndef test_join_empty(how):\n    data = np.random.randint(0, 100, size=(64, 64))\n    eval_general(\n        *create_test_dfs(data),\n        lambda df: df.join(df.iloc[:0], on=1, how=how, lsuffix=\"_caller\"),\n    )\n\n\ndef test_join_cross_6786():\n    data = [[7, 8, 9], [10, 11, 12]]\n    modin_df, pandas_df = create_test_dfs(data, columns=[\"x\", \"y\", \"z\"])\n\n    modin_join = modin_df.join(\n        modin_df[[\"x\"]].set_axis([\"p\", \"q\"], axis=0), how=\"cross\", lsuffix=\"p\"\n    )\n    pandas_join = pandas_df.join(\n        pandas_df[[\"x\"]].set_axis([\"p\", \"q\"], axis=0), how=\"cross\", lsuffix=\"p\"\n    )\n    df_equals(modin_join, pandas_join)\n\n\ndef test_join_5203():\n    data = np.ones([2, 4])\n    kwargs = {\"columns\": [\"a\", \"b\", \"c\", \"d\"]}\n    modin_dfs, pandas_dfs = [None] * 3, [None] * 3\n    for idx in range(len(modin_dfs)):\n        modin_dfs[idx], pandas_dfs[idx] = create_test_dfs(data, **kwargs)\n\n    for dfs in (modin_dfs, pandas_dfs):\n        with pytest.raises(\n            ValueError,\n            match=\"Joining multiple DataFrames only supported for joining on index\",\n        ):\n            dfs[0].join([dfs[1], dfs[2]], how=\"inner\", on=\"a\")\n\n\ndef test_join_6602():\n    abbreviations = pd.Series(\n        [\"Major League Baseball\", \"National Basketball Association\"],\n        index=[\"MLB\", \"NBA\"],\n    )\n    teams = pd.DataFrame(\n        
{\n            \"name\": [\"Mariners\", \"Lakers\"] * 50,\n            \"league_abbreviation\": [\"MLB\", \"NBA\"] * 50,\n        }\n    )\n\n    with warnings.catch_warnings():\n        # check that join doesn't show UserWarning\n        warnings.filterwarnings(\n            \"error\", \"Distributing <class 'dict'> object\", category=UserWarning\n        )\n        teams.set_index(\"league_abbreviation\").join(abbreviations.rename(\"league_name\"))\n\n\n@pytest.mark.parametrize(\n    \"test_data, test_data2\",\n    [\n        (\n            np.random.randint(0, 100, size=(64, 64)),\n            np.random.randint(0, 100, size=(128, 64)),\n        ),\n        (\n            np.random.randint(0, 100, size=(128, 64)),\n            np.random.randint(0, 100, size=(64, 64)),\n        ),\n        (\n            np.random.randint(0, 100, size=(64, 64)),\n            np.random.randint(0, 100, size=(64, 128)),\n        ),\n        (\n            np.random.randint(0, 100, size=(64, 128)),\n            np.random.randint(0, 100, size=(64, 64)),\n        ),\n    ],\n)\ndef test_merge(test_data, test_data2):\n    modin_df = pd.DataFrame(\n        test_data,\n        columns=[\"col{}\".format(i) for i in range(test_data.shape[1])],\n        index=pd.Index([i for i in range(1, test_data.shape[0] + 1)], name=\"key\"),\n    )\n    pandas_df = pandas.DataFrame(\n        test_data,\n        columns=[\"col{}\".format(i) for i in range(test_data.shape[1])],\n        index=pandas.Index([i for i in range(1, test_data.shape[0] + 1)], name=\"key\"),\n    )\n    modin_df2 = pd.DataFrame(\n        test_data2,\n        columns=[\"col{}\".format(i) for i in range(test_data2.shape[1])],\n        index=pd.Index([i for i in range(1, test_data2.shape[0] + 1)], name=\"key\"),\n    )\n    pandas_df2 = pandas.DataFrame(\n        test_data2,\n        columns=[\"col{}\".format(i) for i in range(test_data2.shape[1])],\n        index=pandas.Index([i for i in range(1, test_data2.shape[0] + 1)], 
name=\"key\"),\n    )\n\n    hows = [\"left\", \"inner\", \"right\"]\n    ons = [\"col33\", [\"col33\", \"col34\"]]\n    sorts = [False, True]\n    assert len(ons) == len(sorts), \"the loop below is designed for this condition\"\n    for i in range(len(hows)):\n        for j in range(len(ons)):\n            modin_result = modin_df.merge(\n                modin_df2, how=hows[i], on=ons[j], sort=sorts[j]\n            )\n            pandas_result = pandas_df.merge(\n                pandas_df2, how=hows[i], on=ons[j], sort=sorts[j]\n            )\n            # FIXME: https://github.com/modin-project/modin/issues/2246\n            df_equals_and_sort(modin_result, pandas_result)\n\n            modin_result = modin_df.merge(\n                modin_df2,\n                how=hows[i],\n                left_on=\"key\",\n                right_on=\"key\",\n                sort=sorts[j],\n            )\n            pandas_result = pandas_df.merge(\n                pandas_df2,\n                how=hows[i],\n                left_on=\"key\",\n                right_on=\"key\",\n                sort=sorts[j],\n            )\n            # FIXME: https://github.com/modin-project/modin/issues/2246\n            df_equals_and_sort(modin_result, pandas_result)\n\n    # Test for issue #1771\n    modin_df = pd.DataFrame({\"name\": np.arange(40)})\n    modin_df2 = pd.DataFrame({\"name\": [39], \"position\": [0]})\n    pandas_df = pandas.DataFrame({\"name\": np.arange(40)})\n    pandas_df2 = pandas.DataFrame({\"name\": [39], \"position\": [0]})\n    modin_result = modin_df.merge(modin_df2, on=\"name\", how=\"inner\")\n    pandas_result = pandas_df.merge(pandas_df2, on=\"name\", how=\"inner\")\n    # FIXME: https://github.com/modin-project/modin/issues/2246\n    df_equals_and_sort(modin_result, pandas_result)\n\n    frame_data = {\n        \"col1\": [0, 1, 2, 3],\n        \"col2\": [4, 5, 6, 7],\n        \"col3\": [8, 9, 0, 1],\n        \"col4\": [2, 4, 5, 6],\n    }\n\n    modin_df = 
pd.DataFrame(frame_data)\n    pandas_df = pandas.DataFrame(frame_data)\n\n    frame_data2 = {\"col1\": [0, 1, 2], \"col2\": [1, 5, 6]}\n    modin_df2 = pd.DataFrame(frame_data2)\n    pandas_df2 = pandas.DataFrame(frame_data2)\n\n    join_types = [\"outer\", \"inner\"]\n    for how in join_types:\n        # Defaults\n        modin_result = modin_df.merge(modin_df2, how=how)\n        pandas_result = pandas_df.merge(pandas_df2, how=how)\n        # FIXME: https://github.com/modin-project/modin/issues/2246\n        df_equals_and_sort(modin_result, pandas_result)\n\n        # left_on and right_index\n        modin_result = modin_df.merge(\n            modin_df2, how=how, left_on=\"col1\", right_index=True\n        )\n        pandas_result = pandas_df.merge(\n            pandas_df2, how=how, left_on=\"col1\", right_index=True\n        )\n        # FIXME: https://github.com/modin-project/modin/issues/2246\n        df_equals_and_sort(modin_result, pandas_result)\n\n        # left_index and right_on\n        modin_result = modin_df.merge(\n            modin_df2, how=how, left_index=True, right_on=\"col1\"\n        )\n        pandas_result = pandas_df.merge(\n            pandas_df2, how=how, left_index=True, right_on=\"col1\"\n        )\n        # FIXME: https://github.com/modin-project/modin/issues/2246\n        df_equals_and_sort(modin_result, pandas_result)\n\n        # left_on and right_on col1\n        modin_result = modin_df.merge(\n            modin_df2, how=how, left_on=\"col1\", right_on=\"col1\"\n        )\n        pandas_result = pandas_df.merge(\n            pandas_df2, how=how, left_on=\"col1\", right_on=\"col1\"\n        )\n        # FIXME: https://github.com/modin-project/modin/issues/2246\n        df_equals_and_sort(modin_result, pandas_result)\n\n        # left_on and right_on col2\n        modin_result = modin_df.merge(\n            modin_df2, how=how, left_on=\"col2\", right_on=\"col2\"\n        )\n        pandas_result = pandas_df.merge(\n            
pandas_df2, how=how, left_on=\"col2\", right_on=\"col2\"\n        )\n        # FIXME: https://github.com/modin-project/modin/issues/2246\n        df_equals_and_sort(modin_result, pandas_result)\n\n        # left_index and right_index\n        modin_result = modin_df.merge(\n            modin_df2, how=how, left_index=True, right_index=True\n        )\n        pandas_result = pandas_df.merge(\n            pandas_df2, how=how, left_index=True, right_index=True\n        )\n        # FIXME: https://github.com/modin-project/modin/issues/2246\n        df_equals_and_sort(modin_result, pandas_result)\n\n    # Cannot merge a Series without a name\n    ps = pandas.Series(frame_data2.get(\"col1\"))\n    ms = pd.Series(frame_data2.get(\"col1\"))\n    eval_general(\n        modin_df,\n        pandas_df,\n        lambda df: df.merge(ms if isinstance(df, pd.DataFrame) else ps),\n        # FIXME: https://github.com/modin-project/modin/issues/2246\n        comparator=df_equals_and_sort,\n        expected_exception=ValueError(\"Cannot merge a Series without a name\"),\n    )\n\n    # merge a Series with a name\n    ps = pandas.Series(frame_data2.get(\"col1\"), name=\"col1\")\n    ms = pd.Series(frame_data2.get(\"col1\"), name=\"col1\")\n    eval_general(\n        modin_df,\n        pandas_df,\n        lambda df: df.merge(ms if isinstance(df, pd.DataFrame) else ps),\n        # FIXME: https://github.com/modin-project/modin/issues/2246\n        comparator=df_equals_and_sort,\n    )\n\n    with pytest.raises(TypeError):\n        modin_df.merge(\"Non-valid type\")\n\n\n@pytest.mark.parametrize(\"how\", [\"left\", \"inner\", \"right\"])\ndef test_merge_empty(how):\n    data = np.random.randint(0, 100, size=(64, 64))\n    eval_general(*create_test_dfs(data), lambda df: df.merge(df.iloc[:0], how=how))\n\n\ndef test_merge_with_mi_columns():\n    modin_df1, pandas_df1 = create_test_dfs(\n        {\n            (\"col0\", \"a\"): [1, 2, 3, 4],\n            (\"col0\", \"b\"): [2, 3, 4, 5],\n     
       (\"col1\", \"a\"): [3, 4, 5, 6],\n        }\n    )\n\n    modin_df2, pandas_df2 = create_test_dfs(\n        {\n            (\"col0\", \"a\"): [1, 2, 3, 4],\n            (\"col0\", \"c\"): [2, 3, 4, 5],\n            (\"col1\", \"a\"): [3, 4, 5, 6],\n        }\n    )\n\n    eval_general(\n        (modin_df1, modin_df2),\n        (pandas_df1, pandas_df2),\n        lambda dfs: dfs[0].merge(dfs[1], on=[(\"col0\", \"a\")]),\n    )\n\n\n@pytest.mark.parametrize(\"has_index_cache\", [True, False])\ndef test_merge_on_index(has_index_cache):\n    modin_df1, pandas_df1 = create_test_dfs(\n        {\n            \"idx_key1\": [1, 2, 3, 4],\n            \"idx_key2\": [2, 3, 4, 5],\n            \"idx_key3\": [3, 4, 5, 6],\n            \"data_col1\": [10, 2, 3, 4],\n            \"col_key1\": [3, 4, 5, 6],\n            \"col_key2\": [3, 4, 5, 6],\n        }\n    )\n\n    modin_df1 = modin_df1.set_index([\"idx_key1\", \"idx_key2\"])\n    pandas_df1 = pandas_df1.set_index([\"idx_key1\", \"idx_key2\"])\n\n    modin_df2, pandas_df2 = create_test_dfs(\n        {\n            \"idx_key1\": [4, 3, 2, 1],\n            \"idx_key2\": [5, 4, 3, 2],\n            \"idx_key3\": [6, 5, 4, 3],\n            \"data_col2\": [10, 2, 3, 4],\n            \"col_key1\": [6, 5, 4, 3],\n            \"col_key2\": [6, 5, 4, 3],\n        }\n    )\n\n    modin_df2 = modin_df2.set_index([\"idx_key2\", \"idx_key3\"])\n    pandas_df2 = pandas_df2.set_index([\"idx_key2\", \"idx_key3\"])\n\n    def setup_cache():\n        if has_index_cache:\n            modin_df1.index  # triggering index materialization\n            modin_df2.index\n            assert modin_df1._query_compiler.frame_has_index_cache\n            assert modin_df2._query_compiler.frame_has_index_cache\n        else:\n            # Propagate deferred indices to partitions\n            # The change in index is not automatically handled by Modin. 
See #3941.\n            modin_df1.index = modin_df1.index\n            modin_df1._to_pandas()\n            modin_df1._query_compiler.set_frame_index_cache(None)\n            modin_df2.index = modin_df2.index\n            modin_df2._to_pandas()\n            modin_df2._query_compiler.set_frame_index_cache(None)\n\n    for on in (\n        [\"col_key1\", \"idx_key1\"],\n        [\"col_key1\", \"idx_key2\"],\n        [\"col_key1\", \"idx_key3\"],\n        [\"idx_key1\"],\n        [\"idx_key2\"],\n        [\"idx_key3\"],\n    ):\n        setup_cache()\n        eval_general(\n            (modin_df1, modin_df2),\n            (pandas_df1, pandas_df2),\n            lambda dfs: dfs[0].merge(dfs[1], on=on),\n        )\n\n    for left_on, right_on in (\n        ([\"idx_key1\"], [\"col_key1\"]),\n        ([\"col_key1\"], [\"idx_key3\"]),\n        ([\"idx_key1\"], [\"idx_key3\"]),\n        ([\"idx_key2\"], [\"idx_key2\"]),\n        ([\"col_key1\", \"idx_key2\"], [\"col_key2\", \"idx_key2\"]),\n    ):\n        setup_cache()\n        eval_general(\n            (modin_df1, modin_df2),\n            (pandas_df1, pandas_df2),\n            lambda dfs: dfs[0].merge(dfs[1], left_on=left_on, right_on=right_on),\n        )\n\n\n@pytest.mark.parametrize(\n    \"left_index\", [[], [\"key\"], [\"key\", \"b\"], [\"key\", \"b\", \"c\"], [\"b\"], [\"b\", \"c\"]]\n)\n@pytest.mark.parametrize(\n    \"right_index\", [[], [\"key\"], [\"key\", \"e\"], [\"key\", \"e\", \"f\"], [\"e\"], [\"e\", \"f\"]]\n)\ndef test_merge_on_single_index(left_index, right_index):\n    \"\"\"\n    Test ``.merge()`` method when merging on a single column, that is located in an index level of one of the frames.\n    \"\"\"\n    modin_df1, pandas_df1 = create_test_dfs(\n        {\"b\": [3, 4, 4, 5], \"key\": [1, 1, 2, 2], \"c\": [2, 3, 2, 2], \"d\": [2, 1, 3, 1]}\n    )\n    if len(left_index):\n        modin_df1 = modin_df1.set_index(left_index)\n        pandas_df1 = pandas_df1.set_index(left_index)\n\n    modin_df2, 
pandas_df2 = create_test_dfs(\n        {\"e\": [3, 4, 4, 5], \"f\": [2, 3, 2, 2], \"key\": [1, 1, 2, 2], \"h\": [2, 1, 3, 1]}\n    )\n    if len(right_index):\n        modin_df2 = modin_df2.set_index(right_index)\n        pandas_df2 = pandas_df2.set_index(right_index)\n    eval_general(\n        (modin_df1, modin_df2),\n        (pandas_df1, pandas_df2),\n        lambda dfs: dfs[0].merge(dfs[1], on=\"key\"),\n    )\n\n\n@pytest.mark.parametrize(\"axis\", [0, 1])\n@pytest.mark.parametrize(\"ascending\", [False, True])\n@pytest.mark.parametrize(\"na_position\", [\"first\", \"last\"], ids=[\"first\", \"last\"])\ndef test_sort_index(axis, ascending, na_position):\n    data = test_data[\"float_nan_data\"]\n    modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data)\n\n    # Change index value so sorting will actually make a difference\n    if axis == 0:\n        length = len(modin_df.index)\n        for df in [modin_df, pandas_df]:\n            df.index = [(i - length / 2) % length for i in range(length)]\n\n    dfs = [modin_df, pandas_df]\n    # Add NaNs to sorted index\n    for idx in range(len(dfs)):\n        sort_index = dfs[idx].axes[axis]\n        dfs[idx] = dfs[idx].set_axis(\n            [np.nan if i % 2 == 0 else sort_index[i] for i in range(len(sort_index))],\n            axis=axis,\n            copy=False,\n        )\n    modin_df, pandas_df = dfs\n\n    eval_general(\n        modin_df,\n        pandas_df,\n        lambda df: df.sort_index(\n            axis=axis, ascending=ascending, na_position=na_position\n        ),\n    )\n\n\n@pytest.mark.parametrize(\"axis\", [\"rows\", \"columns\"])\ndef test_sort_index_inplace(axis):\n    data = test_data[\"int_data\"]\n    modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data)\n\n    for df in [modin_df, pandas_df]:\n        df.sort_index(axis=axis, inplace=True)\n    df_equals(modin_df, pandas_df)\n\n\n@pytest.mark.parametrize(\n    \"sort_remaining\", bool_arg_values, 
ids=arg_keys(\"sort_remaining\", bool_arg_keys)\n)\ndef test_sort_multiindex(sort_remaining):\n    data = test_data[\"int_data\"]\n    modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data)\n\n    for index in [\"index\", \"columns\"]:\n        new_index = generate_multiindex(len(getattr(modin_df, index)))\n        for df in [modin_df, pandas_df]:\n            setattr(df, index, new_index)\n\n    for kwargs in [{\"level\": 0}, {\"axis\": 0}, {\"axis\": 1}]:\n        with warns_that_defaulting_to_pandas_if(\n            not df_or_series_using_native_execution(modin_df)\n        ):\n            df_equals(\n                modin_df.sort_index(sort_remaining=sort_remaining, **kwargs),\n                pandas_df.sort_index(sort_remaining=sort_remaining, **kwargs),\n            )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\n    \"by\",\n    [\n        pytest.param(\n            \"first\",\n            marks=pytest.mark.exclude_by_default,\n        ),\n        pytest.param(\n            \"first,last\",\n            marks=pytest.mark.exclude_by_default,\n        ),\n        \"first,last,middle\",\n    ],\n)\n@pytest.mark.parametrize(\"axis\", axis_values, ids=axis_keys)\n@pytest.mark.parametrize(\n    \"ascending\",\n    [False, True] + [\"list_first_True\", \"list_first_False\"],\n    ids=arg_keys(\n        \"ascending\", [\"False\", \"True\"] + [\"list_first_True\", \"list_first_False\"]\n    ),\n)\n@pytest.mark.parametrize(\n    \"inplace\", bool_arg_values, ids=arg_keys(\"inplace\", bool_arg_keys)\n)\n@pytest.mark.parametrize(\n    \"kind\",\n    [\n        pytest.param(\n            \"mergesort\",\n            marks=pytest.mark.exclude_by_default,\n        ),\n        \"quicksort\",\n        pytest.param(\n            \"heapsort\",\n            marks=pytest.mark.exclude_by_default,\n        ),\n    ],\n)\n@pytest.mark.parametrize(\"na_position\", [\"first\", \"last\"], ids=[\"first\", 
\"last\"])\n@pytest.mark.parametrize(\n    \"ignore_index\",\n    bool_arg_values,\n    ids=arg_keys(\"ignore_index\", bool_arg_keys),\n)\n@pytest.mark.parametrize(\"key\", [None, rotate_decimal_digits_or_symbols])\ndef test_sort_values(\n    data, by, axis, ascending, inplace, kind, na_position, ignore_index, key\n):\n    if ascending is None:\n        pytest.skip(\"None is not a valid value for ascending.\")\n    if (axis == 1 or axis == \"columns\") and ignore_index:\n        pytest.skip(\"Pandas bug #39426 which is fixed in Pandas 1.3\")\n\n    if ascending is None and key is not None:\n        pytest.skip(\"Pandas bug #41318\")\n\n    if \"multiindex\" in by:\n        index = generate_multiindex(len(data[list(data.keys())[0]]), nlevels=2)\n        columns = generate_multiindex(len(data.keys()), nlevels=2)\n        data = {columns[ind]: data[key] for ind, key in enumerate(data)}\n    else:\n        index = None\n        columns = None\n\n    modin_df = pd.DataFrame(data, index=index, columns=columns)\n    pandas_df = pandas.DataFrame(data, index=index, columns=columns)\n\n    index = modin_df.index if axis == 1 or axis == \"columns\" else modin_df.columns\n\n    # Parse \"by\" spec\n    by_list = []\n    for b in by.split(\",\"):\n        if b == \"first\":\n            by_list.append(index[0])\n        elif b == \"last\":\n            by_list.append(index[-1])\n        elif b == \"middle\":\n            by_list.append(index[len(index) // 2])\n        elif b.startswith(\"multiindex_level\"):\n            by_list.append(index.names[int(b[len(\"multiindex_level\") :])])\n        else:\n            raise Exception('Unknown \"by\" specifier:' + b)\n\n    # Create \"ascending\" list\n    if ascending in [\"list_first_True\", \"list_first_False\"]:\n        start = 0 if ascending == \"list_first_False\" else 1\n        ascending = [i & 1 > 0 for i in range(start, len(by_list) + start)]\n\n    eval_general(\n        modin_df,\n        pandas_df,\n        lambda df: 
df.sort_values(\n            by_list,\n            axis=axis,\n            ascending=ascending,\n            inplace=inplace,\n            kind=kind,\n            na_position=na_position,\n            ignore_index=ignore_index,\n            key=key,\n        ),\n        __inplace__=inplace,\n    )\n\n\ndef test_sort_values_descending_with_only_two_bins():\n    # test case from https://github.com/modin-project/modin/issues/5781\n    part1 = pd.DataFrame({\"a\": [1, 2, 3, 4]})\n    part2 = pd.DataFrame({\"a\": [5, 6, 7, 8]})\n\n    modin_df = pd.concat([part1, part2])\n    pandas_df = modin_df._to_pandas()\n\n    if StorageFormat.get() == \"Pandas\":\n        assert modin_df._query_compiler._modin_frame._partitions.shape == (2, 1)\n\n    eval_general(\n        modin_df, pandas_df, lambda df: df.sort_values(by=\"a\", ascending=False)\n    )\n\n\n@pytest.mark.parametrize(\"ignore_index\", [True, False])\ndef test_sort_values_preserve_index_names(ignore_index):\n    modin_df, pandas_df = create_test_dfs(\n        np.random.choice(128, 128, replace=False).reshape((128, 1))\n    )\n\n    pandas_df.index.names, pandas_df.columns.names = [\"custom_name\"], [\"custom_name\"]\n    modin_df.index.names, modin_df.columns.names = [\"custom_name\"], [\"custom_name\"]\n    # workaround for #1618 to actually propagate index change\n    modin_df.index = modin_df.index\n    modin_df.columns = modin_df.columns\n\n    def comparator(df1, df2):\n        assert df1.index.names == df2.index.names\n        assert df1.columns.names == df2.columns.names\n        df_equals(df1, df2)\n\n    eval_general(\n        modin_df,\n        pandas_df,\n        lambda df: df.sort_values(df.columns[0], ignore_index=ignore_index),\n        comparator=comparator,\n    )\n\n\n@pytest.mark.parametrize(\"ascending\", [True, False])\ndef test_sort_values_with_one_partition(ascending):\n    # Test case from https://github.com/modin-project/modin/issues/5859\n    modin_df, pandas_df = create_test_dfs(\n        
np.array([[\"hello\", \"goodbye\"], [\"hello\", \"Hello\"]])\n    )\n\n    if StorageFormat.get() == \"Pandas\":\n        assert modin_df._query_compiler._modin_frame._partitions.shape == (1, 1)\n\n    eval_general(\n        modin_df, pandas_df, lambda df: df.sort_values(by=1, ascending=ascending)\n    )\n\n\ndef test_sort_overpartitioned_df():\n    # First we test when the final df will have only 1 row and column partition.\n    data = [[4, 5, 6], [1, 2, 3]]\n    modin_df = pd.concat([pd.DataFrame(row).T for row in data]).reset_index(drop=True)\n    pandas_df = pandas.DataFrame(data)\n\n    eval_general(modin_df, pandas_df, lambda df: df.sort_values(by=0))\n\n    # Next we test when the final df will only have 1 row, but starts with multiple column\n    # partitions.\n    data = [list(range(100)), list(range(100, 200))]\n    modin_df = pd.concat([pd.DataFrame(row).T for row in data]).reset_index(drop=True)\n    pandas_df = pandas.DataFrame(data)\n\n    eval_general(modin_df, pandas_df, lambda df: df.sort_values(by=0))\n\n    # Next we test when the final df will have multiple row partitions.\n    data = np.random.choice(650, 650, replace=False).reshape((65, 10))\n    modin_df = pd.concat([pd.DataFrame(row).T for row in data]).reset_index(drop=True)\n    pandas_df = pandas.DataFrame(data)\n\n    eval_general(modin_df, pandas_df, lambda df: df.sort_values(by=0))\n\n    old_nptns = NPartitions.get()\n    NPartitions.put(24)\n    try:\n        # Next we test when there's only one row per partition.\n        data = np.random.choice(650, 650, replace=False).reshape((65, 10))\n        modin_df = pd.concat([pd.DataFrame(row).T for row in data]).reset_index(\n            drop=True\n        )\n        pandas_df = pandas.DataFrame(data)\n\n        eval_general(modin_df, pandas_df, lambda df: df.sort_values(by=0))\n\n        # And again, when there's more than one column partition.\n        data = np.random.choice(6500, 6500, replace=False).reshape((65, 100))\n        
modin_df = pd.concat([pd.DataFrame(row).T for row in data]).reset_index(\n            drop=True\n        )\n        pandas_df = pandas.DataFrame(data)\n\n        eval_general(modin_df, pandas_df, lambda df: df.sort_values(by=0))\n\n        # Additionally, we should test when we have a number of partitions\n        # that doesn't divide cleanly into our desired number of partitions.\n        # In this case, we start with 17 partitions, and want 2.\n        NPartitions.put(21)\n        data = np.random.choice(6500, 6500, replace=False).reshape((65, 100))\n        modin_df = pd.concat([pd.DataFrame(row).T for row in data]).reset_index(\n            drop=True\n        )\n        pandas_df = pandas.DataFrame(data)\n\n        eval_general(modin_df, pandas_df, lambda df: df.sort_values(by=0))\n\n    finally:\n        NPartitions.put(old_nptns)\n\n\ndef test_sort_values_with_duplicates():\n    modin_df = pd.DataFrame({\"col\": [2, 1, 1]}, index=[1, 1, 0])\n    pandas_df = pandas.DataFrame({\"col\": [2, 1, 1]}, index=[1, 1, 0])\n\n    key = modin_df.columns[0]\n    modin_result = modin_df.sort_values(key, inplace=False)\n    pandas_result = pandas_df.sort_values(key, inplace=False)\n    df_equals(modin_result, pandas_result)\n\n    modin_df.sort_values(key, inplace=True)\n    pandas_df.sort_values(key, inplace=True)\n    df_equals(modin_df, pandas_df)\n\n\ndef test_sort_values_with_string_index():\n    modin_df = pd.DataFrame({\"col\": [25, 17, 1]}, index=[\"ccc\", \"bbb\", \"aaa\"])\n    pandas_df = pandas.DataFrame({\"col\": [25, 17, 1]}, index=[\"ccc\", \"bbb\", \"aaa\"])\n\n    key = modin_df.columns[0]\n    modin_result = modin_df.sort_values(key, inplace=False)\n    pandas_result = pandas_df.sort_values(key, inplace=False)\n    df_equals(modin_result, pandas_result)\n\n    modin_df.sort_values(key, inplace=True)\n    pandas_df.sort_values(key, inplace=True)\n    df_equals(modin_df, pandas_df)\n\n\n@pytest.mark.skipif(\n    StorageFormat.get() != \"Pandas\",\n    
reason=\"We only need to test this case where sort does not default to pandas.\",\n)\n@pytest.mark.parametrize(\"ascending\", [True, False], ids=[\"True\", \"False\"])\n@pytest.mark.parametrize(\"na_position\", [\"first\", \"last\"], ids=[\"first\", \"last\"])\ndef test_sort_values_with_only_one_non_na_row_in_partition(ascending, na_position):\n    pandas_df = pandas.DataFrame(\n        np.random.rand(1000, 100), columns=[f\"col {i}\" for i in range(100)]\n    )\n    # Need to ensure that one of the partitions has all NA values except for one row\n    pandas_df.iloc[340:] = np.nan\n    pandas_df.iloc[-1] = -4.0\n    modin_df = pd.DataFrame(pandas_df)\n    eval_general(\n        modin_df,\n        pandas_df,\n        lambda df: df.sort_values(\n            \"col 3\", ascending=ascending, na_position=na_position\n        ),\n    )\n\n\n@pytest.mark.skipif(\n    Engine.get() not in (\"Ray\", \"Unidist\", \"Dask\"),\n    reason=\"We only need to test this case where sort does not default to pandas.\",\n)\ndef test_sort_values_with_sort_key_on_partition_boundary():\n    modin_df = pd.DataFrame(\n        np.random.rand(1000, 100), columns=[f\"col {i}\" for i in range(100)]\n    )\n    sort_key = modin_df.columns[modin_df._query_compiler._modin_frame.column_widths[0]]\n    eval_general(modin_df, modin_df._to_pandas(), lambda df: df.sort_values(sort_key))\n\n\ndef test_where():\n    columns = list(\"abcdefghij\")\n\n    frame_data = random_state.randn(100, 10)\n    modin_df, pandas_df = create_test_dfs(frame_data, columns=columns)\n    pandas_cond_df = pandas_df % 5 < 2\n    modin_cond_df = modin_df % 5 < 2\n\n    pandas_result = pandas_df.where(pandas_cond_df, -pandas_df)\n    modin_result = modin_df.where(modin_cond_df, -modin_df)\n    assert all((to_pandas(modin_result) == pandas_result).all())\n\n    # test case when other is Series\n    other_data = random_state.randn(len(pandas_df))\n    modin_other, pandas_other = pd.Series(other_data), pandas.Series(other_data)\n   
 pandas_result = pandas_df.where(pandas_cond_df, pandas_other, axis=0)\n    modin_result = modin_df.where(modin_cond_df, modin_other, axis=0)\n    df_equals(modin_result, pandas_result)\n\n    # Test that we choose the right values to replace when `other` == `True`\n    # everywhere.\n    other_data = np.full(shape=pandas_df.shape, fill_value=True)\n    modin_other, pandas_other = create_test_dfs(other_data, columns=columns)\n    pandas_result = pandas_df.where(pandas_cond_df, pandas_other)\n    modin_result = modin_df.where(modin_cond_df, modin_other)\n    df_equals(modin_result, pandas_result)\n\n    other = pandas_df.loc[3]\n    pandas_result = pandas_df.where(pandas_cond_df, other, axis=1)\n    modin_result = modin_df.where(modin_cond_df, other, axis=1)\n    assert all((to_pandas(modin_result) == pandas_result).all())\n\n    other = pandas_df[\"e\"]\n    pandas_result = pandas_df.where(pandas_cond_df, other, axis=0)\n    modin_result = modin_df.where(modin_cond_df, other, axis=0)\n    assert all((to_pandas(modin_result) == pandas_result).all())\n\n    pandas_result = pandas_df.where(pandas_df < 2, True)\n    modin_result = modin_df.where(modin_df < 2, True)\n    assert all((to_pandas(modin_result) == pandas_result).all())\n\n\ndef test_where_different_axis_order():\n    # Test `where` when `cond`, `df`, and `other` each have columns and index\n    # in different orders.\n    data = test_data[\"float_nan_data\"]\n    pandas_df = pandas.DataFrame(data)\n    pandas_cond_df = pandas_df % 5 < 2\n    pandas_cond_df = pandas_cond_df.reindex(\n        columns=pandas_df.columns[::-1], index=pandas_df.index[::-1]\n    )\n    pandas_other_df = -pandas_df\n    pandas_other_df = pandas_other_df.reindex(\n        columns=pandas_df.columns[-1:].append(pandas_df.columns[:-1]),\n        index=pandas_df.index[-1:].append(pandas_df.index[:-1]),\n    )\n\n    modin_df = pd.DataFrame(pandas_df)\n    modin_cond_df = pd.DataFrame(pandas_cond_df)\n    modin_other_df = 
pd.DataFrame(pandas_other_df)\n\n    pandas_result = pandas_df.where(pandas_cond_df, pandas_other_df)\n    modin_result = modin_df.where(modin_cond_df, modin_other_df)\n    df_equals(modin_result, pandas_result)\n\n\n@pytest.mark.parametrize(\"align_axis\", [\"index\", \"columns\"])\n@pytest.mark.parametrize(\"keep_shape\", [False, True])\n@pytest.mark.parametrize(\"keep_equal\", [False, True])\ndef test_compare(align_axis, keep_shape, keep_equal):\n    kwargs = {\n        \"align_axis\": align_axis,\n        \"keep_shape\": keep_shape,\n        \"keep_equal\": keep_equal,\n    }\n    frame_data1 = random_state.randn(100, 10)\n    frame_data2 = random_state.randn(100, 10)\n    pandas_df = pandas.DataFrame(frame_data1, columns=list(\"abcdefghij\"))\n    pandas_df2 = pandas.DataFrame(frame_data2, columns=list(\"abcdefghij\"))\n    modin_df = pd.DataFrame(frame_data1, columns=list(\"abcdefghij\"))\n    modin_df2 = pd.DataFrame(frame_data2, columns=list(\"abcdefghij\"))\n\n    modin_result = modin_df.compare(modin_df2, **kwargs)\n    pandas_result = pandas_df.compare(pandas_df2, **kwargs)\n    assert to_pandas(modin_result).equals(pandas_result)\n\n    modin_result = modin_df2.compare(modin_df, **kwargs)\n    pandas_result = pandas_df2.compare(pandas_df, **kwargs)\n    assert to_pandas(modin_result).equals(pandas_result)\n\n    series_data1 = [\"a\", \"b\", \"c\", \"d\", \"e\"]\n    series_data2 = [\"a\", \"a\", \"c\", \"b\", \"e\"]\n    pandas_series1 = pandas.Series(series_data1)\n    pandas_series2 = pandas.Series(series_data2)\n    modin_series1 = pd.Series(series_data1)\n    modin_series2 = pd.Series(series_data2)\n\n    modin_result = modin_series1.compare(modin_series2, **kwargs)\n    pandas_result = pandas_series1.compare(pandas_series2, **kwargs)\n    assert to_pandas(modin_result).equals(pandas_result)\n\n    modin_result = modin_series2.compare(modin_series1, **kwargs)\n    pandas_result = pandas_series2.compare(pandas_series1, **kwargs)\n    assert 
to_pandas(modin_result).equals(pandas_result)\n\n\n@pytest.mark.parametrize(\n    \"params\",\n    [\n        {\"ascending\": True},\n        {\"normalize\": True},\n        pytest.param(\n            {\"sort\": False},\n            marks=(\n                pytest.mark.xfail(\n                    reason=\"Known issue with sort=False in `groupby()` \"\n                    + \"(https://github.com/modin-project/modin/issues/3571)\",\n                    strict=True,\n                )\n                if Engine.get() in (\"Python\", \"Ray\", \"Dask\", \"Unidist\")\n                and StorageFormat.get() != \"Base\"\n                else []\n            ),\n        ),\n    ],\n)\ndef test_value_counts(params):\n    data = [[4, 1, 3, 2], [2, 5, 6, 5], [4, 3, 3, 5]]\n    columns = [\"col1\", \"col2\", \"col3\", \"col4\"]\n\n    eval_general(\n        *create_test_dfs(data, columns=columns),\n        lambda df: df[\"col1\"].value_counts(**params),\n    )\n\n\ndef test_value_counts_with_nulls():\n    data = [[5, 6, None, 7, 7], [None, None, 5, 8]]\n    eval_general(*create_test_dfs(data), lambda df: df[0].value_counts(dropna=False))\n\n\ndef test_value_counts_with_multiindex():\n    data = [[1, 2, 2, 4]]\n    index = pd.MultiIndex.from_arrays(\n        arrays=[[\"a\", \"a\", \"b\", \"b\"], [1, 2, 1, 2]], names=(\"l1\", \"l2\")\n    )\n\n    eval_general(\n        *create_test_dfs(data, index=index),\n        lambda df: df[0].value_counts(),\n    )\n"
  },
  {
    "path": "modin/tests/pandas/dataframe/test_map_metadata.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nfrom decimal import Decimal\n\nimport matplotlib\nimport numpy as np\nimport pandas\nimport pytest\n\nimport modin.pandas as pd\nfrom modin.config import MinRowPartitionSize, NPartitions, StorageFormat\nfrom modin.core.dataframe.pandas.metadata import LazyProxyCategoricalDtype\nfrom modin.core.storage_formats.pandas.utils import split_result_of_axis_func_pandas\nfrom modin.pandas.testing import assert_index_equal, assert_series_equal\nfrom modin.tests.pandas.utils import (\n    RAND_HIGH,\n    RAND_LOW,\n    arg_keys,\n    axis_keys,\n    axis_values,\n    bool_arg_keys,\n    bool_arg_values,\n    create_test_dfs,\n    default_to_pandas_ignore_string,\n    df_equals,\n    df_is_empty,\n    eval_general,\n    indices_keys,\n    indices_values,\n    name_contains,\n    numeric_dfs,\n    random_state,\n    sort_if_range_partitioning,\n    test_data,\n    test_data_keys,\n    test_data_values,\n    test_data_with_duplicates_keys,\n    test_data_with_duplicates_values,\n    test_func_keys,\n    test_func_values,\n)\nfrom modin.tests.test_utils import (\n    current_execution_is_native,\n    df_or_series_using_native_execution,\n    
warns_that_defaulting_to_pandas_if,\n)\nfrom modin.utils import get_current_execution\n\nNPartitions.put(4)\n\n# Force matplotlib to not use any Xwindows backend.\nmatplotlib.use(\"Agg\")\n\n# Our configuration in pytest.ini requires that we explicitly catch all\n# instances of defaulting to pandas, but some test modules, like this one,\n# have too many such instances.\npytestmark = pytest.mark.filterwarnings(default_to_pandas_ignore_string)\n\n\ndef eval_insert(modin_df, pandas_df, **kwargs):\n    if \"col\" in kwargs and \"column\" not in kwargs:\n        kwargs[\"column\"] = kwargs.pop(\"col\")\n    _kwargs = {\"loc\": 0, \"column\": \"New column\"}\n    _kwargs.update(kwargs)\n\n    eval_general(\n        modin_df,\n        pandas_df,\n        operation=lambda df, **kwargs: df.insert(**kwargs),\n        __inplace__=True,\n        **_kwargs,\n    )\n\n\ndef test_indexing():\n    modin_df = pd.DataFrame(\n        dict(a=[1, 2, 3], b=[4, 5, 6], c=[7, 8, 9]), index=[\"a\", \"b\", \"c\"]\n    )\n    pandas_df = pandas.DataFrame(\n        dict(a=[1, 2, 3], b=[4, 5, 6], c=[7, 8, 9]), index=[\"a\", \"b\", \"c\"]\n    )\n\n    modin_result = modin_df\n    pandas_result = pandas_df\n    df_equals(modin_result, pandas_result)\n\n    modin_result = modin_df[\"b\"]\n    pandas_result = pandas_df[\"b\"]\n    df_equals(modin_result, pandas_result)\n\n    modin_result = modin_df[[\"b\"]]\n    pandas_result = pandas_df[[\"b\"]]\n    df_equals(modin_result, pandas_result)\n\n    modin_result = modin_df[[\"b\", \"a\"]]\n    pandas_result = pandas_df[[\"b\", \"a\"]]\n    df_equals(modin_result, pandas_result)\n\n    modin_result = modin_df.loc[\"b\"]\n    pandas_result = pandas_df.loc[\"b\"]\n    df_equals(modin_result, pandas_result)\n\n    modin_result = modin_df.loc[[\"b\"]]\n    pandas_result = pandas_df.loc[[\"b\"]]\n    df_equals(modin_result, pandas_result)\n\n    modin_result = modin_df.loc[[\"b\", \"a\"]]\n    pandas_result = pandas_df.loc[[\"b\", \"a\"]]\n    
df_equals(modin_result, pandas_result)\n\n    modin_result = modin_df.loc[[\"b\", \"a\"], [\"a\", \"c\"]]\n    pandas_result = pandas_df.loc[[\"b\", \"a\"], [\"a\", \"c\"]]\n    df_equals(modin_result, pandas_result)\n\n    modin_result = modin_df.loc[:, [\"a\", \"c\"]]\n    pandas_result = pandas_df.loc[:, [\"a\", \"c\"]]\n    df_equals(modin_result, pandas_result)\n\n    modin_result = modin_df.loc[:, [\"c\"]]\n    pandas_result = pandas_df.loc[:, [\"c\"]]\n    df_equals(modin_result, pandas_result)\n\n    modin_result = modin_df.loc[[]]\n    pandas_result = pandas_df.loc[[]]\n    df_equals(modin_result, pandas_result)\n\n\ndef test_empty_df():\n    df = pd.DataFrame(index=[\"a\", \"b\"])\n    df_is_empty(df)\n    assert_index_equal(df.index, pd.Index([\"a\", \"b\"]))\n    assert len(df.columns) == 0\n\n    df = pd.DataFrame(columns=[\"a\", \"b\"])\n    df_is_empty(df)\n    assert len(df.index) == 0\n    assert_index_equal(df.columns, pd.Index([\"a\", \"b\"]))\n\n    df = pd.DataFrame()\n    df_is_empty(df)\n    assert len(df.index) == 0\n    assert len(df.columns) == 0\n\n    df = pd.DataFrame(index=[\"a\", \"b\"])\n    df_is_empty(df)\n    assert_index_equal(df.index, pd.Index([\"a\", \"b\"]))\n    assert len(df.columns) == 0\n\n    df = pd.DataFrame(columns=[\"a\", \"b\"])\n    df_is_empty(df)\n    assert len(df.index) == 0\n    assert_index_equal(df.columns, pd.Index([\"a\", \"b\"]))\n\n    df = pd.DataFrame()\n    df_is_empty(df)\n    assert len(df.index) == 0\n    assert len(df.columns) == 0\n\n    df = pd.DataFrame()\n    pd_df = pandas.DataFrame()\n    df[\"a\"] = [1, 2, 3, 4, 5]\n    pd_df[\"a\"] = [1, 2, 3, 4, 5]\n    df_equals(df, pd_df)\n\n    df = pd.DataFrame()\n    pd_df = pandas.DataFrame()\n    df[\"a\"] = list(\"ABCDEF\")\n    pd_df[\"a\"] = list(\"ABCDEF\")\n    df_equals(df, pd_df)\n\n    df = pd.DataFrame()\n    pd_df = pandas.DataFrame()\n    df[\"a\"] = pd.Series([1, 2, 3, 4, 5])\n    pd_df[\"a\"] = pandas.Series([1, 2, 3, 4, 5])\n    
df_equals(df, pd_df)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_abs(request, data):\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n\n    try:\n        pandas_result = pandas_df.abs()\n    except Exception as err:\n        with pytest.raises(type(err)):\n            modin_df.abs()\n    else:\n        modin_result = modin_df.abs()\n        df_equals(modin_result, pandas_result)\n\n\n@pytest.mark.parametrize(\"axis\", [None, 0, 1])\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_add_prefix(data, axis):\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n\n    test_prefix = \"TEST\"\n    new_modin_df = modin_df.add_prefix(test_prefix, axis=axis)\n    new_pandas_df = pandas_df.add_prefix(test_prefix, axis=axis)\n    df_equals(new_modin_df.columns, new_pandas_df.columns)\n    # TODO(https://github.com/modin-project/modin/issues/3804):\n    # make df_equals always check dtypes.\n    df_equals(new_modin_df.dtypes, new_pandas_df.dtypes)\n\n\n@pytest.mark.parametrize(\"axis\", [None, 0, 1])\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_add_suffix(data, axis):\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n\n    test_suffix = \"TEST\"\n    new_modin_df = modin_df.add_suffix(test_suffix, axis=axis)\n    new_pandas_df = pandas_df.add_suffix(test_suffix, axis=axis)\n\n    df_equals(new_modin_df.columns, new_pandas_df.columns)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"testfunc\", test_func_values, ids=test_func_keys)\n@pytest.mark.parametrize(\n    \"na_action\", [None, \"ignore\"], ids=[\"no_na_action\", \"ignore_na\"]\n)\ndef test_applymap(data, testfunc, na_action):\n    modin_df, pandas_df = create_test_dfs(data)\n\n    with pytest.raises(ValueError):\n        x = 2\n        modin_df.applymap(x)\n\n    
eval_general(modin_df, pandas_df, lambda df: df.applymap(testfunc, na_action))\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"testfunc\", test_func_values, ids=test_func_keys)\ndef test_applymap_numeric(request, data, testfunc):\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n\n    if name_contains(request.node.name, numeric_dfs):\n        try:\n            pandas_result = pandas_df.applymap(testfunc)\n        except Exception as err:\n            with pytest.raises(type(err)):\n                modin_df.applymap(testfunc)\n        else:\n            modin_result = modin_df.applymap(testfunc)\n            df_equals(modin_result, pandas_result)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_at(data):\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n\n    key1 = modin_df.columns[0]\n    # Scalar\n    df_equals(modin_df.at[0, key1], pandas_df.at[0, key1])\n\n    # Series\n    df_equals(modin_df.loc[0].at[key1], pandas_df.loc[0].at[key1])\n\n    # Write Item\n    modin_df_copy = modin_df.copy()\n    pandas_df_copy = pandas_df.copy()\n    modin_df_copy.at[1, key1] = modin_df.at[0, key1]\n    pandas_df_copy.at[1, key1] = pandas_df.at[0, key1]\n    df_equals(modin_df_copy, pandas_df_copy)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_axes(data):\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n\n    for modin_axis, pd_axis in zip(modin_df.axes, pandas_df.axes):\n        assert np.array_equal(modin_axis, pd_axis)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_copy(data):\n    modin_df = pd.DataFrame(data)\n\n    # pandas_df is unused but there so there won't be confusing list comprehension\n    # stuff in the pytest.mark.parametrize\n    new_modin_df = modin_df.copy(deep=True)\n\n    assert new_modin_df is not 
modin_df\n    assert new_modin_df.index is not modin_df.index\n    assert new_modin_df.columns is not modin_df.columns\n    assert new_modin_df.dtypes is not modin_df.dtypes\n\n    if get_current_execution() != \"BaseOnPython\" and not current_execution_is_native():\n        assert np.array_equal(\n            new_modin_df._query_compiler._modin_frame._partitions,\n            modin_df._query_compiler._modin_frame._partitions,\n        )\n    df_equals(new_modin_df, modin_df)\n\n    # Shallow copy tests\n    modin_df = pd.DataFrame(data)\n    modin_df_cp = modin_df.copy(deep=False)\n\n    assert modin_df_cp is not modin_df\n    assert modin_df_cp.index is modin_df.index\n    assert modin_df_cp.columns is modin_df.columns\n    # FIXME: we're different from pandas here as modin doesn't copy dtypes for a shallow copy\n    # https://github.com/modin-project/modin/issues/5602\n    # assert modin_df_cp.dtypes is not modin_df.dtypes\n\n    modin_df[modin_df.columns[0]] = 0\n    df_equals(modin_df, modin_df_cp)\n\n\ndef test_copy_empty_dataframe():\n    df = pd.DataFrame(range(3))\n    res = df[:0].copy()\n    assert res.dtypes.equals(df.dtypes)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_dtypes(data):\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n\n    df_equals(modin_df.dtypes, pandas_df.dtypes)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"key\", indices_values, ids=indices_keys)\ndef test_get(data, key):\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n\n    df_equals(modin_df.get(key), pandas_df.get(key))\n    df_equals(\n        modin_df.get(key, default=\"default\"), pandas_df.get(key, default=\"default\")\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\n    \"dummy_na\", bool_arg_values, ids=arg_keys(\"dummy_na\", 
bool_arg_keys)\n)\n@pytest.mark.parametrize(\n    \"drop_first\", bool_arg_values, ids=arg_keys(\"drop_first\", bool_arg_keys)\n)\ndef test_get_dummies(request, data, dummy_na, drop_first):\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n\n    try:\n        pandas_result = pandas.get_dummies(\n            pandas_df, dummy_na=dummy_na, drop_first=drop_first\n        )\n    except Exception as err:\n        with pytest.raises(type(err)):\n            pd.get_dummies(modin_df, dummy_na=dummy_na, drop_first=drop_first)\n    else:\n        modin_result = pd.get_dummies(\n            modin_df, dummy_na=dummy_na, drop_first=drop_first\n        )\n        df_equals(modin_result, pandas_result)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_isna(data):\n    pandas_df = pandas.DataFrame(data)\n    modin_df = pd.DataFrame(data)\n\n    pandas_result = pandas_df.isna()\n    modin_result = modin_df.isna()\n\n    df_equals(modin_result, pandas_result)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_isnull(data):\n    pandas_df = pandas.DataFrame(data)\n    modin_df = pd.DataFrame(data)\n\n    pandas_result = pandas_df.isnull()\n    modin_result = modin_df.isnull()\n\n    df_equals(modin_result, pandas_result)\n\n\ndef test_astype():\n    td = pandas.DataFrame(test_data[\"int_data\"])[[\"col1\", \"index\", \"col3\", \"col4\"]]\n    modin_df = pd.DataFrame(td.values, index=td.index, columns=td.columns)\n    expected_df = pandas.DataFrame(td.values, index=td.index, columns=td.columns)\n\n    modin_df_casted = modin_df.astype(np.int32)\n    expected_df_casted = expected_df.astype(np.int32)\n    df_equals(modin_df_casted, expected_df_casted)\n\n    modin_df_casted = modin_df.astype(np.float64)\n    expected_df_casted = expected_df.astype(np.float64)\n    df_equals(modin_df_casted, expected_df_casted)\n\n    modin_df_casted = modin_df.astype(str)\n    expected_df_casted = 
expected_df.astype(str)\n    df_equals(modin_df_casted, expected_df_casted)\n\n    # pandas nullable dtype\n    modin_df_casted = modin_df.astype(\"Float64\")\n    expected_df_casted = expected_df.astype(\"Float64\")\n    df_equals(modin_df_casted, expected_df_casted)\n\n    modin_df_casted = modin_df.astype(\"category\")\n    expected_df_casted = expected_df.astype(\"category\")\n    df_equals(modin_df_casted, expected_df_casted)\n\n    dtype_dict = {\"col1\": np.int32, \"index\": np.int64, \"col3\": str}\n    modin_df_casted = modin_df.astype(dtype_dict)\n    expected_df_casted = expected_df.astype(dtype_dict)\n    df_equals(modin_df_casted, expected_df_casted)\n\n    modin_df = pd.DataFrame(index=[\"row1\"], columns=[\"col1\"])\n    modin_df[\"col1\"][\"row1\"] = 11\n    modin_df_casted = modin_df.astype(int)\n    expected_df = pandas.DataFrame(index=[\"row1\"], columns=[\"col1\"])\n    expected_df[\"col1\"][\"row1\"] = 11\n    expected_df_casted = expected_df.astype(int)\n    df_equals(modin_df_casted, expected_df_casted)\n\n    with pytest.raises(KeyError):\n        modin_df.astype({\"not_exists\": np.uint8})\n\n    # The dtypes series must have a unique index.\n    eval_general(\n        modin_df,\n        expected_df,\n        lambda df: df.astype(\n            pd.Series([str, str], index=[\"col1\", \"col1\"])\n            if isinstance(df, pd.DataFrame)\n            else pandas.Series([str, str], index=[\"col1\", \"col1\"])\n        ),\n        expected_exception=ValueError(\n            \"cannot reindex on an axis with duplicate labels\"\n        ),\n    )\n\n\n@pytest.mark.parametrize(\"errors\", [\"raise\", \"ignore\"])\ndef test_astype_errors(errors):\n    data = {\"a\": [\"a\", 2, -1]}\n    modin_df, pandas_df = create_test_dfs(data)\n    expected_exception = None\n    if errors == \"raise\":\n        pytest.xfail(reason=\"https://github.com/modin-project/modin/issues/7025\")\n    eval_general(\n        modin_df,\n        pandas_df,\n        lambda df: 
df.astype(\"int\", errors=errors),\n        # https://github.com/modin-project/modin/issues/5962\n        comparator_kwargs={\"check_dtypes\": errors != \"ignore\"},\n        expected_exception=expected_exception,\n    )\n\n\n@pytest.mark.parametrize(\"has_dtypes\", [False, True])\ndef test_astype_copy(has_dtypes):\n    data = [1]\n    modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data)\n    if not has_dtypes:\n        modin_df._query_compiler.set_frame_dtypes_cache(None)\n    eval_general(modin_df, pandas_df, lambda df: df.astype(str, copy=False))\n\n    # trivial case where copying can be avoided, behavior should match pandas\n    s1 = pd.Series([1, 2])\n    if not has_dtypes:\n        modin_df._query_compiler.set_frame_dtypes_cache(None)\n    s2 = s1.astype(\"int64\", copy=False)\n    s2[0] = 10\n    df_equals(s1, s2)\n\n\n@pytest.mark.parametrize(\"dtypes_are_dict\", [True, False])\ndef test_astype_dict_or_series_multiple_column_partitions(dtypes_are_dict):\n    # Test astype with a dtypes dict that is complex in that:\n    # - It applies to columns spanning multiple column partitions\n    # - Within a partition frame df:\n    #   - dtypes.index is not a subset of df.columns\n    #   - df.columns is not a subset of dtypes.index\n\n    modin_df, pandas_df = create_test_dfs(test_data[\"int_data\"])\n    if dtypes_are_dict:\n        new_dtypes = {}\n    else:\n        new_dtypes = pandas.Series()\n    for i, column in enumerate(pandas_df.columns):\n        if i % 3 == 1:\n            new_dtypes[column] = \"string\"\n        elif i % 3 == 2:\n            new_dtypes[column] = float\n    eval_general(modin_df, pandas_df, lambda df: df.astype(new_dtypes))\n\n\ndef test_astype_category():\n    modin_df = pd.DataFrame(\n        {\"col1\": [\"A\", \"A\", \"B\", \"B\", \"A\"], \"col2\": [1, 2, 3, 4, 5]}\n    )\n    pandas_df = pandas.DataFrame(\n        {\"col1\": [\"A\", \"A\", \"B\", \"B\", \"A\"], \"col2\": [1, 2, 3, 4, 5]}\n    )\n\n    modin_result = 
modin_df.astype({\"col1\": \"category\"})\n    pandas_result = pandas_df.astype({\"col1\": \"category\"})\n    df_equals(modin_result, pandas_result)\n    assert modin_result.dtypes.equals(pandas_result.dtypes)\n\n    modin_result = modin_df.astype(\"category\")\n    pandas_result = pandas_df.astype(\"category\")\n    df_equals(modin_result, pandas_result)\n    assert modin_result.dtypes.equals(pandas_result.dtypes)\n\n    dtype = pd.CategoricalDtype(categories=[\"A\", \"B\"])\n    modin_result = modin_df.astype({\"col1\": dtype})\n    pandas_result = pandas_df.astype({\"col1\": dtype})\n    df_equals(modin_result, pandas_result)\n    assert modin_result.dtypes.equals(pandas_result.dtypes)\n\n    dtype = pd.CategoricalDtype(categories=[\"A\", \"B\"])\n    modin_result = modin_df.astype(dtype)\n    pandas_result = pandas_df.astype(dtype)\n    df_equals(modin_result, pandas_result)\n    assert modin_result.dtypes.equals(pandas_result.dtypes)\n\n\ndef test_astype_category_large():\n    series_length = 10_000\n    modin_df = pd.DataFrame(\n        {\n            \"col1\": [\"str{0}\".format(i) for i in range(0, series_length)],\n            \"col2\": [i for i in range(0, series_length)],\n        }\n    )\n    pandas_df = pandas.DataFrame(\n        {\n            \"col1\": [\"str{0}\".format(i) for i in range(0, series_length)],\n            \"col2\": [i for i in range(0, series_length)],\n        }\n    )\n\n    modin_result = modin_df.astype({\"col1\": \"category\"})\n    pandas_result = pandas_df.astype({\"col1\": \"category\"})\n    df_equals(modin_result, pandas_result)\n    assert modin_result.dtypes.equals(pandas_result.dtypes)\n\n    modin_result = modin_df.astype(\"category\")\n    pandas_result = pandas_df.astype(\"category\")\n    df_equals(modin_result, pandas_result)\n    assert modin_result.dtypes.equals(pandas_result.dtypes)\n\n    dtype = pd.CategoricalDtype(categories=[\"str0\", \"str1\"])\n    modin_result = modin_df.astype({\"col1\": dtype})\n    
pandas_result = pandas_df.astype({\"col1\": dtype})\n    df_equals(modin_result, pandas_result)\n    assert modin_result.dtypes.equals(pandas_result.dtypes)\n\n    dtype = pd.CategoricalDtype(categories=[\"str0\", \"str1\"])\n    modin_result = modin_df.astype(dtype)\n    pandas_result = pandas_df.astype(dtype)\n    df_equals(modin_result, pandas_result)\n    assert modin_result.dtypes.equals(pandas_result.dtypes)\n\n\ndef test_astype_int64_to_astype_category_github_issue_6259():\n    eval_general(\n        *create_test_dfs(\n            {\"c0\": [0, 1, 2, 3, 4], \"par\": [\"foo\", \"boo\", \"bar\", \"foo\", \"boo\"]},\n            index=[\"a\", \"b\", \"c\", \"d\", \"e\"],\n        ),\n        lambda df: df[\"c0\"].astype(\"Int64\").astype(\"category\"),\n    )\n\n\n@pytest.mark.skipif(\n    get_current_execution() == \"BaseOnPython\" or current_execution_is_native(),\n    reason=\"BaseOnPython and NativeQueryCompiler don't have proxy categories\",\n)\nclass TestCategoricalProxyDtype:\n    \"\"\"This class contains tests and test utilities for the ``LazyProxyCategoricalDtype`` class.\"\"\"\n\n    @staticmethod\n    def _get_lazy_proxy():\n        \"\"\"\n        Build a dataframe containing a column that has a proxy type and return\n        this proxy together with an original dtype that this proxy is emulating.\n\n        Returns\n        -------\n        (LazyProxyCategoricalDtype, pandas.CategoricalDtype, modin.pandas.DataFrame)\n        \"\"\"\n        nchunks = 3\n        pandas_df = pandas.DataFrame({\"a\": [1, 1, 2, 2, 3, 2], \"b\": [1, 2, 3, 4, 5, 6]})\n        original_dtype = pandas_df.astype({\"a\": \"category\"}).dtypes[\"a\"]\n\n        chunks = split_result_of_axis_func_pandas(\n            axis=0,\n            num_splits=nchunks,\n            result=pandas_df,\n            min_block_size=MinRowPartitionSize.get(),\n            length_list=[2, 2, 2],\n        )\n\n        if StorageFormat.get() == \"Pandas\":\n            df = 
pd.concat([pd.DataFrame(chunk) for chunk in chunks])\n            assert df._query_compiler._modin_frame._partitions.shape == (nchunks, 1)\n\n            df = df.astype({\"a\": \"category\"})\n            return df.dtypes[\"a\"], original_dtype, df\n        else:\n            raise NotImplementedError()\n\n    def test_update_proxy(self):\n        \"\"\"Verify that ``LazyProxyCategoricalDtype._update_proxy`` method works as expected.\"\"\"\n        lazy_proxy, _, _ = self._get_lazy_proxy()\n        new_parent = pd.DataFrame({\"a\": [10, 20, 30]})._query_compiler._modin_frame\n\n        assert isinstance(lazy_proxy, LazyProxyCategoricalDtype)\n        # When we try to create a new proxy from the same arguments it should return itself\n        assert (\n            lazy_proxy._update_proxy(lazy_proxy._parent, lazy_proxy._column_name)\n            is lazy_proxy\n        )\n\n        # When any of the arguments is changing we should create a new proxy\n        proxy_with_new_column = lazy_proxy._update_proxy(\n            lazy_proxy._parent, \"other_column\"\n        )\n        assert proxy_with_new_column is not lazy_proxy and isinstance(\n            proxy_with_new_column, LazyProxyCategoricalDtype\n        )\n\n        # When any of the arguments is changing we should create a new proxy\n        proxy_with_new_parent = lazy_proxy._update_proxy(\n            new_parent, lazy_proxy._column_name\n        )\n        assert proxy_with_new_parent is not lazy_proxy and isinstance(\n            proxy_with_new_parent, LazyProxyCategoricalDtype\n        )\n\n        lazy_proxy.categories  # trigger materialization\n        # `._update_proxy` now should produce pandas Categoricals instead of a proxy as it already has materialized data\n        assert (\n            type(lazy_proxy._update_proxy(lazy_proxy._parent, lazy_proxy._column_name))\n            == pandas.CategoricalDtype\n        )\n\n    def test_update_proxy_implicit(self):\n        \"\"\"\n        Verify that a lazy 
proxy correctly updates its parent when passed from one parent to another.\n        \"\"\"\n        lazy_proxy, _, parent = self._get_lazy_proxy()\n        parent_frame = parent._query_compiler._modin_frame\n\n        if StorageFormat.get() == \"Pandas\":\n            assert lazy_proxy._parent is parent_frame\n        else:\n            raise NotImplementedError(\n                f\"The test is not implemented for {StorageFormat.get()} storage format\"\n            )\n\n        # Making a copy of the dataframe, the new proxy should now start pointing to the new parent\n        new_parent = parent.copy()\n        new_parent_frame = new_parent._query_compiler._modin_frame\n        new_lazy_proxy = new_parent_frame.dtypes[lazy_proxy._column_name]\n\n        if StorageFormat.get() == \"Pandas\":\n            # Make sure that the old proxy is still pointing to the old parent\n            assert lazy_proxy._parent is parent_frame\n            assert new_lazy_proxy._parent is new_parent_frame\n        else:\n            raise NotImplementedError(\n                f\"The test is not implemented for {StorageFormat.get()} storage format\"\n            )\n\n    def test_if_proxy_lazy(self):\n        \"\"\"Verify that proxy is able to pass simple comparison checks without triggering materialization.\"\"\"\n        lazy_proxy, actual_dtype, _ = self._get_lazy_proxy()\n\n        assert isinstance(lazy_proxy, LazyProxyCategoricalDtype)\n        assert not lazy_proxy._is_materialized\n\n        assert lazy_proxy == \"category\"\n        assert isinstance(lazy_proxy, pd.CategoricalDtype)\n        assert isinstance(lazy_proxy, pandas.CategoricalDtype)\n        assert str(lazy_proxy) == \"category\"\n        assert str(lazy_proxy) == str(actual_dtype)\n        assert not lazy_proxy.ordered\n        assert not lazy_proxy._is_materialized\n\n        # All further checks materialize categories\n        assert lazy_proxy == actual_dtype\n        assert actual_dtype == 
lazy_proxy\n        assert repr(lazy_proxy) == repr(actual_dtype)\n        assert lazy_proxy.categories.equals(actual_dtype.categories)\n        assert lazy_proxy._is_materialized\n\n    def test_proxy_as_dtype(self):\n        \"\"\"Verify that proxy can be used as an actual dtype.\"\"\"\n        lazy_proxy, actual_dtype, _ = self._get_lazy_proxy()\n\n        assert isinstance(lazy_proxy, LazyProxyCategoricalDtype)\n        assert not lazy_proxy._is_materialized\n\n        modin_df2, pandas_df2 = create_test_dfs({\"c\": [2, 2, 3, 4, 5, 6]})\n        eval_general(\n            (modin_df2, lazy_proxy),\n            (pandas_df2, actual_dtype),\n            lambda args: args[0].astype({\"c\": args[1]}),\n        )\n\n    def test_proxy_with_pandas_constructor(self):\n        \"\"\"Verify that users still can use pandas' constructor using `type(cat)(...)` notation.\"\"\"\n        lazy_proxy, _, _ = self._get_lazy_proxy()\n        assert isinstance(lazy_proxy, LazyProxyCategoricalDtype)\n\n        new_cat_values = pandas.Index([3, 4, 5])\n        new_category_dtype = type(lazy_proxy)(categories=new_cat_values, ordered=True)\n        assert not lazy_proxy._is_materialized\n        assert new_category_dtype._is_materialized\n        assert new_category_dtype.categories.equals(new_cat_values)\n        assert new_category_dtype.ordered\n\n\ndef test_infer_objects_single_partition():\n    data = {\"a\": [\"s\", 2, 3]}\n    modin_df = pd.DataFrame(data).iloc[1:]\n    pandas_df = pandas.DataFrame(data).iloc[1:]\n    modin_result = modin_df.infer_objects()\n    pandas_result = pandas_df.infer_objects()\n\n    df_equals(modin_result, pandas_result)\n    assert modin_result.dtypes.equals(pandas_result.dtypes)\n\n\n@pytest.mark.parametrize(\n    \"infer_objects\", bool_arg_values, ids=arg_keys(\"infer_objects\", bool_arg_keys)\n)\n@pytest.mark.parametrize(\n    \"convert_string\", bool_arg_values, ids=arg_keys(\"convert_string\", bool_arg_keys)\n)\n@pytest.mark.parametrize(\n    
\"convert_integer\", bool_arg_values, ids=arg_keys(\"convert_integer\", bool_arg_keys)\n)\n@pytest.mark.parametrize(\n    \"convert_boolean\", bool_arg_values, ids=arg_keys(\"convert_boolean\", bool_arg_keys)\n)\n@pytest.mark.parametrize(\n    \"convert_floating\", bool_arg_values, ids=arg_keys(\"convert_floating\", bool_arg_keys)\n)\n@pytest.mark.exclude_in_sanity\ndef test_convert_dtypes_single_partition(\n    infer_objects, convert_string, convert_integer, convert_boolean, convert_floating\n):\n    # Sanity check, copied from pandas documentation:\n    # https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.convert_dtypes.html\n    data = {\n        \"a\": pd.Series([1, 2, 3], dtype=np.dtype(\"int32\")),\n        \"b\": pd.Series([\"x\", \"y\", \"z\"], dtype=np.dtype(\"O\")),\n        \"c\": pd.Series([True, False, np.nan], dtype=np.dtype(\"O\")),\n        \"d\": pd.Series([\"h\", \"i\", np.nan], dtype=np.dtype(\"O\")),\n        \"e\": pd.Series([10, np.nan, 20], dtype=np.dtype(\"float\")),\n        \"f\": pd.Series([np.nan, 100.5, 200], dtype=np.dtype(\"float\")),\n    }\n    kwargs = {\n        \"infer_objects\": infer_objects,\n        \"convert_string\": convert_string,\n        \"convert_integer\": convert_integer,\n        \"convert_boolean\": convert_boolean,\n        \"convert_floating\": convert_floating,\n    }\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n    modin_result = modin_df.convert_dtypes(**kwargs)\n    pandas_result = pandas_df.convert_dtypes(**kwargs)\n    assert modin_result.dtypes.equals(pandas_result.dtypes)\n\n\n@pytest.mark.parametrize(\"dtype_backend\", [\"numpy_nullable\", \"pyarrow\"])\ndef test_convert_dtypes_dtype_backend(dtype_backend):\n    data = {\n        \"a\": pd.Series([1, 2, 3], dtype=np.dtype(\"int32\")),\n        \"b\": pd.Series([\"x\", \"y\", \"z\"], dtype=np.dtype(\"O\")),\n        \"c\": pd.Series([True, False, np.nan], dtype=np.dtype(\"O\")),\n        \"d\": pd.Series([\"h\", 
\"i\", np.nan], dtype=np.dtype(\"O\")),\n        \"e\": pd.Series([10, np.nan, 20], dtype=np.dtype(\"float\")),\n        \"f\": pd.Series([np.nan, 100.5, 200], dtype=np.dtype(\"float\")),\n    }\n\n    def comparator(df1, df2):\n        df_equals(df1, df2)\n        df_equals(df1.dtypes, df2.dtypes)\n\n    eval_general(\n        *create_test_dfs(data),\n        lambda df: df.convert_dtypes(dtype_backend=dtype_backend),\n        comparator=comparator,\n    )\n\n\n@pytest.mark.skipif(\n    current_execution_is_native(),\n    reason=\"NativeQueryCompiler does not contain partitions.\",\n)\ndef test_convert_dtypes_multiple_row_partitions():\n    # Column 0 should have string dtype\n    modin_part1 = pd.DataFrame([\"a\"]).convert_dtypes()\n    # Column 0 should have an int dtype\n    modin_part2 = pd.DataFrame([1]).convert_dtypes()\n    modin_df = pd.concat([modin_part1, modin_part2])\n    if StorageFormat.get() == \"Pandas\":\n        assert modin_df._query_compiler._modin_frame._partitions.shape == (2, 1)\n    pandas_df = pandas.DataFrame([\"a\", 1], index=[0, 0])\n    # The initial dataframes should be the same\n    df_equals(modin_df, pandas_df)\n    # TODO(https://github.com/modin-project/modin/pull/3805): delete\n    # this assert once df_equals checks dtypes\n    assert modin_df.dtypes.equals(pandas_df.dtypes)\n    modin_result = modin_df.convert_dtypes()\n    pandas_result = pandas_df.convert_dtypes()\n    df_equals(modin_result, pandas_result)\n    assert modin_result.dtypes.equals(pandas_result.dtypes)\n\n\ndef test_convert_dtypes_5653():\n    modin_part1 = pd.DataFrame({\"col1\": [\"a\", \"b\", \"c\", \"d\"]})\n    modin_part2 = pd.DataFrame({\"col1\": [None, None, None, None]})\n    modin_df = pd.concat([modin_part1, modin_part2])\n    if StorageFormat.get() == \"Pandas\":\n        assert modin_df._query_compiler._modin_frame._partitions.shape == (2, 1)\n    modin_df = modin_df.convert_dtypes()\n    assert len(modin_df.dtypes) == 1\n    assert 
modin_df.dtypes.iloc[0] == \"string\"\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"axis\", axis_values, ids=axis_keys)\n@pytest.mark.parametrize(\"bound_type\", [\"list\", \"series\"], ids=[\"list\", \"series\"])\n@pytest.mark.exclude_in_sanity\ndef test_clip(request, data, axis, bound_type):\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n\n    if name_contains(request.node.name, numeric_dfs):\n        ind_len = (\n            len(modin_df.index)\n            if not pandas.DataFrame()._get_axis_number(axis)\n            else len(modin_df.columns)\n        )\n        # set bounds\n        lower, upper = np.sort(random_state.randint(RAND_LOW, RAND_HIGH, 2))\n\n        # test only upper scalar bound\n        modin_result = modin_df.clip(None, upper, axis=axis)\n        pandas_result = pandas_df.clip(None, upper, axis=axis)\n        df_equals(modin_result, pandas_result)\n\n        # test lower and upper scalar bound\n        modin_result = modin_df.clip(lower, upper, axis=axis)\n        pandas_result = pandas_df.clip(lower, upper, axis=axis)\n        df_equals(modin_result, pandas_result)\n\n        lower = random_state.randint(RAND_LOW, RAND_HIGH, ind_len)\n        upper = random_state.randint(RAND_LOW, RAND_HIGH, ind_len)\n\n        if bound_type == \"series\":\n            modin_lower = pd.Series(lower)\n            pandas_lower = pandas.Series(lower)\n            modin_upper = pd.Series(upper)\n            pandas_upper = pandas.Series(upper)\n        else:\n            modin_lower = pandas_lower = lower\n            modin_upper = pandas_upper = upper\n\n        # test lower and upper list bound on each column\n        modin_result = modin_df.clip(modin_lower, modin_upper, axis=axis)\n        pandas_result = pandas_df.clip(pandas_lower, pandas_upper, axis=axis)\n        df_equals(modin_result, pandas_result)\n\n        # test only upper list bound on each column\n        
modin_result = modin_df.clip(np.nan, modin_upper, axis=axis)\n        pandas_result = pandas_df.clip(np.nan, pandas_upper, axis=axis)\n        df_equals(modin_result, pandas_result)\n\n        with pytest.raises(ValueError):\n            modin_df.clip(lower=[1, 2, 3], axis=None)\n\n\ndef test_clip_4485():\n    modin_result = pd.DataFrame([1]).clip([3])\n    pandas_result = pandas.DataFrame([1]).clip([3])\n    df_equals(modin_result, pandas_result)\n\n\ndef test_drop():\n    frame_data = {\"A\": [1, 2, 3, 4], \"B\": [0, 1, 2, 3]}\n    simple = pandas.DataFrame(frame_data)\n    modin_simple = pd.DataFrame(frame_data)\n    df_equals(modin_simple.drop(\"A\", axis=1), simple[[\"B\"]])\n    df_equals(modin_simple.drop([\"A\", \"B\"], axis=\"columns\"), simple[[]])\n    df_equals(modin_simple.drop([0, 1, 3], axis=0), simple.loc[[2], :])\n    df_equals(modin_simple.drop([0, 3], axis=\"index\"), simple.loc[[1, 2], :])\n\n    pytest.raises(KeyError, modin_simple.drop, 5)\n    pytest.raises(KeyError, modin_simple.drop, \"C\", axis=1)\n    pytest.raises(KeyError, modin_simple.drop, [1, 5])\n    pytest.raises(KeyError, modin_simple.drop, [\"A\", \"C\"], axis=1)\n\n    # errors = 'ignore'\n    df_equals(modin_simple.drop(5, errors=\"ignore\"), simple)\n    df_equals(modin_simple.drop([0, 5], errors=\"ignore\"), simple.loc[[1, 2, 3], :])\n    df_equals(modin_simple.drop(\"C\", axis=1, errors=\"ignore\"), simple)\n    df_equals(modin_simple.drop([\"A\", \"C\"], axis=1, errors=\"ignore\"), simple[[\"B\"]])\n\n    # non-unique\n    nu_df = pandas.DataFrame(\n        zip(range(3), range(-3, 1), list(\"abc\")), columns=[\"a\", \"a\", \"b\"]\n    )\n    modin_nu_df = pd.DataFrame(nu_df)\n    df_equals(modin_nu_df.drop(\"a\", axis=1), nu_df[[\"b\"]])\n    df_equals(modin_nu_df.drop(\"b\", axis=\"columns\"), nu_df[\"a\"])\n    df_equals(modin_nu_df.drop([]), nu_df)\n\n    nu_df = nu_df.set_index(pandas.Index([\"X\", \"Y\", \"X\"]))\n    nu_df.columns = list(\"abc\")\n    modin_nu_df = 
pd.DataFrame(nu_df)\n    df_equals(modin_nu_df.drop(\"X\", axis=\"rows\"), nu_df.loc[[\"Y\"], :])\n    df_equals(modin_nu_df.drop([\"X\", \"Y\"], axis=0), nu_df.loc[[], :])\n\n    # inplace cache issue\n    frame_data = random_state.randn(10, 3)\n    df = pandas.DataFrame(frame_data, columns=list(\"abc\"))\n    modin_df = pd.DataFrame(frame_data, columns=list(\"abc\"))\n    expected = df[~(df.b > 0)]\n    modin_df.drop(labels=df[df.b > 0].index, inplace=True)\n    df_equals(modin_df, expected)\n\n    midx = pd.MultiIndex(\n        levels=[[\"lama\", \"cow\", \"falcon\"], [\"speed\", \"weight\", \"length\"]],\n        codes=[[0, 0, 0, 1, 1, 1, 2, 2, 2], [0, 1, 2, 0, 1, 2, 0, 1, 2]],\n    )\n    df = pd.DataFrame(\n        index=midx,\n        columns=[\"big\", \"small\"],\n        data=[\n            [45, 30],\n            [200, 100],\n            [1.5, 1],\n            [30, 20],\n            [250, 150],\n            [1.5, 0.8],\n            [320, 250],\n            [1, 0.8],\n            [0.3, 0.2],\n        ],\n    )\n    with warns_that_defaulting_to_pandas_if(\n        not df_or_series_using_native_execution(df)\n    ):\n        df.drop(index=\"length\", level=1)\n\n\ndef test_drop_api_equivalence():\n    # equivalence of the labels/axis and index/columns API's\n    frame_data = [[1, 2, 3], [3, 4, 5], [5, 6, 7]]\n\n    modin_df = pd.DataFrame(frame_data, index=[\"a\", \"b\", \"c\"], columns=[\"d\", \"e\", \"f\"])\n\n    modin_df1 = modin_df.drop(\"a\")\n    modin_df2 = modin_df.drop(index=\"a\")\n    df_equals(modin_df1, modin_df2)\n\n    modin_df1 = modin_df.drop(\"d\", axis=1)\n    modin_df2 = modin_df.drop(columns=\"d\")\n    df_equals(modin_df1, modin_df2)\n\n    modin_df1 = modin_df.drop(labels=\"e\", axis=1)\n    modin_df2 = modin_df.drop(columns=\"e\")\n    df_equals(modin_df1, modin_df2)\n\n    modin_df1 = modin_df.drop([\"a\"], axis=0)\n    modin_df2 = modin_df.drop(index=[\"a\"])\n    df_equals(modin_df1, modin_df2)\n\n    modin_df1 = 
modin_df.drop([\"a\"], axis=0).drop([\"d\"], axis=1)\n    modin_df2 = modin_df.drop(index=[\"a\"], columns=[\"d\"])\n    df_equals(modin_df1, modin_df2)\n\n    with pytest.raises(ValueError):\n        modin_df.drop(labels=\"a\", index=\"b\")\n\n    with pytest.raises(ValueError):\n        modin_df.drop(labels=\"a\", columns=\"b\")\n\n    with pytest.raises(ValueError):\n        modin_df.drop(axis=1)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_drop_transpose(data):\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n    modin_result = modin_df.T.drop(columns=[0, 1, 2])\n    pandas_result = pandas_df.T.drop(columns=[0, 1, 2])\n    df_equals(modin_result, pandas_result)\n\n    modin_result = modin_df.T.drop(index=[\"col3\", \"col1\"])\n    pandas_result = pandas_df.T.drop(index=[\"col3\", \"col1\"])\n    df_equals(modin_result, pandas_result)\n\n    modin_result = modin_df.T.drop(columns=[0, 1, 2], index=[\"col3\", \"col1\"])\n    pandas_result = pandas_df.T.drop(columns=[0, 1, 2], index=[\"col3\", \"col1\"])\n    df_equals(modin_result, pandas_result)\n\n\ndef test_droplevel():\n    df = (\n        pd.DataFrame([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])\n        .set_index([0, 1])\n        .rename_axis([\"a\", \"b\"])\n    )\n    df.columns = pd.MultiIndex.from_tuples(\n        [(\"c\", \"e\"), (\"d\", \"f\")], names=[\"level_1\", \"level_2\"]\n    )\n    df.droplevel(\"a\")\n    df.droplevel(\"level_2\", axis=1)\n\n\n@pytest.mark.parametrize(\n    \"data\", test_data_with_duplicates_values, ids=test_data_with_duplicates_keys\n)\n@pytest.mark.parametrize(\n    \"keep\", [\"last\", \"first\", False], ids=[\"last\", \"first\", \"False\"]\n)\n@pytest.mark.parametrize(\n    \"subset\",\n    [None, \"col1\", \"name\", (\"col1\", \"col3\"), [\"col1\", \"col3\", \"col7\"]],\n    ids=[\"None\", \"string\", \"name\", \"tuple\", \"list\"],\n)\n@pytest.mark.parametrize(\"ignore_index\", [True, False], 
ids=[\"True\", \"False\"])\n@pytest.mark.exclude_in_sanity\ndef test_drop_duplicates(data, keep, subset, ignore_index):\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n\n    try:\n        pandas_df.drop_duplicates(\n            keep=keep, inplace=False, subset=subset, ignore_index=ignore_index\n        )\n    except Exception as err:\n        with pytest.raises(type(err)):\n            modin_df.drop_duplicates(\n                keep=keep, inplace=False, subset=subset, ignore_index=ignore_index\n            )\n    else:\n        sort_if_range_partitioning(\n            pandas_df.drop_duplicates(\n                keep=keep, inplace=False, subset=subset, ignore_index=ignore_index\n            ),\n            modin_df.drop_duplicates(\n                keep=keep, inplace=False, subset=subset, ignore_index=ignore_index\n            ),\n        )\n\n    try:\n        pandas_df.drop_duplicates(\n            keep=keep, inplace=True, subset=subset, ignore_index=ignore_index\n        )\n    except Exception as err:\n        with pytest.raises(type(err)):\n            modin_df.drop_duplicates(\n                keep=keep, inplace=True, subset=subset, ignore_index=ignore_index\n            )\n    else:\n        modin_df.drop_duplicates(\n            keep=keep, inplace=True, subset=subset, ignore_index=ignore_index\n        )\n        sort_if_range_partitioning(modin_df, pandas_df)\n\n\ndef test_drop_duplicates_with_missing_index_values():\n    data = {\n        \"columns\": [\"value\", \"time\", \"id\"],\n        \"index\": [\n            4,\n            5,\n            6,\n            7,\n            8,\n            9,\n            10,\n            11,\n            12,\n            13,\n            14,\n            15,\n            20,\n            21,\n            22,\n            23,\n            24,\n            25,\n            26,\n            27,\n            32,\n            33,\n            34,\n            35,\n            36,\n            
37,\n            38,\n            39,\n            40,\n            41,\n        ],\n        \"data\": [\n            [\"3\", 1279213398000.0, 88.0],\n            [\"3\", 1279204682000.0, 88.0],\n            [\"0\", 1245772835000.0, 448.0],\n            [\"0\", 1270564258000.0, 32.0],\n            [\"0\", 1267106669000.0, 118.0],\n            [\"7\", 1300621123000.0, 5.0],\n            [\"0\", 1251130752000.0, 957.0],\n            [\"0\", 1311683506000.0, 62.0],\n            [\"9\", 1283692698000.0, 89.0],\n            [\"9\", 1270234253000.0, 64.0],\n            [\"0\", 1285088818000.0, 50.0],\n            [\"0\", 1218212725000.0, 695.0],\n            [\"2\", 1383933968000.0, 348.0],\n            [\"0\", 1368227625000.0, 257.0],\n            [\"1\", 1454514093000.0, 446.0],\n            [\"1\", 1428497427000.0, 134.0],\n            [\"1\", 1459184936000.0, 568.0],\n            [\"1\", 1502293302000.0, 599.0],\n            [\"1\", 1491833358000.0, 829.0],\n            [\"1\", 1485431534000.0, 806.0],\n            [\"8\", 1351800505000.0, 101.0],\n            [\"0\", 1357247721000.0, 916.0],\n            [\"0\", 1335804423000.0, 370.0],\n            [\"24\", 1327547726000.0, 720.0],\n            [\"0\", 1332334140000.0, 415.0],\n            [\"0\", 1309543100000.0, 30.0],\n            [\"18\", 1309541141000.0, 30.0],\n            [\"0\", 1298979435000.0, 48.0],\n            [\"14\", 1276098160000.0, 59.0],\n            [\"0\", 1233936302000.0, 109.0],\n        ],\n    }\n\n    pandas_df = pandas.DataFrame(\n        data[\"data\"], index=data[\"index\"], columns=data[\"columns\"]\n    )\n    modin_df = pd.DataFrame(data[\"data\"], index=data[\"index\"], columns=data[\"columns\"])\n    modin_result = modin_df.sort_values([\"id\", \"time\"]).drop_duplicates([\"id\"])\n    pandas_result = pandas_df.sort_values([\"id\", \"time\"]).drop_duplicates([\"id\"])\n    sort_if_range_partitioning(modin_result, pandas_result)\n\n\ndef test_drop_duplicates_after_sort():\n    data = 
[\n        {\"value\": 1, \"time\": 2},\n        {\"value\": 1, \"time\": 1},\n        {\"value\": 2, \"time\": 1},\n        {\"value\": 2, \"time\": 2},\n    ]\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n\n    modin_result = modin_df.sort_values([\"value\", \"time\"]).drop_duplicates([\"value\"])\n    pandas_result = pandas_df.sort_values([\"value\", \"time\"]).drop_duplicates([\"value\"])\n    sort_if_range_partitioning(modin_result, pandas_result)\n\n\ndef test_drop_duplicates_with_repeated_index_values():\n    # This tests for issue #4467: https://github.com/modin-project/modin/issues/4467\n    data = [[0], [1], [0]]\n    index = [0, 0, 0]\n    modin_df, pandas_df = create_test_dfs(data, index=index)\n    eval_general(\n        modin_df,\n        pandas_df,\n        lambda df: df.drop_duplicates(),\n        comparator=sort_if_range_partitioning,\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"axis\", axis_values, ids=axis_keys)\n@pytest.mark.parametrize(\"how\", [\"any\", \"all\"], ids=[\"any\", \"all\"])\ndef test_dropna(data, axis, how):\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n\n    with pytest.raises(ValueError):\n        modin_df.dropna(axis=axis, how=\"invalid\")\n\n    with pytest.raises(TypeError):\n        modin_df.dropna(axis=axis, how=None, thresh=None)\n\n    with pytest.raises(KeyError):\n        modin_df.dropna(axis=axis, subset=[\"NotExists\"], how=how)\n\n    modin_result = modin_df.dropna(axis=axis, how=how)\n    pandas_result = pandas_df.dropna(axis=axis, how=how)\n    df_equals(modin_result, pandas_result)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_dropna_inplace(data):\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n    pandas_result = pandas_df.dropna()\n    modin_df.dropna(inplace=True)\n    df_equals(modin_df, pandas_result)\n\n    
modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n    pandas_df.dropna(thresh=2, inplace=True)\n    modin_df.dropna(thresh=2, inplace=True)\n    df_equals(modin_df, pandas_df)\n\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n    pandas_df.dropna(axis=1, how=\"any\", inplace=True)\n    modin_df.dropna(axis=1, how=\"any\", inplace=True)\n    df_equals(modin_df, pandas_df)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_dropna_multiple_axes(data):\n    modin_df = pd.DataFrame(data)\n\n    with pytest.raises(TypeError):\n        modin_df.dropna(how=\"all\", axis=[0, 1])\n    with pytest.raises(TypeError):\n        modin_df.dropna(how=\"all\", axis=(0, 1))\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_dropna_subset(request, data):\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n\n    if \"empty_data\" not in request.node.name:\n        column_subset = modin_df.columns[0:2]\n        df_equals(\n            modin_df.dropna(how=\"all\", subset=column_subset),\n            pandas_df.dropna(how=\"all\", subset=column_subset),\n        )\n        df_equals(\n            modin_df.dropna(how=\"any\", subset=column_subset),\n            pandas_df.dropna(how=\"any\", subset=column_subset),\n        )\n\n        row_subset = modin_df.index[0:2]\n        df_equals(\n            modin_df.dropna(how=\"all\", axis=1, subset=row_subset),\n            pandas_df.dropna(how=\"all\", axis=1, subset=row_subset),\n        )\n        df_equals(\n            modin_df.dropna(how=\"any\", axis=1, subset=row_subset),\n            pandas_df.dropna(how=\"any\", axis=1, subset=row_subset),\n        )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"axis,subset\", [(0, list(\"EF\")), (1, [4, 5])])\ndef test_dropna_subset_error(data, axis, subset):\n    eval_general(\n        
*create_test_dfs(data),\n        lambda df: df.dropna(axis=axis, subset=subset),\n        expected_exception=KeyError([\"E\", \"F\"]),\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"astype\", [\"category\", \"int32\", \"float\"])\ndef test_insert_dtypes(data, astype, request):\n    modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data)\n\n    # categories with NaN works incorrect for now\n    if astype == \"category\" and pandas_df.iloc[:, 0].isnull().any():\n        return\n\n    expected_exception = None\n    if \"int32-float_nan_data\" in request.node.callspec.id:\n        pytest.xfail(reason=\"https://github.com/modin-project/modin/issues/7026\")\n    eval_insert(\n        modin_df,\n        pandas_df,\n        col=\"TypeSaver\",\n        value=lambda df: df.iloc[:, 0].astype(astype),\n        expected_exception=expected_exception,\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"loc\", [-3, 0, 3])\ndef test_insert_loc(data, loc):\n    modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data)\n    expected_exception = None\n    if loc == -3:\n        expected_exception = ValueError(\"unbounded slice\")\n    eval_insert(\n        modin_df,\n        pandas_df,\n        loc=loc,\n        value=lambda df: df.iloc[:, 0],\n        expected_exception=expected_exception,\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_insert(data):\n    modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data)\n\n    eval_insert(\n        modin_df, pandas_df, col=\"Duplicate\", value=lambda df: df[df.columns[0]]\n    )\n    eval_insert(modin_df, pandas_df, col=\"Scalar\", value=100)\n    eval_insert(\n        pd.DataFrame(columns=list(\"ab\")),\n        pandas.DataFrame(columns=list(\"ab\")),\n        col=\"Series insert\",\n        value=lambda df: df[df.columns[0]],\n    )\n    
eval_insert(\n        modin_df,\n        pandas_df,\n        col=\"DataFrame insert\",\n        value=lambda df: df[[df.columns[0]]],\n    )\n    eval_insert(\n        modin_df,\n        pandas_df,\n        col=\"Different indices\",\n        value=lambda df: df[[df.columns[0]]].set_index(df.index[::-1]),\n    )\n    eval_insert(\n        modin_df,\n        pandas_df,\n        col=\"2d list insert\",\n        value=lambda df: [[1, 2]] * len(df),\n    )\n\n    # Bad inserts\n    eval_insert(\n        modin_df,\n        pandas_df,\n        col=\"Bad Column\",\n        value=lambda df: df,\n        expected_exception=ValueError(\n            f\"Expected a one-dimensional object, got a DataFrame with {len(pandas_df.columns)} columns instead.\"\n        ),\n    )\n    eval_insert(\n        modin_df,\n        pandas_df,\n        col=\"Too Short\",\n        value=lambda df: list(df[df.columns[0]])[:-1],\n        expected_exception=ValueError(\n            f\"Length of values ({len(pandas_df)-1}) does not match length of index ({len(pandas_df)})\"\n        ),\n    )\n    eval_insert(\n        modin_df,\n        pandas_df,\n        col=lambda df: df.columns[0],\n        value=lambda df: df[df.columns[0]],\n        expected_exception=ValueError(\"cannot insert 2d list insert, already exists\"),\n    )\n    eval_insert(\n        modin_df,\n        pandas_df,\n        loc=lambda df: len(df.columns) + 100,\n        col=\"Bad Loc\",\n        value=100,\n        expected_exception=IndexError(\n            f\"index {len(pandas_df.columns) + 100} is out of bounds for axis 0 with size {len(pandas_df.columns)}\"\n        ),\n    )\n\n\ndef test_insert_4407():\n    data = {\"col1\": [1, 2, 3], \"col2\": [2, 3, 4]}\n    modin_df, pandas_df = create_test_dfs(data)\n\n    def comparator(df1, df2):\n        assert_series_equal(df1.dtypes, df2.dtypes, check_index=False)\n        return df_equals(df1, df2)\n\n    for idx, value in enumerate(\n        (pandas_df.to_numpy(), np.array([[1]] * 
3), np.array([[1, 2, 3], [4, 5, 6]]))\n    ):\n        expected_exception = None\n        if idx == 0:\n            expected_exception = ValueError(\n                \"Expected a 1D array, got an array with shape (3, 2)\"\n            )\n        elif idx == 2:\n            # FIXME: https://github.com/modin-project/modin/issues/7080\n            expected_exception = False\n        eval_insert(\n            modin_df,\n            pandas_df,\n            loc=0,\n            col=f\"test_col{idx}\",\n            value=value,\n            comparator=lambda df1, df2: comparator(df1, df2),\n            expected_exception=expected_exception,\n        )\n\n\ndef test_insert_modin_array():\n    from modin.numpy import array\n\n    data = {\"col1\": [1, 2, 3], \"col2\": [2, 3, 4]}\n    modin_df1, modin_df2 = pd.DataFrame(data), pd.DataFrame(data)\n    np_value = np.array([7, 7, 7])\n    md_np_value = array(np_value)\n\n    modin_df1.insert(1, \"new_col\", np_value)\n    modin_df2.insert(1, \"new_col\", md_np_value)\n    df_equals(modin_df1, modin_df2)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_ndim(data):\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n\n    assert modin_df.ndim == pandas_df.ndim\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_notna(data):\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n\n    df_equals(modin_df.notna(), pandas_df.notna())\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_notnull(data):\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n\n    df_equals(modin_df.notnull(), pandas_df.notnull())\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_round(data):\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n\n    df_equals(modin_df.round(), pandas_df.round())\n    
df_equals(modin_df.round(1), pandas_df.round(1))\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"axis\", axis_values, ids=axis_keys)\ndef test_set_axis(data, axis):\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n\n    x = pandas.DataFrame()._get_axis_number(axis)\n    index = modin_df.columns if x else modin_df.index\n    labels = [\"{0}_{1}\".format(index[i], i) for i in range(modin_df.shape[x])]\n\n    eval_general(\n        modin_df, pandas_df, lambda df: df.set_axis(labels, axis=axis, copy=True)\n    )\n\n    modin_df_copy = modin_df.copy()\n    modin_df = modin_df.set_axis(labels, axis=axis, copy=False)\n\n    # Check that the copy and original are different\n    try:\n        df_equals(modin_df, modin_df_copy)\n    except AssertionError:\n        assert True\n    else:\n        assert False\n\n    pandas_df = pandas_df.set_axis(labels, axis=axis)\n    df_equals(modin_df, pandas_df)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"drop\", bool_arg_values, ids=arg_keys(\"drop\", bool_arg_keys))\n@pytest.mark.parametrize(\n    \"append\", bool_arg_values, ids=arg_keys(\"append\", bool_arg_keys)\n)\ndef test_set_index(request, data, drop, append):\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n\n    if \"empty_data\" not in request.node.name:\n        key = modin_df.columns[0]\n        modin_result = modin_df.set_index(key, drop=drop, append=append, inplace=False)\n        pandas_result = pandas_df.set_index(\n            key, drop=drop, append=append, inplace=False\n        )\n        df_equals(modin_result, pandas_result)\n\n        modin_df_copy = modin_df.copy()\n        modin_df.set_index(key, drop=drop, append=append, inplace=True)\n\n        # Check that the copy and original are different\n        try:\n            df_equals(modin_df, modin_df_copy)\n        except AssertionError:\n    
        assert True\n        else:\n            assert False\n\n        pandas_df.set_index(key, drop=drop, append=append, inplace=True)\n        df_equals(modin_df, pandas_df)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_shape(data):\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n\n    assert modin_df.shape == pandas_df.shape\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_size(data):\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n\n    assert modin_df.size == pandas_df.size\n\n\ndef test_squeeze():\n    frame_data = {\n        \"col1\": [0, 1, 2, 3],\n        \"col2\": [4, 5, 6, 7],\n        \"col3\": [8, 9, 10, 11],\n        \"col4\": [12, 13, 14, 15],\n        \"col5\": [0, 0, 0, 0],\n    }\n    frame_data_2 = {\"col1\": [0, 1, 2, 3]}\n    frame_data_3 = {\n        \"col1\": [0],\n        \"col2\": [4],\n        \"col3\": [8],\n        \"col4\": [12],\n        \"col5\": [0],\n    }\n    frame_data_4 = {\"col1\": [2]}\n    frame_data_5 = {\"col1\": [\"string\"]}\n    # Different data for different cases\n    pandas_df = pandas.DataFrame(frame_data).squeeze()\n    modin_df = pd.DataFrame(frame_data).squeeze()\n    df_equals(modin_df, pandas_df)\n\n    pandas_df_2 = pandas.DataFrame(frame_data_2).squeeze()\n    modin_df_2 = pd.DataFrame(frame_data_2).squeeze()\n    df_equals(modin_df_2, pandas_df_2)\n\n    pandas_df_3 = pandas.DataFrame(frame_data_3).squeeze()\n    modin_df_3 = pd.DataFrame(frame_data_3).squeeze()\n    df_equals(modin_df_3, pandas_df_3)\n\n    pandas_df_4 = pandas.DataFrame(frame_data_4).squeeze()\n    modin_df_4 = pd.DataFrame(frame_data_4).squeeze()\n    df_equals(modin_df_4, pandas_df_4)\n\n    pandas_df_5 = pandas.DataFrame(frame_data_5).squeeze()\n    modin_df_5 = pd.DataFrame(frame_data_5).squeeze()\n    df_equals(modin_df_5, pandas_df_5)\n\n    data = [\n        [\n            
pd.Timestamp(\"2019-01-02\"),\n            pd.Timestamp(\"2019-01-03\"),\n            pd.Timestamp(\"2019-01-04\"),\n            pd.Timestamp(\"2019-01-05\"),\n        ],\n        [1, 1, 1, 2],\n    ]\n    df = pd.DataFrame(data, index=[\"date\", \"value\"]).T\n    pf = pandas.DataFrame(data, index=[\"date\", \"value\"]).T\n    df.set_index(\"date\", inplace=True)\n    pf.set_index(\"date\", inplace=True)\n    df_equals(df.iloc[0], pf.iloc[0])\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_transpose(data):\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n\n    df_equals(modin_df.T, pandas_df.T)\n    df_equals(modin_df.transpose(), pandas_df.transpose())\n\n    # Test for map across full axis for select indices\n    df_equals(modin_df.T.dropna(), pandas_df.T.dropna())\n    # Test for map across full axis\n    df_equals(modin_df.T.nunique(), pandas_df.T.nunique())\n    # Test for map across blocks\n    df_equals(modin_df.T.notna(), pandas_df.T.notna())\n\n\n@pytest.mark.parametrize(\n    \"data, other_data\",\n    [\n        ({\"A\": [1, 2, 3], \"B\": [400, 500, 600]}, {\"B\": [4, 5, 6], \"C\": [7, 8, 9]}),\n        ({\"C\": [1, 2, 3], \"B\": [400, 500, 600]}, {\"B\": [4, 5, 6], \"A\": [7, 8, 9]}),\n        (\n            {\"A\": [\"a\", \"b\", \"c\"], \"B\": [\"x\", \"y\", \"z\"]},\n            {\"B\": [\"d\", \"e\", \"f\", \"g\", \"h\", \"i\"]},\n        ),\n        ({\"A\": [1, 2, 3], \"B\": [400, 500, 600]}, {\"B\": [4, np.nan, 6]}),\n    ],\n)\n@pytest.mark.parametrize(\"errors\", [\"raise\", \"ignore\"])\ndef test_update(data, other_data, errors):\n    modin_df, pandas_df = create_test_dfs(data)\n    other_modin_df, other_pandas_df = create_test_dfs(other_data)\n    expected_exception = None\n    if errors == \"raise\":\n        expected_exception = ValueError(\"Data overlaps.\")\n    eval_general(\n        modin_df,\n        pandas_df,\n        lambda df: (\n            
df.update(other_modin_df, errors=errors)\n            if isinstance(df, pd.DataFrame)\n            else df.update(other_pandas_df, errors=errors)\n        ),\n        __inplace__=True,\n        expected_exception=expected_exception,\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test___neg__(request, data):\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n\n    try:\n        pandas_result = pandas_df.__neg__()\n    except Exception as err:\n        with pytest.raises(type(err)):\n            modin_df.__neg__()\n    else:\n        modin_result = modin_df.__neg__()\n        df_equals(modin_result, pandas_result)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test___invert__(data, request):\n    expected_exception = None\n    if \"float_nan_data\" in request.node.callspec.id:\n        # FIXME: https://github.com/modin-project/modin/issues/7081\n        expected_exception = False\n    eval_general(\n        *create_test_dfs(data), lambda df: ~df, expected_exception=expected_exception\n    )\n\n\ndef test___invert___bool():\n    data = [False]\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n    modin_result = ~modin_df\n    pandas_result = ~pandas_df\n    df_equals(modin_result, pandas_result)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test___delitem__(request, data):\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n\n    if \"empty_data\" not in request.node.name:\n        key = pandas_df.columns[0]\n\n        modin_df = modin_df.copy()\n        pandas_df = pandas_df.copy()\n        modin_df.__delitem__(key)\n        pandas_df.__delitem__(key)\n        df_equals(modin_df, pandas_df)\n\n        # Issue 2027\n        last_label = pandas_df.iloc[:, -1].name\n        modin_df.__delitem__(last_label)\n        pandas_df.__delitem__(last_label)\n        df_equals(modin_df, 
pandas_df)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test___nonzero__(data):\n    modin_df = pd.DataFrame(data)\n\n    with pytest.raises(ValueError):\n        # Always raises ValueError\n        modin_df.__nonzero__()\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test___abs__(request, data):\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n\n    try:\n        pandas_result = abs(pandas_df)\n    except Exception as err:\n        with pytest.raises(type(err)):\n            abs(modin_df)\n    else:\n        modin_result = abs(modin_df)\n        df_equals(modin_result, pandas_result)\n\n\ndef test___round__():\n    data = test_data_values[0]\n    eval_general(pd.DataFrame(data), pandas.DataFrame(data), lambda df: df.__round__())\n\n\n@pytest.mark.parametrize(\n    \"get_index\",\n    [\n        pytest.param(lambda idx: None, id=\"None_idx\"),\n        pytest.param(lambda idx: [\"a\", \"b\", \"c\"], id=\"No_intersection_idx\"),\n        pytest.param(lambda idx: idx, id=\"Equal_idx\"),\n        pytest.param(lambda idx: idx[::-1], id=\"Reversed_idx\"),\n    ],\n)\n@pytest.mark.parametrize(\n    \"get_columns\",\n    [\n        pytest.param(lambda idx: None, id=\"None_idx\"),\n        pytest.param(lambda idx: [\"a\", \"b\", \"c\"], id=\"No_intersection_idx\"),\n        pytest.param(lambda idx: idx, id=\"Equal_idx\"),\n        pytest.param(lambda idx: idx[::-1], id=\"Reversed_idx\"),\n    ],\n)\n@pytest.mark.parametrize(\"dtype\", [None, \"str\"])\n@pytest.mark.exclude_in_sanity\ndef test_constructor_from_modin_series(get_index, get_columns, dtype):\n    modin_df, pandas_df = create_test_dfs(test_data_values[0])\n\n    modin_data = {f\"new_col{i}\": modin_df.iloc[:, i] for i in range(modin_df.shape[1])}\n    pandas_data = {\n        f\"new_col{i}\": pandas_df.iloc[:, i] for i in range(pandas_df.shape[1])\n    }\n\n    index = get_index(modin_df.index)\n    columns = 
get_columns(list(modin_data.keys()))\n\n    new_modin = pd.DataFrame(modin_data, index=index, columns=columns, dtype=dtype)\n    new_pandas = pandas.DataFrame(\n        pandas_data, index=index, columns=columns, dtype=dtype\n    )\n    df_equals(new_modin, new_pandas)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_constructor(data):\n    pandas_df = pandas.DataFrame(data)\n    modin_df = pd.DataFrame(data)\n    df_equals(pandas_df, modin_df)\n\n    pandas_df = pandas.DataFrame({k: pandas.Series(v) for k, v in data.items()})\n    modin_df = pd.DataFrame({k: pd.Series(v) for k, v in data.items()})\n    df_equals(pandas_df, modin_df)\n\n\ndef test_pyarrow_constructor():\n    pa = pytest.importorskip(\"pyarrow\")\n\n    data = [[Decimal(\"3.19\"), None], [None, Decimal(\"-1.23\")]]\n    df_equals(*create_test_dfs(data, dtype=pd.ArrowDtype(pa.decimal128(3, scale=2))))\n\n\n@pytest.mark.parametrize(\n    \"data\",\n    [\n        np.arange(1, 10000, dtype=np.float32),\n        [\n            pd.Series([1, 2, 3], dtype=\"int32\"),\n            pandas.Series([4, 5, 6], dtype=\"int64\"),\n            np.array([7, 8, 9], dtype=np.float32),\n        ],\n        pandas.Categorical([1, 2, 3, 4, 5]),\n    ],\n)\ndef test_constructor_dtypes(data):\n    modin_df, pandas_df = create_test_dfs(data)\n    df_equals(modin_df, pandas_df)\n\n\ndef test_constructor_columns_and_index():\n    modin_df = pd.DataFrame(\n        [[1, 1, 10], [2, 4, 20], [3, 7, 30]],\n        index=[1, 2, 3],\n        columns=[\"id\", \"max_speed\", \"health\"],\n    )\n    pandas_df = pandas.DataFrame(\n        [[1, 1, 10], [2, 4, 20], [3, 7, 30]],\n        index=[1, 2, 3],\n        columns=[\"id\", \"max_speed\", \"health\"],\n    )\n    df_equals(modin_df, pandas_df)\n    df_equals(pd.DataFrame(modin_df), pandas.DataFrame(pandas_df))\n    df_equals(\n        pd.DataFrame(modin_df, columns=[\"max_speed\", \"health\"]),\n        pandas.DataFrame(pandas_df, 
columns=[\"max_speed\", \"health\"]),\n    )\n    df_equals(\n        pd.DataFrame(modin_df, index=[1, 2]),\n        pandas.DataFrame(pandas_df, index=[1, 2]),\n    )\n    df_equals(\n        pd.DataFrame(modin_df, index=[1, 2], columns=[\"health\"]),\n        pandas.DataFrame(pandas_df, index=[1, 2], columns=[\"health\"]),\n    )\n    df_equals(\n        pd.DataFrame(modin_df.iloc[:, 0], index=[1, 2, 3]),\n        pandas.DataFrame(pandas_df.iloc[:, 0], index=[1, 2, 3]),\n    )\n    df_equals(\n        pd.DataFrame(modin_df.iloc[:, 0], columns=[\"NO_EXIST\"]),\n        pandas.DataFrame(pandas_df.iloc[:, 0], columns=[\"NO_EXIST\"]),\n    )\n    with pytest.raises(NotImplementedError):\n        pd.DataFrame(modin_df, index=[1, 2, 99999])\n    with pytest.raises(NotImplementedError):\n        pd.DataFrame(modin_df, columns=[\"NO_EXIST\"])\n\n\ndef test_constructor_from_index():\n    data = pd.Index([1, 2, 3], name=\"pricing_date\")\n    modin_df, pandas_df = create_test_dfs(data)\n    df_equals(modin_df, pandas_df)\n\n\ndef test_insert_datelike_string_issue_7371():\n    # When a new value is inserted into a frame, we call pandas.api.types.pandas_dtype(value) to\n    # extract the dtype of an object like a pandas Series or numpy array. When a scalar value is passed,\n    # this usually raises a TypeError, so we construct a local pandas Series from the object and\n    # extract the dtype from there.\n    # When the passed value is a date-like string, pandas will instead raise a ValueError because\n    # it tries to parse it as a numpy structured dtype. After fixing GH#7371, we now catch\n    # ValueError in addition to TypeError to handle this case.\n    modin_df = pd.DataFrame({\"a\": [0]})\n    modin_df[\"c\"] = \"2020-01-01\"\n    pandas_df = pandas.DataFrame({\"a\": [0]})\n    pandas_df[\"c\"] = \"2020-01-01\"\n    df_equals(modin_df, pandas_df)\n"
  },
  {
    "path": "modin/tests/pandas/dataframe/test_pickle.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport pickle\n\nimport numpy as np\nimport pytest\n\nimport modin.pandas as pd\nfrom modin.config import PersistentPickle\nfrom modin.tests.pandas.utils import create_test_dfs, df_equals\n\n\n@pytest.fixture\ndef modin_df_non_empty():\n    return pd.DataFrame({\"col1\": np.arange(1000), \"col2\": np.arange(2000, 3000)})\n\n\n@pytest.fixture\ndef modin_df_empty():\n    return pd.DataFrame()\n\n\n@pytest.fixture\ndef modin_column(modin_df_non_empty):\n    return modin_df_non_empty[\"col1\"]\n\n\n@pytest.fixture(params=[True, False])\ndef persistent(request):\n    old = PersistentPickle.get()\n    PersistentPickle.put(request.param)\n    yield request.param\n    PersistentPickle.put(old)\n\n\n@pytest.mark.parametrize(\"modin_df_name\", [\"modin_df_non_empty\", \"modin_df_empty\"])\ndef test_dataframe_pickle(request, modin_df_name):\n    modin_df = request.getfixturevalue(modin_df_name)\n    other = pickle.loads(pickle.dumps(modin_df))\n    df_equals(modin_df, other)\n\n\ndef test__reduce__():\n    # `DataFrame.__reduce__` will be called implicitly when lambda expressions are\n    # pre-processed for the distributed engine.\n    dataframe_data = [\"Major League 
Baseball\", \"National Basketball Association\"]\n    abbr_md, abbr_pd = create_test_dfs(dataframe_data, index=[\"MLB\", \"NBA\"])\n\n    dataframe_data = {\n        \"name\": [\"Mariners\", \"Lakers\"] * 500,\n        \"league_abbreviation\": [\"MLB\", \"NBA\"] * 500,\n    }\n    teams_md, teams_pd = create_test_dfs(dataframe_data)\n\n    result_md = (\n        teams_md.set_index(\"name\")\n        .league_abbreviation.apply(lambda abbr: abbr_md[0].loc[abbr])\n        .rename(\"league\")\n    )\n\n    result_pd = (\n        teams_pd.set_index(\"name\")\n        .league_abbreviation.apply(lambda abbr: abbr_pd[0].loc[abbr])\n        .rename(\"league\")\n    )\n    df_equals(result_md, result_pd)\n\n\ndef test_column_pickle(modin_column, modin_df_non_empty, persistent):\n    dmp = pickle.dumps(modin_column)\n    other = pickle.loads(dmp)\n    df_equals(modin_column.to_frame(), other.to_frame())\n\n    # make sure we don't pickle the whole frame if doing persistent storage\n    if persistent:\n        assert len(dmp) < len(pickle.dumps(modin_df_non_empty))\n"
  },
  {
    "path": "modin/tests/pandas/dataframe/test_reduce.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport matplotlib\nimport numpy as np\nimport pandas\nimport pytest\n\nimport modin.pandas as pd\nfrom modin.config import NPartitions\nfrom modin.tests.pandas.utils import (\n    arg_keys,\n    axis_keys,\n    axis_values,\n    bool_arg_keys,\n    bool_arg_values,\n    create_test_dfs,\n    default_to_pandas_ignore_string,\n    df_equals,\n    df_equals_with_non_stable_indices,\n    eval_general,\n    int_arg_keys,\n    int_arg_values,\n    test_data,\n    test_data_diff_dtype,\n    test_data_keys,\n    test_data_large_categorical_dataframe,\n    test_data_values,\n)\n\nNPartitions.put(4)\n\n# Force matplotlib to not use any Xwindows backend.\nmatplotlib.use(\"Agg\")\n\n# Our configuration in pytest.ini requires that we explicitly catch all\n# instances of defaulting to pandas, but some test modules, like this one,\n# have too many such instances.\npytestmark = pytest.mark.filterwarnings(default_to_pandas_ignore_string)\n\n\n@pytest.mark.parametrize(\"method\", [\"all\", \"any\"])\n@pytest.mark.parametrize(\"is_transposed\", [False, True])\n@pytest.mark.parametrize(\"skipna\", [False, True])\n@pytest.mark.parametrize(\"axis\", axis_values, 
ids=axis_keys)\n@pytest.mark.parametrize(\"data\", [test_data[\"float_nan_data\"]])\ndef test_all_any(data, axis, skipna, is_transposed, method):\n    eval_general(\n        *create_test_dfs(data),\n        lambda df: getattr((df.T if is_transposed else df), method)(\n            axis=axis, skipna=skipna, bool_only=None\n        ),\n    )\n\n\n@pytest.mark.parametrize(\"method\", [\"all\", \"any\"])\n@pytest.mark.parametrize(\n    \"bool_only\", bool_arg_values, ids=arg_keys(\"bool_only\", bool_arg_keys)\n)\ndef test_all_any_specific(bool_only, method):\n    eval_general(\n        *create_test_dfs(test_data_diff_dtype),\n        lambda df: getattr(df, method)(bool_only=bool_only),\n    )\n\n\n@pytest.mark.parametrize(\"axis\", axis_values, ids=axis_keys)\n@pytest.mark.parametrize(\n    \"data\", [test_data[\"float_nan_data\"], test_data_large_categorical_dataframe]\n)\ndef test_count(data, axis):\n    eval_general(\n        *create_test_dfs(data),\n        lambda df: df.count(axis=axis),\n    )\n\n\n@pytest.mark.parametrize(\"axis\", axis_values, ids=axis_keys)\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"dropna\", [True, False])\ndef test_nunique(data, axis, dropna):\n    eval_general(\n        *create_test_dfs(data),\n        lambda df: df.nunique(axis=axis, dropna=dropna),\n    )\n\n\n@pytest.mark.parametrize(\"numeric_only\", [False, True])\ndef test_count_specific(numeric_only):\n    eval_general(\n        *create_test_dfs(test_data_diff_dtype),\n        lambda df: df.count(numeric_only=numeric_only),\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_count_dtypes(data):\n    modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data)\n\n    eval_general(\n        modin_df,\n        pandas_df,\n        lambda df: df.isna().count(axis=0),\n    )\n\n\n@pytest.mark.parametrize(\"percentiles\", [None, 0.10, 0.11, 0.44, 0.78, 
0.99])\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_describe(data, percentiles):\n    if percentiles is not None:\n        percentiles = [percentiles]\n    eval_general(\n        *create_test_dfs(data),\n        lambda df: df.describe(percentiles=percentiles),\n    )\n\n\n@pytest.mark.parametrize(\"has_numeric_column\", [False, True])\ndef test_2195(has_numeric_column):\n    data = {\n        \"categorical\": pd.Categorical([\"d\"] * 10**2),\n        \"date\": [np.datetime64(\"2000-01-01\")] * 10**2,\n    }\n\n    if has_numeric_column:\n        data.update({\"numeric\": [5] * 10**2})\n\n    modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data)\n\n    eval_general(\n        modin_df,\n        pandas_df,\n        lambda df: df.describe(),\n    )\n\n\n# Issue: https://github.com/modin-project/modin/issues/4641\ndef test_describe_column_partition_has_different_index():\n    pandas_df = pandas.DataFrame(test_data[\"int_data\"])\n    # We add a string column to test the case where partitions with mixed data\n    # types have different 'describe' rows, which causes an index mismatch.\n    pandas_df[\"string_column\"] = \"abc\"\n    modin_df = pd.DataFrame(pandas_df)\n    eval_general(modin_df, pandas_df, lambda df: df.describe(include=\"all\"))\n\n\n@pytest.mark.parametrize(\n    \"exclude,include\",\n    [\n        ([np.float64], None),\n        (np.float64, None),\n        (None, [np.timedelta64, np.datetime64, np.object_, np.bool_]),\n        (None, \"all\"),\n        (None, np.number),\n    ],\n)\ndef test_describe_specific(exclude, include):\n    eval_general(\n        *create_test_dfs(test_data_diff_dtype),\n        lambda df: df.drop(\"str_col\", axis=1).describe(\n            exclude=exclude, include=include\n        ),\n    )\n\n\n@pytest.mark.parametrize(\"data\", [test_data[\"int_data\"]])\ndef test_describe_str(data):\n    modin_df = pd.DataFrame(data).applymap(str)\n    pandas_df = 
pandas.DataFrame(data).applymap(str)\n\n    try:\n        df_equals(modin_df.describe(), pandas_df.describe())\n    except AssertionError:\n        # We have to do this because we choose the highest count slightly differently\n        # than pandas. Because there is no true guarantee which one will be first,\n        # If they don't match, make sure that the `freq` is the same at least.\n        df_equals(\n            modin_df.describe().loc[[\"count\", \"unique\", \"freq\"]],\n            pandas_df.describe().loc[[\"count\", \"unique\", \"freq\"]],\n        )\n\n\ndef test_describe_dtypes():\n    data = {\n        \"col1\": list(\"abc\"),\n        \"col2\": list(\"abc\"),\n        \"col3\": list(\"abc\"),\n        \"col4\": [1, 2, 3],\n    }\n    eval_general(*create_test_dfs(data), lambda df: df.describe())\n\n\n@pytest.mark.parametrize(\"method\", [\"idxmin\", \"idxmax\"])\n@pytest.mark.parametrize(\"is_transposed\", [False, True])\n@pytest.mark.parametrize(\"skipna\", [False, True])\n@pytest.mark.parametrize(\"axis\", axis_values, ids=axis_keys)\n@pytest.mark.parametrize(\"data\", [test_data[\"float_nan_data\"]])\ndef test_idxmin_idxmax(data, axis, skipna, is_transposed, method):\n    eval_general(\n        *create_test_dfs(data),\n        lambda df: getattr((df.T if is_transposed else df), method)(\n            axis=axis, skipna=skipna\n        ),\n    )\n\n\n@pytest.mark.parametrize(\"axis\", [0, 1])\ndef test_idxmin_idxmax_string_columns(axis):\n    # https://github.com/modin-project/modin/issues/7093\n    modin_df, pandas_df = create_test_dfs([[\"a\", \"b\"]])\n    eval_general(modin_df, pandas_df, lambda df: df.idxmax(axis=axis))\n    eval_general(modin_df, pandas_df, lambda df: df.idxmin(axis=axis))\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_last_valid_index(data):\n    modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data)\n    assert modin_df.last_valid_index() == 
pandas_df.last_valid_index()\n\n\n@pytest.mark.parametrize(\"index\", bool_arg_values, ids=arg_keys(\"index\", bool_arg_keys))\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_memory_usage(data, index):\n    eval_general(*create_test_dfs(data), lambda df: df.memory_usage(index=index))\n\n\n@pytest.mark.parametrize(\"method\", [\"min\", \"max\", \"mean\"])\n@pytest.mark.parametrize(\"is_transposed\", [False, True])\n@pytest.mark.parametrize(\"numeric_only\", [False, True])\n@pytest.mark.parametrize(\"skipna\", [False, True])\n@pytest.mark.parametrize(\"axis\", axis_values, ids=axis_keys)\n@pytest.mark.parametrize(\"data\", [test_data[\"float_nan_data\"]])\ndef test_min_max_mean(data, axis, skipna, numeric_only, is_transposed, method):\n    eval_general(\n        *create_test_dfs(data),\n        lambda df: getattr((df.T if is_transposed else df), method)(\n            axis=axis, skipna=skipna, numeric_only=numeric_only\n        ),\n    )\n\n\n@pytest.mark.parametrize(\"method\", [\"prod\", \"product\"])\n@pytest.mark.parametrize(\"is_transposed\", [False, True])\n@pytest.mark.parametrize(\"skipna\", [False, True])\n@pytest.mark.parametrize(\"axis\", axis_values, ids=axis_keys)\n@pytest.mark.parametrize(\"data\", [test_data[\"float_nan_data\"]])\ndef test_prod(\n    data,\n    axis,\n    skipna,\n    is_transposed,\n    method,\n):\n    eval_general(\n        *create_test_dfs(data),\n        lambda df, *args, **kwargs: getattr(df.T if is_transposed else df, method)(\n            axis=axis,\n            skipna=skipna,\n        ),\n    )\n\n    # test for issue #1953\n    arrays = [[\"1\", \"1\", \"2\", \"2\"], [\"1\", \"2\", \"3\", \"4\"]]\n    modin_df = pd.DataFrame(\n        [[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]], index=arrays\n    )\n    pandas_df = pandas.DataFrame(\n        [[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]], index=arrays\n    )\n    modin_result = modin_df.prod()\n    pandas_result = 
pandas_df.prod()\n    df_equals(modin_result, pandas_result)\n\n\n@pytest.mark.parametrize(\"is_transposed\", [False, True])\n@pytest.mark.parametrize(\"skipna\", [False, True])\n@pytest.mark.parametrize(\"axis\", axis_values, ids=axis_keys)\n@pytest.mark.parametrize(\"data\", [test_data[\"float_nan_data\"]])\ndef test_sum(data, axis, skipna, is_transposed, request):\n    eval_general(\n        *create_test_dfs(data),\n        lambda df: (df.T if is_transposed else df).sum(\n            axis=axis,\n            skipna=skipna,\n        ),\n    )\n\n    # test for issue #1953\n    arrays = [[\"1\", \"1\", \"2\", \"2\"], [\"1\", \"2\", \"3\", \"4\"]]\n    modin_df = pd.DataFrame(\n        [[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]], index=arrays\n    )\n    pandas_df = pandas.DataFrame(\n        [[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]], index=arrays\n    )\n    modin_result = modin_df.sum()\n    pandas_result = pandas_df.sum()\n    df_equals(modin_result, pandas_result)\n\n\n@pytest.mark.parametrize(\"dtype\", [\"int64\", \"Int64\", \"int64[pyarrow]\"])\ndef test_dtype_consistency(dtype):\n    # test for issue #6781\n    res_dtype = pd.DataFrame([1, 2, 3, 4], dtype=dtype).sum().dtype\n    assert res_dtype == pandas.api.types.pandas_dtype(dtype)\n\n\n@pytest.mark.parametrize(\"fn\", [\"prod\", \"sum\"])\n@pytest.mark.parametrize(\"numeric_only\", [False, True])\n@pytest.mark.parametrize(\n    \"min_count\", int_arg_values, ids=arg_keys(\"min_count\", int_arg_keys)\n)\ndef test_sum_prod_specific(fn, min_count, numeric_only):\n    expected_exception = None\n    if not numeric_only and fn == \"prod\":\n        # FIXME: https://github.com/modin-project/modin/issues/7029\n        expected_exception = False\n    elif not numeric_only and fn == \"sum\":\n        expected_exception = TypeError('can only concatenate str (not \"int\") to str')\n    if numeric_only and fn == \"sum\":\n        
pytest.xfail(reason=\"https://github.com/modin-project/modin/issues/7029\")\n    if min_count == 5 and not numeric_only:\n        pytest.xfail(reason=\"https://github.com/modin-project/modin/issues/7029\")\n\n    eval_general(\n        *create_test_dfs(test_data_diff_dtype),\n        lambda df: getattr(df, fn)(min_count=min_count, numeric_only=numeric_only),\n        expected_exception=expected_exception,\n    )\n\n\n@pytest.mark.parametrize(\"backend\", [None, \"pyarrow\"])\ndef test_sum_prod_min_count(backend):\n    md_df, pd_df = create_test_dfs(test_data[\"float_nan_data\"], backend=backend)\n    eval_general(md_df, pd_df, lambda df: df.prod(min_count=len(pd_df) + 1))\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_sum_single_column(data):\n    modin_df = pd.DataFrame(data).iloc[:, [0]]\n    pandas_df = pandas.DataFrame(data).iloc[:, [0]]\n    df_equals(modin_df.sum(), pandas_df.sum())\n    df_equals(modin_df.sum(axis=1), pandas_df.sum(axis=1))\n\n\ndef test_sum_datetime64():\n    pd_ser = pandas.date_range(start=\"1/1/2018\", end=\"1/08/2018\")\n    modin_df, pandas_df = create_test_dfs({\"A\": pd_ser, \"B\": [1, 2, 3, 4, 5, 6, 7, 8]})\n    eval_general(\n        modin_df,\n        pandas_df,\n        lambda df: df.sum(),\n        expected_exception=TypeError(\n            \"'DatetimeArray' with dtype datetime64[ns] does not support reduction 'sum'\"\n        ),\n    )\n\n\ndef test_min_datetime64():\n    pd_ser = pandas.date_range(start=\"1/1/2018\", end=\"1/08/2018\")\n    modin_df, pandas_df = create_test_dfs({\"A\": pd_ser, \"B\": [1, 2, 3, 4, 5, 6, 7, 8]})\n    eval_general(\n        modin_df,\n        pandas_df,\n        lambda df: df.min(),\n    )\n\n    eval_general(\n        modin_df,\n        pandas_df,\n        lambda df: df.min(axis=1),\n        # pandas raises: `TypeError: '<=' not supported between instances of 'Timestamp' and 'int'`\n        # while modin raises quite general: `TypeError(\"Cannot compare 
Numeric and Non-Numeric Types\")`\n        expected_exception=False,\n    )\n\n\n@pytest.mark.parametrize(\n    \"fn\", [\"max\", \"min\", \"median\", \"mean\", \"skew\", \"kurt\", \"sem\", \"std\", \"var\"]\n)\n@pytest.mark.parametrize(\"axis\", [0, 1, None])\n@pytest.mark.parametrize(\"numeric_only\", [False, True])\ndef test_reduce_specific(fn, numeric_only, axis):\n    expected_exception = None\n    if not numeric_only:\n        if fn in (\"max\", \"min\"):\n            if axis == 0:\n                operator = \">=\" if fn == \"max\" else \"<=\"\n                expected_exception = TypeError(\n                    f\"'{operator}' not supported between instances of 'str' and 'float'\"\n                )\n            else:\n                # FIXME: https://github.com/modin-project/modin/issues/7030\n                expected_exception = False\n        elif fn in (\"skew\", \"kurt\", \"sem\", \"std\", \"var\", \"median\", \"mean\"):\n            # FIXME: https://github.com/modin-project/modin/issues/7030\n            expected_exception = False\n\n    eval_general(\n        *create_test_dfs(test_data_diff_dtype),\n        lambda df: getattr(df, fn)(numeric_only=numeric_only, axis=axis),\n        expected_exception=expected_exception,\n    )\n\n\n@pytest.mark.parametrize(\"subset_len\", [1, 2])\n@pytest.mark.parametrize(\"sort\", bool_arg_values, ids=bool_arg_keys)\n@pytest.mark.parametrize(\"normalize\", bool_arg_values, ids=bool_arg_keys)\n@pytest.mark.parametrize(\"dropna\", bool_arg_values, ids=bool_arg_keys)\n@pytest.mark.parametrize(\"ascending\", [False, True])\ndef test_value_counts(subset_len, sort, normalize, dropna, ascending):\n    def comparator(md_res, pd_res):\n        if subset_len == 1:\n            # 'pandas.DataFrame.value_counts' always returns frames with MultiIndex,\n            # even when 'subset_len == 1' it returns MultiIndex with 'nlevels == 1'.\n            # This behavior is expensive to mimic, so Modin 'value_counts' returns frame\n     
       # with non-multi index in that case. That's why we flatten indices here.\n            assert md_res.index.nlevels == pd_res.index.nlevels == 1\n            for df in [md_res, pd_res]:\n                df.index = df.index.get_level_values(0)\n\n        if sort:\n            # We sort indices for the result because of:\n            # https://github.com/modin-project/modin/issues/1650\n            df_equals_with_non_stable_indices(md_res, pd_res)\n        else:\n            df_equals(md_res.sort_index(), pd_res.sort_index())\n\n    data = test_data_values[0]\n    md_df, pd_df = create_test_dfs(data)\n    # We're picking columns with different index signs to involve columns from different partitions\n    subset = [pd_df.columns[-i if i % 2 else i] for i in range(subset_len)]\n\n    eval_general(\n        md_df,\n        pd_df,\n        lambda df: df.value_counts(\n            subset=subset,\n            sort=sort,\n            normalize=normalize,\n            dropna=dropna,\n            ascending=ascending,\n        ),\n        comparator=comparator,\n    )\n\n\ndef test_value_counts_categorical():\n    # from issue #3571\n    data = np.array([\"a\"] * 50000 + [\"b\"] * 10000 + [\"c\"] * 1000)\n    random_state = np.random.RandomState(seed=42)\n    random_state.shuffle(data)\n    modin_df, pandas_df = create_test_dfs(\n        {\"col1\": data, \"col2\": data}, dtype=\"category\"\n    )\n    eval_general(\n        modin_df,\n        pandas_df,\n        lambda df: df.value_counts(),\n        comparator=df_equals,\n    )\n"
  },
  {
    "path": "modin/tests/pandas/dataframe/test_udf.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport matplotlib\nimport numpy as np\nimport pandas\nimport pytest\nfrom pandas.core.dtypes.common import is_list_like\n\nimport modin.pandas as pd\nfrom modin.config import MinRowPartitionSize, NPartitions\nfrom modin.core.storage_formats.pandas.query_compiler_caster import (\n    _assert_casting_functions_wrap_same_implementation,\n)\nfrom modin.tests.pandas.utils import (\n    UNIVERSAL_UNARY_NUMPY_FUNCTIONS_FOR_FLOATS,\n    agg_func_except_keys,\n    agg_func_except_values,\n    agg_func_keys,\n    agg_func_values,\n    arg_keys,\n    bool_arg_keys,\n    bool_arg_values,\n    create_test_dfs,\n    default_to_pandas_ignore_string,\n    df_equals,\n    eval_general,\n    query_func_keys,\n    query_func_values,\n    random_state,\n    test_data,\n    test_data_keys,\n    test_data_values,\n    udf_func_keys,\n    udf_func_values,\n)\nfrom modin.tests.test_utils import (\n    current_execution_is_native,\n    warns_that_defaulting_to_pandas_if,\n)\nfrom modin.utils import get_current_execution\n\nNPartitions.put(4)\n\n# Force matplotlib to not use any Xwindows backend.\nmatplotlib.use(\"Agg\")\n\n# Our configuration in pytest.ini requires that we explicitly catch 
all\n# instances of defaulting to pandas, but some test modules, like this one,\n# have too many such instances.\n# TODO(https://github.com/modin-project/modin/issues/3655): catch all instances\n# of defaulting to pandas.\npytestmark = pytest.mark.filterwarnings(default_to_pandas_ignore_string)\n\n\ndef test_agg_dict():\n    md_df, pd_df = create_test_dfs(test_data_values[0])\n    agg_dict = {pd_df.columns[0]: \"sum\", pd_df.columns[-1]: (\"sum\", \"count\")}\n    eval_general(md_df, pd_df, lambda df: df.agg(agg_dict))\n\n    agg_dict = {\n        \"new_col1\": (pd_df.columns[0], \"sum\"),\n        \"new_col2\": (pd_df.columns[-1], \"count\"),\n    }\n    eval_general(md_df, pd_df, lambda df: df.agg(**agg_dict))\n\n\n@pytest.mark.parametrize(\"axis\", [0, 1])\n@pytest.mark.parametrize(\n    \"func\",\n    agg_func_values + agg_func_except_values,\n    ids=agg_func_keys + agg_func_except_keys,\n)\n@pytest.mark.parametrize(\"op\", [\"agg\", \"apply\"])\ndef test_agg_apply(axis, func, op, request):\n    expected_exception = None\n    if \"sum sum\" in request.node.callspec.id:\n        expected_exception = pandas.errors.SpecificationError(\n            \"Function names must be unique if there is no new column names assigned\"\n        )\n    elif \"should raise AssertionError\" in request.node.callspec.id:\n        # FIXME: https://github.com/modin-project/modin/issues/7031\n        expected_exception = False\n    eval_general(\n        *create_test_dfs(test_data[\"float_nan_data\"]),\n        lambda df: getattr(df, op)(func, axis),\n        expected_exception=expected_exception,\n    )\n\n\n@pytest.mark.parametrize(\"axis\", [\"rows\", \"columns\"])\n@pytest.mark.parametrize(\n    \"func\",\n    agg_func_values + agg_func_except_values,\n    ids=agg_func_keys + agg_func_except_keys,\n)\n@pytest.mark.parametrize(\"op\", [\"agg\", \"apply\"])\ndef test_agg_apply_axis_names(axis, func, op, request):\n    expected_exception = None\n    if \"sum sum\" in 
request.node.callspec.id:\n        expected_exception = pandas.errors.SpecificationError(\n            \"Function names must be unique if there is no new column names assigned\"\n        )\n    elif \"should raise AssertionError\" in request.node.callspec.id:\n        # FIXME: https://github.com/modin-project/modin/issues/7031\n        expected_exception = False\n    eval_general(\n        *create_test_dfs(test_data[\"int_data\"]),\n        lambda df: getattr(df, op)(func, axis),\n        expected_exception=expected_exception,\n    )\n\n\ndef test_aggregate_alias():\n    _assert_casting_functions_wrap_same_implementation(\n        pd.DataFrame.agg, pd.DataFrame.aggregate\n    )\n\n\ndef test_aggregate_error_checking():\n    modin_df = pd.DataFrame(test_data[\"float_nan_data\"])\n\n    with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):\n        modin_df.aggregate({modin_df.columns[0]: \"sum\", modin_df.columns[1]: \"mean\"})\n\n    with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):\n        modin_df.aggregate(\"arcsin\")\n\n\n@pytest.mark.parametrize(\n    \"func\",\n    agg_func_values + agg_func_except_values,\n    ids=agg_func_keys + agg_func_except_keys,\n)\ndef test_apply_key_error(func):\n    if not (is_list_like(func) or callable(func) or isinstance(func, str)):\n        pytest.xfail(\n            reason=\"Because index materialization is expensive Modin first\"\n            + \"checks the validity of the function itself and only then the engine level\"\n            + \"checks the validity of the indices. 
Pandas order of such checks is reversed,\"\n            + \"so we get different errors when both (function and index) are invalid.\"\n        )\n    eval_general(\n        *create_test_dfs(test_data[\"int_data\"]),\n        lambda df: df.apply({\"row\": func}, axis=1),\n        expected_exception=KeyError(\"Column(s) ['row'] do not exist\"),\n    )\n\n\n@pytest.mark.parametrize(\"axis\", [0, 1])\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"func\", [\"kurt\", \"count\", \"sum\", \"mean\", \"all\", \"any\"])\ndef test_apply_text_func(data, func, axis):\n    func_kwargs = {\"axis\": axis}\n    rows_number = len(next(iter(data.values())))  # length of the first data column\n    level_0 = np.random.choice([0, 1, 2], rows_number)\n    level_1 = np.random.choice([3, 4, 5], rows_number)\n    index = pd.MultiIndex.from_arrays([level_0, level_1])\n\n    eval_general(\n        *create_test_dfs(data, index=index),\n        lambda df, *args, **kwargs: df.apply(func, *args, **kwargs),\n        **func_kwargs,\n    )\n\n\n@pytest.mark.parametrize(\n    \"column\", [\"A\", [\"A\", \"C\"]], ids=arg_keys(\"column\", [\"A\", [\"A\", \"C\"]])\n)\n@pytest.mark.parametrize(\n    \"ignore_index\", bool_arg_values, ids=arg_keys(\"ignore_index\", bool_arg_keys)\n)\ndef test_explode_single_partition(column, ignore_index):\n    # This test data has two columns where some items are lists that\n    # explode() should expand. 
In some rows, the columns have list-like\n    # elements that must be expanded, and in others, they have empty lists\n    # or items that aren't list-like at all.\n    data = {\n        \"A\": [[0, 1, 2], \"foo\", [], [3, 4]],\n        \"B\": 1,\n        \"C\": [[\"a\", \"b\", \"c\"], np.nan, [], [\"d\", \"e\"]],\n    }\n    eval_general(\n        *create_test_dfs(data),\n        lambda df: df.explode(column, ignore_index=ignore_index),\n    )\n\n\n@pytest.mark.parametrize(\n    \"column\", [\"A\", [\"A\", \"C\"]], ids=arg_keys(\"column\", [\"A\", [\"A\", \"C\"]])\n)\n@pytest.mark.parametrize(\n    \"ignore_index\", bool_arg_values, ids=arg_keys(\"ignore_index\", bool_arg_keys)\n)\ndef test_explode_all_partitions(column, ignore_index):\n    # Test explode with enough rows to fill all partitions. explode should\n    # expand every row in the input data into two rows. It's especially\n    # important that the input data has list-like elements that must be\n    # expanded at the boundaries of the partitions, e.g. 
at row 31.\n    num_rows = NPartitions.get() * MinRowPartitionSize.get()\n    data = {\"A\": [[3, 4]] * num_rows, \"C\": [[\"a\", \"b\"]] * num_rows}\n    eval_general(\n        *create_test_dfs(data),\n        lambda df: df.explode(column, ignore_index=ignore_index),\n    )\n\n\n@pytest.mark.parametrize(\"axis\", [\"rows\", \"columns\"])\n@pytest.mark.parametrize(\"args\", [(1,), (\"_A\",)])\ndef test_apply_args(axis, args):\n    def apply_func(series, y):\n        try:\n            return series + y\n        except TypeError:\n            return series.map(str) + str(y)\n\n    eval_general(\n        *create_test_dfs(test_data[\"int_data\"]),\n        lambda df: df.apply(apply_func, axis=axis, args=args),\n    )\n\n\ndef test_apply_metadata():\n    def add(a, b, c):\n        return a + b + c\n\n    data = {\"A\": [1, 2, 3], \"B\": [4, 5, 6], \"C\": [7, 8, 9]}\n\n    modin_df = pd.DataFrame(data)\n    modin_df[\"add\"] = modin_df.apply(\n        lambda row: add(row[\"A\"], row[\"B\"], row[\"C\"]), axis=1\n    )\n\n    pandas_df = pandas.DataFrame(data)\n    pandas_df[\"add\"] = pandas_df.apply(\n        lambda row: add(row[\"A\"], row[\"B\"], row[\"C\"]), axis=1\n    )\n    df_equals(modin_df, pandas_df)\n\n\n@pytest.mark.parametrize(\"func\", udf_func_values, ids=udf_func_keys)\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_apply_udf(data, func):\n    eval_general(\n        *create_test_dfs(data),\n        lambda df, *args, **kwargs: df.apply(func, *args, **kwargs),\n        other=lambda df: df,\n    )\n\n\ndef test_apply_dict_4828():\n    data = [[2, 4], [1, 3]]\n    modin_df1, pandas_df1 = create_test_dfs(data)\n    eval_general(\n        modin_df1,\n        pandas_df1,\n        lambda df: df.apply({0: (lambda x: x**2)}),\n    )\n    eval_general(\n        modin_df1,\n        pandas_df1,\n        lambda df: df.apply({0: (lambda x: x**2)}, axis=1),\n    )\n\n    # several partitions along axis 0\n    modin_df2, pandas_df2 = 
create_test_dfs(data, index=[2, 3])\n    modin_df3 = pd.concat([modin_df1, modin_df2], axis=0)\n    pandas_df3 = pandas.concat([pandas_df1, pandas_df2], axis=0)\n    eval_general(\n        modin_df3,\n        pandas_df3,\n        lambda df: df.apply({0: (lambda x: x**2)}),\n    )\n    eval_general(\n        modin_df3,\n        pandas_df3,\n        lambda df: df.apply({0: (lambda x: x**2)}, axis=1),\n    )\n\n    # several partitions along axis 1\n    modin_df4, pandas_df4 = create_test_dfs(data, columns=[2, 3])\n    modin_df5 = pd.concat([modin_df1, modin_df4], axis=1)\n    pandas_df5 = pandas.concat([pandas_df1, pandas_df4], axis=1)\n    eval_general(\n        modin_df5,\n        pandas_df5,\n        lambda df: df.apply({0: (lambda x: x**2)}),\n    )\n    eval_general(\n        modin_df5,\n        pandas_df5,\n        lambda df: df.apply({0: (lambda x: x**2)}, axis=1),\n    )\n\n\ndef test_apply_modin_func_4635():\n    data = [1]\n    modin_df, pandas_df = create_test_dfs(data)\n    df_equals(modin_df.apply(pd.Series.sum), pandas_df.apply(pandas.Series.sum))\n\n    data = {\"a\": [1, 2, 3], \"b\": [1, 2, 3], \"c\": [1, 2, 3]}\n    modin_df, pandas_df = create_test_dfs(data)\n    modin_df = modin_df.set_index([\"a\"])\n    pandas_df = pandas_df.set_index([\"a\"])\n\n    df_equals(\n        modin_df.groupby(\"a\", group_keys=False).apply(pd.DataFrame.sample, n=1),\n        pandas_df.groupby(\"a\", group_keys=False).apply(pandas.DataFrame.sample, n=1),\n    )\n\n\n@pytest.mark.parametrize(\n    \"apply_function\",\n    (\n        lambda df, function: function(df),\n        lambda df, function: df.apply(function, axis=0),\n        lambda df, function: df.apply(function, axis=1),\n    ),\n)\n@pytest.mark.parametrize(\"function\", UNIVERSAL_UNARY_NUMPY_FUNCTIONS_FOR_FLOATS)\ndef test_apply_unary_numpy_universal_function_issue_7645(function, apply_function):\n    eval_general(\n        *create_test_dfs(test_data[\"float_nan_data\"]),\n        lambda df: 
apply_function(df, function),\n    )\n\n\ndef test_eval_df_use_case():\n    frame_data = {\"a\": random_state.randn(10), \"b\": random_state.randn(10)}\n    df = pandas.DataFrame(frame_data)\n    modin_df = pd.DataFrame(frame_data)\n\n    # test eval for series results\n    tmp_pandas = df.eval(\"arctan2(sin(a), b)\", engine=\"python\", parser=\"pandas\")\n    tmp_modin = modin_df.eval(\"arctan2(sin(a), b)\", engine=\"python\", parser=\"pandas\")\n\n    assert isinstance(tmp_modin, pd.Series)\n    df_equals(tmp_modin, tmp_pandas)\n\n    # Test not inplace assignments\n    tmp_pandas = df.eval(\"e = arctan2(sin(a), b)\", engine=\"python\", parser=\"pandas\")\n    tmp_modin = modin_df.eval(\n        \"e = arctan2(sin(a), b)\", engine=\"python\", parser=\"pandas\"\n    )\n    df_equals(tmp_modin, tmp_pandas)\n\n    # Test inplace assignments\n    df.eval(\"e = arctan2(sin(a), b)\", engine=\"python\", parser=\"pandas\", inplace=True)\n    modin_df.eval(\n        \"e = arctan2(sin(a), b)\", engine=\"python\", parser=\"pandas\", inplace=True\n    )\n    # TODO: Use a series equality validator.\n    df_equals(modin_df, df)\n\n\ndef test_eval_df_arithmetic_subexpression():\n    frame_data = {\"a\": random_state.randn(10), \"b\": random_state.randn(10)}\n    df = pandas.DataFrame(frame_data)\n    modin_df = pd.DataFrame(frame_data)\n    df.eval(\"not_e = sin(a + b)\", engine=\"python\", parser=\"pandas\", inplace=True)\n    modin_df.eval(\"not_e = sin(a + b)\", engine=\"python\", parser=\"pandas\", inplace=True)\n    # TODO: Use a series equality validator.\n    df_equals(modin_df, df)\n\n\ndef test_eval_groupby_transform():\n    # see #5511 for details\n    df = pd.DataFrame({\"num\": range(1, 1001), \"group\": [\"A\"] * 500 + [\"B\"] * 500})\n    assert df.eval(\"num.groupby(group).transform('min')\").unique().tolist() == [1, 501]\n\n\ndef test_eval_scalar():\n    # see #4477 for details\n    df = pd.DataFrame([[2]])\n    assert df.eval(\"1\") == 
1\n\n\n@pytest.mark.parametrize(\"engine\", (\"numexpr\", \"python\"))\ndef test_eval_not_inplace_does_not_change_input_dataframe(engine):\n    snow_df, pandas_df = create_test_dfs({\"a\": [1, 2, 3]})\n    original_pandas = pandas_df.copy()\n    snow_result = snow_df.eval(\"b = a + 1\", inplace=False, engine=engine)\n    pandas_result = pandas_df.eval(\"b = a + 1\", inplace=False, engine=engine)\n    df_equals(snow_df, original_pandas)\n    df_equals(pandas_df, original_pandas)\n    df_equals(snow_result, pandas_result)\n\n\nTEST_VAR = 2\n\n\n@pytest.mark.parametrize(\"method\", [\"query\", \"eval\"])\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"local_var\", [2])\n@pytest.mark.parametrize(\"engine\", [\"python\", \"numexpr\"])\ndef test_eval_and_query_with_local_and_global_var(method, data, engine, local_var):\n    modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data)\n    op = \"+\" if method == \"eval\" else \"<\"\n    for expr in (f\"col1 {op} @local_var\", f\"col1 {op} @TEST_VAR\"):\n        df_equals(\n            getattr(modin_df, method)(expr, engine=engine),\n            getattr(pandas_df, method)(expr, engine=engine),\n        )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_filter(data):\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n\n    by = {\"items\": [\"col1\", \"col5\"], \"regex\": \"4$|3$\", \"like\": \"col\"}\n    df_equals(modin_df.filter(items=by[\"items\"]), pandas_df.filter(items=by[\"items\"]))\n\n    df_equals(\n        modin_df.filter(regex=by[\"regex\"], axis=0),\n        pandas_df.filter(regex=by[\"regex\"], axis=0),\n    )\n    df_equals(\n        modin_df.filter(regex=by[\"regex\"], axis=1),\n        pandas_df.filter(regex=by[\"regex\"], axis=1),\n    )\n\n    df_equals(modin_df.filter(like=by[\"like\"]), pandas_df.filter(like=by[\"like\"]))\n\n    with pytest.raises(TypeError):\n        
modin_df.filter(items=by[\"items\"], regex=by[\"regex\"])\n\n    with pytest.raises(TypeError):\n        modin_df.filter()\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_pipe(data):\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n\n    n = len(modin_df.index)\n    a, b, c = 2 % n, 0, 3 % n\n    col = modin_df.columns[3 % len(modin_df.columns)]\n\n    def h(x):\n        return x.drop(columns=[col])\n\n    def g(x, arg1=0):\n        for _ in range(arg1):\n            x = (pd if isinstance(x, pd.DataFrame) else pandas).concat((x, x))\n        return x\n\n    def f(x, arg2=0, arg3=0):\n        return x.drop([arg2, arg3])\n\n    df_equals(\n        f(g(h(modin_df), arg1=a), arg2=b, arg3=c),\n        (modin_df.pipe(h).pipe(g, arg1=a).pipe(f, arg2=b, arg3=c)),\n    )\n    df_equals(\n        (modin_df.pipe(h).pipe(g, arg1=a).pipe(f, arg2=b, arg3=c)),\n        (pandas_df.pipe(h).pipe(g, arg1=a).pipe(f, arg2=b, arg3=c)),\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"funcs\", query_func_values, ids=query_func_keys)\n@pytest.mark.parametrize(\"engine\", [\"python\", \"numexpr\"])\ndef test_query(data, funcs, engine):\n    if get_current_execution() == \"BaseOnPython\" and funcs != \"col3 > col4\":\n        pytest.xfail(\n            reason=\"In this case, we are faced with the problem of handling empty data frames - #4934\"\n        )\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n\n    try:\n        pandas_result = pandas_df.query(funcs, engine=engine)\n    except Exception as err:\n        with pytest.raises(type(err)):\n            modin_df.query(funcs, engine=engine)\n    else:\n        modin_result = modin_df.query(funcs, engine=engine)\n        # `dtypes` must be evaluated after `query` so we need to check cache\n        assert modin_result._query_compiler.frame_has_dtypes_cache\n        
df_equals(modin_result, pandas_result)\n        df_equals(modin_result.dtypes, pandas_result.dtypes)\n\n\ndef test_query_named_index():\n    eval_general(\n        *(df.set_index(\"col1\") for df in create_test_dfs(test_data[\"int_data\"])),\n        lambda df: df.query(\"col1 % 2 == 0 | col3 % 2 == 1\"),\n    )\n\n\ndef test_query_named_multiindex():\n    eval_general(\n        *(\n            df.set_index([\"col1\", \"col3\"])\n            for df in create_test_dfs(test_data[\"int_data\"])\n        ),\n        lambda df: df.query(\"col1 % 2 == 1 | col3 % 2 == 1\"),\n    )\n\n\ndef test_query_multiindex_without_names():\n    def make_df(without_index):\n        new_df = without_index.set_index([\"col1\", \"col3\"])\n        new_df.index.names = [None, None]\n        return new_df\n\n    eval_general(\n        *(make_df(df) for df in create_test_dfs(test_data[\"int_data\"])),\n        lambda df: df.query(\"ilevel_0 % 2 == 0 | ilevel_1 % 2 == 1 | col4 % 2 == 1\"),\n    )\n\n\ndef test_empty_query():\n    modin_df = pd.DataFrame([1, 2, 3, 4, 5])\n\n    with pytest.raises(ValueError):\n        modin_df.query(\"\")\n\n\n@pytest.mark.parametrize(\"engine\", [\"python\", \"numexpr\"])\ndef test_query_after_insert(engine):\n    modin_df = pd.DataFrame({\"x\": [-1, 0, 1, None], \"y\": [1, 2, None, 3]})\n    modin_df[\"z\"] = modin_df.eval(\"x / y\")\n    modin_df = modin_df.query(\"z >= 0\", engine=engine)\n    modin_result = modin_df.reset_index(drop=True)\n    modin_result.columns = [\"a\", \"b\", \"c\"]\n\n    pandas_df = pd.DataFrame({\"x\": [-1, 0, 1, None], \"y\": [1, 2, None, 3]})\n    pandas_df[\"z\"] = pandas_df.eval(\"x / y\")\n    pandas_df = pandas_df.query(\"z >= 0\", engine=engine)\n    pandas_result = pandas_df.reset_index(drop=True)\n    pandas_result.columns = [\"a\", \"b\", \"c\"]\n\n    df_equals(modin_result, pandas_result)\n    df_equals(modin_df, pandas_df)\n\n\n@pytest.mark.parametrize(\"engine\", [\"python\", \"numexpr\"])\ndef 
test_query_with_element_access_issue_4580(engine):\n    pdf = pandas.DataFrame({\"a\": [0, 1, 2]})\n    # get two row partitions by concatenating\n    df = pd.concat([pd.DataFrame(pdf[:1]), pd.DataFrame(pdf[1:])])\n    eval_general(df, pdf, lambda df: df.query(\"a == a[0]\", engine=engine))\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\n    \"func\", [lambda x: x + 1, [np.sqrt, np.exp]], ids=[\"lambda\", \"list_udfs\"]\n)\ndef test_transform(data, func, request):\n    if \"list_udfs\" in request.node.callspec.id:\n        pytest.xfail(reason=\"https://github.com/modin-project/modin/issues/6998\")\n    eval_general(*create_test_dfs(data), lambda df: df.transform(func))\n"
  },
  {
    "path": "modin/tests/pandas/dataframe/test_window.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport matplotlib\nimport numpy as np\nimport pandas\nimport pytest\n\nimport modin.pandas as pd\nfrom modin.config import NPartitions\nfrom modin.tests.pandas.utils import (\n    arg_keys,\n    axis_keys,\n    axis_values,\n    bool_arg_keys,\n    bool_arg_values,\n    create_test_dfs,\n    df_equals,\n    eval_general,\n    int_arg_keys,\n    int_arg_values,\n    is_native_shallow_copy,\n    name_contains,\n    no_numeric_dfs,\n    quantiles_keys,\n    quantiles_values,\n    random_state,\n    test_data,\n    test_data_keys,\n    test_data_values,\n    test_data_with_duplicates_keys,\n    test_data_with_duplicates_values,\n)\n\nNPartitions.put(4)\n\n# Force matplotlib to not use any Xwindows backend.\nmatplotlib.use(\"Agg\")\n\n\n@pytest.mark.parametrize(\"axis\", [0, 1])\n@pytest.mark.parametrize(\"skipna\", [False, True])\n@pytest.mark.parametrize(\"method\", [\"cumprod\", \"cummin\", \"cummax\", \"cumsum\"])\ndef test_cumprod_cummin_cummax_cumsum(axis, skipna, method):\n    eval_general(\n        *create_test_dfs(test_data[\"float_nan_data\"]),\n        lambda df: getattr(df, method)(axis=axis, skipna=skipna),\n    )\n\n\n@pytest.mark.parametrize(\"axis\", 
[\"rows\", \"columns\"])\n@pytest.mark.parametrize(\"method\", [\"cumprod\", \"cummin\", \"cummax\", \"cumsum\"])\ndef test_cumprod_cummin_cummax_cumsum_transposed(axis, method):\n    eval_general(\n        *create_test_dfs(test_data[\"int_data\"]),\n        lambda df: getattr(df.T, method)(axis=axis),\n    )\n\n\n@pytest.mark.parametrize(\"axis\", [0, 1])\n@pytest.mark.parametrize(\"method\", [\"cummin\", \"cummax\"])\ndef test_cummin_cummax_int_and_float(axis, method):\n    data = {\"col1\": list(range(1000)), \"col2\": [i * 0.1 for i in range(1000)]}\n    eval_general(*create_test_dfs(data), lambda df: getattr(df, method)(axis=axis))\n\n\n@pytest.mark.parametrize(\"axis\", [0, 1])\n@pytest.mark.parametrize(\n    \"periods\", int_arg_values, ids=arg_keys(\"periods\", int_arg_keys)\n)\ndef test_diff(axis, periods):\n    eval_general(\n        *create_test_dfs(test_data[\"float_nan_data\"]),\n        lambda df: df.diff(axis=axis, periods=periods),\n    )\n\n\ndef test_diff_with_datetime_types():\n    pandas_df = pandas.DataFrame(\n        [[1, 2.0, 3], [4, 5.0, 6], [7, np.nan, 9], [10, 11.3, 12], [13, 14.5, 15]]\n    )\n    data = pandas.date_range(\"2018-01-01\", periods=5, freq=\"h\").values\n    pandas_df = pandas.concat([pandas_df, pandas.Series(data)], axis=1)\n    modin_df = pd.DataFrame(pandas_df)\n\n    # Test `diff` with datetime type.\n    pandas_result = pandas_df.diff()\n    modin_result = modin_df.diff()\n    df_equals(modin_result, pandas_result)\n\n    # Test `diff` with timedelta type.\n    td_pandas_result = pandas_result.diff()\n    td_modin_result = modin_result.diff()\n    df_equals(td_modin_result, td_pandas_result)\n\n\ndef test_diff_error_handling():\n    df = pd.DataFrame([[\"a\", \"b\", \"c\"]], columns=[\"col 0\", \"col 1\", \"col 2\"])\n    with pytest.raises(\n        ValueError, match=\"periods must be an int. 
got <class 'str'> instead\"\n    ):\n        df.diff(axis=0, periods=\"1\")\n\n    with pytest.raises(TypeError, match=\"unsupported operand type for -: got object\"):\n        df.diff()\n\n\n@pytest.mark.parametrize(\"axis\", [\"rows\", \"columns\"])\ndef test_diff_transposed(axis):\n    eval_general(\n        *create_test_dfs(test_data[\"int_data\"]),\n        lambda df: df.T.diff(axis=axis),\n    )\n\n\n@pytest.mark.parametrize(\n    \"data\", test_data_with_duplicates_values, ids=test_data_with_duplicates_keys\n)\n@pytest.mark.parametrize(\n    \"keep\", [\"last\", \"first\", False], ids=[\"last\", \"first\", \"False\"]\n)\ndef test_duplicated(data, keep):\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n\n    pandas_result = pandas_df.duplicated(keep=keep)\n    modin_result = modin_df.duplicated(keep=keep)\n    df_equals(modin_result, pandas_result)\n\n    import random\n\n    subset = random.sample(\n        list(pandas_df.columns), random.randint(1, len(pandas_df.columns))\n    )\n    pandas_result = pandas_df.duplicated(keep=keep, subset=subset)\n    modin_result = modin_df.duplicated(keep=keep, subset=subset)\n\n    df_equals(modin_result, pandas_result)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_ffill(data):\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n    df_equals(modin_df.ffill(), pandas_df.ffill())\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\n    \"method\",\n    [\"backfill\", \"bfill\", \"pad\", \"ffill\", None],\n    ids=[\"backfill\", \"bfill\", \"pad\", \"ffill\", \"None\"],\n)\n@pytest.mark.parametrize(\"axis\", axis_values, ids=axis_keys)\n@pytest.mark.parametrize(\"limit\", int_arg_values, ids=int_arg_keys)\ndef test_fillna(data, method, axis, limit):\n    # We are not testing when axis is over rows until pandas-17399 gets fixed.\n    if axis != 1 and axis != \"columns\":\n        
modin_df = pd.DataFrame(data)\n        pandas_df = pandas.DataFrame(data)\n\n        try:\n            pandas_result = pandas_df.fillna(0, method=method, axis=axis, limit=limit)\n        except Exception as err:\n            with pytest.raises(type(err)):\n                modin_df.fillna(0, method=method, axis=axis, limit=limit)\n        else:\n            modin_result = modin_df.fillna(0, method=method, axis=axis, limit=limit)\n            df_equals(modin_result, pandas_result)\n\n\ndef test_fillna_sanity():\n    # with different dtype\n    frame_data = [\n        [\"a\", \"a\", np.nan, \"a\"],\n        [\"b\", \"b\", np.nan, \"b\"],\n        [\"c\", \"c\", np.nan, \"c\"],\n    ]\n    df = pandas.DataFrame(frame_data)\n\n    result = df.fillna({2: \"foo\"})\n    modin_df = pd.DataFrame(frame_data).fillna({2: \"foo\"})\n\n    df_equals(modin_df, result)\n\n    modin_df = pd.DataFrame(df)\n    df.fillna({2: \"foo\"}, inplace=True)\n    modin_df.fillna({2: \"foo\"}, inplace=True)\n    df_equals(modin_df, result)\n\n    frame_data = {\n        \"Date\": [pandas.NaT, pandas.Timestamp(\"2014-1-1\")],\n        \"Date2\": [pandas.Timestamp(\"2013-1-1\"), pandas.NaT],\n    }\n    df = pandas.DataFrame(frame_data)\n    result = df.fillna(value={\"Date\": df[\"Date2\"]})\n    modin_df = pd.DataFrame(frame_data).fillna(value={\"Date\": df[\"Date2\"]})\n    df_equals(modin_df, result)\n\n    frame_data = {\"A\": [pandas.Timestamp(\"2012-11-11 00:00:00+01:00\"), pandas.NaT]}\n    df = pandas.DataFrame(frame_data)\n    modin_df = pd.DataFrame(frame_data)\n    df_equals(modin_df.fillna(method=\"pad\"), df.fillna(method=\"pad\"))\n\n    frame_data = {\"A\": [pandas.NaT, pandas.Timestamp(\"2012-11-11 00:00:00+01:00\")]}\n    df = pandas.DataFrame(frame_data)\n    modin_df = pd.DataFrame(frame_data).fillna(method=\"bfill\")\n    df_equals(modin_df, df.fillna(method=\"bfill\"))\n\n\ndef test_fillna_downcast():\n    # infer int64 from float64\n    frame_data = {\"a\": [1.0, np.nan]}\n 
   df = pandas.DataFrame(frame_data)\n    result = df.fillna(0, downcast=\"infer\")\n    modin_df = pd.DataFrame(frame_data).fillna(0, downcast=\"infer\")\n    df_equals(modin_df, result)\n\n    # infer int64 from float64 when fillna value is a dict\n    df = pandas.DataFrame(frame_data)\n    result = df.fillna({\"a\": 0}, downcast=\"infer\")\n    modin_df = pd.DataFrame(frame_data).fillna({\"a\": 0}, downcast=\"infer\")\n    df_equals(modin_df, result)\n\n\ndef test_fillna_4660():\n    eval_general(\n        *create_test_dfs({\"a\": [\"a\"], \"b\": [\"b\"], \"c\": [pd.NA]}, index=[\"row1\"]),\n        lambda df: df[\"c\"].fillna(df[\"b\"]),\n    )\n\n\n@pytest.mark.xfail(\n    condition=is_native_shallow_copy(),\n    reason=\"native pandas backend does not deep copy inputs by default\",\n    strict=True,\n)\ndef test_fillna_inplace():\n    frame_data = random_state.randn(10, 4)\n    df = pandas.DataFrame(frame_data)\n    df[1][:4] = np.nan\n    df[3][-4:] = np.nan\n\n    modin_df = pd.DataFrame(df)\n    df.fillna(value=0, inplace=True)\n    try:\n        df_equals(modin_df, df)\n    except AssertionError:\n        pass\n    else:\n        assert False\n\n    modin_df.fillna(value=0, inplace=True)\n    df_equals(modin_df, df)\n\n    modin_df = pd.DataFrame(df).fillna(value={0: 0}, inplace=True)\n    assert modin_df is None\n\n    df[1][:4] = np.nan\n    df[3][-4:] = np.nan\n    modin_df = pd.DataFrame(df)\n    df.fillna(method=\"ffill\", inplace=True)\n    try:\n        df_equals(modin_df, df)\n    except AssertionError:\n        pass\n    else:\n        assert False\n\n    modin_df.fillna(method=\"ffill\", inplace=True)\n    df_equals(modin_df, df)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"limit\", [1, 2, 0.5, -1, -2, 1.5])\ndef test_frame_fillna_limit(data, limit):\n    pandas_df = pandas.DataFrame(data)\n\n    replace_pandas_series = pandas_df.columns.to_series().sample(frac=1)\n    replace_dict = 
replace_pandas_series.to_dict()\n    replace_pandas_df = pandas.DataFrame(\n        {col: pandas_df.index.to_series() for col in pandas_df.columns},\n        index=pandas_df.index,\n    ).sample(frac=1)\n    replace_modin_series = pd.Series(replace_pandas_series)\n    replace_modin_df = pd.DataFrame(replace_pandas_df)\n\n    index = pandas_df.index\n    result = pandas_df[:2].reindex(index)\n    modin_df = pd.DataFrame(result)\n\n    if isinstance(limit, float):\n        limit = int(len(modin_df) * limit)\n    if limit is not None and limit < 0:\n        limit = len(modin_df) + limit\n\n    df_equals(\n        modin_df.fillna(method=\"pad\", limit=limit),\n        result.fillna(method=\"pad\", limit=limit),\n    )\n    df_equals(\n        modin_df.fillna(replace_dict, limit=limit),\n        result.fillna(replace_dict, limit=limit),\n    )\n    df_equals(\n        modin_df.fillna(replace_modin_series, limit=limit),\n        result.fillna(replace_pandas_series, limit=limit),\n    )\n    df_equals(\n        modin_df.fillna(replace_modin_df, limit=limit),\n        result.fillna(replace_pandas_df, limit=limit),\n    )\n\n    result = pandas_df[-2:].reindex(index)\n    modin_df = pd.DataFrame(result)\n    df_equals(\n        modin_df.fillna(method=\"backfill\", limit=limit),\n        result.fillna(method=\"backfill\", limit=limit),\n    )\n    df_equals(\n        modin_df.fillna(replace_dict, limit=limit),\n        result.fillna(replace_dict, limit=limit),\n    )\n    df_equals(\n        modin_df.fillna(replace_modin_series, limit=limit),\n        result.fillna(replace_pandas_series, limit=limit),\n    )\n    df_equals(\n        modin_df.fillna(replace_modin_df, limit=limit),\n        result.fillna(replace_pandas_df, limit=limit),\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_frame_pad_backfill_limit(data):\n    pandas_df = pandas.DataFrame(data)\n\n    index = pandas_df.index\n\n    result = pandas_df[:2].reindex(index)\n 
   modin_df = pd.DataFrame(result)\n    df_equals(\n        modin_df.fillna(method=\"pad\", limit=2), result.fillna(method=\"pad\", limit=2)\n    )\n\n    result = pandas_df[-2:].reindex(index)\n    modin_df = pd.DataFrame(result)\n    df_equals(\n        modin_df.fillna(method=\"backfill\", limit=2),\n        result.fillna(method=\"backfill\", limit=2),\n    )\n\n\ndef test_fillna_dtype_conversion():\n    # make sure that fillna on an empty frame works\n    df = pandas.DataFrame(index=range(3), columns=[\"A\", \"B\"], dtype=\"float64\")\n    modin_df = pd.DataFrame(index=range(3), columns=[\"A\", \"B\"], dtype=\"float64\")\n    df_equals(modin_df.fillna(\"nan\"), df.fillna(\"nan\"))\n\n    frame_data = {\"A\": [1, np.nan], \"B\": [1.0, 2.0]}\n    df = pandas.DataFrame(frame_data)\n    modin_df = pd.DataFrame(frame_data)\n    for v in [\"\", 1, np.nan, 1.0]:\n        df_equals(modin_df.fillna(v), df.fillna(v))\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_fillna_skip_certain_blocks(data):\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n\n    # don't try to fill boolean, int blocks\n    df_equals(modin_df.fillna(np.nan), pandas_df.fillna(np.nan))\n\n\ndef test_fillna_dict_series():\n    frame_data = {\n        \"a\": [np.nan, 1, 2, np.nan, np.nan],\n        \"b\": [1, 2, 3, np.nan, np.nan],\n        \"c\": [np.nan, 1, 2, 3, 4],\n    }\n    df = pandas.DataFrame(frame_data)\n    modin_df = pd.DataFrame(frame_data)\n\n    df_equals(modin_df.fillna({\"a\": 0, \"b\": 5}), df.fillna({\"a\": 0, \"b\": 5}))\n\n    df_equals(\n        modin_df.fillna({\"a\": 0, \"b\": 5, \"d\": 7}),\n        df.fillna({\"a\": 0, \"b\": 5, \"d\": 7}),\n    )\n\n    # Series treated same as dict\n    df_equals(modin_df.fillna(modin_df.max()), df.fillna(df.max()))\n\n\ndef test_fillna_dataframe():\n    frame_data = {\n        \"a\": [np.nan, 1, 2, np.nan, np.nan],\n        \"b\": [1, 2, 3, np.nan, np.nan],\n        \"c\": 
[np.nan, 1, 2, 3, 4],\n    }\n    df = pandas.DataFrame(frame_data, index=list(\"VWXYZ\"))\n    modin_df = pd.DataFrame(frame_data, index=list(\"VWXYZ\"))\n\n    # df2 may have different index and columns\n    df2 = pandas.DataFrame(\n        {\"a\": [np.nan, 10, 20, 30, 40], \"b\": [50, 60, 70, 80, 90], \"foo\": [\"bar\"] * 5},\n        index=list(\"VWXuZ\"),\n    )\n    modin_df2 = pd.DataFrame(df2)\n\n    # only those columns and indices which are shared get filled\n    df_equals(modin_df.fillna(modin_df2), df.fillna(df2))\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_fillna_columns(data):\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n\n    df_equals(\n        modin_df.fillna(method=\"ffill\", axis=1),\n        pandas_df.fillna(method=\"ffill\", axis=1),\n    )\n\n    df_equals(\n        modin_df.fillna(method=\"ffill\", axis=1),\n        pandas_df.fillna(method=\"ffill\", axis=1),\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_fillna_invalid_method(data):\n    modin_df = pd.DataFrame(data)\n\n    with pytest.raises(ValueError):\n        modin_df.fillna(method=\"ffil\")\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_fillna_invalid_value(data):\n    modin_df = pd.DataFrame(data)\n    # list\n    pytest.raises(TypeError, modin_df.fillna, [1, 2])\n    # tuple\n    pytest.raises(TypeError, modin_df.fillna, (1, 2))\n    # frame with series\n    pytest.raises(TypeError, modin_df.iloc[:, 0].fillna, modin_df)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_fillna_col_reordering(data):\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n\n    df_equals(modin_df.fillna(method=\"ffill\"), pandas_df.fillna(method=\"ffill\"))\n\n\ndef test_fillna_datetime_columns():\n    frame_data = {\n        \"A\": [-1, -2, np.nan],\n        \"B\": 
pd.date_range(\"20130101\", periods=3),\n        \"C\": [\"foo\", \"bar\", None],\n        \"D\": [\"foo2\", \"bar2\", None],\n    }\n    df = pandas.DataFrame(frame_data, index=pd.date_range(\"20130110\", periods=3))\n    modin_df = pd.DataFrame(frame_data, index=pd.date_range(\"20130110\", periods=3))\n    df_equals(modin_df.fillna(\"?\"), df.fillna(\"?\"))\n\n    frame_data = {\n        \"A\": [-1, -2, np.nan],\n        \"B\": [\n            pandas.Timestamp(\"2013-01-01\"),\n            pandas.Timestamp(\"2013-01-02\"),\n            pandas.NaT,\n        ],\n        \"C\": [\"foo\", \"bar\", None],\n        \"D\": [\"foo2\", \"bar2\", None],\n    }\n    df = pandas.DataFrame(frame_data, index=pd.date_range(\"20130110\", periods=3))\n    modin_df = pd.DataFrame(frame_data, index=pd.date_range(\"20130110\", periods=3))\n    df_equals(modin_df.fillna(\"?\"), df.fillna(\"?\"))\n\n\n@pytest.mark.parametrize(\"axis\", [0, 1])\n@pytest.mark.parametrize(\"skipna\", [False, True])\n@pytest.mark.parametrize(\"method\", [\"median\", \"skew\"])\ndef test_median_skew(axis, skipna, method):\n    eval_general(\n        *create_test_dfs(test_data[\"float_nan_data\"]),\n        lambda df: getattr(df, method)(axis=axis, skipna=skipna),\n    )\n\n\n@pytest.mark.parametrize(\"axis\", [\"rows\", \"columns\"])\n@pytest.mark.parametrize(\"method\", [\"median\", \"skew\"])\ndef test_median_skew_transposed(axis, method):\n    eval_general(\n        *create_test_dfs(test_data[\"int_data\"]),\n        lambda df: getattr(df.T, method)(axis=axis),\n    )\n\n\n@pytest.mark.parametrize(\"method\", [\"median\", \"skew\", \"std\", \"var\", \"sem\"])\ndef test_median_skew_std_var_sem_1953(method):\n    # See #1953 for details\n    arrays = [[\"1\", \"1\", \"2\", \"2\"], [\"1\", \"2\", \"3\", \"4\"]]\n    data = [[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]]\n    modin_df = pd.DataFrame(data, index=arrays)\n    pandas_df = pandas.DataFrame(data, index=arrays)\n\n    
eval_general(modin_df, pandas_df, lambda df: getattr(df, method)())\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"axis\", axis_values, ids=axis_keys)\n@pytest.mark.parametrize(\"numeric_only\", [False, True])\ndef test_mode(data, axis, numeric_only):\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n\n    try:\n        pandas_result = pandas_df.mode(axis=axis, numeric_only=numeric_only)\n    except Exception:\n        with pytest.raises(TypeError):\n            modin_df.mode(axis=axis, numeric_only=numeric_only)\n    else:\n        modin_result = modin_df.mode(axis=axis, numeric_only=numeric_only)\n        df_equals(modin_result, pandas_result)\n\n\ndef test_nlargest():\n    data = {\n        \"population\": [\n            59000000,\n            65000000,\n            434000,\n            434000,\n            434000,\n            337000,\n            11300,\n            11300,\n            11300,\n        ],\n        \"GDP\": [1937894, 2583560, 12011, 4520, 12128, 17036, 182, 38, 311],\n        \"alpha-2\": [\"IT\", \"FR\", \"MT\", \"MV\", \"BN\", \"IS\", \"NR\", \"TV\", \"AI\"],\n    }\n    index = [\n        \"Italy\",\n        \"France\",\n        \"Malta\",\n        \"Maldives\",\n        \"Brunei\",\n        \"Iceland\",\n        \"Nauru\",\n        \"Tuvalu\",\n        \"Anguilla\",\n    ]\n    modin_df = pd.DataFrame(data=data, index=index)\n    pandas_df = pandas.DataFrame(data=data, index=index)\n    df_equals(modin_df.nlargest(3, \"population\"), pandas_df.nlargest(3, \"population\"))\n\n\ndef test_nsmallest():\n    data = {\n        \"population\": [\n            59000000,\n            65000000,\n            434000,\n            434000,\n            434000,\n            337000,\n            11300,\n            11300,\n            11300,\n        ],\n        \"GDP\": [1937894, 2583560, 12011, 4520, 12128, 17036, 182, 38, 311],\n        \"alpha-2\": [\"IT\", \"FR\", 
\"MT\", \"MV\", \"BN\", \"IS\", \"NR\", \"TV\", \"AI\"],\n    }\n    index = [\n        \"Italy\",\n        \"France\",\n        \"Malta\",\n        \"Maldives\",\n        \"Brunei\",\n        \"Iceland\",\n        \"Nauru\",\n        \"Tuvalu\",\n        \"Anguilla\",\n    ]\n    modin_df = pd.DataFrame(data=data, index=index)\n    pandas_df = pandas.DataFrame(data=data, index=index)\n    df_equals(\n        modin_df.nsmallest(n=3, columns=\"population\"),\n        pandas_df.nsmallest(n=3, columns=\"population\"),\n    )\n    df_equals(\n        modin_df.nsmallest(n=2, columns=[\"population\", \"GDP\"], keep=\"all\"),\n        pandas_df.nsmallest(n=2, columns=[\"population\", \"GDP\"], keep=\"all\"),\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"axis\", axis_values, ids=axis_keys)\n@pytest.mark.parametrize(\n    \"dropna\", bool_arg_values, ids=arg_keys(\"dropna\", bool_arg_keys)\n)\ndef test_nunique(data, axis, dropna):\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n\n    modin_result = modin_df.nunique(axis=axis, dropna=dropna)\n    pandas_result = pandas_df.nunique(axis=axis, dropna=dropna)\n    df_equals(modin_result, pandas_result)\n\n    modin_result = modin_df.T.nunique(axis=axis, dropna=dropna)\n    pandas_result = pandas_df.T.nunique(axis=axis, dropna=dropna)\n    df_equals(modin_result, pandas_result)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"q\", quantiles_values, ids=quantiles_keys)\ndef test_quantile(request, data, q):\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n\n    if not name_contains(request.node.name, no_numeric_dfs):\n        df_equals(modin_df.quantile(q), pandas_df.quantile(q))\n        df_equals(modin_df.quantile(q, axis=1), pandas_df.quantile(q, axis=1))\n\n        try:\n            pandas_result = pandas_df.quantile(q, axis=1, numeric_only=False)\n    
    except Exception as err:\n            with pytest.raises(type(err)):\n                modin_df.quantile(q, axis=1, numeric_only=False)\n        else:\n            modin_result = modin_df.quantile(q, axis=1, numeric_only=False)\n            df_equals(modin_result, pandas_result)\n    else:\n        with pytest.raises(ValueError):\n            modin_df.quantile(q)\n\n    if not name_contains(request.node.name, no_numeric_dfs):\n        df_equals(modin_df.T.quantile(q), pandas_df.T.quantile(q))\n        df_equals(modin_df.T.quantile(q, axis=1), pandas_df.T.quantile(q, axis=1))\n\n        try:\n            pandas_result = pandas_df.T.quantile(q, axis=1, numeric_only=False)\n        except Exception as err:\n            with pytest.raises(type(err)):\n                modin_df.T.quantile(q, axis=1, numeric_only=False)\n        else:\n            modin_result = modin_df.T.quantile(q, axis=1, numeric_only=False)\n            df_equals(modin_result, pandas_result)\n    else:\n        with pytest.raises(ValueError):\n            modin_df.T.quantile(q)\n\n\ndef test_quantile_7157():\n    # for details: https://github.com/modin-project/modin/issues/7157\n    n_rows = 100\n    n_fcols = 10\n    n_mcols = 5\n\n    df1_md, df1_pd = create_test_dfs(\n        random_state.rand(n_rows, n_fcols),\n        columns=[f\"feat_{i}\" for i in range(n_fcols)],\n    )\n    df2_md, df2_pd = create_test_dfs(\n        {\n            \"test_string1\": [\"test_string2\" for _ in range(n_rows)]\n            for _ in range(n_mcols)\n        }\n    )\n    df3_md = pd.concat([df2_md, df1_md], axis=1)\n    df3_pd = pandas.concat([df2_pd, df1_pd], axis=1)\n\n    eval_general(df3_md, df3_pd, lambda df: df.quantile(0.25, numeric_only=True))\n    eval_general(df3_md, df3_pd, lambda df: df.quantile((0.25,), numeric_only=True))\n    eval_general(\n        df3_md, df3_pd, lambda df: df.quantile((0.25, 0.75), numeric_only=True)\n    )\n\n\n@pytest.mark.parametrize(\"axis\", [\"rows\", 
\"columns\"])\n@pytest.mark.parametrize(\n    \"na_option\", [\"keep\", \"top\", \"bottom\"], ids=[\"keep\", \"top\", \"bottom\"]\n)\ndef test_rank_transposed(axis, na_option):\n    eval_general(\n        *create_test_dfs(test_data[\"int_data\"]),\n        lambda df: df.rank(axis=axis, na_option=na_option),\n    )\n\n\n@pytest.mark.parametrize(\"skipna\", [False, True])\n@pytest.mark.parametrize(\"ddof\", int_arg_values, ids=arg_keys(\"ddof\", int_arg_keys))\ndef test_sem_float_nan_only(skipna, ddof):\n    eval_general(\n        *create_test_dfs(test_data[\"float_nan_data\"]),\n        lambda df: df.sem(skipna=skipna, ddof=ddof),\n    )\n\n\n@pytest.mark.parametrize(\"axis\", [\"rows\", \"columns\"])\n@pytest.mark.parametrize(\"ddof\", int_arg_values, ids=arg_keys(\"ddof\", int_arg_keys))\ndef test_sem_int_only(axis, ddof):\n    eval_general(\n        *create_test_dfs(test_data[\"int_data\"]),\n        lambda df: df.sem(axis=axis, ddof=ddof),\n    )\n\n\n@pytest.mark.parametrize(\"axis\", [0, 1])\n@pytest.mark.parametrize(\"skipna\", [False, True])\n@pytest.mark.parametrize(\"method\", [\"std\", \"var\"])\ndef test_std_var(axis, skipna, method):\n    eval_general(\n        *create_test_dfs(test_data[\"float_nan_data\"]),\n        lambda df: getattr(df, method)(axis=axis, skipna=skipna),\n    )\n\n\n@pytest.mark.parametrize(\"axis\", [0, 1, None])\ndef test_rank(axis):\n    expected_exception = None\n    if axis is None:\n        expected_exception = ValueError(\"No axis named None for object type DataFrame\")\n    eval_general(\n        *create_test_dfs(test_data[\"float_nan_data\"]),\n        lambda df: df.rank(axis=axis),\n        expected_exception=expected_exception,\n    )\n\n\n@pytest.mark.parametrize(\"axis\", [\"rows\", \"columns\"])\n@pytest.mark.parametrize(\"ddof\", int_arg_values, ids=arg_keys(\"ddof\", int_arg_keys))\n@pytest.mark.parametrize(\"method\", [\"std\", \"var\"])\ndef test_std_var_transposed(axis, ddof, method):\n    eval_general(\n        
*create_test_dfs(test_data[\"int_data\"]),\n        lambda df: getattr(df.T, method)(axis=axis, ddof=ddof),\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_values(data):\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n\n    np.testing.assert_equal(modin_df.values, pandas_df.values)\n"
  },
  {
    "path": "modin/tests/pandas/extensions/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n"
  },
  {
    "path": "modin/tests/pandas/extensions/conftest.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport pytest\n\nfrom modin.config import Backend, Engine, Execution, StorageFormat\nfrom modin.core.execution.dispatching.factories import factories\nfrom modin.core.execution.dispatching.factories.factories import BaseFactory, NativeIO\nfrom modin.core.storage_formats.pandas.native_query_compiler import NativeQueryCompiler\nfrom modin.pandas.api.extensions.extensions import _NON_EXTENDABLE_ATTRIBUTES\n\n\nclass Test1QueryCompiler(NativeQueryCompiler):\n    storage_format = property(lambda self: \"Test1_Storage_Format\")\n    engine = property(lambda self: \"Test1_Engine\")\n\n\nclass Test1IO(NativeIO):\n    query_compiler_cls = Test1QueryCompiler\n\n\nclass Test1Factory(BaseFactory):\n\n    @classmethod\n    def prepare(cls):\n        cls.io_cls = Test1IO\n\n\n@pytest.fixture\ndef Backend1():\n    factories.Test1_Storage_FormatOnTest1_EngineFactory = Test1Factory\n    if \"Backend1\" not in Backend.choices:\n        StorageFormat.add_option(\"Test1_storage_format\")\n        Engine.add_option(\"Test1_engine\")\n        Backend.register_backend(\n            \"Backend1\",\n            Execution(storage_format=\"Test1_Storage_Format\", engine=\"Test1_Engine\"),\n   
     )\n    return \"Backend1\"\n\n\n@pytest.fixture(\n    # sort the set of non-extendable attributes to make the sequence of test\n    # cases deterministic for pytest-xdist.\n    params=sorted(_NON_EXTENDABLE_ATTRIBUTES),\n)\ndef non_extendable_attribute_name(request) -> str:\n    return request.param\n"
  },
  {
    "path": "modin/tests/pandas/extensions/test_api_reexport.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\nimport pandas\n\nimport modin.pandas as pd\n\n\ndef test_extensions_does_not_overwrite_pandas_api():\n    # Ensure that importing modin.pandas.api.extensions does not overwrite our re-export\n    # of pandas.api submodules.\n    import modin.pandas.api.extensions as ext\n\n    # Top-level submodules should remain the same\n    assert set(pd.api.__all__) == set(pandas.api.__all__)\n    # Methods we define, like ext.register_dataframe_accessor should be different\n    assert (\n        ext.register_dataframe_accessor\n        is not pandas.api.extensions.register_dataframe_accessor\n    )\n    # Methods from other submodules, like pd.api.types.is_bool_dtype, should be the same\n    assert pd.api.types.is_bool_dtype is pandas.api.types.is_bool_dtype\n"
  },
  {
    "path": "modin/tests/pandas/extensions/test_base_extensions.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport re\n\nimport pytest\n\nimport modin.pandas as pd\nfrom modin.pandas.api.extensions import register_base_accessor\nfrom modin.tests.pandas.utils import df_equals\n\n\n@pytest.mark.parametrize(\"data_class\", [pd.DataFrame, pd.Series])\nclass TestOverrideMethodForOneBackend:\n    def test_add_simple_method(self, Backend1, data_class):\n        expected_string_val = \"Some string value\"\n        method_name = \"new_method\"\n        modin_object = data_class([1, 2, 3]).set_backend(Backend1)\n\n        @register_base_accessor(name=method_name, backend=Backend1)\n        def my_method_implementation(self):\n            return expected_string_val\n\n        assert hasattr(data_class, method_name)\n        assert getattr(modin_object, method_name)() == expected_string_val\n        with pytest.raises(\n            AttributeError,\n            match=re.escape(\n                f\"{data_class.__name__} object has no attribute {method_name}\"\n            ),\n        ):\n            getattr(modin_object.set_backend(\"pandas\"), method_name)()\n\n    def test_add_non_method(self, Backend1, data_class):\n        expected_val = 4\n        attribute_name = \"four\"\n      
  register_base_accessor(name=attribute_name, backend=Backend1)(expected_val)\n\n        assert data_class().set_backend(Backend1).four == expected_val\n        assert not hasattr(data_class().set_backend(\"pandas\"), attribute_name)\n\n    def test_method_uses_existing_methods(self, Backend1, data_class):\n        modin_object = data_class([1, 2, 3]).set_backend(Backend1)\n        method_name = \"self_accessor\"\n        expected_result = modin_object.sum() / modin_object.count()\n\n        @register_base_accessor(name=method_name, backend=Backend1)\n        def my_average(self):\n            return self.sum() / self.count()\n\n        if data_class is pd.DataFrame:\n            df_equals(modin_object.self_accessor(), expected_result)\n        else:\n            assert modin_object.self_accessor() == expected_result\n\n    def test_override_existing_method(self, Backend1, data_class):\n        modin_object = data_class([3, 2, 1])\n\n        @register_base_accessor(name=\"copy\", backend=Backend1)\n        def my_copy(self, *args, **kwargs):\n            return self + 1\n\n        df_equals(modin_object.set_backend(Backend1).copy(), modin_object + 1)\n\n\n@pytest.mark.parametrize(\"data_class\", [pd.DataFrame, pd.Series])\n@pytest.mark.parametrize(\"backend\", [\"pandas\", \"python_test\"])\nclass TestOverrideMethodForAllBackends:\n    def test_add_simple_method(self, backend, data_class):\n        expected_string_val = \"Some string value\"\n        method_name = \"new_method\"\n\n        @register_base_accessor(name=method_name)\n        def my_method_implementation(self):\n            return expected_string_val\n\n        modin_object = data_class([1, 2, 3]).set_backend(backend)\n\n        assert getattr(modin_object, method_name)() == expected_string_val\n        assert modin_object.new_method() == expected_string_val\n\n    def test_add_non_method(self, data_class, backend):\n        expected_val = 4\n        attribute_name = \"four\"\n        
register_base_accessor(name=attribute_name)(expected_val)\n\n        assert data_class().set_backend(backend).four == expected_val\n\n    def test_method_uses_existing_methods(self, data_class, backend):\n        modin_object = data_class([1, 2, 3]).set_backend(backend)\n        method_name = \"self_accessor\"\n        expected_result = modin_object.sum() / modin_object.count()\n\n        @register_base_accessor(name=method_name)\n        def my_average(self):\n            return self.sum() / self.count()\n\n        if data_class is pd.DataFrame:\n            df_equals(modin_object.self_accessor(), expected_result)\n        else:\n            assert modin_object.self_accessor() == expected_result\n\n    def test_override_existing_method(self, data_class, backend):\n        modin_object = data_class([3, 2, 1])\n\n        @register_base_accessor(name=\"copy\")\n        def my_copy(self, *args, **kwargs):\n            return self + 1\n\n        df_equals(modin_object.set_backend(backend).copy(), modin_object + 1)\n\n\nclass TestDunders:\n    \"\"\"\n    Make sure to test that we override special \"dunder\" methods like __len__\n    correctly. 
python calls these methods with DataFrame.__len__(obj)\n    rather than getattr(obj, \"__len__\")().\n    source: https://docs.python.org/3/reference/datamodel.html#special-lookup\n    \"\"\"\n\n    @pytest.mark.parametrize(\"data_class\", [pd.DataFrame, pd.Series])\n    def test_len(self, Backend1, data_class):\n        @register_base_accessor(name=\"__len__\", backend=Backend1)\n        def always_get_1(self):\n            return 1\n\n        modin_object = data_class([1, 2, 3])\n        assert len(modin_object) == 3\n        backend_object = modin_object.set_backend(Backend1)\n        assert len(backend_object) == 1\n        assert backend_object.__len__() == 1\n\n\n@pytest.mark.parametrize(\"data_class\", [pd.DataFrame, pd.Series])\nclass TestProperty:\n    def test_override_loc_for_one_backend(self, Backend1, data_class):\n        modin_object = data_class([1, 2, 3])\n\n        @register_base_accessor(name=\"loc\", backend=Backend1)\n        @property\n        def my_loc(self):\n            return self.index[0]\n\n        assert isinstance(modin_object.set_backend(Backend1).loc, int)\n        assert modin_object.set_backend(Backend1).loc == 0\n\n    @pytest.mark.parametrize(\"backend\", [\"pandas\", \"python_test\"])\n    def test_override_loc_for_all_backends(self, backend, data_class):\n        @register_base_accessor(name=\"loc\", backend=None)\n        @property\n        def my_loc(self):\n            return self.index[0]\n\n        modin_object = data_class([1, 2, 3])\n\n        assert isinstance(modin_object.set_backend(backend).loc, int)\n        assert modin_object.set_backend(backend).loc == 0\n\n    def test_add_deletable_property(self, Backend1, data_class):\n        # register a public property `public_property_name` that is backed by\n        # a private attribute `private_property_name`.\n\n        public_property_name = \"property_name\"\n        private_property_name = \"_property_name\"\n\n        def get_property(self):\n            return 
getattr(self, private_property_name)\n\n        def set_property(self, value):\n            setattr(self, private_property_name, value)\n\n        def del_property(self):\n            delattr(self, private_property_name)\n\n        register_base_accessor(name=public_property_name, backend=Backend1)(\n            property(fget=get_property, fset=set_property, fdel=del_property)\n        )\n\n        modin_object = data_class({\"a\": [1, 2, 3], \"b\": [4, 5, 6]})\n        assert not hasattr(modin_object, public_property_name)\n        backend_object = modin_object.set_backend(Backend1)\n        setattr(backend_object, public_property_name, \"value\")\n        assert getattr(backend_object, public_property_name) == \"value\"\n        delattr(backend_object, public_property_name)\n        # check that the deletion works.\n        assert not hasattr(backend_object, private_property_name)\n\n    @pytest.mark.parametrize(\"backend\", [\"pandas\", \"python_test\"])\n    def test_add_deletable_property_for_all_backends(self, data_class, backend):\n        # register a public property `public_property_name` that is backed by\n        # a private attribute `private_property_name`.\n\n        public_property_name = \"property_name\"\n        private_property_name = \"_property_name\"\n\n        def get_property(self):\n            return getattr(self, private_property_name)\n\n        def set_property(self, value):\n            setattr(self, private_property_name, value)\n\n        def del_property(self):\n            delattr(self, private_property_name)\n\n        register_base_accessor(name=public_property_name)(\n            property(fget=get_property, fset=set_property, fdel=del_property)\n        )\n\n        modin_object = data_class({\"a\": [1, 2, 3], \"b\": [4, 5, 6]}).set_backend(backend)\n        setattr(modin_object, public_property_name, \"value\")\n        assert getattr(modin_object, public_property_name) == \"value\"\n        delattr(modin_object, 
public_property_name)\n        # check that the deletion works.\n        assert not hasattr(modin_object, private_property_name)\n\n    def test_get_property_that_raises_attribute_error_on_get_modin_issue_7562(\n        self, data_class\n    ):\n        def get_property(self):\n            raise AttributeError\n\n        register_base_accessor(name=\"extension_property\")(property(fget=get_property))\n        modin_object = data_class()\n        with pytest.raises(AttributeError):\n            getattr(modin_object, \"extension_property\")\n\n    def test_non_settable_extension_property(self, Backend1, data_class):\n        modin_object = data_class([0])\n        property_name = \"property_name\"\n        register_base_accessor(name=property_name, backend=Backend1)(\n            property(fget=(lambda self: 4))\n        )\n\n        assert not hasattr(modin_object, property_name)\n        backend_object = modin_object.set_backend(Backend1)\n        assert getattr(backend_object, property_name) == 4\n        with pytest.raises(AttributeError):\n            setattr(backend_object, property_name, \"value\")\n\n    def test_delete_non_deletable_extension_property(self, Backend1, data_class):\n        modin_object = data_class([0])\n        property_name = \"property_name\"\n        register_base_accessor(name=property_name, backend=Backend1)(\n            property(fget=(lambda self: \"value\"))\n        )\n\n        assert not hasattr(modin_object, property_name)\n        backend_object = modin_object.set_backend(Backend1)\n        assert hasattr(backend_object, property_name)\n        with pytest.raises(AttributeError):\n            delattr(backend_object, property_name)\n\n\n@pytest.mark.parametrize(\"data_class\", [pd.DataFrame, pd.Series])\ndef test_deleting_extension_that_is_not_property_raises_attribute_error(\n    Backend1, data_class\n):\n    expected_string_val = \"Some string value\"\n    method_name = \"new_method\"\n\n    
@register_base_accessor(name=method_name, backend=Backend1)\n    def my_method_implementation(self):\n        return expected_string_val\n\n    modin_object = data_class([0]).set_backend(Backend1)\n    assert hasattr(data_class, method_name)\n    with pytest.raises(AttributeError):\n        delattr(modin_object, method_name)\n\n\ndef test_disallowed_extensions(Backend1, non_extendable_attribute_name):\n    with pytest.raises(\n        ValueError,\n        match=re.escape(\n            f\"Cannot register an extension with the reserved name {non_extendable_attribute_name}.\"\n        ),\n    ):\n        register_base_accessor(name=non_extendable_attribute_name, backend=Backend1)(\n            \"unused_value\"\n        )\n"
  },
  {
    "path": "modin/tests/pandas/extensions/test_dataframe_extensions.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport re\nfrom unittest import mock\n\nimport pandas\nimport pytest\n\nimport modin.pandas as pd\nfrom modin.config import AutoSwitchBackend, Backend\nfrom modin.config import context as config_context\nfrom modin.pandas.api.extensions import register_dataframe_accessor\n\ndefault___init__ = pd.DataFrame._extensions[None][\"__init__\"]\n\n\ndef test_dataframe_extension_simple_method(Backend1):\n    expected_string_val = \"Some string value\"\n    method_name = \"new_method\"\n    df = pd.DataFrame([1, 2, 3]).set_backend(Backend1)\n\n    @register_dataframe_accessor(name=method_name, backend=Backend1)\n    def my_method_implementation(self):\n        return expected_string_val\n\n    assert hasattr(pd.DataFrame, method_name)\n    assert df.new_method() == expected_string_val\n\n\ndef test_dataframe_extension_non_method(Backend1):\n    expected_val = 4\n    attribute_name = \"four\"\n    register_dataframe_accessor(name=attribute_name, backend=Backend1)(expected_val)\n    df = pd.DataFrame([1, 2, 3]).set_backend(Backend1)\n\n    assert df.four == expected_val\n\n\ndef test_dataframe_extension_accessing_existing_methods(Backend1):\n    df = pd.DataFrame([1, 2, 
3]).set_backend(Backend1)\n    method_name = \"self_accessor\"\n    expected_result = df.sum() / df.count()\n\n    @register_dataframe_accessor(name=method_name, backend=Backend1)\n    def my_average(self):\n        return self.sum() / self.count()\n\n    assert df.self_accessor().equals(expected_result)\n\n\ndef test_dataframe_extension_overrides_existing_method(Backend1):\n    df = pd.DataFrame([3, 2, 1])\n    assert df.sort_values(0).iloc[0, 0] == 1\n\n    @register_dataframe_accessor(name=\"sort_values\", backend=Backend1)\n    def my_sort_values(self):\n        return self\n\n    assert df.set_backend(Backend1).sort_values().iloc[0, 0] == 3\n\n\n@pytest.mark.parametrize(\n    \"method_name\",\n    [\n        \"pow\",\n        \"__pow__\",\n        \"__ipow__\",\n    ],\n)\ndef test_dataframe_extension_overrides_pow_github_issue_7495(method_name):\n    register_dataframe_accessor(method_name, backend=\"Pandas\")(\n        lambda *args, **kwargs: 4\n    )\n    assert getattr(pd.DataFrame([1]).set_backend(\"Pandas\"), method_name)() == 4\n\n\ndef test_override_pow_and__pow__to_different_implementations():\n    register_dataframe_accessor(\"pow\", backend=\"Pandas\")(\n        lambda *args, **kwargs: \"pow_result\"\n    )\n    register_dataframe_accessor(\"__pow__\", backend=\"Pandas\")(\n        lambda *args, **kwargs: \"__pow___result\"\n    )\n    df = pd.DataFrame([1]).set_backend(\"pandas\")\n    assert df.pow() == \"pow_result\"\n    assert df.__pow__() == \"__pow___result\"\n\n\ndef test_dataframe_extension_method_uses_superclass_method(Backend1):\n    df = pd.DataFrame([3, 2, 1])\n    assert df.sort_values(0).iloc[0, 0] == 1\n\n    @register_dataframe_accessor(name=\"sort_values\", backend=Backend1)\n    def my_sort_values(self, by):\n        return super(pd.DataFrame, self).sort_values(by=by, ascending=False)\n\n    assert df.set_backend(Backend1).sort_values(by=0).iloc[0, 0] == 3\n\n\nclass TestOverride__init__:\n    def 
test_override_one_backend_and_pass_no_query_compilers(self):\n        default_backend = Backend.get()\n        backend_init = mock.Mock(wraps=default___init__)\n        register_dataframe_accessor(name=\"__init__\", backend=default_backend)(\n            backend_init\n        )\n        output_df = pd.DataFrame([1], index=[\"a\"], columns=[\"b\"])\n        assert output_df.get_backend() == default_backend\n        backend_init.assert_has_calls(\n            [\n                mock.call(output_df, [1], index=[\"a\"], columns=[\"b\"]),\n                # There's a second, internal call to the dataframe constructor that\n                # uses a different dataframe as `self`.\n                mock.call(mock.ANY, query_compiler=output_df._query_compiler),\n            ]\n        )\n\n    def test_override_one_backend_and_pass_query_compiler_kwarg(self):\n        backend = \"Pandas\"\n        backend_init = mock.Mock(wraps=default___init__)\n        register_dataframe_accessor(name=\"__init__\", backend=backend)(backend_init)\n\n        with config_context(Backend=backend):\n            input_df = pd.DataFrame()\n\n        backend_init.reset_mock()\n        output_df = pd.DataFrame(query_compiler=input_df._query_compiler)\n        assert output_df.get_backend() == backend\n        backend_init.assert_called_once_with(\n            output_df, query_compiler=input_df._query_compiler\n        )\n\n    @pytest.mark.parametrize(\"input_backend\", [\"Python_Test\", \"Pandas\"])\n    def test_override_all_backends_and_pass_query_compiler_kwarg(self, input_backend):\n        backend_init = mock.Mock(wraps=default___init__)\n        register_dataframe_accessor(name=\"__init__\")(backend_init)\n\n        with config_context(Backend=input_backend):\n            input_df = pd.DataFrame()\n\n        backend_init.reset_mock()\n        output_df = pd.DataFrame(query_compiler=input_df._query_compiler)\n        assert output_df.get_backend() == input_backend\n        
backend_init.assert_called_once_with(\n            output_df, query_compiler=input_df._query_compiler\n        )\n\n\nclass TestDunders:\n    \"\"\"\n    Make sure to test that we override special \"dunder\" methods like __len__\n    correctly. python calls these methods with DataFrame.__len__(obj)\n    rather than getattr(obj, \"__len__\")().\n    source: https://docs.python.org/3/reference/datamodel.html#special-lookup\n    \"\"\"\n\n    def test_len(self, Backend1):\n        @register_dataframe_accessor(name=\"__len__\", backend=Backend1)\n        def always_get_1(self):\n            return 1\n\n        df = pd.DataFrame([1, 2, 3])\n        assert len(df) == 3\n        backend_df = df.set_backend(Backend1)\n        assert len(backend_df) == 1\n        assert backend_df.__len__() == 1\n\n    def test_repr(self, Backend1):\n        @register_dataframe_accessor(name=\"__repr__\", backend=Backend1)\n        def simple_repr(self) -> str:\n            return \"dataframe_string\"\n\n        df = pd.DataFrame([1, 2, 3])\n        assert repr(df) == repr(df.modin.to_pandas())\n        backend_df = df.set_backend(Backend1)\n        assert repr(backend_df) == \"dataframe_string\"\n        assert backend_df.__repr__() == \"dataframe_string\"\n\n\nclass TestProperty:\n    def test_override_columns(self, Backend1):\n        df = pd.DataFrame([[\"a\", \"b\"]])\n\n        def set_columns(self, new_columns):\n            self._query_compiler.columns = [f\"{v}_custom\" for v in new_columns]\n\n        register_dataframe_accessor(name=\"columns\", backend=Backend1)(\n            property(\n                fget=(lambda self: self._query_compiler.columns[::-1]), fset=set_columns\n            )\n        )\n\n        assert list(df.columns) == [0, 1]\n        backend_df = df.set_backend(Backend1)\n        assert list(backend_df.columns) == [1, 0]\n        backend_df.columns = [2, 3]\n        assert list(backend_df.columns) == [\n            \"3_custom\",\n            \"2_custom\",\n    
    ]\n\n    def test_search_for_missing_attribute_in_overridden_columns(self, Backend1):\n        \"\"\"\n        Test a scenario where we override the columns getter, then search for a\n        missing dataframe attribute. Modin should look in the dataframe's\n        overridden columns for the attribute.\n        \"\"\"\n        column_name = \"column_name\"\n        column_getter = mock.Mock(wraps=(lambda self: self._query_compiler.columns))\n        register_dataframe_accessor(name=\"columns\", backend=Backend1)(\n            property(fget=column_getter)\n        )\n\n        df = pd.DataFrame({column_name: [\"a\"]}).set_backend(Backend1)\n\n        with pytest.raises(\n            AttributeError,\n            match=\"'DataFrame' object has no attribute 'non_existent_column'\",\n        ):\n            getattr(df, \"non_existent_column\")\n        column_getter.assert_called_once_with(df)\n\n    def test_add_deletable_property(self, Backend1):\n        public_property_name = \"property_name\"\n        private_property_name = \"_property_name\"\n\n        # register a public property `public_property_name` that is backed by\n        # a private attribute `private_property_name`.\n\n        def get_property(self):\n            return getattr(self, private_property_name)\n\n        def set_property(self, value):\n            setattr(self, private_property_name, value)\n\n        def del_property(self):\n            delattr(self, private_property_name)\n\n        register_dataframe_accessor(name=public_property_name, backend=Backend1)(\n            property(get_property, set_property, del_property)\n        )\n\n        df = pd.DataFrame([0])\n        assert not hasattr(df, public_property_name)\n        backend_df = df.set_backend(Backend1)\n        setattr(backend_df, public_property_name, \"value\")\n        assert hasattr(backend_df, private_property_name)\n        assert getattr(backend_df, private_property_name) == \"value\"\n        delattr(backend_df, 
public_property_name)\n        # check that the deletion works.\n        assert not hasattr(backend_df, private_property_name)\n\n    def test_non_settable_extension_property(self, Backend1):\n        df = pd.DataFrame([0])\n        property_name = \"property_name\"\n\n        register_dataframe_accessor(name=property_name, backend=Backend1)(\n            property(fget=(lambda self: 4))\n        )\n\n        assert not hasattr(df, property_name)\n        backend_df = df.set_backend(Backend1)\n        assert getattr(backend_df, property_name) == 4\n        with pytest.raises(AttributeError):\n            setattr(backend_df, property_name, \"value\")\n\n    def test_delete_non_deletable_extension_property(self, Backend1):\n        property_name = \"property_name\"\n\n        register_dataframe_accessor(name=property_name, backend=Backend1)(\n            property(fget=(lambda self: \"value\"))\n        )\n\n        df = pd.DataFrame([0])\n        assert not hasattr(df, property_name)\n        backend_df = df.set_backend(Backend1)\n        assert hasattr(backend_df, property_name)\n        with pytest.raises(AttributeError):\n            delattr(backend_df, property_name)\n\n\ndef test_deleting_extension_that_is_not_property_raises_attribute_error(Backend1):\n    expected_string_val = \"Some string value\"\n    method_name = \"new_method\"\n\n    @register_dataframe_accessor(name=method_name, backend=Backend1)\n    def my_method_implementation(self):\n        return expected_string_val\n\n    df = pd.DataFrame([1, 2, 3]).set_backend(Backend1)\n    assert hasattr(pd.DataFrame, method_name)\n    assert df.new_method() == expected_string_val\n    with pytest.raises(AttributeError):\n        delattr(df, method_name)\n\n\ndef test_disallowed_extensions(Backend1, non_extendable_attribute_name):\n    with pytest.raises(\n        ValueError,\n        match=re.escape(\n            f\"Cannot register an extension with the reserved name {non_extendable_attribute_name}.\"\n        
),\n    ):\n        register_dataframe_accessor(\n            name=non_extendable_attribute_name, backend=Backend1\n        )(\"unused_value\")\n\n\ndef test_correct_backend_with_pin(Backend1):\n    # Ensures that the correct implementation is used when dispatching an operation on a pinned\n    # frame, as an earlier implementation used the wrong extension method while preserving the\n    # correct backend.\n\n    assert not AutoSwitchBackend.get()\n\n    @register_dataframe_accessor(name=\"__repr__\", backend=Backend1)\n    def my_repr(self):\n        return \"fake_repr\"\n\n    with config_context(Backend=\"Python_Test\"):\n        df = pd.DataFrame([1])\n        assert df.get_backend() == \"Python_Test\"\n        assert repr(df) == repr(pandas.DataFrame([1]))\n        df.set_backend(Backend1, inplace=True)\n        df.pin_backend(inplace=True)\n        assert df.get_backend() == Backend1\n        assert repr(df) == \"fake_repr\"\n\n\ndef test_get_extension_from_dataframe_that_is_on_non_default_backend_when_auto_switch_is_false(\n    Backend1,\n):\n    assert not AutoSwitchBackend.get()\n    with config_context(Backend=Backend1):\n        pandas_df = pd.DataFrame([1, 2]).move_to(\"Pandas\")\n        register_dataframe_accessor(\"sum\", backend=\"Pandas\")(\n            lambda df: \"small_sum_result\"\n        )\n        assert pandas_df.sum() == \"small_sum_result\"\n"
  },
  {
    "path": "modin/tests/pandas/extensions/test_groupby_extensions.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nfrom functools import cached_property\n\nimport pytest\n\nimport modin.pandas as pd\nfrom modin.config import AutoSwitchBackend, Backend\nfrom modin.config import context as config_context\nfrom modin.pandas.api.extensions import (\n    register_dataframe_groupby_accessor,\n    register_series_groupby_accessor,\n)\nfrom modin.pandas.groupby import DataFrameGroupBy, SeriesGroupBy\nfrom modin.tests.pandas.utils import default_to_pandas_ignore_string, df_equals\nfrom modin.tests.test_utils import (\n    current_execution_is_native,\n    warns_that_defaulting_to_pandas_if,\n)\n\n\n@pytest.mark.parametrize(\n    \"get_groupby,register_accessor\",\n    (\n        (lambda df: df.groupby(\"col0\"), register_dataframe_groupby_accessor),\n        (lambda df: df.groupby(\"col0\")[\"col1\"], register_series_groupby_accessor),\n    ),\n)\n@config_context(Backend=\"Pandas\")\n@pytest.mark.parametrize(\"extension_backend\", [None, \"Pandas\"])\n@pytest.mark.parametrize(\"method_name\", [\"new_method\", \"sum\"])\ndef test_add_simple_method(\n    get_groupby, register_accessor, extension_backend, method_name\n):\n    expected_string_val = \"expected_string_val\"\n    df = 
pd.DataFrame({\"col0\": [1, 2, 3], \"col1\": [4, 5, 6]})\n\n    @register_accessor(method_name, backend=extension_backend)\n    def new_method(self):\n        return expected_string_val\n\n    groupby = get_groupby(df)\n    assert hasattr(groupby, method_name)\n    assert getattr(groupby, method_name)() == expected_string_val\n\n\ndef test_dataframe_accessor_for_method_that_series_groupby_does_not_override():\n    \"\"\"\n    Test sum(), a DataFrameGroupBy method that SeriesGroupBy inherits without overriding.\n\n    Registering an extension method for DataFrameGroupBy should override sum()\n    behavior for both DataFrameGroupBy and SeriesGroupBy.\n    \"\"\"\n    # Check that SeriesGroupBy inherits sum() from DataFrameGroupBy, with the only\n    # difference being that SeriesGroupBy's sum() is wrapped in a method for handling\n    # extensions and casting.\n    assert DataFrameGroupBy.sum is SeriesGroupBy.sum._wrapped_method_for_casting\n    df = pd.DataFrame({\"col0\": [1, 2, 3], \"col1\": [4, 5, 6]})\n    accessor_result = \"test_result\"\n    register_dataframe_groupby_accessor(\"sum\", backend=Backend.get())(\n        lambda self, *args, **kwargs: accessor_result\n    )\n    groupby_sum_result = df.groupby(\"col0\").sum()\n    assert groupby_sum_result == accessor_result\n    series_groupby_sum_result = df.groupby(\"col0\")[\"col1\"].sum()\n    assert series_groupby_sum_result == accessor_result\n\n\n@pytest.mark.filterwarnings(default_to_pandas_ignore_string)\ndef test_dataframe_accessor_for_method_that_series_groupby_overrides():\n    \"\"\"\n    Test describe(), a DataFrameGroupBy method that SeriesGroupBy overrides.\n\n    Registering an extension method for DataFrameGroupBy should not affect\n    SeriesGroupBy's describe() method.\n    \"\"\"\n    # Check that SeriesGroupBy overrides describe().\n    assert (\n        DataFrameGroupBy.describe\n        is not SeriesGroupBy.describe._wrapped_method_for_casting\n    )\n    df = pd.DataFrame({\"col0\": [1, 
2, 3], \"col1\": [4, 5, 6]})\n    original_series_groupby_describe_result = df.groupby(\"col0\")[\"col1\"].describe()\n    accessor_result = \"test_result\"\n    register_dataframe_groupby_accessor(\"describe\", backend=Backend.get())(\n        lambda self, *args, **kwargs: accessor_result\n    )\n    groupby_describe_result = df.groupby(\"col0\").describe()\n    assert groupby_describe_result == accessor_result\n    series_groupby_describe_result = df.groupby(\"col0\")[\"col1\"].describe()\n    df_equals(series_groupby_describe_result, original_series_groupby_describe_result)\n\n\n@pytest.mark.parametrize(\n    \"get_groupby,register_accessor\",\n    (\n        (lambda df: df.groupby(\"col0\"), register_dataframe_groupby_accessor),\n        (lambda df: df.groupby(\"col0\")[\"col1\"], register_series_groupby_accessor),\n    ),\n)\nclass TestProperty:\n\n    @pytest.mark.parametrize(\"df_backend\", [\"Pandas\", \"Python_Test\"])\n    def test_add_read_only_property_for_all_backends(\n        self, df_backend, get_groupby, register_accessor\n    ):\n        expected_string_val = \"expected_string_val\"\n        property_name = \"new_property\"\n\n        @register_dataframe_groupby_accessor(property_name)\n        @property\n        def new_property(self):\n            return expected_string_val\n\n        with config_context(Backend=df_backend):\n            df = pd.DataFrame({\"col0\": [1, 2, 3], \"col1\": [4, 5, 6]})\n            assert get_groupby(df).new_property == expected_string_val\n\n            with pytest.raises(AttributeError):\n                del df.groupby(\"col0\").new_property\n\n            with pytest.raises(AttributeError):\n                df.groupby(\"col0\").new_property = \"new_value\"\n\n    def test_override_ngroups_getter_for_one_backend(\n        self, get_groupby, register_accessor\n    ):\n        accessor_ngroups = -1\n        property_name = \"ngroups\"\n\n        @register_accessor(property_name, backend=\"Pandas\")\n        
@property\n        def ngroups(self):\n            return accessor_ngroups\n\n        pandas_df = pd.DataFrame({\"col0\": [1, 2, 3], \"col1\": [4, 5, 6]}).move_to(\n            \"pandas\"\n        )\n        groupby = get_groupby(pandas_df)\n        assert groupby.ngroups == accessor_ngroups\n\n        # Check that the accessor doesn't work on the Python_Test backend.\n        python_test_df = pandas_df.move_to(\"Python_Test\")\n        groupby = get_groupby(python_test_df)\n        with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):\n            assert groupby.ngroups == 3\n\n    def test_add_ngroups_setter_and_deleter_for_one_backend(\n        self, get_groupby, register_accessor\n    ):\n\n        def _get_ngroups(self):\n            return self._ngroups\n\n        def _delete_ngroups(self):\n            delattr(self, \"_ngroups\")\n\n        def _set_ngroups(self, value):\n            self._ngroups = value\n\n        register_accessor(\"ngroups\", backend=\"Pandas\")(\n            property(fget=_get_ngroups, fset=_set_ngroups, fdel=_delete_ngroups)\n        )\n\n        python_test_df = pd.DataFrame({\"col0\": [1, 2, 3], \"col1\": [4, 5, 6]}).move_to(\n            \"python_test\"\n        )\n\n        python_test_groupby = get_groupby(python_test_df)\n\n        with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):\n            assert python_test_groupby.ngroups == 3\n\n        with pytest.raises(AttributeError):\n            python_test_groupby.ngroups = 4\n\n        with pytest.raises(AttributeError):\n            del python_test_groupby.ngroups\n\n        pandas_groupby = get_groupby(python_test_df.move_to(\"Pandas\"))\n\n        assert not hasattr(pandas_groupby, \"ngroups\")\n\n        pandas_groupby.ngroups = -1\n\n        assert pandas_groupby.ngroups == -1\n\n        # Deleting ngroups should delete the private attribute _ngroups.\n        del pandas_groupby.ngroups\n\n        # now getting ngroups should 
raise an AttributeError because the\n        # private attribute _ngroups is missing.\n        assert not hasattr(pandas_groupby, \"ngroups\")\n\n    def test_add_deletable_property_for_one_backend(\n        self, get_groupby, register_accessor\n    ):\n        public_property_name = \"property_name\"\n        private_property_name = \"_property_name\"\n\n        # register a public property `public_property_name` that is backed by\n        # a private attribute `private_property_name`.\n\n        def get_property(self):\n            return getattr(self, private_property_name)\n\n        def set_property(self, value):\n            setattr(self, private_property_name, value)\n\n        def del_property(self):\n            # Note that deleting the public property deletes the private\n            # attribute, not the public property itself.\n            delattr(self, private_property_name)\n\n        register_accessor(name=public_property_name, backend=\"Pandas\")(\n            property(get_property, set_property, del_property)\n        )\n\n        python_test_df = pd.DataFrame({\"col0\": [1, 2, 3], \"col1\": [4, 5, 6]}).move_to(\n            \"python_test\"\n        )\n\n        python_test_groupby = get_groupby(python_test_df)\n\n        assert not hasattr(python_test_groupby, public_property_name)\n\n        pandas_df = python_test_df.move_to(\"pandas\")\n        pandas_groupby = get_groupby(pandas_df)\n\n        setattr(pandas_groupby, public_property_name, \"value\")\n        assert getattr(pandas_groupby, public_property_name) == \"value\"\n        delattr(pandas_groupby, public_property_name)\n        assert not hasattr(pandas_groupby, private_property_name)\n\n    @pytest.mark.filterwarnings(default_to_pandas_ignore_string)\n    def test_override_cached_property(self, get_groupby, register_accessor):\n        @cached_property\n        def groups(self):\n            return {\"group\": pd.Index([\"test\"])}\n\n        register_accessor(\"groups\", 
backend=\"Pandas\")(groups)\n        pandas_df = pd.DataFrame({\"col0\": [1], \"col1\": [2]}).move_to(\"pandas\")\n        assert get_groupby(pandas_df).groups == {\"group\": pd.Index([\"test\"])}\n\n\ndef test_deleting_extension_that_is_not_property_raises_attribute_error():\n    expected_string_val = \"Some string value\"\n    method_name = \"new_method\"\n\n    @register_dataframe_groupby_accessor(name=method_name)\n    def my_method_implementation(self):\n        return expected_string_val\n\n    groupby = pd.DataFrame({\"col0\": [1, 2, 3], \"col1\": [4, 5, 6]}).groupby(\"col0\")\n    assert hasattr(DataFrameGroupBy, method_name)\n    assert getattr(groupby, method_name)() == expected_string_val\n    with pytest.raises(AttributeError):\n        delattr(groupby, method_name)\n\n\n@pytest.mark.skipif(Backend.get() == \"Pandas\", reason=\"already on pandas backend\")\ndef test_get_extension_from_dataframe_that_is_on_non_default_backend_when_auto_switch_is_false():\n    assert not AutoSwitchBackend.get()\n    pandas_df = pd.DataFrame([1, 2]).move_to(\"Pandas\")\n    register_dataframe_groupby_accessor(\"sum\", backend=\"Pandas\")(\n        lambda df: \"small_sum_result\"\n    )\n    assert pandas_df.groupby(0).sum() == \"small_sum_result\"\n"
  },
  {
    "path": "modin/tests/pandas/extensions/test_pd_extensions.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport re\nfrom types import FunctionType\n\nimport pandas\nimport pytest\n\nimport modin.pandas as pd\nfrom modin.config import context as config_context\nfrom modin.pandas.api.extensions import register_pd_accessor\nfrom modin.tests.pandas.utils import df_equals, eval_general\n\n\n@pytest.fixture(\n    params=sorted(\n        key\n        for key, value in pd.__dict__.items()\n        if isinstance(value, FunctionType) and value.__module__ == pd.general.__name__\n    )\n)\ndef pd_general_function(request):\n    return request.param\n\n\n@pytest.fixture(\n    params=sorted(\n        key\n        for key, value in pd.__dict__.items()\n        if isinstance(value, FunctionType) and value.__module__ == pd.io.__name__\n    )\n)\ndef pd_io_function(request):\n    return request.param\n\n\nclass TestRegisterForAllBackends:\n    def test_add_new_function(self):\n        expected_string_val = \"Some string value\"\n        method_name = \"new_method\"\n\n        @register_pd_accessor(method_name)\n        def my_method_implementation():\n            return expected_string_val\n\n        assert pd.new_method() == expected_string_val\n\n    def 
test_add_new_non_method(self):\n        expected_val = 4\n        attribute_name = \"four\"\n        register_pd_accessor(attribute_name)(expected_val)\n        assert pd.four == expected_val\n\n    def test_override_io_function(self, pd_io_function):\n        sentinel = object()\n        register_pd_accessor(pd_io_function)(lambda: sentinel)\n        assert getattr(pd, pd_io_function)() == sentinel\n\n    def test_override_general_function(self, pd_general_function):\n        sentinel = object()\n        register_pd_accessor(pd_general_function)(lambda: sentinel)\n        assert getattr(pd, pd_general_function)() == sentinel\n\n\nclass TestRegisterForOneBackend:\n    def test_add_new_function(self):\n        backend = \"Pandas\"\n        expected_string_val = \"Some string value\"\n        method_name = \"new_method\"\n\n        @register_pd_accessor(method_name, backend=backend)\n        def my_method_implementation():\n            return expected_string_val\n\n        with config_context(Backend=backend):\n            assert getattr(pd, method_name)() == expected_string_val\n        with config_context(Backend=\"Python_Test\"):\n            with pytest.raises(\n                AttributeError,\n                match=re.escape(\n                    f\"module 'modin.pandas' has no attribute {method_name}\"\n                ),\n            ):\n                getattr(pd, method_name)()\n\n    def test_override_function(self):\n        backend = \"Pandas\"\n        expected_string_val = \"Some string value\"\n\n        @register_pd_accessor(\"to_datetime\", backend=backend)\n        def my_method_implementation(*args, **kwargs):\n            return expected_string_val\n\n        with config_context(Backend=backend):\n            # Since there are no query compiler inputs to to_datetime(), use\n            # the to_datetime() implementation for Backend.get()\n            assert pd.to_datetime(1) == expected_string_val\n\n        with 
config_context(Backend=\"Python_Test\"):\n            # There are no query compiler inputs to to_datetime(), and\n            # the current Backend.get() does not have a to_datetime() extension,\n            # so fall back to the default to_datetime() implementation, which\n            # should return the same result as pandas.to_datetime().\n            eval_general(pd, pandas, lambda lib: lib.to_datetime(1))\n\n    def test_add_new_non_method(self):\n        backend = \"Pandas\"\n        expected_val = 4\n        attribute_name = \"four\"\n        register_pd_accessor(attribute_name, backend=backend)(expected_val)\n        with config_context(Backend=backend):\n            assert pd.four == expected_val\n        with config_context(Backend=\"Python_Test\"):\n            assert not hasattr(pd, attribute_name)\n\n    def test_to_datetime_dispatches_to_implementation_for_input(self):\n\n        @register_pd_accessor(\"to_datetime\", backend=\"Pandas\")\n        def pandas_to_datetime(*args, **kwargs):\n            return \"pandas_to_datetime_result\"\n\n        with config_context(Backend=\"Pandas\"):\n            pandas_backend_series = pd.Series(1)\n\n        with config_context(Backend=\"Python_Test\"):\n            python_backend_df = pd.Series(1)\n\n        assert pd.to_datetime(pandas_backend_series) == \"pandas_to_datetime_result\"\n        df_equals(\n            pd.to_datetime(python_backend_df),\n            pandas.to_datetime(python_backend_df._to_pandas()),\n        )\n\n    def test_concat_with_two_different_backends(self):\n        with config_context(Backend=\"Pandas\"):\n            modin_on_pandas_df = pd.DataFrame({\"a\": [1, 2, 3]})\n        with config_context(Backend=\"Python_Test\"):\n            modin_on_python_df = pd.DataFrame({\"a\": [4, 5, 6]})\n\n        @register_pd_accessor(\"concat\", backend=\"Pandas\")\n        def pandas_concat(*args, **kwargs):\n            return \"pandas_concat_result\"\n\n        
@register_pd_accessor(\"concat\", backend=\"Python_Test\")\n        def python_concat(*args, **kwargs):\n            return \"python_concat_result\"\n\n        # If the backends are different, we dispatch to the concat() override\n        # for the backend of the first argument.\n        assert (\n            pd.concat([modin_on_pandas_df, modin_on_python_df])\n            == \"pandas_concat_result\"\n        )\n\n        # With inplace casting we need to reset the original dataframes\n        modin_on_pandas_df.move_to(\"Pandas\", inplace=True)\n        modin_on_python_df.move_to(\"Python_Test\", inplace=True)\n\n        assert (\n            pd.concat([modin_on_python_df, modin_on_pandas_df])\n            == \"python_concat_result\"\n        )\n\n    def test_index_class_override(self):\n        class FakeIndex:\n            def __init__(self, _values):\n                pass\n\n            def fake_method(self) -> str:\n                return \"python_fake_index\"\n\n        register_pd_accessor(\"Index\", backend=\"Python_Test\")(FakeIndex)\n\n        with config_context(Backend=\"Pandas\"):\n            # Should return an actual native pandas index object\n            df_equals(pd.Index([1]).to_series(), pd.Series([1], index=[1]))\n\n        with config_context(Backend=\"Python_Test\"):\n            # Should just return a string\n            assert pd.Index([1]).fake_method() == \"python_fake_index\"\n"
  },
  {
    "path": "modin/tests/pandas/extensions/test_series_extensions.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport re\nfrom unittest import mock\n\nimport pytest\n\nimport modin.pandas as pd\nfrom modin.config import Backend\nfrom modin.config import context as config_context\nfrom modin.pandas.api.extensions import register_series_accessor\n\ndefault___init__ = pd.Series._extensions[None][\"__init__\"]\n\n\ndef test_series_extension_simple_method(Backend1):\n    expected_string_val = \"Some string value\"\n    method_name = \"new_method\"\n    ser = pd.Series([1, 2, 3]).set_backend(Backend1)\n\n    @register_series_accessor(name=method_name, backend=Backend1)\n    def my_method_implementation(self):\n        return expected_string_val\n\n    assert hasattr(pd.Series, method_name)\n    assert ser.new_method() == expected_string_val\n\n\ndef test_series_extension_non_method(Backend1):\n    expected_val = 4\n    attribute_name = \"four\"\n    register_series_accessor(name=attribute_name, backend=Backend1)(expected_val)\n    ser = pd.Series([1, 2, 3]).set_backend(Backend1)\n\n    assert ser.four == expected_val\n\n\ndef test_series_extension_accessing_existing_methods(Backend1):\n    ser = pd.Series([1, 2, 3]).set_backend(Backend1)\n    method_name = \"self_accessor\"\n    
expected_result = ser.sum() / ser.count()\n\n    @register_series_accessor(name=method_name, backend=Backend1)\n    def my_average(self):\n        return self.sum() / self.count()\n\n    assert ser.self_accessor() == expected_result\n\n\ndef test_series_extension_overrides_existing_method(Backend1):\n    series = pd.Series([3, 2, 1])\n    assert series.sort_values().iloc[0] == 1\n\n    @register_series_accessor(name=\"sort_values\", backend=Backend1)\n    def my_sort_values(self):\n        return self\n\n    assert series.set_backend(Backend1).sort_values().iloc[0] == 3\n\n\ndef test_series_extension_method_uses_superclass_method(Backend1):\n    series = pd.Series([3, 2, 1], name=\"name\")\n    assert series.sort_values().iloc[0] == 1\n\n    @register_series_accessor(name=\"sort_values\", backend=Backend1)\n    def my_sort_values(self):\n        return super(pd.Series, self).sort_values(by=\"name\", ascending=False)\n\n    assert series.set_backend(Backend1).sort_values().iloc[0] == 3\n\n\nclass TestOverride__init__:\n    def test_override_one_backend_and_pass_no_query_compilers(self):\n        default_backend = Backend.get()\n        backend_init = mock.Mock(wraps=default___init__)\n        register_series_accessor(name=\"__init__\", backend=default_backend)(backend_init)\n        output_series = pd.Series([1], index=[\"a\"])\n        assert output_series.get_backend() == default_backend\n        backend_init.assert_has_calls(\n            [\n                mock.call(output_series, [1], index=[\"a\"]),\n            ]\n        )\n\n    def test_override_one_backend_and_pass_query_compiler_kwarg(self):\n        backend_init = mock.Mock(wraps=default___init__)\n        register_series_accessor(name=\"__init__\", backend=\"Pandas\")(backend_init)\n\n        with config_context(Backend=\"Pandas\"):\n            input_series = pd.Series()\n\n        backend_init.reset_mock()\n        output_series = pd.Series(query_compiler=input_series._query_compiler)\n        assert 
output_series.get_backend() == \"Pandas\"\n        backend_init.assert_called_once_with(\n            output_series, query_compiler=input_series._query_compiler\n        )\n\n    @pytest.mark.parametrize(\"input_backend\", [\"Python_Test\", \"Pandas\"])\n    def test_override_all_backends_and_pass_query_compiler_kwarg(self, input_backend):\n        backend_init = mock.Mock(wraps=default___init__)\n        register_series_accessor(name=\"__init__\")(backend_init)\n\n        with config_context(Backend=input_backend):\n            input_series = pd.Series()\n\n        backend_init.reset_mock()\n        output_series = pd.Series(query_compiler=input_series._query_compiler)\n        assert output_series.get_backend() == input_backend\n        backend_init.assert_called_once_with(\n            output_series, query_compiler=input_series._query_compiler\n        )\n\n\nclass TestDunders:\n    \"\"\"\n    Make sure to test that we override special \"dunder\" methods like __len__\n    correctly. 
Python calls these methods with Series.__len__(obj)\n    rather than getattr(obj, \"__len__\")().\n    source: https://docs.python.org/3/reference/datamodel.html#special-lookup\n    \"\"\"\n\n    def test_len(self, Backend1):\n        @register_series_accessor(name=\"__len__\", backend=Backend1)\n        def always_get_1(self):\n            return 1\n\n        series = pd.Series([1, 2, 3])\n        assert len(series) == 3\n        backend_series = series.set_backend(Backend1)\n        assert len(backend_series) == 1\n        assert backend_series.__len__() == 1\n\n    def test_repr(self, Backend1):\n        @register_series_accessor(name=\"__repr__\", backend=Backend1)\n        def simple_repr(self) -> str:\n            return \"series_string\"\n\n        series = pd.Series([1, 2, 3])\n        assert repr(series) == repr(series.modin.to_pandas())\n        backend_series = series.set_backend(Backend1)\n        assert repr(backend_series) == \"series_string\"\n        assert backend_series.__repr__() == \"series_string\"\n\n\nclass TestProperty:\n    def test_override_index(self, Backend1):\n        series = pd.Series([\"a\", \"b\"])\n\n        def set_index(self, new_index):\n            self._query_compiler.index = [f\"{v}_custom\" for v in new_index]\n\n        register_series_accessor(name=\"index\", backend=Backend1)(\n            property(fget=lambda self: self._query_compiler.index[::-1], fset=set_index)\n        )\n\n        assert list(series.index) == [0, 1]\n        backend_series = series.set_backend(Backend1)\n        assert list(backend_series.index) == [1, 0]\n        backend_series.index = [2, 3]\n        assert list(backend_series.index) == [\"3_custom\", \"2_custom\"]\n\n    def test_add_deletable_property(self, Backend1):\n\n        # register a public property `public_property_name` that is backed by\n        # a private attribute `private_property_name`.\n\n        public_property_name = \"property_name\"\n        private_property_name = 
\"_property_name\"\n\n        def get_property(self):\n            return getattr(self, private_property_name)\n\n        def set_property(self, value):\n            setattr(self, private_property_name, value)\n\n        def del_property(self):\n            delattr(self, private_property_name)\n\n        register_series_accessor(name=public_property_name, backend=Backend1)(\n            property(get_property, set_property, del_property)\n        )\n\n        series = pd.Series([0])\n        assert not hasattr(series, public_property_name)\n        backend_series = series.set_backend(Backend1)\n        setattr(backend_series, public_property_name, \"value\")\n        assert hasattr(backend_series, private_property_name)\n        assert getattr(backend_series, public_property_name) == \"value\"\n        delattr(backend_series, public_property_name)\n        # check that the deletion works.\n        assert not hasattr(backend_series, private_property_name)\n\n    def test_non_settable_extension_property(self, Backend1):\n\n        property_name = \"property_name\"\n        register_series_accessor(name=property_name, backend=Backend1)(\n            property(fget=(lambda self: 4))\n        )\n\n        series = pd.Series([0])\n        assert not hasattr(series, property_name)\n        backend_series = series.set_backend(Backend1)\n        assert getattr(backend_series, property_name) == 4\n        with pytest.raises(AttributeError):\n            setattr(backend_series, property_name, \"value\")\n\n    def test_delete_non_deletable_extension_property(self, Backend1):\n\n        property_name = \"property_name\"\n        register_series_accessor(name=property_name, backend=Backend1)(\n            property(fget=(lambda self: \"value\"))\n        )\n\n        series = pd.Series([0])\n        assert not hasattr(series, property_name)\n        backend_series = series.set_backend(Backend1)\n        with pytest.raises(AttributeError):\n            delattr(backend_series, 
property_name)\n\n\ndef test_deleting_extension_that_is_not_property_raises_attribute_error(Backend1):\n    expected_string_val = \"Some string value\"\n    method_name = \"new_method\"\n    series = pd.Series([1, 2, 3]).set_backend(Backend1)\n\n    @register_series_accessor(name=method_name, backend=Backend1)\n    def my_method_implementation(self):\n        return expected_string_val\n\n    assert hasattr(pd.Series, method_name)\n    assert series.new_method() == expected_string_val\n    with pytest.raises(AttributeError):\n        delattr(series, method_name)\n\n\ndef test_disallowed_extensions(Backend1, non_extendable_attribute_name):\n    with pytest.raises(\n        ValueError,\n        match=re.escape(\n            f\"Cannot register an extension with the reserved name {non_extendable_attribute_name}.\"\n        ),\n    ):\n        register_series_accessor(name=non_extendable_attribute_name, backend=Backend1)(\n            \"unused_value\"\n        )\n\n\ndef test_wrapped_extension(Backend1):\n    \"\"\"\n    Tests using the extensions system to overwrite a method with a wrapped version of the original method\n    obtained via getattr.\n    Because the QueryCompilerCaster ABC automatically wraps all methods with a dispatch to the appropriate\n    backend, we must use the __wrapped__ property of the originally-defined attribute to avoid\n    infinite recursion.\n    \"\"\"\n    original_item = pd.Series.item.__wrapped__\n\n    @register_series_accessor(name=\"item\", backend=Backend1)\n    def item_implementation(self):\n        return (original_item(self) + 2) * 5\n\n    series = pd.Series([3])\n    assert series.item() == 3\n    assert series.set_backend(Backend1).item() == 25\n"
  },
  {
    "path": "modin/tests/pandas/integrations/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n"
  },
  {
    "path": "modin/tests/pandas/integrations/test_lazy_import.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport lazy_import\n\npandas = lazy_import.lazy_module(\"pandas\")\npyarrow = lazy_import.lazy_module(\"pyarrow\")\nfrom modin import pandas as pd  # noqa: E402\n\n\ndef test_dataframe_constructor():\n    pd.DataFrame({\"col1\": [1, 2, 3], \"col2\": list(\"abc\")})\n"
  },
  {
    "path": "modin/tests/pandas/internals/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n"
  },
  {
    "path": "modin/tests/pandas/internals/test_benchmark_mode.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport unittest.mock as mock\n\nimport pytest\n\nimport modin.pandas as pd\nfrom modin.config import Engine\n\nengine = Engine.get()\n\n# We have to explicitly mock subclass implementations of wait_partitions.\nif engine == \"Ray\":\n    wait_method = (\n        \"modin.core.execution.ray.implementations.\"\n        + \"pandas_on_ray.partitioning.\"\n        + \"PandasOnRayDataframePartitionManager.wait_partitions\"\n    )\nelif engine == \"Dask\":\n    wait_method = (\n        \"modin.core.execution.dask.implementations.\"\n        + \"pandas_on_dask.partitioning.\"\n        + \"PandasOnDaskDataframePartitionManager.wait_partitions\"\n    )\nelif engine == \"Unidist\":\n    wait_method = (\n        \"modin.core.execution.unidist.implementations.\"\n        + \"pandas_on_unidist.partitioning.\"\n        + \"PandasOnUnidistDataframePartitionManager.wait_partitions\"\n    )\nelse:\n    wait_method = (\n        \"modin.core.dataframe.pandas.partitioning.\"\n        + \"partition_manager.PandasDataframePartitionManager.wait_partitions\"\n    )\n\n\n@pytest.mark.parametrize(\"set_benchmark_mode\", [False], indirect=True)\ndef test_turn_off(set_benchmark_mode):\n    df = 
pd.DataFrame([0])\n    with mock.patch(wait_method) as wait:\n        df.dropna()\n    wait.assert_not_called()\n\n\n@pytest.mark.parametrize(\"set_benchmark_mode\", [True], indirect=True)\ndef test_turn_on(set_benchmark_mode):\n    df = pd.DataFrame([0])\n    with mock.patch(wait_method) as wait:\n        df.dropna()\n    wait.assert_called()\n"
  },
  {
    "path": "modin/tests/pandas/native_df_interoperability/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"\nThis module contains tests for interoperability between Modin dataframes using \"native\" execution and Modin dataframes using other execution modes.\n\"\"\"\n"
  },
  {
    "path": "modin/tests/pandas/native_df_interoperability/conftest.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport itertools\n\nimport pytest\n\n\ndef _get_native_bool_descriptor(v: bool) -> str:\n    return \"native\" if v else \"default\"\n\n\n@pytest.fixture(\n    params=list(itertools.product([True, False], repeat=2)),\n    ids=lambda param: \"_\".join(_get_native_bool_descriptor(v) for v in param),\n)\ndef df_mode_pair(request):\n    return request.param\n"
  },
  {
    "path": "modin/tests/pandas/native_df_interoperability/test_binary.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport matplotlib\nimport pytest\n\nfrom modin.config import NPartitions\nfrom modin.tests.pandas.native_df_interoperability.utils import (\n    create_test_df_in_defined_mode,\n    eval_general_interop,\n)\nfrom modin.tests.pandas.utils import (\n    default_to_pandas_ignore_string,\n    df_equals,\n    test_data,\n    test_data_keys,\n    test_data_values,\n)\n\nNPartitions.put(4)\n\n# Force matplotlib to not use any Xwindows backend.\nmatplotlib.use(\"Agg\")\n\n# Our configuration in pytest.ini requires that we explicitly catch all\n# instances of defaulting to pandas, but some test modules, like this one,\n# have too many such instances.\npytestmark = pytest.mark.filterwarnings(default_to_pandas_ignore_string)\n\n\n@pytest.mark.parametrize(\n    \"other\",\n    [\n        lambda df, axis: 4,\n        lambda df, axis: df.iloc[0] if axis == \"columns\" else list(df[df.columns[0]]),\n        lambda df, axis: {\n            label: idx + 1\n            for idx, label in enumerate(df.axes[0 if axis == \"rows\" else 1])\n        },\n        lambda df, axis: {\n            label if idx % 2 else f\"random_key{idx}\": idx + 1\n            for idx, label in 
enumerate(df.axes[0 if axis == \"rows\" else 1][::-1])\n        },\n    ],\n    ids=[\n        \"scalar\",\n        \"series_or_list\",\n        \"dictionary_keys_equal_columns\",\n        \"dictionary_keys_unequal_columns\",\n    ],\n)\n@pytest.mark.parametrize(\"axis\", [\"rows\", \"columns\"])\n@pytest.mark.parametrize(\n    \"op\",\n    [\n        *(\"add\", \"radd\", \"sub\", \"rsub\", \"mod\", \"rmod\", \"pow\", \"rpow\"),\n        *(\"truediv\", \"rtruediv\", \"mul\", \"rmul\", \"floordiv\", \"rfloordiv\"),\n    ],\n)\n@pytest.mark.parametrize(\"backend\", [None, \"pyarrow\"])\ndef test_math_functions(other, axis, op, backend, df_mode_pair):\n    data = test_data[\"float_nan_data\"]\n    if (op == \"floordiv\" or op == \"rfloordiv\") and axis == \"rows\":\n        # lambda == \"series_or_list\"\n        pytest.xfail(reason=\"different behavior\")\n\n    if op == \"rmod\" and axis == \"rows\":\n        # lambda == \"series_or_list\"\n        pytest.xfail(reason=\"different behavior\")\n\n    if op in (\"mod\", \"rmod\") and backend == \"pyarrow\":\n        pytest.skip(reason=\"These functions are not implemented in pandas itself\")\n\n    eval_general_interop(\n        data,\n        backend,\n        lambda df1, df2: getattr(df1, op)(other(df2, axis), axis=axis),\n        df_mode_pair,\n    )\n\n\n@pytest.mark.parametrize(\"other\", [lambda df: 2, lambda df: df])\ndef test___divmod__(other, df_mode_pair):\n    data = test_data[\"float_nan_data\"]\n    eval_general_interop(\n        data, None, lambda df1, df2: divmod(df1, other(df2)), df_mode_pair\n    )\n\n\n@pytest.mark.parametrize(\"other\", [\"as_left\", 4])\n@pytest.mark.parametrize(\"op\", [\"eq\", \"ge\", \"gt\", \"le\", \"lt\", \"ne\"])\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_comparison(data, op, other, request, df_mode_pair):\n    def operation(df1, df2):\n        return getattr(df1, op)(df2 if other == \"as_left\" else other)\n\n    expected_exception = 
None\n    if \"int_data\" in request.node.callspec.id and other == \"a\":\n        pytest.xfail(reason=\"https://github.com/modin-project/modin/issues/7019\")\n    elif \"float_nan_data\" in request.node.callspec.id and other == \"a\":\n        expected_exception = TypeError(\n            \"Invalid comparison between dtype=float64 and str\"\n        )\n    eval_general_interop(\n        data,\n        None,\n        operation,\n        df_mode_pair,\n        expected_exception=expected_exception,\n    )\n\n\n@pytest.mark.parametrize(\n    \"frame1_data,frame2_data,expected_pandas_equals\",\n    [\n        pytest.param({}, {}, True, id=\"two_empty_dataframes\"),\n        pytest.param([[1]], [[0]], False, id=\"single_unequal_values\"),\n        pytest.param([[None]], [[None]], True, id=\"single_none_values\"),\n        pytest.param(\n            [[1, 2], [3, 4]],\n            [[1, 2], [3, 4]],\n            True,\n            id=\"equal_two_by_two_dataframes\",\n        ),\n        pytest.param(\n            [[1, 2], [3, 4]],\n            [[5, 2], [3, 4]],\n            False,\n            id=\"unequal_two_by_two_dataframes\",\n        ),\n    ],\n)\ndef test_equals(frame1_data, frame2_data, expected_pandas_equals, df_mode_pair):\n    modin_df1, pandas_df1 = create_test_df_in_defined_mode(\n        frame1_data, native=df_mode_pair[0]\n    )\n    modin_df2, pandas_df2 = create_test_df_in_defined_mode(\n        frame2_data, native=df_mode_pair[1]\n    )\n\n    pandas_equals = pandas_df1.equals(pandas_df2)\n    assert pandas_equals == expected_pandas_equals, (\n        \"Test expected pandas to say the dataframes were\"\n        + f\"{'' if expected_pandas_equals else ' not'} equal, but they were\"\n        + f\"{' not' if expected_pandas_equals else ''} equal.\"\n    )\n\n    assert modin_df1.equals(modin_df2) == pandas_equals\n    assert modin_df1.equals(pandas_df2) == pandas_equals\n\n\n@pytest.mark.parametrize(\"empty_operand\", [\"right\", \"left\", \"both\"])\ndef 
test_empty_df(empty_operand, df_mode_pair):\n    modin_df, pandas_df = create_test_df_in_defined_mode(\n        [0, 1, 2, 0, 1, 2], native=df_mode_pair[0]\n    )\n    modin_df_empty, pandas_df_empty = create_test_df_in_defined_mode(\n        native=df_mode_pair[1]\n    )\n\n    if empty_operand == \"right\":\n        modin_res = modin_df + modin_df_empty\n        pandas_res = pandas_df + pandas_df_empty\n    elif empty_operand == \"left\":\n        modin_res = modin_df_empty + modin_df\n        pandas_res = pandas_df_empty + pandas_df\n    else:\n        modin_res = modin_df_empty + modin_df_empty\n        pandas_res = pandas_df_empty + pandas_df_empty\n\n    df_equals(modin_res, pandas_res)\n"
  },
  {
    "path": "modin/tests/pandas/native_df_interoperability/test_compiler_caster.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport contextlib\nimport json\nimport logging\nfrom io import StringIO\nfrom types import MappingProxyType\nfrom typing import Iterator, Optional\nfrom unittest import mock\n\nimport pandas\nimport pytest\nfrom pytest import param\n\nimport modin.pandas as pd\nfrom modin.config import context as config_context\nfrom modin.config.envvars import (\n    Backend,\n    Engine,\n    Execution,\n    NativePandasMaxRows,\n    NativePandasTransferThreshold,\n)\nfrom modin.core.execution.dispatching.factories import factories\nfrom modin.core.execution.dispatching.factories.factories import BaseFactory\nfrom modin.core.io.io import BaseIO\nfrom modin.core.storage_formats.base.query_compiler import QCCoercionCost\nfrom modin.core.storage_formats.base.query_compiler_calculator import (\n    BackendCostCalculator,\n)\nfrom modin.core.storage_formats.pandas.native_query_compiler import NativeQueryCompiler\nfrom modin.core.storage_formats.pandas.query_compiler_caster import (\n    _GENERAL_EXTENSIONS,\n    register_function_for_post_op_switch,\n    register_function_for_pre_op_switch,\n)\nfrom modin.logging import DEFAULT_LOGGER_NAME\nfrom modin.logging.metrics import 
add_metric_handler, clear_metric_handler\nfrom modin.pandas.api.extensions import register_pd_accessor\nfrom modin.tests.pandas.utils import (\n    create_test_dfs,\n    default_to_pandas_ignore_string,\n    df_equals,\n    eval_general,\n)\n\n# Some modin methods warn about defaulting to pandas at the API layer. That's\n# expected and not an error as it would be normally.\npytestmark = pytest.mark.filterwarnings(default_to_pandas_ignore_string)\n\nBIG_DATA_CLOUD_MIN_NUM_ROWS = 10\nSMALL_DATA_NUM_ROWS = 5\n\n\nclass CalculatorTestQc(NativeQueryCompiler):\n    \"\"\"\n    A subclass of NativeQueryCompiler with simpler cost functions.\n\n    We MAY eventually want to stop overriding the superclass's cost functions.\n    \"\"\"\n\n    @classmethod\n    def move_to_me_cost(cls, other_qc, api_cls_name, operation, arguments):\n        if isinstance(other_qc, cls):\n            return QCCoercionCost.COST_ZERO\n        return None\n\n    def stay_cost(self, api_cls_name, operation, arguments):\n        return QCCoercionCost.COST_ZERO\n\n    def move_to_cost(self, other_qc_type, api_cls_name, operation, arguments):\n        if isinstance(self, other_qc_type):\n            return QCCoercionCost.COST_ZERO\n        return None\n\n\nclass CloudQC(CalculatorTestQc):\n    \"Represents a cloud-hosted query compiler\"\n\n    def get_backend(self):\n        return \"Cloud\"\n\n    @classmethod\n    def max_cost(cls):\n        return QCCoercionCost.COST_IMPOSSIBLE\n\n    def move_to_cost(self, other_qc_cls, api_cls_name, op, arguments):\n        assert op is not None\n        assert api_cls_name in [\n            None,\n            \"_iLocIndexer\",\n            \"_LocationIndexerBase\",\n            \"Series\",\n            \"DataFrame\",\n            \"BasePandasDataset\",\n        ]\n        return {\n            CloudQC: QCCoercionCost.COST_ZERO,\n            CloudQCHighSelf: QCCoercionCost.COST_LOW,\n            ClusterQC: QCCoercionCost.COST_MEDIUM,\n            DefaultQC: 
QCCoercionCost.COST_MEDIUM,\n            LocalMachineQC: QCCoercionCost.COST_HIGH,\n            PicoQC: QCCoercionCost.COST_IMPOSSIBLE,\n            OmniscientEagerQC: None,\n            OmniscientLazyQC: None,\n        }.get(other_qc_cls)\n\n    def stay_cost(self, api_cls_name, op, arguments):\n        return QCCoercionCost.COST_ZERO\n\n\nclass CloudQCHighSelf(CloudQC):\n    def get_backend(self):\n        return \"Cloud_High_Self\"\n\n    def stay_cost(self, api_cls_name, op, arguments):\n        return QCCoercionCost.COST_HIGH\n\n\nclass ClusterQC(CalculatorTestQc):\n    \"Represents a local network cluster query compiler\"\n\n    def get_backend(self):\n        return \"Cluster\"\n\n    @classmethod\n    def max_cost(cls):\n        return QCCoercionCost.COST_HIGH\n\n    def move_to_cost(self, other_qc_cls, api_cls_name, op, arguments):\n        return {\n            CloudQC: QCCoercionCost.COST_MEDIUM,\n            CloudQCHighSelf: QCCoercionCost.COST_MEDIUM,\n            ClusterQC: QCCoercionCost.COST_ZERO,\n            DefaultQC: None,  # cluster qc knows nothing about default qc\n            LocalMachineQC: QCCoercionCost.COST_MEDIUM,\n            PicoQC: QCCoercionCost.COST_HIGH,\n        }.get(other_qc_cls)\n\n\nclass LocalMachineQC(CalculatorTestQc):\n    \"Represents a local machine query compiler\"\n\n    def get_backend(self):\n        return \"Local_Machine\"\n\n    @classmethod\n    def max_cost(cls):\n        return QCCoercionCost.COST_MEDIUM\n\n    def move_to_cost(self, other_qc_cls, api_cls_name, op, arguments):\n        return {\n            CloudQC: QCCoercionCost.COST_MEDIUM,\n            CloudQCHighSelf: QCCoercionCost.COST_MEDIUM,\n            ClusterQC: QCCoercionCost.COST_LOW,\n            LocalMachineQC: QCCoercionCost.COST_ZERO,\n            PicoQC: QCCoercionCost.COST_MEDIUM,\n        }.get(other_qc_cls)\n\n\nclass PicoQC(CalculatorTestQc):\n    \"Represents a query compiler with very few resources\"\n\n    def get_backend(self):\n     
   return \"Pico\"\n\n    @classmethod\n    def max_cost(cls):\n        return QCCoercionCost.COST_LOW\n\n    def move_to_cost(self, other_qc_cls, api_cls_name, op, arguments):\n        return {\n            CloudQC: QCCoercionCost.COST_LOW,\n            CloudQCHighSelf: QCCoercionCost.COST_LOW,\n            ClusterQC: QCCoercionCost.COST_LOW,\n            LocalMachineQC: QCCoercionCost.COST_LOW,\n            PicoQC: QCCoercionCost.COST_ZERO,\n        }.get(other_qc_cls)\n\n\nclass AdversarialQC(CalculatorTestQc):\n    \"Represents a query compiler which returns non-sensical costs\"\n\n    def get_backend(self):\n        return \"Adversarial\"\n\n    def move_to_cost(self, other_qc_cls, api_cls_name, op, arguments):\n        return {\n            CloudQC: -1000,\n            CloudQCHighSelf: -1000,\n            ClusterQC: 10000,\n            AdversarialQC: QCCoercionCost.COST_ZERO,\n        }.get(other_qc_cls)\n\n\nclass OmniscientEagerQC(CalculatorTestQc):\n    \"Represents a query compiler which knows a lot, and wants to steal work\"\n\n    def get_backend(self):\n        return \"Eager\"\n\n    # keep other workloads from getting my workload\n    def move_to_cost(self, other_qc_cls, api_cls_name, op, arguments):\n        if OmniscientEagerQC is other_qc_cls:\n            return QCCoercionCost.COST_ZERO\n        return QCCoercionCost.COST_IMPOSSIBLE\n\n    # try to force other workloads to my engine\n    @classmethod\n    def move_to_me_cost(cls, other_qc, api_cls_name, operation, arguments):\n        return QCCoercionCost.COST_ZERO\n\n\nclass OmniscientLazyQC(CalculatorTestQc):\n    \"Represents a query compiler which knows a lot, and wants to avoid work\"\n\n    def get_backend(self):\n        return \"Lazy\"\n\n    # encorage other engines to take my workload\n    def move_to_cost(self, other_qc_cls, api_cls_name, op, arguments):\n        return QCCoercionCost.COST_ZERO\n\n    # try to keep other workloads from getting my workload\n    @classmethod\n    def 
move_to_me_cost(cls, other_qc, api_cls_name, operation, arguments):\n        if isinstance(other_qc, cls):\n            return QCCoercionCost.COST_ZERO\n        return QCCoercionCost.COST_IMPOSSIBLE\n\n\nclass DefaultQC(CalculatorTestQc):\n    \"Represents a query compiler with no costing information\"\n\n    def get_backend(self):\n        return \"Test_Casting_Default\"\n\n\nclass DefaultQC2(CalculatorTestQc):\n    \"Represents a query compiler with no costing information, but different.\"\n\n    def get_backend(self):\n        return \"Test_Casting_Default_2\"\n\n\nclass BaseTestAutoMover(NativeQueryCompiler):\n\n    _MAX_SIZE_THIS_ENGINE_CAN_HANDLE = BIG_DATA_CLOUD_MIN_NUM_ROWS\n\n    def __init__(self, pandas_frame):\n        super().__init__(pandas_frame)\n\n\nclass CloudForBigDataQC(BaseTestAutoMover):\n    \"\"\"Represents a cloud-hosted query compiler that prefers to stay on the cloud only for big data\"\"\"\n\n    # Operations are more costly on this engine, even though it can handle larger datasets\n    _MAX_SIZE_THIS_ENGINE_CAN_HANDLE = BIG_DATA_CLOUD_MIN_NUM_ROWS * 10\n    _OPERATION_INITIALIZATION_OVERHEAD = QCCoercionCost.COST_MEDIUM\n    _OPERATION_PER_ROW_OVERHEAD = 10\n\n    def __init__(self, pandas_frame):\n        super().__init__(pandas_frame)\n\n    def stay_cost(self, api_cls_name, operation, arguments):\n        if operation == \"read_json\":\n            return QCCoercionCost.COST_IMPOSSIBLE\n        return super().stay_cost(api_cls_name, operation, arguments)\n\n    def get_backend(self) -> str:\n        return \"Big_Data_Cloud\"\n\n    @classmethod\n    def max_cost(cls):\n        return QCCoercionCost.COST_IMPOSSIBLE * 10\n\n    @classmethod\n    def move_to_me_cost(cls, other_qc, api_cls_name, operation, arguments):\n        if api_cls_name in (\"DataFrame\", \"Series\") and operation == \"__init__\":\n            if (query_compiler := arguments.get(\"query_compiler\")) is not None:\n                # When we create a dataframe or 
series with a query compiler\n                # input, we should not switch the resulting dataframe or series\n                # to a different backend.\n                return (\n                    QCCoercionCost.COST_ZERO\n                    if isinstance(query_compiler, cls)\n                    else QCCoercionCost.COST_IMPOSSIBLE\n                )\n            else:\n                # Moving the in-memory __init__ inputs to the cloud is expensive.\n                return QCCoercionCost.COST_HIGH\n        return super().move_to_me_cost(other_qc, api_cls_name, operation, arguments)\n\n\nclass LocalForSmallDataQC(BaseTestAutoMover):\n    \"\"\"Represents a local query compiler that prefers small data.\"\"\"\n\n    # Operations are cheap on this engine for small data, but there is an upper bound\n    _MAX_SIZE_THIS_ENGINE_CAN_HANDLE = BIG_DATA_CLOUD_MIN_NUM_ROWS\n    _OPERATION_PER_ROW_OVERHEAD = 1\n\n    def __init__(self, pandas_frame):\n        super().__init__(pandas_frame)\n\n    def get_backend(self) -> str:\n        return \"Small_Data_Local\"\n\n    @classmethod\n    def max_cost(cls):\n        return QCCoercionCost.COST_IMPOSSIBLE * 10\n\n\ndef register_backend(name, qc):\n    class TestCasterIO(BaseIO):\n        _should_warn_on_default_to_pandas: bool = False\n        query_compiler_cls = qc\n\n    class TestCasterFactory(BaseFactory):\n        @classmethod\n        def prepare(cls):\n            cls.io_cls = TestCasterIO\n\n    TestCasterFactory.prepare()\n\n    factory_name = f\"{name}OnNativeFactory\"\n    setattr(factories, factory_name, TestCasterFactory)\n    Engine.add_option(name)\n    Backend.register_backend(name, Execution(name, \"Native\"))\n\n\nALL_BACKENDS = {\n    \"Pico\": PicoQC,\n    \"Cluster\": ClusterQC,\n    \"Cloud\": CloudQC,\n    \"Cloud_High_Self\": CloudQCHighSelf,\n    \"Local_Machine\": LocalMachineQC,\n    \"Adversarial\": AdversarialQC,\n    \"Eager\": OmniscientEagerQC,\n    \"Lazy\": OmniscientLazyQC,\n    
\"Test_Casting_Default\": DefaultQC,\n    \"Test_Casting_Default_2\": DefaultQC2,\n    \"Big_Data_Cloud\": CloudForBigDataQC,\n    \"Small_Data_Local\": LocalForSmallDataQC,\n}\n\nfor backend, qc in ALL_BACKENDS.items():\n    register_backend(backend, qc)\n\nDEFAULT_TEST_BACKENDS = (\n    \"Pico\",\n    \"Cluster\",\n    \"Cloud\",\n    \"Cloud_High_Self\",\n    \"Local_Machine\",\n    \"Lazy\",\n)\n\n\n@pytest.fixture(autouse=True)\ndef turn_on_auto_switch_backend():\n    with config_context(AutoSwitchBackend=True):\n        yield\n\n\n@contextlib.contextmanager\ndef backend_test_context(\n    *, test_backend: Optional[str] = None, choices: Optional[tuple] = None\n) -> Iterator[None]:\n    if choices is None:\n        # Consider only a select set custom-defined test backends by default for easier testing.\n        # This is necessary because n-ary operations consider _all_ possible active backends, so\n        # we may observe unexpected behavior if too many backends are activated at once.\n        # If a QC is explicitly created for an inactive backend, the QC calculator should still\n        # be able to accept it.\n        choices = DEFAULT_TEST_BACKENDS\n    if test_backend is None:\n        test_backend = choices[0]\n    old_default_backend = Backend.get()\n    old_backend_choices = Backend.get_active_backends()\n    try:\n        Backend.set_active_backends(choices)\n        Backend.put(test_backend)\n        yield\n    finally:\n        Backend.set_active_backends(old_backend_choices)\n        Backend.put(old_default_backend)\n\n\n@pytest.fixture()\ndef cloud_df():\n    return pd.DataFrame(query_compiler=CloudQC(pandas.DataFrame([0, 1, 2])))\n\n\n@pytest.fixture()\ndef cloud_high_self_df():\n    return pd.DataFrame(query_compiler=CloudQCHighSelf(pandas.DataFrame([0, 1, 2])))\n\n\n@pytest.fixture()\ndef cluster_df():\n    return pd.DataFrame(query_compiler=ClusterQC(pandas.DataFrame([0, 1, 2])))\n\n\n@pytest.fixture()\ndef local_df():\n    return 
pd.DataFrame(query_compiler=LocalMachineQC(pandas.DataFrame([0, 1, 2])))\n\n\n@pytest.fixture()\ndef pico_df():\n    return pd.DataFrame(query_compiler=PicoQC(pandas.DataFrame([0, 1, 2])))\n\n\n@pytest.fixture()\ndef adversarial_df():\n    return pd.DataFrame(query_compiler=AdversarialQC(pandas.DataFrame([0, 1, 2])))\n\n\n@pytest.fixture()\ndef eager_df():\n    return pd.DataFrame(query_compiler=OmniscientEagerQC(pandas.DataFrame([0, 1, 2])))\n\n\n@pytest.fixture()\ndef lazy_df():\n    return pd.DataFrame(query_compiler=OmniscientLazyQC(pandas.DataFrame([0, 1, 2])))\n\n\n@pytest.fixture()\ndef default_df():\n    return pd.DataFrame(query_compiler=DefaultQC(pandas.DataFrame([0, 1, 2])))\n\n\n@pytest.fixture()\ndef default2_df():\n    return pd.DataFrame(query_compiler=DefaultQC2(pandas.DataFrame([0, 1, 2])))\n\n\ndef test_two_same_backend(pico_df):\n    df3 = pd.concat([pico_df, pico_df], axis=1)\n    assert pico_df.get_backend() == \"Pico\"\n    assert df3.get_backend() == \"Pico\"\n\n\ndef test_cast_to_second_backend_with_concat(pico_df, cluster_df, caplog):\n    with caplog.at_level(level=logging.INFO, logger=DEFAULT_LOGGER_NAME):\n        # We have to copy the input dataframes because of inplace merging\n        df3 = pd.concat([pico_df.copy(), cluster_df.copy()], axis=1)\n    assert pico_df.get_backend() == \"Pico\"\n    assert cluster_df.get_backend() == \"Cluster\"\n    assert df3.get_backend() == \"Cluster\"  # result should be on cluster\n\n    log_records = caplog.records\n    assert len(log_records) == 1\n    assert log_records[0].name == DEFAULT_LOGGER_NAME\n    assert log_records[0].levelno == logging.INFO\n    assert log_records[0].message.startswith(\n        \"BackendCostCalculator results for pd.concat: \"\n    )\n\n\ndef test_cast_to_second_backend_with_concat_uses_second_backend_api_override(\n    pico_df, cluster_df\n):\n    register_pd_accessor(name=\"concat\", backend=\"Cluster\")(\n        lambda *args, **kwargs: \"custom_concat_result\"\n    
)\n    # copy dataframes for concat to allow for in-place merging\n    assert (\n        pd.concat([pico_df.copy(), cluster_df.copy()], axis=1) == \"custom_concat_result\"\n    )\n    assert pico_df.get_backend() == \"Pico\"\n    assert cluster_df.get_backend() == \"Cluster\"\n\n\ndef test_moving_pico_to_cluster_in_place_calls_set_backend_only_once_github_issue_7490(\n    pico_df, cluster_df\n):\n    with mock.patch.object(\n        pd.DataFrame, \"set_backend\", wraps=pico_df.set_backend\n    ) as mock_set_backend:\n        pico_df.set_backend(cluster_df.get_backend(), inplace=True)\n    assert pico_df.get_backend() == \"Cluster\"\n    mock_set_backend.assert_called_once_with(\"Cluster\", inplace=True)\n\n\ndef test_cast_to_second_backend_with___init__(pico_df, cluster_df):\n    df3 = pd.DataFrame({\"pico\": pico_df.iloc[:, 0], \"cluster\": cluster_df.iloc[:, 0]})\n    assert (\n        pico_df.get_backend() == \"Pico\"\n    )  # pico stays despite in-place casting by iloc\n    assert cluster_df.get_backend() == \"Cluster\"\n    assert df3.get_backend() == \"Cluster\"  # result should be on cluster\n\n\ndef test_cast_to_first_backend(pico_df, cluster_df):\n    df3 = pd.concat([cluster_df, pico_df], axis=1)\n    assert pico_df.get_backend() == \"Cluster\"  # pico_df was cast in place by concat\n    assert cluster_df.get_backend() == \"Cluster\"\n    assert df3.get_backend() == cluster_df.get_backend()  # result should be on cluster\n\n\ndef test_cast_to_first_backend_with_concat_uses_first_backend_api_override(\n    pico_df, cluster_df\n):\n    register_pd_accessor(name=\"concat\", backend=\"Cluster\")(\n        lambda *args, **kwargs: \"custom_concat_result\"\n    )\n    assert pd.concat([cluster_df, pico_df], axis=1) == \"custom_concat_result\"\n    assert pico_df.get_backend() == \"Cluster\"  # pico was cast in place by concat\n    assert cluster_df.get_backend() == \"Cluster\"\n\n\ndef test_cast_to_first_backend_with___init__(pico_df, cluster_df):\n    df3 = 
pd.DataFrame(\n        {\n            \"cluster\": cluster_df.iloc[:, 0],\n            \"pico\": pico_df.iloc[:, 0],\n        }\n    )\n    assert pico_df.get_backend() == \"Pico\"  # Pico not cast in place by iloc\n    assert cluster_df.get_backend() == \"Cluster\"\n    assert df3.get_backend() == \"Cluster\"  # result should be on cluster\n\n\ndef test_self_cost_causes_move(cloud_high_self_df, cluster_df):\n    \"\"\"\n    Test that ``self_cost`` is being properly considered.\n\n    Cost to stay on cloud_high_self is HIGH, but moving to cluster is MEDIUM.\n    Cost to stay on cluster is ZERO, and moving to cloud_high_self is MEDIUM.\n\n    With two dataframes, one on each backend, the total cost of using\n    ``cloud_high_self`` as the final backend is:\n    ``stay_cost(cloud_high_self) + move_cost(cluster->cloud_high_self)``\n    which is ``HIGH + MEDIUM``.\n    The total cost of using ``cluster`` as the final backend is:\n    ``stay_cost(cluster) + move_cost(cloud_high_self->cluster)``\n    which is ``ZERO + MEDIUM``.\n\n    So we should select ``cluster``.\n    \"\"\"\n    result = pd.concat([cloud_high_self_df, cluster_df])\n    assert result.get_backend() == \"Cluster\"\n\n    result = pd.concat([cluster_df, cloud_high_self_df])\n    assert result.get_backend() == \"Cluster\"\n\n\n@pytest.mark.parametrize(\n    \"df1, df2, df3, df4, expected_result_backend\",\n    [\n        # no-op\n        (\"cloud_df\", \"cloud_df\", \"cloud_df\", \"cloud_df\", \"Cloud\"),\n        # moving all dfs to cloud is 1250, moving to cluster is 1000\n        # regardless of how they are ordered\n        (\"pico_df\", \"local_df\", \"cluster_df\", \"cloud_df\", \"Cluster\"),\n        (\"cloud_df\", \"local_df\", \"cluster_df\", \"pico_df\", \"Cluster\"),\n        (\"cloud_df\", \"cluster_df\", \"local_df\", \"pico_df\", \"Cluster\"),\n        (\"cloud_df\", \"cloud_df\", \"local_df\", \"pico_df\", \"Cloud\"),\n        # Still move everything to cloud\n        (\"pico_df\", 
\"pico_df\", \"pico_df\", \"cloud_df\", \"Cloud\"),\n        (\"pico_df\", \"pico_df\", \"local_df\", \"cloud_df\", \"Cloud\"),\n    ],\n)\ndef test_mixed_dfs(df1, df2, df3, df4, expected_result_backend, request):\n    df1 = request.getfixturevalue(df1)\n    df2 = request.getfixturevalue(df2)\n    df3 = request.getfixturevalue(df3)\n    df4 = request.getfixturevalue(df4)\n    if expected_result_backend is None:\n        with pytest.raises(ValueError):\n            pd.concat(axis=1, objs=[df1, df2, df3, df4])\n    else:\n        result = pd.concat(axis=1, objs=[df1, df2, df3, df4])\n        assert result.get_backend() == expected_result_backend\n\n\ndef test_adversarial_high(adversarial_df, cluster_df):\n    with pytest.raises(ValueError):\n        pd.concat([adversarial_df, cluster_df], axis=1)\n\n\ndef test_adversarial_low(adversarial_df, cloud_df):\n    with pytest.raises(ValueError):\n        pd.concat([adversarial_df, cloud_df], axis=1)\n\n\ndef test_two_two_qc_types_default_rhs(default_df, cluster_df):\n    # none of the query compilers know about each other here\n    # so we default to the caller\n    df3 = pd.concat([default_df, cluster_df], axis=1)\n    assert default_df.get_backend() == \"Test_Casting_Default\"\n    assert (\n        cluster_df.get_backend() == \"Test_Casting_Default\"\n    )  # in place cast to default by concat\n    assert df3.get_backend() == default_df.get_backend()  # should move to default\n\n\ndef test_two_two_qc_types_default_lhs(default_df, cluster_df):\n    # none of the query compilers know about each other here\n    # so we default to the caller\n    df3 = pd.concat([cluster_df, default_df], axis=1)\n    assert default_df.get_backend() == \"Cluster\"  # in place cast to Cluster by concat\n    assert cluster_df.get_backend() == \"Cluster\"\n    assert df3.get_backend() == cluster_df.get_backend()  # should move to cluster\n\n\ndef test_two_two_qc_types_default_2_rhs(default_df, cloud_df):\n    # cloud knows a bit about costing; 
so we prefer moving to there\n    df3 = pd.concat([default_df, cloud_df], axis=1)\n    assert default_df.get_backend() == \"Cloud\"  # inplace cast to Cloud by concat\n    assert cloud_df.get_backend() == \"Cloud\"\n    assert df3.get_backend() == cloud_df.get_backend()  # should move to cloud\n\n\ndef test_two_two_qc_types_default_2_lhs(default_df, cloud_df):\n    # cloud knows a bit about costing; so we prefer moving to there\n    df3 = pd.concat([cloud_df, default_df], axis=1)\n    assert default_df.get_backend() == \"Cloud\"  # inplace cast to Cloud by concat\n    assert cloud_df.get_backend() == \"Cloud\"\n    assert df3.get_backend() == cloud_df.get_backend()  # should move to cloud\n\n\ndef test_default_to_caller(default_df, default2_df):\n    # No qc knows anything; default to caller\n\n    df3 = pd.concat([default_df, default2_df], axis=1)\n    assert df3.get_backend() == default_df.get_backend()  # should stay on caller\n\n    df3 = pd.concat([default2_df, default_df], axis=1)\n    assert df3.get_backend() == default2_df.get_backend()  # should stay on caller\n\n    df3 = pd.concat([default_df, default_df], axis=1)\n    assert df3.get_backend() == default_df.get_backend()  # no change\n\n\ndef test_no_qc_to_calculate():\n    calculator = BackendCostCalculator(\n        operation_arguments=MappingProxyType({}),\n        api_cls_name=None,\n        operation=\"operation0\",\n        query_compilers=[],\n        preop_switch=False,\n    )\n    with pytest.raises(ValueError):\n        calculator.calculate()\n\n\ndef test_qc_default_self_cost(default_df, default2_df):\n    assert (\n        default_df._query_compiler.move_to_cost(\n            other_qc_type=type(default2_df._query_compiler),\n            api_cls_name=None,\n            operation=\"operation0\",\n            arguments=MappingProxyType({}),\n        )\n        is None\n    )\n    assert (\n        default_df._query_compiler.move_to_cost(\n            
other_qc_type=type(default_df._query_compiler),\n            api_cls_name=None,\n            operation=\"operation0\",\n            arguments=MappingProxyType({}),\n        )\n        is QCCoercionCost.COST_ZERO\n    )\n\n\ndef test_qc_casting_changed_operation(pico_df, cloud_df):\n    pico_df1 = pico_df\n    cloud_df1 = cloud_df\n    native_cdf2 = cloud_df1._to_pandas()\n    native_pdf2 = pico_df1._to_pandas()\n    expected = native_cdf2 + native_pdf2\n    # test both directions\n    df_cast_to_rhs = pico_df1 + cloud_df1\n    df_cast_to_lhs = cloud_df1 + pico_df1\n    assert df_cast_to_rhs._to_pandas().equals(expected)\n    assert df_cast_to_lhs._to_pandas().equals(expected)\n\n\ndef test_qc_mixed_loc(pico_df, cloud_df):\n    pico_df1 = pico_df\n    cloud_df1 = cloud_df\n    assert pico_df1[pico_df1[0][0]][cloud_df1[0][1]] == 1\n    assert pico_df1[cloud_df1[0][0]][pico_df1[0][1]] == 1\n    assert cloud_df1[pico_df1[0][0]][pico_df1[0][1]] == 1\n\n\ndef test_merge_in_place(default_df, lazy_df, cloud_df):\n    # lazy_df tries to pawn off work on other engines\n    df = default_df.merge(lazy_df)\n    assert df.get_backend() is default_df.get_backend()\n    # Both arguments now have the same qc type\n    assert lazy_df.get_backend() is default_df.get_backend()\n\n    with config_context(BackendMergeCastInPlace=False):\n        lazy_df = lazy_df.move_to(\"Lazy\")\n        cloud_df = cloud_df.move_to(\"Cloud\")\n        df = cloud_df.merge(lazy_df)\n        assert df.get_backend() == cloud_df.get_backend()\n        assert lazy_df.get_backend() == \"Lazy\"\n        assert cloud_df.get_backend() == \"Cloud\"\n\n\ndef test_information_asymmetry(default_df, cloud_df, eager_df, lazy_df):\n    # normally, the default query compiler should be chosen\n    # here, but since eager knows about default, but not\n    # the other way around, eager has a special ability to\n    # control the directionality of the cast.\n    df = default_df.merge(eager_df)\n    assert df.get_backend() 
== eager_df.get_backend()\n    df = cloud_df.merge(eager_df)\n    assert df.get_backend() == eager_df.get_backend()\n\n    # lazy_df tries to pawn off work on other engines\n    df = default_df.merge(lazy_df)\n    assert df.get_backend() == default_df.get_backend()\n    df = cloud_df.merge(lazy_df)\n    assert df.get_backend() == cloud_df.get_backend()\n\n\ndef test_setitem_in_place_with_self_switching_backend(cloud_df, local_df):\n    local_df.iloc[1, 0] = cloud_df.iloc[1, 0] + local_df.iloc[1, 0]\n    # compute happens in cloud, but we have to make sure that we propagate the\n    # in-place update to the local_df\n    df_equals(\n        local_df,\n        pandas.DataFrame(\n            [\n                0,\n                2,\n                2,\n            ]\n        ),\n    )\n    assert local_df.get_backend() == \"Local_Machine\"\n    assert cloud_df.get_backend() == \"Cloud\"\n\n\n@pytest.mark.parametrize(\"pin_local\", [True, False], ids=[\"pinned\", \"unpinned\"])\ndef test_switch_local_to_cloud_with_iloc___setitem__(local_df, cloud_df, pin_local):\n    if pin_local:\n        local_df = local_df.pin_backend()\n    local_df.iloc[:, 0] = cloud_df.iloc[:, 0] + 1\n    expected_pandas = local_df._to_pandas()\n    expected_pandas.iloc[:, 0] = cloud_df._to_pandas().iloc[:, 0] + 1\n    df_equals(local_df, expected_pandas)\n    assert local_df.get_backend() == \"Local_Machine\" if pin_local else \"Cloud\"\n\n\n# This test should force the creation of a dataframe which\n# is too large for the backend and verify that it stays there\n# because there are no other options\ndef test_single_backend_merge_no_good_options():\n    with backend_test_context(\n        test_backend=\"Small_Data_Local\",\n        choices=[\"Small_Data_Local\"],\n    ):\n        df1 = pd.DataFrame({\"a\": [1] * 100})\n        df1[\"two\"] = pd.to_datetime(df1[\"a\"])\n        assert df1.get_backend() == \"Small_Data_Local\"\n\n\ndef test_stay_or_move_evaluation(cloud_high_self_df, 
default_df):\n    default_cls = type(default_df._get_query_compiler())\n    cloud_cls = type(cloud_high_self_df._get_query_compiler())\n    empty_arguments = MappingProxyType({})\n\n    stay_cost = cloud_high_self_df._get_query_compiler().stay_cost(\n        \"Series\", \"myop\", arguments=empty_arguments\n    )\n    move_cost = cloud_high_self_df._get_query_compiler().move_to_cost(\n        default_cls, \"Series\", \"myop\", arguments=empty_arguments\n    )\n    if stay_cost > move_cost:\n        df = cloud_high_self_df.move_to(\"Test_Casting_Default\")\n    else:\n        assert False\n\n    stay_cost = df._get_query_compiler().stay_cost(\n        \"Series\", \"myop\", arguments=empty_arguments\n    )\n    move_cost = df._get_query_compiler().move_to_cost(\n        cloud_cls, \"Series\", \"myop\", arguments=empty_arguments\n    )\n    assert stay_cost is not None\n    assert move_cost is None\n\n\ndef test_max_shape(cloud_df):\n    # default implementation matches df.shape\n    assert cloud_df.shape == cloud_df._query_compiler._max_shape()\n\n\nclass TestSwitchBackendPostOpDependingOnDataSize:\n    def test_read_json(self):\n        with backend_test_context(\n            test_backend=\"Big_Data_Cloud\",\n            choices=(\"Big_Data_Cloud\", \"Small_Data_Local\"),\n        ):\n            big_json = json.dumps({\"col0\": list(range(BIG_DATA_CLOUD_MIN_NUM_ROWS))})\n            small_json = json.dumps(\n                {\"col0\": list(range(BIG_DATA_CLOUD_MIN_NUM_ROWS - 1))}\n            )\n            assert pd.read_json(StringIO(big_json)).get_backend() == \"Big_Data_Cloud\"\n            assert pd.read_json(StringIO(small_json)).get_backend() == \"Big_Data_Cloud\"\n            register_function_for_post_op_switch(\n                class_name=None, backend=\"Big_Data_Cloud\", method=\"read_json\"\n            )\n            assert pd.read_json(StringIO(big_json)).get_backend() == \"Big_Data_Cloud\"\n            assert (\n                
pd.read_json(StringIO(small_json)).get_backend() == \"Small_Data_Local\"\n            )\n\n    @backend_test_context(\n        test_backend=\"Big_Data_Cloud\",\n        choices=(\"Big_Data_Cloud\", \"Small_Data_Local\"),\n    )\n    def test_read_json_logging_for_post_op_switch(self, caplog):\n        register_function_for_post_op_switch(\n            class_name=None, backend=\"Big_Data_Cloud\", method=\"read_json\"\n        )\n        with caplog.at_level(level=logging.INFO, logger=DEFAULT_LOGGER_NAME):\n            assert (\n                pd.read_json(\n                    StringIO(\n                        json.dumps(\n                            {\"col0\": list(range(BIG_DATA_CLOUD_MIN_NUM_ROWS - 1))}\n                        )\n                    )\n                ).get_backend()\n                == \"Small_Data_Local\"\n            )\n        log_records = caplog.records\n        assert len(log_records) == 2\n\n        assert log_records[0].name == DEFAULT_LOGGER_NAME\n        assert log_records[0].levelno == logging.INFO\n        assert log_records[0].message.startswith(\n            \"After modin.pandas function read_json, considered moving to backend Small_Data_Local with\"\n        )\n\n        assert log_records[1].name == DEFAULT_LOGGER_NAME\n        assert log_records[1].levelno == logging.INFO\n        assert log_records[1].message.startswith(\n            \"Chose to move to backend Small_Data_Local\"\n        )\n\n    @backend_test_context(\n        test_backend=\"Big_Data_Cloud\",\n        choices=(\"Big_Data_Cloud\", \"Small_Data_Local\"),\n    )\n    def test_read_json_logging_for_post_op_not_switch(self, caplog):\n        register_function_for_post_op_switch(\n            class_name=None, backend=\"Big_Data_Cloud\", method=\"read_json\"\n        )\n        with caplog.at_level(level=logging.INFO, logger=DEFAULT_LOGGER_NAME):\n            assert (\n                pd.read_json(\n                    StringIO(\n                        
json.dumps({\"col0\": list(range(BIG_DATA_CLOUD_MIN_NUM_ROWS))})\n                    )\n                ).get_backend()\n                == \"Big_Data_Cloud\"\n            )\n        log_records = caplog.records\n        assert len(log_records) == 2\n\n        assert log_records[0].name == DEFAULT_LOGGER_NAME\n        assert log_records[0].levelno == logging.INFO\n        assert log_records[0].message.startswith(\n            \"After modin.pandas function read_json, considered moving to backend Small_Data_Local with\"\n        )\n\n        assert log_records[1].name == DEFAULT_LOGGER_NAME\n        assert log_records[1].levelno == logging.INFO\n        assert log_records[1].message.startswith(\n            \"Chose not to switch backends after operation read_json\"\n        )\n\n    @backend_test_context(\n        test_backend=\"Big_Data_Cloud\",\n        choices=(\"Big_Data_Cloud\", \"Small_Data_Local\"),\n    )\n    def test_progress_bar_shows_modin_pandas_for_general_functions(self):\n        \"\"\"Test that progress bar messages show 'modin.pandas.read_json' instead of 'None.read_json' for general functions.\"\"\"\n        with mock.patch(\"tqdm.auto.trange\") as mock_trange:\n            mock_trange.return_value = range(2)\n\n            # Register a post-op switch for read_json (general function with class_name=None)\n            register_function_for_post_op_switch(\n                class_name=None, backend=\"Big_Data_Cloud\", method=\"read_json\"\n            )\n\n            # Create a small dataset that will trigger backend switch and show progress bar\n            json_input = json.dumps(\n                {\"col0\": list(range(BIG_DATA_CLOUD_MIN_NUM_ROWS - 1))}\n            )\n\n            # This should trigger a backend switch and show progress bar\n            result_df = pd.read_json(StringIO(json_input))\n            assert result_df.get_backend() == \"Small_Data_Local\"\n\n            # Verify that trange was called with correct progress bar 
message\n            mock_trange.assert_called_once()\n            call_args = mock_trange.call_args\n            desc = call_args[1][\"desc\"]  # Get the 'desc' keyword argument\n\n            assert desc.startswith(\n                \"Transfer: Big_Dat... → Small_D...  |    read_json    ≃ (9, 1)    \"\n            )\n\n    def test_agg(self):\n        with backend_test_context(\n            test_backend=\"Big_Data_Cloud\",\n            choices=(\"Big_Data_Cloud\", \"Small_Data_Local\"),\n        ):\n            df = pd.DataFrame([[1, 2], [3, 4]])\n            assert df.get_backend() == \"Big_Data_Cloud\"\n            assert df.sum().get_backend() == \"Big_Data_Cloud\"\n            register_function_for_post_op_switch(\n                class_name=\"DataFrame\", backend=\"Big_Data_Cloud\", method=\"sum\"\n            )\n            assert df.get_backend() == \"Big_Data_Cloud\"\n            assert df.sum().get_backend() == \"Small_Data_Local\"\n\n    def test_agg_pinned(self):\n        # The operation in test_agg would naturally cause an automatic switch, but the\n        # absence of AutoSwitchBackend or the presence of a pin on the frame prevent this\n        # switch from happening.\n        with backend_test_context(\n            test_backend=\"Big_Data_Cloud\",\n            choices=(\"Big_Data_Cloud\", \"Small_Data_Local\"),\n        ):\n            register_function_for_post_op_switch(\n                class_name=\"DataFrame\", backend=\"Big_Data_Cloud\", method=\"sum\"\n            )\n            # No pin or config, should switch\n            df = pd.DataFrame([[1, 2], [3, 4]])\n            assert df.get_backend() == \"Big_Data_Cloud\"\n            assert df.sum().get_backend() == \"Small_Data_Local\"\n            # config set to false, should not switch\n            with config_context(AutoSwitchBackend=False):\n                df = pd.DataFrame([[1, 2], [3, 4]])\n                assert df.get_backend() == \"Big_Data_Cloud\"\n                assert 
df.sum().get_backend() == \"Big_Data_Cloud\"\n            # no config, but data is pinned\n            df = pd.DataFrame([[1, 2], [3, 4]]).pin_backend()\n            assert df.get_backend() == \"Big_Data_Cloud\"\n            assert df.sum().get_backend() == \"Big_Data_Cloud\"\n            # a frame-level pin remains valid across a transformation\n            df_copy = df + 1\n            assert df_copy.get_backend() == \"Big_Data_Cloud\"\n            assert df_copy.sum().get_backend() == \"Big_Data_Cloud\"\n            # unpinning df allows a switch again\n            df = df.unpin_backend()\n            assert df.get_backend() == \"Big_Data_Cloud\"\n            assert df.sum().get_backend() == \"Small_Data_Local\"\n            df_copy = df + 1\n            assert df_copy.get_backend() == \"Big_Data_Cloud\"\n            assert df_copy.sum().get_backend() == \"Small_Data_Local\"\n            # check in-place pin/unpin operations\n            df.pin_backend(inplace=True)\n            assert df.get_backend() == \"Big_Data_Cloud\"\n            assert df.sum().get_backend() == \"Big_Data_Cloud\"\n            df.unpin_backend(inplace=True)\n            assert df.get_backend() == \"Big_Data_Cloud\"\n            assert df.sum().get_backend() == \"Small_Data_Local\"\n\n    @pytest.mark.parametrize(\n        \"num_groups, expected_backend\",\n        [\n            (BIG_DATA_CLOUD_MIN_NUM_ROWS - 1, \"Small_Data_Local\"),\n            (BIG_DATA_CLOUD_MIN_NUM_ROWS, \"Big_Data_Cloud\"),\n        ],\n    )\n    @pytest.mark.parametrize(\n        \"groupby_class,operation\",\n        [\n            param(\n                \"DataFrameGroupBy\",\n                lambda df: df.groupby(\"col0\").sum(),\n                id=\"DataFrameGroupBy\",\n            ),\n            param(\n                \"SeriesGroupBy\",\n                lambda df: df.groupby(\"col0\")[\"col1\"].sum(),\n                id=\"SeriesGroupBy\",\n            ),\n        ],\n    )\n    def 
test_dataframe_groupby_agg_switches_for_small_result(\n        self, num_groups, expected_backend, operation, groupby_class\n    ):\n        with backend_test_context(\n            test_backend=\"Big_Data_Cloud\",\n            choices=(\"Big_Data_Cloud\", \"Small_Data_Local\"),\n        ):\n            modin_df, pandas_df = create_test_dfs(\n                {\n                    \"col0\": list(range(num_groups)),\n                    \"col1\": list(range(1, num_groups + 1)),\n                }\n            )\n\n            assert modin_df.get_backend() == \"Big_Data_Cloud\"\n            assert operation(modin_df).get_backend() == \"Big_Data_Cloud\"\n\n            register_function_for_post_op_switch(\n                class_name=groupby_class, backend=\"Big_Data_Cloud\", method=\"sum\"\n            )\n\n            assert modin_df.get_backend() == \"Big_Data_Cloud\"\n            modin_result = operation(modin_df)\n            pandas_result = operation(pandas_df)\n            df_equals(modin_result, pandas_result)\n            assert modin_result.get_backend() == expected_backend\n            assert modin_df.get_backend() == \"Big_Data_Cloud\"\n\n    @pytest.mark.parametrize(\n        \"groupby_class,operation\",\n        [\n            param(\n                \"DataFrameGroupBy\",\n                lambda groupby: groupby.sum(),\n                id=\"DataFrameGroupBy\",\n            ),\n            param(\n                \"SeriesGroupBy\",\n                lambda groupby: groupby[\"col1\"].sum(),\n                id=\"SeriesGroupBy\",\n            ),\n        ],\n    )\n    @pytest.mark.parametrize(\n        \"auto_switch_backend\",\n        [True, False],\n        ids=lambda param: f\"auto_switch_backend_{param}\",\n    )\n    def test_auto_switch_config_can_disable_groupby_agg_auto_switch(\n        self,\n        operation,\n        groupby_class,\n        auto_switch_backend,\n    ):\n        num_groups = BIG_DATA_CLOUD_MIN_NUM_ROWS - 1\n        with 
backend_test_context(\n            test_backend=\"Big_Data_Cloud\",\n            choices=(\"Big_Data_Cloud\", \"Small_Data_Local\"),\n        ), config_context(AutoSwitchBackend=auto_switch_backend):\n            modin_groupby, pandas_groupby = (\n                df.groupby(\"col0\")\n                for df in create_test_dfs(\n                    {\n                        \"col0\": list(range(num_groups)),\n                        \"col1\": list(range(1, num_groups + 1)),\n                    }\n                )\n            )\n\n            assert modin_groupby.get_backend() == \"Big_Data_Cloud\"\n            assert operation(modin_groupby).get_backend() == \"Big_Data_Cloud\"\n\n            register_function_for_post_op_switch(\n                class_name=groupby_class, backend=\"Big_Data_Cloud\", method=\"sum\"\n            )\n\n            assert modin_groupby.get_backend() == \"Big_Data_Cloud\"\n            modin_result = operation(modin_groupby)\n            pandas_result = operation(pandas_groupby)\n            df_equals(modin_result, pandas_result)\n            assert modin_result.get_backend() == (\n                \"Small_Data_Local\" if auto_switch_backend else \"Big_Data_Cloud\"\n            )\n            assert modin_groupby.get_backend() == \"Big_Data_Cloud\"\n\n    @pytest.mark.parametrize(\n        \"groupby_class,groupby_operation,agg_operation\",\n        [\n            param(\n                \"DataFrameGroupBy\",\n                lambda df: df.groupby(\"col0\"),\n                lambda groupby: groupby.sum(),\n                id=\"DataFrameGroupBy\",\n            ),\n            param(\n                \"SeriesGroupBy\",\n                lambda df: df.groupby(\"col0\")[\"col1\"],\n                lambda groupby: groupby.sum(),\n                id=\"SeriesGroupBy\",\n            ),\n        ],\n    )\n    @backend_test_context(\n        test_backend=\"Big_Data_Cloud\",\n        choices=(\"Big_Data_Cloud\", \"Small_Data_Local\"),\n    )\n    
def test_pinned_dataframe_prevents_groupby_backend_switch(\n        self, groupby_class, groupby_operation, agg_operation\n    ):\n        \"\"\"Test that pinning a DataFrame prevents groupby operations from switching backends.\"\"\"\n        modin_df, pandas_df = create_test_dfs(\n            {\n                \"col0\": list(range(BIG_DATA_CLOUD_MIN_NUM_ROWS - 1)),\n                \"col1\": list(range(1, BIG_DATA_CLOUD_MIN_NUM_ROWS)),\n            }\n        )\n\n        assert modin_df.get_backend() == \"Big_Data_Cloud\"\n\n        # Pin the DataFrame\n        modin_df.pin_backend(inplace=True)\n        assert modin_df.is_backend_pinned()\n\n        # Create groupby object - should inherit pin status from dataframe\n        modin_groupby = groupby_operation(modin_df)\n        pandas_groupby = groupby_operation(pandas_df)\n        assert modin_groupby.is_backend_pinned()  # Inherited from DataFrame\n\n        # Register a post-op switch that would normally move to Small_Data_Local\n        register_function_for_post_op_switch(\n            class_name=groupby_class, backend=\"Big_Data_Cloud\", method=\"sum\"\n        )\n\n        # The operation should stay on Big_Data_Cloud due to inherited pinning\n        modin_result = agg_operation(modin_groupby)\n        pandas_result = agg_operation(pandas_groupby)\n        df_equals(modin_result, pandas_result)\n        assert modin_result.get_backend() == \"Big_Data_Cloud\"\n\n    @pytest.mark.parametrize(\n        \"groupby_class,groupby_operation,agg_operation\",\n        [\n            param(\n                \"DataFrameGroupBy\",\n                lambda df: df.groupby(\"col0\"),\n                lambda groupby: groupby.sum(),\n                id=\"DataFrameGroupBy\",\n            ),\n            param(\n                \"SeriesGroupBy\",\n                lambda df: df.groupby(\"col0\")[\"col1\"],\n                lambda groupby: groupby.sum(),\n                id=\"SeriesGroupBy\",\n            ),\n        ],\n    
)\n    @pytest.mark.parametrize(\"inplace\", [True, False], ids=[\"inplace\", \"not_inplace\"])\n    @backend_test_context(\n        test_backend=\"Big_Data_Cloud\",\n        choices=(\"Big_Data_Cloud\", \"Small_Data_Local\"),\n    )\n    def test_pinned_groupby_prevents_backend_switch(\n        self, groupby_class, groupby_operation, agg_operation, inplace\n    ):\n        \"\"\"Test that pinning a GroupBy object prevents operations from switching backends.\"\"\"\n        modin_df, pandas_df = create_test_dfs(\n            {\n                \"col0\": list(range(BIG_DATA_CLOUD_MIN_NUM_ROWS - 1)),\n                \"col1\": list(range(1, BIG_DATA_CLOUD_MIN_NUM_ROWS)),\n            }\n        )\n\n        assert modin_df.get_backend() == \"Big_Data_Cloud\"\n\n        # Create groupby object and pin it\n        modin_groupby = groupby_operation(modin_df)\n        pandas_groupby = groupby_operation(pandas_df)\n\n        if inplace:\n            modin_groupby.pin_backend(inplace=True)\n            assert modin_groupby.is_backend_pinned()\n        else:\n            pinned_groupby = modin_groupby.pin_backend(inplace=False)\n            assert not modin_groupby.is_backend_pinned()\n            assert pinned_groupby.is_backend_pinned()\n            modin_groupby = pinned_groupby\n\n        # Register a post-op switch that would normally move to Small_Data_Local\n        register_function_for_post_op_switch(\n            class_name=groupby_class, backend=\"Big_Data_Cloud\", method=\"sum\"\n        )\n\n        # The operation should stay on Big_Data_Cloud due to pinning\n        modin_result = agg_operation(modin_groupby)\n        pandas_result = agg_operation(pandas_groupby)\n        df_equals(modin_result, pandas_result)\n        assert modin_result.get_backend() == \"Big_Data_Cloud\"\n\n\nclass TestSwitchBackendPreOp:\n    @pytest.mark.parametrize(\n        \"data_size, expected_backend\",\n        [\n            param(\n                BIG_DATA_CLOUD_MIN_NUM_ROWS - 
1,\n                \"Small_Data_Local\",\n                id=\"small_data_should_move_to_small_engine\",\n            ),\n            param(\n                BIG_DATA_CLOUD_MIN_NUM_ROWS,\n                \"Big_Data_Cloud\",\n                id=\"big_data_should_stay_in_cloud\",\n            ),\n        ],\n    )\n    def test_describe_switches_depending_on_data_size(\n        self, data_size, expected_backend\n    ):\n        # Mock the default describe() implementation so that we can check that we\n        # are calling it with the correct backend as an input. We can't just inspect\n        # the mock's call_args_list because call_args_list keeps a reference to the\n        # input dataframe, whose backend may change in place.\n        mock_describe = mock.Mock(\n            wraps=pd.DataFrame._extensions[None][\"describe\"],\n            side_effect=(\n                # 1) Record the input backend\n                lambda self, *args, **kwargs: setattr(\n                    mock_describe, \"_last_input_backend\", self.get_backend()\n                )\n                # 2) Return mock.DEFAULT so that we fall back to the original\n                #    describe() implementation\n                or mock.DEFAULT\n            ),\n        )\n        with backend_test_context(\n            test_backend=\"Big_Data_Cloud\",\n            choices=(\"Big_Data_Cloud\", \"Small_Data_Local\"),\n        ):\n            df = pd.DataFrame(list(range(data_size)))\n            with mock.patch.dict(\n                pd.DataFrame._extensions[None], {\"describe\": mock_describe}\n            ):\n                # Before we register the post-op switch, the describe() method\n                # should not trigger auto-switch.\n                assert df.get_backend() == \"Big_Data_Cloud\"\n                describe_result = df.describe()\n                df_equals(describe_result, df._to_pandas().describe())\n                assert describe_result.get_backend() == \"Big_Data_Cloud\"\n        
        assert df.get_backend() == \"Big_Data_Cloud\"\n                mock_describe.assert_called_once()\n                assert mock_describe._last_input_backend == \"Big_Data_Cloud\"\n\n                mock_describe.reset_mock()\n\n                register_function_for_pre_op_switch(\n                    class_name=\"DataFrame\", backend=\"Big_Data_Cloud\", method=\"describe\"\n                )\n\n                # Now that we've registered the pre-op switch, the describe() call\n                # should trigger auto-switch.\n                assert df.get_backend() == \"Big_Data_Cloud\"\n                describe_result = df.describe()\n                df_equals(describe_result, df._to_pandas().describe())\n                assert describe_result.get_backend() == expected_backend\n                assert df.get_backend() == expected_backend\n                mock_describe.assert_called_once()\n                assert mock_describe._last_input_backend == expected_backend\n\n    def test_read_json_with_extensions(self):\n        json_input = json.dumps({\"col0\": [1]})\n        # Mock the read_json implementation for each backend so that we can check\n        # that we are calling the correct implementation. 
Also, we have to make\n        # the extension methods produce dataframes with the correct backends.\n        pandas_read_json = mock.Mock(\n            wraps=(\n                lambda *args, **kwargs: _GENERAL_EXTENSIONS[None][\"read_json\"](\n                    *args, **kwargs\n                ).move_to(\"Small_Data_Local\")\n            )\n        )\n        pandas_read_json.__name__ = \"read_json\"\n        cloud_read_json = mock.Mock(\n            wraps=(\n                lambda *args, **kwargs: _GENERAL_EXTENSIONS[None][\"read_json\"](\n                    *args, **kwargs\n                ).move_to(\"Big_Data_Cloud\")\n            )\n        )\n        cloud_read_json.__name__ = \"read_json\"\n\n        register_pd_accessor(\"read_json\", backend=\"Small_Data_Local\")(pandas_read_json)\n        register_pd_accessor(\"read_json\", backend=\"Big_Data_Cloud\")(cloud_read_json)\n\n        with backend_test_context(\n            test_backend=\"Big_Data_Cloud\",\n            choices=(\"Big_Data_Cloud\", \"Small_Data_Local\"),\n        ):\n            df = pd.read_json(StringIO(json_input))\n            assert df.get_backend() == \"Big_Data_Cloud\"\n            pandas_read_json.assert_not_called()\n            cloud_read_json.assert_called_once()\n\n            register_function_for_pre_op_switch(\n                class_name=None, backend=\"Big_Data_Cloud\", method=\"read_json\"\n            )\n\n            pandas_read_json.reset_mock()\n            cloud_read_json.reset_mock()\n\n            df = pd.read_json(StringIO(json_input))\n\n            assert df.get_backend() == \"Small_Data_Local\"\n            pandas_read_json.assert_called_once()\n            cloud_read_json.assert_not_called()\n\n    def test_read_json_without_extensions(self):\n        json_input = json.dumps({\"col0\": [1]})\n\n        with backend_test_context(\n            test_backend=\"Big_Data_Cloud\",\n            choices=(\"Big_Data_Cloud\", \"Small_Data_Local\"),\n        ):\n            
df = pd.read_json(StringIO(json_input))\n            assert df.get_backend() == \"Big_Data_Cloud\"\n\n            register_function_for_pre_op_switch(\n                class_name=None, backend=\"Big_Data_Cloud\", method=\"read_json\"\n            )\n\n            df = pd.read_json(StringIO(json_input))\n\n            assert df.get_backend() == \"Small_Data_Local\"\n\n    @pytest.mark.parametrize(\n        \"data_size, expected_backend\",\n        [\n            param(\n                BIG_DATA_CLOUD_MIN_NUM_ROWS - 1,\n                \"Small_Data_Local\",\n                id=\"small_data_should_move_to_small_engine\",\n            ),\n            param(\n                BIG_DATA_CLOUD_MIN_NUM_ROWS,\n                \"Big_Data_Cloud\",\n                id=\"big_data_should_stay_in_cloud\",\n            ),\n        ],\n    )\n    def test_iloc_setitem_switches_depending_on_data_size(\n        self, data_size, expected_backend\n    ):\n        with backend_test_context(\n            test_backend=\"Big_Data_Cloud\",\n            choices=(\"Big_Data_Cloud\", \"Small_Data_Local\"),\n        ):\n            md_df, pd_df = create_test_dfs(list(range(data_size)))\n            assert md_df.get_backend() == \"Big_Data_Cloud\"\n            eval_general(\n                md_df,\n                pd_df,\n                lambda df: df.iloc.__setitem__((0, 0), -1),\n                __inplace__=True,\n            )\n            assert md_df.get_backend() == \"Big_Data_Cloud\"\n\n            register_function_for_pre_op_switch(\n                class_name=\"_iLocIndexer\",\n                backend=\"Big_Data_Cloud\",\n                method=\"__setitem__\",\n            )\n            eval_general(\n                md_df,\n                pd_df,\n                lambda df: df.iloc.__setitem__((0, 0), 0),\n                __inplace__=True,\n            )\n            assert md_df.get_backend() == expected_backend\n\n    def test_iloc_pinned(self):\n        # The operation in test_iloc 
would naturally cause an automatic switch, but the\n        # absence of AutoSwitchBackend or the presence of a pin on the frame prevent this\n        # switch from happening.\n        data_size = BIG_DATA_CLOUD_MIN_NUM_ROWS - 1\n        with backend_test_context(\n            test_backend=\"Big_Data_Cloud\",\n            choices=(\"Big_Data_Cloud\", \"Small_Data_Local\"),\n        ):\n            register_function_for_pre_op_switch(\n                class_name=\"_iLocIndexer\",\n                backend=\"Big_Data_Cloud\",\n                method=\"__setitem__\",\n            )\n            # No pin or config, should switch\n            df = pd.DataFrame(list(range(data_size)))\n            assert df.get_backend() == \"Big_Data_Cloud\"\n            df.iloc[(0, 0)] = -1\n            assert df.get_backend() == \"Small_Data_Local\"\n            # config set to false, should not switch\n            with config_context(AutoSwitchBackend=False):\n                df = pd.DataFrame(list(range(data_size)))\n                assert df.get_backend() == \"Big_Data_Cloud\"\n                df.iloc[(0, 0)] = -2\n                assert df.get_backend() == \"Big_Data_Cloud\"\n            # no config, but data is pinned\n            df = pd.DataFrame(list(range(data_size))).pin_backend()\n            assert df.get_backend() == \"Big_Data_Cloud\"\n            df.iloc[(0, 0)] = -3\n            assert df.get_backend() == \"Big_Data_Cloud\"\n            # a frame-level pin remains valid across a transformation\n            df_copy = df + 1\n            assert df_copy.get_backend() == \"Big_Data_Cloud\"\n            df_copy.iloc[(0, 0)] = -4\n            assert df_copy.get_backend() == \"Big_Data_Cloud\"\n            # unpinning df allows a switch again\n            df.unpin_backend(inplace=True)\n            assert df.get_backend() == \"Big_Data_Cloud\"\n            df.iloc[(0, 0)] = -5\n            assert df.get_backend() == \"Small_Data_Local\"\n            # An in-place set_backend 
operation clears the pin\n            df.move_to(\"Big_Data_Cloud\", inplace=True)\n            # check in-place pin/unpin operations\n            df.pin_backend(inplace=True)\n            assert df.get_backend() == \"Big_Data_Cloud\"\n            df.iloc[(0, 0)] = -6\n            assert df.get_backend() == \"Big_Data_Cloud\"\n            df.unpin_backend(inplace=True)\n            assert df.get_backend() == \"Big_Data_Cloud\"\n            df.iloc[(0, 0)] = -7\n            assert df.get_backend() == \"Small_Data_Local\"\n\n    @pytest.mark.parametrize(\n        \"args, kwargs, expected_backend\",\n        (\n            param((), {}, \"Small_Data_Local\", id=\"no_args_or_kwargs\"),\n            param(([1],), {}, \"Small_Data_Local\", id=\"small_list_data_in_arg\"),\n            param(\n                (list(range(BIG_DATA_CLOUD_MIN_NUM_ROWS)),),\n                {},\n                \"Small_Data_Local\",\n                id=\"big_list_data_in_arg\",\n            ),\n            param((), {\"data\": [1]}, \"Small_Data_Local\", id=\"list_data_in_kwarg\"),\n            param(\n                (),\n                {\"data\": pandas.Series([1])},\n                \"Small_Data_Local\",\n                id=\"series_data_in_kwarg\",\n            ),\n            param(\n                (),\n                {\"query_compiler\": CloudForBigDataQC(pandas.DataFrame([0, 1, 2]))},\n                \"Big_Data_Cloud\",\n                id=\"cloud_query_compiler_in_kwarg\",\n            ),\n            param(\n                (),\n                {\"query_compiler\": LocalForSmallDataQC(pandas.DataFrame([0, 1, 2]))},\n                \"Small_Data_Local\",\n                id=\"small_query_compiler_in_kwarg\",\n            ),\n        ),\n    )\n    @pytest.mark.parametrize(\"data_class\", [pd.DataFrame, pd.Series])\n    def test___init___with_in_memory_data_uses_native_query_compiler(\n        self, args, kwargs, expected_backend, data_class\n    ):\n        
register_function_for_pre_op_switch(\n            class_name=data_class.__name__,\n            method=\"__init__\",\n            backend=\"Big_Data_Cloud\",\n        )\n        with backend_test_context(\n            test_backend=\"Big_Data_Cloud\",\n            choices=(\"Big_Data_Cloud\", \"Small_Data_Local\"),\n        ):\n            assert data_class(*args, **kwargs).get_backend() == expected_backend\n\n    @pytest.mark.parametrize(\"data_class\", [pd.DataFrame, pd.Series])\n    @backend_test_context(\n        test_backend=\"Big_Data_Cloud\", choices=(\"Big_Data_Cloud\", \"Small_Data_Local\")\n    )\n    @pytest.mark.parametrize(\n        \"auto_switch_backend,expected_backend\",\n        [\n            (True, \"Small_Data_Local\"),\n            (False, \"Big_Data_Cloud\"),\n        ],\n    )\n    def test_auto_switch_backend_disabled_prevents___init__auto_switch(\n        self, auto_switch_backend, expected_backend, data_class\n    ):\n        register_function_for_pre_op_switch(\n            class_name=data_class.__name__,\n            method=\"__init__\",\n            backend=\"Big_Data_Cloud\",\n        )\n        with config_context(AutoSwitchBackend=auto_switch_backend):\n            assert data_class([1, 2, 3]).get_backend() == expected_backend\n\n    @pytest.mark.parametrize(\n        \"num_input_rows, expected_backend\",\n        [\n            param(\n                BIG_DATA_CLOUD_MIN_NUM_ROWS - 1,\n                \"Small_Data_Local\",\n            ),\n            (BIG_DATA_CLOUD_MIN_NUM_ROWS, \"Big_Data_Cloud\"),\n        ],\n    )\n    @pytest.mark.parametrize(\n        \"groupby_class,operation\",\n        [\n            param(\n                \"DataFrameGroupBy\",\n                lambda df: df.groupby(\"col0\").apply(lambda x: x + 1),\n                id=\"DataFrameGroupBy\",\n            ),\n            param(\n                \"SeriesGroupBy\",\n                lambda df: df.groupby(\"col0\")[\"col1\"].apply(lambda x: x + 1),\n              
  id=\"SeriesGroupBy\",\n            ),\n        ],\n    )\n    def test_groupby_apply_switches_for_small_input(\n        self, num_input_rows, expected_backend, operation, groupby_class\n    ):\n        with backend_test_context(\n            test_backend=\"Big_Data_Cloud\",\n            choices=(\"Big_Data_Cloud\", \"Small_Data_Local\"),\n        ):\n            modin_df, pandas_df = create_test_dfs(\n                {\n                    \"col0\": list(range(num_input_rows)),\n                    \"col1\": list(range(1, num_input_rows + 1)),\n                }\n            )\n            assert modin_df.get_backend() == \"Big_Data_Cloud\"\n            assert operation(modin_df).get_backend() == \"Big_Data_Cloud\"\n\n            register_function_for_pre_op_switch(\n                class_name=groupby_class, backend=\"Big_Data_Cloud\", method=\"apply\"\n            )\n\n            modin_result = operation(modin_df)\n            pandas_result = operation(pandas_df)\n            df_equals(modin_result, pandas_result)\n            assert modin_result.get_backend() == expected_backend\n            if groupby_class == \"DataFrameGroupBy\":\n                assert modin_df.get_backend() == expected_backend\n            # The original dataframe does not move with the SeriesGroupBy\n            if groupby_class == \"SeriesGroupBy\":\n                assert modin_df.get_backend() == \"Big_Data_Cloud\"\n\n    def test_T_switches(self):\n        # Ensure that calling df.T triggers a switch (GH#7653)\n        with backend_test_context(\n            test_backend=\"Big_Data_Cloud\",\n            choices=(\"Big_Data_Cloud\", \"Small_Data_Local\"),\n        ):\n            modin_df, pandas_df = create_test_dfs(\n                {\"col0\": list(range(BIG_DATA_CLOUD_MIN_NUM_ROWS - 1))}\n            )\n            assert modin_df.get_backend() == \"Big_Data_Cloud\"\n            # Registering transpose should be sufficient to cause T to trigger a switch.\n            
register_function_for_pre_op_switch(\n                class_name=\"DataFrame\", backend=\"Big_Data_Cloud\", method=\"transpose\"\n            )\n            modin_result = modin_df.T\n            pandas_result = pandas_df.T\n            df_equals(modin_result, pandas_result)\n            assert modin_result.get_backend() == \"Small_Data_Local\"\n\n    def test_concat_switch_point(self, pico_df, cloud_df, cloud_high_self_df):\n        # When concat is a switch point, backends other than those present in arguments should be considered.\n        with backend_test_context(\n            test_backend=\"Cloud\", choices=(*DEFAULT_TEST_BACKENDS, \"Eager\")\n        ):\n            register_function_for_pre_op_switch(\n                class_name=None, backend=\"Cloud\", method=\"concat\"\n            )\n            result = pd.concat([cloud_df, pico_df])\n            # concat causes in-place switching\n            # the Eager backend will always steal everything\n            assert pico_df.get_backend() == \"Eager\"\n            assert cloud_df.get_backend() == \"Eager\"\n            assert result.get_backend() == \"Eager\"\n            pico_df.move_to(\"Pico\", inplace=True)\n            cloud_df.move_to(\"Cloud\", inplace=True)\n        with backend_test_context(\n            test_backend=\"Cloud_High_Self\", choices=(\"Cloud_High_Self\", \"Cloud\")\n        ):\n            register_function_for_pre_op_switch(\n                class_name=None, backend=\"Cloud_High_Self\", method=\"concat\"\n            )\n            result = pd.concat([cloud_high_self_df, cloud_high_self_df])\n            assert cloud_high_self_df.get_backend() == \"Cloud\"\n            assert result.get_backend() == \"Cloud\"\n\n    @pytest.mark.parametrize(\"consider_all_backends\", [True, False])\n    def test_consider_all_backends_flag(\n        self, pico_df, cloud_df, cloud_high_self_df, consider_all_backends\n    ):\n        # When concat is a switch point, backends other than those present in 
arguments should be considered\n        # if BackendJoinConsiderAllBackends is set.\n        with backend_test_context(\n            test_backend=\"Cloud\", choices=(*DEFAULT_TEST_BACKENDS, \"Eager\")\n        ), config_context(BackendJoinConsiderAllBackends=consider_all_backends):\n            register_function_for_pre_op_switch(\n                class_name=None, backend=\"Cloud\", method=\"concat\"\n            )\n            result = pd.concat([cloud_df, pico_df])\n            # concat causes in-place switching\n            if consider_all_backends:\n                assert pico_df.get_backend() == \"Eager\"\n                assert cloud_df.get_backend() == \"Eager\"\n                assert result.get_backend() == \"Eager\"\n            else:\n                assert pico_df.get_backend() == \"Cloud\"\n                assert cloud_df.get_backend() == \"Cloud\"\n                assert result.get_backend() == \"Cloud\"\n\n\ndef test_move_to_clears_pin():\n    # Pin status is reset to false after a set_backend call\n    with backend_test_context(\n        test_backend=\"Big_Data_Cloud\",\n        choices=(\"Big_Data_Cloud\", \"Small_Data_Local\"),\n    ):\n        df = pd.DataFrame(list(range(10)))\n        # in-place\n        df.pin_backend(inplace=True)\n        assert df.is_backend_pinned()\n        df.move_to(\"Small_Data_Local\", inplace=True)\n        assert not df.is_backend_pinned()\n        # not in-place\n        intermediate = df.pin_backend().move_to(\"Big_Data_Cloud\")\n        assert not intermediate.is_backend_pinned()\n        assert intermediate.pin_backend().is_backend_pinned()\n\n\n@pytest.mark.parametrize(\n    \"pin_backends, expected_backend\",\n    [\n        param(\n            [(\"Small_Data_Local\", False), (\"Big_Data_Cloud\", False)],\n            \"Small_Data_Local\",\n            id=\"no_pin\",\n        ),  # no backend pinned\n        param(\n            [(\"Small_Data_Local\", True), (\"Big_Data_Cloud\", False)],\n            
\"Small_Data_Local\",\n            id=\"one_pin\",\n        ),  # one backend is pinned, so move there\n        param(\n            [\n                (\"Big_Data_Cloud\", False),\n                (\"Small_Data_Local\", True),\n                (\"Small_Data_Local\", True),\n            ],\n            \"Small_Data_Local\",\n            id=\"two_pin\",\n        ),  # two identical pinned backends\n        param(\n            [(\"Small_Data_Local\", True), (\"Big_Data_Cloud\", True)],\n            None,\n            id=\"conflict_pin\",\n        ),  # conflicting pins raises ValueError\n    ],\n)\ndef test_concat_with_pin(pin_backends, expected_backend):\n    with backend_test_context(\n        test_backend=\"Big_Data_Cloud\",\n        choices=(\"Big_Data_Cloud\", \"Small_Data_Local\"),\n    ):\n        dfs = [\n            pd.DataFrame([1] * 10).move_to(backend)._set_backend_pinned(should_pin)\n            for backend, should_pin in pin_backends\n        ]\n        if expected_backend is None:\n            with pytest.raises(\n                ValueError,\n                match=\"Cannot combine arguments that are pinned to conflicting backends\",\n            ):\n                pd.concat(dfs)\n        else:\n            result = pd.concat(dfs)\n            assert result.is_backend_pinned() == any(\n                df.is_backend_pinned() for df in dfs\n            )\n            assert result.get_backend() == expected_backend\n            df_equals(\n                result, pandas.concat([pandas.DataFrame([1] * 10)] * len(pin_backends))\n            )\n\n\n@pytest.mark.parametrize(\n    \"groupby_operation\",\n    [\n        param(\n            lambda df: df.groupby(\"col0\"),\n            id=\"DataFrameGroupBy\",\n        ),\n        param(\n            lambda df: df.groupby(\"col0\")[\"col1\"],\n            id=\"SeriesGroupBy\",\n        ),\n    ],\n)\ndef test_pin_groupby_in_place(groupby_operation):\n    \"\"\"Test that groupby objects can be pinned with 
inplace=True.\"\"\"\n    modin_df = pd.DataFrame(\n        {\n            \"col0\": list(range(BIG_DATA_CLOUD_MIN_NUM_ROWS - 1)),\n            \"col1\": list(range(1, BIG_DATA_CLOUD_MIN_NUM_ROWS)),\n        }\n    )\n\n    groupby_object = groupby_operation(modin_df)\n    assert not groupby_object.is_backend_pinned()\n\n    groupby_object.pin_backend(inplace=True)\n    assert groupby_object.is_backend_pinned()\n\n    groupby_object.unpin_backend(inplace=True)\n    assert not groupby_object.is_backend_pinned()\n\n\n@pytest.mark.parametrize(\n    \"groupby_operation\",\n    [\n        param(\n            lambda df: df.groupby(\"col0\"),\n            id=\"DataFrameGroupBy\",\n        ),\n        param(\n            lambda df: df.groupby(\"col0\")[\"col1\"],\n            id=\"SeriesGroupBy\",\n        ),\n    ],\n)\ndef test_pin_groupby_not_in_place(groupby_operation):\n    \"\"\"Test that pin_backend works with inplace=False for groupby objects.\"\"\"\n    original_groupby = groupby_operation(pd.DataFrame(columns=[\"col0\", \"col1\"]))\n    assert not original_groupby.is_backend_pinned()\n    new_groupby = original_groupby.pin_backend(inplace=False)\n    assert not original_groupby.is_backend_pinned()\n    assert new_groupby.is_backend_pinned()\n\n\n@pytest.mark.parametrize(\n    \"groupby_operation\",\n    [\n        param(\n            lambda df: df.groupby(\"col0\"),\n            id=\"DataFrameGroupBy\",\n        ),\n        param(\n            lambda df: df.groupby(\"col0\")[\"col1\"],\n            id=\"SeriesGroupBy\",\n        ),\n    ],\n)\ndef test_unpin_groupby_not_in_place(groupby_operation):\n    \"\"\"Test that unpin_backend works with inplace=False for groupby objects.\"\"\"\n    original_groupby = groupby_operation(pd.DataFrame(columns=[\"col0\", \"col1\"]))\n    original_groupby.pin_backend(inplace=True)\n    assert original_groupby.is_backend_pinned()\n    new_groupby = original_groupby.unpin_backend(inplace=False)\n    assert 
original_groupby.is_backend_pinned()\n    assert not new_groupby.is_backend_pinned()\n\n\n@pytest.mark.parametrize(\n    \"data_type,data_factory,groupby_factory\",\n    [\n        param(\n            \"DataFrame\",\n            lambda: pd.DataFrame(\n                {\n                    \"col0\": list(range(BIG_DATA_CLOUD_MIN_NUM_ROWS - 1)),\n                    \"col1\": list(range(1, BIG_DATA_CLOUD_MIN_NUM_ROWS)),\n                }\n            ),\n            lambda obj: obj.groupby(\"col0\"),\n            id=\"DataFrame\",\n        ),\n        param(\n            \"Series\",\n            lambda: pd.Series(list(range(1, BIG_DATA_CLOUD_MIN_NUM_ROWS)), name=\"data\"),\n            lambda obj: obj.groupby([0] * (BIG_DATA_CLOUD_MIN_NUM_ROWS - 1)),\n            id=\"Series\",\n        ),\n    ],\n)\ndef test_groupby_pinning_reflects_parent_object_pin_status(\n    data_type, data_factory, groupby_factory\n):\n    \"\"\"Test that groupby pinning inherits from parent object (DataFrame/Series) pin status but can be modified independently.\"\"\"\n    modin_obj = data_factory()\n\n    old_groupby_obj = groupby_factory(modin_obj)\n\n    # Initially not pinned\n    assert not old_groupby_obj.is_backend_pinned()\n    assert not modin_obj.is_backend_pinned()\n\n    # Pin the parent object - new groupby objects should inherit this\n    modin_obj.pin_backend(inplace=True)\n\n    # Create a new groupby object after pinning parent object\n    new_groupby_obj = groupby_factory(modin_obj)\n\n    # New groupby should inherit the pinned status\n    assert new_groupby_obj.is_backend_pinned()\n    assert modin_obj.is_backend_pinned()\n\n    # But we can still modify groupby pinning independently\n    new_groupby_obj.unpin_backend(inplace=True)\n\n    # Parent object should remain pinned, groupby should be unpinned\n    assert not new_groupby_obj.is_backend_pinned()\n    assert modin_obj.is_backend_pinned()\n\n    assert not old_groupby_obj.is_backend_pinned()\n    
old_groupby_obj.pin_backend(inplace=True)\n    assert old_groupby_obj.is_backend_pinned()\n\n\ndef test_second_init_only_calls_from_pandas_once_github_issue_7559():\n    with config_context(Backend=\"Big_Data_Cloud\"):\n        # Create a dataframe once first so that we can initialize the dummy\n        # query compiler for the Big_Data_Cloud backend.\n        pd.DataFrame([1])\n        with mock.patch.object(\n            factories.Big_Data_CloudOnNativeFactory.io_cls.query_compiler_cls,\n            \"from_pandas\",\n            wraps=factories.Big_Data_CloudOnNativeFactory.io_cls.query_compiler_cls.from_pandas,\n        ) as mock_from_pandas:\n            pd.DataFrame([1])\n            mock_from_pandas.assert_called_once()\n\n\ndef test_native_config():\n    qc = NativeQueryCompiler(pandas.DataFrame([0, 1, 2]))\n\n    # Native Query Compiler gets a special configuration\n    assert qc._TRANSFER_THRESHOLD == 0\n    assert qc._transfer_threshold() == NativePandasTransferThreshold.get()\n    assert qc._MAX_SIZE_THIS_ENGINE_CAN_HANDLE == 1\n    assert qc._engine_max_size() == NativePandasMaxRows.get()\n\n    oldmax = qc._engine_max_size()\n    oldthresh = qc._transfer_threshold()\n\n    with config_context(NativePandasMaxRows=123, NativePandasTransferThreshold=321):\n        qc2 = NativeQueryCompiler(pandas.DataFrame([0, 1, 2]))\n        assert qc2._transfer_threshold() == 321\n        assert qc2._engine_max_size() == 123\n        assert qc._engine_max_size() == 123\n        assert qc._transfer_threshold() == 321\n\n        # sub class configuration is unchanged\n        class AQC(NativeQueryCompiler):\n            pass\n\n        subqc = AQC(pandas.DataFrame([0, 1, 2]))\n        assert subqc._TRANSFER_THRESHOLD == 0\n        assert subqc._MAX_SIZE_THIS_ENGINE_CAN_HANDLE == 1\n\n    assert qc._engine_max_size() == oldmax\n    assert qc._transfer_threshold() == oldthresh\n\n\ndef test_cast_metrics(pico_df, cluster_df):\n    try:\n        count = 0\n\n        def 
test_handler(metric: str, value) -> None:\n            nonlocal count\n            if metric.startswith(\"modin.hybrid.merge\"):\n                count += 1\n\n        add_metric_handler(test_handler)\n        df3 = pd.concat([pico_df, cluster_df], axis=1)\n        assert df3.get_backend() == \"Cluster\"  # result should be on cluster\n        assert count == 7\n    finally:\n        clear_metric_handler(test_handler)\n\n\ndef test_switch_metrics(pico_df, cluster_df):\n    with backend_test_context(\n        test_backend=\"Big_Data_Cloud\",\n        choices=(\"Big_Data_Cloud\", \"Small_Data_Local\"),\n    ):\n        try:\n            count = 0\n\n            def test_handler(metric: str, value) -> None:\n                nonlocal count\n                if metric.startswith(\"modin.hybrid.auto\"):\n                    count += 1\n\n            add_metric_handler(test_handler)\n\n            register_function_for_pre_op_switch(\n                class_name=\"DataFrame\",\n                backend=\"Big_Data_Cloud\",\n                method=\"describe\",\n            )\n            df = pd.DataFrame([1] * 10)\n            assert df.get_backend() == \"Big_Data_Cloud\"\n            df.describe()\n            assert count == 8\n        finally:\n            clear_metric_handler(test_handler)\n"
  },
  {
    "path": "modin/tests/pandas/native_df_interoperability/test_copy_on_write.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n# Tests interactions between a modin frame and a parent or child native pandas frame when one\n# object's metadata or data is modified.\n# Only valid on the native pandas backend.\n\nimport functools\n\nimport pandas\nimport pytest\n\nimport modin.pandas as pd\nfrom modin.config import Backend\nfrom modin.config import context as config_context\n\n\n@pytest.fixture(scope=\"module\", autouse=True)\ndef mutation_cow_test():\n    if Backend.get() != \"Pandas\":\n        pytest.skip(\n            reason=\"tests are only meaningful with pandas backend\",\n            allow_module_level=True,\n        )\n\n\n@pytest.fixture(scope=\"function\")\ndef copy_on_write(request):\n    # Indirect fixture for toggling copy-on-write when tests are run\n    with config_context(\n        Backend=\"Pandas\", NativePandasDeepCopy=False\n    ), pandas.option_context(\"mode.copy_on_write\", request.param):\n        yield request.param\n\n\ndef get_mutation_fixtures(data, **kwargs):\n    # Return a fixture that sets the copy_on_write fixture, then passes a modin and native DF together for mutation testing.\n    # One parameter combination creates a modin DF from a native DF.\n    # The 
other creates a native DF by calling to_pandas on a modin DF.\n    def wrapper(f):\n        # Need to create separate functions so parametrized runs don't affect each other.\n        def native_first():\n            native_input = pandas.DataFrame(data, **kwargs)\n            return native_input, pd.DataFrame(native_input)\n\n        def modin_first():\n            modin_input = pd.DataFrame(data, **kwargs)\n            return modin_input, modin_input.modin.to_pandas()\n\n        @pytest.mark.parametrize(\"df_factory\", [native_first, modin_first])\n        @pytest.mark.parametrize(\n            \"copy_on_write\",\n            [pytest.param(True, id=\"CoW\"), pytest.param(False, id=\"no_CoW\")],\n            indirect=True,\n        )\n        @functools.wraps(f)\n        def test_runner(*args, **kwargs):\n            return f(*args, **kwargs)\n\n        return test_runner\n\n    return wrapper\n\n\n@pytest.mark.parametrize(\n    \"axis\", [pytest.param(0, id=\"index\"), pytest.param(1, id=\"columns\")]\n)\n@get_mutation_fixtures({\"A\": [0, 1], \"B\": [2, 3]})\ndef test_set_axis_name(axis, copy_on_write, df_factory):\n    df1, df2 = df_factory()\n    df1.axes[axis].name = \"x\"\n    assert df1.axes[axis].name == \"x\"\n    # Changes do not propagate when copy-on-write is enabled.\n    if copy_on_write:\n        assert df2.axes[axis].name is None\n    else:\n        assert df2.axes[axis].name == \"x\"\n    df2.axes[axis].name = \"y\"\n    assert df1.axes[axis].name == (\"x\" if copy_on_write else \"y\")\n    assert df2.axes[axis].name == \"y\"\n\n\n@pytest.mark.parametrize(\n    \"axis\", [pytest.param(0, id=\"index\"), pytest.param(1, id=\"columns\")]\n)\n@get_mutation_fixtures({\"A\": [0, 1], \"B\": [2, 3]}, index=[\"A\", \"B\"])\ndef test_rename_axis(axis, copy_on_write, df_factory):\n    df1, df2 = df_factory()\n    # Renames don't propagate, regardless of CoW.\n    df1.rename({\"A\": \"aprime\"}, axis=axis, inplace=True)\n    assert df1.axes[axis].tolist() == 
[\"aprime\", \"B\"]\n    assert df2.axes[axis].tolist() == [\"A\", \"B\"]\n    df2.rename({\"B\": \"bprime\"}, axis=axis, inplace=True)\n    assert df1.axes[axis].tolist() == [\"aprime\", \"B\"]\n    assert df2.axes[axis].tolist() == [\"A\", \"bprime\"]\n\n\n@get_mutation_fixtures({\"A\": [0, 1], \"B\": [2, 3]})\ndef test_locset(copy_on_write, df_factory):\n    df1, df2 = df_factory()\n    df1.loc[0, \"A\"] = -1\n    assert df1.loc[0, \"A\"] == -1\n    assert df2.loc[0, \"A\"] == (0 if copy_on_write else -1)\n    df2.loc[1, \"B\"] = 999\n    assert df1.loc[1, \"B\"] == (3 if copy_on_write else 999)\n    assert df2.loc[1, \"B\"] == 999\n\n\n@get_mutation_fixtures({\"A\": [0, 1], \"B\": [2, 3]})\ndef test_add_column(copy_on_write, df_factory):\n    df1, df2 = df_factory()\n    df1[\"C\"] = [4, 5]\n    assert df1[\"C\"].tolist() == [4, 5]\n    # Even with CoW disabled, the new column is not added to df2.\n    assert df2.columns.tolist() == [\"A\", \"B\"]\n    df2[\"D\"] = [6, 7]\n    assert df2[\"D\"].tolist() == [6, 7]\n    assert df1.columns.tolist() == [\"A\", \"B\", \"C\"]\n\n\n@get_mutation_fixtures({\"A\": [0, 1], \"B\": [2, 3]})\ndef test_add_row(copy_on_write, df_factory):\n    df1, df2 = df_factory()\n    df1.loc[9] = [4, 5]\n    assert df1.loc[9].tolist() == [4, 5]\n    # Even with CoW disabled, the new row is not added to df2.\n    assert df2.index.tolist() == [0, 1]\n    df2.loc[10] = [6, 7]\n    assert df2.loc[10].tolist() == [6, 7]\n    assert df1.index.tolist() == [0, 1, 9]\n\n\n@pytest.mark.filterwarnings(\"ignore::FutureWarning\")\n@pytest.mark.filterwarnings(\"ignore::pandas.errors.ChainedAssignmentError\")\n@get_mutation_fixtures({\"A\": [0, 1], \"B\": [2, 3]})\ndef test_chained_assignment(copy_on_write, df_factory):\n    df1, df2 = df_factory()\n    is_assign_noop = copy_on_write and isinstance(df1, pandas.DataFrame)\n    df1[\"A\"][0] = -1\n    assert df1[\"A\"][0] == (0 if is_assign_noop else -1)\n    assert df2[\"A\"][0] == (\n        0 if 
copy_on_write or isinstance(df2, pandas.DataFrame) else -1\n    )\n    is_assign_noop = copy_on_write and isinstance(df2, pandas.DataFrame)\n    df2[\"B\"][1] = 999\n    assert df1[\"B\"][1] == (\n        3 if copy_on_write or isinstance(df1, pandas.DataFrame) else 999\n    )\n    assert df2[\"B\"][1] == (3 if is_assign_noop else 999)\n\n\n@get_mutation_fixtures({\"A\": [0, 1], \"B\": [2, 3]})\ndef test_column_reassign(copy_on_write, df_factory):\n    df1, df2 = df_factory()\n    df1[\"A\"] = df1[\"A\"] - 1\n    assert df1[\"A\"].tolist() == [-1, 0]\n    assert df2[\"A\"].tolist() == [0, 1]\n    df2[\"B\"] = df2[\"B\"] + 1\n    assert df1[\"B\"].tolist() == [2, 3]\n    assert df2[\"B\"].tolist() == [3, 4]\n\n\n@pytest.mark.parametrize(\"always_deep\", [True, False])\ndef test_explicit_copy(always_deep):\n    # Test that making an explicit copy with deep=True actually makes a deep copy.\n    with config_context(NativePandasDeepCopy=always_deep):\n        df = pd.DataFrame([[0]])\n        # We don't really care about behavior with shallow copy, since modin semantics don't line up\n        # perfectly with native pandas.\n        df_copy = df.copy(deep=True)\n        df.loc[0, 0] = -1\n        assert df.loc[0, 0] == -1\n        assert df_copy.loc[0, 0] == 0\n"
  },
  {
    "path": "modin/tests/pandas/native_df_interoperability/test_default.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport matplotlib\nimport numpy as np\nimport pandas\nimport pytest\nfrom numpy.testing import assert_array_equal\n\nimport modin.pandas as pd\nfrom modin.config import NPartitions\nfrom modin.pandas.io import to_pandas\nfrom modin.tests.pandas.native_df_interoperability.utils import (\n    create_test_df_in_defined_mode,\n    create_test_series_in_defined_mode,\n    eval_general_interop,\n)\nfrom modin.tests.pandas.utils import (\n    default_to_pandas_ignore_string,\n    df_equals,\n    test_data,\n    test_data_diff_dtype,\n    test_data_keys,\n    test_data_large_categorical_dataframe,\n    test_data_values,\n)\nfrom modin.tests.test_utils import (\n    df_or_series_using_native_execution,\n    warns_that_defaulting_to_pandas_if,\n)\n\nNPartitions.put(4)\n\n# Force matplotlib to not use any Xwindows backend.\nmatplotlib.use(\"Agg\")\n\n# Our configuration in pytest.ini requires that we explicitly catch all\n# instances of defaulting to pandas, but some test modules, like this one,\n# have too many such instances.\npytestmark = [\n    pytest.mark.filterwarnings(default_to_pandas_ignore_string),\n    # IGNORE FUTUREWARNINGS MARKS TO CLEANUP OUTPUT\n    
pytest.mark.filterwarnings(\n        \"ignore:.*bool is now deprecated and will be removed:FutureWarning\"\n    ),\n    pytest.mark.filterwarnings(\n        \"ignore:first is deprecated and will be removed:FutureWarning\"\n    ),\n    pytest.mark.filterwarnings(\n        \"ignore:last is deprecated and will be removed:FutureWarning\"\n    ),\n]\n\n\n@pytest.mark.parametrize(\n    \"op, make_args\",\n    [\n        (\"align\", lambda df: {\"other\": df}),\n        (\"corrwith\", lambda df: {\"other\": df}),\n        (\"ewm\", lambda df: {\"com\": 0.5}),\n        (\"from_dict\", lambda df: {\"data\": None}),\n        (\"from_records\", lambda df: {\"data\": to_pandas(df)}),\n        (\"hist\", lambda df: {\"column\": \"int_col\"}),\n        (\"interpolate\", None),\n        (\"mask\", lambda df: {\"cond\": df != 0}),\n        (\"pct_change\", None),\n        (\"to_xarray\", None),\n        (\"flags\", None),\n        (\"set_flags\", lambda df: {\"allows_duplicate_labels\": False}),\n    ],\n)\ndef test_ops_defaulting_to_pandas(op, make_args, df_mode_pair):\n    modin_df1, _ = create_test_df_in_defined_mode(\n        test_data_diff_dtype,\n        post_fn=lambda df: df.drop([\"str_col\", \"bool_col\"], axis=1),\n        native=df_mode_pair[0],\n    )\n    modin_df2, _ = create_test_df_in_defined_mode(\n        test_data_diff_dtype,\n        post_fn=lambda df: df.drop([\"str_col\", \"bool_col\"], axis=1),\n        native=df_mode_pair[1],\n    )\n    with warns_that_defaulting_to_pandas_if(\n        not df_or_series_using_native_execution(modin_df1)\n    ):\n        operation = getattr(modin_df1, op)\n        if make_args is not None:\n            operation(**make_args(modin_df2))\n        else:\n            try:\n                operation()\n            # `except` for non callable attributes\n            except TypeError:\n                pass\n\n\n@pytest.mark.parametrize(\n    \"data\",\n    test_data_values + [test_data_large_categorical_dataframe],\n    
ids=test_data_keys + [\"categorical_ints\"],\n)\ndef test_to_numpy(data):\n    modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data)\n    assert_array_equal(modin_df.values, pandas_df.values)\n\n\ndef test_array_ufunc():\n    modin_df, pandas_df = create_test_df_in_defined_mode([[1, 2], [3, 4]], native=True)\n    df_equals(np.sqrt(modin_df), np.sqrt(pandas_df))\n    modin_ser, pandas_ser = create_test_series_in_defined_mode(\n        [1, 2, 3, 4, 9], native=True\n    )\n    df_equals(np.sqrt(modin_ser), np.sqrt(pandas_ser))\n\n\ndef test_asfreq(df_mode_pair):\n    index = pd.date_range(\"1/1/2000\", periods=4, freq=\"min\")\n    series, _ = create_test_series_in_defined_mode(\n        [0.0, None, 2.0, 3.0], index=index, native=df_mode_pair[0]\n    )\n    df, _ = create_test_df_in_defined_mode({\"s\": series}, native=df_mode_pair[1])\n    with warns_that_defaulting_to_pandas_if(\n        not df_or_series_using_native_execution(df)\n    ):\n        # We are only testing that this defaults to pandas, so we will just check for\n        # the warning\n        df.asfreq(freq=\"30S\")\n\n\ndef test_assign(df_mode_pair):\n    data = test_data_values[0]\n\n    def assign_one_column(df1, df2):\n        df1.assign(new_column=pd.Series(df2.iloc[:, 0]))\n\n    eval_general_interop(data, None, assign_one_column, df_mode_pair)\n\n    def assign_multiple_columns(df1, df2):\n        df1.assign(\n            new_column=pd.Series(df2.iloc[:, 0]), new_column2=pd.Series(df2.iloc[:, 1])\n        )\n\n    eval_general_interop(data, None, assign_multiple_columns, df_mode_pair)\n\n\ndef test_combine_first(df_mode_pair):\n    data1 = {\"A\": [None, 0], \"B\": [None, 4]}\n    modin_df1, pandas_df1 = create_test_df_in_defined_mode(\n        data1, native=df_mode_pair[0]\n    )\n    data2 = {\"A\": [1, 1], \"B\": [3, 3]}\n    modin_df2, pandas_df2 = create_test_df_in_defined_mode(\n        data2, native=df_mode_pair[1]\n    )\n\n    df_equals(\n        
modin_df1.combine_first(modin_df2),\n        pandas_df1.combine_first(pandas_df2),\n        # https://github.com/modin-project/modin/issues/5959\n        check_dtypes=False,\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_dot(data, df_mode_pair):\n\n    modin_df, pandas_df = create_test_df_in_defined_mode(data, native=df_mode_pair[0])\n    col_len = len(modin_df.columns)\n\n    # Test series input\n    modin_series, pandas_series = create_test_series_in_defined_mode(\n        np.arange(col_len),\n        index=pandas_df.columns,\n        native=df_mode_pair[1],\n    )\n    modin_result = modin_df.dot(modin_series)\n    pandas_result = pandas_df.dot(pandas_series)\n    df_equals(modin_result, pandas_result)\n\n    def dot_func(df1, df2):\n        return df1.dot(df2.T)\n\n    # modin_result = modin_df.dot(modin_df.T)\n    # pandas_result = pandas_df.dot(pandas_df.T)\n    # df_equals(modin_result, pandas_result)\n    # Test dataframe input\n    eval_general_interop(data, None, dot_func, df_mode_pair)\n\n    # Test when input series index doesn't line up with columns\n    with pytest.raises(ValueError):\n        modin_series_without_index, _ = create_test_series_in_defined_mode(\n            np.arange(col_len), native=df_mode_pair[1]\n        )\n        modin_df.dot(modin_series_without_index)\n\n    # Test case when left dataframe has size (n x 1)\n    # and right dataframe has size (1 x n)\n    eval_general_interop(pandas_series, None, dot_func, df_mode_pair)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_matmul(data, df_mode_pair):\n    modin_df, pandas_df = create_test_df_in_defined_mode(data, native=df_mode_pair[0])\n    col_len = len(modin_df.columns)\n\n    # Test list input\n    arr = np.arange(col_len)\n    modin_result = modin_df @ arr\n    pandas_result = pandas_df @ arr\n    df_equals(modin_result, pandas_result)\n\n    # Test bad dimensions\n    with 
pytest.raises(ValueError):\n        modin_df @ np.arange(col_len + 10)\n\n    # Test series input\n    modin_series, pandas_series = create_test_series_in_defined_mode(\n        np.arange(col_len),\n        index=pandas_df.columns,\n        native=df_mode_pair[1],\n    )\n    modin_result = modin_df @ modin_series\n    pandas_result = pandas_df @ pandas_series\n    df_equals(modin_result, pandas_result)\n\n    # Test dataframe input\n    def matmul_func(df1, df2):\n        return df1 @ df2.T\n\n    # Test dataframe input\n    eval_general_interop(data, None, matmul_func, df_mode_pair)\n\n    # Test when input series index doesn't line up with columns\n    with pytest.raises(ValueError):\n        modin_series_without_index, _ = create_test_series_in_defined_mode(\n            np.arange(col_len), native=df_mode_pair[1]\n        )\n        modin_df @ modin_series_without_index\n\n\n@pytest.mark.parametrize(\"data\", [test_data[\"int_data\"]], ids=[\"int_data\"])\n@pytest.mark.parametrize(\n    \"index\",\n    [\n        pytest.param(lambda _, df: df.columns[0], id=\"single_index_col\"),\n        pytest.param(\n            lambda _, df: [*df.columns[0:2], *df.columns[-7:-4]],\n            id=\"multiple_index_cols\",\n        ),\n        pytest.param(None, id=\"default_index\"),\n    ],\n)\n@pytest.mark.parametrize(\n    \"columns\",\n    [\n        pytest.param(lambda _, df: df.columns[len(df.columns) // 2], id=\"single_col\"),\n        pytest.param(\n            lambda _, df: [\n                *df.columns[(len(df.columns) // 2) : (len(df.columns) // 2 + 4)],\n                df.columns[-7],\n            ],\n            id=\"multiple_cols\",\n        ),\n        pytest.param(None, id=\"default_columns\"),\n    ],\n)\n@pytest.mark.parametrize(\n    \"values\",\n    [\n        pytest.param(lambda _, df: df.columns[-1], id=\"single_value_col\"),\n        pytest.param(lambda _, df: df.columns[-4:-1], id=\"multiple_value_cols\"),\n    ],\n)\n@pytest.mark.parametrize(\n    
\"aggfunc\",\n    [\n        pytest.param(lambda df, _: np.mean(df), id=\"callable_tree_reduce_func\"),\n        pytest.param(\"mean\", id=\"tree_reduce_func\"),\n        pytest.param(\"nunique\", id=\"full_axis_func\"),\n    ],\n)\ndef test_pivot_table_data(data, index, columns, values, aggfunc, request, df_mode_pair):\n    if (\n        \"callable_tree_reduce_func-single_value_col-multiple_cols-multiple_index_cols\"\n        in request.node.callspec.id\n        or \"callable_tree_reduce_func-multiple_value_cols-multiple_cols-multiple_index_cols\"\n        in request.node.callspec.id\n        or \"tree_reduce_func-single_value_col-multiple_cols-multiple_index_cols\"\n        in request.node.callspec.id\n        or \"tree_reduce_func-multiple_value_cols-multiple_cols-multiple_index_cols\"\n        in request.node.callspec.id\n        or \"full_axis_func-single_value_col-multiple_cols-multiple_index_cols\"\n        in request.node.callspec.id\n        or \"full_axis_func-multiple_value_cols-multiple_cols-multiple_index_cols\"\n        in request.node.callspec.id\n    ):\n        pytest.xfail(reason=\"https://github.com/modin-project/modin/issues/7011\")\n\n    expected_exception = None\n    if \"default_columns-default_index\" in request.node.callspec.id:\n        expected_exception = ValueError(\"No group keys passed!\")\n    elif (\n        \"callable_tree_reduce_func\" in request.node.callspec.id\n        and \"int_data\" in request.node.callspec.id\n    ):\n        expected_exception = TypeError(\"'numpy.float64' object is not callable\")\n\n    eval_general_interop(\n        data,\n        None,\n        operation=lambda df, _, *args, **kwargs: df.pivot_table(\n            *args, **kwargs\n        ).sort_index(axis=int(index is not None)),\n        df_mode_pair=df_mode_pair,\n        index=index,\n        columns=columns,\n        values=values,\n        aggfunc=aggfunc,\n        expected_exception=expected_exception,\n    )\n"
  },
  {
    "path": "modin/tests/pandas/native_df_interoperability/test_default_to_pandas_without_warnings.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n# While other modin backends raise a warning when defaulting to pandas, it does not make sense to\n# do so when we're running on the native pandas backend already. These tests ensure such warnings\n# are not raised with the pandas backend.\n\nimport numpy as np\nimport pandas\nimport pytest\n\nimport modin.pandas as pd\nfrom modin.config import Backend\nfrom modin.tests.pandas.utils import df_equals\n\npytestmark = [\n    pytest.mark.skipif(\n        Backend.get() != \"Pandas\",\n        reason=\"warnings only suppressed on native pandas backend\",\n        allow_module_level=True,\n    ),\n    # Error if a default to pandas warning is detected.\n    pytest.mark.filterwarnings(\"error:is not supported by NativeOnNative:UserWarning\"),\n]\n\n\ndef test_crosstab_no_warning():\n    # Example from pandas docs\n    # https://pandas.pydata.org/docs/reference/api/pandas.crosstab.html\n    a = np.array(\n        [\"foo\", \"foo\", \"foo\", \"foo\", \"bar\", \"bar\", \"bar\", \"bar\", \"foo\", \"foo\", \"foo\"],\n        dtype=object,\n    )\n    b = np.array(\n        [\"one\", \"one\", \"one\", \"two\", \"one\", \"one\", \"one\", \"two\", \"two\", \"two\", \"one\"],\n     
   dtype=object,\n    )\n    c = np.array(\n        [\n            \"dull\",\n            \"dull\",\n            \"shiny\",\n            \"dull\",\n            \"dull\",\n            \"shiny\",\n            \"shiny\",\n            \"dull\",\n            \"shiny\",\n            \"shiny\",\n            \"shiny\",\n        ],\n        dtype=object,\n    )\n    df_equals(\n        pd.crosstab(a, [b, c], rownames=[\"a\"], colnames=[\"b\", \"c\"]),\n        pandas.crosstab(a, [b, c], rownames=[\"a\"], colnames=[\"b\", \"c\"]),\n    )\n\n\ndef test_json_normalize_no_warning():\n    # Example from pandas docs\n    # https://pandas.pydata.org/docs/reference/api/pandas.json_normalize.html\n    data = [\n        {\"id\": 1, \"name\": {\"first\": \"Coleen\", \"last\": \"Volk\"}},\n        {\"name\": {\"given\": \"Mark\", \"family\": \"Regner\"}},\n        {\"id\": 2, \"name\": \"Faye Raker\"},\n    ]\n    df_equals(pd.json_normalize(data), pandas.json_normalize(data))\n"
  },
  {
    "path": "modin/tests/pandas/native_df_interoperability/test_general.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport pandas\nimport pytest\n\nimport modin.pandas as pd\nfrom modin.tests.pandas.native_df_interoperability.utils import (\n    create_test_df_in_defined_mode,\n    create_test_series_in_defined_mode,\n)\nfrom modin.tests.pandas.utils import default_to_pandas_ignore_string, df_equals\n\n# Our configuration in pytest.ini requires that we explicitly catch all\n# instances of defaulting to pandas, but some test modules, like this one,\n# have too many such instances.\npytestmark = pytest.mark.filterwarnings(default_to_pandas_ignore_string)\n\n\ndef test_cut(df_mode_pair):\n    modin_x, pandas_x = create_test_series_in_defined_mode(\n        [1, 3], native=df_mode_pair[0]\n    )\n    modin_bins, pandas_bins = create_test_series_in_defined_mode(\n        [0, 2], native=df_mode_pair[1]\n    )\n\n    def operation(*, lib, x, bins):\n        return lib.cut(x, bins)\n\n    df_equals(\n        operation(lib=pd, x=modin_x, bins=modin_bins),\n        operation(lib=pandas, x=pandas_x, bins=pandas_bins),\n    )\n\n\ndef test_qcut(df_mode_pair):\n    modin_x, pandas_x = create_test_series_in_defined_mode(\n        [1, 2, 3, 4], native=df_mode_pair[0]\n    )\n    
modin_quantiles, pandas_quantiles = create_test_series_in_defined_mode(\n        [0, 0.5, 1], native=df_mode_pair[1]\n    )\n\n    def operation(*, lib, x, quantiles):\n        return lib.qcut(x, quantiles)\n\n    df_equals(\n        operation(lib=pd, x=modin_x, quantiles=modin_quantiles),\n        operation(lib=pandas, x=pandas_x, quantiles=pandas_quantiles),\n    )\n\n\ndef test_merge_ordered(df_mode_pair):\n    modin_left, pandas_left = create_test_df_in_defined_mode(\n        {\n            \"key\": [\"a\", \"c\", \"e\", \"a\", \"c\", \"e\"],\n            \"lvalue\": [1, 2, 3, 1, 2, 3],\n            \"group\": [\"a\", \"a\", \"a\", \"b\", \"b\", \"b\"],\n        },\n        native=df_mode_pair[0],\n    )\n    modin_right, pandas_right = create_test_df_in_defined_mode(\n        {\"key\": [\"b\", \"c\", \"d\"], \"rvalue\": [1, 2, 3]},\n        native=df_mode_pair[1],\n    )\n\n    def operation(*, lib, left, right):\n        return lib.merge_ordered(left, right, fill_method=\"ffill\", left_by=\"group\")\n\n    df_equals(\n        operation(lib=pd, left=modin_left, right=modin_right),\n        operation(lib=pandas, left=pandas_left, right=pandas_right),\n    )\n\n\ndef test_merge_asof(df_mode_pair):\n    modin_left, pandas_left = create_test_df_in_defined_mode(\n        {\"a\": [1, 5, 10], \"left_val\": [\"a\", \"b\", \"c\"]}, native=df_mode_pair[0]\n    )\n    modin_right, pandas_right = create_test_df_in_defined_mode(\n        {\"a\": [1, 2, 3, 6, 7], \"right_val\": [1, 2, 3, 6, 7]},\n        native=df_mode_pair[1],\n    )\n\n    def operation(*, lib, left, right):\n        return lib.merge_asof(left, right, on=\"a\")\n\n    df_equals(\n        operation(lib=pd, left=modin_left, right=modin_right),\n        operation(lib=pandas, left=pandas_left, right=pandas_right),\n    )\n"
  },
  {
    "path": "modin/tests/pandas/native_df_interoperability/test_indexing.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\nfrom itertools import product\n\nimport matplotlib\nimport numpy as np\nimport pandas\nimport pytest\n\nimport modin.pandas as pd\nfrom modin.config import NPartitions\nfrom modin.tests.pandas.native_df_interoperability.utils import (\n    create_test_df_in_defined_mode,\n    create_test_series_in_defined_mode,\n    eval_general_interop,\n)\nfrom modin.tests.pandas.utils import (\n    RAND_HIGH,\n    RAND_LOW,\n    assert_dtypes_equal,\n    default_to_pandas_ignore_string,\n    df_equals,\n    eval_general,\n    test_data,\n    test_data_keys,\n    test_data_values,\n)\n\nNPartitions.put(4)\n\n# Force matplotlib to not use any Xwindows backend.\nmatplotlib.use(\"Agg\")\n\n# Our configuration in pytest.ini requires that we explicitly catch all\n# instances of defaulting to pandas, but some test modules, like this one,\n# have too many such instances.\n# TODO(https://github.com/modin-project/modin/issues/3655): catch all instances\n# of defaulting to pandas.\npytestmark = pytest.mark.filterwarnings(default_to_pandas_ignore_string)\n\n\ndef eval_setitem(md_df, pd_df, value, col=None, loc=None, expected_exception=None):\n    if loc is not None:\n        col = 
pd_df.columns[loc]\n\n    value_getter = value if callable(value) else (lambda *args, **kwargs: value)\n\n    eval_general(\n        md_df,\n        pd_df,\n        lambda df: df.__setitem__(col, value_getter(df)),\n        __inplace__=True,\n        expected_exception=expected_exception,\n    )\n    for pair in list(product([True, False], repeat=2)):\n        eval_general_interop(\n            pd_df,\n            None,\n            lambda df1, df2: df1.__setitem__(col, value_getter(df2)),\n            pair,\n            __inplace__=True,\n            expected_exception=expected_exception,\n        )\n\n\ndef eval_loc(md_df, pd_df, value, key):\n    if isinstance(value, tuple):\n        assert len(value) == 2\n        # case when value for pandas different\n        md_value, pd_value = value\n    else:\n        md_value, pd_value = value, value\n\n    eval_general(\n        md_df,\n        pd_df,\n        lambda df: df.loc.__setitem__(\n            key, pd_value if isinstance(df, pandas.DataFrame) else md_value\n        ),\n        __inplace__=True,\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\n    \"key_func\",\n    [\n        # test for the case from https://github.com/modin-project/modin/issues/4308\n        lambda df: \"non_existing_column\",\n        lambda df: df.columns[0],\n        lambda df: df.index,\n        lambda df: [df.index, df.columns[0]],\n        lambda df: (\n            pandas.Series(list(range(len(df.index))))\n            if isinstance(df, pandas.DataFrame)\n            else pd.Series(list(range(len(df))))\n        ),\n    ],\n    ids=[\n        \"non_existing_column\",\n        \"first_column_name\",\n        \"original_index\",\n        \"list_of_index_and_first_column_name\",\n        \"series_of_integers\",\n    ],\n)\n@pytest.mark.parametrize(\n    \"drop_kwargs\",\n    [{\"drop\": True}, {\"drop\": False}, {}],\n    ids=[\"drop_True\", \"drop_False\", 
\"no_drop_param\"],\n)\ndef test_set_index(data, key_func, drop_kwargs, request, df_mode_pair):\n    if (\n        \"list_of_index_and_first_column_name\" in request.node.name\n        and \"drop_False\" in request.node.name\n    ):\n        pytest.xfail(\n            reason=\"KeyError: https://github.com/modin-project/modin/issues/5636\"\n        )\n    expected_exception = None\n    if \"non_existing_column\" in request.node.callspec.id:\n        expected_exception = KeyError(\n            \"None of ['non_existing_column'] are in the columns\"\n        )\n\n    eval_general_interop(\n        data,\n        None,\n        lambda df1, df2: df1.set_index(key_func(df2), **drop_kwargs),\n        expected_exception=expected_exception,\n        df_mode_pair=df_mode_pair,\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_loc(data, df_mode_pair):\n    modin_df, pandas_df = create_test_df_in_defined_mode(data, native=df_mode_pair[0])\n\n    indices = [i % 3 == 0 for i in range(len(modin_df.index))]\n    columns = [i % 5 == 0 for i in range(len(modin_df.columns))]\n\n    # Key is a Modin or pandas series of booleans\n    series1, _ = create_test_series_in_defined_mode(indices, native=df_mode_pair[0])\n    series2, _ = create_test_series_in_defined_mode(\n        columns, index=modin_df.columns, native=df_mode_pair[0]\n    )\n    df_equals(\n        modin_df.loc[series1, series2],\n        pandas_df.loc[\n            pandas.Series(indices), pandas.Series(columns, index=modin_df.columns)\n        ],\n    )\n\n\n@pytest.mark.parametrize(\"left, right\", [(2, 1), (6, 1), (lambda df: 70, 1), (90, 70)])\ndef test_loc_insert_row(left, right, df_mode_pair):\n    # This test case comes from\n    # https://github.com/modin-project/modin/issues/3764\n    data = [[1, 2, 3], [4, 5, 6]]\n\n    def _test_loc_rows(df1, df2):\n        df1.loc[left] = df2.loc[right]\n        return df1\n\n    expected_exception = None\n    if right == 70:\n        
pytest.xfail(reason=\"https://github.com/modin-project/modin/issues/7024\")\n\n    eval_general_interop(\n        data,\n        None,\n        _test_loc_rows,\n        expected_exception=expected_exception,\n        df_mode_pair=df_mode_pair,\n    )\n\n\n@pytest.fixture\ndef loc_iter_dfs_interop(df_mode_pair):\n    columns = [\"col1\", \"col2\", \"col3\"]\n    index = [\"row1\", \"row2\", \"row3\"]\n    md_df1, pd_df1 = create_test_df_in_defined_mode(\n        {col: ([idx] * len(index)) for idx, col in enumerate(columns)},\n        columns=columns,\n        index=index,\n        native=df_mode_pair[0],\n    )\n    md_df2, pd_df2 = create_test_df_in_defined_mode(\n        {col: ([idx] * len(index)) for idx, col in enumerate(columns)},\n        columns=columns,\n        index=index,\n        native=df_mode_pair[1],\n    )\n    return md_df1, pd_df1, md_df2, pd_df2\n\n\n@pytest.mark.parametrize(\"reverse_order\", [False, True])\n@pytest.mark.parametrize(\"axis\", [0, 1])\ndef test_loc_iter_assignment(loc_iter_dfs_interop, reverse_order, axis):\n    if reverse_order and axis:\n        pytest.xfail(\n            \"Due to internal sorting of lookup values assignment order is lost, see GH-#2552\"\n        )\n\n    md_df1, pd_df1, md_df2, pd_df2 = loc_iter_dfs_interop\n\n    select = [slice(None), slice(None)]\n    select[axis] = sorted(pd_df1.axes[axis][:-1], reverse=reverse_order)\n    select = tuple(select)\n\n    pd_df1.loc[select] = pd_df1.loc[select] + pd_df2.loc[select]\n    md_df1.loc[select] = md_df1.loc[select] + md_df2.loc[select]\n    df_equals(md_df1, pd_df1)\n\n\ndef test_loc_series(df_mode_pair):\n    md_df1, pd_df1 = create_test_df_in_defined_mode(\n        {\"a\": [1, 2], \"b\": [3, 4]}, native=df_mode_pair[0]\n    )\n    md_df2, pd_df2 = create_test_df_in_defined_mode(\n        {\"a\": [1, 2], \"b\": [3, 4]}, native=df_mode_pair[1]\n    )\n\n    pd_df1.loc[pd_df2[\"a\"] > 1, \"b\"] = np.log(pd_df1[\"b\"])\n    md_df1.loc[md_df2[\"a\"] > 1, \"b\"] = 
np.log(md_df1[\"b\"])\n\n    df_equals(pd_df1, md_df1)\n\n\ndef test_reindex_like(df_mode_pair):\n    o_data = [\n        [24.3, 75.7, \"high\"],\n        [31, 87.8, \"high\"],\n        [22, 71.6, \"medium\"],\n        [35, 95, \"medium\"],\n    ]\n    o_columns = [\"temp_celsius\", \"temp_fahrenheit\", \"windspeed\"]\n    o_index = pd.date_range(start=\"2014-02-12\", end=\"2014-02-15\", freq=\"D\")\n    new_data = [[28, \"low\"], [30, \"low\"], [35.1, \"medium\"]]\n    new_columns = [\"temp_celsius\", \"windspeed\"]\n    new_index = pd.DatetimeIndex([\"2014-02-12\", \"2014-02-13\", \"2014-02-15\"])\n    modin_df1, pandas_df1 = create_test_df_in_defined_mode(\n        o_data,\n        columns=o_columns,\n        index=o_index,\n        native=df_mode_pair[0],\n    )\n    modin_df2, pandas_df2 = create_test_df_in_defined_mode(\n        new_data,\n        columns=new_columns,\n        index=new_index,\n        native=df_mode_pair[1],\n    )\n    modin_result = modin_df2.reindex_like(modin_df1)\n    pandas_result = pandas_df2.reindex_like(pandas_df1)\n    df_equals(modin_result, pandas_result)\n\n\ndef test_reindex_multiindex(df_mode_pair):\n    data1, data2 = np.random.randint(1, 20, (5, 5)), np.random.randint(10, 25, 6)\n    index = np.array([\"AUD\", \"BRL\", \"CAD\", \"EUR\", \"INR\"])\n    pandas_midx = pandas.MultiIndex.from_product(\n        [[\"Bank_1\", \"Bank_2\"], [\"AUD\", \"CAD\", \"EUR\"]], names=[\"Bank\", \"Curency\"]\n    )\n    modin_df1, pandas_df1 = create_test_df_in_defined_mode(\n        data=data1, index=index, columns=index, native=df_mode_pair[0]\n    )\n    modin_df2, pandas_df2 = create_test_df_in_defined_mode(\n        data=data2, index=pandas_midx, native=df_mode_pair[1]\n    )\n\n    modin_df2.columns, pandas_df2.columns = [\"Notional\"], [\"Notional\"]\n    md_midx = pd.MultiIndex.from_product([modin_df2.index.levels[0], modin_df1.index])\n    pd_midx = pandas.MultiIndex.from_product(\n        [pandas_df2.index.levels[0], 
pandas_df1.index]\n    )\n    # reindex without axis, index, or columns\n    modin_result = modin_df1.reindex(md_midx, fill_value=0)\n    pandas_result = pandas_df1.reindex(pd_midx, fill_value=0)\n    df_equals(modin_result, pandas_result)\n    # reindex with only axis\n    modin_result = modin_df1.reindex(md_midx, fill_value=0, axis=0)\n    pandas_result = pandas_df1.reindex(pd_midx, fill_value=0, axis=0)\n    df_equals(modin_result, pandas_result)\n    # reindex with axis and level\n    modin_result = modin_df1.reindex(md_midx, fill_value=0, axis=0, level=0)\n    pandas_result = pandas_df1.reindex(pd_midx, fill_value=0, axis=0, level=0)\n    df_equals(modin_result, pandas_result)\n\n\ndef test_getitem_empty_mask(df_mode_pair):\n    # modin-project/modin#517\n    modin_frames = []\n    pandas_frames = []\n    data1 = np.random.randint(0, 100, size=(100, 4))\n    mdf1, pdf1 = create_test_df_in_defined_mode(\n        data1, columns=list(\"ABCD\"), native=df_mode_pair[0]\n    )\n\n    modin_frames.append(mdf1)\n    pandas_frames.append(pdf1)\n\n    data2 = np.random.randint(0, 100, size=(100, 4))\n    mdf2, pdf2 = create_test_df_in_defined_mode(\n        data2, columns=list(\"ABCD\"), native=df_mode_pair[1]\n    )\n    modin_frames.append(mdf2)\n    pandas_frames.append(pdf2)\n\n    data3 = np.random.randint(0, 100, size=(100, 4))\n    mdf3, pdf3 = create_test_df_in_defined_mode(\n        data3, columns=list(\"ABCD\"), native=df_mode_pair[0]\n    )\n    modin_frames.append(mdf3)\n    pandas_frames.append(pdf3)\n\n    modin_data = pd.concat(modin_frames)\n    pandas_data = pandas.concat(pandas_frames)\n    df_equals(\n        modin_data[[False for _ in modin_data.index]],\n        pandas_data[[False for _ in modin_data.index]],\n    )\n\n\ndef test___setitem__mask(df_mode_pair):\n    # DataFrame mask:\n    data = test_data[\"int_data\"]\n    modin_df1, pandas_df1 = create_test_df_in_defined_mode(data, native=df_mode_pair[0])\n    modin_df2, pandas_df2 = 
create_test_df_in_defined_mode(data, native=df_mode_pair[0])\n\n    mean = int((RAND_HIGH + RAND_LOW) / 2)\n    pandas_df1[pandas_df2 > mean] = -50\n    modin_df1[modin_df2 > mean] = -50\n\n    df_equals(modin_df1, pandas_df1)\n\n\n@pytest.mark.parametrize(\n    \"data\",\n    [\n        {},\n        {\"id\": [], \"max_speed\": [], \"health\": []},\n        {\"id\": [1], \"max_speed\": [2], \"health\": [3]},\n        {\"id\": [4, 40, 400], \"max_speed\": [111, 222, 333], \"health\": [33, 22, 11]},\n    ],\n    ids=[\"empty_frame\", \"empty_cols\", \"1_length_cols\", \"2_length_cols\"],\n)\n@pytest.mark.parametrize(\n    \"value\",\n    [[11, 22], [11, 22, 33]],\n    ids=[\"2_length_val\", \"3_length_val\"],\n)\n@pytest.mark.parametrize(\"convert_to_series\", [False, True])\n@pytest.mark.parametrize(\"new_col_id\", [123, \"new_col\"], ids=[\"integer\", \"string\"])\ndef test_setitem_on_empty_df(data, value, convert_to_series, new_col_id, df_mode_pair):\n    modin_df, pandas_df = create_test_df_in_defined_mode(data, native=df_mode_pair[0])\n\n    def applyier(df):\n        if convert_to_series:\n            converted_value = (\n                pandas.Series(value)\n                if isinstance(df, pandas.DataFrame)\n                else create_test_series_in_defined_mode(value, native=df_mode_pair[1])[\n                    1\n                ]\n            )\n        else:\n            converted_value = value\n        df[new_col_id] = converted_value\n        return df\n\n    expected_exception = None\n    if not convert_to_series:\n        values_length = len(value)\n        index_length = len(pandas_df.index)\n        expected_exception = ValueError(\n            f\"Length of values ({values_length}) does not match length of index ({index_length})\"\n        )\n\n    eval_general(\n        modin_df,\n        pandas_df,\n        applyier,\n        expected_exception=expected_exception,\n        check_for_execution_propagation=False,\n        
no_check_for_execution_propagation_reason=(\n            \"https://github.com/modin-project/modin/issues/7428\"\n        ),\n        __inplace__=True,\n    )\n    # Because of https://github.com/modin-project/modin/issues/7600,\n    # df_equals does not check dtypes equality for empty frames.\n    assert_dtypes_equal(modin_df, pandas_df)\n\n\ndef test_setitem_on_empty_df_4407(df_mode_pair):\n    data = {}\n    index = pd.date_range(end=\"1/1/2018\", periods=0, freq=\"D\")\n    column = pd.date_range(end=\"1/1/2018\", periods=1, freq=\"h\")[0]\n    modin_df, pandas_df = create_test_df_in_defined_mode(\n        data, columns=index, native=df_mode_pair[0]\n    )\n    modin_ser, pandas_ser = create_test_series_in_defined_mode(\n        [1], native=df_mode_pair[1]\n    )\n    modin_df[column] = modin_ser\n    pandas_df[column] = pandas_ser\n\n    df_equals(modin_df, pandas_df)\n    assert modin_df.columns.freq == pandas_df.columns.freq\n\n\ndef test_setitem_2d_insertion(df_mode_pair):\n    def build_value_picker(modin_value, pandas_value):\n        \"\"\"Build a function that returns either Modin or pandas DataFrame depending on the passed frame.\"\"\"\n        return lambda source_df, *args, **kwargs: (\n            modin_value\n            if isinstance(source_df, (pd.DataFrame, pd.Series))\n            else pandas_value\n        )\n\n    modin_df, pandas_df = create_test_df_in_defined_mode(\n        test_data[\"int_data\"], native=df_mode_pair[0]\n    )\n\n    # Easy case - key and value.columns are equal\n    modin_value, pandas_value = create_test_df_in_defined_mode(\n        {\n            \"new_value1\": np.arange(len(modin_df)),\n            \"new_value2\": np.arange(len(modin_df)),\n        },\n        native=df_mode_pair[1],\n    )\n    eval_setitem(\n        modin_df,\n        pandas_df,\n        build_value_picker(modin_value, pandas_value),\n        col=[\"new_value1\", \"new_value2\"],\n    )\n\n    # Key and value.columns have equal values but in 
different order\n    new_columns = [\"new_value3\", \"new_value4\"]\n    modin_value.columns, pandas_value.columns = new_columns, new_columns\n    eval_setitem(\n        modin_df,\n        pandas_df,\n        build_value_picker(modin_value, pandas_value),\n        col=[\"new_value4\", \"new_value3\"],\n    )\n\n    # Key and value.columns have different values\n    new_columns = [\"new_value5\", \"new_value6\"]\n    modin_value.columns, pandas_value.columns = new_columns, new_columns\n    eval_setitem(\n        modin_df,\n        pandas_df,\n        build_value_picker(modin_value, pandas_value),\n        col=[\"__new_value5\", \"__new_value6\"],\n    )\n\n    # Key and value.columns have different lengths, testing that both raise the same exception\n    eval_setitem(\n        modin_df,\n        pandas_df,\n        build_value_picker(modin_value.iloc[:, [0]], pandas_value.iloc[:, [0]]),\n        col=[\"new_value7\", \"new_value8\"],\n        expected_exception=ValueError(\"Columns must be same length as key\"),\n    )\n\n\n@pytest.mark.parametrize(\"does_value_have_different_columns\", [True, False])\ndef test_setitem_2d_update(does_value_have_different_columns, df_mode_pair):\n    def test(dfs, iloc):\n        \"\"\"Update columns on the given numeric indices.\"\"\"\n        df1, df2 = dfs\n        cols1 = df1.columns[iloc].tolist()\n        cols2 = df2.columns[iloc].tolist()\n        df1[cols1] = df2[cols2]\n        return df1\n\n    modin_df, pandas_df = create_test_df_in_defined_mode(\n        test_data[\"int_data\"], native=df_mode_pair[0]\n    )\n    modin_df2, pandas_df2 = create_test_df_in_defined_mode(\n        test_data[\"int_data\"], native=df_mode_pair[1]\n    )\n    modin_df2 *= 10\n    pandas_df2 *= 10\n\n    if does_value_have_different_columns:\n        new_columns = [f\"{col}_new\" for col in modin_df.columns]\n        modin_df2.columns = new_columns\n        pandas_df2.columns = new_columns\n\n    modin_dfs = (modin_df, modin_df2)\n    pandas_dfs = 
(pandas_df, pandas_df2)\n\n    eval_general(modin_dfs, pandas_dfs, test, iloc=[0, 1, 2])\n    eval_general(modin_dfs, pandas_dfs, test, iloc=[0, -1])\n    eval_general(\n        modin_dfs, pandas_dfs, test, iloc=slice(1, None)\n    )  # (start=1, stop=None)\n    eval_general(\n        modin_dfs, pandas_dfs, test, iloc=slice(None, -2)\n    )  # (start=None, stop=-2)\n    eval_general(\n        modin_dfs,\n        pandas_dfs,\n        test,\n        iloc=[0, 1, 5, 6, 9, 10, -2, -1],\n    )\n    eval_general(\n        modin_dfs,\n        pandas_dfs,\n        test,\n        iloc=[5, 4, 0, 10, 1, -1],\n    )\n    eval_general(\n        modin_dfs, pandas_dfs, test, iloc=slice(None, None, 2)\n    )  # (start=None, stop=None, step=2)\n\n\ndef test___setitem__single_item_in_series(df_mode_pair):\n    # Test assigning a single item in a Series for issue\n    # https://github.com/modin-project/modin/issues/3860\n    modin_series1, pandas_series1 = create_test_series_in_defined_mode(\n        99, native=df_mode_pair[0]\n    )\n    modin_series2, pandas_series2 = create_test_series_in_defined_mode(\n        100, native=df_mode_pair[1]\n    )\n    modin_series1[:1] = modin_series2\n    pandas_series1[:1] = pandas_series2\n    df_equals(modin_series1, pandas_series1)\n\n\n@pytest.mark.parametrize(\n    \"value\",\n    [\n        1,\n        np.int32(1),\n        1.0,\n        \"str val\",\n        pandas.Timestamp(\"1/4/2018\"),\n        np.datetime64(0, \"ms\"),\n        True,\n    ],\n)\ndef test_loc_boolean_assignment_scalar_dtypes(value, df_mode_pair):\n    modin_df, pandas_df = create_test_df_in_defined_mode(\n        {\n            \"a\": [1, 2, 3],\n            \"b\": [3.0, 5.0, 6.0],\n            \"c\": [\"a\", \"b\", \"c\"],\n            \"d\": [1.0, \"c\", 2.0],\n            \"e\": pandas.to_datetime([\"1/1/2018\", \"1/2/2018\", \"1/3/2018\"]),\n            \"f\": [True, False, True],\n        },\n        native=df_mode_pair[1],\n    )\n    modin_idx, pandas_idx = 
create_test_series_in_defined_mode(\n        [False, True, True], native=df_mode_pair[1]\n    )\n\n    modin_df.loc[modin_idx] = value\n    pandas_df.loc[pandas_idx] = value\n    df_equals(modin_df, pandas_df)\n\n\n# This is a very subtle bug that comes from:\n# https://github.com/modin-project/modin/issues/4945\ndef test_lazy_eval_index(df_mode_pair):\n    data = {\"col0\": [0, 1]}\n\n    def func(df1, df2):\n        df_copy = df1[df2[\"col0\"] < 6].copy()\n        # The problem here is that the index is not copied over so it needs\n        # to get recomputed at some point. Our implementation of __setitem__\n        # requires us to build a mask and insert the value from the right\n        # handside into the new DataFrame. However, it's possible that we\n        # won't have any new partitions, so we will end up computing an empty\n        # index.\n        df_copy[\"col0\"] = df_copy[\"col0\"].apply(lambda x: x + 1)\n        return df_copy\n\n    eval_general_interop(data, None, func, df_mode_pair=df_mode_pair)\n\n\ndef test_index_of_empty_frame(df_mode_pair):\n    # Test on an empty frame created by user\n\n    # Test on an empty frame produced by Modin's logic\n    data = test_data_values[0]\n    md_df1, pd_df1 = create_test_df_in_defined_mode(\n        data,\n        index=pandas.RangeIndex(len(next(iter(data.values()))), name=\"index name\"),\n        native=df_mode_pair[0],\n    )\n    md_df2, pd_df2 = create_test_df_in_defined_mode(\n        data,\n        index=pandas.RangeIndex(len(next(iter(data.values()))), name=\"index name\"),\n        native=df_mode_pair[1],\n    )\n\n    md_res = md_df1.query(f\"{md_df2.columns[0]} > {RAND_HIGH}\")\n    pd_res = pd_df1.query(f\"{pd_df2.columns[0]} > {RAND_HIGH}\")\n\n    assert md_res.empty and pd_res.empty\n    df_equals(md_res.index, pd_res.index)\n"
  },
  {
    "path": "modin/tests/pandas/native_df_interoperability/test_iter.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport warnings\n\nimport matplotlib\nimport pytest\n\nimport modin.pandas as pd\nfrom modin.config import NPartitions\nfrom modin.pandas.utils import SET_DATAFRAME_ATTRIBUTE_WARNING\nfrom modin.tests.pandas.native_df_interoperability.utils import (\n    create_test_df_in_defined_mode,\n    create_test_series_in_defined_mode,\n)\nfrom modin.tests.pandas.utils import df_equals, eval_general\n\nNPartitions.put(4)\n\n# Force matplotlib to not use any Xwindows backend.\nmatplotlib.use(\"Agg\")\n\n\ndef test___setattr__mutating_column(df_mode_pair):\n    # Use case from issue #4577\n    modin_df, pandas_df = create_test_df_in_defined_mode(\n        [[1]], columns=[\"col0\"], native=df_mode_pair[0]\n    )\n    # Replacing a column with a list should mutate the column in place.\n    pandas_df.col0 = [3]\n    modin_df.col0 = [3]\n    modin_ser, pandas_ser = create_test_series_in_defined_mode(\n        [3], native=df_mode_pair[1]\n    )\n    df_equals(modin_df, pandas_df)\n    # Check that the col0 attribute reflects the value update.\n    df_equals(modin_df.col0, pandas_df.col0)\n\n    pandas_df.col0 = pandas_ser\n    modin_df.col0 = modin_ser\n\n    # Check that the col0 
attribute reflects this update\n    df_equals(modin_df, pandas_df)\n\n    pandas_df.loc[0, \"col0\"] = 4\n    modin_df.loc[0, \"col0\"] = 4\n\n    # Check that the col0 attribute reflects update via loc\n    df_equals(modin_df, pandas_df)\n    assert modin_df.col0.equals(modin_df[\"col0\"])\n\n    # Check that attempting to add a new col via attributes raises warning\n    # and adds the provided list as a new attribute and not a column.\n    with pytest.warns(\n        UserWarning,\n        match=SET_DATAFRAME_ATTRIBUTE_WARNING,\n    ):\n        modin_df.col1 = [4]\n\n    with warnings.catch_warnings():\n        warnings.filterwarnings(\n            action=\"error\",\n            message=SET_DATAFRAME_ATTRIBUTE_WARNING,\n        )\n        modin_df.col1 = [5]\n        modin_df.new_attr = 6\n        modin_df.col0 = 7\n\n    assert \"new_attr\" in dir(\n        modin_df\n    ), \"Modin attribute was not correctly added to the df.\"\n    assert (\n        \"new_attr\" not in modin_df\n    ), \"New attribute was not correctly added to columns.\"\n    assert modin_df.new_attr == 6, \"Modin attribute value was set incorrectly.\"\n    assert isinstance(\n        modin_df.col0, pd.Series\n    ), \"Scalar was not broadcasted properly to an existing column.\"\n\n\ndef test_isin_with_modin_objects(df_mode_pair):\n    modin_df1, pandas_df1 = create_test_df_in_defined_mode(\n        {\"a\": [1, 2], \"b\": [3, 4]}, native=df_mode_pair[0]\n    )\n    modin_series, pandas_series = create_test_series_in_defined_mode(\n        [1, 4, 5, 6], native=df_mode_pair[1]\n    )\n\n    eval_general(\n        (modin_df1, modin_series),\n        (pandas_df1, pandas_series),\n        lambda srs: srs[0].isin(srs[1]),\n    )\n\n    modin_df2 = modin_series.to_frame(\"a\")\n    pandas_df2 = pandas_series.to_frame(\"a\")\n\n    eval_general(\n        (modin_df1, modin_df2),\n        (pandas_df1, pandas_df2),\n        lambda srs: srs[0].isin(srs[1]),\n    )\n\n    # Check case when indices are not 
matching\n    modin_df1, pandas_df1 = create_test_df_in_defined_mode(\n        {\"a\": [1, 2], \"b\": [3, 4]},\n        index=[10, 11],\n        native=df_mode_pair[0],\n    )\n\n    eval_general(\n        (modin_df1, modin_series),\n        (pandas_df1, pandas_series),\n        lambda srs: srs[0].isin(srs[1]),\n    )\n    eval_general(\n        (modin_df1, modin_df2),\n        (pandas_df1, pandas_df2),\n        lambda srs: srs[0].isin(srs[1]),\n    )\n"
  },
  {
    "path": "modin/tests/pandas/native_df_interoperability/test_join_sort.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport matplotlib\nimport numpy as np\nimport pandas\nimport pytest\nfrom pytest import param\n\nimport modin.pandas as pd\nfrom modin.config import NPartitions\nfrom modin.pandas.io import to_pandas\nfrom modin.tests.pandas.native_df_interoperability.utils import (\n    create_test_df_in_defined_mode,\n    create_test_series_in_defined_mode,\n    eval_general_interop,\n)\nfrom modin.tests.pandas.utils import (\n    default_to_pandas_ignore_string,\n    df_equals,\n    eval_general,\n    random_state,\n    test_data_keys,\n    test_data_values,\n)\n\nNPartitions.put(4)\n\n# Force matplotlib to not use any Xwindows backend.\nmatplotlib.use(\"Agg\")\n\n# Our configuration in pytest.ini requires that we explicitly catch all\n# instances of defaulting to pandas, but some test modules, like this one,\n# have too many such instances.\npytestmark = pytest.mark.filterwarnings(default_to_pandas_ignore_string)\n\n# Initialize env for storage format detection in @pytest.mark.*\npd.DataFrame()\n\n\ndef df_equals_and_sort(df1, df2):\n    \"\"\"Sort dataframe's rows and run ``df_equals()`` for them.\"\"\"\n    df1 = df1.sort_values(by=df1.columns.tolist(), ignore_index=True)\n   
 df2 = df2.sort_values(by=df2.columns.tolist(), ignore_index=True)\n    df_equals(df1, df2)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_combine(data, df_mode_pair):\n    modin_df_1, pandas_df_1 = create_test_df_in_defined_mode(\n        data, native=df_mode_pair[0]\n    )\n    modin_df_2, pandas_df_2 = create_test_df_in_defined_mode(\n        data, native=df_mode_pair[1]\n    )\n    modin_df_1.combine(\n        modin_df_2 + 1, lambda s1, s2: s1 if s1.count() < s2.count() else s2\n    )\n    pandas_df_1.combine(\n        pandas_df_2 + 1, lambda s1, s2: s1 if s1.count() < s2.count() else s2\n    )\n\n\n@pytest.mark.parametrize(\n    \"test_data, test_data2\",\n    [\n        (\n            np.random.randint(0, 100, size=(64, 64)),\n            np.random.randint(0, 100, size=(128, 64)),\n        ),\n        (\n            np.random.randint(0, 100, size=(128, 64)),\n            np.random.randint(0, 100, size=(64, 64)),\n        ),\n        (\n            np.random.randint(0, 100, size=(64, 64)),\n            np.random.randint(0, 100, size=(64, 128)),\n        ),\n        (\n            np.random.randint(0, 100, size=(64, 128)),\n            np.random.randint(0, 100, size=(64, 64)),\n        ),\n    ],\n)\ndef test_join(test_data, test_data2, df_mode_pair):\n    modin_df, pandas_df = create_test_df_in_defined_mode(\n        test_data,\n        columns=[\"col{}\".format(i) for i in range(test_data.shape[1])],\n        index=pd.Index([i for i in range(1, test_data.shape[0] + 1)], name=\"key\"),\n        native=df_mode_pair[0],\n    )\n    modin_df2, pandas_df2 = create_test_df_in_defined_mode(\n        test_data2,\n        columns=[\"col{}\".format(i) for i in range(test_data2.shape[1])],\n        index=pd.Index([i for i in range(1, test_data2.shape[0] + 1)], name=\"key\"),\n        native=df_mode_pair[1],\n    )\n\n    hows = [\"inner\", \"left\", \"right\", \"outer\"]\n    ons = [\"col33\", \"col34\"]\n    sorts = [False, 
True]\n    assert len(ons) == len(sorts), \"the loop below is designed for this condition\"\n    for i in range(len(hows)):\n        for j in range(len(ons)):\n            modin_result = modin_df.join(\n                modin_df2,\n                how=hows[i],\n                on=ons[j],\n                sort=sorts[j],\n                lsuffix=\"_caller\",\n                rsuffix=\"_other\",\n            )\n            pandas_result = pandas_df.join(\n                pandas_df2,\n                how=hows[i],\n                on=ons[j],\n                sort=sorts[j],\n                lsuffix=\"_caller\",\n                rsuffix=\"_other\",\n            )\n            if sorts[j]:\n                # sorting in `join` is implemented through range partitioning technique\n                # therefore the order of the rows after it does not match the pandas,\n                # so additional sorting is needed in order to get the same result as for pandas\n                df_equals_and_sort(modin_result, pandas_result)\n            else:\n                df_equals(modin_result, pandas_result)\n\n    frame_data = {\n        \"col1\": [0, 1, 2, 3],\n        \"col2\": [4, 5, 6, 7],\n        \"col3\": [8, 9, 0, 1],\n        \"col4\": [2, 4, 5, 6],\n    }\n\n    modin_df = pd.DataFrame(frame_data)\n    pandas_df = pandas.DataFrame(frame_data)\n\n    frame_data2 = {\"col5\": [0], \"col6\": [1]}\n    modin_df2 = pd.DataFrame(frame_data2)\n    pandas_df2 = pandas.DataFrame(frame_data2)\n\n    join_types = [\"left\", \"right\", \"outer\", \"inner\"]\n    for how in join_types:\n        modin_join = modin_df.join(modin_df2, how=how)\n        pandas_join = pandas_df.join(pandas_df2, how=how)\n        df_equals(modin_join, pandas_join)\n\n    frame_data3 = {\"col7\": [1, 2, 3, 5, 6, 7, 8]}\n\n    modin_df3 = pd.DataFrame(frame_data3)\n    pandas_df3 = pandas.DataFrame(frame_data3)\n\n    join_types = [\"left\", \"outer\", \"inner\"]\n    for how in join_types:\n        modin_join = 
modin_df.join([modin_df2, modin_df3], how=how)\n        pandas_join = pandas_df.join([pandas_df2, pandas_df3], how=how)\n        df_equals(modin_join, pandas_join)\n\n\ndef test_join_cross_6786(df_mode_pair):\n    data = [[7, 8, 9], [10, 11, 12]]\n    modin_df_1, pandas_df_1 = create_test_df_in_defined_mode(\n        data, columns=[\"x\", \"y\", \"z\"], native=df_mode_pair[0]\n    )\n    modin_df_2, pandas_df_2 = create_test_df_in_defined_mode(\n        data, columns=[\"x\", \"y\", \"z\"], native=df_mode_pair[1]\n    )\n    modin_join = modin_df_1.join(\n        modin_df_2[[\"x\"]].set_axis([\"p\", \"q\"], axis=0), how=\"cross\", lsuffix=\"p\"\n    )\n    pandas_join = pandas_df_1.join(\n        pandas_df_2[[\"x\"]].set_axis([\"p\", \"q\"], axis=0), how=\"cross\", lsuffix=\"p\"\n    )\n    df_equals(modin_join, pandas_join)\n\n\n@pytest.mark.parametrize(\n    \"test_data, test_data2\",\n    [\n        (\n            np.random.randint(0, 100, size=(64, 64)),\n            np.random.randint(0, 100, size=(128, 64)),\n        ),\n        (\n            np.random.randint(0, 100, size=(128, 64)),\n            np.random.randint(0, 100, size=(64, 64)),\n        ),\n        (\n            np.random.randint(0, 100, size=(64, 64)),\n            np.random.randint(0, 100, size=(64, 128)),\n        ),\n        (\n            np.random.randint(0, 100, size=(64, 128)),\n            np.random.randint(0, 100, size=(64, 64)),\n        ),\n    ],\n)\n@pytest.mark.parametrize(\n    \"merge_with_on, merge_with_left_on_right_on\",\n    [\n        param(\n            lambda df1, df2, *, lib, how, sort, on=None: df1.merge(\n                df2, how=how, on=on, sort=sort\n            ),\n            lambda df1, df2, *, lib, how, sort: df1.merge(\n                df2, how=how, left_on=\"key\", right_on=\"key\", sort=sort\n            ),\n            id=\"merge_with_dataframe_method\",\n        ),\n        param(\n            lambda df1, df2, *, lib, how, sort, on=None: lib.merge(\n            
    df1,\n                df2,\n                how=how,\n                on=on,\n                sort=sort,\n            ),\n            lambda df1, df2, *, lib, how, sort: lib.merge(\n                df1, df2, how=how, left_on=\"key\", right_on=\"key\", sort=sort\n            ),\n            id=\"merge_with_general_function\",\n        ),\n    ],\n)\ndef test_merge(\n    test_data,\n    test_data2,\n    df_mode_pair,\n    merge_with_on,\n    merge_with_left_on_right_on,\n):\n    modin_df, pandas_df = create_test_df_in_defined_mode(\n        test_data,\n        columns=[\"col{}\".format(i) for i in range(test_data.shape[1])],\n        index=pd.Index([i for i in range(1, test_data.shape[0] + 1)], name=\"key\"),\n        native=df_mode_pair[0],\n    )\n    modin_df2, pandas_df2 = create_test_df_in_defined_mode(\n        test_data2,\n        columns=[\"col{}\".format(i) for i in range(test_data2.shape[1])],\n        index=pd.Index([i for i in range(1, test_data2.shape[0] + 1)], name=\"key\"),\n        native=df_mode_pair[1],\n    )\n    hows = [\"left\", \"inner\", \"right\"]\n    ons = [\"col33\", [\"col33\", \"col34\"]]\n    sorts = [False, True]\n    assert len(ons) == len(sorts), \"the loop below is designed for this condition\"\n    for i in range(len(hows)):\n        for j in range(len(ons)):\n            modin_result = merge_with_on(\n                modin_df, modin_df2, how=hows[i], on=ons[j], sort=sorts[j], lib=pd\n            )\n            pandas_result = merge_with_on(\n                pandas_df, pandas_df2, how=hows[i], on=ons[j], sort=sorts[j], lib=pandas\n            )\n            # FIXME: https://github.com/modin-project/modin/issues/2246\n            df_equals_and_sort(modin_result, pandas_result)\n\n            modin_result = merge_with_left_on_right_on(\n                modin_df, modin_df2, how=hows[i], sort=sorts[j], lib=pd\n            )\n            pandas_result = merge_with_left_on_right_on(\n                pandas_df, pandas_df2, 
how=hows[i], sort=sorts[j], lib=pandas\n            )\n            # FIXME: https://github.com/modin-project/modin/issues/2246\n            df_equals_and_sort(modin_result, pandas_result)\n\n\n@pytest.mark.parametrize(\"how\", [\"left\", \"inner\", \"right\"])\ndef test_merge_empty(\n    how,\n    df_mode_pair,\n):\n    data = np.random.randint(0, 100, size=(64, 64))\n    eval_general_interop(\n        data,\n        None,\n        lambda df1, df2: df1.merge(df2.iloc[:0], how=how),\n        df_mode_pair,\n    )\n\n\ndef test_merge_with_mi_columns(df_mode_pair):\n    modin_df1, pandas_df1 = create_test_df_in_defined_mode(\n        {\n            (\"col0\", \"a\"): [1, 2, 3, 4],\n            (\"col0\", \"b\"): [2, 3, 4, 5],\n            (\"col1\", \"a\"): [3, 4, 5, 6],\n        },\n        native=df_mode_pair[0],\n    )\n\n    modin_df2, pandas_df2 = create_test_df_in_defined_mode(\n        {\n            (\"col0\", \"a\"): [1, 2, 3, 4],\n            (\"col0\", \"c\"): [2, 3, 4, 5],\n            (\"col1\", \"a\"): [3, 4, 5, 6],\n        },\n        native=df_mode_pair[1],\n    )\n\n    eval_general(\n        (modin_df1, modin_df2),\n        (pandas_df1, pandas_df2),\n        lambda dfs: dfs[0].merge(dfs[1], on=[(\"col0\", \"a\")]),\n    )\n\n\ndef test_where(df_mode_pair):\n    columns = list(\"abcdefghij\")\n\n    frame_data = random_state.randn(100, 10)\n    modin_df_1, pandas_df_1 = create_test_df_in_defined_mode(\n        frame_data, columns=columns, native=df_mode_pair[0]\n    )\n    modin_df_2, pandas_df_2 = create_test_df_in_defined_mode(\n        frame_data, columns=columns, native=df_mode_pair[1]\n    )\n    pandas_cond_df = pandas_df_2 % 5 < 2\n    modin_cond_df = modin_df_2 % 5 < 2\n\n    pandas_result = pandas_df_1.where(pandas_cond_df, -pandas_df_2)\n    modin_result = modin_df_1.where(modin_cond_df, -modin_df_2)\n    assert all((to_pandas(modin_result) == pandas_result).all())\n\n    # test case when other is Series\n    other_data = 
random_state.randn(len(pandas_df_1))\n    modin_other, pandas_other = create_test_series_in_defined_mode(\n        other_data, native=df_mode_pair[0]\n    )\n    pandas_result = pandas_df_1.where(pandas_cond_df, pandas_other, axis=0)\n    modin_result = modin_df_1.where(modin_cond_df, modin_other, axis=0)\n    df_equals(modin_result, pandas_result)\n\n    # Test that we choose the right values to replace when `other` == `True`\n    # everywhere.\n    other_data = np.full(shape=pandas_df_1.shape, fill_value=True)\n    modin_other, pandas_other = create_test_df_in_defined_mode(\n        other_data, columns=columns, native=df_mode_pair[0]\n    )\n    pandas_result = pandas_df_1.where(pandas_cond_df, pandas_other)\n    modin_result = modin_df_1.where(modin_cond_df, modin_other)\n    df_equals(modin_result, pandas_result)\n\n    other = pandas_df_1.loc[3]\n    pandas_result = pandas_df_1.where(pandas_cond_df, other, axis=1)\n    modin_result = modin_df_1.where(modin_cond_df, other, axis=1)\n    assert all((to_pandas(modin_result) == pandas_result).all())\n\n    other = pandas_df_1[\"e\"]\n    pandas_result = pandas_df_1.where(pandas_cond_df, other, axis=0)\n    modin_result = modin_df_1.where(modin_cond_df, other, axis=0)\n    assert all((to_pandas(modin_result) == pandas_result).all())\n\n    pandas_result = pandas_df_1.where(pandas_df_2 < 2, True)\n    modin_result = modin_df_1.where(modin_df_2 < 2, True)\n    assert all((to_pandas(modin_result) == pandas_result).all())\n\n\n@pytest.mark.parametrize(\"align_axis\", [\"index\", \"columns\"])\n@pytest.mark.parametrize(\"keep_shape\", [False, True])\n@pytest.mark.parametrize(\"keep_equal\", [False, True])\ndef test_compare(align_axis, keep_shape, keep_equal, df_mode_pair):\n    kwargs = {\n        \"align_axis\": align_axis,\n        \"keep_shape\": keep_shape,\n        \"keep_equal\": keep_equal,\n    }\n    frame_data1 = random_state.randn(100, 10)\n    frame_data2 = random_state.randn(100, 10)\n    modin_df, pandas_df 
= create_test_df_in_defined_mode(\n        frame_data1, columns=list(\"abcdefghij\"), native=df_mode_pair[0]\n    )\n    modin_df2, pandas_df2 = create_test_df_in_defined_mode(\n        frame_data2, columns=list(\"abcdefghij\"), native=df_mode_pair[0]\n    )\n    modin_result = modin_df.compare(modin_df2, **kwargs)\n    pandas_result = pandas_df.compare(pandas_df2, **kwargs)\n    assert to_pandas(modin_result).equals(pandas_result)\n\n    modin_result = modin_df2.compare(modin_df, **kwargs)\n    pandas_result = pandas_df2.compare(pandas_df, **kwargs)\n    assert to_pandas(modin_result).equals(pandas_result)\n\n    series_data1 = [\"a\", \"b\", \"c\", \"d\", \"e\"]\n    series_data2 = [\"a\", \"a\", \"c\", \"b\", \"e\"]\n    modin_series1, pandas_series1 = create_test_series_in_defined_mode(\n        series_data1, native=df_mode_pair[0]\n    )\n    modin_series2, pandas_series2 = create_test_series_in_defined_mode(\n        series_data2, native=df_mode_pair[1]\n    )\n\n    modin_result = modin_series1.compare(modin_series2, **kwargs)\n    pandas_result = pandas_series1.compare(pandas_series2, **kwargs)\n    assert to_pandas(modin_result).equals(pandas_result)\n\n    modin_result = modin_series2.compare(modin_series1, **kwargs)\n    pandas_result = pandas_series2.compare(pandas_series1, **kwargs)\n    assert to_pandas(modin_result).equals(pandas_result)\n"
  },
  {
    "path": "modin/tests/pandas/native_df_interoperability/test_map_metadata.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\nimport matplotlib\nimport numpy as np\nimport pandas\nimport pytest\n\nimport modin.pandas as pd\nfrom modin.config import NPartitions\nfrom modin.tests.pandas.native_df_interoperability.utils import (\n    create_test_df_in_defined_mode,\n    create_test_series_in_defined_mode,\n)\nfrom modin.tests.pandas.utils import (\n    RAND_HIGH,\n    RAND_LOW,\n    axis_keys,\n    axis_values,\n    default_to_pandas_ignore_string,\n    df_equals,\n    eval_general,\n    name_contains,\n    numeric_dfs,\n    random_state,\n    test_data,\n    test_data_keys,\n    test_data_values,\n)\n\nNPartitions.put(4)\n\n# Force matplotlib to not use any Xwindows backend.\nmatplotlib.use(\"Agg\")\n\n# Our configuration in pytest.ini requires that we explicitly catch all\n# instances of defaulting to pandas, but some test modules, like this one,\n# have too many such instances.\npytestmark = pytest.mark.filterwarnings(default_to_pandas_ignore_string)\n\n\ndef eval_insert(modin_df, pandas_df, **kwargs):\n    if \"col\" in kwargs and \"column\" not in kwargs:\n        kwargs[\"column\"] = kwargs.pop(\"col\")\n    _kwargs = {\"loc\": 0, \"column\": \"New column\"}\n    
_kwargs.update(kwargs)\n\n    eval_general(\n        modin_df,\n        pandas_df,\n        operation=lambda df, **kwargs: df.insert(**kwargs),\n        __inplace__=True,\n        **_kwargs,\n    )\n\n\ndef test_empty_df(df_mode_pair):\n    modin_df, pd_df = create_test_df_in_defined_mode(None, native=df_mode_pair[0])\n    md_series, pd_series = create_test_series_in_defined_mode(\n        [1, 2, 3, 4, 5], native=df_mode_pair[1]\n    )\n    modin_df[\"a\"] = md_series\n    pd_df[\"a\"] = pd_series\n    df_equals(modin_df, pd_df)\n\n\ndef test_astype(df_mode_pair):\n    td = pandas.DataFrame(test_data[\"int_data\"])[[\"col1\", \"index\", \"col3\", \"col4\"]]\n    modin_df, pandas_df = create_test_df_in_defined_mode(\n        td.values,\n        index=td.index,\n        columns=td.columns,\n        native=df_mode_pair[0],\n    )\n\n    def astype_func(df):\n        md_ser, pd_ser = create_test_series_in_defined_mode(\n            [str, str], index=[\"col1\", \"col1\"], native=df_mode_pair[1]\n        )\n        if isinstance(df, pd.DataFrame):\n            return df.astype(md_ser)\n        else:\n            return df.astype(pd_ser)\n\n    # The dtypes series must have a unique index.\n    eval_general(\n        modin_df,\n        pandas_df,\n        astype_func,\n        expected_exception=ValueError(\n            \"cannot reindex on an axis with duplicate labels\"\n        ),\n    )\n\n\n###########################################################################\n\n\ndef test_convert_dtypes_5653(df_mode_pair):\n    modin_part1, _ = create_test_df_in_defined_mode(\n        {\"col1\": [\"a\", \"b\", \"c\", \"d\"]}, native=df_mode_pair[0]\n    )\n    modin_part2, _ = create_test_df_in_defined_mode(\n        {\"col1\": [None, None, None, None]}, native=df_mode_pair[1]\n    )\n    modin_df = pd.concat([modin_part1, modin_part2])\n    if modin_df._query_compiler.storage_format == \"Pandas\":\n        assert modin_df._query_compiler._modin_frame._partitions.shape == (2, 
1)\n    modin_df = modin_df.convert_dtypes()\n    assert len(modin_df.dtypes) == 1\n    assert modin_df.dtypes.iloc[0] == \"string\"\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"axis\", axis_values, ids=axis_keys)\n@pytest.mark.parametrize(\"bound_type\", [\"list\", \"series\"], ids=[\"list\", \"series\"])\n@pytest.mark.exclude_in_sanity\ndef test_clip(request, data, axis, bound_type, df_mode_pair):\n    modin_df, pandas_df = create_test_df_in_defined_mode(data, native=df_mode_pair[0])\n\n    if name_contains(request.node.name, numeric_dfs):\n        ind_len = (\n            len(modin_df.index)\n            if not pandas.DataFrame()._get_axis_number(axis)\n            else len(modin_df.columns)\n        )\n\n        lower = random_state.randint(RAND_LOW, RAND_HIGH, ind_len)\n        upper = random_state.randint(RAND_LOW, RAND_HIGH, ind_len)\n\n        if bound_type == \"series\":\n            modin_lower, pandas_lower = create_test_series_in_defined_mode(\n                lower, native=df_mode_pair[1]\n            )\n            modin_upper, pandas_upper = create_test_series_in_defined_mode(\n                upper, native=df_mode_pair[0]\n            )\n        else:\n            modin_lower = pandas_lower = lower\n            modin_upper = pandas_upper = upper\n\n        # test lower and upper list bound on each column\n        modin_result = modin_df.clip(modin_lower, modin_upper, axis=axis)\n        pandas_result = pandas_df.clip(pandas_lower, pandas_upper, axis=axis)\n        df_equals(modin_result, pandas_result)\n\n        # test only upper list bound on each column\n        modin_result = modin_df.clip(np.nan, modin_upper, axis=axis)\n        pandas_result = pandas_df.clip(np.nan, pandas_upper, axis=axis)\n        df_equals(modin_result, pandas_result)\n\n        with pytest.raises(ValueError):\n            modin_df.clip(lower=[1, 2, 3], axis=None)\n\n\n@pytest.mark.parametrize(\n    \"data, 
other_data\",\n    [\n        ({\"A\": [1, 2, 3], \"B\": [400, 500, 600]}, {\"B\": [4, 5, 6], \"C\": [7, 8, 9]}),\n        ({\"C\": [1, 2, 3], \"B\": [400, 500, 600]}, {\"B\": [4, 5, 6], \"A\": [7, 8, 9]}),\n        (\n            {\"A\": [\"a\", \"b\", \"c\"], \"B\": [\"x\", \"y\", \"z\"]},\n            {\"B\": [\"d\", \"e\", \"f\", \"g\", \"h\", \"i\"]},\n        ),\n        ({\"A\": [1, 2, 3], \"B\": [400, 500, 600]}, {\"B\": [4, np.nan, 6]}),\n    ],\n)\n@pytest.mark.parametrize(\"errors\", [\"raise\", \"ignore\"])\ndef test_update(data, other_data, errors, df_mode_pair):\n    modin_df, pandas_df = create_test_df_in_defined_mode(data, native=df_mode_pair[0])\n    other_modin_df, other_pandas_df = create_test_df_in_defined_mode(\n        other_data, native=df_mode_pair[1]\n    )\n    expected_exception = None\n    if errors == \"raise\":\n        expected_exception = ValueError(\"Data overlaps.\")\n    eval_general(\n        modin_df,\n        pandas_df,\n        lambda df: (\n            df.update(other_modin_df, errors=errors)\n            if isinstance(df, pd.DataFrame)\n            else df.update(other_pandas_df, errors=errors)\n        ),\n        __inplace__=True,\n        expected_exception=expected_exception,\n    )\n\n\n@pytest.mark.parametrize(\n    \"get_index\",\n    [\n        pytest.param(lambda idx: None, id=\"None_idx\"),\n        pytest.param(lambda idx: [\"a\", \"b\", \"c\"], id=\"No_intersection_idx\"),\n        pytest.param(lambda idx: idx, id=\"Equal_idx\"),\n        pytest.param(lambda idx: idx[::-1], id=\"Reversed_idx\"),\n    ],\n)\n@pytest.mark.parametrize(\n    \"get_columns\",\n    [\n        pytest.param(lambda idx: None, id=\"None_idx\"),\n        pytest.param(lambda idx: [\"a\", \"b\", \"c\"], id=\"No_intersection_idx\"),\n        pytest.param(lambda idx: idx, id=\"Equal_idx\"),\n        pytest.param(lambda idx: idx[::-1], id=\"Reversed_idx\"),\n    ],\n)\n@pytest.mark.parametrize(\"dtype\", [None, 
\"str\"])\n@pytest.mark.exclude_in_sanity\ndef test_constructor_from_modin_series(get_index, get_columns, dtype, df_mode_pair):\n    modin_df, pandas_df = create_test_df_in_defined_mode(\n        test_data_values[0], native=df_mode_pair[0]\n    )\n\n    modin_data = {f\"new_col{i}\": modin_df.iloc[:, i] for i in range(modin_df.shape[1])}\n    pandas_data = {\n        f\"new_col{i}\": pandas_df.iloc[:, i] for i in range(pandas_df.shape[1])\n    }\n\n    index = get_index(modin_df.index)\n    columns = get_columns(list(modin_data.keys()))\n\n    new_modin = pd.DataFrame(modin_data, index=index, columns=columns, dtype=dtype)\n    new_pandas = pandas.DataFrame(\n        pandas_data, index=index, columns=columns, dtype=dtype\n    )\n    df_equals(new_modin, new_pandas)\n"
  },
  {
    "path": "modin/tests/pandas/native_df_interoperability/test_pickle.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\nimport numpy as np\nimport pytest\n\nimport modin.pandas as pd\nfrom modin.config import PersistentPickle\nfrom modin.tests.pandas.native_df_interoperability.utils import (\n    create_test_df_in_defined_mode,\n)\nfrom modin.tests.pandas.utils import df_equals\n\n\n@pytest.fixture\ndef modin_df():\n    return pd.DataFrame({\"col1\": np.arange(1000), \"col2\": np.arange(2000, 3000)})\n\n\n@pytest.fixture\ndef modin_column(modin_df):\n    return modin_df[\"col1\"]\n\n\n@pytest.fixture(params=[True, False])\ndef persistent(request):\n    old = PersistentPickle.get()\n    PersistentPickle.put(request.param)\n    yield request.param\n    PersistentPickle.put(old)\n\n\ndef test__reduce__(df_mode_pair):\n    # `DataFrame.__reduce__` will be called implicitly when lambda expressions are\n    # pre-processed for the distributed engine.\n    dataframe_data = [\"Major League Baseball\", \"National Basketball Association\"]\n    abbr_md, abbr_pd = create_test_df_in_defined_mode(\n        dataframe_data, index=[\"MLB\", \"NBA\"], native=df_mode_pair[0]\n    )\n\n    dataframe_data = {\n        \"name\": [\"Mariners\", \"Lakers\"] * 500,\n        \"league_abbreviation\": 
[\"MLB\", \"NBA\"] * 500,\n    }\n    teams_md, teams_pd = create_test_df_in_defined_mode(\n        dataframe_data, native=df_mode_pair[1]\n    )\n\n    result_md = (\n        teams_md.set_index(\"name\")\n        .league_abbreviation.apply(lambda abbr: abbr_md[0].loc[abbr])\n        .rename(\"league\")\n    )\n\n    result_pd = (\n        teams_pd.set_index(\"name\")\n        .league_abbreviation.apply(lambda abbr: abbr_pd[0].loc[abbr])\n        .rename(\"league\")\n    )\n    df_equals(result_md, result_pd)\n"
  },
  {
    "path": "modin/tests/pandas/native_df_interoperability/test_window.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport matplotlib\nimport numpy as np\nimport pandas\n\nimport modin.pandas as pd\nfrom modin.config import NPartitions\nfrom modin.tests.pandas.native_df_interoperability.utils import (\n    create_test_df_in_defined_mode,\n)\nfrom modin.tests.pandas.utils import df_equals\n\nNPartitions.put(4)\n\n# Force matplotlib to not use any Xwindows backend.\nmatplotlib.use(\"Agg\")\n\n\ndef test_fillna_4660(df_mode_pair):\n    modin_df_1, pandas_df_1 = create_test_df_in_defined_mode(\n        {\"a\": [\"a\"], \"b\": [\"b\"], \"c\": [pd.NA]},\n        index=[\"row1\"],\n        native=df_mode_pair[0],\n    )\n    modin_df_2, pandas_df_2 = create_test_df_in_defined_mode(\n        {\"a\": [\"a\"], \"b\": [\"b\"], \"c\": [pd.NA]},\n        index=[\"row1\"],\n        native=df_mode_pair[1],\n    )\n    modin_result = modin_df_1[\"c\"].fillna(modin_df_2[\"b\"])\n    pandas_result = pandas_df_1[\"c\"].fillna(pandas_df_2[\"b\"])\n    df_equals(modin_result, pandas_result)\n\n\ndef test_fillna_dict_series(df_mode_pair):\n    frame_data = {\n        \"a\": [np.nan, 1, 2, np.nan, np.nan],\n        \"b\": [1, 2, 3, np.nan, np.nan],\n        \"c\": [np.nan, 1, 2, 3, 4],\n    }\n    df 
= pandas.DataFrame(frame_data)\n    modin_df = pd.DataFrame(frame_data)\n    modin_df_1, pandas_df_1 = create_test_df_in_defined_mode(\n        frame_data, native=df_mode_pair[0]\n    )\n    modin_df_2, pandas_df_2 = create_test_df_in_defined_mode(\n        frame_data, native=df_mode_pair[1]\n    )\n\n    df_equals(modin_df.fillna({\"a\": 0, \"b\": 5}), df.fillna({\"a\": 0, \"b\": 5}))\n\n    df_equals(\n        modin_df.fillna({\"a\": 0, \"b\": 5, \"d\": 7}),\n        df.fillna({\"a\": 0, \"b\": 5, \"d\": 7}),\n    )\n\n    # Series treated same as dict\n    df_equals(\n        modin_df_1.fillna(modin_df_2.max()), pandas_df_1.fillna(pandas_df_2.max())\n    )\n\n\ndef test_fillna_dataframe(df_mode_pair):\n    frame_data = {\n        \"a\": [np.nan, 1, 2, np.nan, np.nan],\n        \"b\": [1, 2, 3, np.nan, np.nan],\n        \"c\": [np.nan, 1, 2, 3, 4],\n    }\n    modin_df_1, pandas_df_1 = create_test_df_in_defined_mode(\n        frame_data, index=list(\"VWXYZ\"), native=df_mode_pair[0]\n    )\n    modin_df_2, pandas_df_2 = create_test_df_in_defined_mode(\n        {\"a\": [np.nan, 10, 20, 30, 40], \"b\": [50, 60, 70, 80, 90], \"foo\": [\"bar\"] * 5},\n        index=list(\"VWXuZ\"),\n        native=df_mode_pair[1],\n    )\n\n    # only those columns and indices which are shared get filled\n    df_equals(modin_df_1.fillna(modin_df_2), pandas_df_1.fillna(pandas_df_2))\n"
  },
  {
    "path": "modin/tests/pandas/native_df_interoperability/utils.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nfrom contextlib import contextmanager, nullcontext\n\nfrom modin import set_execution\nfrom modin.config import Engine, StorageFormat\nfrom modin.config import context as config_context\nfrom modin.config.envvars import Backend\nfrom modin.tests.pandas.utils import (\n    NoModinException,\n    create_test_dfs,\n    create_test_series,\n    df_equals,\n)\nfrom modin.tests.test_utils import current_execution_is_native\nfrom modin.utils import try_cast_to_pandas\n\n\n@contextmanager\ndef switch_to_native_execution():\n    engine = Engine.get()\n    storage_format = StorageFormat.get()\n    try:\n        set_execution(\"Native\", \"Native\")\n        yield\n    finally:\n        set_execution(engine=engine, storage_format=storage_format)\n\n\ndef create_test_df_in_defined_mode(\n    *args, post_fn=None, backend=None, native=None, **kwargs\n):\n    assert not current_execution_is_native(), \"already in native dataframe mode.\"\n\n    if not isinstance(native, bool):\n        raise ValueError(\"`native` should be True or False.\")\n\n    # Use the default backend unless native\n    hybrid_backend = \"Pandas\" if native else Backend.get()\n    with 
switch_to_native_execution() if native else nullcontext():\n        with config_context(AutoSwitchBackend=False, Backend=hybrid_backend):\n            modin_df, pandas_df = create_test_dfs(\n                *args, post_fn=post_fn, backend=backend, **kwargs\n            )\n            return modin_df, pandas_df\n\n\ndef create_test_series_in_defined_mode(\n    vals, sort=False, backend=None, native=None, **kwargs\n):\n    assert not current_execution_is_native(), \"already in native dataframe mode.\"\n\n    if not isinstance(native, bool):\n        raise ValueError(\"`native` should be True or False.\")\n\n    # Use the default backend unless native\n    hybrid_backend = \"Pandas\" if native else Backend.get()\n    with switch_to_native_execution() if native else nullcontext():\n        with config_context(AutoSwitchBackend=False, Backend=hybrid_backend):\n            modin_ser, pandas_ser = create_test_series(\n                vals, sort=sort, backend=backend, **kwargs\n            )\n        return modin_ser, pandas_ser\n\n\ndef eval_general_interop(\n    data,\n    backend,\n    operation,\n    df_mode_pair,\n    comparator=df_equals,\n    __inplace__=False,\n    expected_exception=None,\n    check_kwargs_callable=True,\n    md_extra_kwargs=None,\n    comparator_kwargs=None,\n    **kwargs,\n):\n    df1_native, df2_native = df_mode_pair\n    modin_df1, pandas_df1 = create_test_df_in_defined_mode(\n        data, backend=backend, native=df1_native\n    )\n    modin_df2, pandas_df2 = create_test_df_in_defined_mode(\n        data, backend=backend, native=df2_native\n    )\n    md_kwargs, pd_kwargs = {}, {}\n\n    def execute_callable(fn, inplace=False, md_kwargs={}, pd_kwargs={}):\n        try:\n            pd_result = fn(pandas_df1, pandas_df2, **pd_kwargs)\n        except Exception as pd_e:\n            try:\n                if inplace:\n                    _ = fn(modin_df1, modin_df2, **md_kwargs)\n                    try_cast_to_pandas(modin_df1)  # force 
materialization\n                else:\n                    try_cast_to_pandas(\n                        fn(modin_df1, modin_df2, **md_kwargs)\n                    )  # force materialization\n            except Exception as md_e:\n                assert isinstance(\n                    md_e, type(pd_e)\n                ), \"Got Modin Exception type {}, but pandas Exception type {} was expected\".format(\n                    type(md_e), type(pd_e)\n                )\n                if expected_exception:\n                    if Engine.get() == \"Ray\":\n                        from ray.exceptions import RayTaskError\n\n                        # unwrap ray exceptions from remote worker\n                        if isinstance(md_e, RayTaskError):\n                            md_e = md_e.args[0]\n                    assert (\n                        type(md_e) is type(expected_exception)\n                        and md_e.args == expected_exception.args\n                    ), f\"not acceptable Modin's exception: [{repr(md_e)}] expected {expected_exception}\"\n                    assert (\n                        pd_e.args == expected_exception.args\n                    ), f\"not acceptable Pandas' exception: [{repr(pd_e)}]\"\n                elif expected_exception is False:\n                    # The only way to disable exception message checking.\n                    pass\n                else:\n                    # It’s not enough that Modin and pandas have the same types of exceptions;\n                    # we need to explicitly specify the instance of an exception\n                    # (using `expected_exception`) in tests so that we can check exception messages.\n                    # This allows us to eliminate situations where exceptions are thrown\n                    # that we don't expect, which could hide different bugs.\n                    raise pd_e\n            else:\n                raise NoModinException(\n                    f\"Modin doesn't throw 
an exception, while pandas does: [{repr(pd_e)}]\"\n                )\n        else:\n            md_result = fn(modin_df1, modin_df2, **md_kwargs)\n            return (md_result, pd_result) if not inplace else (modin_df1, pandas_df1)\n\n    for key, value in kwargs.items():\n        if check_kwargs_callable and callable(value):\n            values = execute_callable(value)\n            # that means, that callable raised an exception\n            if values is None:\n                return\n            else:\n                md_value, pd_value = values\n        else:\n            md_value, pd_value = value, value\n\n        md_kwargs[key] = md_value\n        pd_kwargs[key] = pd_value\n\n        if md_extra_kwargs:\n            assert isinstance(md_extra_kwargs, dict)\n            md_kwargs.update(md_extra_kwargs)\n\n    values = execute_callable(\n        operation, md_kwargs=md_kwargs, pd_kwargs=pd_kwargs, inplace=__inplace__\n    )\n    if values is not None:\n        comparator(*values, **(comparator_kwargs or {}))\n"
  },
  {
    "path": "modin/tests/pandas/test_api.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport inspect\n\nimport numpy as np\nimport pandas\nimport pytest\n\nimport modin.pandas as pd\n\n_MODIN_EXTRA_ATTRIBUTES = (\n    # modin - namespace for accessing additional Modin functions that are not available in Pandas\n    \"modin\",\n    # get_backend - get storage and engine backend for the current DataFrame\n    \"get_backend\",\n    # set_backend - set storage and engine backend for the current DataFrame\n    \"set_backend\",\n    # move_to - set storage and engine backend for the current DataFrame\n    \"move_to\",\n    # is_backend_pinned, pin_backend, unpin_backend - change automatic switching behavior\n    \"is_backend_pinned\",\n    \"pin_backend\",\n    \"unpin_backend\",\n)\n\n\ndef test_top_level_api_equality():\n    modin_dir = [obj for obj in dir(pd) if obj[0] != \"_\"]\n    pandas_dir = [obj for obj in dir(pandas) if obj[0] != \"_\"]\n    missing_from_modin = set(pandas_dir) - set(modin_dir)\n    extra_in_modin = set(modin_dir) - set(pandas_dir)\n    ignore_pandas = [\n        \"annotations\",\n        \"np\",\n        \"tests\",\n        \"pandas\",\n        \"core\",\n        \"compat\",\n        \"util\",\n        \"offsets\",\n        
\"datetime\",\n        \"api\",\n        \"tseries\",\n        \"to_msgpack\",  # This one is experimental, and doesn't look finished\n        \"Panel\",  # This is deprecated and throws a warning every time.\n    ]\n\n    ignore_modin = [\n        \"indexing\",\n        \"iterator\",\n        \"series\",\n        \"accessor\",\n        \"base\",\n        \"utils\",\n        \"dataframe\",\n        \"groupby\",\n        \"general\",\n        \"datetime\",\n        \"warnings\",\n        \"os\",\n        \"series_utils\",\n        \"window\",\n    ]\n\n    assert not len(\n        missing_from_modin - set(ignore_pandas)\n    ), \"Differences found in API: {}\".format(missing_from_modin - set(ignore_pandas))\n\n    assert not len(\n        extra_in_modin - set(ignore_modin)\n    ), \"Differences found in API: {}\".format(extra_in_modin - set(ignore_modin))\n\n    difference = []\n    allowed_different = [\"Interval\", \"datetime\", \"StringDtype\"]\n\n    # Check that we have all keywords and defaults in pandas\n    for m in set(pandas_dir) - set(ignore_pandas):\n        if m in allowed_different:\n            continue\n        try:\n            pandas_sig = dict(inspect.signature(getattr(pandas, m)).parameters)\n        except (TypeError, ValueError):\n            continue\n        try:\n            modin_sig = dict(inspect.signature(getattr(pd, m)).parameters)\n        except (TypeError, ValueError):\n            continue\n\n        if not pandas_sig == modin_sig:\n            try:\n                append_val = (\n                    m,\n                    {\n                        i: pandas_sig[i]\n                        for i in pandas_sig.keys()\n                        if i not in modin_sig\n                        or pandas_sig[i].default != modin_sig[i].default\n                        and not (\n                            pandas_sig[i].default is np.nan\n                            and modin_sig[i].default is np.nan\n                        )\n           
         },\n                )\n            except Exception:\n                raise\n            try:\n                # This validates that there are actually values to add to the difference\n                # based on the condition above.\n                if len(list(append_val[-1])[-1]) > 0:\n                    difference.append(append_val)\n            except IndexError:\n                pass\n\n    assert not len(difference), \"Missing params found in API: {}\".format(difference)\n\n    # Check that we have no extra keywords or defaults\n    for m in set(pandas_dir) - set(ignore_pandas):\n        if m in allowed_different:\n            continue\n        try:\n            pandas_sig = dict(inspect.signature(getattr(pandas, m)).parameters)\n        except (TypeError, ValueError):\n            continue\n        try:\n            modin_sig = dict(inspect.signature(getattr(pd, m)).parameters)\n        except (TypeError, ValueError):\n            continue\n        if not pandas_sig == modin_sig:\n            try:\n                append_val = (\n                    m,\n                    {\n                        i: modin_sig[i]\n                        for i in modin_sig.keys()\n                        if i not in pandas_sig and i != \"query_compiler\"\n                    },\n                )\n            except Exception:\n                raise\n            try:\n                # This validates that there are actually values to add to the difference\n                # based on the condition above.\n                if len(list(append_val[-1])[-1]) > 0:\n                    difference.append(append_val)\n            except IndexError:\n                pass\n\n    assert not len(difference), \"Extra params found in API: {}\".format(difference)\n\n\ndef test_dataframe_api_equality():\n    modin_dir = [obj for obj in dir(pd.DataFrame) if obj[0] != \"_\"]\n    pandas_dir = [obj for obj in dir(pandas.DataFrame) if obj[0] != \"_\"]\n\n    ignore_in_pandas = 
[\"timetuple\"]\n    # modin - namespace for accessing additional Modin functions that are not available in Pandas\n    missing_from_modin = set(pandas_dir) - set(modin_dir)\n    assert not len(\n        missing_from_modin - set(ignore_in_pandas)\n    ), \"Differences found in API: {}\".format(\n        len(missing_from_modin - set(ignore_in_pandas))\n    )\n    assert not len(\n        set(modin_dir) - set(_MODIN_EXTRA_ATTRIBUTES) - set(pandas_dir)\n    ), \"Differences found in API: {}\".format(set(modin_dir) - set(pandas_dir))\n\n    assert_parameters_eq(\n        (pandas.DataFrame, pd.DataFrame),\n        modin_dir,\n        allowed_different=_MODIN_EXTRA_ATTRIBUTES,\n    )\n\n\ndef test_series_str_api_equality():\n    modin_dir = [obj for obj in dir(pd.Series.str) if obj[0] != \"_\"]\n    pandas_dir = [obj for obj in dir(pandas.Series.str) if obj[0] != \"_\"]\n\n    missing_from_modin = set(pandas_dir) - set(modin_dir)\n    assert not len(missing_from_modin), \"Differences found in API: {}\".format(\n        missing_from_modin\n    )\n    extra_in_modin = set(modin_dir) - set(pandas_dir)\n    assert not len(extra_in_modin), \"Differences found in API: {}\".format(\n        extra_in_modin\n    )\n    assert_parameters_eq((pandas.Series.str, pd.Series.str), modin_dir, [])\n\n\ndef test_series_dt_api_equality():\n    modin_dir = [obj for obj in dir(pd.Series.dt) if obj[0] != \"_\"]\n    pandas_dir = [obj for obj in dir(pandas.Series.dt) if obj[0] != \"_\"]\n\n    # should be deleted, but for some reason the check fails\n    # https://github.com/pandas-dev/pandas/pull/33595\n    ignore = [\"week\", \"weekofyear\"]\n    missing_from_modin = set(pandas_dir) - set(modin_dir) - set(ignore)\n    assert not len(missing_from_modin), \"Differences found in API: {}\".format(\n        missing_from_modin\n    )\n    extra_in_modin = set(modin_dir) - set(pandas_dir)\n    assert not len(extra_in_modin), \"Differences found in API: {}\".format(\n        extra_in_modin\n    )\n  
  assert_parameters_eq((pandas.Series.dt, pd.Series.dt), modin_dir, [])\n\n\ndef test_series_cat_api_equality():\n    modin_dir = [obj for obj in dir(pd.Series.cat) if obj[0] != \"_\"]\n    pandas_dir = [obj for obj in dir(pandas.Series.cat) if obj[0] != \"_\"]\n\n    missing_from_modin = set(pandas_dir) - set(modin_dir)\n    assert not len(missing_from_modin), \"Differences found in API: {}\".format(\n        len(missing_from_modin)\n    )\n    extra_in_modin = set(modin_dir) - set(pandas_dir)\n    assert not len(extra_in_modin), \"Differences found in API: {}\".format(\n        extra_in_modin\n    )\n    # all methods of `pandas.Series.cat` don't have any information about parameters,\n    # just method(*args, **kwargs)\n    assert_parameters_eq((pandas.core.arrays.Categorical, pd.Series.cat), modin_dir, [])\n\n\n@pytest.mark.parametrize(\"obj\", [\"DataFrame\", \"Series\"])\ndef test_sparse_accessor_api_equality(obj):\n    modin_dir = [x for x in dir(getattr(pd, obj).sparse) if x[0] != \"_\"]\n    pandas_dir = [x for x in dir(getattr(pandas, obj).sparse) if x[0] != \"_\"]\n\n    missing_from_modin = set(pandas_dir) - set(modin_dir)\n    assert not len(missing_from_modin), \"Differences found in API: {}\".format(\n        len(missing_from_modin)\n    )\n    extra_in_modin = set(modin_dir) - set(pandas_dir)\n    assert not len(extra_in_modin), \"Differences found in API: {}\".format(\n        extra_in_modin\n    )\n\n\n@pytest.mark.parametrize(\"obj\", [\"SeriesGroupBy\", \"DataFrameGroupBy\"])\ndef test_groupby_api_equality(obj):\n    modin_dir = [x for x in dir(getattr(pd.groupby, obj)) if x[0] != \"_\"]\n    pandas_dir = [x for x in dir(getattr(pandas.core.groupby, obj)) if x[0] != \"_\"]\n    # These attributes are not mentioned in the pandas documentation,\n    # but we might want to implement them someday.\n    ignore = [\"keys\", \"level\", \"grouper\"]\n    missing_from_modin = set(pandas_dir) - set(modin_dir) - set(ignore)\n    assert not 
len(missing_from_modin), \"Differences found in API: {}\".format(\n        len(missing_from_modin)\n    )\n    # FIXME: wrong inheritance\n    ignore = (\n        {\"boxplot\", \"corrwith\", \"dtypes\"} if obj == \"SeriesGroupBy\" else {\"boxplot\"}\n    ) | set(_MODIN_EXTRA_ATTRIBUTES)\n    extra_in_modin = (\n        set(modin_dir) - set(pandas_dir) - set(ignore) - set(_MODIN_EXTRA_ATTRIBUTES)\n    )\n    assert not len(extra_in_modin), \"Differences found in API: {}\".format(\n        extra_in_modin\n    )\n    assert_parameters_eq(\n        (getattr(pandas.core.groupby, obj), getattr(pd.groupby, obj)), modin_dir, ignore\n    )\n\n\ndef test_series_api_equality():\n    modin_dir = [obj for obj in dir(pd.Series) if obj[0] != \"_\"]\n    pandas_dir = [obj for obj in dir(pandas.Series) if obj[0] != \"_\"]\n\n    ignore = [\"timetuple\"]\n    missing_from_modin = set(pandas_dir) - set(modin_dir) - set(ignore)\n    assert not len(missing_from_modin), \"Differences found in API: {}\".format(\n        missing_from_modin\n    )\n\n    extra_in_modin = set(modin_dir) - set(_MODIN_EXTRA_ATTRIBUTES) - set(pandas_dir)\n    assert not len(extra_in_modin), \"Differences found in API: {}\".format(\n        extra_in_modin\n    )\n\n    assert_parameters_eq(\n        (pandas.Series, pd.Series), modin_dir, allowed_different=_MODIN_EXTRA_ATTRIBUTES\n    )\n\n\ndef assert_parameters_eq(objects, attributes, allowed_different):\n    pandas_obj, modin_obj = objects\n    difference = []\n\n    # Check that Modin functions/methods don't have extra params\n    for m in attributes:\n        if m in allowed_different:\n            continue\n        try:\n            pandas_sig = dict(inspect.signature(getattr(pandas_obj, m)).parameters)\n        except TypeError:\n            continue\n        try:\n            modin_sig = dict(inspect.signature(getattr(modin_obj, m)).parameters)\n        except TypeError:\n            continue\n\n        if not pandas_sig == modin_sig:\n            
append_val = (\n                m,\n                {\n                    i: pandas_sig[i]\n                    for i in pandas_sig.keys()\n                    if i not in modin_sig\n                    or pandas_sig[i].default != modin_sig[i].default\n                    and not (\n                        pandas_sig[i].default is np.nan\n                        and modin_sig[i].default is np.nan\n                    )\n                },\n            )\n            try:\n                # This validates that there are actually values to add to the difference\n                # based on the condition above.\n                if len(list(append_val[-1])[-1]) > 0:\n                    difference.append(append_val)\n            except IndexError:\n                pass\n    assert not len(difference), \"Missing params found in API: {}\".format(difference)\n\n    difference = []\n    # Check that Modin functions/methods have all params as pandas\n    for m in attributes:\n        if m in allowed_different:\n            continue\n        try:\n            pandas_sig = dict(inspect.signature(getattr(pandas_obj, m)).parameters)\n        except TypeError:\n            continue\n        try:\n            modin_sig = dict(inspect.signature(getattr(modin_obj, m)).parameters)\n        except TypeError:\n            continue\n\n        if not pandas_sig == modin_sig:\n            append_val = (\n                m,\n                {i: modin_sig[i] for i in modin_sig.keys() if i not in pandas_sig},\n            )\n            try:\n                # This validates that there are actually values to add to the difference\n                # based on the condition above.\n                if len(list(append_val[-1])[-1]) > 0:\n                    difference.append(append_val)\n            except IndexError:\n                pass\n    assert not len(difference), \"Extra params found in API: {}\".format(difference)\n"
  },
  {
    "path": "modin/tests/pandas/test_backend.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport platform\nimport re\nfrom unittest.mock import patch\n\nimport pandas\nimport pytest\nimport tqdm.auto\n\nimport modin.pandas as pd\nfrom modin.config import Backend\nfrom modin.config import context as config_context\nfrom modin.tests.pandas.utils import (\n    create_test_dfs,\n    default_to_pandas_ignore_string,\n    df_equals,\n)\n\nWINDOWS_RAY_SKIP_MARK = pytest.mark.skipif(\n    platform.system() == \"Windows\",\n    reason=(\n        \"Some windows tests with engine != ray use 2 cores, but that \"\n        + \"doesn't work with ray due to \"\n        + \"https://github.com/modin-project/modin/issues/7387\"\n    ),\n)\n\n# Some modin methods warn about defaulting to pandas at the API layer. That's\n# expected and not an error as it would be normally.\npytestmark = pytest.mark.filterwarnings(default_to_pandas_ignore_string)\n\n\ndef test_new_dataframe_uses_default_backend():\n    # We run this test with `Backend` set to just one value (instead of\n    # trying to look for every possible `Backend` value in the same pytest\n    # process) because switching to the MPI backend within a test process\n    # that's not set up to run MPI (i.e. 
because the test process has been\n    # started `mpiexec` instead of just `pytest`) would cause errors. We assume\n    # that CI runs this test file once with every possible `Backend`.\n    assert pd.DataFrame([1]).get_backend() == Backend.get()\n\n\n@pytest.mark.parametrize(\"setter_method\", [\"set_backend\", \"move_to\"])\n@pytest.mark.parametrize(\n    \"inplace_kwargs\",\n    [\n        pytest.param({\"inplace\": True}, id=\"inplace\"),\n        pytest.param({\"inplace\": False}, id=\"not_inplace\"),\n        pytest.param({}, id=\"no_inplace_kwargs\"),\n    ],\n)\n@pytest.mark.parametrize(\n    \"starting_backend, new_backend, expected_result_backend\",\n    [\n        pytest.param(Backend.get(), \"pandas\", \"Pandas\", id=\"current_to_pandas\"),\n        pytest.param(\"pandas\", Backend.get(), Backend.get(), id=\"pandas_to_current\"),\n        pytest.param(\n            Backend.get(), \"python_test\", \"Python_Test\", id=\"current_to_python\"\n        ),\n        pytest.param(\n            \"python_test\", Backend.get(), Backend.get(), id=\"python_to_current\"\n        ),\n        pytest.param(\"python_test\", \"pandas\", \"Pandas\", id=\"python_to_pandas1\"),\n        pytest.param(\"PYTHON_test\", \"PANDAS\", \"Pandas\", id=\"python_to_pandas2\"),\n        pytest.param(\"pandas\", \"python_test\", \"Python_Test\", id=\"pandas_to_python\"),\n        pytest.param(\"pandas\", \"pandas\", \"Pandas\", id=\"pandas_to_pandas\"),\n        pytest.param(\n            \"python_test\", \"python_test\", \"Python_Test\", id=\"python_to_python\"\n        ),\n        pytest.param(\n            \"ray\",\n            \"dask\",\n            \"Dask\",\n            id=\"ray_to_dask\",\n            marks=WINDOWS_RAY_SKIP_MARK,\n        ),\n        pytest.param(\n            \"dask\",\n            \"ray\",\n            \"Ray\",\n            id=\"dask_to_ray\",\n            marks=WINDOWS_RAY_SKIP_MARK,\n        ),\n        pytest.param(\n            \"ray\",\n            
\"python_test\",\n            \"Python_Test\",\n            id=\"ray_to_python\",\n            marks=WINDOWS_RAY_SKIP_MARK,\n        ),\n        pytest.param(\"dask\", \"python_test\", \"Python_Test\", id=\"dask_to_python\"),\n        pytest.param(\n            \"python_test\",\n            \"ray\",\n            \"Ray\",\n            id=\"python_to_ray\",\n            marks=WINDOWS_RAY_SKIP_MARK,\n        ),\n        pytest.param(\"python_test\", \"dask\", \"Dask\", id=\"python_to_dask\"),\n        pytest.param(\"ray\", \"ray\", \"Ray\", id=\"ray_to_ray\", marks=WINDOWS_RAY_SKIP_MARK),\n        pytest.param(\"dask\", \"dask\", \"Dask\", id=\"dask_to_dask\"),\n    ],\n)\n@pytest.mark.parametrize(\n    \"data_class\",\n    [\n        pytest.param(pd.DataFrame, id=\"dataframe\"),\n        pytest.param(pd.Series, id=\"series\"),\n    ],\n)\ndef test_set_valid_backend(\n    setter_method,\n    inplace_kwargs,\n    starting_backend,\n    new_backend,\n    data_class,\n    expected_result_backend,\n):\n    progress_iter_count = 2\n    with patch.object(\n        tqdm.auto, \"trange\", return_value=range(progress_iter_count)\n    ) as mock_trange, config_context(Backend=starting_backend):\n        original_df = data_class([1])\n        # convert to pandas for comparison while still on the `starting_backend`.\n        original_df_as_pandas = original_df.modin.to_pandas()\n        method_result = getattr(original_df, setter_method)(\n            new_backend, **inplace_kwargs\n        )\n        if inplace_kwargs.get(\"inplace\", False):\n            assert method_result is None\n            result_df = original_df\n        else:\n            assert method_result is not None\n            result_df = method_result\n        assert result_df.get_backend() == expected_result_backend\n        df_equals(result_df, original_df_as_pandas)\n        # The global Backend should remain the same even if we change the\n        # backend for a single dataframe.\n        assert Backend.get() 
== Backend.normalize(starting_backend)\n        if Backend.normalize(starting_backend) == Backend.normalize(\n            expected_result_backend\n        ):\n            mock_trange.assert_not_called()\n        else:\n            # trange constructor is only called once and the iterator is consumed\n            # progress_iter_count times, but we can't easily assert on the number of iterations\n            mock_trange.assert_called_once()\n\n\ndef test_same_backend():\n    with patch.object(\n        tqdm.auto, \"trange\", return_value=range(2)\n    ) as mock_trange, config_context(Backend=\"Python_Test\"):\n        df = pd.DataFrame([1])\n        new_df = df.set_backend(\"Python_Test\")\n        mock_trange.assert_not_called()\n        assert new_df.get_backend() == \"Python_Test\"\n        new_df = df.set_backend(\"Python_Test\", inplace=True)\n        mock_trange.assert_not_called()\n        assert new_df is None\n        assert df.get_backend() == \"Python_Test\"\n\n\ndef test_set_nonexistent_backend():\n    backend_choice_string = \", \".join(f\"'{choice}'\" for choice in Backend.choices)\n    with pytest.raises(\n        ValueError,\n        match=re.escape(\n            \"Unknown backend 'does_not_exist'. 
\"\n            + f\"Available backends are: {backend_choice_string}\"\n        ),\n    ):\n        pd.DataFrame([1]).set_backend(\"does_not_exist\")\n\n\n@pytest.mark.parametrize(\"backend\", [None, 1, [], {}])\ndef test_wrong_backend_type(backend):\n    with pytest.raises(\n        TypeError,\n        match=re.escape(\n            \"Backend value should be a string, but instead it is \"\n            + f\"{repr(backend)} of type {type(backend)}\"\n        ),\n    ):\n        pd.DataFrame([1]).set_backend(backend)\n\n\ndef test_get_backend_docstrings():\n    dataframe_method = pd.DataFrame.get_backend\n    series_method = pd.Series.get_backend\n    assert dataframe_method.__doc__ != series_method.__doc__\n    assert dataframe_method.__doc__ == series_method.__doc__.replace(\n        \"Series\", \"DataFrame\"\n    )\n\n\n@pytest.mark.parametrize(\"setter_method\", [\"set_backend\", \"move_to\"])\ndef test_set_backend_docstrings(setter_method):\n    dataframe_method = getattr(pd.DataFrame, setter_method)\n    series_method = getattr(pd.Series, setter_method)\n    assert dataframe_method.__doc__ != series_method.__doc__\n    assert dataframe_method.__doc__ == series_method.__doc__.replace(\n        \"Series\", \"DataFrame\"\n    )\n\n\nclass TestGroupbySetBackend:\n    @pytest.mark.parametrize(\"setter_method\", [\"set_backend\", \"move_to\"])\n    @pytest.mark.parametrize(\n        \"inplace_kwargs\",\n        [\n            pytest.param({\"inplace\": True}, id=\"inplace\"),\n            pytest.param({\"inplace\": False}, id=\"not_inplace\"),\n            pytest.param({}, id=\"no_inplace_kwargs\"),\n        ],\n    )\n    @pytest.mark.parametrize(\n        \"starting_backend, new_backend\",\n        [\n            pytest.param(Backend.get(), \"Pandas\", id=\"current_to_pandas\"),\n            pytest.param(\"Pandas\", Backend.get(), id=\"pandas_to_current\"),\n            pytest.param(Backend.get(), \"Python_Test\", id=\"current_to_python\"),\n            
pytest.param(\"Python_Test\", Backend.get(), id=\"python_to_current\"),\n            pytest.param(\"Python_Test\", \"Pandas\", id=\"python_to_pandas\"),\n            pytest.param(\"Pandas\", \"Python_Test\", id=\"pandas_to_python\"),\n        ],\n    )\n    @pytest.mark.parametrize(\n        \"by_level_factory\",\n        [\n            pytest.param(lambda df: (\"C\", None), id=\"by_string_column\"),\n            pytest.param(lambda df: ([\"C\", \"D\"], None), id=\"by_list_of_strings\"),\n            pytest.param(lambda df: (df[\"C\"], None), id=\"by_series\"),\n            pytest.param(lambda df: ([\"C\", df[\"D\"]], None), id=\"by_list_mixed\"),\n            pytest.param(lambda df: (pandas.Grouper(key=\"C\"), None), id=\"by_grouper\"),\n            pytest.param(lambda df: (None, 0), id=\"level_scalar\"),\n            pytest.param(lambda df: (None, [0, 1]), id=\"level_list\"),\n            pytest.param(\n                lambda df: ([\"C\", df[\"D\"]], None), id=\"by_mixed_string_series\"\n            ),\n        ],\n    )\n    def test_dataframe(\n        self,\n        setter_method,\n        inplace_kwargs,\n        starting_backend,\n        new_backend,\n        by_level_factory,\n    ):\n        \"\"\"Test set_backend functionality for DataFrame groupby objects with various 'by' and 'level' combinations.\"\"\"\n        with config_context(Backend=starting_backend):\n\n            def do_groupby(df):\n                by, level = by_level_factory(df)\n                return df.groupby(by=by, level=level)\n\n            inplace = inplace_kwargs.get(\"inplace\", False)\n            original_modin_df, original_pandas_df = create_test_dfs(\n                pandas.DataFrame(\n                    data={\n                        \"A\": [1, 2, 3, 4, 5, 6],\n                        \"B\": [10, 20, 30, 40, 50, 60],\n                        \"C\": [\"x\", \"y\", \"x\", \"y\", \"x\", \"y\"],\n                        \"D\": [\"p\", \"p\", \"q\", \"q\", \"r\", \"r\"],\n      
              },\n                    index=pd.MultiIndex.from_tuples(\n                        [\n                            (\"foo\", 1),\n                            (\"foo\", 2),\n                            (\"bar\", 1),\n                            (\"bar\", 2),\n                            (\"baz\", 1),\n                            (\"baz\", 2),\n                        ],\n                        names=[\"first\", \"second\"],\n                    ),\n                )\n            )\n\n            # Create DataFrame groupby object\n            original_groupby = do_groupby(original_modin_df)\n\n            setter_result = getattr(original_groupby, setter_method)(\n                new_backend, **inplace_kwargs\n            )\n\n            if inplace:\n                assert setter_result is None\n                result_groupby = original_groupby\n                # Verify that the underlying DataFrame's backend was also changed\n                assert original_groupby._df.get_backend() == new_backend\n            else:\n                assert setter_result is not original_groupby\n                result_groupby = setter_result\n                # Verify original DataFrame's backend was not changed\n                assert original_groupby._df.get_backend() == starting_backend\n\n            # Verify backend was changed\n            assert result_groupby.get_backend() == new_backend\n\n            # Verify that groupby still works correctly after backend switch\n            # Create a fresh groupby for comparison to avoid mixed backend states\n            pandas_groupby_sum = do_groupby(original_pandas_df).sum()\n            df_equals(\n                result_groupby.sum(),\n                pandas_groupby_sum,\n            )\n            if not inplace:\n                df_equals(\n                    original_groupby.sum(),\n                    pandas_groupby_sum,\n                )\n\n    @pytest.mark.parametrize(\"setter_method\", [\"set_backend\", 
\"move_to\"])\n    @pytest.mark.parametrize(\n        \"inplace_kwargs\",\n        [\n            pytest.param({\"inplace\": True}, id=\"inplace\"),\n            pytest.param({\"inplace\": False}, id=\"not_inplace\"),\n            pytest.param({}, id=\"no_inplace_kwargs\"),\n        ],\n    )\n    @pytest.mark.parametrize(\n        \"starting_backend, new_backend\",\n        [\n            pytest.param(Backend.get(), \"Pandas\", id=\"current_to_pandas\"),\n            pytest.param(\"Pandas\", Backend.get(), id=\"pandas_to_current\"),\n            pytest.param(Backend.get(), \"Python_Test\", id=\"current_to_python\"),\n            pytest.param(\"Python_Test\", Backend.get(), id=\"python_to_current\"),\n            pytest.param(\"Python_Test\", \"Pandas\", id=\"python_to_pandas\"),\n            pytest.param(\"Pandas\", \"Python_Test\", id=\"pandas_to_python\"),\n        ],\n    )\n    @pytest.mark.parametrize(\n        \"by_level_factory\",\n        [\n            pytest.param(lambda series: (None, 0), id=\"by_index_level_0\"),\n            pytest.param(\n                lambda series: (None, [0, 1]),\n                id=\"by_index_levels_list\",\n            ),\n            pytest.param(\n                lambda series: (pandas.Grouper(level=0), None),\n                id=\"by_grouper_level\",\n            ),\n            pytest.param(lambda series: (None, 0), id=\"level_scalar\"),\n            pytest.param(lambda series: (None, [0, 1]), id=\"level_list\"),\n            pytest.param(lambda series: (series, None), id=\"by_self\"),\n            pytest.param(lambda series: (series % 2, None), id=\"by_self_modulo_2\"),\n        ],\n    )\n    def test_series(\n        self,\n        setter_method,\n        inplace_kwargs,\n        starting_backend,\n        new_backend,\n        by_level_factory,\n    ):\n        \"\"\"Test set_backend functionality for Series groupby objects with various 'by' and 'level' combinations.\"\"\"\n        with 
config_context(Backend=starting_backend):\n            inplace = inplace_kwargs.get(\"inplace\", False)\n            # Create test data with MultiIndex to support level-based grouping\n            idx = pd.MultiIndex.from_tuples(\n                [\n                    (\"foo\", 1),\n                    (\"foo\", 2),\n                    (\"bar\", 1),\n                    (\"bar\", 2),\n                    (\"baz\", 1),\n                    (\"baz\", 2),\n                ],\n                names=[\"first\", \"second\"],\n            )\n            original_pandas_series = pandas.Series([1, 2, 1, 3, 4, 5], index=idx)\n            original_modin_series = pd.Series([1, 2, 1, 3, 4, 5], index=idx)\n\n            def do_groupby(series):\n                by, level = by_level_factory(series)\n                return series.groupby(by=by, level=level)\n\n            # Create Series groupby object\n            original_groupby = do_groupby(original_modin_series)\n\n            setter_result = getattr(original_groupby, setter_method)(\n                new_backend, **inplace_kwargs\n            )\n\n            if inplace:\n                assert setter_result is None\n                result_groupby = original_groupby\n                # Verify that the underlying Series's backend was also changed\n                assert original_groupby._df.get_backend() == new_backend\n            else:\n                assert setter_result is not original_groupby\n                result_groupby = setter_result\n                # Verify original Series's backend was not changed\n                assert original_groupby._df.get_backend() == starting_backend\n\n            assert result_groupby.get_backend() == new_backend\n\n            pandas_groupby_sum = do_groupby(original_pandas_series).sum()\n            df_equals(result_groupby.sum(), pandas_groupby_sum)\n            if not inplace:\n                df_equals(original_groupby.sum(), pandas_groupby_sum)\n\n\n# Tests for fallback progress 
printing when tqdm is not available\n@pytest.mark.parametrize(\n    \"switch_operation,expected_output\",\n    [\n        (\n            None,\n            \"Transfer: Python_... → Pandas      |                 ≃ (3, 1)    \",\n        ),\n        (\n            \"test_operation\",\n            \"Transfer: Python_... → Pandas      | test_operation  ≃ (3, 1)    \",\n        ),\n    ],\n)\n@patch(\"tqdm.auto.trange\", side_effect=ImportError(\"tqdm not available\"))\n@config_context(Backend=\"python_test\")\ndef test_fallback_progress_printing(\n    mock_trange, capsys, switch_operation, expected_output\n):\n    \"\"\"Test that fallback progress printing works when tqdm is not available and ShowBackendSwitchProgress is enabled.\"\"\"\n    df = pd.DataFrame([1, 2, 3])\n\n    df.set_backend(\"pandas\", switch_operation=switch_operation)\n\n    captured = capsys.readouterr()\n    assert expected_output in captured.err\n    assert captured.out == \"\"  # Nothing should go to stdout\n\n\n@config_context(Backend=\"python_test\")\ndef test_bigger_df_progress_message():\n    # Insiginificant digits in the size get truncated\n    df = pd.DataFrame([[1] * 144] * 121)\n    with patch.object(tqdm.auto, \"trange\", return_value=range(2)) as mock_trange:\n        df.set_backend(\"pandas\")\n        mock_trange.assert_called_once()\n        call_args = mock_trange.call_args\n        desc = call_args[1][\"desc\"]  # Get the 'desc' keyword argument\n        assert desc.startswith(\n            \"Transfer: Python_... 
→ Pandas      |                 ≃ (1e+02, 1e+02)\"\n        )\n\n\n@patch(\"tqdm.auto.trange\", side_effect=ImportError(\"tqdm not available\"))\n@config_context(Backend=\"python_test\")\ndef test_fallback_progress_printing_silent_when_disabled(mock_trange, capsys):\n    \"\"\"Test that fallback progress printing is silent when ShowBackendSwitchProgress is disabled.\"\"\"\n\n    df = pd.DataFrame([1, 2, 3])\n\n    with config_context(ShowBackendSwitchProgress=False):\n        df.set_backend(\"pandas\")\n\n    captured = capsys.readouterr()\n    assert captured.out == \"\"\n    assert captured.err == \"\"\n\n\n@config_context(Backend=\"python_test\")\ndef test_tqdm_progress_bar_disabled_when_backend_switch_progress_false(capsys):\n    \"\"\"Test that tqdm progress bar doesn't appear when ShowBackendSwitchProgress is disabled.\"\"\"\n    df = pd.DataFrame([1, 2, 3])\n\n    with config_context(ShowBackendSwitchProgress=False), patch(\n        \"tqdm.auto.trange\"\n    ) as mock_trange:\n        df.set_backend(\"pandas\")\n\n    mock_trange.assert_not_called()\n    captured = capsys.readouterr()\n    assert captured.out == \"\"\n    assert captured.err == \"\"\n"
  },
  {
    "path": "modin/tests/pandas/test_concat.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport numpy as np\nimport pandas\nimport pytest\n\nimport modin.pandas as pd\nfrom modin.config import NPartitions, StorageFormat\nfrom modin.pandas.io import from_pandas\nfrom modin.utils import get_current_execution\n\nfrom .utils import (\n    create_test_dfs,\n    default_to_pandas_ignore_string,\n    df_equals,\n    generate_dfs,\n    generate_multiindex_dfs,\n    generate_none_dfs,\n)\n\nNPartitions.put(4)\n\npytestmark = pytest.mark.filterwarnings(default_to_pandas_ignore_string)\n\n# Initialize env for storage format detection in @pytest.mark.*\npd.DataFrame()\n\n\ndef test_df_concat():\n    df, df2 = generate_dfs()\n\n    df_equals(pd.concat([df, df2]), pandas.concat([df, df2]))\n\n\ndef test_concat():\n    df, df2 = generate_dfs()\n    modin_df, modin_df2 = from_pandas(df), from_pandas(df2)\n\n    df_equals(pd.concat([modin_df, modin_df2]), pandas.concat([df, df2]))\n\n\ndef test_concat_with_series():\n    df, df2 = generate_dfs()\n    modin_df, modin_df2 = from_pandas(df), from_pandas(df2)\n    pandas_series = pandas.Series([1, 2, 3, 4], name=\"new_col\")\n\n    df_equals(\n        pd.concat([modin_df, modin_df2, pandas_series], axis=0),\n        
pandas.concat([df, df2, pandas_series], axis=0),\n    )\n\n    df_equals(\n        pd.concat([modin_df, modin_df2, pandas_series], axis=1),\n        pandas.concat([df, df2, pandas_series], axis=1),\n    )\n\n\ndef test_concat_on_index():\n    df, df2 = generate_dfs()\n    modin_df, modin_df2 = from_pandas(df), from_pandas(df2)\n\n    df_equals(\n        pd.concat([modin_df, modin_df2], axis=\"index\"),\n        pandas.concat([df, df2], axis=\"index\"),\n    )\n\n    df_equals(\n        pd.concat([modin_df, modin_df2], axis=\"rows\"),\n        pandas.concat([df, df2], axis=\"rows\"),\n    )\n\n    df_equals(\n        pd.concat([modin_df, modin_df2], axis=0), pandas.concat([df, df2], axis=0)\n    )\n\n\n@pytest.mark.parametrize(\"no_dup_cols\", [True, False])\n@pytest.mark.parametrize(\"different_len\", [True, False])\ndef test_concat_on_column(no_dup_cols, different_len):\n    df, df2 = generate_dfs()\n    if no_dup_cols:\n        df = df.drop(set(df.columns) & set(df2.columns), axis=\"columns\")\n    if different_len:\n        df = pandas.concat([df, df], ignore_index=True)\n\n    modin_df, modin_df2 = from_pandas(df), from_pandas(df2)\n\n    df_equals(\n        pd.concat([modin_df, modin_df2], axis=1), pandas.concat([df, df2], axis=1)\n    )\n\n    df_equals(\n        pd.concat([modin_df, modin_df2], axis=\"columns\"),\n        pandas.concat([df, df2], axis=\"columns\"),\n    )\n\n    modin_result = pd.concat(\n        [pd.Series(np.ones(10)), pd.Series(np.ones(10))], axis=1, ignore_index=True\n    )\n    pandas_result = pandas.concat(\n        [pandas.Series(np.ones(10)), pandas.Series(np.ones(10))],\n        axis=1,\n        ignore_index=True,\n    )\n    df_equals(modin_result, pandas_result)\n    assert modin_result.dtypes.equals(pandas_result.dtypes)\n\n\ndef test_invalid_axis_errors():\n    df, df2 = generate_dfs()\n    modin_df, modin_df2 = from_pandas(df), from_pandas(df2)\n\n    with pytest.raises(ValueError):\n        pd.concat([modin_df, modin_df2], 
axis=2)\n\n\ndef test_mixed_concat():\n    df, df2 = generate_dfs()\n    df3 = df.copy()\n\n    mixed_dfs = [from_pandas(df), from_pandas(df2), df3]\n\n    df_equals(pd.concat(mixed_dfs), pandas.concat([df, df2, df3]))\n\n\ndef test_mixed_inner_concat():\n    df, df2 = generate_dfs()\n    df3 = df.copy()\n\n    mixed_dfs = [from_pandas(df), from_pandas(df2), df3]\n\n    df_equals(\n        pd.concat(mixed_dfs, join=\"inner\"),\n        pandas.concat([df, df2, df3], join=\"inner\"),\n        # https://github.com/modin-project/modin/issues/5963\n        check_dtypes=False,\n    )\n\n\ndef test_mixed_none_concat():\n    df, df2 = generate_none_dfs()\n    df3 = df.copy()\n\n    mixed_dfs = [from_pandas(df), from_pandas(df2), df3]\n\n    df_equals(pd.concat(mixed_dfs), pandas.concat([df, df2, df3]))\n\n\ndef test_ignore_index_concat():\n    df, df2 = generate_dfs()\n\n    df_equals(\n        pd.concat([df, df2], ignore_index=True),\n        pandas.concat([df, df2], ignore_index=True),\n    )\n\n\ndef test_concat_non_subscriptable_keys():\n    frame_data = np.random.randint(0, 100, size=(2**10, 2**6))\n    df = pd.DataFrame(frame_data).add_prefix(\"col\")\n    pdf = pandas.DataFrame(frame_data).add_prefix(\"col\")\n\n    modin_dict = {\"c\": df.copy(), \"b\": df.copy()}\n    pandas_dict = {\"c\": pdf.copy(), \"b\": pdf.copy()}\n    modin_result = pd.concat(modin_dict.values(), keys=modin_dict.keys())\n    pandas_result = pandas.concat(pandas_dict.values(), keys=pandas_dict.keys())\n    df_equals(modin_result, pandas_result)\n\n\ndef test_concat_series_only():\n    modin_series = pd.Series(list(range(1000)))\n    pandas_series = pandas.Series(list(range(1000)))\n\n    df_equals(\n        pd.concat([modin_series, modin_series]),\n        pandas.concat([pandas_series, pandas_series]),\n    )\n\n\ndef test_concat_5776():\n    modin_data = {key: pd.Series(index=range(3)) for key in [\"a\", \"b\"]}\n    pandas_data = {key: pandas.Series(index=range(3)) for key in [\"a\", 
\"b\"]}\n    df_equals(\n        pd.concat(modin_data, axis=\"columns\"),\n        pandas.concat(pandas_data, axis=\"columns\"),\n    )\n\n\ndef test_concat_6840():\n    groupby_objs = []\n    for idx, lib in enumerate((pd, pandas)):\n        df1 = lib.DataFrame(\n            [[\"a\", 1], [\"b\", 2], [\"b\", 4]], columns=[\"letter\", \"number\"]\n        )\n        df1_g = df1.groupby(\"letter\", as_index=False)[\"number\"].agg(\"sum\")\n\n        df2 = lib.DataFrame(\n            [[\"a\", 3], [\"a\", 4], [\"b\", 1]], columns=[\"letter\", \"number\"]\n        )\n        df2_g = df2.groupby(\"letter\", as_index=False)[\"number\"].agg(\"sum\")\n        groupby_objs.append([df1_g, df2_g])\n\n    df_equals(\n        pd.concat(groupby_objs[0]),\n        pandas.concat(groupby_objs[1]),\n    )\n\n\ndef test_concat_with_empty_frame():\n    modin_empty_df = pd.DataFrame()\n    pandas_empty_df = pandas.DataFrame()\n    modin_row = pd.Series({0: \"a\", 1: \"b\"})\n    pandas_row = pandas.Series({0: \"a\", 1: \"b\"})\n    df_equals(\n        pd.concat([modin_empty_df, modin_row]),\n        pandas.concat([pandas_empty_df, pandas_row]),\n    )\n\n    md_empty1, pd_empty1 = create_test_dfs(index=[1, 2, 3])\n    md_empty2, pd_empty2 = create_test_dfs(index=[2, 3, 4])\n\n    df_equals(\n        pd.concat([md_empty1, md_empty2], axis=0),\n        pandas.concat([pd_empty1, pd_empty2], axis=0),\n    )\n    df_equals(\n        pd.concat([md_empty1, md_empty2], axis=1),\n        pandas.concat([pd_empty1, pd_empty2], axis=1),\n    )\n\n\n@pytest.mark.parametrize(\"axis\", [0, 1])\n@pytest.mark.parametrize(\"names\", [False, True])\ndef test_concat_multiindex(axis, names):\n    pd_df1, pd_df2 = generate_multiindex_dfs(axis=axis)\n    md_df1, md_df2 = map(from_pandas, [pd_df1, pd_df2])\n\n    keys = [\"first\", \"second\"]\n    if names:\n        names = [str(i) for i in np.arange(pd_df1.axes[axis].nlevels + 1)]\n    else:\n        names = None\n\n    df_equals(\n        pd.concat([md_df1, 
md_df2], keys=keys, axis=axis, names=names),\n        pandas.concat([pd_df1, pd_df2], keys=keys, axis=axis, names=names),\n    )\n\n\n@pytest.mark.parametrize(\"axis\", [0, 1])\ndef test_concat_dictionary(axis):\n    pandas_df, pandas_df2 = generate_dfs()\n    modin_df, modin_df2 = from_pandas(pandas_df), from_pandas(pandas_df2)\n\n    df_equals(\n        pd.concat({\"A\": modin_df, \"B\": modin_df2}, axis=axis),\n        pandas.concat({\"A\": pandas_df, \"B\": pandas_df2}, axis=axis),\n    )\n\n\n@pytest.mark.parametrize(\"sort\", [False, True])\n@pytest.mark.parametrize(\"join\", [\"inner\", \"outer\"])\n@pytest.mark.parametrize(\"axis\", [0, 1])\ndef test_sort_order(sort, join, axis):\n    pandas_df = pandas.DataFrame({\"c\": [3], \"d\": [4]}, columns=[\"d\", \"c\"])\n    pandas_df2 = pandas.DataFrame({\"a\": [1], \"b\": [2]}, columns=[\"b\", \"a\"])\n    modin_df, modin_df2 = from_pandas(pandas_df), from_pandas(pandas_df2)\n    pandas_concat = pandas.concat([pandas_df, pandas_df2], join=join, sort=sort)\n    modin_concat = pd.concat([modin_df, modin_df2], join=join, sort=sort)\n    df_equals(\n        pandas_concat,\n        modin_concat,\n        # https://github.com/modin-project/modin/issues/5963\n        check_dtypes=join != \"inner\",\n    )\n    assert list(pandas_concat.columns) == list(modin_concat.columns)\n\n\n@pytest.mark.parametrize(\n    \"data1, index1, data2, index2\",\n    [\n        (None, None, None, None),\n        (None, None, {\"A\": [1, 2, 3]}, pandas.Index([1, 2, 3], name=\"Test\")),\n        ({\"A\": [1, 2, 3]}, pandas.Index([1, 2, 3], name=\"Test\"), None, None),\n        ({\"A\": [1, 2, 3]}, None, None, None),\n        (None, None, {\"A\": [1, 2, 3]}, None),\n        (None, pandas.Index([1, 2, 3], name=\"Test\"), None, None),\n        (None, None, None, pandas.Index([1, 2, 3], name=\"Test\")),\n    ],\n)\n@pytest.mark.parametrize(\"axis\", [0, 1])\n@pytest.mark.parametrize(\"join\", [\"inner\", \"outer\"])\ndef test_concat_empty(data1, 
index1, data2, index2, axis, join):\n    pdf1 = pandas.DataFrame(data1, index=index1)\n    pdf2 = pandas.DataFrame(data2, index=index2)\n    pdf = pandas.concat((pdf1, pdf2), axis=axis, join=join)\n    mdf1 = pd.DataFrame(data1, index=index1)\n    mdf2 = pd.DataFrame(data2, index=index2)\n    mdf = pd.concat((mdf1, mdf2), axis=axis, join=join)\n    df_equals(\n        pdf,\n        mdf,\n        # https://github.com/modin-project/modin/issues/5963\n        check_dtypes=join != \"inner\",\n    )\n\n\ndef test_concat_empty_df_series():\n    pdf = pandas.concat((pandas.DataFrame({\"A\": [1, 2, 3]}), pandas.Series()))\n    mdf = pd.concat((pd.DataFrame({\"A\": [1, 2, 3]}), pd.Series()))\n    df_equals(\n        pdf,\n        mdf,\n        # https://github.com/modin-project/modin/issues/5964\n        check_dtypes=False,\n    )\n    pdf = pandas.concat((pandas.DataFrame(), pandas.Series([1, 2, 3])))\n    mdf = pd.concat((pd.DataFrame(), pd.Series([1, 2, 3])))\n    df_equals(\n        pdf,\n        mdf,\n        # https://github.com/modin-project/modin/issues/5964\n        check_dtypes=False,\n    )\n\n\n@pytest.mark.skipif(\n    StorageFormat.get() != \"Base\",\n    reason=\"https://github.com/modin-project/modin/issues/5696\",\n)\n@pytest.mark.parametrize(\"col_type\", [None, \"str\"])\n@pytest.mark.parametrize(\"df1_cols\", [0, 90, 100])\n@pytest.mark.parametrize(\"df2_cols\", [0, 90, 100])\n@pytest.mark.parametrize(\"df1_rows\", [0, 100])\n@pytest.mark.parametrize(\"df2_rows\", [0, 100])\n@pytest.mark.parametrize(\"idx_type\", [None, \"str\"])\n@pytest.mark.parametrize(\"ignore_index\", [True, False])\n@pytest.mark.parametrize(\"sort\", [True, False])\n@pytest.mark.parametrize(\"join\", [\"inner\", \"outer\"])\ndef test_concat_different_num_cols(\n    col_type,\n    df1_cols,\n    df2_cols,\n    df1_rows,\n    df2_rows,\n    idx_type,\n    ignore_index,\n    sort,\n    join,\n):\n    def create_frame(frame_type, ncols, nrows):\n        def to_str(val):\n            
return f\"str_{val}\"\n\n        off = 0\n        data = {}\n        for n in range(1, ncols + 1):\n            row = range(off + 1, off + nrows + 1)\n            if col_type == \"str\":\n                row = map(to_str, row)\n            data[f\"Col_{n}\"] = list(row)\n            off += nrows\n\n        idx = None\n        if idx_type == \"str\":\n            idx = pandas.Index(map(to_str, range(1, nrows + 1)), name=f\"Index_{nrows}\")\n        df = frame_type(data=data, index=idx)\n        return df\n\n    def concat(frame_type, lib):\n        df1 = create_frame(frame_type, df1_cols, df1_rows)\n        df2 = create_frame(frame_type, df2_cols, df2_rows)\n        return lib.concat([df1, df2], ignore_index=ignore_index, sort=sort, join=join)\n\n    mdf = concat(pd.DataFrame, pd)\n    pdf = concat(pandas.DataFrame, pandas)\n    df_equals(\n        pdf,\n        mdf,\n        # Empty slicing causes this bug:\n        # https://github.com/modin-project/modin/issues/5974\n        check_dtypes=not (\n            get_current_execution() == \"BaseOnPython\"\n            and any(o == 0 for o in (df1_cols, df2_cols, df1_rows, df2_rows))\n        ),\n    )\n"
  },
  {
    "path": "modin/tests/pandas/test_expanding.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport numpy as np\nimport pandas\nimport pytest\n\nimport modin.pandas as pd\nfrom modin.config import NPartitions\nfrom modin.tests.test_utils import (\n    current_execution_is_native,\n    warns_that_defaulting_to_pandas_if,\n)\n\nfrom .utils import (\n    create_test_dfs,\n    create_test_series,\n    df_equals,\n    eval_general,\n    test_data,\n    test_data_keys,\n    test_data_values,\n)\n\nNPartitions.put(4)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"min_periods\", [None, 5])\n@pytest.mark.parametrize(\"axis\", [0, 1])\n@pytest.mark.parametrize(\n    \"method, kwargs\",\n    [\n        (\"count\", {}),\n        (\"sum\", {}),\n        (\"mean\", {}),\n        (\"median\", {}),\n        (\"skew\", {}),\n        (\"kurt\", {}),\n        (\"var\", {\"ddof\": 0}),\n        (\"std\", {\"ddof\": 0}),\n        (\"min\", {}),\n        (\"max\", {}),\n        (\"rank\", {}),\n        (\"sem\", {\"ddof\": 0}),\n        (\"quantile\", {\"q\": 0.1}),\n    ],\n)\ndef test_dataframe(data, min_periods, axis, method, kwargs):\n    eval_general(\n        *create_test_dfs(data),\n        lambda df: 
getattr(df.expanding(min_periods=min_periods, axis=axis), method)(\n            **kwargs\n        )\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"min_periods\", [None, 5])\n@pytest.mark.parametrize(\"axis\", [0, 1])\n@pytest.mark.parametrize(\"method\", [\"corr\", \"cov\"])\ndef test_dataframe_corr_cov(data, min_periods, axis, method):\n    with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):\n        eval_general(\n            *create_test_dfs(data),\n            lambda df: getattr(\n                df.expanding(min_periods=min_periods, axis=axis), method\n            )()\n        )\n\n\n@pytest.mark.parametrize(\"method\", [\"corr\", \"cov\"])\ndef test_dataframe_corr_cov_with_self(method):\n    mdf, pdf = create_test_dfs(test_data[\"float_nan_data\"])\n    with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):\n        eval_general(\n            mdf,\n            pdf,\n            lambda df, other: getattr(df.expanding(), method)(other=other),\n            other=pdf,\n            md_extra_kwargs={\"other\": mdf},\n        )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"min_periods\", [None, 5])\ndef test_dataframe_agg(data, min_periods):\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n    pandas_expanded = pandas_df.expanding(\n        min_periods=min_periods,\n        axis=0,\n    )\n    modin_expanded = modin_df.expanding(\n        min_periods=min_periods,\n        axis=0,\n    )\n    # aggregates are only supported on axis 0\n    df_equals(modin_expanded.aggregate(np.sum), pandas_expanded.aggregate(np.sum))\n    df_equals(\n        pandas_expanded.aggregate([np.sum, np.mean]),\n        modin_expanded.aggregate([np.sum, np.mean]),\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"min_periods\", [None, 
5])\n@pytest.mark.parametrize(\n    \"method, kwargs\",\n    [\n        (\"count\", {}),\n        (\"sum\", {}),\n        (\"mean\", {}),\n        (\"median\", {}),\n        (\"skew\", {}),\n        (\"kurt\", {}),\n        (\"corr\", {}),\n        (\"cov\", {}),\n        (\"var\", {\"ddof\": 0}),\n        (\"std\", {\"ddof\": 0}),\n        (\"min\", {}),\n        (\"max\", {}),\n        (\"rank\", {}),\n        (\"sem\", {\"ddof\": 0}),\n        (\"quantile\", {\"q\": 0.1}),\n    ],\n)\ndef test_series(data, min_periods, method, kwargs):\n    eval_general(\n        *create_test_series(data),\n        lambda df: getattr(df.expanding(min_periods=min_periods), method)(**kwargs)\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"min_periods\", [None, 5])\ndef test_series_agg(data, min_periods):\n    modin_series, pandas_series = create_test_series(data)\n    pandas_expanded = pandas_series.expanding(min_periods=min_periods)\n    modin_expanded = modin_series.expanding(min_periods=min_periods)\n\n    df_equals(modin_expanded.aggregate(np.sum), pandas_expanded.aggregate(np.sum))\n    df_equals(\n        pandas_expanded.aggregate([np.sum, np.mean]),\n        modin_expanded.aggregate([np.sum, np.mean]),\n    )\n\n\n@pytest.mark.parametrize(\"method\", [\"corr\", \"cov\"])\ndef test_series_corr_cov_with_self(method):\n    mdf, pdf = create_test_series(test_data[\"float_nan_data\"])\n    eval_general(\n        mdf,\n        pdf,\n        lambda df, other: getattr(df.expanding(), method)(other=other),\n        other=pdf,\n        md_extra_kwargs={\"other\": mdf},\n    )\n"
  },
  {
    "path": "modin/tests/pandas/test_general.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport numpy as np\nimport pandas\nimport pytest\nfrom numpy.testing import assert_array_equal\n\nimport modin.pandas as pd\nfrom modin.pandas.io import to_pandas\nfrom modin.pandas.testing import assert_frame_equal\nfrom modin.tests.test_utils import (\n    current_execution_is_native,\n    df_or_series_using_native_execution,\n    warns_that_defaulting_to_pandas_if,\n)\nfrom modin.utils import get_current_execution\n\nfrom .utils import (\n    bool_arg_keys,\n    bool_arg_values,\n    create_test_dfs,\n    df_equals,\n    eval_general,\n    is_native_shallow_copy,\n    sort_if_range_partitioning,\n    sort_index_for_equal_values,\n    test_data_keys,\n    test_data_values,\n)\n\npytestmark = pytest.mark.filterwarnings(\n    \"default:`DataFrame.insert` for empty DataFrame is not currently supported.*:UserWarning\"\n)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"append_na\", [True, False])\n@pytest.mark.parametrize(\"op\", [\"isna\", \"isnull\", \"notna\", \"notnull\"])\ndef test_isna_isnull_notna_notnull(data, append_na, op):\n    pandas_df = pandas.DataFrame(data)\n    modin_df = 
pd.DataFrame(pandas_df)\n    if append_na:\n        pandas_df[\"NONE_COL\"] = None\n        pandas_df[\"NAN_COL\"] = np.nan\n        modin_df[\"NONE_COL\"] = None\n        modin_df[\"NAN_COL\"] = np.nan\n\n    pandas_result = getattr(pandas, op)(pandas_df)\n    modin_result = getattr(pd, op)(modin_df)\n    df_equals(modin_result, pandas_result)\n\n    modin_result = getattr(pd, op)(pd.Series([1, np.nan, 2]))\n    pandas_result = getattr(pandas, op)(pandas.Series([1, np.nan, 2]))\n    df_equals(modin_result, pandas_result)\n\n    assert pd.isna(np.nan) == pandas.isna(np.nan)\n\n\ndef test_merge():\n    frame_data = {\n        \"col1\": [0, 1, 2, 3],\n        \"col2\": [4, 5, 6, 7],\n        \"col3\": [8, 9, 0, 1],\n        \"col4\": [2, 4, 5, 6],\n    }\n\n    modin_df = pd.DataFrame(frame_data)\n    pandas_df = pandas.DataFrame(frame_data)\n\n    frame_data2 = {\"col1\": [0, 1, 2], \"col2\": [1, 5, 6]}\n    modin_df2 = pd.DataFrame(frame_data2)\n    pandas_df2 = pandas.DataFrame(frame_data2)\n\n    join_types = [\"outer\", \"inner\"]\n    for how in join_types:\n        with warns_that_defaulting_to_pandas_if(\n            how == \"outer\" and not df_or_series_using_native_execution(modin_df)\n        ):\n            modin_result = pd.merge(modin_df, modin_df2, how=how)\n        pandas_result = pandas.merge(pandas_df, pandas_df2, how=how)\n        df_equals(modin_result, pandas_result)\n\n        # left_on and right_index\n        with warns_that_defaulting_to_pandas_if(\n            not df_or_series_using_native_execution(modin_df)\n        ):\n            modin_result = pd.merge(\n                modin_df, modin_df2, how=how, left_on=\"col1\", right_index=True\n            )\n        pandas_result = pandas.merge(\n            pandas_df, pandas_df2, how=how, left_on=\"col1\", right_index=True\n        )\n        df_equals(modin_result, pandas_result)\n\n        # left_index and right_on\n        with warns_that_defaulting_to_pandas_if(\n            not 
df_or_series_using_native_execution(modin_df)\n        ):\n            modin_result = pd.merge(\n                modin_df, modin_df2, how=how, left_index=True, right_on=\"col1\"\n            )\n        pandas_result = pandas.merge(\n            pandas_df, pandas_df2, how=how, left_index=True, right_on=\"col1\"\n        )\n        df_equals(modin_result, pandas_result)\n\n        # left_on and right_on col1\n        with warns_that_defaulting_to_pandas_if(\n            how == \"outer\" and not df_or_series_using_native_execution(modin_df)\n        ):\n            modin_result = pd.merge(\n                modin_df, modin_df2, how=how, left_on=\"col1\", right_on=\"col1\"\n            )\n        pandas_result = pandas.merge(\n            pandas_df, pandas_df2, how=how, left_on=\"col1\", right_on=\"col1\"\n        )\n        df_equals(modin_result, pandas_result)\n\n        # left_on and right_on col2\n        with warns_that_defaulting_to_pandas_if(\n            how == \"outer\" and not df_or_series_using_native_execution(modin_df)\n        ):\n            modin_result = pd.merge(\n                modin_df, modin_df2, how=how, left_on=\"col2\", right_on=\"col2\"\n            )\n        pandas_result = pandas.merge(\n            pandas_df, pandas_df2, how=how, left_on=\"col2\", right_on=\"col2\"\n        )\n        df_equals(modin_result, pandas_result)\n\n        # left_index and right_index\n        modin_result = pd.merge(\n            modin_df, modin_df2, how=how, left_index=True, right_index=True\n        )\n        pandas_result = pandas.merge(\n            pandas_df, pandas_df2, how=how, left_index=True, right_index=True\n        )\n        df_equals(modin_result, pandas_result)\n\n    s = pd.Series(frame_data.get(\"col1\"))\n    with pytest.raises(ValueError):\n        pd.merge(s, modin_df2)\n\n    with pytest.raises(TypeError):\n        pd.merge(\"Non-valid type\", modin_df2)\n\n\ndef test_merge_ordered():\n    data_a = {\n        \"key\": list(\"aceace\"),\n   
     \"lvalue\": [1, 2, 3, 1, 2, 3],\n        \"group\": list(\"aaabbb\"),\n    }\n    data_b = {\"key\": list(\"bcd\"), \"rvalue\": [1, 2, 3]}\n\n    modin_df_a = pd.DataFrame(data_a)\n    modin_df_b = pd.DataFrame(data_b)\n\n    with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):\n        df = pd.merge_ordered(\n            modin_df_a, modin_df_b, fill_method=\"ffill\", left_by=\"group\"\n        )\n        assert isinstance(df, pd.DataFrame)\n\n    with pytest.raises(TypeError):\n        pd.merge_ordered(data_a, data_b, fill_method=\"ffill\", left_by=\"group\")\n\n\n@pytest.mark.parametrize(\"right_index\", [None, [0] * 5], ids=[\"default\", \"non_unique\"])\ndef test_merge_asof(right_index):\n    left = pd.DataFrame({\"a\": [1, 5, 10], \"left_val\": [\"a\", \"b\", \"c\"]})\n    right = pd.DataFrame(\n        {\"a\": [1, 2, 3, 6, 7], \"right_val\": [1, 2, 3, 6, 7]}, index=right_index\n    )\n\n    with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):\n        df = pd.merge_asof(left, right, on=\"a\")\n        assert isinstance(df, pd.DataFrame)\n\n    with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):\n        df = pd.merge_asof(left, right, on=\"a\", allow_exact_matches=False)\n        assert isinstance(df, pd.DataFrame)\n\n    with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):\n        df = pd.merge_asof(left, right, on=\"a\", direction=\"forward\")\n        assert isinstance(df, pd.DataFrame)\n\n    with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):\n        df = pd.merge_asof(left, right, on=\"a\", direction=\"nearest\")\n        assert isinstance(df, pd.DataFrame)\n\n    left = pd.DataFrame({\"left_val\": [\"a\", \"b\", \"c\"]}, index=[1, 5, 10])\n    right = pd.DataFrame({\"right_val\": [1, 2, 3, 6, 7]}, index=[1, 2, 3, 6, 7])\n\n    with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):\n        df = 
pd.merge_asof(left, right, left_index=True, right_index=True)\n        assert isinstance(df, pd.DataFrame)\n\n    with pytest.raises(ValueError):\n        pd.merge_asof(\n            {\"left_val\": [\"a\", \"b\", \"c\"]},\n            {\"right_val\": [1, 2, 3, 6, 7]},\n            left_index=True,\n            right_index=True,\n        )\n\n\ndef test_merge_asof_on_variations():\n    \"\"\"on=,left_on=,right_on=,right_index=,left_index= options match Pandas.\"\"\"\n    left = {\"a\": [1, 5, 10], \"left_val\": [\"a\", \"b\", \"c\"]}\n    left_index = [6, 8, 12]\n    right = {\"a\": [1, 2, 3, 6, 7], \"right_val\": [\"d\", \"e\", \"f\", \"g\", \"h\"]}\n    right_index = [6, 7, 8, 9, 15]\n    pandas_left, pandas_right = (\n        pandas.DataFrame(left, index=left_index),\n        pandas.DataFrame(right, index=right_index),\n    )\n    modin_left, modin_right = (\n        pd.DataFrame(left, index=left_index),\n        pd.DataFrame(right, index=right_index),\n    )\n    for on_arguments in [\n        {\"on\": \"a\"},\n        {\"left_on\": \"a\", \"right_on\": \"a\"},\n        {\"left_on\": \"a\", \"right_index\": True},\n        {\"left_index\": True, \"right_on\": \"a\"},\n        {\"left_index\": True, \"right_index\": True},\n    ]:\n        pandas_merged = pandas.merge_asof(pandas_left, pandas_right, **on_arguments)\n        with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):\n            modin_merged = pd.merge_asof(modin_left, modin_right, **on_arguments)\n        df_equals(pandas_merged, modin_merged)\n\n\ndef test_merge_asof_suffixes():\n    \"\"\"Suffix variations are handled the same as Pandas.\"\"\"\n    left = {\"a\": [1, 5, 10]}\n    right = {\"a\": [2, 3, 6]}\n    pandas_left, pandas_right = (pandas.DataFrame(left), pandas.DataFrame(right))\n    modin_left, modin_right = pd.DataFrame(left), pd.DataFrame(right)\n    for suffixes in [(\"a\", \"b\"), (False, \"c\"), (\"d\", False)]:\n        pandas_merged = pandas.merge_asof(\n       
     pandas_left,\n            pandas_right,\n            left_index=True,\n            right_index=True,\n            suffixes=suffixes,\n        )\n        with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):\n            modin_merged = pd.merge_asof(\n                modin_left,\n                modin_right,\n                left_index=True,\n                right_index=True,\n                suffixes=suffixes,\n            )\n        df_equals(pandas_merged, modin_merged)\n\n    with pytest.raises(ValueError):\n        pandas.merge_asof(\n            pandas_left,\n            pandas_right,\n            left_index=True,\n            right_index=True,\n            suffixes=(False, False),\n        )\n    with pytest.raises(ValueError), warns_that_defaulting_to_pandas_if(\n        not current_execution_is_native()\n    ):\n        pd.merge_asof(\n            modin_left,\n            modin_right,\n            left_index=True,\n            right_index=True,\n            suffixes=(False, False),\n        )\n\n\ndef test_merge_asof_bad_arguments():\n    left = {\"a\": [1, 5, 10], \"b\": [5, 7, 9]}\n    right = {\"a\": [2, 3, 6], \"b\": [6, 5, 20]}\n    pandas_left, pandas_right = (pandas.DataFrame(left), pandas.DataFrame(right))\n    modin_left, modin_right = pd.DataFrame(left), pd.DataFrame(right)\n\n    # Can't mix by with left_by/right_by\n    with pytest.raises(ValueError):\n        pandas.merge_asof(\n            pandas_left, pandas_right, on=\"a\", by=\"b\", left_by=\"can't do with by\"\n        )\n    with pytest.raises(ValueError), warns_that_defaulting_to_pandas_if(\n        not current_execution_is_native()\n    ):\n        pd.merge_asof(\n            modin_left, modin_right, on=\"a\", by=\"b\", left_by=\"can't do with by\"\n        )\n    with pytest.raises(ValueError):\n        pandas.merge_asof(\n            pandas_left, pandas_right, by=\"b\", on=\"a\", right_by=\"can't do with by\"\n        )\n    with pytest.raises(ValueError), 
warns_that_defaulting_to_pandas_if(\n        not current_execution_is_native()\n    ):\n        pd.merge_asof(\n            modin_left, modin_right, by=\"b\", on=\"a\", right_by=\"can't do with by\"\n        )\n\n    # Can't mix on with left_on/right_on\n    with pytest.raises(ValueError):\n        pandas.merge_asof(pandas_left, pandas_right, on=\"a\", left_on=\"can't do with by\")\n    with pytest.raises(ValueError), warns_that_defaulting_to_pandas_if(\n        not current_execution_is_native()\n    ):\n        pd.merge_asof(modin_left, modin_right, on=\"a\", left_on=\"can't do with by\")\n    with pytest.raises(ValueError):\n        pandas.merge_asof(\n            pandas_left, pandas_right, on=\"a\", right_on=\"can't do with by\"\n        )\n    with pytest.raises(ValueError), warns_that_defaulting_to_pandas_if(\n        not current_execution_is_native()\n    ):\n        pd.merge_asof(modin_left, modin_right, on=\"a\", right_on=\"can't do with by\")\n\n    # Can't mix left_index with left_on or on, similarly for right.\n    with pytest.raises(ValueError), warns_that_defaulting_to_pandas_if(\n        not current_execution_is_native()\n    ):\n        pd.merge_asof(modin_left, modin_right, on=\"a\", right_index=True)\n    with pytest.raises(ValueError), warns_that_defaulting_to_pandas_if(\n        not current_execution_is_native()\n    ):\n        pd.merge_asof(\n            modin_left, modin_right, left_on=\"a\", right_on=\"a\", right_index=True\n        )\n    with pytest.raises(ValueError), warns_that_defaulting_to_pandas_if(\n        not current_execution_is_native()\n    ):\n        pd.merge_asof(modin_left, modin_right, on=\"a\", left_index=True)\n    with pytest.raises(ValueError), warns_that_defaulting_to_pandas_if(\n        not current_execution_is_native()\n    ):\n        pd.merge_asof(\n            modin_left, modin_right, left_on=\"a\", right_on=\"a\", left_index=True\n        )\n\n    # Need both left and right\n    with pytest.raises(Exception):  # 
Pandas bug, didn't validate inputs sufficiently\n        pandas.merge_asof(pandas_left, pandas_right, left_on=\"a\")\n    with pytest.raises(ValueError), warns_that_defaulting_to_pandas_if(\n        not current_execution_is_native()\n    ):\n        pd.merge_asof(modin_left, modin_right, left_on=\"a\")\n    with pytest.raises(Exception):  # Pandas bug, didn't validate inputs sufficiently\n        pandas.merge_asof(pandas_left, pandas_right, right_on=\"a\")\n    with pytest.raises(ValueError), warns_that_defaulting_to_pandas_if(\n        not current_execution_is_native()\n    ):\n        pd.merge_asof(modin_left, modin_right, right_on=\"a\")\n    with pytest.raises(ValueError):\n        pandas.merge_asof(pandas_left, pandas_right)\n    with pytest.raises(ValueError), warns_that_defaulting_to_pandas_if(\n        not current_execution_is_native()\n    ):\n        pd.merge_asof(modin_left, modin_right)\n\n\ndef test_merge_asof_merge_options():\n    modin_quotes = pd.DataFrame(\n        {\n            \"time\": [\n                pd.Timestamp(\"2016-05-25 13:30:00.023\"),\n                pd.Timestamp(\"2016-05-25 13:30:00.023\"),\n                pd.Timestamp(\"2016-05-25 13:30:00.030\"),\n                pd.Timestamp(\"2016-05-25 13:30:00.041\"),\n                pd.Timestamp(\"2016-05-25 13:30:00.048\"),\n                pd.Timestamp(\"2016-05-25 13:30:00.049\"),\n                pd.Timestamp(\"2016-05-25 13:30:00.072\"),\n                pd.Timestamp(\"2016-05-25 13:30:00.075\"),\n            ],\n            \"ticker\": [\"GOOG\", \"MSFT\", \"MSFT\", \"MSFT\", \"GOOG\", \"AAPL\", \"GOOG\", \"MSFT\"],\n            \"bid\": [720.50, 51.95, 51.97, 51.99, 720.50, 97.99, 720.50, 52.01],\n            \"ask\": [720.93, 51.96, 51.98, 52.00, 720.93, 98.01, 720.88, 52.03],\n        }\n    )\n    modin_trades = pd.DataFrame(\n        {\n            \"time\": [\n                pd.Timestamp(\"2016-05-25 13:30:00.023\"),\n                pd.Timestamp(\"2016-05-25 
13:30:00.038\"),\n                pd.Timestamp(\"2016-05-25 13:30:00.048\"),\n                pd.Timestamp(\"2016-05-25 13:30:00.048\"),\n                pd.Timestamp(\"2016-05-25 13:30:00.048\"),\n            ],\n            \"ticker2\": [\"MSFT\", \"MSFT\", \"GOOG\", \"GOOG\", \"AAPL\"],\n            \"price\": [51.95, 51.95, 720.77, 720.92, 98.0],\n            \"quantity\": [75, 155, 100, 100, 100],\n        }\n    )\n    pandas_quotes, pandas_trades = to_pandas(modin_quotes), to_pandas(modin_trades)\n\n    # left_by + right_by\n    with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):\n        modin_result = pd.merge_asof(\n            modin_quotes,\n            modin_trades,\n            on=\"time\",\n            left_by=\"ticker\",\n            right_by=\"ticker2\",\n        )\n    df_equals(\n        pandas.merge_asof(\n            pandas_quotes,\n            pandas_trades,\n            on=\"time\",\n            left_by=\"ticker\",\n            right_by=\"ticker2\",\n        ),\n        modin_result,\n    )\n\n    # Just by:\n    pandas_trades[\"ticker\"] = pandas_trades[\"ticker2\"]\n    modin_trades[\"ticker\"] = modin_trades[\"ticker2\"]\n    with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):\n        modin_result = pd.merge_asof(\n            modin_quotes,\n            modin_trades,\n            on=\"time\",\n            by=\"ticker\",\n        )\n    df_equals(\n        pandas.merge_asof(\n            pandas_quotes,\n            pandas_trades,\n            on=\"time\",\n            by=\"ticker\",\n        ),\n        modin_result,\n    )\n\n    # Tolerance\n    with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):\n        modin_result = pd.merge_asof(\n            modin_quotes,\n            modin_trades,\n            on=\"time\",\n            by=\"ticker\",\n            tolerance=pd.Timedelta(\"2ms\"),\n        )\n    df_equals(\n        pandas.merge_asof(\n            
pandas_quotes,\n            pandas_trades,\n            on=\"time\",\n            by=\"ticker\",\n            tolerance=pd.Timedelta(\"2ms\"),\n        ),\n        modin_result,\n    )\n\n    # Direction\n    with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):\n        modin_result = pd.merge_asof(\n            modin_quotes,\n            modin_trades,\n            on=\"time\",\n            by=\"ticker\",\n            direction=\"forward\",\n        )\n    df_equals(\n        pandas.merge_asof(\n            pandas_quotes,\n            pandas_trades,\n            on=\"time\",\n            by=\"ticker\",\n            direction=\"forward\",\n        ),\n        modin_result,\n    )\n\n    # Allow exact matches\n    with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):\n        modin_result = pd.merge_asof(\n            modin_quotes,\n            modin_trades,\n            on=\"time\",\n            by=\"ticker\",\n            tolerance=pd.Timedelta(\"10ms\"),\n            allow_exact_matches=False,\n        )\n    df_equals(\n        pandas.merge_asof(\n            pandas_quotes,\n            pandas_trades,\n            on=\"time\",\n            by=\"ticker\",\n            tolerance=pd.Timedelta(\"10ms\"),\n            allow_exact_matches=False,\n        ),\n        modin_result,\n    )\n\n\ndef test_pivot():\n    test_df = pd.DataFrame(\n        {\n            \"foo\": [\"one\", \"one\", \"one\", \"two\", \"two\", \"two\"],\n            \"bar\": [\"A\", \"B\", \"C\", \"A\", \"B\", \"C\"],\n            \"baz\": [1, 2, 3, 4, 5, 6],\n            \"zoo\": [\"x\", \"y\", \"z\", \"q\", \"w\", \"t\"],\n        }\n    )\n\n    df = pd.pivot(test_df, index=\"foo\", columns=\"bar\", values=\"baz\")\n    assert isinstance(df, pd.DataFrame)\n\n    with pytest.raises(ValueError):\n        pd.pivot(test_df[\"bar\"], index=\"foo\", columns=\"bar\", values=\"baz\")\n\n    if not (get_current_execution() == \"BaseOnPython\" or 
current_execution_is_native()):\n        # FIXME: Failed for some reason on 'BaseOnPython' and 'NativeOnNative'\n        # https://github.com/modin-project/modin/issues/6240\n        df_equals(\n            pd.pivot(test_df, columns=\"bar\"),\n            pandas.pivot(test_df._to_pandas(), columns=\"bar\"),\n        )\n\n        df_equals(\n            pd.pivot(test_df, index=\"foo\", columns=\"bar\"),\n            pandas.pivot(test_df._to_pandas(), index=\"foo\", columns=\"bar\"),\n        )\n\n\ndef test_pivot_values_is_none():\n    test_df = pd.DataFrame(\n        {\n            \"foo\": [\"one\", \"one\", \"one\", \"two\", \"two\", \"two\"],\n            \"bar\": [\"A\", \"B\", \"C\", \"A\", \"B\", \"C\"],\n            \"baz\": [1, 2, 3, 4, 5, 6],\n            \"zoo\": [\"x\", \"y\", \"z\", \"q\", \"w\", \"t\"],\n        }\n    )\n    df = pd.pivot(test_df, index=\"foo\", columns=\"bar\")\n    assert isinstance(df, pd.DataFrame)\n\n\ndef test_pivot_table():\n    test_df = pd.DataFrame(\n        {\n            \"A\": [\"foo\", \"foo\", \"foo\", \"foo\", \"foo\", \"bar\", \"bar\", \"bar\", \"bar\"],\n            \"B\": [\"one\", \"one\", \"one\", \"two\", \"two\", \"one\", \"one\", \"two\", \"two\"],\n            \"C\": [\n                \"small\",\n                \"large\",\n                \"large\",\n                \"small\",\n                \"small\",\n                \"large\",\n                \"small\",\n                \"small\",\n                \"large\",\n            ],\n            \"D\": [1, 2, 2, 3, 3, 4, 5, 6, 7],\n            \"E\": [2, 4, 5, 5, 6, 6, 8, 9, 9],\n        }\n    )\n\n    df = pd.pivot_table(\n        test_df, values=\"D\", index=[\"A\", \"B\"], columns=[\"C\"], aggfunc=np.sum\n    )\n    assert isinstance(df, pd.DataFrame)\n\n    with pytest.raises(ValueError):\n        pd.pivot_table(\n            test_df[\"C\"], values=\"D\", index=[\"A\", \"B\"], columns=[\"C\"], aggfunc=np.sum\n        )\n\n\ndef test_unique():\n    
comparator = lambda *args: sort_if_range_partitioning(  # noqa: E731\n        *args, comparator=assert_array_equal\n    )\n\n    modin_result = pd.unique([2, 1, 3, 3])\n    pandas_result = pandas.unique([2, 1, 3, 3])\n    comparator(modin_result, pandas_result)\n    assert modin_result.shape == pandas_result.shape\n\n    modin_result = pd.unique(pd.Series([2] + [1] * 5))\n    pandas_result = pandas.unique(pandas.Series([2] + [1] * 5))\n    comparator(modin_result, pandas_result)\n    assert modin_result.shape == pandas_result.shape\n\n    modin_result = pd.unique(\n        pd.Series([pd.Timestamp(\"20160101\"), pd.Timestamp(\"20160101\")])\n    )\n    pandas_result = pandas.unique(\n        pandas.Series([pandas.Timestamp(\"20160101\"), pandas.Timestamp(\"20160101\")])\n    )\n    comparator(modin_result, pandas_result)\n    assert modin_result.shape == pandas_result.shape\n\n    modin_result = pd.unique(\n        pd.Series(\n            [\n                pd.Timestamp(\"20160101\", tz=\"US/Eastern\"),\n                pd.Timestamp(\"20160101\", tz=\"US/Eastern\"),\n            ]\n        )\n    )\n    pandas_result = pandas.unique(\n        pandas.Series(\n            [\n                pandas.Timestamp(\"20160101\", tz=\"US/Eastern\"),\n                pandas.Timestamp(\"20160101\", tz=\"US/Eastern\"),\n            ]\n        )\n    )\n    comparator(modin_result, pandas_result)\n    assert modin_result.shape == pandas_result.shape\n\n    modin_result = pd.unique(\n        pd.Index(\n            [\n                pd.Timestamp(\"20160101\", tz=\"US/Eastern\"),\n                pd.Timestamp(\"20160101\", tz=\"US/Eastern\"),\n            ]\n        )\n    )\n    pandas_result = pandas.unique(\n        pandas.Index(\n            [\n                pandas.Timestamp(\"20160101\", tz=\"US/Eastern\"),\n                pandas.Timestamp(\"20160101\", tz=\"US/Eastern\"),\n            ]\n        )\n    )\n    comparator(modin_result, pandas_result)\n    assert 
modin_result.shape == pandas_result.shape\n\n    modin_result = pd.unique(pd.Series(pd.Categorical(list(\"baabc\"))))\n    pandas_result = pandas.unique(pandas.Series(pandas.Categorical(list(\"baabc\"))))\n    comparator(modin_result, pandas_result)\n    assert modin_result.shape == pandas_result.shape\n\n\n@pytest.mark.parametrize(\"normalize, bins, dropna\", [(True, 3, False)])\ndef test_value_counts(normalize, bins, dropna):\n    # We sort indices for Modin and pandas result because of issue #1650\n    values = np.array([3, 1, 2, 3, 4, np.nan])\n    modin_result = sort_index_for_equal_values(\n        pd.value_counts(values, normalize=normalize, ascending=False), False\n    )\n    pandas_result = sort_index_for_equal_values(\n        pandas.value_counts(values, normalize=normalize, ascending=False), False\n    )\n    df_equals(modin_result, pandas_result)\n\n    with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):\n        modin_result = sort_index_for_equal_values(\n            pd.value_counts(values, bins=bins, ascending=False), False\n        )\n    pandas_result = sort_index_for_equal_values(\n        pandas.value_counts(values, bins=bins, ascending=False), False\n    )\n    df_equals(modin_result, pandas_result)\n\n    modin_result = sort_index_for_equal_values(\n        pd.value_counts(values, dropna=dropna, ascending=True), True\n    )\n    pandas_result = sort_index_for_equal_values(\n        pandas.value_counts(values, dropna=dropna, ascending=True), True\n    )\n    df_equals(modin_result, pandas_result)\n\n\ndef test_to_datetime():\n    # DataFrame input for to_datetime\n    modin_df = pd.DataFrame({\"year\": [2015, 2016], \"month\": [2, 3], \"day\": [4, 5]})\n    pandas_df = pandas.DataFrame({\"year\": [2015, 2016], \"month\": [2, 3], \"day\": [4, 5]})\n    df_equals(pd.to_datetime(modin_df), pandas.to_datetime(pandas_df))\n\n    # Series input for to_datetime\n    modin_s = pd.Series([\"3/11/2000\", \"3/12/2000\", 
\"3/13/2000\"] * 1000)\n    pandas_s = pandas.Series([\"3/11/2000\", \"3/12/2000\", \"3/13/2000\"] * 1000)\n    df_equals(pd.to_datetime(modin_s), pandas.to_datetime(pandas_s))\n\n    # Other inputs for to_datetime\n    value = 1490195805\n    assert pd.to_datetime(value, unit=\"s\") == pandas.to_datetime(value, unit=\"s\")\n    value = 1490195805433502912\n    assert pd.to_datetime(value, unit=\"ns\") == pandas.to_datetime(value, unit=\"ns\")\n    value = [1, 2, 3]\n    assert pd.to_datetime(value, unit=\"D\", origin=pd.Timestamp(\"2000-01-01\")).equals(\n        pandas.to_datetime(value, unit=\"D\", origin=pandas.Timestamp(\"2000-01-01\"))\n    )\n\n\ndef test_to_datetime_inplace_side_effect():\n    # See GH#3063\n    times = list(range(1617993360, 1618193360))\n    values = list(range(215441, 415441))\n    modin_df = pd.DataFrame({\"time\": times, \"value\": values})\n    pandas_df = pandas.DataFrame({\"time\": times, \"value\": values})\n    df_equals(\n        pd.to_datetime(modin_df[\"time\"], unit=\"s\"),\n        pandas.to_datetime(pandas_df[\"time\"], unit=\"s\"),\n    )\n\n\n@pytest.mark.parametrize(\n    \"data, errors, downcast\",\n    [\n        ([\"1.0\", \"2\", -3], \"raise\", None),\n        ([\"1.0\", \"2\", -3], \"raise\", \"float\"),\n        ([\"1.0\", \"2\", -3], \"raise\", \"signed\"),\n        ([\"apple\", \"1.0\", \"2\", -3], \"ignore\", None),\n        ([\"apple\", \"1.0\", \"2\", -3], \"coerce\", None),\n    ],\n)\ndef test_to_numeric(data, errors, downcast):\n    modin_series = pd.Series(data)\n    pandas_series = pandas.Series(data)\n    modin_result = pd.to_numeric(modin_series, errors=errors, downcast=downcast)\n    pandas_result = pandas.to_numeric(pandas_series, errors=errors, downcast=downcast)\n    df_equals(modin_result, pandas_result)\n\n\n@pytest.mark.parametrize(\"retbins\", bool_arg_values, ids=bool_arg_keys)\ndef test_qcut(retbins):\n    # test case from https://github.com/modin-project/modin/issues/5610\n    pandas_series = 
pandas.Series(range(10))\n    modin_series = pd.Series(range(10))\n    pandas_result = pandas.qcut(pandas_series, 4, retbins=retbins)\n    # NOTE that qcut() defaults to pandas at the API layer.\n    with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):\n        modin_result = pd.qcut(modin_series, 4, retbins=retbins)\n    if retbins:\n        df_equals(modin_result[0], pandas_result[0])\n        df_equals(modin_result[0].cat.categories, pandas_result[0].cat.categories)\n        assert_array_equal(modin_result[1], pandas_result[1])\n    else:\n        df_equals(modin_result, pandas_result)\n        df_equals(modin_result.cat.categories, pandas_result.cat.categories)\n\n    # test case for fallback to pandas, taken from pandas docs\n    pandas_result = pandas.qcut(range(5), 4)\n    modin_result = pd.qcut(range(5), 4)\n    df_equals(modin_result, pandas_result)\n\n\n@pytest.mark.parametrize(\n    \"bins, labels\",\n    [\n        pytest.param(\n            [-int(1e18), -1000, 0, 1000, 2000, int(1e18)],\n            [\n                \"-inf_to_-1000\",\n                \"-1000_to_0\",\n                \"0_to_1000\",\n                \"1000_to_2000\",\n                \"2000_to_inf\",\n            ],\n            id=\"bin_list_spanning_entire_range_with_custom_labels\",\n        ),\n        pytest.param(\n            [-int(1e18), -1000, 0, 1000, 2000, int(1e18)],\n            None,\n            id=\"bin_list_spanning_entire_range_with_default_labels\",\n        ),\n        pytest.param(\n            [-1000, 0, 1000, 2000], None, id=\"bin_list_not_spanning_entire_range\"\n        ),\n        pytest.param(\n            10,\n            [f\"custom_label{i}\" for i in range(9)],\n            id=\"int_bin_10_with_custom_labels\",\n        ),\n        pytest.param(1, None, id=\"int_bin_1_with_default_labels\"),\n        pytest.param(-1, None, id=\"int_bin_-1_with_default_labels\"),\n        pytest.param(111, None, 
id=\"int_bin_111_with_default_labels\"),\n    ],\n)\n@pytest.mark.parametrize(\"retbins\", bool_arg_values, ids=bool_arg_keys)\ndef test_cut(retbins, bins, labels):\n    # Would use `eval_general` here, but `eval_general` expects the operation\n    # to be supported by Modin, and so errors out when we give the defaulting\n    # to pandas UserWarning. We could get around this by using\n    # @pytest.mark.filterwarnings(\"ignore\"), but then `eval_general` fails because\n    # sometimes the return type of pd.cut is an np.ndarray, and `eval_general` does\n    # not know how to handle that.\n    try:\n        pd_result = pandas.cut(\n            pandas.Series(range(1000)), retbins=retbins, bins=bins, labels=labels\n        )\n    except Exception as pd_e:\n        with pytest.raises(Exception) as md_e:\n            with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):\n                md_result = pd.cut(\n                    pd.Series(range(1000)), retbins=retbins, bins=bins, labels=labels\n                )\n        assert isinstance(\n            md_e.value, type(pd_e)\n        ), f\"Got Modin Exception type {type(md_e.value)}, but pandas Exception type {type(pd_e)} was expected\"\n    else:\n        with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):\n            md_result = pd.cut(\n                pd.Series(range(1000)), retbins=retbins, bins=bins, labels=labels\n            )\n        if not isinstance(pd_result, tuple):\n            df_equals(md_result, pd_result)\n        else:\n            assert isinstance(\n                md_result, tuple\n            ), \"Modin returned single value, but pandas returned tuple of values\"\n            for pd_res, md_res in zip(pd_result, md_result):\n                if isinstance(pd_res, pandas.Series):\n                    df_equals(pd_res, md_res)\n                else:\n                    np.testing.assert_array_equal(pd_res, md_res)\n\n\ndef test_cut_fallback():\n    # Test 
case for falling back to pandas for cut.\n    pandas_result = pandas.cut(range(5), 4)\n    # note that we default to pandas at the API layer here, so we warn\n    # regardless of whether we are on native execution.\n    with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):\n        modin_result = pd.cut(range(5), 4)\n    df_equals(modin_result, pandas_result)\n\n\n@pytest.mark.parametrize(\n    \"data\", [test_data_values[0], []], ids=[\"test_data_values[0]\", \"[]\"]\n)\ndef test_to_pandas_indices(data):\n    md_df = pd.DataFrame(data)\n    index = pandas.MultiIndex.from_tuples(\n        [(i, i * 2) for i in np.arange(len(md_df) + 1)], names=[\"A\", \"B\"]\n    ).drop(0)\n    columns = pandas.MultiIndex.from_tuples(\n        [(i, i * 2) for i in np.arange(len(md_df.columns) + 1)], names=[\"A\", \"B\"]\n    ).drop(0)\n\n    md_df.index = index\n    md_df.columns = columns\n\n    pd_df = md_df._to_pandas()\n\n    for axis in [0, 1]:\n        assert md_df.axes[axis].equals(\n            pd_df.axes[axis]\n        ), f\"Indices at axis {axis} are different!\"\n        assert not hasattr(md_df.axes[axis], \"equal_levels\") or md_df.axes[\n            axis\n        ].equal_levels(\n            pd_df.axes[axis]\n        ), f\"Levels of indices at axis {axis} are different!\"\n\n\ndef test_to_pandas_read_only_issue():\n    df = pd.DataFrame(\n        [\n            [np.nan, 2, np.nan, 0],\n            [3, 4, np.nan, 1],\n            [np.nan, np.nan, np.nan, np.nan],\n            [np.nan, 3, np.nan, 4],\n        ],\n        columns=list(\"ABCD\"),\n    )\n    pdf = df._to_pandas()\n    # there shouldn't be `ValueError: putmask: output array is read-only`\n    pdf.fillna(0, inplace=True)\n\n\ndef test_to_numpy_read_only_issue():\n    df = pd.DataFrame(\n        [\n            [np.nan, 2, np.nan, 0],\n            [3, 4, np.nan, 1],\n            [np.nan, np.nan, np.nan, np.nan],\n            [np.nan, 3, np.nan, 4],\n        ],\n        
columns=list(\"ABCD\"),\n    )\n    arr = df.to_numpy()\n    # there shouldn't be `ValueError: putmask: output array is read-only`\n    np.putmask(arr, np.isnan(arr), 0)\n\n\ndef test_create_categorical_dataframe_with_duplicate_column_name():\n    # This tests for https://github.com/modin-project/modin/issues/4312\n    pd_df = pandas.DataFrame(\n        {\n            \"a\": pandas.Categorical([1, 2]),\n            \"b\": [4, 5],\n            \"c\": pandas.Categorical([7, 8]),\n        }\n    )\n    pd_df.columns = [\"a\", \"b\", \"a\"]\n    md_df = pd.DataFrame(pd_df)\n    # Use assert_frame_equal instead of the common modin util df_equals because\n    # we should check dtypes of the new categorical with check_dtype=True.\n    # TODO(https://github.com/modin-project/modin/issues/3804): Make\n    # df_equals set check_dtype=True and use df_equals instead.\n    assert_frame_equal(\n        md_df._to_pandas(),\n        pd_df,\n        check_dtype=True,\n        check_index_type=True,\n        check_column_type=True,\n        check_names=True,\n        check_categorical=True,\n    )\n\n\n@pytest.mark.skipif(\n    get_current_execution() != \"BaseOnPython\",\n    reason=\"This test make sense only on BaseOnPython execution.\",\n)\n@pytest.mark.parametrize(\n    \"func, regex\",\n    [\n        (lambda df: df.mean(), r\"DataFrame\\.mean\"),\n        (lambda df: df + df, r\"DataFrame\\.add\"),\n        (lambda df: df.index, r\"DataFrame\\.get_axis\\(0\\)\"),\n        (\n            lambda df: df.drop(columns=\"col1\").squeeze().repeat(2),\n            r\"Series\\.repeat\",\n        ),\n        (lambda df: df.groupby(\"col1\").prod(), r\"GroupBy\\.prod\"),\n        (lambda df: df.rolling(1).count(), r\"Rolling\\.count\"),\n    ],\n)\ndef test_default_to_pandas_warning_message(func, regex):\n    data = {\"col1\": [1, 2, 3], \"col2\": [4, 5, 6]}\n    df = pd.DataFrame(data)\n\n    with pytest.warns(UserWarning, match=regex):\n        func(df)\n\n\ndef 
test_empty_dataframe():\n    df = pd.DataFrame(columns=[\"a\", \"b\"])\n    # NOTE that we default to pandas at the API layer.\n    with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):\n        df[(df.a == 1) & (df.b == 2)]\n\n\ndef test_empty_series():\n    s = pd.Series([])\n    pd.to_numeric(s)\n\n\n@pytest.mark.parametrize(\n    \"arg\",\n    [[1, 2], [\"a\"], 1, \"a\"],\n    ids=[\"list_of_ints\", \"list_of_invalid_strings\", \"scalar\", \"invalid_scalar\"],\n)\ndef test_to_timedelta(arg, request):\n    # This test case comes from\n    # https://github.com/modin-project/modin/issues/4966\n    expected_exception = None\n    if request.node.callspec.id == \"list_of_invalid_strings\":\n        expected_exception = ValueError(\"Could not convert 'a' to NumPy timedelta\")\n    elif request.node.callspec.id == \"invalid_scalar\":\n        expected_exception = ValueError(\"unit abbreviation w/o a number\")\n    eval_general(\n        pd,\n        pandas,\n        lambda lib: lib.to_timedelta(arg),\n        expected_exception=expected_exception,\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_series_to_timedelta(data):\n    def make_frame(lib):\n        series = lib.Series(\n            next(iter(data.values())) if isinstance(data, dict) else data\n        )\n        return lib.to_timedelta(series).to_frame(name=\"timedelta\")\n\n    eval_general(pd, pandas, make_frame)\n\n\n@pytest.mark.parametrize(\n    \"key\",\n    [[\"col0\"], \"col0\", \"col1\"],\n    ids=[\"valid_list_of_string\", \"valid_string\", \"invalid_string\"],\n)\ndef test_get(key):\n    modin_df, pandas_df = create_test_dfs({\"col0\": [0, 1]})\n    eval_general(modin_df, pandas_df, lambda df: df.get(key))\n\n\n@pytest.mark.xfail(\n    condition=is_native_shallow_copy(),\n    reason=\"native pandas backend does not deep copy inputs by default\",\n    strict=True,\n)\ndef test_df_immutability():\n    \"\"\"\n    Verify that 
modifications of the source data don't propagate to Modin's DataFrame objects.\n    \"\"\"\n    src_data = pandas.DataFrame({\"a\": [1]})\n\n    md_df = pd.DataFrame(src_data)\n    src_data.iloc[0, 0] = 100\n\n    assert md_df._to_pandas().iloc[0, 0] == 1\n\n\ndef test_np_array_function():\n    # first argument is a numpy array, second argument is modin frame\n    assert_array_equal(\n        np.where(np.array([1, 0]), pd.Series([9, 9]), [-1, -1]), np.array([9, -1])\n    )\n    # multiple arguments are modin objects\n    assert_array_equal(\n        np.where(pd.DataFrame([[1, 0]]), pd.Series([9, 9]), [-1, -1]),\n        np.array([[9, -1]]),\n    )\n"
  },
  {
    "path": "modin/tests/pandas/test_groupby.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport datetime\nimport itertools\nfrom unittest import mock\n\nimport numpy as np\nimport pandas\nimport pandas._libs.lib as lib\nimport pytest\n\nimport modin.pandas as pd\nfrom modin.config import (\n    IsRayCluster,\n    NPartitions,\n    RangePartitioning,\n    StorageFormat,\n    context,\n)\nfrom modin.core.dataframe.algebra.default2pandas.groupby import GroupBy\nfrom modin.core.dataframe.pandas.partitioning.axis_partition import (\n    PandasDataframeAxisPartition,\n)\nfrom modin.core.storage_formats.pandas.query_compiler_caster import (\n    _assert_casting_functions_wrap_same_implementation,\n)\nfrom modin.pandas.io import from_pandas\nfrom modin.pandas.utils import is_scalar\nfrom modin.tests.test_utils import (\n    current_execution_is_native,\n    df_or_series_using_native_execution,\n    warns_that_defaulting_to_pandas_if,\n)\nfrom modin.utils import (\n    MODIN_UNNAMED_SERIES_LABEL,\n    get_current_execution,\n    hashable,\n    try_cast_to_pandas,\n)\n\nfrom .utils import (\n    assert_set_of_rows_identical,\n    check_df_columns_have_nans,\n    create_test_dfs,\n    create_test_series,\n    default_to_pandas_ignore_string,\n    df_equals,\n    
dict_equals,\n    eval_general,\n    generate_multiindex,\n    modin_df_almost_equals_pandas,\n    test_data,\n    test_data_values,\n    test_groupby_data,\n    try_modin_df_almost_equals_compare,\n    value_equals,\n)\n\nNPartitions.put(4)\n\n# Our configuration in pytest.ini requires that we explicitly catch all\n# instances of defaulting to pandas, but some test modules, like this one,\n# have too many such instances.\n# TODO(https://github.com/modin-project/modin/issues/3655): catch all instances\n# of defaulting to pandas.\npytestmark = [\n    pytest.mark.filterwarnings(default_to_pandas_ignore_string),\n    # TO MAKE SURE ALL FUTUREWARNINGS ARE CONSIDERED\n    pytest.mark.filterwarnings(\"error::FutureWarning\"),\n    # ... except for this expected Ray warning due to https://github.com/ray-project/ray/issues/54868\n    pytest.mark.filterwarnings(\n        \"ignore:.*In future versions of Ray, Ray will no longer override accelerator visible devices env var if num_gpus=0 or num_gpus=None:FutureWarning\"\n    ),\n    # IGNORE FUTUREWARNINGS MARKS TO CLEANUP OUTPUT\n    pytest.mark.filterwarnings(\n        \"ignore:DataFrame.groupby with axis=1 is deprecated:FutureWarning\"\n    ),\n    pytest.mark.filterwarnings(\n        \"ignore:DataFrameGroupBy.dtypes is deprecated:FutureWarning\"\n    ),\n    pytest.mark.filterwarnings(\n        \"ignore:DataFrameGroupBy.diff with axis=1 is deprecated:FutureWarning\"\n    ),\n    pytest.mark.filterwarnings(\n        \"ignore:DataFrameGroupBy.pct_change with axis=1 is deprecated:FutureWarning\"\n    ),\n    pytest.mark.filterwarnings(\n        \"ignore:The 'fill_method' keyword being not None and the 'limit' keyword \"\n        + \"in (DataFrame|DataFrameGroupBy).pct_change are deprecated:FutureWarning\"\n    ),\n    pytest.mark.filterwarnings(\n        \"ignore:DataFrameGroupBy.shift with axis=1 is deprecated:FutureWarning\"\n    ),\n    pytest.mark.filterwarnings(\n        \"ignore:(DataFrameGroupBy|SeriesGroupBy).fillna 
is deprecated:FutureWarning\"\n    ),\n    pytest.mark.filterwarnings(\n        \"ignore:(DataFrame|Series).fillna with 'method' is deprecated:FutureWarning\"\n    ),\n    # FIXME: these cases inconsistent between modin and pandas\n    pytest.mark.filterwarnings(\n        \"ignore:A grouping was used that is not in the columns of the DataFrame and so was excluded from the result:FutureWarning\"\n    ),\n    pytest.mark.filterwarnings(\n        \"ignore:The default of observed=False is deprecated:FutureWarning\"\n    ),\n    pytest.mark.filterwarnings(\n        \"ignore:When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future:FutureWarning\"\n    ),\n    pytest.mark.filterwarnings(\n        \"ignore:.*DataFrame.idxmax with all-NA values, or any-NA and skipna=False, is deprecated:FutureWarning\"\n    ),\n    pytest.mark.filterwarnings(\n        \"ignore:.*DataFrame.idxmin with all-NA values, or any-NA and skipna=False, is deprecated:FutureWarning\"\n    ),\n    pytest.mark.filterwarnings(\n        \"ignore:.*In a future version of pandas, the provided callable will be used directly.*:FutureWarning\"\n    ),\n    pytest.mark.filterwarnings(\n        \"ignore:(DataFrameGroupBy|SeriesGroupBy).apply operated on the grouping columns:FutureWarning\"\n    ),\n]\n\n\ndef get_external_groupers(df, columns, drop_from_original_df=False, add_plus_one=False):\n    \"\"\"\n    Construct ``by`` argument containing external groupers.\n\n    Parameters\n    ----------\n    df : pandas.DataFrame or modin.pandas.DataFrame\n    columns : list[tuple[bool, str]]\n        Columns to group on. 
If ``True`` do ``df[col]``, otherwise keep the column name.\n        '''\n        >>> columns = [(True, \"a\"), (False, \"b\")]\n        >>> get_external_groupers(df, columns)\n        [\n            pandas.Series(..., name=\"a\"),\n            \"b\"\n        ]\n        '''\n    drop_from_original_df : bool, default: False\n        Whether to drop selected external columns from `df`.\n    add_plus_one : bool, default: False\n        Whether to do ``df[name] + 1`` for external groupers (so they won't be considered as\n        sibling with `df`).\n\n    Returns\n    -------\n    new_df : pandas.DataFrame or modin.pandas.DataFrame\n        If `drop_from_original_df` was True, returns a new dataframe with\n        dropped external columns, otherwise returns `df`.\n    by : list\n        Groupers to pass to `df.groupby(by)`.\n    \"\"\"\n    new_df = df\n    by = []\n    for lookup, name in columns:\n        if lookup:\n            ser = df[name].copy()\n            if add_plus_one:\n                ser = ser + 1\n            by.append(ser)\n            if drop_from_original_df:\n                new_df = new_df.drop(columns=[name])\n        else:\n            by.append(name)\n    return new_df, by\n\n\ndef modin_groupby_equals_pandas(modin_groupby, pandas_groupby):\n    eval_general(\n        modin_groupby, pandas_groupby, lambda grp: grp.indices, comparator=dict_equals\n    )\n    # FIXME: https://github.com/modin-project/modin/issues/7032\n    eval_general(\n        modin_groupby,\n        pandas_groupby,\n        lambda grp: grp.groups,\n        comparator=dict_equals,\n        expected_exception=False,\n    )\n\n    for g1, g2 in itertools.zip_longest(modin_groupby, pandas_groupby):\n        value_equals(g1[0], g2[0])\n        df_equals(g1[1], g2[1])\n\n\ndef eval_aggregation(md_df, pd_df, operation=None, by=None, *args, **kwargs):\n    if by is None:\n        by = md_df.columns[0]\n    if operation is None:\n        operation = {}\n    return eval_general(\n        
md_df,\n        pd_df,\n        lambda df, *args, **kwargs: df.groupby(by=by).agg(operation, *args, **kwargs),\n        *args,\n        **kwargs,\n    )\n\n\ndef build_types_asserter(comparator):\n    def wrapper(obj1, obj2, *args, **kwargs):\n        error_str = f\"obj1 and obj2 has incorrect types: {type(obj1)} and {type(obj2)}\"\n        assert not (is_scalar(obj1) ^ is_scalar(obj2)), error_str\n        assert obj1.__module__.split(\".\")[0] == \"modin\", error_str\n        assert obj2.__module__.split(\".\")[0] == \"pandas\", error_str\n        comparator(obj1, obj2, *args, **kwargs)\n\n    return wrapper\n\n\n@pytest.mark.parametrize(\"as_index\", [True, False])\ndef test_mixed_dtypes_groupby(as_index):\n    frame_data = np.random.RandomState(42).randint(97, 198, size=(2**6, 2**4))\n    pandas_df = pandas.DataFrame(frame_data).add_prefix(\"col\")\n    # Convert every other column to string\n    for col in pandas_df.iloc[\n        :, [i for i in range(len(pandas_df.columns)) if i % 2 == 0]\n    ]:\n        pandas_df[col] = [str(chr(i)) for i in pandas_df[col]]\n    modin_df = from_pandas(pandas_df)\n\n    n = 1\n\n    by_values = [\n        (\"col1\",),\n        (lambda x: x % 2,),\n        (modin_df[\"col0\"].copy(), pandas_df[\"col0\"].copy()),\n        (\"col3\",),\n    ]\n\n    for by in by_values:\n        if isinstance(by[0], str) and by[0] == \"col3\":\n            modin_groupby = modin_df.set_index(by[0]).groupby(\n                by=by[0], as_index=as_index\n            )\n            pandas_groupby = pandas_df.set_index(by[0]).groupby(\n                by=by[-1], as_index=as_index\n            )\n            # difference in behaviour between .groupby().ffill() and\n            # .groupby.fillna(method='ffill') on duplicated indices\n            # caused by https://github.com/pandas-dev/pandas/issues/43412\n            # is hurting the tests, for now sort the frames\n            md_sorted_grpby = (\n                modin_df.set_index(by[0])\n           
     .sort_index()\n                .groupby(by=by[0], as_index=as_index)\n            )\n            pd_sorted_grpby = (\n                pandas_df.set_index(by[0])\n                .sort_index()\n                .groupby(by=by[0], as_index=as_index)\n            )\n        else:\n            modin_groupby = modin_df.groupby(by=by[0], as_index=as_index)\n            pandas_groupby = pandas_df.groupby(by=by[-1], as_index=as_index)\n            md_sorted_grpby, pd_sorted_grpby = modin_groupby, pandas_groupby\n\n        modin_groupby_equals_pandas(modin_groupby, pandas_groupby)\n        eval_ngroups(modin_groupby, pandas_groupby)\n        eval_general(\n            md_sorted_grpby,\n            pd_sorted_grpby,\n            lambda df: df.ffill(),\n            comparator=lambda *dfs: df_equals(*sort_if_experimental_groupby(*dfs)),\n        )\n        # FIXME: https://github.com/modin-project/modin/issues/7032\n        eval_general(\n            modin_groupby,\n            pandas_groupby,\n            lambda df: df.sem(),\n            modin_df_almost_equals_pandas,\n            expected_exception=False,\n        )\n        eval_general(\n            modin_groupby, pandas_groupby, lambda df: df.sample(random_state=1)\n        )\n        eval_general(\n            modin_groupby,\n            pandas_groupby,\n            lambda df: df.ewm(com=0.5).std(),\n            expected_exception=pandas.errors.DataError(\n                \"Cannot aggregate non-numeric type: object\"\n            ),\n        )\n        eval_shift(\n            modin_groupby,\n            pandas_groupby,\n            comparator=(\n                # We should sort the result before comparison for transform functions\n                # in case of range-partitioning groupby (https://github.com/modin-project/modin/issues/5924).\n                # This test though produces so much NaN values in the result, so it's impossible to sort,\n                # using manual comparison of set of rows instead\n       
         assert_set_of_rows_identical\n                if RangePartitioning.get()\n                else None\n            ),\n        )\n        eval_mean(modin_groupby, pandas_groupby, numeric_only=True)\n        eval_any(modin_groupby, pandas_groupby)\n        eval_min(modin_groupby, pandas_groupby)\n        eval_general(modin_groupby, pandas_groupby, lambda df: df.idxmax())\n        eval_ndim(modin_groupby, pandas_groupby)\n        eval_cumsum(modin_groupby, pandas_groupby, numeric_only=True)\n        eval_general(\n            modin_groupby,\n            pandas_groupby,\n            lambda df: df.pct_change(),\n            modin_df_almost_equals_pandas,\n            # FIXME: https://github.com/modin-project/modin/issues/7032\n            expected_exception=False,\n        )\n        eval_cummax(modin_groupby, pandas_groupby, numeric_only=True)\n\n        # TODO Add more apply functions\n        apply_functions = [lambda df: df.sum(), min]\n        for func in apply_functions:\n            eval_apply(modin_groupby, pandas_groupby, func)\n\n        eval_dtypes(modin_groupby, pandas_groupby)\n        eval_general(\n            modin_groupby,\n            pandas_groupby,\n            lambda df: df.first(),\n            comparator=lambda *dfs: df_equals(*sort_if_experimental_groupby(*dfs)),\n        )\n        eval_cummin(modin_groupby, pandas_groupby, numeric_only=True)\n        eval_general(\n            md_sorted_grpby,\n            pd_sorted_grpby,\n            lambda df: df.bfill(),\n            comparator=lambda *dfs: df_equals(*sort_if_experimental_groupby(*dfs)),\n        )\n        # numeric_only=False doesn't work\n        eval_general(\n            modin_groupby, pandas_groupby, lambda df: df.idxmin(numeric_only=True)\n        )\n        eval_prod(modin_groupby, pandas_groupby, numeric_only=True)\n        if as_index:\n            eval_std(modin_groupby, pandas_groupby, numeric_only=True)\n            eval_var(modin_groupby, pandas_groupby, 
numeric_only=True)\n            eval_skew(modin_groupby, pandas_groupby, numeric_only=True)\n\n        agg_functions = [\n            lambda df: df.sum(),\n            \"min\",\n            min,\n            \"max\",\n            max,\n            sum,\n            {\"col2\": \"sum\"},\n            {\"col2\": sum},\n            {\"col2\": \"max\", \"col4\": \"sum\", \"col5\": \"min\"},\n            {\"col2\": max, \"col4\": sum, \"col5\": \"min\"},\n            # Intersection of 'by' and agg cols for TreeReduce impl\n            {\"col0\": \"count\", \"col1\": \"count\", \"col2\": \"count\"},\n            # Intersection of 'by' and agg cols for FullAxis impl\n            {\"col0\": \"nunique\", \"col1\": \"nunique\", \"col2\": \"nunique\"},\n        ]\n        for func in agg_functions:\n            eval_agg(modin_groupby, pandas_groupby, func)\n\n        eval_general(modin_groupby, pandas_groupby, lambda df: df.last())\n        eval_max(modin_groupby, pandas_groupby)\n        eval_len(modin_groupby, pandas_groupby)\n        eval_sum(modin_groupby, pandas_groupby)\n        if not RangePartitioning.get():\n            # `.group` fails with experimental groupby\n            # https://github.com/modin-project/modin/issues/6083\n            eval_ngroup(modin_groupby, pandas_groupby)\n        eval_nunique(modin_groupby, pandas_groupby)\n        eval_value_counts(modin_groupby, pandas_groupby)\n        eval_median(modin_groupby, pandas_groupby, numeric_only=True)\n        eval_general(\n            modin_groupby,\n            pandas_groupby,\n            lambda df: df.head(n),\n            comparator=lambda *dfs: df_equals(*sort_if_experimental_groupby(*dfs)),\n        )\n        eval_cumprod(modin_groupby, pandas_groupby, numeric_only=True)\n        # numeric_only=False doesn't work\n        eval_general(\n            modin_groupby,\n            pandas_groupby,\n            lambda df: df.cov(numeric_only=True),\n            modin_df_almost_equals_pandas,\n        )\n\n  
      transform_functions = [lambda df: df, lambda df: df + df]\n        for func in transform_functions:\n            eval_transform(modin_groupby, pandas_groupby, func)\n\n        pipe_functions = [lambda dfgb: dfgb.sum()]\n        for func in pipe_functions:\n            eval_pipe(modin_groupby, pandas_groupby, func)\n\n        eval_general(\n            modin_groupby,\n            pandas_groupby,\n            lambda df: df.corr(numeric_only=True),\n            modin_df_almost_equals_pandas,\n        )\n        eval_fillna(modin_groupby, pandas_groupby)\n        eval_count(modin_groupby, pandas_groupby)\n        eval_general(\n            modin_groupby,\n            pandas_groupby,\n            lambda df: df.tail(n),\n            comparator=lambda *dfs: df_equals(*sort_if_experimental_groupby(*dfs)),\n        )\n        eval_quantile(modin_groupby, pandas_groupby)\n        eval_general(modin_groupby, pandas_groupby, lambda df: df.take([0]))\n        eval___getattr__(modin_groupby, pandas_groupby, \"col2\")\n        eval_groups(modin_groupby, pandas_groupby)\n\n\nclass GetColumn:\n    \"\"\"Indicate to the test that it should do gc(df).\"\"\"\n\n    def __init__(self, name):\n        self.name = name\n\n    def __call__(self, df):\n        return df[self.name]\n\n\ndef test_aggregate_alias():\n    # It's optimization. 
If failed, groupby().aggregate should be tested explicitly\n    from modin.pandas.groupby import DataFrameGroupBy, SeriesGroupBy\n\n    _assert_casting_functions_wrap_same_implementation(\n        DataFrameGroupBy.aggregate, DataFrameGroupBy.agg\n    )\n    _assert_casting_functions_wrap_same_implementation(\n        SeriesGroupBy.aggregate, SeriesGroupBy.agg\n    )\n\n\n@pytest.mark.parametrize(\n    \"by\",\n    [\n        [1, 2, 1, 2],\n        lambda x: x % 3,\n        \"col1\",\n        [\"col1\"],\n        # col2 contains NaN, is it necessary to test functions like size()\n        \"col2\",\n        [\"col2\"],  # 5\n        pytest.param(\n            [\"col1\", \"col2\"],\n            marks=pytest.mark.xfail(reason=\"Excluded because of bug #1554\"),\n        ),\n        pytest.param(\n            [\"col2\", \"col4\"],\n            marks=pytest.mark.xfail(reason=\"Excluded because of bug #1554\"),\n        ),\n        pytest.param(\n            [\"col4\", \"col2\"],\n            marks=pytest.mark.xfail(reason=\"Excluded because of bug #1554\"),\n        ),\n        pytest.param(\n            [\"col3\", \"col4\", \"col2\"],\n            marks=pytest.mark.xfail(reason=\"Excluded because of bug #1554\"),\n        ),\n        # but cum* functions produce undefined results with NaNs so we need to test the same combinations without NaN too\n        [\"col5\"],  # 10\n        [\"col1\", \"col5\"],\n        [\"col5\", \"col4\"],\n        [\"col4\", \"col5\"],\n        [\"col5\", \"col4\", \"col1\"],\n        [\"col1\", pd.Series([1, 5, 7, 8])],  # 15\n        [pd.Series([1, 5, 7, 8])],\n        [\n            pd.Series([1, 5, 7, 8]),\n            pd.Series([1, 5, 7, 8]),\n            pd.Series([1, 5, 7, 8]),\n            pd.Series([1, 5, 7, 8]),\n            pd.Series([1, 5, 7, 8]),\n        ],\n        [\"col1\", GetColumn(\"col5\")],\n        [GetColumn(\"col1\"), GetColumn(\"col5\")],\n        [GetColumn(\"col1\")],  # 20\n    
],\n)\n@pytest.mark.parametrize(\"as_index\", [True, False], ids=lambda v: f\"as_index={v}\")\n@pytest.mark.parametrize(\n    \"col1_category\", [True, False], ids=lambda v: f\"col1_category={v}\"\n)\ndef test_simple_row_groupby(by, as_index, col1_category):\n    pandas_df = pandas.DataFrame(\n        {\n            \"col1\": [0, 1, 2, 3],\n            \"col2\": [4, 5, np.nan, 7],\n            \"col3\": [np.nan, np.nan, 12, 10],\n            \"col4\": [17, 13, 16, 15],\n            \"col5\": [-4, -5, -6, -7],\n        }\n    )\n\n    if col1_category:\n        pandas_df = pandas_df.astype({\"col1\": \"category\"})\n        # As of pandas 1.4.0 operators like min cause TypeErrors to be raised on unordered\n        # categorical columns. We need to specify the categorical column as ordered to bypass this.\n        pandas_df[\"col1\"] = pandas_df[\"col1\"].cat.as_ordered()\n\n    modin_df = from_pandas(pandas_df)\n    n = 1\n\n    def maybe_get_columns(df, by):\n        if isinstance(by, list):\n            return [o(df) if isinstance(o, GetColumn) else o for o in by]\n        else:\n            return by\n\n    modin_groupby = modin_df.groupby(\n        by=maybe_get_columns(modin_df, by), as_index=as_index\n    )\n\n    pandas_by = maybe_get_columns(pandas_df, try_cast_to_pandas(by))\n    pandas_groupby = pandas_df.groupby(by=pandas_by, as_index=as_index)\n\n    modin_groupby_equals_pandas(modin_groupby, pandas_groupby)\n    eval_ngroups(modin_groupby, pandas_groupby)\n    eval_shift(modin_groupby, pandas_groupby)\n    eval_general(modin_groupby, pandas_groupby, lambda df: df.ffill())\n    if as_index:\n        eval_general(modin_groupby, pandas_groupby, lambda df: df.nth(0))\n    else:\n        # FIXME: df.groupby(as_index=False).nth() does not produce correct index in Modin,\n        #        it should maintain values from df.index, not create a new one or re-order it;\n        #        it also produces completely wrong result for multi-column `by` :(\n        if 
not isinstance(pandas_by, list) or len(pandas_by) <= 1:\n            eval_general(\n                modin_groupby,\n                pandas_groupby,\n                lambda df: df.nth(0).sort_values(\"col1\").reset_index(drop=True),\n            )\n\n    expected_exception = None\n    if col1_category:\n        expected_exception = TypeError(\n            \"category dtype does not support aggregation 'sem'\"\n        )\n    eval_general(\n        modin_groupby,\n        pandas_groupby,\n        lambda df: df.sem(),\n        modin_df_almost_equals_pandas,\n        expected_exception=expected_exception,\n    )\n    eval_mean(modin_groupby, pandas_groupby, numeric_only=True)\n    eval_any(modin_groupby, pandas_groupby)\n    eval_min(modin_groupby, pandas_groupby)\n    # FIXME: https://github.com/modin-project/modin/issues/7033\n    eval_general(\n        modin_groupby, pandas_groupby, lambda df: df.idxmax(), expected_exception=False\n    )\n    eval_ndim(modin_groupby, pandas_groupby)\n    if not check_df_columns_have_nans(modin_df, by):\n        # cum* functions produce undefined results for columns with NaNs so we run them only when \"by\" columns contain no NaNs\n\n        expected_exception = None\n        if col1_category:\n            expected_exception = TypeError(\n                \"category type does not support cumsum operations\"\n            )\n        eval_general(\n            modin_groupby,\n            pandas_groupby,\n            lambda df: df.cumsum(),\n            expected_exception=expected_exception,\n        )\n        expected_exception = None\n        if col1_category:\n            expected_exception = TypeError(\n                \"category type does not support cummax operations\"\n            )\n        eval_general(\n            modin_groupby,\n            pandas_groupby,\n            lambda df: df.cummax(),\n            expected_exception=expected_exception,\n        )\n        expected_exception = None\n        if col1_category:\n           
 expected_exception = TypeError(\n                \"category type does not support cummin operations\"\n            )\n        eval_general(\n            modin_groupby,\n            pandas_groupby,\n            lambda df: df.cummin(),\n            expected_exception=expected_exception,\n        )\n        expected_exception = None\n        if col1_category:\n            expected_exception = TypeError(\n                \"category type does not support cumprod operations\"\n            )\n        eval_general(\n            modin_groupby,\n            pandas_groupby,\n            lambda df: df.cumprod(),\n            expected_exception=expected_exception,\n        )\n        expected_exception = None\n        if col1_category:\n            expected_exception = TypeError(\n                \"category type does not support cumcount operations\"\n            )\n        eval_general(\n            modin_groupby,\n            pandas_groupby,\n            lambda df: df.cumcount(),\n            expected_exception=expected_exception,\n        )\n\n    eval_general(\n        modin_groupby,\n        pandas_groupby,\n        lambda df: df.pct_change(\n            periods=2, fill_method=\"bfill\", limit=1, freq=None, axis=1\n        ),\n        modin_df_almost_equals_pandas,\n    )\n\n    apply_functions = [\n        lambda df: df.sum(numeric_only=True),\n        lambda df: pandas.Series([1, 2, 3, 4], name=\"result\"),\n        min,\n    ]\n    for func in apply_functions:\n        eval_apply(modin_groupby, pandas_groupby, func)\n\n    eval_dtypes(modin_groupby, pandas_groupby)\n    eval_general(modin_groupby, pandas_groupby, lambda df: df.first())\n    eval_general(modin_groupby, pandas_groupby, lambda df: df.bfill())\n    # FIXME: https://github.com/modin-project/modin/issues/7033\n    eval_general(\n        modin_groupby, pandas_groupby, lambda df: df.idxmin(), expected_exception=False\n    )\n    expected_exception = None\n    if col1_category:\n        expected_exception = 
TypeError(\"category type does not support prod operations\")\n    eval_general(\n        modin_groupby,\n        pandas_groupby,\n        lambda grp: grp.prod(),\n        expected_exception=expected_exception,\n    )\n\n    if as_index:\n        eval_std(modin_groupby, pandas_groupby, numeric_only=True)\n        eval_var(modin_groupby, pandas_groupby, numeric_only=True)\n        eval_skew(modin_groupby, pandas_groupby, numeric_only=True)\n\n    agg_functions = [\n        lambda df: df.sum(),\n        \"min\",\n        \"max\",\n        min,\n        sum,\n        # Intersection of 'by' and agg cols for TreeReduce impl\n        {\"col1\": \"count\", \"col2\": \"count\"},\n        # Intersection of 'by' and agg cols for FullAxis impl\n        {\"col1\": \"nunique\", \"col2\": \"nunique\"},\n    ]\n    for func in agg_functions:\n        # Pandas raises an exception when 'by' contains categorical key and `as_index=False`\n        # because of this bug: https://github.com/pandas-dev/pandas/issues/36698\n        # Modin correctly processes the result\n        is_pandas_bug_case = not as_index and col1_category and isinstance(func, dict)\n        expected_exception = None\n        if col1_category:\n            # FIXME: https://github.com/modin-project/modin/issues/7033\n            expected_exception = False\n        if not is_pandas_bug_case:\n            eval_general(\n                modin_groupby,\n                pandas_groupby,\n                lambda grp: grp.agg(func),\n                expected_exception=expected_exception,\n            )\n\n    eval_general(modin_groupby, pandas_groupby, lambda df: df.last())\n    eval_general(modin_groupby, pandas_groupby, lambda df: df.rank())\n    eval_max(modin_groupby, pandas_groupby)\n    eval_len(modin_groupby, pandas_groupby)\n    expected_exception = None\n    if col1_category:\n        expected_exception = TypeError(\"category type does not support sum operations\")\n    eval_general(\n        modin_groupby,\n        
pandas_groupby,\n        lambda df: df.sum(),\n        expected_exception=expected_exception,\n    )\n\n    eval_ngroup(modin_groupby, pandas_groupby)\n    # Pandas raising exception when 'by' contains categorical key and `as_index=False`\n    # because of a bug: https://github.com/pandas-dev/pandas/issues/36698\n    # Modin correctly processes the result\n    if not (col1_category and not as_index):\n        eval_general(\n            modin_groupby,\n            pandas_groupby,\n            lambda df: df.nunique(),\n        )\n    expected_exception = None\n    if col1_category:\n        expected_exception = TypeError(\n            \"category dtype does not support aggregation 'median'\"\n        )\n    # TypeError: category type does not support median operations\n    eval_general(\n        modin_groupby,\n        pandas_groupby,\n        lambda df: df.median(),\n        modin_df_almost_equals_pandas,\n        expected_exception=expected_exception,\n    )\n\n    eval_general(modin_groupby, pandas_groupby, lambda df: df.head(n))\n    eval_general(\n        modin_groupby,\n        pandas_groupby,\n        lambda df: df.cov(),\n        modin_df_almost_equals_pandas,\n    )\n\n    if not check_df_columns_have_nans(modin_df, by):\n        # Pandas groupby.transform does not work correctly with NaN values in grouping columns. 
See Pandas bug 17093.\n        transform_functions = [lambda df: df + 4, lambda df: -df - 10]\n        for idx, func in enumerate(transform_functions):\n            expected_exception = None\n            if col1_category:\n                if idx == 0:\n                    expected_exception = TypeError(\n                        \"unsupported operand type(s) for +: 'Categorical' and 'int'\"\n                    )\n                elif idx == 1:\n                    expected_exception = TypeError(\n                        \"bad operand type for unary -: 'Categorical'\"\n                    )\n            eval_general(\n                modin_groupby,\n                pandas_groupby,\n                lambda df: df.transform(func),\n                expected_exception=expected_exception,\n            )\n\n    pipe_functions = [lambda dfgb: dfgb.sum()]\n    for func in pipe_functions:\n        expected_exception = None\n        if col1_category:\n            expected_exception = TypeError(\n                \"category type does not support sum operations\"\n            )\n        eval_general(\n            modin_groupby,\n            pandas_groupby,\n            lambda df: df.pipe(func),\n            expected_exception=expected_exception,\n        )\n\n    eval_general(\n        modin_groupby,\n        pandas_groupby,\n        lambda df: df.corr(),\n        modin_df_almost_equals_pandas,\n    )\n    eval_fillna(modin_groupby, pandas_groupby)\n    eval_count(modin_groupby, pandas_groupby)\n    if get_current_execution() != \"BaseOnPython\" and not current_execution_is_native():\n        eval_general(\n            modin_groupby,\n            pandas_groupby,\n            lambda df: df.size(),\n        )\n    eval_general(modin_groupby, pandas_groupby, lambda df: df.tail(n))\n    eval_quantile(modin_groupby, pandas_groupby)\n    eval_general(modin_groupby, pandas_groupby, lambda df: df.take([0]))\n    if isinstance(by, list) and not any(\n        isinstance(o, (pd.Series, 
pandas.Series)) for o in by\n    ):\n        # Not yet supported for non-original-column-from-dataframe Series in by:\n        eval___getattr__(modin_groupby, pandas_groupby, \"col3\")\n        # FIXME: https://github.com/modin-project/modin/issues/7033\n        eval___getitem__(\n            modin_groupby, pandas_groupby, \"col3\", expected_exception=False\n        )\n    eval_groups(modin_groupby, pandas_groupby)\n    # Intersection of the selection and 'by' columns is not yet supported\n    non_by_cols = (\n        # Potential selection starts only from the second column, because the first may\n        # be categorical in this test, which is not yet supported\n        [col for col in pandas_df.columns[1:] if col not in modin_groupby._internal_by]\n        if isinstance(by, list)\n        else [\"col3\", \"col4\"]\n    )\n    # FIXME: https://github.com/modin-project/modin/issues/7033\n    eval___getitem__(\n        modin_groupby, pandas_groupby, non_by_cols, expected_exception=False\n    )\n    # When GroupBy.__getitem__ meets an intersection of the selection and 'by' columns\n    # it throws a warning with the suggested workaround. 
The following code tests\n    # that this workaround works as expected.\n    if len(modin_groupby._internal_by) != 0:\n        if not isinstance(by, list):\n            by = [by]\n        by_from_workaround = [\n            (\n                modin_df[getattr(col, \"name\", col)].copy()\n                if (hashable(col) and col in modin_groupby._internal_by)\n                or isinstance(col, GetColumn)\n                else col\n            )\n            for col in by\n        ]\n        # GroupBy result with 'as_index=False' depends on the 'by' origin, since we forcibly changed\n        # the origin of 'by' for modin by doing a copy, set 'as_index=True' to compare results.\n        modin_groupby = modin_df.groupby(\n            maybe_get_columns(modin_df, by_from_workaround), as_index=True\n        )\n        pandas_groupby = pandas_df.groupby(pandas_by, as_index=True)\n        eval___getitem__(\n            modin_groupby,\n            pandas_groupby,\n            list(modin_groupby._internal_by) + non_by_cols[:1],\n        )\n\n\ndef test_single_group_row_groupby():\n    pandas_df = pandas.DataFrame(\n        {\n            \"col1\": [0, 1, 2, 3],\n            \"col2\": [4, 5, 36, 7],\n            \"col3\": [3, 8, 12, 10],\n            \"col4\": [17, 3, 16, 15],\n            \"col5\": [-4, 5, -6, -7],\n        }\n    )\n\n    modin_df = from_pandas(pandas_df)\n\n    by = [\"1\", \"1\", \"1\", \"1\"]\n    n = 6\n\n    modin_groupby = modin_df.groupby(by=by)\n    pandas_groupby = pandas_df.groupby(by=by)\n\n    modin_groupby_equals_pandas(modin_groupby, pandas_groupby)\n    eval_ngroups(modin_groupby, pandas_groupby)\n    eval_shift(modin_groupby, pandas_groupby)\n    eval_skew(modin_groupby, pandas_groupby)\n    eval_general(modin_groupby, pandas_groupby, lambda df: df.ffill())\n    eval_general(\n        modin_groupby,\n        pandas_groupby,\n        lambda df: df.sem(),\n        modin_df_almost_equals_pandas,\n    )\n    eval_mean(modin_groupby, 
pandas_groupby)\n    eval_any(modin_groupby, pandas_groupby)\n    eval_min(modin_groupby, pandas_groupby)\n    eval_general(modin_groupby, pandas_groupby, lambda df: df.idxmax())\n    eval_ndim(modin_groupby, pandas_groupby)\n    eval_cumsum(modin_groupby, pandas_groupby)\n    eval_general(\n        modin_groupby,\n        pandas_groupby,\n        lambda df: df.pct_change(),\n        modin_df_almost_equals_pandas,\n    )\n    eval_cummax(modin_groupby, pandas_groupby)\n\n    apply_functions = [lambda df: df.sum(), lambda df: -df]\n    for func in apply_functions:\n        eval_apply(modin_groupby, pandas_groupby, func)\n\n    eval_dtypes(modin_groupby, pandas_groupby)\n    eval_general(modin_groupby, pandas_groupby, lambda df: df.first())\n    eval_cummin(modin_groupby, pandas_groupby)\n    eval_general(modin_groupby, pandas_groupby, lambda df: df.bfill())\n    eval_general(modin_groupby, pandas_groupby, lambda df: df.idxmin())\n    eval_prod(modin_groupby, pandas_groupby)\n    eval_std(modin_groupby, pandas_groupby)\n\n    agg_functions = [\n        lambda df: df.sum(),\n        \"min\",\n        \"max\",\n        max,\n        sum,\n        {\"col2\": \"sum\"},\n        {\"col2\": \"max\", \"col4\": \"sum\", \"col5\": \"min\"},\n    ]\n    for func in agg_functions:\n        eval_agg(modin_groupby, pandas_groupby, func)\n\n    eval_general(modin_groupby, pandas_groupby, lambda df: df.last())\n    eval_rank(modin_groupby, pandas_groupby)\n    eval_max(modin_groupby, pandas_groupby)\n    eval_var(modin_groupby, pandas_groupby)\n    eval_len(modin_groupby, pandas_groupby)\n    eval_sum(modin_groupby, pandas_groupby)\n    eval_ngroup(modin_groupby, pandas_groupby)\n    eval_nunique(modin_groupby, pandas_groupby)\n    eval_value_counts(modin_groupby, pandas_groupby)\n    eval_median(modin_groupby, pandas_groupby)\n    eval_general(modin_groupby, pandas_groupby, lambda df: df.head(n))\n    eval_cumprod(modin_groupby, pandas_groupby)\n    eval_general(\n        
modin_groupby,\n        pandas_groupby,\n        lambda df: df.cov(),\n        modin_df_almost_equals_pandas,\n    )\n\n    transform_functions = [lambda df: df + 4, lambda df: -df - 10]\n    for func in transform_functions:\n        eval_transform(modin_groupby, pandas_groupby, func)\n\n    pipe_functions = [lambda dfgb: dfgb.sum()]\n    for func in pipe_functions:\n        eval_pipe(modin_groupby, pandas_groupby, func)\n\n    eval_general(\n        modin_groupby,\n        pandas_groupby,\n        lambda df: df.corr(),\n        modin_df_almost_equals_pandas,\n    )\n    eval_fillna(modin_groupby, pandas_groupby)\n    eval_count(modin_groupby, pandas_groupby)\n    eval_size(modin_groupby, pandas_groupby)\n    eval_general(modin_groupby, pandas_groupby, lambda df: df.tail(n))\n    eval_quantile(modin_groupby, pandas_groupby)\n    eval_general(modin_groupby, pandas_groupby, lambda df: df.take([0]))\n    eval___getattr__(modin_groupby, pandas_groupby, \"col2\")\n    eval_groups(modin_groupby, pandas_groupby)\n\n\n@pytest.mark.parametrize(\"is_by_category\", [True, False])\ndef test_large_row_groupby(is_by_category):\n    pandas_df = pandas.DataFrame(\n        np.random.randint(0, 8, size=(100, 4)), columns=list(\"ABCD\")\n    )\n\n    modin_df = from_pandas(pandas_df)\n\n    by = [str(i) for i in pandas_df[\"A\"].tolist()]\n\n    if is_by_category:\n        by = pandas.Categorical(by)\n\n    n = 4\n\n    modin_groupby = modin_df.groupby(by=by)\n    pandas_groupby = pandas_df.groupby(by=by)\n\n    modin_groupby_equals_pandas(modin_groupby, pandas_groupby)\n    eval_ngroups(modin_groupby, pandas_groupby)\n    eval_shift(modin_groupby, pandas_groupby)\n    eval_skew(modin_groupby, pandas_groupby)\n    eval_general(modin_groupby, pandas_groupby, lambda df: df.ffill())\n    eval_general(\n        modin_groupby,\n        pandas_groupby,\n        lambda df: df.sem(),\n        modin_df_almost_equals_pandas,\n    )\n    eval_mean(modin_groupby, pandas_groupby)\n    
eval_any(modin_groupby, pandas_groupby)\n    eval_min(modin_groupby, pandas_groupby)\n    eval_general(modin_groupby, pandas_groupby, lambda df: df.idxmax())\n    eval_ndim(modin_groupby, pandas_groupby)\n    eval_cumsum(modin_groupby, pandas_groupby)\n\n    eval_general(\n        modin_groupby,\n        pandas_groupby,\n        lambda df: df.diff(periods=2),\n    )\n    eval_general(\n        modin_groupby,\n        pandas_groupby,\n        lambda df: df.diff(periods=-1),\n    )\n    eval_general(\n        modin_groupby,\n        pandas_groupby,\n        lambda df: df.diff(axis=1),\n    )\n\n    eval_general(\n        modin_groupby,\n        pandas_groupby,\n        lambda df: df.pct_change(),\n        modin_df_almost_equals_pandas,\n    )\n    eval_cummax(modin_groupby, pandas_groupby)\n\n    apply_functions = [lambda df: df.sum(), lambda df: -df]\n    for func in apply_functions:\n        eval_apply(modin_groupby, pandas_groupby, func)\n\n    eval_dtypes(modin_groupby, pandas_groupby)\n    eval_general(modin_groupby, pandas_groupby, lambda df: df.first())\n    eval_cummin(modin_groupby, pandas_groupby)\n    eval_general(modin_groupby, pandas_groupby, lambda df: df.bfill())\n    eval_general(modin_groupby, pandas_groupby, lambda df: df.idxmin())\n    # eval_prod(modin_groupby, pandas_groupby) causes overflows\n    eval_std(modin_groupby, pandas_groupby)\n\n    agg_functions = [\n        lambda df: df.sum(),\n        \"min\",\n        \"max\",\n        min,\n        sum,\n        {\"A\": \"sum\"},\n        {\"A\": lambda df: df.sum()},\n        {\"A\": \"max\", \"B\": \"sum\", \"C\": \"min\"},\n    ]\n    for func in agg_functions:\n        eval_agg(modin_groupby, pandas_groupby, func)\n\n    eval_general(modin_groupby, pandas_groupby, lambda df: df.last())\n    eval_rank(modin_groupby, pandas_groupby)\n    eval_max(modin_groupby, pandas_groupby)\n    eval_var(modin_groupby, pandas_groupby)\n    eval_len(modin_groupby, pandas_groupby)\n    eval_sum(modin_groupby, 
pandas_groupby)\n    eval_ngroup(modin_groupby, pandas_groupby)\n    eval_nunique(modin_groupby, pandas_groupby)\n    eval_value_counts(modin_groupby, pandas_groupby)\n    eval_median(modin_groupby, pandas_groupby)\n    eval_general(modin_groupby, pandas_groupby, lambda df: df.head(n))\n    # eval_cumprod(modin_groupby, pandas_groupby) causes overflows\n    eval_general(\n        modin_groupby,\n        pandas_groupby,\n        lambda df: df.cov(),\n        modin_df_almost_equals_pandas,\n    )\n\n    transform_functions = [lambda df: df + 4, lambda df: -df - 10]\n    for func in transform_functions:\n        eval_transform(modin_groupby, pandas_groupby, func)\n\n    pipe_functions = [lambda dfgb: dfgb.sum()]\n    for func in pipe_functions:\n        eval_pipe(modin_groupby, pandas_groupby, func)\n\n    eval_general(\n        modin_groupby,\n        pandas_groupby,\n        lambda df: df.corr(),\n        modin_df_almost_equals_pandas,\n    )\n    eval_fillna(modin_groupby, pandas_groupby)\n    eval_count(modin_groupby, pandas_groupby)\n    eval_size(modin_groupby, pandas_groupby)\n    eval_general(modin_groupby, pandas_groupby, lambda df: df.tail(n))\n    eval_quantile(modin_groupby, pandas_groupby)\n    eval_general(modin_groupby, pandas_groupby, lambda df: df.take([0]))\n    eval_groups(modin_groupby, pandas_groupby)\n\n\ndef test_simple_col_groupby():\n    pandas_df = pandas.DataFrame(\n        {\n            \"col1\": [0, 3, 2, 3],\n            \"col2\": [4, 1, 6, 7],\n            \"col3\": [3, 8, 2, 10],\n            \"col4\": [1, 13, 6, 15],\n            \"col5\": [-4, 5, 6, -7],\n        }\n    )\n\n    modin_df = from_pandas(pandas_df)\n\n    by = [1, 2, 3, 2, 1]\n\n    modin_groupby = modin_df.groupby(axis=1, by=by)\n    pandas_groupby = pandas_df.groupby(axis=1, by=by)\n\n    modin_groupby_equals_pandas(modin_groupby, pandas_groupby)\n    eval_ngroups(modin_groupby, pandas_groupby)\n    eval_shift(modin_groupby, pandas_groupby)\n    
eval_skew(modin_groupby, pandas_groupby)\n    eval_general(modin_groupby, pandas_groupby, lambda df: df.ffill())\n    eval_general(\n        modin_groupby,\n        pandas_groupby,\n        lambda df: df.sem(),\n        modin_df_almost_equals_pandas,\n    )\n    eval_mean(modin_groupby, pandas_groupby)\n    eval_any(modin_groupby, pandas_groupby)\n    eval_min(modin_groupby, pandas_groupby)\n    eval_ndim(modin_groupby, pandas_groupby)\n\n    eval_general(modin_groupby, pandas_groupby, lambda df: df.idxmax())\n    eval_general(modin_groupby, pandas_groupby, lambda df: df.idxmin())\n    eval_quantile(modin_groupby, pandas_groupby)\n\n    # https://github.com/pandas-dev/pandas/issues/21127\n    # eval_cumsum(modin_groupby, pandas_groupby)\n    # eval_cummax(modin_groupby, pandas_groupby)\n    # eval_cummin(modin_groupby, pandas_groupby)\n    # eval_cumprod(modin_groupby, pandas_groupby)\n\n    eval_general(\n        modin_groupby,\n        pandas_groupby,\n        lambda df: df.pct_change(),\n        modin_df_almost_equals_pandas,\n    )\n    apply_functions = [lambda df: -df, lambda df: df.sum(axis=1)]\n    for func in apply_functions:\n        eval_apply(modin_groupby, pandas_groupby, func)\n\n    eval_general(modin_groupby, pandas_groupby, lambda df: df.first())\n    eval_general(modin_groupby, pandas_groupby, lambda df: df.bfill())\n    eval_prod(modin_groupby, pandas_groupby)\n    eval_std(modin_groupby, pandas_groupby)\n    eval_general(modin_groupby, pandas_groupby, lambda df: df.last())\n    eval_max(modin_groupby, pandas_groupby)\n    eval_var(modin_groupby, pandas_groupby)\n    eval_len(modin_groupby, pandas_groupby)\n    eval_sum(modin_groupby, pandas_groupby)\n\n    # Pandas fails on this case with ValueError\n    # eval_ngroup(modin_groupby, pandas_groupby)\n    # eval_nunique(modin_groupby, pandas_groupby)\n    # NotImplementedError: DataFrameGroupBy.value_counts only handles axis=0\n    # eval_value_counts(modin_groupby, pandas_groupby)\n    
eval_median(modin_groupby, pandas_groupby)\n    eval_general(\n        modin_groupby,\n        pandas_groupby,\n        lambda df: df.cov(),\n        modin_df_almost_equals_pandas,\n    )\n\n    transform_functions = [lambda df: df + 4, lambda df: -df - 10]\n    for func in transform_functions:\n        eval_transform(modin_groupby, pandas_groupby, func)\n\n    pipe_functions = [lambda dfgb: dfgb.sum()]\n    for func in pipe_functions:\n        eval_pipe(modin_groupby, pandas_groupby, func)\n\n    eval_general(\n        modin_groupby,\n        pandas_groupby,\n        lambda df: df.corr(),\n        modin_df_almost_equals_pandas,\n    )\n    eval_fillna(modin_groupby, pandas_groupby)\n    eval_count(modin_groupby, pandas_groupby)\n    eval_size(modin_groupby, pandas_groupby)\n    eval_general(modin_groupby, pandas_groupby, lambda df: df.take([0]))\n\n    # https://github.com/pandas-dev/pandas/issues/54858\n    # eval_groups(modin_groupby, pandas_groupby)\n\n\n@pytest.mark.parametrize(\n    \"by\", [np.random.randint(0, 100, size=2**8), lambda x: x % 3, None]\n)\n@pytest.mark.parametrize(\"as_index_series_or_dataframe\", [0, 1, 2])\ndef test_series_groupby(by, as_index_series_or_dataframe):\n    if as_index_series_or_dataframe <= 1:\n        as_index = as_index_series_or_dataframe == 1\n        series_data = np.random.randint(97, 198, size=2**8)\n        modin_series = pd.Series(series_data)\n        pandas_series = pandas.Series(series_data)\n    else:\n        as_index = True\n        pandas_series = pandas.DataFrame(\n            {\n                \"col1\": [0, 1, 2, 3],\n                \"col2\": [4, 5, 6, 7],\n                \"col3\": [3, 8, 12, 10],\n                \"col4\": [17, 13, 16, 15],\n                \"col5\": [-4, -5, -6, -7],\n            }\n        )\n        modin_series = from_pandas(pandas_series)\n        if isinstance(by, np.ndarray) or by is None:\n            by = np.random.randint(0, 100, size=len(pandas_series.index))\n\n    n = 1\n\n    
try:\n        pandas_groupby = pandas_series.groupby(by, as_index=as_index)\n        if as_index_series_or_dataframe == 2:\n            pandas_groupby = pandas_groupby[\"col1\"]\n    except Exception as err:\n        with pytest.raises(type(err)):\n            modin_series.groupby(by, as_index=as_index)\n    else:\n        modin_groupby = modin_series.groupby(by, as_index=as_index)\n        if as_index_series_or_dataframe == 2:\n            modin_groupby = modin_groupby[\"col1\"]\n\n        modin_groupby_equals_pandas(modin_groupby, pandas_groupby)\n        eval_ngroups(modin_groupby, pandas_groupby)\n        eval_shift(modin_groupby, pandas_groupby)\n        eval_general(modin_groupby, pandas_groupby, lambda df: df.ffill())\n        eval_general(\n            modin_groupby,\n            pandas_groupby,\n            lambda df: df.sem(),\n            modin_df_almost_equals_pandas,\n        )\n        eval_general(\n            modin_groupby, pandas_groupby, lambda df: df.sample(random_state=1)\n        )\n        eval_general(modin_groupby, pandas_groupby, lambda df: df.ewm(com=0.5).std())\n        eval_general(\n            modin_groupby, pandas_groupby, lambda df: df.is_monotonic_decreasing\n        )\n        eval_general(\n            modin_groupby, pandas_groupby, lambda df: df.is_monotonic_increasing\n        )\n        eval_general(modin_groupby, pandas_groupby, lambda df: df.nlargest())\n        eval_general(modin_groupby, pandas_groupby, lambda df: df.nsmallest())\n        eval_general(modin_groupby, pandas_groupby, lambda df: df.unique())\n        eval_general(modin_groupby, pandas_groupby, lambda df: df.dtype)\n        eval_mean(modin_groupby, pandas_groupby)\n        eval_any(modin_groupby, pandas_groupby)\n        eval_min(modin_groupby, pandas_groupby)\n        eval_general(modin_groupby, pandas_groupby, lambda df: df.idxmax())\n        eval_ndim(modin_groupby, pandas_groupby)\n        eval_cumsum(modin_groupby, pandas_groupby)\n        eval_general(\n 
           modin_groupby,\n            pandas_groupby,\n            lambda df: df.pct_change(),\n            modin_df_almost_equals_pandas,\n        )\n        eval_general(\n            modin_groupby,\n            pandas_groupby,\n            lambda df: df.diff(periods=2),\n        )\n        eval_general(\n            modin_groupby,\n            pandas_groupby,\n            lambda df: df.diff(periods=-1),\n        )\n        eval_cummax(modin_groupby, pandas_groupby)\n\n        apply_functions = [lambda df: df.sum(), min]\n        for func in apply_functions:\n            eval_apply(modin_groupby, pandas_groupby, func)\n\n        eval_general(modin_groupby, pandas_groupby, lambda df: df.first())\n        eval_cummin(modin_groupby, pandas_groupby)\n        eval_general(modin_groupby, pandas_groupby, lambda df: df.bfill())\n        eval_general(modin_groupby, pandas_groupby, lambda df: df.idxmin())\n        eval_prod(modin_groupby, pandas_groupby)\n        if as_index:\n            eval_std(modin_groupby, pandas_groupby)\n            eval_var(modin_groupby, pandas_groupby)\n            eval_skew(modin_groupby, pandas_groupby)\n\n        agg_functions = [\n            lambda df: df.sum(),\n            \"min\",\n            \"max\",\n            max,\n            sum,\n            np.mean,\n            [\"min\", \"max\"],\n            [np.mean, np.std, np.var, np.max, np.min],\n        ]\n        for func in agg_functions:\n            eval_agg(modin_groupby, pandas_groupby, func)\n\n        eval_general(modin_groupby, pandas_groupby, lambda df: df.last())\n        eval_rank(modin_groupby, pandas_groupby)\n        eval_max(modin_groupby, pandas_groupby)\n        eval_len(modin_groupby, pandas_groupby)\n        eval_sum(modin_groupby, pandas_groupby)\n        eval_size(modin_groupby, pandas_groupby)\n        eval_ngroup(modin_groupby, pandas_groupby)\n        eval_nunique(modin_groupby, pandas_groupby)\n        eval_value_counts(modin_groupby, pandas_groupby)\n        
eval_median(modin_groupby, pandas_groupby)\n        eval_general(modin_groupby, pandas_groupby, lambda df: df.head(n))\n        eval_cumprod(modin_groupby, pandas_groupby)\n\n        transform_functions = [lambda df: df + 4, lambda df: -df - 10]\n        for func in transform_functions:\n            eval_transform(modin_groupby, pandas_groupby, func)\n\n        pipe_functions = [lambda dfgb: dfgb.sum()]\n        for func in pipe_functions:\n            eval_pipe(modin_groupby, pandas_groupby, func)\n\n        eval_fillna(modin_groupby, pandas_groupby)\n        eval_count(modin_groupby, pandas_groupby)\n        eval_general(modin_groupby, pandas_groupby, lambda df: df.tail(n))\n        eval_quantile(modin_groupby, pandas_groupby)\n        eval_general(modin_groupby, pandas_groupby, lambda df: df.take([0]))\n        eval_groups(modin_groupby, pandas_groupby)\n\n\ndef test_agg_udf_6600():\n    data = {\n        \"name\": [\"Mariners\", \"Lakers\"] * 50,\n        \"league_abbreviation\": [\"MLB\", \"NBA\"] * 50,\n    }\n    modin_teams, pandas_teams = create_test_dfs(data)\n\n    def my_first_item(s):\n        return s.iloc[0]\n\n    for agg in (my_first_item, [my_first_item], [\"nunique\", my_first_item]):\n        eval_general(\n            modin_teams,\n            pandas_teams,\n            operation=lambda df: df.groupby(\"league_abbreviation\").name.agg(agg),\n        )\n\n\ndef test_multi_column_groupby():\n    pandas_df = pandas.DataFrame(\n        {\n            \"col1\": np.random.randint(0, 100, size=1000),\n            \"col2\": np.random.randint(0, 100, size=1000),\n            \"col3\": np.random.randint(0, 100, size=1000),\n            \"col4\": np.random.randint(0, 100, size=1000),\n            \"col5\": np.random.randint(0, 100, size=1000),\n        },\n        index=[\"row{}\".format(i) for i in range(1000)],\n    )\n\n    modin_df = from_pandas(pandas_df)\n    by = [\"col1\", \"col2\"]\n\n    df_equals(modin_df.groupby(by).count(), 
pandas_df.groupby(by).count())\n\n    with pytest.warns(UserWarning):\n        for k, _ in modin_df.groupby(by):\n            assert isinstance(k, tuple)\n\n    by = [\"row0\", \"row1\"]\n    with pytest.raises(KeyError):\n        modin_df.groupby(by, axis=1).count()\n\n\ndef sort_if_experimental_groupby(*dfs):\n    \"\"\"\n    This method should be applied before comparing results of ``groupby.transform`` as\n    the experimental implementation changes the order of rows for that:\n    https://github.com/modin-project/modin/issues/5924\n    \"\"\"\n    result = dfs\n    if RangePartitioning.get():\n        dfs = try_cast_to_pandas(dfs)\n        result = []\n        for df in dfs:\n            if df.ndim == 1:\n                # Series case\n                result.append(df.sort_index())\n                continue\n\n            # filtering out index names in order to avoid:\n            # ValueError: 'col' is both an index level and a column label, which is ambiguous.\n            cols_no_idx_names = df.columns.difference(\n                df.index.names, sort=False\n            ).tolist()\n            df = df.sort_values(cols_no_idx_names)\n            result.append(df)\n    return result\n\n\ndef eval_ngroups(modin_groupby, pandas_groupby):\n    assert modin_groupby.ngroups == pandas_groupby.ngroups\n\n\ndef eval_skew(modin_groupby, pandas_groupby, numeric_only=False):\n    modin_df_almost_equals_pandas(\n        modin_groupby.skew(numeric_only=numeric_only),\n        pandas_groupby.skew(numeric_only=numeric_only),\n    )\n\n\ndef eval_mean(modin_groupby, pandas_groupby, numeric_only=False):\n    modin_df_almost_equals_pandas(\n        modin_groupby.mean(numeric_only=numeric_only),\n        pandas_groupby.mean(numeric_only=numeric_only),\n    )\n\n\ndef eval_any(modin_groupby, pandas_groupby):\n    df_equals(modin_groupby.any(), pandas_groupby.any())\n\n\ndef eval_min(modin_groupby, pandas_groupby):\n    df_equals(modin_groupby.min(), 
pandas_groupby.min())\n\n\ndef eval_ndim(modin_groupby, pandas_groupby):\n    assert modin_groupby.ndim == pandas_groupby.ndim\n\n\ndef eval_cumsum(modin_groupby, pandas_groupby, axis=lib.no_default, numeric_only=False):\n    df_equals(\n        *sort_if_experimental_groupby(\n            modin_groupby.cumsum(axis=axis, numeric_only=numeric_only),\n            pandas_groupby.cumsum(axis=axis, numeric_only=numeric_only),\n        )\n    )\n\n\ndef eval_cummax(modin_groupby, pandas_groupby, axis=lib.no_default, numeric_only=False):\n    df_equals(\n        *sort_if_experimental_groupby(\n            modin_groupby.cummax(axis=axis, numeric_only=numeric_only),\n            pandas_groupby.cummax(axis=axis, numeric_only=numeric_only),\n        )\n    )\n\n\ndef eval_cummin(modin_groupby, pandas_groupby, axis=lib.no_default, numeric_only=False):\n    df_equals(\n        *sort_if_experimental_groupby(\n            modin_groupby.cummin(axis=axis, numeric_only=numeric_only),\n            pandas_groupby.cummin(axis=axis, numeric_only=numeric_only),\n        )\n    )\n\n\ndef eval_apply(modin_groupby, pandas_groupby, func):\n    df_equals(modin_groupby.apply(func), pandas_groupby.apply(func))\n\n\ndef eval_dtypes(modin_groupby, pandas_groupby):\n    df_equals(modin_groupby.dtypes, pandas_groupby.dtypes)\n\n\ndef eval_prod(modin_groupby, pandas_groupby, numeric_only=False):\n    df_equals(\n        modin_groupby.prod(numeric_only=numeric_only),\n        pandas_groupby.prod(numeric_only=numeric_only),\n    )\n\n\ndef eval_std(modin_groupby, pandas_groupby, numeric_only=False):\n    modin_df_almost_equals_pandas(\n        modin_groupby.std(numeric_only=numeric_only),\n        pandas_groupby.std(numeric_only=numeric_only),\n    )\n\n\ndef eval_agg(modin_groupby, pandas_groupby, func):\n    df_equals(modin_groupby.agg(func), pandas_groupby.agg(func))\n\n\ndef eval_rank(modin_groupby, pandas_groupby):\n    df_equals(modin_groupby.rank(), pandas_groupby.rank())\n\n\ndef 
eval_max(modin_groupby, pandas_groupby):\n    df_equals(modin_groupby.max(), pandas_groupby.max())\n\n\ndef eval_var(modin_groupby, pandas_groupby, numeric_only=False):\n    modin_df_almost_equals_pandas(\n        modin_groupby.var(numeric_only=numeric_only),\n        pandas_groupby.var(numeric_only=numeric_only),\n    )\n\n\ndef eval_len(modin_groupby, pandas_groupby):\n    assert len(modin_groupby) == len(pandas_groupby)\n\n\ndef eval_sum(modin_groupby, pandas_groupby):\n    df_equals(modin_groupby.sum(), pandas_groupby.sum())\n\n\ndef eval_ngroup(modin_groupby, pandas_groupby):\n    df_equals(modin_groupby.ngroup(), pandas_groupby.ngroup())\n\n\ndef eval_nunique(modin_groupby, pandas_groupby):\n    df_equals(modin_groupby.nunique(), pandas_groupby.nunique())\n\n\ndef eval_value_counts(modin_groupby, pandas_groupby):\n    df_equals(modin_groupby.value_counts(), pandas_groupby.value_counts())\n\n\ndef eval_median(modin_groupby, pandas_groupby, numeric_only=False):\n    modin_df_almost_equals_pandas(\n        modin_groupby.median(numeric_only=numeric_only),\n        pandas_groupby.median(numeric_only=numeric_only),\n    )\n\n\ndef eval_cumprod(\n    modin_groupby, pandas_groupby, axis=lib.no_default, numeric_only=False\n):\n    df_equals(\n        *sort_if_experimental_groupby(\n            modin_groupby.cumprod(numeric_only=numeric_only),\n            pandas_groupby.cumprod(numeric_only=numeric_only),\n        )\n    )\n    df_equals(\n        *sort_if_experimental_groupby(\n            modin_groupby.cumprod(axis=axis, numeric_only=numeric_only),\n            pandas_groupby.cumprod(axis=axis, numeric_only=numeric_only),\n        )\n    )\n\n\ndef eval_transform(modin_groupby, pandas_groupby, func):\n    df_equals(\n        *sort_if_experimental_groupby(\n            modin_groupby.transform(func), pandas_groupby.transform(func)\n        )\n    )\n\n\ndef eval_fillna(modin_groupby, pandas_groupby):\n    df_equals(\n        *sort_if_experimental_groupby(\n            
modin_groupby.fillna(method=\"ffill\"), pandas_groupby.fillna(method=\"ffill\")\n        )\n    )\n\n\ndef eval_count(modin_groupby, pandas_groupby):\n    df_equals(modin_groupby.count(), pandas_groupby.count())\n\n\ndef eval_size(modin_groupby, pandas_groupby):\n    df_equals(modin_groupby.size(), pandas_groupby.size())\n\n\ndef eval_pipe(modin_groupby, pandas_groupby, func):\n    df_equals(modin_groupby.pipe(func), pandas_groupby.pipe(func))\n\n\ndef eval_quantile(modin_groupby, pandas_groupby):\n    try:\n        pandas_result = pandas_groupby.quantile(q=0.4, numeric_only=True)\n    except Exception as err:\n        with pytest.raises(type(err)):\n            modin_groupby.quantile(q=0.4, numeric_only=True)\n    else:\n        df_equals(modin_groupby.quantile(q=0.4, numeric_only=True), pandas_result)\n\n\ndef eval___getattr__(modin_groupby, pandas_groupby, item):\n    eval_general(\n        modin_groupby,\n        pandas_groupby,\n        lambda grp: grp[item].count(),\n        comparator=build_types_asserter(df_equals),\n    )\n    eval_general(\n        modin_groupby,\n        pandas_groupby,\n        lambda grp: getattr(grp, item).count(),\n        comparator=build_types_asserter(df_equals),\n    )\n\n\ndef eval___getitem__(md_grp, pd_grp, item, expected_exception=None):\n    eval_general(\n        md_grp,\n        pd_grp,\n        lambda grp: grp[item].mean(),\n        comparator=build_types_asserter(df_equals),\n        expected_exception=expected_exception,\n    )\n    eval_general(\n        md_grp,\n        pd_grp,\n        lambda grp: grp[item].count(),\n        comparator=build_types_asserter(df_equals),\n        expected_exception=expected_exception,\n    )\n\n    def build_list_agg(fns):\n        def test(grp):\n            res = grp[item].agg(fns)\n            if res.ndim == 2:\n                # `as_index=False` case\n                new_axis = fns\n                if \"index\" in res.columns:\n                    new_axis = [\"index\"] + new_axis\n 
               # Modin's frame has an extra level in the result. Alligning columns to compare.\n                # https://github.com/modin-project/modin/issues/3490\n                res = res.set_axis(new_axis, axis=1)\n            return res\n\n        return test\n\n    eval_general(\n        md_grp,\n        pd_grp,\n        build_list_agg([\"mean\"]),\n        comparator=build_types_asserter(df_equals),\n        expected_exception=expected_exception,\n    )\n    eval_general(\n        md_grp,\n        pd_grp,\n        build_list_agg([\"mean\", \"count\"]),\n        comparator=build_types_asserter(df_equals),\n        expected_exception=expected_exception,\n    )\n\n    # Explicit default-to-pandas test\n    eval_general(\n        md_grp,\n        pd_grp,\n        # Defaulting to pandas only for Modin groupby objects\n        lambda grp: (\n            grp[item].sum()\n            if not isinstance(grp, pd.groupby.DataFrameGroupBy)\n            else grp[item]._default_to_pandas(lambda df: df.sum())\n        ),\n        comparator=build_types_asserter(df_equals),\n        expected_exception=expected_exception,\n    )\n\n\ndef eval_groups(modin_groupby, pandas_groupby):\n    for k, v in modin_groupby.groups.items():\n        assert v.equals(pandas_groupby.groups[k])\n    if RangePartitioning.get():\n        # `.get_group()` doesn't work correctly with experimental groupby:\n        # https://github.com/modin-project/modin/issues/6093\n        return\n    for name in pandas_groupby.groups:\n        df_equals(modin_groupby.get_group(name), pandas_groupby.get_group(name))\n\n\ndef eval_shift(modin_groupby, pandas_groupby, comparator=None):\n    if comparator is None:\n\n        def comparator(df1, df2):\n            df_equals(*sort_if_experimental_groupby(df1, df2))\n\n    eval_general(\n        modin_groupby,\n        pandas_groupby,\n        lambda groupby: groupby.shift(),\n        comparator=comparator,\n    )\n    eval_general(\n        modin_groupby,\n        
pandas_groupby,\n        lambda groupby: groupby.shift(periods=0),\n        comparator=comparator,\n    )\n    eval_general(\n        modin_groupby,\n        pandas_groupby,\n        lambda groupby: groupby.shift(periods=-3),\n        comparator=comparator,\n    )\n\n    # Disabled for `BaseOnPython` because of the issue with `getitem_array`.\n    # groupby.shift internally masks the source frame with a Series boolean mask,\n    # doing so ends up in the `getitem_array` method, that is broken for `BaseOnPython`:\n    # https://github.com/modin-project/modin/issues/3701\n    if get_current_execution() != \"BaseOnPython\" and not current_execution_is_native():\n        if isinstance(pandas_groupby, pandas.core.groupby.DataFrameGroupBy):\n            pandas_res = pandas_groupby.shift(axis=1, fill_value=777)\n            modin_res = modin_groupby.shift(axis=1, fill_value=777)\n            # Pandas produces unexpected index order (pandas GH 44269).\n            # Here we align index of Modin result with pandas to make test passed.\n            import pandas.core.algorithms as algorithms\n\n            indexer, _ = modin_res.index.get_indexer_non_unique(modin_res.index._values)\n            indexer = algorithms.unique1d(indexer)\n            modin_res = modin_res.take(indexer)\n\n            comparator(modin_res, pandas_res)\n        else:\n            eval_general(\n                modin_groupby,\n                pandas_groupby,\n                lambda groupby: groupby.shift(fill_value=777),\n                comparator=comparator,\n            )\n\n\ndef test_groupby_on_index_values_with_loop():\n    length = 2**6\n    data = {\n        \"a\": np.random.randint(0, 100, size=length),\n        \"b\": np.random.randint(0, 100, size=length),\n        \"c\": np.random.randint(0, 100, size=length),\n    }\n    idx = [\"g1\" if i % 3 != 0 else \"g2\" for i in range(length)]\n    modin_df = pd.DataFrame(data, index=idx, columns=list(\"aba\"))\n    pandas_df = 
pandas.DataFrame(data, index=idx, columns=list(\"aba\"))\n    modin_groupby_obj = modin_df.groupby(modin_df.index)\n    pandas_groupby_obj = pandas_df.groupby(pandas_df.index)\n\n    modin_dict = {k: v for k, v in modin_groupby_obj}\n    pandas_dict = {k: v for k, v in pandas_groupby_obj}\n\n    for k in modin_dict:\n        df_equals(modin_dict[k], pandas_dict[k])\n\n    modin_groupby_obj = modin_df.groupby(modin_df.columns, axis=1)\n    pandas_groupby_obj = pandas_df.groupby(pandas_df.columns, axis=1)\n\n    modin_dict = {k: v for k, v in modin_groupby_obj}\n    pandas_dict = {k: v for k, v in pandas_groupby_obj}\n\n    for k in modin_dict:\n        df_equals(modin_dict[k], pandas_dict[k])\n\n\ndef test_groupby_getitem_preserves_key_order_issue_6154():\n    a = np.tile([\"a\", \"b\", \"c\", \"d\", \"e\"], (1, 10))\n    np.random.shuffle(a[0])\n    df = pd.DataFrame(\n        np.hstack((a.T, np.arange(100).reshape((50, 2)))),\n        columns=[\"col 1\", \"col 2\", \"col 3\"],\n    )\n    eval_general(\n        df, df._to_pandas(), lambda df: df.groupby(\"col 1\")[[\"col 3\", \"col 2\"]].count()\n    )\n\n\n@pytest.mark.parametrize(\n    \"groupby_kwargs\",\n    [\n        pytest.param({\"level\": 1, \"axis\": 1}, id=\"level_idx_axis=1\"),\n        pytest.param({\"level\": 1}, id=\"level_idx\"),\n        pytest.param({\"level\": [1, \"four\"]}, id=\"level_idx+name\"),\n        pytest.param({\"by\": \"four\"}, id=\"level_name\"),\n        pytest.param({\"by\": [\"one\", \"two\"]}, id=\"level_name_multi_by\"),\n        pytest.param({\"by\": [\"item0\", \"one\", \"two\"]}, id=\"col_name+level_name\"),\n    ],\n)\ndef test_groupby_multiindex(groupby_kwargs):\n    frame_data = np.random.randint(0, 100, size=(2**6, 2**6))\n    modin_df = pd.DataFrame(frame_data)\n    pandas_df = pandas.DataFrame(frame_data)\n\n    new_index = pandas.Index([f\"item{i}\" for i in range(len(pandas_df))])\n    new_columns = pandas.MultiIndex.from_tuples(\n        [(i // 4, i // 2, i) for i 
in modin_df.columns], names=[\"four\", \"two\", \"one\"]\n    )\n    modin_df.columns = new_columns\n    modin_df.index = new_index\n    pandas_df.columns = new_columns\n    pandas_df.index = new_index\n\n    if groupby_kwargs.get(\"axis\", 0) == 0:\n        modin_df = modin_df.T\n        pandas_df = pandas_df.T\n\n    md_grp, pd_grp = (\n        modin_df.groupby(**groupby_kwargs),\n        pandas_df.groupby(**groupby_kwargs),\n    )\n    modin_groupby_equals_pandas(md_grp, pd_grp)\n    df_equals(md_grp.sum(), pd_grp.sum())\n    df_equals(md_grp.size(), pd_grp.size())\n    # Grouping on level works incorrect in case of aggregation:\n    # https://github.com/modin-project/modin/issues/2912\n    # df_equals(md_grp.quantile(), pd_grp.quantile())\n    df_equals(md_grp.first(), pd_grp.first())\n\n\n@pytest.mark.parametrize(\"dropna\", [True, False])\n@pytest.mark.parametrize(\n    \"groupby_kwargs\",\n    [\n        pytest.param({\"level\": 1, \"axis\": 1}, id=\"level_idx_axis=1\"),\n        pytest.param({\"level\": 1}, id=\"level_idx\"),\n        pytest.param({\"level\": [1, \"four\"]}, id=\"level_idx+name\"),\n        pytest.param({\"by\": \"four\"}, id=\"level_name\"),\n        pytest.param({\"by\": [\"one\", \"two\"]}, id=\"level_name_multi_by\"),\n        pytest.param(\n            {\"by\": [\"item0\", \"one\", \"two\"]},\n            id=\"col_name+level_name\",\n        ),\n        pytest.param(\n            {\"by\": [\"item0\"]},\n            id=\"col_name\",\n        ),\n        pytest.param(\n            {\"by\": [\"item0\", \"item1\"]},\n            id=\"col_name_multi_by\",\n        ),\n    ],\n)\ndef test_groupby_with_kwarg_dropna(groupby_kwargs, dropna):\n    modin_df = pd.DataFrame(test_data[\"float_nan_data\"])\n    pandas_df = pandas.DataFrame(test_data[\"float_nan_data\"])\n\n    new_index = pandas.Index([f\"item{i}\" for i in range(len(pandas_df))])\n    new_columns = pandas.MultiIndex.from_tuples(\n        [(i // 4, i // 2, i) for i in 
range(len(modin_df.columns))],\n        names=[\"four\", \"two\", \"one\"],\n    )\n    modin_df.columns = new_columns\n    modin_df.index = new_index\n    pandas_df.columns = new_columns\n    pandas_df.index = new_index\n\n    if groupby_kwargs.get(\"axis\", 0) == 0:\n        modin_df = modin_df.T\n        pandas_df = pandas_df.T\n\n    md_grp, pd_grp = (\n        modin_df.groupby(**groupby_kwargs, dropna=dropna),\n        pandas_df.groupby(**groupby_kwargs, dropna=dropna),\n    )\n    modin_groupby_equals_pandas(md_grp, pd_grp)\n\n    by_kwarg = groupby_kwargs.get(\"by\", [])\n    # Disabled because of broken `dropna=False` for TreeReduce implemented aggs:\n    # https://github.com/modin-project/modin/issues/3817\n    if not (\n        not dropna\n        and len(by_kwarg) > 1\n        and any(col in modin_df.columns for col in by_kwarg)\n    ):\n        df_equals(md_grp.sum(), pd_grp.sum())\n        df_equals(md_grp.size(), pd_grp.size())\n    # Grouping on level works incorrect in case of aggregation:\n    # https://github.com/modin-project/modin/issues/2912\n    # \"BaseOnPython\" tests are disabled because of the bug:\n    # https://github.com/modin-project/modin/issues/3827\n    if (\n        get_current_execution() != \"BaseOnPython\"\n        and not current_execution_is_native()\n        and any(col in modin_df.columns for col in by_kwarg)\n    ):\n        df_equals(md_grp.quantile(), pd_grp.quantile())\n    # Default-to-pandas tests are disabled for multi-column 'by' because of the bug:\n    # https://github.com/modin-project/modin/issues/3827\n    if not (not dropna and len(by_kwarg) > 1):\n        df_equals(md_grp.first(), pd_grp.first())\n        df_equals(md_grp._default_to_pandas(lambda df: df.sum()), pd_grp.sum())\n\n\n@pytest.mark.parametrize(\"groupby_axis\", [lib.no_default, 1])\n@pytest.mark.parametrize(\"shift_axis\", [lib.no_default, 1])\n@pytest.mark.parametrize(\"groupby_sort\", [True, False])\ndef test_shift_freq(groupby_axis, shift_axis, 
groupby_sort):\n    pandas_df = pandas.DataFrame(\n        {\n            \"col1\": [1, 0, 2, 3],\n            \"col2\": [4, 5, np.nan, 7],\n            \"col3\": [np.nan, np.nan, 12, 10],\n            \"col4\": [17, 13, 16, 15],\n        }\n    )\n    modin_df = from_pandas(pandas_df)\n\n    new_index = pandas.date_range(\"1/12/2020\", periods=4, freq=\"s\")\n    if groupby_axis == 0 and shift_axis == 0:\n        pandas_df.index = modin_df.index = new_index\n        by = [[\"col2\", \"col3\"], [\"col2\"], [\"col4\"], [0, 1, 0, 2]]\n    else:\n        pandas_df.index = modin_df.index = new_index\n        pandas_df.columns = modin_df.columns = new_index\n        by = [[0, 1, 0, 2]]\n\n    for _by in by:\n        pandas_groupby = pandas_df.groupby(by=_by, axis=groupby_axis, sort=groupby_sort)\n        modin_groupby = modin_df.groupby(by=_by, axis=groupby_axis, sort=groupby_sort)\n        eval_general(\n            modin_groupby,\n            pandas_groupby,\n            lambda groupby: groupby.shift(axis=shift_axis, freq=\"s\"),\n        )\n\n\n@pytest.mark.parametrize(\n    \"by_and_agg_dict\",\n    [\n        {\n            \"by\": [\n                list(test_data[\"int_data\"].keys())[0],\n                list(test_data[\"int_data\"].keys())[1],\n            ],\n            \"agg_dict\": {\n                \"max\": (list(test_data[\"int_data\"].keys())[2], np.max),\n                \"min\": (list(test_data[\"int_data\"].keys())[2], np.min),\n            },\n        },\n        {\n            \"by\": [\"col1\"],\n            \"agg_dict\": {\n                \"max\": (list(test_data[\"int_data\"].keys())[0], np.max),\n                \"min\": (list(test_data[\"int_data\"].keys())[-1], np.min),\n            },\n        },\n        {\n            \"by\": [\n                list(test_data[\"int_data\"].keys())[0],\n                list(test_data[\"int_data\"].keys())[-1],\n            ],\n            \"agg_dict\": {\n                \"max\": 
(list(test_data[\"int_data\"].keys())[1], max),\n                \"min\": (list(test_data[\"int_data\"].keys())[-2], min),\n            },\n        },\n        pytest.param(\n            {\n                \"by\": [\n                    list(test_data[\"int_data\"].keys())[0],\n                    list(test_data[\"int_data\"].keys())[-1],\n                ],\n                \"agg_dict\": {\n                    \"max\": (list(test_data[\"int_data\"].keys())[1], max),\n                    \"min\": (list(test_data[\"int_data\"].keys())[-1], min),\n                },\n            },\n            marks=pytest.mark.skip(\"See Modin issue #3602\"),\n        ),\n    ],\n)\n@pytest.mark.parametrize(\"as_index\", [True, False])\ndef test_agg_func_None_rename(by_and_agg_dict, as_index):\n    modin_df, pandas_df = create_test_dfs(test_data[\"int_data\"])\n\n    modin_result = modin_df.groupby(by_and_agg_dict[\"by\"], as_index=as_index).agg(\n        **by_and_agg_dict[\"agg_dict\"]\n    )\n    pandas_result = pandas_df.groupby(by_and_agg_dict[\"by\"], as_index=as_index).agg(\n        **by_and_agg_dict[\"agg_dict\"]\n    )\n    df_equals(modin_result, pandas_result)\n\n\n@pytest.mark.parametrize(\n    \"as_index\",\n    [\n        True,\n        pytest.param(\n            False,\n            marks=pytest.mark.skipif(\n                get_current_execution() == \"BaseOnPython\"\n                or RangePartitioning.get()\n                or current_execution_is_native(),\n                reason=\"See Pandas issue #39103\",\n            ),\n        ),\n    ],\n)\n@pytest.mark.parametrize(\"by_length\", [1, 3])\n@pytest.mark.parametrize(\n    \"agg_fns\",\n    [[\"sum\", \"min\", \"max\"], [\"mean\", \"quantile\"]],\n    ids=[\"reduce\", \"aggregation\"],\n)\n@pytest.mark.parametrize(\n    \"intersection_with_by_cols\",\n    [pytest.param(True, marks=pytest.mark.skip(\"See Modin issue #3602\")), False],\n)\ndef test_dict_agg_rename_mi_columns(\n    as_index, by_length, agg_fns, 
intersection_with_by_cols\n):\n    md_df, pd_df = create_test_dfs(test_data[\"int_data\"])\n    mi_columns = generate_multiindex(len(md_df.columns), nlevels=4)\n\n    md_df.columns, pd_df.columns = mi_columns, mi_columns\n\n    by = list(md_df.columns[:by_length])\n    agg_cols = (\n        list(md_df.columns[by_length - 1 : by_length + 2])\n        if intersection_with_by_cols\n        else list(md_df.columns[by_length : by_length + 3])\n    )\n\n    agg_dict = {\n        f\"custom-{i}\" + str(agg_fns[i % len(agg_fns)]): (col, agg_fns[i % len(agg_fns)])\n        for i, col in enumerate(agg_cols)\n    }\n\n    md_res = md_df.groupby(by, as_index=as_index).agg(**agg_dict)\n    pd_res = pd_df.groupby(by, as_index=as_index).agg(**agg_dict)\n\n    df_equals(md_res, pd_res)\n\n\ndef test_agg_4604():\n    data = {\"col1\": [1, 2], \"col2\": [3, 4]}\n    modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data)\n    # add another partition\n    modin_df[\"col3\"] = modin_df[\"col1\"]\n    pandas_df[\"col3\"] = pandas_df[\"col1\"]\n\n    # problem only with custom aggregation function\n    def col3(x):\n        return np.max(x)\n\n    by = [\"col1\"]\n    agg_func = {\"col2\": [\"sum\", \"min\"], \"col3\": col3}\n\n    modin_groupby, pandas_groupby = modin_df.groupby(by), pandas_df.groupby(by)\n    eval_agg(modin_groupby, pandas_groupby, agg_func)\n\n\n@pytest.mark.parametrize(\n    \"operation\",\n    [\n        \"quantile\",\n        \"mean\",\n        \"sum\",\n        \"median\",\n        \"cumprod\",\n    ],\n)\ndef test_agg_exceptions(operation):\n    N = 256\n    fill_data = [\n        (\n            \"nan_column\",\n            [\n                np.datetime64(\"2010\"),\n                None,\n                np.datetime64(\"2007\"),\n                np.datetime64(\"2010\"),\n                np.datetime64(\"2006\"),\n                np.datetime64(\"2012\"),\n                None,\n                np.datetime64(\"2011\"),\n            ]\n            * (N // 
8),\n        ),\n        (\n            \"date_column\",\n            [\n                np.datetime64(\"2010\"),\n                np.datetime64(\"2011\"),\n                np.datetime64(\"2011-06-15T00:00\"),\n                np.datetime64(\"2009-01-01\"),\n            ]\n            * (N // 4),\n        ),\n    ]\n\n    data1 = {\n        \"column_to_by\": [\"foo\", \"bar\", \"baz\", \"bar\"] * (N // 4),\n        # Earlier, the type of this column was `object`. In such a situation,\n        # when performing aggregation on different column partitions, different\n        # exceptions were thrown. The exception that engines return to the main\n        # process was non-deterministic, either `TypeError` or `NotImplementedError`.\n        \"nan_column\": [np.nan] * N,\n    }\n\n    data2 = {\n        f\"{key}{i}\": value\n        for key, value in fill_data\n        for i in range(N // len(fill_data))\n    }\n\n    data = {**data1, **data2}\n\n    def comparator(df1, df2):\n        from modin.core.dataframe.algebra.default2pandas.groupby import GroupBy\n\n        if GroupBy.is_transformation_kernel(operation):\n            df1, df2 = sort_if_experimental_groupby(df1, df2)\n\n        df_equals(df1, df2)\n\n    expected_exception = None\n    if operation == \"sum\":\n        expected_exception = TypeError(\n            \"datetime64 type does not support sum operations\"\n        )\n    elif operation == \"cumprod\":\n        expected_exception = TypeError(\n            \"datetime64 type does not support cumprod operations\"\n        )\n    eval_aggregation(\n        *create_test_dfs(data),\n        operation=operation,\n        comparator=comparator,\n        expected_exception=expected_exception,\n    )\n\n\n@pytest.mark.skip(\n    \"Pandas raises a ValueError on empty dictionary aggregation since 1.2.0\"\n    + \"It's unclear is that was made on purpose or it is a bug. 
That question\"\n    + \"was asked in https://github.com/pandas-dev/pandas/issues/39609.\"\n    + \"So until the answer this test is disabled.\"\n)\n@pytest.mark.parametrize(\n    \"kwargs\",\n    [\n        {\n            \"Max\": (\"cnt\", np.max),\n            \"Sum\": (\"cnt\", np.sum),\n            \"Num\": (\"c\", pd.Series.nunique),\n            \"Num1\": (\"c\", pandas.Series.nunique),\n        },\n        {\n            \"func\": {\n                \"Max\": (\"cnt\", np.max),\n                \"Sum\": (\"cnt\", np.sum),\n                \"Num\": (\"c\", pd.Series.nunique),\n                \"Num1\": (\"c\", pandas.Series.nunique),\n            }\n        },\n    ],\n)\ndef test_to_pandas_convertion(kwargs):\n    data = {\"a\": [1, 2], \"b\": [3, 4], \"c\": [5, 6]}\n    by = [\"a\", \"b\"]\n\n    eval_aggregation(*create_test_dfs(data), by=by, **kwargs)\n\n\n@pytest.mark.parametrize(\n    # When True, do df[name], otherwise just use name\n    \"columns\",\n    [\n        [(False, \"a\"), (False, \"b\"), (False, \"c\")],\n        [(False, \"a\"), (False, \"b\")],\n        [(True, \"b\"), (True, \"a\"), (True, \"c\")],\n        [(True, \"a\"), (True, \"b\")],\n        [(True, \"c\"), (False, \"a\"), (False, \"b\")],\n        [(False, \"a\"), (True, \"c\")],\n    ],\n)\n@pytest.mark.parametrize(\"drop_from_original_df\", [True, False])\n@pytest.mark.parametrize(\"as_index\", [True, False])\ndef test_mixed_columns(columns, drop_from_original_df, as_index):\n    data = {\n        \"a\": [1, 1, 2, 2] * 64,\n        \"b\": [11, 11, 22, 22] * 64,\n        \"c\": [111, 111, 222, 222] * 64,\n        \"data\": [1, 2, 3, 4] * 64,\n    }\n\n    md_df, pd_df = create_test_dfs(data)\n    md_df, md_by = get_external_groupers(md_df, columns, drop_from_original_df)\n    pd_df, pd_by = get_external_groupers(pd_df, columns, drop_from_original_df)\n\n    md_grp = md_df.groupby(md_by, as_index=as_index)\n    pd_grp = pd_df.groupby(pd_by, as_index=as_index)\n\n    
df_equals(md_grp.size(), pd_grp.size())\n    df_equals(md_grp.sum(), pd_grp.sum())\n    df_equals(\n        md_grp.apply(lambda df: df.sum(), include_groups=False),\n        pd_grp.apply(lambda df: df.sum(), include_groups=False),\n    )\n\n\n@pytest.mark.parametrize(\"as_index\", [True, False])\ndef test_groupby_external_grouper_duplicated_names(as_index):\n    data = {\n        \"a\": [1, 1, 2, 2] * 64,\n        \"b\": [11, 11, 22, 22] * 64,\n        \"c\": [111, 111, 222, 222] * 64,\n        \"data\": [1, 2, 3, 4] * 64,\n    }\n\n    md_df, pd_df = create_test_dfs(data)\n\n    md_unnamed_series1, pd_unnamed_series1 = create_test_series([1, 1, 2, 2] * 64)\n    md_unnamed_series2, pd_unnamed_series2 = create_test_series([10, 10, 20, 20] * 64)\n\n    md_grp = md_df.groupby([md_unnamed_series1, md_unnamed_series2], as_index=as_index)\n    pd_grp = pd_df.groupby([pd_unnamed_series1, pd_unnamed_series2], as_index=as_index)\n\n    df_equals(md_grp.sum(), pd_grp.sum())\n\n    md_same_named_series1, pd_same_named_series1 = create_test_series(\n        [1, 1, 2, 2] * 64, name=\"series_name\"\n    )\n    md_same_named_series2, pd_same_named_series2 = create_test_series(\n        [10, 10, 20, 20] * 64, name=\"series_name\"\n    )\n\n    md_grp = md_df.groupby(\n        [md_same_named_series1, md_same_named_series2], as_index=as_index\n    )\n    pd_grp = pd_df.groupby(\n        [pd_same_named_series1, pd_same_named_series2], as_index=as_index\n    )\n\n    df_equals(md_grp.sum(), pd_grp.sum())\n\n\n@pytest.mark.parametrize(\n    # When True, use (df[name] + 1), otherwise just use name\n    \"columns\",\n    [\n        [(True, \"a\"), (True, \"b\"), (True, \"c\")],\n        [(True, \"a\"), (True, \"b\")],\n        [(False, \"a\"), (False, \"b\"), (True, \"c\")],\n        [(False, \"a\"), (True, \"c\")],\n        [(False, \"a\"), (True, \"c\"), (False, [1, 1, 2])],\n        [(False, \"a\"), (False, \"b\"), (False, \"c\")],\n        [(False, \"a\"), (False, \"b\"), (False, 
\"c\"), (False, [1, 1, 2])],\n    ],\n)\ndef test_internal_by_detection(columns):\n    data = {\"a\": [1, 1, 2], \"b\": [11, 11, 22], \"c\": [111, 111, 222]}\n\n    md_df = pd.DataFrame(data)\n    _, by = get_external_groupers(md_df, columns, add_plus_one=True)\n    md_grp = md_df.groupby(by)\n\n    ref = frozenset(\n        col for is_lookup, col in columns if not is_lookup and hashable(col)\n    )\n    exp = frozenset(md_grp._internal_by)\n\n    assert ref == exp\n\n\n@pytest.mark.parametrize(\n    # When True, use (df[name] + 1), otherwise just use name\n    \"columns\",\n    [\n        [(True, \"a\"), (True, \"b\"), (True, \"c\")],\n        [(True, \"a\"), (True, \"b\")],\n        [(False, \"a\"), (False, \"b\"), (True, \"c\")],\n        [(False, \"a\"), (True, \"c\")],\n        [(False, \"a\"), (True, \"c\"), (False, [1, 1, 2])],\n    ],\n)\n@pytest.mark.parametrize(\"as_index\", [True, False])\ndef test_mixed_columns_not_from_df(columns, as_index):\n    \"\"\"\n    Unlike the previous test, in this case the Series is not just a column from\n    the original DataFrame, so you can't use a fasttrack.\n    \"\"\"\n    data = {\"a\": [1, 1, 2], \"b\": [11, 11, 22], \"c\": [111, 111, 222]}\n    groupby_kw = {\"as_index\": as_index}\n\n    md_df, pd_df = create_test_dfs(data)\n    (_, by_md), (_, by_pd) = map(\n        lambda df: get_external_groupers(df, columns, add_plus_one=True), [md_df, pd_df]\n    )\n\n    pd_grp = pd_df.groupby(by_pd, **groupby_kw)\n    md_grp = md_df.groupby(by_md, **groupby_kw)\n\n    modin_groupby_equals_pandas(md_grp, pd_grp)\n    eval_general(md_grp, pd_grp, lambda grp: grp.size())\n    eval_general(\n        md_grp, pd_grp, lambda grp: grp.apply(lambda df: df.sum(), include_groups=False)\n    )\n    eval_general(md_grp, pd_grp, lambda grp: grp.first())\n\n\n@pytest.mark.parametrize(\n    # When True, do df[obj], otherwise just use the obj\n    \"columns\",\n    [\n        [(False, \"a\")],\n        [(False, \"a\"), (False, \"b\"), 
(False, \"c\")],\n        [(False, \"a\"), (False, \"b\")],\n        [(False, \"b\"), (False, \"a\")],\n        [(True, \"a\"), (True, \"b\"), (True, \"c\")],\n        [(True, \"a\"), (True, \"b\")],\n        [(False, \"a\"), (False, \"b\"), (True, \"c\")],\n        [(False, \"a\"), (True, \"c\")],\n        [(False, \"a\"), (False, pd.Series([5, 6, 7, 8]))],\n    ],\n)\ndef test_unknown_groupby(columns):\n    data = {\"b\": [11, 11, 22, 200], \"c\": [111, 111, 222, 7000]}\n    modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data)\n\n    with pytest.raises(KeyError):\n        pandas_df.groupby(by=get_external_groupers(pandas_df, columns)[1])\n    with pytest.raises(KeyError):\n        modin_df.groupby(by=get_external_groupers(modin_df, columns)[1])\n\n\n@pytest.mark.parametrize(\n    \"func_to_apply\",\n    [\n        lambda df: df.sum(),\n        lambda df: df.size(),\n        lambda df: df.quantile(),\n        lambda df: df.dtypes,\n        lambda df: df.apply(lambda df: df.sum()),\n        pytest.param(\n            lambda df: df.apply(lambda df: pandas.Series([1, 2, 3, 4])),\n            marks=pytest.mark.skip(\"See modin issue #2511\"),\n        ),\n        lambda grp: grp.agg(\n            {\n                list(test_data_values[0].keys())[1]: (max, min, sum),\n                list(test_data_values[0].keys())[-2]: (sum, min, max),\n            }\n        ),\n        lambda grp: grp.agg(\n            {\n                list(test_data_values[0].keys())[1]: [\n                    (\"new_sum\", \"sum\"),\n                    (\"new_min\", \"min\"),\n                ],\n                list(test_data_values[0].keys())[-2]: np.sum,\n            }\n        ),\n        pytest.param(\n            lambda grp: grp.agg(\n                {\n                    list(test_data_values[0].keys())[1]: [\n                        (\"new_sum\", \"sum\"),\n                        (\"new_mean\", \"mean\"),\n                    ],\n                    
list(test_data_values[0].keys())[-2]: \"skew\",\n                }\n            ),\n            id=\"renaming_aggs_at_different_partitions\",\n        ),\n        pytest.param(\n            lambda grp: grp.agg(\n                {\n                    list(test_data_values[0].keys())[1]: [\n                        (\"new_sum\", \"sum\"),\n                        (\"new_mean\", \"mean\"),\n                    ],\n                    list(test_data_values[0].keys())[2]: \"skew\",\n                }\n            ),\n            id=\"renaming_aggs_at_same_partition\",\n        ),\n        pytest.param(\n            lambda grp: grp.agg(\n                {\n                    list(test_data_values[0].keys())[1]: \"mean\",\n                    list(test_data_values[0].keys())[-2]: \"skew\",\n                }\n            ),\n            id=\"custom_aggs_at_different_partitions\",\n        ),\n        pytest.param(\n            lambda grp: grp.agg(\n                {\n                    list(test_data_values[0].keys())[1]: \"mean\",\n                    list(test_data_values[0].keys())[2]: \"skew\",\n                }\n            ),\n            id=\"custom_aggs_at_same_partition\",\n        ),\n        pytest.param(\n            lambda grp: grp.agg(\n                {\n                    list(test_data_values[0].keys())[1]: \"mean\",\n                    list(test_data_values[0].keys())[-2]: \"sum\",\n                }\n            ),\n            id=\"native_and_custom_aggs_at_different_partitions\",\n        ),\n        pytest.param(\n            lambda grp: grp.agg(\n                {\n                    list(test_data_values[0].keys())[1]: \"mean\",\n                    list(test_data_values[0].keys())[2]: \"sum\",\n                }\n            ),\n            id=\"native_and_custom_aggs_at_same_partition\",\n        ),\n        pytest.param(\n            lambda grp: grp.agg(\n                {\n                    list(test_data_values[0].keys())[1]: (max, 
\"mean\", sum),\n                    list(test_data_values[0].keys())[-1]: (sum, \"skew\", max),\n                }\n            ),\n            id=\"Agg_and_by_intersection_TreeReduce_implementation\",\n        ),\n        pytest.param(\n            lambda grp: grp.agg(\n                {\n                    list(test_data_values[0].keys())[1]: (max, \"mean\", \"nunique\"),\n                    list(test_data_values[0].keys())[-1]: (sum, min, max),\n                }\n            ),\n            id=\"Agg_and_by_intersection_FullAxis_implementation\",\n        ),\n        pytest.param(\n            lambda grp: grp.agg({list(test_data_values[0].keys())[0]: \"count\"}),\n            id=\"Agg_and_by_intersection_issue_3376\",\n        ),\n    ],\n)\n@pytest.mark.parametrize(\"as_index\", [True, False])\n@pytest.mark.parametrize(\"by_length\", [1, 2])\n@pytest.mark.parametrize(\n    \"categorical_by\",\n    [pytest.param(True, marks=pytest.mark.skip(\"See modin issue #2513\")), False],\n)\ndef test_multi_column_groupby_different_partitions(\n    func_to_apply, as_index, by_length, categorical_by, request\n):\n    if (\n        not categorical_by\n        and by_length == 1\n        and \"custom_aggs_at_same_partition\" in request.node.name\n        or \"renaming_aggs_at_same_partition\" in request.node.name\n    ):\n        pytest.xfail(\n            \"After upgrade to pandas 2.1 skew results are different: AssertionError: 1.0 >= 0.0001.\"\n            + \" See https://github.com/modin-project/modin/issues/6530 for details.\"\n        )\n    data = test_data_values[0]\n    md_df, pd_df = create_test_dfs(data)\n\n    by = [pd_df.columns[-i if i % 2 else i] for i in range(by_length)]\n\n    if categorical_by:\n        md_df = md_df.astype({by[0]: \"category\"})\n        pd_df = pd_df.astype({by[0]: \"category\"})\n\n    md_grp, pd_grp = (\n        md_df.groupby(by, as_index=as_index),\n        pd_df.groupby(by, as_index=as_index),\n    )\n    eval_general(\n        
md_grp,\n        pd_grp,\n        func_to_apply,\n        # 'skew' and 'mean' results are not 100% equal to pandas as they use\n        # different formulas and so precision errors come into play. Thus\n        # using a custom comparator that allows slight numeric deviations.\n        comparator=try_modin_df_almost_equals_compare,\n    )\n    # FIXME: https://github.com/modin-project/modin/issues/7034\n    eval___getitem__(md_grp, pd_grp, md_df.columns[1], expected_exception=False)\n    # FIXME: https://github.com/modin-project/modin/issues/7034\n    eval___getitem__(\n        md_grp, pd_grp, [md_df.columns[1], md_df.columns[2]], expected_exception=False\n    )\n\n\ndef test_empty_partitions_after_groupby():\n    def func_to_apply(grp):\n        return grp.agg(\n            {\n                list(test_data_values[0].keys())[1]: \"sum\",\n                list(test_data_values[0].keys())[-1]: \"sum\",\n            }\n        )\n\n    data = test_data_values[0]\n    md_df, pd_df = create_test_dfs(data)\n    by = pd_df.columns[0]\n\n    with context(DynamicPartitioning=True):\n        md_grp, pd_grp = (\n            md_df.groupby(by),\n            pd_df.groupby(by),\n        )\n        eval_general(\n            md_grp,\n            pd_grp,\n            func_to_apply,\n        )\n\n\n@pytest.mark.parametrize(\n    \"by\",\n    [\n        0,\n        1.5,\n        \"str\",\n        pandas.Timestamp(\"2020-02-02\"),\n        [0, \"str\"],\n        [pandas.Timestamp(\"2020-02-02\"), 1.5],\n    ],\n)\n@pytest.mark.parametrize(\"as_index\", [True, False])\ndef test_not_str_by(by, as_index):\n    columns = pandas.Index([0, 1.5, \"str\", pandas.Timestamp(\"2020-02-02\")])\n    data = {col: np.arange(5) for col in columns}\n    md_df, pd_df = create_test_dfs(data)\n\n    md_grp, pd_grp = (\n        md_df.groupby(by, as_index=as_index),\n        pd_df.groupby(by, as_index=as_index),\n    )\n\n    modin_groupby_equals_pandas(md_grp, pd_grp)\n    eval_general(md_grp, pd_grp, 
lambda grp: grp.sum())\n    eval_general(md_grp, pd_grp, lambda grp: grp.size())\n    eval_general(md_grp, pd_grp, lambda grp: grp.agg(lambda df: df.mean()))\n    eval_general(md_grp, pd_grp, lambda grp: grp.dtypes)\n    eval_general(md_grp, pd_grp, lambda grp: grp.first())\n\n\n@pytest.mark.parametrize(\"internal_by_length\", [0, 1, 2])\n@pytest.mark.parametrize(\"external_by_length\", [0, 1, 2])\n@pytest.mark.parametrize(\"has_categorical_by\", [True, False])\n@pytest.mark.parametrize(\n    \"agg_func\",\n    [\n        pytest.param(\n            lambda grp: grp.apply(lambda df: df.dtypes), id=\"modin_dtypes_impl\"\n        ),\n        pytest.param(\n            lambda grp: grp.apply(lambda df: df.sum(numeric_only=True)), id=\"apply_sum\"\n        ),\n        pytest.param(lambda grp: grp.count(), id=\"count\"),\n        pytest.param(lambda grp: grp.nunique(), id=\"nunique\"),\n        # Integer key means the index of the column to replace it with.\n        # 0 and -1 are considered to be the indices of the columns to group on.\n        pytest.param({1: \"sum\", 2: \"nunique\"}, id=\"dict_agg_no_intersection_with_by\"),\n        pytest.param(\n            {0: \"mean\", 1: \"sum\", 2: \"nunique\"},\n            id=\"dict_agg_has_intersection_with_by\",\n        ),\n        pytest.param(\n            {1: \"sum\", 2: \"nunique\", -1: \"nunique\"},\n            id=\"dict_agg_has_intersection_with_categorical_by\",\n        ),\n    ],\n)\n# There are two versions of the `handle_as_index` method: the one accepting pandas.DataFrame from\n# the execution kernel and backend agnostic. 
This parameter indicates which implementation to use.\n@pytest.mark.parametrize(\"use_backend_agnostic_method\", [True, False])\ndef test_handle_as_index(\n    internal_by_length,\n    external_by_length,\n    has_categorical_by,\n    agg_func,\n    use_backend_agnostic_method,\n    request,\n):\n    \"\"\"\n    Test ``modin.core.dataframe.algebra.default2pandas.groupby.GroupBy.handle_as_index``.\n\n    The role of the ``handle_as_index`` method is to build a groupby result considering\n    ``as_index=False`` from the result that was computed with ``as_index=True``.\n\n    So the testing flow is the following:\n        1. Compute GroupBy result with the ``as_index=True`` parameter via Modin.\n        2. Build ``as_index=False`` result from the ``as_index=True`` using ``handle_as_index`` method.\n        3. Compute GroupBy result with the ``as_index=False`` parameter via pandas as the reference result.\n        4. Compare the result from the second step with the reference.\n    \"\"\"\n    by_length = internal_by_length + external_by_length\n    if by_length == 0:\n        pytest.skip(\"No keys to group on were passed, skipping the test.\")\n\n    if (\n        has_categorical_by\n        and by_length > 1\n        and (\n            isinstance(agg_func, dict)\n            or (\"nunique\" in request.node.callspec.id.split(\"-\"))\n        )\n    ):\n        pytest.skip(\n            \"The linked bug makes pandas raise an exception when 'by' is categorical: \"\n            + \"https://github.com/pandas-dev/pandas/issues/36698\"\n        )\n\n    df = pandas.DataFrame(test_groupby_data)\n    external_by_cols = GroupBy.validate_by(df.add_prefix(\"external_\"))\n\n    if has_categorical_by:\n        df = df.astype({df.columns[-1]: \"category\"})\n\n    if isinstance(agg_func, dict):\n        agg_func = {df.columns[key]: value for key, value in agg_func.items()}\n        selection = list(agg_func.keys())\n        agg_dict = agg_func\n        agg_func = lambda grp: 
grp.agg(agg_dict)  # noqa: E731 (lambda assignment)\n    else:\n        selection = None\n\n    # Selecting 'by' columns from both sides of the frame so they are located in different partitions\n    internal_by = df.columns[\n        range(-internal_by_length // 2, internal_by_length // 2)\n    ].tolist()\n    external_by = external_by_cols[:external_by_length]\n\n    pd_by = internal_by + external_by\n    md_by = internal_by + [pd.Series(ser) for ser in external_by]\n\n    grp_result = pd.DataFrame(df).groupby(md_by, as_index=True)\n    grp_reference = df.groupby(pd_by, as_index=False)\n\n    agg_result = agg_func(grp_result)\n    agg_reference = agg_func(grp_reference)\n\n    if use_backend_agnostic_method:\n        reset_index, drop, lvls_to_drop, cols_to_drop = GroupBy.handle_as_index(\n            result_cols=agg_result.columns,\n            result_index_names=agg_result.index.names,\n            internal_by_cols=internal_by,\n            by_cols_dtypes=df[internal_by].dtypes.values,\n            by_length=len(md_by),\n            selection=selection,\n            drop=len(internal_by) != 0,\n        )\n\n        if len(lvls_to_drop) > 0:\n            agg_result.index = agg_result.index.droplevel(lvls_to_drop)\n        if len(cols_to_drop) > 0:\n            agg_result = agg_result.drop(columns=cols_to_drop)\n        if reset_index:\n            agg_result = agg_result.reset_index(drop=drop)\n    else:\n        GroupBy.handle_as_index_for_dataframe(\n            result=agg_result,\n            internal_by_cols=internal_by,\n            by_cols_dtypes=df[internal_by].dtypes.values,\n            by_length=len(md_by),\n            selection=selection,\n            drop=len(internal_by) != 0,\n            inplace=True,\n        )\n\n    df_equals(agg_result, agg_reference)\n\n\ndef test_validate_by():\n    \"\"\"Test ``modin.core.dataframe.algebra.default2pandas.groupby.GroupBy.validate_by``.\"\"\"\n\n    def compare(obj1, obj2):\n        assert type(obj1) is type(\n   
         obj2\n        ), f\"Both objects must be instances of the same type: {type(obj1)} != {type(obj2)}.\"\n        if isinstance(obj1, list):\n            for val1, val2 in itertools.zip_longest(obj1, obj2):\n                df_equals(val1, val2)\n        else:\n            df_equals(obj1, obj2)\n\n    # This emulates situation when the Series's query compiler being passed as a 'by':\n    #   1. The Series at the QC level is represented as a single-column frame with the `MODIN_UNNAMED_SERIES_LABEL` columns.\n    #   2. The valid representation of such QC is an unnamed Series.\n    reduced_frame = pandas.DataFrame({MODIN_UNNAMED_SERIES_LABEL: [1, 2, 3]})\n    series_result = GroupBy.validate_by(reduced_frame)\n    series_reference = [pandas.Series([1, 2, 3], name=None)]\n    compare(series_reference, series_result)\n\n    # This emulates situation when several 'by' columns of the group frame are passed as a single QueryCompiler:\n    #   1. If grouping on several columns the 'by' at the QC level is the following: ``df[by]._query_compiler``.\n    #   2. 
The valid representation of such QC is a list of Series.\n    splited_df = [pandas.Series([1, 2, 3], name=f\"col{i}\") for i in range(3)]\n    splited_df_result = GroupBy.validate_by(\n        pandas.concat(splited_df, axis=1, copy=True)\n    )\n    compare(splited_df, splited_df_result)\n\n    # This emulates situation of mixed by (two column names and an external Series):\n    by = [\"col1\", \"col2\", pandas.DataFrame({MODIN_UNNAMED_SERIES_LABEL: [1, 2, 3]})]\n    result_by = GroupBy.validate_by(by)\n    reference_by = [\"col1\", \"col2\", pandas.Series([1, 2, 3], name=None)]\n    compare(reference_by, result_by)\n\n\n@pytest.mark.skipif(\n    get_current_execution() == \"BaseOnPython\" or current_execution_is_native(),\n    reason=\"The test only make sense for partitioned executions\",\n)\ndef test_groupby_with_virtual_partitions():\n    # from https://github.com/modin-project/modin/issues/4464\n    modin_df, pandas_df = create_test_dfs(test_data[\"int_data\"])\n\n    # Concatenate DataFrames here to make virtual partitions.\n    big_modin_df = pd.concat([modin_df for _ in range(5)])\n    big_pandas_df = pandas.concat([pandas_df for _ in range(5)])\n\n    # Check that the constructed Modin DataFrame has virtual partitions when\n    assert issubclass(\n        type(big_modin_df._query_compiler._modin_frame._partitions[0][0]),\n        PandasDataframeAxisPartition,\n    )\n    eval_general(\n        big_modin_df, big_pandas_df, lambda df: df.groupby(df.columns[0]).count()\n    )\n\n\n@pytest.mark.parametrize(\"sort\", [True, False])\n@pytest.mark.parametrize(\"is_categorical_by\", [True, False])\ndef test_groupby_sort(sort, is_categorical_by):\n    # from issue #3571\n    by = np.array([\"a\"] * 50000 + [\"b\"] * 10000 + [\"c\"] * 1000)\n    random_state = np.random.RandomState(seed=42)\n    random_state.shuffle(by)\n\n    data = {\"key_col\": by, \"data_col\": np.arange(len(by))}\n    md_df, pd_df = create_test_dfs(data)\n\n    if is_categorical_by:\n        
md_df = md_df.astype({\"key_col\": \"category\"})\n        pd_df = pd_df.astype({\"key_col\": \"category\"})\n\n    md_grp = md_df.groupby(\"key_col\", sort=sort)\n    pd_grp = pd_df.groupby(\"key_col\", sort=sort)\n\n    modin_groupby_equals_pandas(md_grp, pd_grp)\n    eval_general(md_grp, pd_grp, lambda grp: grp.sum(numeric_only=True))\n    eval_general(md_grp, pd_grp, lambda grp: grp.size())\n    eval_general(md_grp, pd_grp, lambda grp: grp.agg(lambda df: df.mean()))\n    eval_general(md_grp, pd_grp, lambda grp: grp.dtypes)\n    eval_general(md_grp, pd_grp, lambda grp: grp.first())\n\n\ndef test_groupby_with_frozenlist():\n    pandas_df = pandas.DataFrame(data={\"a\": [1, 2, 3], \"b\": [1, 2, 3], \"c\": [1, 2, 3]})\n    pandas_df = pandas_df.set_index([\"a\", \"b\"])\n    modin_df = from_pandas(pandas_df)\n    eval_general(modin_df, pandas_df, lambda df: df.groupby(df.index.names).count())\n\n\n@pytest.mark.parametrize(\n    \"by_func\",\n    [\n        lambda df: \"timestamp0\",\n        lambda df: [\"timestamp0\", \"timestamp1\"],\n        lambda df: [\"timestamp0\", df[\"timestamp1\"]],\n    ],\n)\ndef test_mean_with_datetime(by_func):\n    data = {\n        \"timestamp0\": [pd.to_datetime(1490195805, unit=\"s\")],\n        \"timestamp1\": [pd.to_datetime(1490195805, unit=\"s\")],\n        \"numeric\": [0],\n    }\n\n    modin_df, pandas_df = create_test_dfs(data)\n    eval_general(modin_df, pandas_df, lambda df: df.groupby(by=by_func(df)).mean())\n\n\ndef test_groupby_ohlc():\n    pandas_df = pandas.DataFrame(\n        np.random.randint(0, 100, (50, 2)), columns=[\"stock A\", \"stock B\"]\n    )\n    pandas_df[\"Date\"] = pandas.concat(\n        [pandas.date_range(\"1/1/2000\", periods=10, freq=\"min\").to_series()] * 5\n    ).reset_index(drop=True)\n    modin_df = pd.DataFrame(pandas_df)\n    eval_general(modin_df, pandas_df, lambda df: df.groupby(\"Date\")[\"stock A\"].ohlc())\n    pandas_multiindex_result = pandas_df.groupby(\"Date\")[[\"stock 
A\"]].ohlc()\n\n    with warns_that_defaulting_to_pandas_if(\n        not df_or_series_using_native_execution(modin_df)\n    ):\n        modin_multiindex_result = modin_df.groupby(\"Date\")[[\"stock A\"]].ohlc()\n    df_equals(modin_multiindex_result, pandas_multiindex_result)\n\n    pandas_multiindex_result = pandas_df.groupby(\"Date\")[[\"stock A\", \"stock B\"]].ohlc()\n    with warns_that_defaulting_to_pandas_if(\n        not df_or_series_using_native_execution(modin_df)\n    ):\n        modin_multiindex_result = modin_df.groupby(\"Date\")[\n            [\"stock A\", \"stock B\"]\n        ].ohlc()\n    df_equals(modin_multiindex_result, pandas_multiindex_result)\n\n\n@pytest.mark.parametrize(\n    \"modin_df_recipe\",\n    [\"non_lazy_frame\", \"frame_with_deferred_index\", \"lazy_frame\"],\n)\ndef test_groupby_on_empty_data(modin_df_recipe):\n    class ModinDfConstructor:\n        def __init__(self, recipe, df_kwargs):\n            self._recipe = recipe\n            self._mock_obj = None\n            self._df_kwargs = df_kwargs\n\n        def non_lazy_frame(self):\n            return pd.DataFrame(**self._df_kwargs)\n\n        def frame_with_deferred_index(self):\n            df = pd.DataFrame(**self._df_kwargs)\n            try:\n                # The frame would stop being lazy once index computation is triggered\n                df._query_compiler.set_frame_index_cache(None)\n            except AttributeError:\n                pytest.skip(\n                    reason=\"Selected execution doesn't support deferred indices.\"\n                )\n\n            return df\n\n        def lazy_frame(self):\n            donor_obj = pd.DataFrame()._query_compiler\n\n            self._mock_obj = mock.patch(\n                f\"{donor_obj.__module__}.{donor_obj.__class__.__name__}.lazy_shape\",\n                new_callable=mock.PropertyMock,\n            )\n            patch_obj = self._mock_obj.__enter__()\n            patch_obj.return_value = True\n\n            df = 
pd.DataFrame(**self._df_kwargs)\n            # The frame is lazy until `self.__exit__()` is called\n            assert df._query_compiler.lazy_shape\n            return df\n\n        def __enter__(self):\n            return getattr(self, self._recipe)()\n\n        def __exit__(self, *args, **kwargs):\n            if self._mock_obj is not None:\n                self._mock_obj.__exit__(*args, **kwargs)\n\n    def run_test(eval_function, *args, **kwargs):\n        df_kwargs = {\"columns\": [\"a\", \"b\", \"c\"]}\n        with ModinDfConstructor(modin_df_recipe, df_kwargs) as modin_df:\n            pandas_df = pandas.DataFrame(**df_kwargs)\n\n            modin_grp = modin_df.groupby(modin_df.columns[0])\n            pandas_grp = pandas_df.groupby(pandas_df.columns[0])\n\n            eval_function(modin_grp, pandas_grp, *args, **kwargs)\n\n    run_test(eval___getattr__, item=\"b\")\n    run_test(eval___getitem__, item=\"b\")\n    run_test(eval_agg, func=lambda df: df.mean())\n    run_test(eval_any)\n    run_test(eval_apply, func=lambda df: df.mean())\n    run_test(eval_count)\n    run_test(eval_cummax, numeric_only=True)\n    run_test(eval_cummin, numeric_only=True)\n    run_test(eval_cumprod, numeric_only=True)\n    run_test(eval_cumsum, numeric_only=True)\n    run_test(eval_dtypes)\n    run_test(eval_fillna)\n    run_test(eval_groups)\n    run_test(eval_len)\n    run_test(eval_max)\n    run_test(eval_mean)\n    run_test(eval_median)\n    run_test(eval_min)\n    run_test(eval_ndim)\n    run_test(eval_ngroup)\n    run_test(eval_ngroups)\n    run_test(eval_nunique)\n    run_test(eval_prod)\n    run_test(eval_quantile)\n    run_test(eval_rank)\n    run_test(eval_size)\n    run_test(eval_skew)\n    run_test(eval_sum)\n    run_test(eval_var)\n\n    if modin_df_recipe != \"lazy_frame\":\n        # TODO: these functions have their specific implementations in the\n        # front-end that are unable to operate on empty frames and thus\n        # fail on an empty lazy frame.\n  
      # https://github.com/modin-project/modin/issues/5505\n        # https://github.com/modin-project/modin/issues/5506\n        run_test(eval_pipe, func=lambda df: df.mean())\n        run_test(eval_shift)\n\n    # TODO: these functions fail in case of empty data in pandas itself,\n    # we have to modify the `eval_*` functions to be able to check for\n    # exceptions equality:\n    # https://github.com/modin-project/modin/issues/5441\n    # run_test(eval_transform, func=lambda df: df.mean())\n    # run_test(eval_std)\n\n\ndef test_skew_corner_cases():\n    \"\"\"\n    This test was inspired by https://github.com/modin-project/modin/issues/5545.\n\n    The test verifies that modin acts exactly as pandas when the input data is\n    bad for the 'skew' and so some components of the 'skew' formula appear to be invalid:\n        ``(count * (count - 1) ** 0.5 / (count - 2)) * (m3 / m2**1.5)``\n    \"\"\"\n    # When 'm2 == m3 == 0' thus causing 0 / 0 division in the second multiplier.\n    # Note: mX = 'sum((col - mean(col)) ^ x)'\n    modin_df, pandas_df = create_test_dfs({\"col0\": [1, 1, 1], \"col1\": [10, 10, 10]})\n    eval_general(modin_df, pandas_df, lambda df: df.groupby(\"col0\").skew())\n\n    # When 'count < 3' thus causing dividing by zero in the first multiplier\n    # Note: count = group_size\n    modin_df, pandas_df = create_test_dfs({\"col0\": [1, 1], \"col1\": [1, 2]})\n    eval_general(modin_df, pandas_df, lambda df: df.groupby(\"col0\").skew())\n\n    # When 'count < 3' and 'm3 / m2 != 0'. 
The case comes from:\n    # https://github.com/modin-project/modin/issues/5545\n    modin_df, pandas_df = create_test_dfs({\"col0\": [1, 1], \"col1\": [171, 137]})\n    eval_general(modin_df, pandas_df, lambda df: df.groupby(\"col0\").skew())\n\n\n@pytest.mark.parametrize(\n    \"by\",\n    [\n        pandas.Grouper(key=\"time_stamp\", freq=\"3D\"),\n        [pandas.Grouper(key=\"time_stamp\", freq=\"1ME\"), \"count\"],\n    ],\n)\ndef test_groupby_with_grouper(by):\n    # See https://github.com/modin-project/modin/issues/5091 for more details\n    # Generate larger data so that it can handle partitioning cases\n    data = {\n        \"id\": [i for i in range(200)],\n        \"time_stamp\": [\n            pd.Timestamp(\"2000-01-02\") + datetime.timedelta(days=x) for x in range(200)\n        ],\n    }\n    for i in range(200):\n        data[f\"count_{i}\"] = [i, i + 1] * 100\n\n    modin_df, pandas_df = create_test_dfs(data)\n    eval_general(\n        modin_df,\n        pandas_df,\n        lambda df: df.groupby(by).mean(),\n        # FIXME: https://github.com/modin-project/modin/issues/7033\n        expected_exception=False,\n    )\n\n\ndef test_groupby_preserves_by_order():\n    modin_df, pandas_df = create_test_dfs({\"col0\": [1, 1, 1], \"col1\": [10, 10, 10]})\n\n    modin_res = modin_df.groupby([pd.Series([100, 100, 100]), \"col0\"]).mean()\n    pandas_res = pandas_df.groupby([pandas.Series([100, 100, 100]), \"col0\"]).mean()\n\n    df_equals(modin_res, pandas_res)\n\n\n@pytest.mark.parametrize(\n    \"method\",\n    # test all aggregations from pandas.core.groupby.base.reduction_kernels except\n    # nth and corrwith, both of which require extra arguments.\n    [\n        \"all\",\n        \"any\",\n        \"count\",\n        \"first\",\n        \"idxmax\",\n        \"idxmin\",\n        \"last\",\n        \"max\",\n        \"mean\",\n        \"median\",\n        \"min\",\n        \"nunique\",\n        \"prod\",\n        \"quantile\",\n        \"sem\",\n       
 \"size\",\n        \"skew\",\n        \"std\",\n        \"sum\",\n        \"var\",\n    ],\n)\n@pytest.mark.skipif(\n    StorageFormat.get() != \"Pandas\",\n    reason=\"only relevant to pandas execution\",\n)\ndef test_groupby_agg_with_empty_column_partition_6175(method):\n    df = pd.concat(\n        [\n            pd.DataFrame({\"col33\": [0, 1], \"index\": [2, 3]}),\n            pd.DataFrame({\"col34\": [4, 5]}),\n        ],\n        axis=1,\n    )\n    assert df._query_compiler._modin_frame._partitions.shape == (1, 2)\n    eval_general(\n        df,\n        df._to_pandas(),\n        lambda df: getattr(df.groupby([\"col33\", \"index\"]), method)(),\n    )\n\n\ndef test_groupby_pct_change_diff_6194():\n    df = pd.DataFrame(\n        {\n            \"by\": [\"a\", \"b\", \"c\", \"a\", \"c\"],\n            \"value\": [1, 2, 4, 5, 1],\n        }\n    )\n    # These methods should not crash\n    eval_general(\n        df,\n        df._to_pandas(),\n        lambda df: df.groupby(by=\"by\").pct_change(),\n    )\n    eval_general(\n        df,\n        df._to_pandas(),\n        lambda df: df.groupby(by=\"by\").diff(),\n    )\n\n\ndef test_groupby_datetime_diff_6628():\n    dates = pd.date_range(start=\"2023-01-01\", periods=10, freq=\"W\")\n    df = pd.DataFrame(\n        {\n            \"date\": dates,\n            \"group\": \"A\",\n        }\n    )\n    eval_general(\n        df,\n        df._to_pandas(),\n        lambda df: df.groupby(\"group\").diff(),\n    )\n\n\ndef eval_rolling(md_window, pd_window):\n    eval_general(md_window, pd_window, lambda window: window.count())\n    eval_general(md_window, pd_window, lambda window: window.sum())\n    eval_general(md_window, pd_window, lambda window: window.mean())\n    eval_general(md_window, pd_window, lambda window: window.median())\n    eval_general(md_window, pd_window, lambda window: window.var())\n    eval_general(md_window, pd_window, lambda window: window.std())\n    eval_general(md_window, pd_window, lambda 
window: window.min())\n    eval_general(md_window, pd_window, lambda window: window.max())\n    expected_exception = None\n    if pd_window.on == \"col4\":\n        expected_exception = ValueError(\n            \"Length mismatch: Expected axis has 450 elements, new values have 600 elements\"\n        )\n    eval_general(\n        md_window,\n        pd_window,\n        lambda window: window.corr(),\n        expected_exception=expected_exception,\n    )\n    eval_general(\n        md_window,\n        pd_window,\n        lambda window: window.cov(),\n        expected_exception=expected_exception,\n    )\n    eval_general(md_window, pd_window, lambda window: window.skew())\n    eval_general(md_window, pd_window, lambda window: window.kurt())\n    eval_general(\n        md_window, pd_window, lambda window: window.apply(lambda df: (df + 10).sum())\n    )\n    eval_general(md_window, pd_window, lambda window: window.agg(\"sum\"))\n    eval_general(md_window, pd_window, lambda window: window.quantile(0.2))\n    eval_general(md_window, pd_window, lambda window: window.rank())\n\n    expected_exception = None\n    if pd_window.on == \"col4\":\n        expected_exception = TypeError(\n            \"Addition/subtraction of integers and integer-arrays with DatetimeArray is no longer supported.\"\n            + \"  Instead of adding/subtracting `n`, use `n * obj.freq`\"\n        )\n\n    if not md_window._as_index:\n        # There's a mismatch in group columns when 'as_index=False'\n        # see: https://github.com/modin-project/modin/issues/6291\n        by_cols = list(md_window._groupby_obj._internal_by)\n        eval_general(\n            md_window,\n            pd_window,\n            lambda window: window.sem().drop(columns=by_cols, errors=\"ignore\"),\n            expected_exception=expected_exception,\n        )\n    else:\n        eval_general(\n            md_window,\n            pd_window,\n            lambda window: window.sem(),\n            
expected_exception=expected_exception,\n        )\n\n\n@pytest.mark.parametrize(\"center\", [True, False])\n@pytest.mark.parametrize(\"closed\", [\"right\", \"left\", \"both\", \"neither\"])\n@pytest.mark.parametrize(\"as_index\", [True, False])\ndef test_rolling_int_window(center, closed, as_index):\n    col_part1 = pd.DataFrame(\n        {\n            \"by\": np.tile(np.arange(15), 10),\n            \"col1\": np.arange(150),\n            \"col2\": np.arange(10, 160),\n        }\n    )\n    col_part2 = pd.DataFrame({\"col3\": np.arange(20, 170)})\n\n    md_df = pd.concat([col_part1, col_part2], axis=1)\n    pd_df = md_df._to_pandas()\n\n    if StorageFormat.get() == \"Pandas\":\n        assert md_df._query_compiler._modin_frame._partitions.shape[1] == 2\n\n    md_window = md_df.groupby(\"by\", as_index=as_index).rolling(\n        3, center=center, closed=closed\n    )\n    pd_window = pd_df.groupby(\"by\", as_index=as_index).rolling(\n        3, center=center, closed=closed\n    )\n    eval_rolling(md_window, pd_window)\n\n\n@pytest.mark.parametrize(\"center\", [True, False])\n@pytest.mark.parametrize(\"closed\", [\"right\", \"left\", \"both\", \"neither\"])\n@pytest.mark.parametrize(\"as_index\", [True, False])\n@pytest.mark.parametrize(\"on\", [None, \"col4\"])\ndef test_rolling_timedelta_window(center, closed, as_index, on):\n    col_part1 = pd.DataFrame(\n        {\n            \"by\": np.tile(np.arange(15), 10),\n            \"col1\": np.arange(150),\n            \"col2\": np.arange(10, 160),\n        }\n    )\n    col_part2 = pd.DataFrame({\"col3\": np.arange(20, 170)})\n\n    if on is not None:\n        col_part2[on] = pandas.DatetimeIndex(\n            [\n                datetime.date(2020, 1, 1) + datetime.timedelta(hours=12) * i\n                for i in range(150)\n            ]\n        )\n\n    md_df = pd.concat([col_part1, col_part2], axis=1)\n    md_df.index = pandas.DatetimeIndex(\n        [datetime.date(2020, 1, 1) + datetime.timedelta(days=1) * 
i for i in range(150)]\n    )\n\n    pd_df = md_df._to_pandas()\n\n    if StorageFormat.get() == \"Pandas\":\n        assert (\n            md_df._query_compiler._modin_frame._partitions.shape[1] == 2\n            if on is None\n            else 3\n        )\n\n    md_window = md_df.groupby(\"by\", as_index=as_index).rolling(\n        datetime.timedelta(days=3), center=center, closed=closed, on=on\n    )\n    pd_window = pd_df.groupby(\"by\", as_index=as_index).rolling(\n        datetime.timedelta(days=3), center=center, closed=closed, on=on\n    )\n    eval_rolling(md_window, pd_window)\n\n\n@pytest.mark.parametrize(\n    \"func\",\n    [\n        pytest.param(\"sum\", id=\"map_reduce_func\"),\n        pytest.param(\"median\", id=\"full_axis_func\"),\n    ],\n)\ndef test_groupby_deferred_index(func):\n    # the test is copied from the issue:\n    # https://github.com/modin-project/modin/issues/6368\n\n    def perform(lib):\n        df1 = lib.DataFrame({\"a\": [1, 1, 2, 2]})\n        df2 = lib.DataFrame({\"b\": [3, 4, 5, 6], \"c\": [7, 5, 4, 3]})\n\n        df = lib.concat([df1, df2], axis=1)\n        df.index = [10, 11, 12, 13]\n\n        grp = df.groupby(\"a\")\n        grp.indices\n\n        return getattr(grp, func)()\n\n    eval_general(pd, pandas, perform)\n\n\n# there are two different implementations of partitions aligning for cluster and non-cluster mode,\n# here we want to test both of them, so simply modifying the config for this test\n@pytest.mark.parametrize(\n    \"modify_config\",\n    [\n        {RangePartitioning: True, IsRayCluster: True},\n        {RangePartitioning: True, IsRayCluster: False},\n    ],\n    indirect=True,\n)\ndef test_shape_changing_udf(modify_config):\n    modin_df, pandas_df = create_test_dfs(\n        {\n            \"by_col1\": ([1] * 50) + ([10] * 50),\n            \"col2\": np.arange(100),\n            \"col3\": np.arange(100),\n        }\n    )\n\n    def func1(group):\n        # changes the original shape and indexing of 
the 'group'\n        return pandas.Series(\n            [1, 2, 3, 4], index=[\"new_col1\", \"new_col2\", \"new_col4\", \"new_col3\"]\n        )\n\n    eval_general(\n        modin_df.groupby(\"by_col1\"),\n        pandas_df.groupby(\"by_col1\"),\n        lambda df: df.apply(func1),\n    )\n\n    def func2(group):\n        # each group have different shape at the end\n        # (we do .to_frame().T as otherwise this scenario doesn't work in pandas)\n        if group.iloc[0, 0] == 1:\n            return (\n                pandas.Series(\n                    [1, 2, 3, 4], index=[\"new_col1\", \"new_col2\", \"new_col4\", \"new_col3\"]\n                )\n                .to_frame()\n                .T\n            )\n        return (\n            pandas.Series([20, 33, 44], index=[\"new_col2\", \"new_col3\", \"new_col4\"])\n            .to_frame()\n            .T\n        )\n\n    eval_general(\n        modin_df.groupby(\"by_col1\"),\n        pandas_df.groupby(\"by_col1\"),\n        lambda df: df.apply(func2),\n    )\n\n    def func3(group):\n        # one of the groups produce an empty dataframe, in the result we should\n        # have joined columns of both of these dataframes\n        if group.iloc[0, 0] == 1:\n            return pandas.DataFrame([[1, 2, 3]], index=[\"col1\", \"col2\", \"col3\"])\n        return pandas.DataFrame(columns=[\"col2\", \"col3\", \"col4\", \"col5\"])\n\n    eval_general(\n        modin_df.groupby(\"by_col1\"),\n        pandas_df.groupby(\"by_col1\"),\n        lambda df: df.apply(func3),\n    )\n\n\n@pytest.mark.parametrize(\"modify_config\", [{RangePartitioning: True}], indirect=True)\ndef test_reshuffling_groupby_on_strings(modify_config):\n    # reproducer from https://github.com/modin-project/modin/issues/6509\n    modin_df, pandas_df = create_test_dfs(\n        {\"col1\": [\"a\"] * 50 + [\"b\"] * 50, \"col2\": range(100)}\n    )\n\n    modin_df = modin_df.astype({\"col1\": \"string\"})\n    pandas_df = pandas_df.astype({\"col1\": 
\"string\"})\n\n    md_grp = modin_df.groupby(\"col1\")\n    pd_grp = pandas_df.groupby(\"col1\")\n\n    eval_general(md_grp, pd_grp, lambda grp: grp.mean())\n    eval_general(md_grp, pd_grp, lambda grp: grp.nth(2))\n    eval_general(md_grp, pd_grp, lambda grp: grp.head(10))\n    eval_general(md_grp, pd_grp, lambda grp: grp.tail(10))\n\n\n@pytest.mark.parametrize(\"modify_config\", [{RangePartitioning: True}], indirect=True)\ndef test_groupby_apply_series_result(modify_config):\n    # reproducer from the issue:\n    # https://github.com/modin-project/modin/issues/6632\n    df = pd.DataFrame(\n        np.random.randint(5, 10, size=5), index=[f\"s{i+1}\" for i in range(5)]\n    )\n    df[\"group\"] = [1, 1, 2, 2, 3]\n\n    eval_general(\n        df,\n        df._to_pandas(),\n        lambda df: df.groupby(\"group\").apply(\n            lambda x: x.name + 2, include_groups=False\n        ),\n    )\n\n\ndef test_groupby_named_aggregation():\n    modin_ser, pandas_ser = create_test_series([10, 10, 10, 1, 1, 1, 2, 3], name=\"data\")\n    eval_general(\n        modin_ser, pandas_ser, lambda ser: ser.groupby(level=0).agg(result=(\"max\"))\n    )\n\n\ndef test_groupby_several_column_partitions():\n    # see details in #6948\n    columns = [\n        \"l_returnflag\",\n        \"l_linestatus\",\n        \"l_discount\",\n        \"l_extendedprice\",\n        \"l_quantity\",\n    ]\n    modin_df, pandas_df = create_test_dfs(\n        np.random.randint(0, 100, size=(1000, len(columns))), columns=columns\n    )\n\n    pandas_df[\"a\"] = (pandas_df.l_extendedprice) * (1 - (pandas_df.l_discount))\n    # to create another column partition\n    modin_df[\"a\"] = (modin_df.l_extendedprice) * (1 - (modin_df.l_discount))\n\n    eval_general(\n        modin_df,\n        pandas_df,\n        lambda df: df.groupby([\"l_returnflag\", \"l_linestatus\"])\n        .agg(\n            sum_qty=(\"l_quantity\", \"sum\"),\n            sum_base_price=(\"l_extendedprice\", \"sum\"),\n            
sum_disc_price=(\"a\", \"sum\"),\n            # sum_charge=(\"b\", \"sum\"),\n            avg_qty=(\"l_quantity\", \"mean\"),\n            avg_price=(\"l_extendedprice\", \"mean\"),\n            avg_disc=(\"l_discount\", \"mean\"),\n            count_order=(\"l_returnflag\", \"count\"),\n        )\n        .reset_index(),\n    )\n\n\ndef test_groupby_named_agg():\n    # from pandas docs\n\n    data = {\n        \"A\": [1, 1, 2, 2],\n        \"B\": [1, 2, 3, 4],\n        \"C\": [0.362838, 0.227877, 1.267767, -0.562860],\n    }\n    modin_df, pandas_df = create_test_dfs(data)\n    eval_general(\n        modin_df,\n        pandas_df,\n        lambda df: df.groupby(\"A\").agg(\n            b_min=pd.NamedAgg(column=\"B\", aggfunc=\"min\"),\n            c_sum=pd.NamedAgg(column=\"C\", aggfunc=\"sum\"),\n        ),\n    )\n\n\n### TEST GROUPBY WARNINGS ###\n\n\ndef test_groupby_axis_1_warning():\n    data = {\n        \"col1\": [0, 3, 2, 3],\n        \"col2\": [4, 1, 6, 7],\n    }\n    modin_df, pandas_df = create_test_dfs(data)\n\n    with pytest.warns(\n        FutureWarning, match=\"DataFrame.groupby with axis=1 is deprecated\"\n    ):\n        modin_df.groupby(by=\"col1\", axis=1)\n    with pytest.warns(\n        FutureWarning, match=\"DataFrame.groupby with axis=1 is deprecated\"\n    ):\n        pandas_df.groupby(by=\"col1\", axis=1)\n\n\ndef test_groupby_dtypes_warning():\n    data = {\n        \"col1\": [0, 3, 2, 3],\n        \"col2\": [4, 1, 6, 7],\n    }\n    modin_df, pandas_df = create_test_dfs(data)\n    modin_groupby = modin_df.groupby(by=\"col1\")\n    pandas_groupby = pandas_df.groupby(by=\"col1\")\n\n    with pytest.warns(FutureWarning, match=\"DataFrameGroupBy.dtypes is deprecated\"):\n        modin_groupby.dtypes\n    with pytest.warns(FutureWarning, match=\"DataFrameGroupBy.dtypes is deprecated\"):\n        pandas_groupby.dtypes\n\n\ndef test_groupby_diff_axis_1_warning():\n    data = {\n        \"col1\": [0, 3, 2, 3],\n        \"col2\": [4, 1, 6, 
7],\n    }\n    modin_df, pandas_df = create_test_dfs(data)\n    modin_groupby = modin_df.groupby(by=\"col1\")\n    pandas_groupby = pandas_df.groupby(by=\"col1\")\n\n    with pytest.warns(\n        FutureWarning, match=\"DataFrameGroupBy.diff with axis=1 is deprecated\"\n    ):\n        modin_groupby.diff(axis=1)\n    with pytest.warns(\n        FutureWarning, match=\"DataFrameGroupBy.diff with axis=1 is deprecated\"\n    ):\n        pandas_groupby.diff(axis=1)\n\n\ndef test_groupby_pct_change_axis_1_warning():\n    data = {\n        \"col1\": [0, 3, 2, 3],\n        \"col2\": [4, 1, 6, 7],\n    }\n    modin_df, pandas_df = create_test_dfs(data)\n    modin_groupby = modin_df.groupby(by=\"col1\")\n    pandas_groupby = pandas_df.groupby(by=\"col1\")\n\n    with pytest.warns(\n        FutureWarning, match=\"DataFrameGroupBy.pct_change with axis=1 is deprecated\"\n    ):\n        modin_groupby.pct_change(axis=1)\n    with pytest.warns(\n        FutureWarning, match=\"DataFrameGroupBy.pct_change with axis=1 is deprecated\"\n    ):\n        pandas_groupby.pct_change(axis=1)\n\n\ndef test_groupby_pct_change_parameters_warning():\n    data = {\n        \"col1\": [0, 3, 2, 3],\n        \"col2\": [4, 1, 6, 7],\n    }\n    modin_df, pandas_df = create_test_dfs(data)\n    modin_groupby = modin_df.groupby(by=\"col1\")\n    pandas_groupby = pandas_df.groupby(by=\"col1\")\n\n    match_string = (\n        \"The 'fill_method' keyword being not None and the 'limit' keyword \"\n        + \"in (DataFrame|DataFrameGroupBy).pct_change are deprecated\"\n    )\n\n    with pytest.warns(\n        FutureWarning,\n        match=match_string,\n    ):\n        modin_groupby.pct_change(fill_method=\"bfill\", limit=1)\n    with pytest.warns(\n        FutureWarning,\n        match=match_string,\n    ):\n        pandas_groupby.pct_change(fill_method=\"bfill\", limit=1)\n\n\ndef test_groupby_shift_axis_1_warning():\n    data = {\n        \"col1\": [0, 3, 2, 3],\n        \"col2\": [4, 1, 6, 7],\n    
}\n    modin_df, pandas_df = create_test_dfs(data)\n    modin_groupby = modin_df.groupby(by=\"col1\")\n    pandas_groupby = pandas_df.groupby(by=\"col1\")\n\n    with pytest.warns(\n        FutureWarning,\n        match=\"DataFrameGroupBy.shift with axis=1 is deprecated\",\n    ):\n        pandas_groupby.shift(axis=1, fill_value=777)\n    with pytest.warns(\n        FutureWarning,\n        match=\"DataFrameGroupBy.shift with axis=1 is deprecated\",\n    ):\n        modin_groupby.shift(axis=1, fill_value=777)\n\n\ndef test_groupby_fillna_axis_1_warning():\n    data = {\n        \"col1\": [0, 3, 2, 3],\n        \"col2\": [4, None, 6, None],\n    }\n    modin_df, pandas_df = create_test_dfs(data)\n    modin_groupby = modin_df.groupby(by=\"col1\")\n    pandas_groupby = pandas_df.groupby(by=\"col1\")\n\n    with pytest.warns(\n        FutureWarning,\n        match=\"DataFrameGroupBy.fillna is deprecated\",\n    ):\n        modin_groupby.fillna(method=\"ffill\")\n    with pytest.warns(\n        FutureWarning,\n        match=\"DataFrameGroupBy.fillna is deprecated\",\n    ):\n        pandas_groupby.fillna(method=\"ffill\")\n\n\ndef test_groupby_agg_provided_callable_warning():\n    data = {\n        \"col1\": [0, 3, 2, 3],\n        \"col2\": [4, 1, 6, 7],\n    }\n    modin_df, pandas_df = create_test_dfs(data)\n    modin_groupby = modin_df.groupby(by=\"col1\")\n    pandas_groupby = pandas_df.groupby(by=\"col1\")\n\n    for func in (sum, max):\n        with pytest.warns(\n            FutureWarning,\n            match=\"In a future version of pandas, the provided callable will be used directly\",\n        ):\n            modin_groupby.agg(func)\n        with pytest.warns(\n            FutureWarning,\n            match=\"In a future version of pandas, the provided callable will be used directly\",\n        ):\n            pandas_groupby.agg(func)\n\n\n@pytest.mark.parametrize(\"modify_config\", [{RangePartitioning: True}], 
indirect=True)\n@pytest.mark.parametrize(\"observed\", [False])\n@pytest.mark.parametrize(\"as_index\", [True])\n@pytest.mark.parametrize(\n    \"func\",\n    [\n        pytest.param(lambda grp: grp.sum(), id=\"sum\"),\n        pytest.param(lambda grp: grp.size(), id=\"size\"),\n        pytest.param(lambda grp: grp.apply(lambda df: df.sum()), id=\"apply_sum\"),\n        pytest.param(\n            lambda grp: grp.apply(\n                lambda df: (\n                    df.sum()\n                    if len(df) > 0\n                    else pandas.Series([10] * len(df.columns), index=df.columns)\n                )\n            ),\n            id=\"apply_transform\",\n        ),\n    ],\n)\n@pytest.mark.parametrize(\n    \"by_cols, cat_cols\",\n    [\n        (\"a\", [\"a\"]),\n        (\"b\", [\"b\"]),\n        (\"e\", [\"e\"]),\n        ([\"a\", \"e\"], [\"a\"]),\n        ([\"a\", \"e\"], [\"e\"]),\n        ([\"a\", \"e\"], [\"a\", \"e\"]),\n        ([\"b\", \"e\"], [\"b\"]),\n        ([\"b\", \"e\"], [\"e\"]),\n        ([\"b\", \"e\"], [\"b\", \"e\"]),\n        ([\"a\", \"b\", \"e\"], [\"a\"]),\n        ([\"a\", \"b\", \"e\"], [\"b\"]),\n        ([\"a\", \"b\", \"e\"], [\"e\"]),\n        ([\"a\", \"b\", \"e\"], [\"a\", \"e\"]),\n        ([\"a\", \"b\", \"e\"], [\"a\", \"b\", \"e\"]),\n    ],\n)\n@pytest.mark.parametrize(\n    \"exclude_values\",\n    [\n        pytest.param(lambda row: ~row[\"a\"].isin([\"a\", \"e\"]), id=\"exclude_from_a\"),\n        pytest.param(lambda row: ~row[\"b\"].isin([4]), id=\"exclude_from_b\"),\n        pytest.param(lambda row: ~row[\"e\"].isin([\"x\"]), id=\"exclude_from_e\"),\n        pytest.param(\n            lambda row: ~row[\"a\"].isin([\"a\", \"e\"]) & ~row[\"b\"].isin([4]),\n            id=\"exclude_from_a_b\",\n        ),\n        pytest.param(\n            lambda row: ~row[\"b\"].isin([4]) & ~row[\"e\"].isin([\"x\"]),\n            id=\"exclude_from_b_e\",\n        ),\n        pytest.param(\n            lambda row: 
~row[\"a\"].isin([\"a\", \"e\"])\n            & ~row[\"b\"].isin([4])\n            & ~row[\"e\"].isin([\"x\"]),\n            id=\"exclude_from_a_b_e\",\n        ),\n    ],\n)\ndef test_range_groupby_categories(\n    observed, func, by_cols, cat_cols, exclude_values, as_index, modify_config\n):\n    data = {\n        \"a\": [\"a\", \"b\", \"c\", \"d\", \"e\", \"b\", \"g\", \"a\"] * 32,\n        \"b\": [1, 2, 3, 4] * 64,\n        \"c\": range(256),\n        \"d\": range(256),\n        \"e\": [\"x\", \"y\"] * 128,\n    }\n\n    md_df, pd_df = create_test_dfs(data)\n    md_df = md_df.astype({col: \"category\" for col in cat_cols})[exclude_values]\n    pd_df = pd_df.astype({col: \"category\" for col in cat_cols})[exclude_values]\n\n    md_res = func(md_df.groupby(by_cols, observed=observed, as_index=as_index))\n    pd_res = func(pd_df.groupby(by_cols, observed=observed, as_index=as_index))\n\n    # HACK, FIXME: there's a bug in range-partitioning impl that apparently can\n    # break the order of rows in the result for multi-column groupbys. 
Placing the sorting-hack for now\n    # https://github.com/modin-project/modin/issues/6875\n    df_equals(md_res.sort_index(axis=0), pd_res.sort_index(axis=0))\n\n\n@pytest.mark.parametrize(\"cat_cols\", [[\"a\"], [\"b\"], [\"a\", \"b\"]])\n@pytest.mark.parametrize(\n    \"columns\", [[(False, \"a\"), (True, \"b\")], [(True, \"a\")], [(True, \"a\"), (True, \"b\")]]\n)\ndef test_range_groupby_categories_external_grouper(columns, cat_cols):\n    data = {\n        \"a\": [1, 1, 2, 2] * 64,\n        \"b\": [11, 11, 22, 22] * 64,\n        \"c\": [111, 111, 222, 222] * 64,\n        \"data\": [1, 2, 3, 4] * 64,\n    }\n\n    md_df, pd_df = create_test_dfs(data)\n    md_df = md_df.astype({col: \"category\" for col in cat_cols})\n    pd_df = pd_df.astype({col: \"category\" for col in cat_cols})\n\n    md_df, md_by = get_external_groupers(md_df, columns, drop_from_original_df=True)\n    pd_df, pd_by = get_external_groupers(pd_df, columns, drop_from_original_df=True)\n\n    eval_general(md_df.groupby(md_by), pd_df.groupby(pd_by), lambda grp: grp.count())\n\n\n@pytest.mark.parametrize(\"by\", [[\"a\"], [\"a\", \"b\"]])\n@pytest.mark.parametrize(\"as_index\", [True, False])\n@pytest.mark.parametrize(\"include_groups\", [True, False])\ndef test_include_groups(by, as_index, include_groups):\n    data = {\n        \"a\": [1, 1, 2, 2] * 64,\n        \"b\": [11, 11, 22, 22] * 64,\n        \"c\": [111, 111, 222, 222] * 64,\n        \"data\": [1, 2, 3, 4] * 64,\n    }\n\n    def func(df):\n        if include_groups:\n            assert len(df.columns.intersection(by)) == len(by)\n        else:\n            assert len(df.columns.intersection(by)) == 0\n        return df.sum()\n\n    md_df, pd_df = create_test_dfs(data)\n    eval_general(\n        md_df,\n        pd_df,\n        lambda df: df.groupby(by, as_index=as_index).apply(\n            func, include_groups=include_groups\n        ),\n    )\n\n\n@pytest.mark.parametrize(\"skipna\", [True, False])\n@pytest.mark.parametrize(\"how\", 
[\"first\", \"last\"])\ndef test_first_last_skipna(how, skipna):\n    md_df, pd_df = create_test_dfs(\n        {\n            \"a\": [2, 1, 1, 2, 3, 3] * 20,\n            \"b\": [np.nan, 3.0, np.nan, 4.0, np.nan, np.nan] * 20,\n            \"c\": [np.nan, 3.0, np.nan, 4.0, np.nan, np.nan] * 20,\n        }\n    )\n\n    pd_res = getattr(pd_df.groupby(\"a\"), how)(skipna=skipna)\n    md_res = getattr(md_df.groupby(\"a\"), how)(skipna=skipna)\n    df_equals(md_res, pd_res)\n"
  },
  {
    "path": "modin/tests/pandas/test_io.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport csv\nimport inspect\nimport os\nimport platform\nimport sys\nimport unittest.mock as mock\nfrom collections import defaultdict\nfrom io import BytesIO, StringIO\nfrom pathlib import Path\nfrom typing import Dict\n\nimport fastparquet\nimport numpy as np\nimport pandas\nimport pandas._libs.lib as lib\nimport pyarrow as pa\nimport pyarrow.dataset\nimport pytest\nimport sqlalchemy as sa\nfrom packaging import version\nfrom pandas._testing import ensure_clean\nfrom pandas.errors import ParserWarning\nfrom scipy import sparse\n\nfrom modin.config import (\n    AsyncReadMode,\n    Engine,\n    IsExperimental,\n    MinRowPartitionSize,\n    ReadSqlEngine,\n    StorageFormat,\n    TestDatasetSize,\n    TestReadFromPostgres,\n    TestReadFromSqlServer,\n)\nfrom modin.db_conn import ModinDatabaseConnection, UnsupportedDatabaseException\nfrom modin.pandas.io import from_arrow, from_dask, from_map, from_ray, to_pandas\nfrom modin.tests.test_utils import (\n    current_execution_is_native,\n    warns_that_defaulting_to_pandas_if,\n)\n\nfrom .utils import (\n    check_file_leaks,\n    create_test_dfs,\n    create_test_series,\n    default_to_pandas_ignore_string,\n    
df_equals,\n    dummy_decorator,\n    eval_general,\n    eval_io,\n    eval_io_from_str,\n    generate_dataframe,\n    get_unique_filename,\n    json_long_bytes,\n    json_long_string,\n    json_short_bytes,\n    json_short_string,\n    parse_dates_values_by_id,\n)\nfrom .utils import test_data as utils_test_data\nfrom .utils import (\n    time_parsing_csv_path,\n)\n\nif StorageFormat.get() == \"Pandas\":\n    import modin.pandas as pd\nelse:\n    import modin.experimental.pandas as pd\n\ntry:\n    import ray\n\n    EXCEPTIONS = (ray.exceptions.WorkerCrashedError,)\nexcept ImportError:\n    EXCEPTIONS = ()\n\n\nfrom modin.config import NPartitions\n\nNPartitions.put(4)\n\nDATASET_SIZE_DICT = {\n    \"Small\": 64,\n    \"Normal\": 2000,\n    \"Big\": 20000,\n}\n\n# Number of rows in the test file\nNROWS = DATASET_SIZE_DICT.get(TestDatasetSize.get(), DATASET_SIZE_DICT[\"Small\"])\n\nTEST_DATA = {\n    \"col1\": [0, 1, 2, 3],\n    \"col2\": [4, 5, 6, 7],\n    \"col3\": [8, 9, 10, 11],\n    \"col4\": [12, 13, 14, 15],\n    \"col5\": [0, 0, 0, 0],\n}\n\n\ndef assert_files_eq(path1, path2):\n    with open(path1, \"rb\") as file1, open(path2, \"rb\") as file2:\n        file1_content = file1.read()\n        file2_content = file2.read()\n\n        if file1_content == file2_content:\n            return True\n        else:\n            return False\n\n\ndef setup_clipboard(row_size=NROWS):\n    df = pandas.DataFrame({\"col1\": np.arange(row_size), \"col2\": np.arange(row_size)})\n    df.to_clipboard()\n\n\ndef parquet_eval_to_file(tmp_dir, modin_obj, pandas_obj, fn, extension, **fn_kwargs):\n    \"\"\"\n    Helper function to test `to_parquet` method.\n\n    Parameters\n    ----------\n    tmp_dir : Union[str, Path]\n        Temporary directory.\n    modin_obj : pd.DataFrame\n        A Modin DataFrame or a Series to test `to_parquet` method.\n    pandas_obj: pandas.DataFrame\n        A pandas DataFrame or a Series to test `to_parquet` method.\n    fn : str\n        Name of 
the method, that should be tested.\n    extension : str\n        Extension of the test file.\n    \"\"\"\n    unique_filename_modin = get_unique_filename(extension=extension, data_dir=tmp_dir)\n    unique_filename_pandas = get_unique_filename(extension=extension, data_dir=tmp_dir)\n\n    engine = fn_kwargs.get(\"engine\", \"auto\")\n\n    getattr(modin_obj, fn)(unique_filename_modin, **fn_kwargs)\n    getattr(pandas_obj, fn)(unique_filename_pandas, **fn_kwargs)\n\n    pandas_df = pandas.read_parquet(unique_filename_pandas, engine=engine)\n    modin_df = pd.read_parquet(unique_filename_modin, engine=engine)\n    df_equals(pandas_df, modin_df)\n\n\ndef eval_to_file(tmp_dir, modin_obj, pandas_obj, fn, extension, **fn_kwargs):\n    \"\"\"\n    Test `fn` method of `modin_obj` and `pandas_obj`.\n\n    Parameters\n    ----------\n    tmp_dir : Union[str, Path]\n        Temporary directory.\n    modin_obj: Modin DataFrame or Series\n        Object to test.\n    pandas_obj: Pandas DataFrame or Series\n        Object to test.\n    fn: str\n        Name of the method, that should be tested.\n    extension: str\n        Extension of the test file.\n    \"\"\"\n    unique_filename_modin = get_unique_filename(extension=extension, data_dir=tmp_dir)\n    unique_filename_pandas = get_unique_filename(extension=extension, data_dir=tmp_dir)\n\n    # parameter `max_retries=0` is set for `to_csv` function on Ray engine,\n    # in order to increase the stability of tests, we repeat the call of\n    # the entire function manually\n    last_exception = None\n    for _ in range(3):\n        try:\n            getattr(modin_obj, fn)(unique_filename_modin, **fn_kwargs)\n        except EXCEPTIONS as err:\n            last_exception = err\n            continue\n        break\n    # If we do have an exception that's valid let's raise it\n    if last_exception:\n        raise last_exception\n\n    getattr(pandas_obj, fn)(unique_filename_pandas, **fn_kwargs)\n\n    assert 
assert_files_eq(unique_filename_modin, unique_filename_pandas)\n\n\ndef eval_to_csv_file(tmp_dir, modin_obj, pandas_obj, extension, **kwargs):\n    if extension is None:\n        kwargs[\"mode\"] = \"t\"\n        kwargs[\"compression\"] = \"infer\"\n        modin_csv = modin_obj.to_csv(**kwargs)\n        pandas_csv = pandas_obj.to_csv(**kwargs)\n        if modin_csv == pandas_csv:\n            return\n\n        force_read = True\n        modin_file = get_unique_filename(extension=\"csv\", data_dir=tmp_dir)\n        pandas_file = get_unique_filename(extension=\"csv\", data_dir=tmp_dir)\n        with open(modin_file, \"w\") as file:\n            file.write(modin_csv)\n        with open(pandas_file, \"w\") as file:\n            file.write(pandas_csv)\n    else:\n        force_read = extension != \"csv\" or kwargs.get(\"compression\", None)\n        modin_file = get_unique_filename(extension=extension, data_dir=tmp_dir)\n        pandas_file = get_unique_filename(extension=extension, data_dir=tmp_dir)\n        modin_obj.to_csv(modin_file, **kwargs)\n        pandas_obj.to_csv(pandas_file, **kwargs)\n\n    if force_read or not assert_files_eq(modin_file, pandas_file):\n        # If the files are not identical, make sure they can\n        # be read by pandas and contains identical data.\n        read_kwargs = {}\n        if kwargs.get(\"index\", None) is not False:\n            read_kwargs[\"index_col\"] = 0\n        if (value := kwargs.get(\"sep\", None)) is not None:\n            read_kwargs[\"sep\"] = value\n        if (value := kwargs.get(\"compression\", None)) is not None:\n            read_kwargs[\"compression\"] = value\n        modin_obj = pandas.read_csv(modin_file, **read_kwargs)\n        pandas_obj = pandas.read_csv(pandas_file, **read_kwargs)\n        df_equals(pandas_obj, modin_obj)\n\n\n@pytest.fixture\ndef make_parquet_dir(tmp_path):\n    def _make_parquet_dir(\n        dfs_by_filename: Dict[str, pandas.DataFrame], row_group_size: int\n    ):\n        for 
filename, df in dfs_by_filename.items():\n            df.to_parquet(\n                os.path.join(tmp_path, filename), row_group_size=row_group_size\n            )\n        return tmp_path\n\n    yield _make_parquet_dir\n\n\n@pytest.mark.usefixtures(\"TestReadCSVFixture\")\n@pytest.mark.skipif(\n    IsExperimental.get() and StorageFormat.get() == \"Pyarrow\",\n    reason=\"Segmentation fault; see PR #2347 ffor details\",\n)\n@pytest.mark.filterwarnings(default_to_pandas_ignore_string)\nclass TestCsv:\n    # delimiter tests\n    @pytest.mark.parametrize(\"sep\", [\"_\", \",\", \".\"])\n    @pytest.mark.parametrize(\"decimal\", [\".\", \"_\"])\n    @pytest.mark.parametrize(\"thousands\", [None, \",\", \"_\", \" \"])\n    def test_read_csv_seps(self, make_csv_file, sep, decimal, thousands):\n        unique_filename = make_csv_file(\n            delimiter=sep,\n            thousands_separator=thousands,\n            decimal_separator=decimal,\n        )\n        eval_io(\n            fn_name=\"read_csv\",\n            # read_csv kwargs\n            filepath_or_buffer=unique_filename,\n            sep=sep,\n            decimal=decimal,\n            thousands=thousands,\n        )\n\n    @pytest.mark.parametrize(\"sep\", [None, \"_\"])\n    @pytest.mark.parametrize(\"delimiter\", [\".\", \"_\"])\n    def test_read_csv_seps_except(self, make_csv_file, sep, delimiter):\n        unique_filename = make_csv_file(delimiter=delimiter)\n        eval_io(\n            fn_name=\"read_csv\",\n            # read_csv kwargs\n            filepath_or_buffer=unique_filename,\n            delimiter=delimiter,\n            sep=sep,\n            expected_exception=ValueError(\n                \"Specified a sep and a delimiter; you can only specify one.\"\n            ),\n        )\n\n    @pytest.mark.parametrize(\n        \"dtype_backend\", [lib.no_default, \"numpy_nullable\", \"pyarrow\"]\n    )\n    def test_read_csv_dtype_backend(self, make_csv_file, dtype_backend):\n        
unique_filename = make_csv_file()\n\n        def comparator(df1, df2):\n            df_equals(df1, df2)\n            df_equals(df1.dtypes, df2.dtypes)\n\n        eval_io(\n            fn_name=\"read_csv\",\n            # read_csv kwargs\n            filepath_or_buffer=unique_filename,\n            dtype_backend=dtype_backend,\n            comparator=comparator,\n        )\n\n    # Column and Index Locations and Names tests\n    @pytest.mark.parametrize(\"header\", [\"infer\", None, 0])\n    @pytest.mark.parametrize(\"index_col\", [None, \"col1\"])\n    @pytest.mark.parametrize(\n        \"names\", [lib.no_default, [\"col1\"], [\"c1\", \"c2\", \"c3\", \"c4\", \"c5\", \"c6\"]]\n    )\n    @pytest.mark.parametrize(\n        \"usecols\", [None, [\"col1\"], [\"col1\", \"col2\", \"col6\"], [0, 1, 5]]\n    )\n    @pytest.mark.parametrize(\"skip_blank_lines\", [True, False])\n    def test_read_csv_col_handling(\n        self,\n        header,\n        index_col,\n        names,\n        usecols,\n        skip_blank_lines,\n    ):\n        if names is lib.no_default:\n            pytest.skip(\"some parameters combiantions fails: issue #2312\")\n        if header in [\"infer\", None] and names is not lib.no_default:\n            pytest.skip(\n                \"Heterogeneous data in a column is not cast to a common type: issue #3346\"\n            )\n        eval_io(\n            fn_name=\"read_csv\",\n            # read_csv kwargs\n            filepath_or_buffer=pytest.csvs_names[\"test_read_csv_blank_lines\"],\n            header=header,\n            index_col=index_col,\n            names=names,\n            usecols=usecols,\n            skip_blank_lines=skip_blank_lines,\n            # FIXME: https://github.com/modin-project/modin/issues/7035\n            expected_exception=False,\n        )\n\n    @pytest.mark.parametrize(\"usecols\", [lambda col_name: col_name in [\"a\", \"b\", \"e\"]])\n    def test_from_csv_with_callable_usecols(self, usecols):\n        fname = 
\"modin/tests/pandas/data/test_usecols.csv\"\n        pandas_df = pandas.read_csv(fname, usecols=usecols)\n        modin_df = pd.read_csv(fname, usecols=usecols)\n        df_equals(modin_df, pandas_df)\n\n    # General Parsing Configuration\n    @pytest.mark.parametrize(\"dtype\", [None, True])\n    @pytest.mark.parametrize(\"engine\", [None, \"python\", \"c\"])\n    @pytest.mark.parametrize(\n        \"converters\",\n        [\n            None,\n            {\n                \"col1\": lambda x: np.int64(x) * 10,\n                \"col2\": pandas.to_datetime,\n                \"col4\": lambda x: x.replace(\":\", \";\"),\n            },\n        ],\n    )\n    @pytest.mark.parametrize(\"skipfooter\", [0, 10])\n    def test_read_csv_parsing_1(\n        self,\n        dtype,\n        engine,\n        converters,\n        skipfooter,\n    ):\n        if dtype:\n            dtype = {\n                col: \"object\"\n                for col in pandas.read_csv(\n                    pytest.csvs_names[\"test_read_csv_regular\"], nrows=1\n                ).columns\n            }\n\n        expected_exception = None\n        if engine == \"c\" and skipfooter != 0:\n            expected_exception = ValueError(\n                \"the 'c' engine does not support skipfooter\"\n            )\n        eval_io(\n            fn_name=\"read_csv\",\n            expected_exception=expected_exception,\n            check_kwargs_callable=not callable(converters),\n            # read_csv kwargs\n            filepath_or_buffer=pytest.csvs_names[\"test_read_csv_regular\"],\n            dtype=dtype,\n            engine=engine,\n            converters=converters,\n            skipfooter=skipfooter,\n        )\n\n    @pytest.mark.parametrize(\"header\", [\"infer\", None, 0])\n    @pytest.mark.parametrize(\n        \"skiprows\",\n        [\n            2,\n            lambda x: x % 2,\n            lambda x: x > 25,\n            lambda x: x > 128,\n            np.arange(10, 50),\n            
np.arange(10, 50, 2),\n        ],\n    )\n    @pytest.mark.parametrize(\"nrows\", [35, None])\n    @pytest.mark.parametrize(\n        \"names\",\n        [\n            [f\"c{col_number}\" for col_number in range(4)],\n            [f\"c{col_number}\" for col_number in range(6)],\n            None,\n        ],\n    )\n    @pytest.mark.parametrize(\"encoding\", [\"latin1\", \"windows-1251\", None])\n    def test_read_csv_parsing_2(\n        self,\n        make_csv_file,\n        request,\n        header,\n        skiprows,\n        nrows,\n        names,\n        encoding,\n    ):\n        if encoding:\n            unique_filename = make_csv_file(encoding=encoding)\n        else:\n            unique_filename = pytest.csvs_names[\"test_read_csv_regular\"]\n        kwargs = {\n            \"filepath_or_buffer\": unique_filename,\n            \"header\": header,\n            \"skiprows\": skiprows,\n            \"nrows\": nrows,\n            \"names\": names,\n            \"encoding\": encoding,\n        }\n\n        if Engine.get() != \"Python\":\n            df = pandas.read_csv(**dict(kwargs, nrows=1))\n            # in that case first partition will contain str\n            if df[df.columns[0]][df.index[0]] in [\"c1\", \"col1\", \"c3\", \"col3\"]:\n                pytest.xfail(\"read_csv incorrect output with float data - issue #2634\")\n\n        eval_io(\n            fn_name=\"read_csv\",\n            expected_exception=None,\n            check_kwargs_callable=not callable(skiprows),\n            # read_csv kwargs\n            **kwargs,\n        )\n\n    @pytest.mark.parametrize(\"true_values\", [[\"Yes\"], [\"Yes\", \"true\"], None])\n    @pytest.mark.parametrize(\"false_values\", [[\"No\"], [\"No\", \"false\"], None])\n    @pytest.mark.parametrize(\"skipfooter\", [0, 10])\n    @pytest.mark.parametrize(\"nrows\", [35, None])\n    def test_read_csv_parsing_3(\n        self,\n        true_values,\n        false_values,\n        skipfooter,\n        nrows,\n    ):\n 
       # TODO: Check #2446 as it was closed\n        xfail_case = (false_values or true_values) and Engine.get() != \"Python\"\n        if xfail_case:\n            pytest.xfail(\"modin and pandas dataframes differ - issue #2446\")\n\n        expected_exception = None\n        if skipfooter != 0 and nrows is not None:\n            expected_exception = ValueError(\"'skipfooter' not supported with 'nrows'\")\n        eval_io(\n            fn_name=\"read_csv\",\n            expected_exception=expected_exception,\n            # read_csv kwargs\n            filepath_or_buffer=pytest.csvs_names[\"test_read_csv_yes_no\"],\n            true_values=true_values,\n            false_values=false_values,\n            skipfooter=skipfooter,\n            nrows=nrows,\n        )\n\n    def test_read_csv_skipinitialspace(self):\n        with ensure_clean(\".csv\") as unique_filename:\n            str_initial_spaces = (\n                \"col1,col2,col3,col4\\n\"\n                + \"five,  six,  seven,  eight\\n\"\n                + \"    five,    six,    seven,    eight\\n\"\n                + \"five, six,  seven,   eight\\n\"\n            )\n\n            eval_io_from_str(str_initial_spaces, unique_filename, skipinitialspace=True)\n\n    # NA and Missing Data Handling tests\n    @pytest.mark.parametrize(\"na_values\", [\"custom_nan\", \"73\"])\n    @pytest.mark.parametrize(\"keep_default_na\", [True, False])\n    @pytest.mark.parametrize(\"na_filter\", [True, False])\n    @pytest.mark.parametrize(\"verbose\", [True, False])\n    @pytest.mark.parametrize(\"skip_blank_lines\", [True, False])\n    def test_read_csv_nans_handling(\n        self,\n        na_values,\n        keep_default_na,\n        na_filter,\n        verbose,\n        skip_blank_lines,\n    ):\n        eval_io(\n            fn_name=\"read_csv\",\n            # read_csv kwargs\n            filepath_or_buffer=pytest.csvs_names[\"test_read_csv_nans\"],\n            na_values=na_values,\n            
keep_default_na=keep_default_na,\n            na_filter=na_filter,\n            verbose=verbose,\n            skip_blank_lines=skip_blank_lines,\n        )\n\n    # Datetime Handling tests\n    @pytest.mark.parametrize(\n        \"parse_dates\", [True, False, [\"col2\"], [\"col2\", \"col4\"], [1, 3]]\n    )\n    @pytest.mark.parametrize(\"infer_datetime_format\", [True, False])\n    @pytest.mark.parametrize(\"keep_date_col\", [True, False])\n    @pytest.mark.parametrize(\n        \"date_parser\",\n        [lib.no_default, lambda x: pandas.to_datetime(x, format=\"%Y-%m-%d\")],\n        ids=[\"default\", \"format-Ymd\"],\n    )\n    @pytest.mark.parametrize(\"dayfirst\", [True, False])\n    @pytest.mark.parametrize(\"cache_dates\", [True, False])\n    def test_read_csv_datetime(\n        self,\n        parse_dates,\n        infer_datetime_format,\n        keep_date_col,\n        date_parser,\n        dayfirst,\n        cache_dates,\n        request,\n    ):\n        expected_exception = None\n\n        if \"format-Ymd\" in request.node.callspec.id and (\n            \"parse_dates3\" in request.node.callspec.id\n            or \"parse_dates4\" in request.node.callspec.id\n        ):\n            msg = (\n                'time data \"00:00:00\" doesn\\'t match format \"%Y-%m-%d\", at position 0. You might want to try:\\n'\n                + \"    - passing `format` if your strings have a consistent format;\\n\"\n                + \"    - passing `format='ISO8601'` if your strings are all ISO8601 \"\n                + \"but not necessarily in exactly the same format;\\n\"\n                + \"    - passing `format='mixed'`, and the format will be inferred \"\n                + \"for each element individually. 
You might want to use `dayfirst` \"\n                + \"alongside this.\"\n            )\n            expected_exception = ValueError(msg)\n\n        eval_io(\n            fn_name=\"read_csv\",\n            check_kwargs_callable=not callable(date_parser),\n            expected_exception=expected_exception,\n            # read_csv kwargs\n            filepath_or_buffer=pytest.csvs_names[\"test_read_csv_regular\"],\n            parse_dates=parse_dates,\n            infer_datetime_format=infer_datetime_format,\n            keep_date_col=keep_date_col,\n            date_parser=date_parser,\n            dayfirst=dayfirst,\n            cache_dates=cache_dates,\n        )\n\n    @pytest.mark.parametrize(\"date\", [\"2023-01-01 00:00:01.000000000\", \"2023\"])\n    @pytest.mark.parametrize(\"dtype\", [None, \"str\", {\"id\": \"int64\"}])\n    @pytest.mark.parametrize(\"parse_dates\", [None, [], [\"date\"], [1]])\n    def test_read_csv_dtype_parse_dates(self, date, dtype, parse_dates):\n        with ensure_clean(\".csv\") as filename:\n            with open(filename, \"w\") as file:\n                file.write(f\"id,date\\n1,{date}\")\n            eval_io(\n                fn_name=\"read_csv\",\n                # read_csv kwargs\n                filepath_or_buffer=filename,\n                dtype=dtype,\n                parse_dates=parse_dates,\n            )\n\n    # Iteration tests\n    @pytest.mark.parametrize(\"iterator\", [True, False])\n    def test_read_csv_iteration(self, iterator):\n        filename = pytest.csvs_names[\"test_read_csv_regular\"]\n\n        # Tests __next__ and correctness of reader as an iterator\n        # Use larger chunksize to read through file quicker\n        rdf_reader = pd.read_csv(filename, chunksize=500, iterator=iterator)\n        pd_reader = pandas.read_csv(filename, chunksize=500, iterator=iterator)\n\n        for modin_df, pd_df in zip(rdf_reader, pd_reader):\n            df_equals(modin_df, pd_df)\n\n        # Tests that get_chunk 
works correctly\n        rdf_reader = pd.read_csv(filename, chunksize=1, iterator=iterator)\n        pd_reader = pandas.read_csv(filename, chunksize=1, iterator=iterator)\n\n        modin_df = rdf_reader.get_chunk(1)\n        pd_df = pd_reader.get_chunk(1)\n\n        df_equals(modin_df, pd_df)\n\n        # Tests that read works correctly\n        rdf_reader = pd.read_csv(filename, chunksize=1, iterator=iterator)\n        pd_reader = pandas.read_csv(filename, chunksize=1, iterator=iterator)\n\n        modin_df = rdf_reader.read()\n        pd_df = pd_reader.read()\n\n        df_equals(modin_df, pd_df)\n\n        # Tests #6553\n        if iterator:\n            rdf_reader = pd.read_csv(filename, iterator=iterator)\n            pd_reader = pandas.read_csv(filename, iterator=iterator)\n\n            modin_df = rdf_reader.read()\n            pd_df = pd_reader.read()\n\n            df_equals(modin_df, pd_df)\n\n    @pytest.mark.parametrize(\"pathlike\", [False, True])\n    def test_read_csv_encoding_976(self, pathlike):\n        file_name = \"modin/tests/pandas/data/issue_976.csv\"\n        if pathlike:\n            file_name = Path(file_name)\n        names = [str(i) for i in range(11)]\n\n        kwargs = {\n            \"sep\": \";\",\n            \"names\": names,\n            \"encoding\": \"windows-1251\",\n        }\n        df1 = pd.read_csv(file_name, **kwargs)\n        df2 = pandas.read_csv(file_name, **kwargs)\n        # these columns contain data of various types in partitions\n        # see #1931 for details;\n        df1 = df1.drop([\"4\", \"5\"], axis=1)\n        df2 = df2.drop([\"4\", \"5\"], axis=1)\n\n        df_equals(df1, df2)\n\n    # Quoting, Compression parameters tests\n    @pytest.mark.parametrize(\"compression\", [\"infer\", \"gzip\", \"bz2\", \"xz\", \"zip\"])\n    @pytest.mark.parametrize(\"encoding\", [None, \"latin8\", \"utf16\"])\n    @pytest.mark.parametrize(\"engine\", [None, \"python\", \"c\", \"pyarrow\"])\n    def 
test_read_csv_compression(self, make_csv_file, compression, encoding, engine):\n        unique_filename = make_csv_file(encoding=encoding, compression=compression)\n        expected_exception = None\n        if encoding == \"utf16\" and compression in (\"bz2\", \"xz\"):\n            expected_exception = UnicodeError(\"UTF-16 stream does not start with BOM\")\n\n        eval_io(\n            fn_name=\"read_csv\",\n            # read_csv kwargs\n            filepath_or_buffer=unique_filename,\n            compression=compression,\n            encoding=encoding,\n            engine=engine,\n            expected_exception=expected_exception,\n        )\n\n    @pytest.mark.parametrize(\n        \"encoding\",\n        [\n            None,\n            \"ISO-8859-1\",\n            \"latin1\",\n            \"iso-8859-1\",\n            \"cp1252\",\n            \"utf8\",\n            pytest.param(\n                \"unicode_escape\",\n                marks=pytest.mark.skipif(\n                    condition=sys.version_info < (3, 9),\n                    reason=\"https://bugs.python.org/issue45461\",\n                ),\n            ),\n            \"raw_unicode_escape\",\n            \"utf_16_le\",\n            \"utf_16_be\",\n            \"utf32\",\n            \"utf_32_le\",\n            \"utf_32_be\",\n            \"utf-8-sig\",\n        ],\n    )\n    def test_read_csv_encoding(self, make_csv_file, encoding):\n        unique_filename = make_csv_file(encoding=encoding)\n        eval_io(\n            fn_name=\"read_csv\",\n            # read_csv kwargs\n            filepath_or_buffer=unique_filename,\n            encoding=encoding,\n        )\n\n    @pytest.mark.parametrize(\"thousands\", [None, \",\", \"_\", \" \"])\n    @pytest.mark.parametrize(\"decimal\", [\".\", \"_\"])\n    @pytest.mark.parametrize(\"lineterminator\", [None, \"x\", \"\\n\"])\n    @pytest.mark.parametrize(\"escapechar\", [None, \"d\", \"x\"])\n    @pytest.mark.parametrize(\"dialect\", 
[\"test_csv_dialect\", \"use_dialect_name\", None])\n    def test_read_csv_file_format(\n        self,\n        make_csv_file,\n        thousands,\n        decimal,\n        lineterminator,\n        escapechar,\n        dialect,\n    ):\n        if dialect:\n            test_csv_dialect_params = {\n                \"delimiter\": \"_\",\n                \"doublequote\": False,\n                \"escapechar\": \"\\\\\",\n                \"quotechar\": \"d\",\n                \"quoting\": csv.QUOTE_ALL,\n            }\n            csv.register_dialect(dialect, **test_csv_dialect_params)\n            if dialect != \"use_dialect_name\":\n                # otherwise try with dialect name instead of `_csv.Dialect` object\n                dialect = csv.get_dialect(dialect)\n            unique_filename = make_csv_file(**test_csv_dialect_params)\n        else:\n            unique_filename = make_csv_file(\n                thousands_separator=thousands,\n                decimal_separator=decimal,\n                escapechar=escapechar,\n                lineterminator=lineterminator,\n            )\n\n        expected_exception = None\n        if dialect is None:\n            # FIXME: https://github.com/modin-project/modin/issues/7035\n            expected_exception = False\n\n        eval_io(\n            fn_name=\"read_csv\",\n            # read_csv kwargs\n            filepath_or_buffer=unique_filename,\n            thousands=thousands,\n            decimal=decimal,\n            lineterminator=lineterminator,\n            escapechar=escapechar,\n            dialect=dialect,\n            expected_exception=expected_exception,\n        )\n\n    @pytest.mark.parametrize(\n        \"quoting\",\n        [csv.QUOTE_ALL, csv.QUOTE_MINIMAL, csv.QUOTE_NONNUMERIC, csv.QUOTE_NONE],\n    )\n    @pytest.mark.parametrize(\"quotechar\", ['\"', \"_\", \"d\"])\n    @pytest.mark.parametrize(\"doublequote\", [True, False])\n    @pytest.mark.parametrize(\"comment\", [None, \"#\", \"x\"])\n    
def test_read_csv_quoting(\n        self,\n        make_csv_file,\n        quoting,\n        quotechar,\n        doublequote,\n        comment,\n    ):\n        # in these cases escapechar should be set, otherwise error occurs\n        # _csv.Error: need to escape, but no escapechar set\n        use_escapechar = (\n            not doublequote and quotechar != '\"' and quoting != csv.QUOTE_NONE\n        )\n        escapechar = \"\\\\\" if use_escapechar else None\n        unique_filename = make_csv_file(\n            quoting=quoting,\n            quotechar=quotechar,\n            doublequote=doublequote,\n            escapechar=escapechar,\n            comment_col_char=comment,\n        )\n\n        eval_io(\n            fn_name=\"read_csv\",\n            # read_csv kwargs\n            filepath_or_buffer=unique_filename,\n            quoting=quoting,\n            quotechar=quotechar,\n            doublequote=doublequote,\n            escapechar=escapechar,\n            comment=comment,\n        )\n\n    # Error Handling parameters tests\n    @pytest.mark.skip(reason=\"https://github.com/modin-project/modin/issues/6239\")\n    @pytest.mark.parametrize(\"on_bad_lines\", [\"error\", \"warn\", \"skip\", None])\n    def test_read_csv_error_handling(self, on_bad_lines):\n        # in that case exceptions are raised both by Modin and pandas\n        # and tests pass\n        raise_exception_case = on_bad_lines is not None\n        # TODO: Check #2500 as it was closed\n        if not raise_exception_case and Engine.get() not in [\"Python\"]:\n            pytest.xfail(\"read_csv doesn't raise `bad lines` exceptions - issue #2500\")\n        eval_io(\n            fn_name=\"read_csv\",\n            # read_csv kwargs\n            filepath_or_buffer=pytest.csvs_names[\"test_read_csv_bad_lines\"],\n            on_bad_lines=on_bad_lines,\n        )\n\n    @pytest.mark.parametrize(\"float_precision\", [None, \"high\", \"legacy\", \"round_trip\"])\n    def 
test_python_engine_float_precision_except(self, float_precision):\n        expected_exception = None\n        if float_precision is not None:\n            expected_exception = ValueError(\n                \"The 'float_precision' option is not supported with the 'python' engine\"\n            )\n        eval_io(\n            fn_name=\"read_csv\",\n            # read_csv kwargs\n            filepath_or_buffer=pytest.csvs_names[\"test_read_csv_regular\"],\n            engine=\"python\",\n            float_precision=float_precision,\n            expected_exception=expected_exception,\n        )\n\n    @pytest.mark.parametrize(\"low_memory\", [False, True])\n    def test_python_engine_low_memory_except(self, low_memory):\n        expected_exception = None\n        if not low_memory:\n            expected_exception = ValueError(\n                \"The 'low_memory' option is not supported with the 'python' engine\"\n            )\n        eval_io(\n            fn_name=\"read_csv\",\n            # read_csv kwargs\n            filepath_or_buffer=pytest.csvs_names[\"test_read_csv_regular\"],\n            engine=\"python\",\n            low_memory=low_memory,\n            expected_exception=expected_exception,\n        )\n\n    @pytest.mark.parametrize(\"delim_whitespace\", [True, False])\n    def test_delim_whitespace(self, delim_whitespace, tmp_path):\n        str_delim_whitespaces = \"col1 col2  col3   col4\\n5 6   7  8\\n9  10    11 12\\n\"\n        unique_filename = get_unique_filename(data_dir=tmp_path)\n        eval_io_from_str(\n            str_delim_whitespaces,\n            unique_filename,\n            delim_whitespace=delim_whitespace,\n        )\n\n    # Internal parameters tests\n    @pytest.mark.parametrize(\"engine\", [\"c\"])\n    @pytest.mark.parametrize(\"delimiter\", [\",\", \" \"])\n    @pytest.mark.parametrize(\"low_memory\", [True, False])\n    @pytest.mark.parametrize(\"memory_map\", [True, False])\n    @pytest.mark.parametrize(\"float_precision\", 
[None, \"high\", \"round_trip\"])\n    def test_read_csv_internal(\n        self,\n        make_csv_file,\n        engine,\n        delimiter,\n        low_memory,\n        memory_map,\n        float_precision,\n    ):\n        unique_filename = make_csv_file(delimiter=delimiter)\n        eval_io(\n            filepath_or_buffer=unique_filename,\n            fn_name=\"read_csv\",\n            engine=engine,\n            delimiter=delimiter,\n            low_memory=low_memory,\n            memory_map=memory_map,\n            float_precision=float_precision,\n        )\n\n    # Issue related, specific or corner cases\n    @pytest.mark.parametrize(\"nrows\", [2, None])\n    def test_read_csv_bad_quotes(self, nrows):\n        csv_bad_quotes = (\n            '1, 2, 3, 4\\none, two, three, four\\nfive, \"six\", seven, \"eight\\n'\n        )\n\n        with ensure_clean(\".csv\") as unique_filename:\n            eval_io_from_str(csv_bad_quotes, unique_filename, nrows=nrows)\n\n    def test_read_csv_categories(self):\n        eval_io(\n            fn_name=\"read_csv\",\n            # read_csv kwargs\n            filepath_or_buffer=\"modin/tests/pandas/data/test_categories.csv\",\n            names=[\"one\", \"two\"],\n            dtype={\"one\": \"int64\", \"two\": \"category\"},\n        )\n\n    @pytest.mark.parametrize(\"encoding\", [None, \"utf-8\"])\n    @pytest.mark.parametrize(\"encoding_errors\", [\"strict\", \"ignore\"])\n    @pytest.mark.parametrize(\n        \"parse_dates\",\n        [pytest.param(value, id=id) for id, value in parse_dates_values_by_id.items()],\n    )\n    @pytest.mark.parametrize(\"index_col\", [None, 0, 5])\n    @pytest.mark.parametrize(\"header\", [\"infer\", 0])\n    @pytest.mark.parametrize(\n        \"names\",\n        [\n            None,\n            [\n                \"timestamp\",\n                \"year\",\n                \"month\",\n                \"date\",\n                \"symbol\",\n                \"high\",\n                
\"low\",\n                \"open\",\n                \"close\",\n                \"spread\",\n                \"volume\",\n            ],\n        ],\n    )\n    @pytest.mark.exclude_in_sanity\n    def test_read_csv_parse_dates(\n        self,\n        names,\n        header,\n        index_col,\n        parse_dates,\n        encoding,\n        encoding_errors,\n        request,\n    ):\n        if names is not None and header == \"infer\":\n            pytest.xfail(\n                \"read_csv with Ray engine works incorrectly with date data and names parameter provided - issue #2509\"\n            )\n\n        expected_exception = None\n        if \"nonexistent_int_column\" in request.node.callspec.id:\n            expected_exception = IndexError(\"list index out of range\")\n        elif \"nonexistent_string_column\" in request.node.callspec.id:\n            expected_exception = ValueError(\n                \"Missing column provided to 'parse_dates': 'z'\"\n            )\n        eval_io(\n            fn_name=\"read_csv\",\n            expected_exception=expected_exception,\n            # read_csv kwargs\n            filepath_or_buffer=time_parsing_csv_path,\n            names=names,\n            header=header,\n            index_col=index_col,\n            parse_dates=parse_dates,\n            encoding=encoding,\n            encoding_errors=encoding_errors,\n        )\n\n    @pytest.mark.parametrize(\n        \"storage_options\",\n        [{\"anon\": False}, {\"anon\": True}, {\"key\": \"123\", \"secret\": \"123\"}, None],\n    )\n    @pytest.mark.xfail(\n        reason=\"S3 file gone missing, see https://github.com/modin-project/modin/issues/4875\"\n    )\n    def test_read_csv_s3(self, storage_options):\n        eval_io(\n            fn_name=\"read_csv\",\n            # read_csv kwargs\n            filepath_or_buffer=\"s3://noaa-ghcn-pds/csv/1788.csv\",\n            storage_options=storage_options,\n        )\n\n    @pytest.mark.xfail(\n        reason=\"S3 
file gone missing, see https://github.com/modin-project/modin/issues/7571\"\n    )\n    def test_read_csv_s3_issue4658(self):\n        eval_io(\n            fn_name=\"read_csv\",\n            # read_csv kwargs\n            filepath_or_buffer=\"s3://dask-data/nyc-taxi/2015/yellow_tripdata_2015-01.csv\",\n            nrows=10,\n            storage_options={\"anon\": True},\n        )\n\n    @pytest.mark.parametrize(\"names\", [list(\"XYZ\"), None])\n    @pytest.mark.parametrize(\"skiprows\", [1, 2, 3, 4, None])\n    def test_read_csv_skiprows_names(self, names, skiprows):\n        eval_io(\n            fn_name=\"read_csv\",\n            # read_csv kwargs\n            filepath_or_buffer=\"modin/tests/pandas/data/issue_2239.csv\",\n            names=names,\n            skiprows=skiprows,\n        )\n\n    def _has_pandas_fallback_reason(self):\n        # The Python engine does not use custom IO dispatchers, so specialized error messages\n        # won't appear\n        return Engine.get() != \"Python\"\n\n    def test_read_csv_default_to_pandas(self):\n        if self._has_pandas_fallback_reason():\n            warning_suffix = \"buffers\"\n        else:\n            warning_suffix = \"\"\n        with warns_that_defaulting_to_pandas_if(\n            not current_execution_is_native(), suffix=warning_suffix\n        ):\n            # This tests that we default to pandas on a buffer\n            with open(pytest.csvs_names[\"test_read_csv_regular\"], \"r\") as _f:\n                pd.read_csv(StringIO(_f.read()))\n\n    def test_read_csv_url(self):\n        eval_io(\n            fn_name=\"read_csv\",\n            # read_csv kwargs\n            filepath_or_buffer=\"https://raw.githubusercontent.com/modin-project/modin/main/modin/tests/pandas/data/blah.csv\",\n        )\n\n    @pytest.mark.parametrize(\"nrows\", [21, 5, None])\n    @pytest.mark.parametrize(\"skiprows\", [4, 1, 500, None])\n    def test_read_csv_newlines_in_quotes(self, nrows, skiprows):\n        
expected_exception = None\n        if skiprows == 500:\n            expected_exception = pandas.errors.EmptyDataError(\n                \"No columns to parse from file\"\n            )\n        eval_io(\n            fn_name=\"read_csv\",\n            expected_exception=expected_exception,\n            # read_csv kwargs\n            filepath_or_buffer=\"modin/tests/pandas/data/newlines.csv\",\n            nrows=nrows,\n            skiprows=skiprows,\n            cast_to_str=True,\n        )\n\n    @pytest.mark.parametrize(\"skiprows\", [None, 0, [], [1, 2], np.arange(0, 2)])\n    def test_read_csv_skiprows_with_usecols(self, skiprows):\n        usecols = {\"float_data\": \"float64\"}\n        expected_exception = None\n        if isinstance(skiprows, np.ndarray):\n            expected_exception = ValueError(\n                \"Usecols do not match columns, columns expected but not found: ['float_data']\"\n            )\n        eval_io(\n            fn_name=\"read_csv\",\n            expected_exception=expected_exception,\n            # read_csv kwargs\n            filepath_or_buffer=\"modin/tests/pandas/data/issue_4543.csv\",\n            skiprows=skiprows,\n            usecols=usecols.keys(),\n            dtype=usecols,\n        )\n\n    def test_read_csv_sep_none(self):\n        eval_io(\n            fn_name=\"read_csv\",\n            modin_warning=ParserWarning,\n            # read_csv kwargs\n            filepath_or_buffer=pytest.csvs_names[\"test_read_csv_regular\"],\n            sep=None,\n        )\n\n    def test_read_csv_incorrect_data(self):\n        eval_io(\n            fn_name=\"read_csv\",\n            # read_csv kwargs\n            filepath_or_buffer=\"modin/tests/pandas/data/test_categories.json\",\n        )\n\n    @pytest.mark.parametrize(\n        \"kwargs\",\n        [\n            {\"names\": [5, 1, 3, 4, 2, 6]},\n            {\"names\": [0]},\n            {\"names\": None, \"usecols\": [1, 0, 2]},\n            {\"names\": [3, 1, 2, 5], 
\"usecols\": [4, 1, 3, 2]},\n        ],\n    )\n    def test_read_csv_names_neq_num_cols(self, kwargs):\n        eval_io(\n            fn_name=\"read_csv\",\n            # read_csv kwargs\n            filepath_or_buffer=\"modin/tests/pandas/data/issue_2074.csv\",\n            **kwargs,\n        )\n\n    def test_read_csv_wrong_path(self):\n        expected_exception = FileNotFoundError(2, \"No such file or directory\")\n        eval_io(\n            fn_name=\"read_csv\",\n            expected_exception=expected_exception,\n            # read_csv kwargs\n            filepath_or_buffer=\"/some/wrong/path.csv\",\n        )\n\n    @pytest.mark.parametrize(\"extension\", [None, \"csv\", \"csv.gz\"])\n    @pytest.mark.parametrize(\"sep\", [\" \"])\n    @pytest.mark.parametrize(\"header\", [False, True, \"sfx-\"])\n    @pytest.mark.parametrize(\"mode\", [\"w\", \"wb+\"])\n    @pytest.mark.parametrize(\"idx_name\", [None, \"Index\"])\n    @pytest.mark.parametrize(\"index\", [True, False, \"New index\"])\n    @pytest.mark.parametrize(\"index_label\", [None, False, \"New index\"])\n    @pytest.mark.parametrize(\"columns\", [None, [\"col1\", \"col3\", \"col5\"]])\n    @pytest.mark.exclude_in_sanity\n    @pytest.mark.skipif(\n        condition=Engine.get() == \"Unidist\" and os.name == \"nt\",\n        reason=\"https://github.com/modin-project/modin/issues/6846\",\n    )\n    def test_to_csv(\n        self,\n        tmp_path,\n        extension,\n        sep,\n        header,\n        mode,\n        idx_name,\n        index,\n        index_label,\n        columns,\n    ):\n        pandas_df = generate_dataframe(idx_name=idx_name)\n        modin_df = pd.DataFrame(pandas_df)\n\n        if isinstance(header, str):\n            if columns is None:\n                header = [f\"{header}{c}\" for c in modin_df.columns]\n            else:\n                header = [f\"{header}{c}\" for c in columns]\n\n        eval_to_csv_file(\n            tmp_path,\n            
modin_obj=modin_df,\n            pandas_obj=pandas_df,\n            extension=extension,\n            sep=sep,\n            header=header,\n            mode=mode,\n            index=index,\n            index_label=index_label,\n            columns=columns,\n        )\n\n    @pytest.mark.skipif(\n        condition=Engine.get() == \"Unidist\" and os.name == \"nt\",\n        reason=\"https://github.com/modin-project/modin/issues/6846\",\n    )\n    def test_dataframe_to_csv(self, tmp_path):\n        pandas_df = pandas.read_csv(pytest.csvs_names[\"test_read_csv_regular\"])\n        modin_df = pd.DataFrame(pandas_df)\n        eval_to_csv_file(\n            tmp_path,\n            modin_obj=modin_df,\n            pandas_obj=pandas_df,\n            extension=\"csv\",\n        )\n\n    @pytest.mark.skipif(\n        condition=Engine.get() == \"Unidist\" and os.name == \"nt\",\n        reason=\"https://github.com/modin-project/modin/issues/6846\",\n    )\n    def test_series_to_csv(self, tmp_path):\n        pandas_s = pandas.read_csv(\n            pytest.csvs_names[\"test_read_csv_regular\"], usecols=[\"col1\"]\n        ).squeeze()\n        modin_s = pd.Series(pandas_s)\n        eval_to_csv_file(\n            tmp_path,\n            modin_obj=modin_s,\n            pandas_obj=pandas_s,\n            extension=\"csv\",\n        )\n\n    def test_read_csv_within_decorator(self):\n        @dummy_decorator()\n        def wrapped_read_csv(file, method):\n            if method == \"pandas\":\n                return pandas.read_csv(file)\n\n            if method == \"modin\":\n                return pd.read_csv(file)\n\n        pandas_df = wrapped_read_csv(\n            pytest.csvs_names[\"test_read_csv_regular\"], method=\"pandas\"\n        )\n        modin_df = wrapped_read_csv(\n            pytest.csvs_names[\"test_read_csv_regular\"], method=\"modin\"\n        )\n\n        df_equals(modin_df, pandas_df)\n\n    @pytest.mark.parametrize(\n        \"read_mode\",\n        [\n           
 \"r\",\n            \"rb\",\n        ],\n    )\n    @pytest.mark.parametrize(\"buffer_start_pos\", [0, 10])\n    @pytest.mark.parametrize(\"set_async_read_mode\", [False, True], indirect=True)\n    def test_read_csv_file_handle(\n        self, read_mode, make_csv_file, buffer_start_pos, set_async_read_mode\n    ):\n        unique_filename = make_csv_file()\n        with open(unique_filename, mode=read_mode) as buffer:\n            buffer.seek(buffer_start_pos)\n            pandas_df = pandas.read_csv(buffer)\n            buffer.seek(buffer_start_pos)\n            modin_df = pd.read_csv(buffer)\n        df_equals(modin_df, pandas_df)\n\n    @pytest.mark.skipif(\n        current_execution_is_native(),\n        reason=\"no partitions\",\n    )\n    def test_unnamed_index(self):\n        def get_internal_df(df):\n            partition = read_df._query_compiler._modin_frame._partitions[0][0]\n            return partition.to_pandas()\n\n        path = \"modin/tests/pandas/data/issue_3119.csv\"\n        read_df = pd.read_csv(path, index_col=0)\n        assert get_internal_df(read_df).index.name is None\n        read_df = pd.read_csv(path, index_col=[0, 1])\n        for name1, name2 in zip(get_internal_df(read_df).index.names, [None, \"a\"]):\n            assert name1 == name2\n\n    def test_read_csv_empty_frame(self):\n        eval_io(\n            fn_name=\"read_csv\",\n            # read_csv kwargs\n            filepath_or_buffer=pytest.csvs_names[\"test_read_csv_regular\"],\n            usecols=[\"col1\"],\n            index_col=\"col1\",\n        )\n\n    @pytest.mark.parametrize(\n        \"skiprows\",\n        [\n            [x for x in range(10)],\n            [x + 5 for x in range(15)],\n            [x for x in range(10) if x % 2 == 0],\n            [x + 5 for x in range(15) if x % 2 == 0],\n            lambda x: x % 2,\n            lambda x: x > 20,\n            lambda x: x < 20,\n            lambda x: True,\n            lambda x: x in [10, 20],\n            
lambda x: x << 10,\n        ],\n    )\n    @pytest.mark.parametrize(\"header\", [\"infer\", None, 0, 1, 150])\n    def test_read_csv_skiprows_corner_cases(self, skiprows, header):\n        eval_io(\n            fn_name=\"read_csv\",\n            check_kwargs_callable=not callable(skiprows),\n            # read_csv kwargs\n            filepath_or_buffer=pytest.csvs_names[\"test_read_csv_regular\"],\n            skiprows=skiprows,\n            header=header,\n            dtype=\"str\",  # to avoid issues with heterogeneous data\n            # FIXME: https://github.com/modin-project/modin/issues/7035\n            expected_exception=False,\n        )\n\n    def test_to_csv_with_index(self, tmp_path):\n        cols = 100\n        arows = 20000\n        keyrange = 100\n        values = np.vstack(\n            [\n                np.random.choice(keyrange, size=(arows)),\n                np.random.normal(size=(cols, arows)),\n            ]\n        ).transpose()\n        modin_df = pd.DataFrame(\n            values,\n            columns=[\"key\"] + [\"avalue\" + str(i) for i in range(1, 1 + cols)],\n        ).set_index(\"key\")\n        pandas_df = pandas.DataFrame(\n            values,\n            columns=[\"key\"] + [\"avalue\" + str(i) for i in range(1, 1 + cols)],\n        ).set_index(\"key\")\n        eval_to_csv_file(tmp_path, modin_df, pandas_df, \"csv\")\n\n    @pytest.mark.parametrize(\"set_async_read_mode\", [False, True], indirect=True)\n    def test_read_csv_issue_5150(self, set_async_read_mode):\n        with ensure_clean(\".csv\") as unique_filename:\n            pandas_df = pandas.DataFrame(np.random.randint(0, 100, size=(2**6, 2**6)))\n            pandas_df.to_csv(unique_filename, index=False)\n            expected_pandas_df = pandas.read_csv(unique_filename, index_col=False)\n            modin_df = pd.read_csv(unique_filename, index_col=False)\n            actual_pandas_df = modin_df._to_pandas()\n            if AsyncReadMode.get():\n                # If 
read operations are asynchronous, then the dataframes\n                # check should be inside `ensure_clean` context\n                # because the file may be deleted before actual reading starts\n                df_equals(expected_pandas_df, actual_pandas_df)\n        if not AsyncReadMode.get():\n            df_equals(expected_pandas_df, actual_pandas_df)\n\n    @pytest.mark.parametrize(\"usecols\", [None, [0, 1, 2, 3, 4]])\n    def test_read_csv_1930(self, usecols):\n        eval_io(\n            fn_name=\"read_csv\",\n            # read_csv kwargs\n            filepath_or_buffer=\"modin/tests/pandas/data/issue_1930.csv\",\n            names=[\"c1\", \"c2\", \"c3\", \"c4\", \"c5\"],\n            usecols=usecols,\n        )\n\n\ndef _check_relative_io(fn_name, unique_filename, path_arg, storage_default=()):\n    # Windows can be funny at where it searches for ~; besides, Python >= 3.8 no longer honors %HOME%\n    dirname, basename = os.path.split(unique_filename)\n    pinned_home = {envvar: dirname for envvar in (\"HOME\", \"USERPROFILE\", \"HOMEPATH\")}\n    should_default = Engine.get() == \"Python\" or StorageFormat.get() in storage_default\n    with mock.patch.dict(os.environ, pinned_home):\n        with warns_that_defaulting_to_pandas_if(should_default):\n            eval_io(\n                fn_name=fn_name,\n                **{path_arg: f\"~/{basename}\"},\n            )\n        # check that when read without $HOME patched we have equivalent results\n        eval_general(\n            f\"~/{basename}\",\n            unique_filename,\n            lambda fname: getattr(pandas, fn_name)(**{path_arg: fname}),\n        )\n\n\n# Leave this test apart from the test classes, which skip the default to pandas\n# warning check. 
We want to make sure we are NOT defaulting to pandas for a\n# path relative to user home.\n# TODO(https://github.com/modin-project/modin/issues/3655): Get rid of this\n# commment once we turn all default to pandas messages into errors.\ndef test_read_csv_relative_to_user_home(make_csv_file):\n    unique_filename = make_csv_file()\n    _check_relative_io(\"read_csv\", unique_filename, \"filepath_or_buffer\")\n\n\n@pytest.mark.filterwarnings(default_to_pandas_ignore_string)\nclass TestTable:\n    def test_read_table(self, make_csv_file):\n        unique_filename = make_csv_file(delimiter=\"\\t\")\n        eval_io(\n            fn_name=\"read_table\",\n            # read_table kwargs\n            filepath_or_buffer=unique_filename,\n        )\n\n    @pytest.mark.parametrize(\"set_async_read_mode\", [False, True], indirect=True)\n    def test_read_table_within_decorator(self, make_csv_file, set_async_read_mode):\n        @dummy_decorator()\n        def wrapped_read_table(file, method):\n            if method == \"pandas\":\n                return pandas.read_table(file)\n\n            if method == \"modin\":\n                return pd.read_table(file)\n\n        unique_filename = make_csv_file(delimiter=\"\\t\")\n\n        pandas_df = wrapped_read_table(unique_filename, method=\"pandas\")\n        modin_df = wrapped_read_table(unique_filename, method=\"modin\")\n\n        df_equals(modin_df, pandas_df)\n\n    def test_read_table_empty_frame(self, make_csv_file):\n        unique_filename = make_csv_file(delimiter=\"\\t\")\n\n        eval_io(\n            fn_name=\"read_table\",\n            # read_table kwargs\n            filepath_or_buffer=unique_filename,\n            usecols=[\"col1\"],\n            index_col=\"col1\",\n        )\n\n\n@pytest.mark.parametrize(\"engine\", [\"pyarrow\", \"fastparquet\"])\n@pytest.mark.filterwarnings(default_to_pandas_ignore_string)\nclass TestParquet:\n    @pytest.mark.parametrize(\"columns\", [None, [\"col1\"]])\n    
@pytest.mark.parametrize(\"row_group_size\", [None, 100, 1000, 10_000])\n    @pytest.mark.parametrize(\"path_type\", [Path, str])\n    def test_read_parquet(\n        self, engine, make_parquet_file, columns, row_group_size, path_type\n    ):\n        self._test_read_parquet(\n            engine=engine,\n            make_parquet_file=make_parquet_file,\n            columns=columns,\n            filters=None,\n            row_group_size=row_group_size,\n            path_type=path_type,\n        )\n\n    def _test_read_parquet(\n        self,\n        engine,\n        make_parquet_file,\n        columns,\n        filters,\n        row_group_size,\n        path_type=str,\n        range_index_start=0,\n        range_index_step=1,\n        range_index_name=None,\n        expected_exception=None,\n    ):\n        if engine == \"pyarrow\" and filters == [] and os.name == \"nt\":\n            # pyarrow, and therefore pandas using pyarrow, errors in this case.\n            # Modin correctly replicates this behavior; however error cases\n            # cause race conditions with ensure_clean on Windows.\n            # TODO: Remove this once https://github.com/modin-project/modin/issues/6460 is fixed.\n            pytest.xfail(\n                \"Skipping empty filters error case to avoid race condition - see #6460\"\n            )\n\n        with ensure_clean(\".parquet\") as unique_filename:\n            unique_filename = path_type(unique_filename)\n            make_parquet_file(\n                filename=unique_filename,\n                row_group_size=row_group_size,\n                range_index_start=range_index_start,\n                range_index_step=range_index_step,\n                range_index_name=range_index_name,\n            )\n\n            eval_io(\n                fn_name=\"read_parquet\",\n                # read_parquet kwargs\n                engine=engine,\n                path=unique_filename,\n                columns=columns,\n                
filters=filters,\n                expected_exception=expected_exception,\n            )\n\n    @pytest.mark.parametrize(\n        \"dtype_backend\", [lib.no_default, \"numpy_nullable\", \"pyarrow\"]\n    )\n    def test_read_parquet_dtype_backend(self, engine, make_parquet_file, dtype_backend):\n        with ensure_clean(\".parquet\") as unique_filename:\n            make_parquet_file(filename=unique_filename, row_group_size=100)\n\n            def comparator(df1, df2):\n                df_equals(df1, df2)\n                df_equals(df1.dtypes, df2.dtypes)\n\n            expected_exception = None\n            if engine == \"fastparquet\":\n                expected_exception = ValueError(\n                    \"The 'dtype_backend' argument is not supported for the fastparquet engine\"\n                )\n\n            eval_io(\n                fn_name=\"read_parquet\",\n                # read_parquet kwargs\n                engine=engine,\n                path=unique_filename,\n                dtype_backend=dtype_backend,\n                comparator=comparator,\n                expected_exception=expected_exception,\n            )\n\n    # Tests issue #6778\n    def test_read_parquet_no_extension(self, engine, make_parquet_file):\n        with ensure_clean(\".parquet\") as unique_filename:\n            # Remove the .parquet extension\n            no_ext_fname = unique_filename[: unique_filename.index(\".parquet\")]\n\n            make_parquet_file(filename=no_ext_fname)\n            eval_io(\n                fn_name=\"read_parquet\",\n                # read_parquet kwargs\n                engine=engine,\n                path=no_ext_fname,\n            )\n\n    @pytest.mark.parametrize(\n        \"filters\",\n        [None, [], [(\"col1\", \"==\", 5)], [(\"col1\", \"<=\", 215), (\"col2\", \">=\", 35)]],\n    )\n    def test_read_parquet_filters(self, engine, make_parquet_file, filters):\n        expected_exception = None\n        if filters == [] and engine == 
\"pyarrow\":\n            expected_exception = ValueError(\"Malformed filters\")\n        self._test_read_parquet(\n            engine=engine,\n            make_parquet_file=make_parquet_file,\n            columns=None,\n            filters=filters,\n            row_group_size=100,\n            path_type=str,\n            expected_exception=expected_exception,\n        )\n\n    @pytest.mark.parametrize(\"columns\", [None, [\"col1\"]])\n    @pytest.mark.parametrize(\n        \"filters\",\n        [None, [(\"col1\", \"<=\", 1_000_000)], [(\"col1\", \"<=\", 75), (\"col2\", \">=\", 35)]],\n    )\n    @pytest.mark.parametrize(\n        \"range_index_start\",\n        [0, 5_000],\n    )\n    @pytest.mark.parametrize(\n        \"range_index_step\",\n        [1, 10],\n    )\n    @pytest.mark.parametrize(\n        \"range_index_name\",\n        [None, \"my_index\"],\n    )\n    def test_read_parquet_range_index(\n        self,\n        engine,\n        make_parquet_file,\n        columns,\n        filters,\n        range_index_start,\n        range_index_step,\n        range_index_name,\n    ):\n        self._test_read_parquet(\n            engine=engine,\n            make_parquet_file=make_parquet_file,\n            columns=columns,\n            filters=filters,\n            row_group_size=100,\n            path_type=str,\n            range_index_start=range_index_start,\n            range_index_step=range_index_step,\n            range_index_name=range_index_name,\n        )\n\n    def test_read_parquet_list_of_files_5698(self, engine, make_parquet_file):\n        if engine == \"fastparquet\" and os.name == \"nt\":\n            pytest.xfail(reason=\"https://github.com/pandas-dev/pandas/issues/51720\")\n        with ensure_clean(\".parquet\") as f1, ensure_clean(\n            \".parquet\"\n        ) as f2, ensure_clean(\".parquet\") as f3:\n            for f in [f1, f2, f3]:\n                make_parquet_file(filename=f)\n            eval_io(fn_name=\"read_parquet\", 
path=[f1, f2, f3], engine=engine)\n\n    def test_read_parquet_indexing_by_column(self, tmp_path, engine, make_parquet_file):\n        # Test indexing into a column of Modin with various parquet file row lengths.\n        # Specifically, tests for https://github.com/modin-project/modin/issues/3527\n        # which fails when min_partition_size < nrows < min_partition_size * (num_partitions - 1)\n\n        nrows = (\n            MinRowPartitionSize.get() + 1\n        )  # Use the minimal guaranteed failing value for nrows.\n        unique_filename = get_unique_filename(extension=\"parquet\", data_dir=tmp_path)\n        make_parquet_file(filename=unique_filename, nrows=nrows)\n\n        parquet_df = pd.read_parquet(unique_filename, engine=engine)\n        for col in parquet_df.columns:\n            parquet_df[col]\n\n    @pytest.mark.parametrize(\"columns\", [None, [\"col1\"]])\n    @pytest.mark.parametrize(\n        \"filters\",\n        [None, [(\"col1\", \"<=\", 3_215), (\"col2\", \">=\", 35)]],\n    )\n    @pytest.mark.parametrize(\"row_group_size\", [None, 100, 1000, 10_000])\n    @pytest.mark.parametrize(\n        \"rows_per_file\", [[1000] * 40, [0, 0, 40_000], [10_000, 10_000] + [100] * 200]\n    )\n    @pytest.mark.exclude_in_sanity\n    def test_read_parquet_directory(\n        self, engine, make_parquet_dir, columns, filters, row_group_size, rows_per_file\n    ):\n        self._test_read_parquet_directory(\n            engine=engine,\n            make_parquet_dir=make_parquet_dir,\n            columns=columns,\n            filters=filters,\n            range_index_start=0,\n            range_index_step=1,\n            range_index_name=None,\n            row_group_size=row_group_size,\n            rows_per_file=rows_per_file,\n        )\n\n    def _test_read_parquet_directory(\n        self,\n        engine,\n        make_parquet_dir,\n        columns,\n        filters,\n        range_index_start,\n        range_index_step,\n        range_index_name,\n      
  row_group_size,\n        rows_per_file,\n    ):\n        num_cols = DATASET_SIZE_DICT.get(\n            TestDatasetSize.get(), DATASET_SIZE_DICT[\"Small\"]\n        )\n        dfs_by_filename = {}\n        start_row = 0\n        for i, length in enumerate(rows_per_file):\n            end_row = start_row + length\n            df = pandas.DataFrame(\n                {f\"col{x + 1}\": np.arange(start_row, end_row) for x in range(num_cols)},\n            )\n            index = pandas.RangeIndex(\n                start=range_index_start,\n                stop=range_index_start + (length * range_index_step),\n                step=range_index_step,\n                name=range_index_name,\n            )\n            if (\n                range_index_start == 0\n                and range_index_step == 1\n                and range_index_name is None\n            ):\n                assert df.index.equals(index)\n            else:\n                df.index = index\n\n            dfs_by_filename[f\"{i}.parquet\"] = df\n            start_row = end_row\n        path = make_parquet_dir(dfs_by_filename, row_group_size)\n\n        # There are specific files that PyArrow will try to ignore by default\n        # in a parquet directory. One example are files that start with '_'. 
Our\n        # previous implementation tried to read all files in a parquet directory,\n        # but we now make use of PyArrow to ensure the directory is valid.\n        with open(os.path.join(path, \"_committed_file\"), \"w+\") as f:\n            f.write(\"testingtesting\")\n\n        eval_io(\n            fn_name=\"read_parquet\",\n            # read_parquet kwargs\n            engine=engine,\n            path=path,\n            columns=columns,\n            filters=filters,\n        )\n\n    @pytest.mark.parametrize(\n        \"filters\",\n        [None, [(\"col1\", \"<=\", 1_000_000)], [(\"col1\", \"<=\", 75), (\"col2\", \">=\", 35)]],\n    )\n    @pytest.mark.parametrize(\n        \"range_index_start\",\n        [0, 5_000],\n    )\n    @pytest.mark.parametrize(\n        \"range_index_step\",\n        [1, 10],\n    )\n    @pytest.mark.parametrize(\n        \"range_index_name\",\n        [None, \"my_index\"],\n    )\n    @pytest.mark.parametrize(\"row_group_size\", [None, 20])\n    def test_read_parquet_directory_range_index(\n        self,\n        engine,\n        make_parquet_dir,\n        filters,\n        range_index_start,\n        range_index_step,\n        range_index_name,\n        row_group_size,\n    ):\n        self._test_read_parquet_directory(\n            engine=engine,\n            make_parquet_dir=make_parquet_dir,\n            columns=None,\n            filters=filters,\n            range_index_start=range_index_start,\n            range_index_step=range_index_step,\n            range_index_name=range_index_name,\n            row_group_size=row_group_size,\n            # We don't vary rows_per_file, but we choose a\n            # tricky option: uneven with some empty files,\n            # none divisible by the row_group_size.\n            # We use a smaller total size than in other tests\n            # to make this test run faster.\n            rows_per_file=([250] + [0] * 10 + [25] * 10),\n        )\n\n    @pytest.mark.parametrize(\n        
\"filters\",\n        [None, [(\"col1\", \"<=\", 1_000_000)], [(\"col1\", \"<=\", 75), (\"col2\", \">=\", 35)]],\n    )\n    @pytest.mark.parametrize(\n        \"range_index_start\",\n        [0, 5_000],\n    )\n    @pytest.mark.parametrize(\n        \"range_index_step\",\n        [1, 10],\n    )\n    @pytest.mark.parametrize(\n        \"range_index_name\",\n        [None, \"my_index\"],\n    )\n    def test_read_parquet_directory_range_index_consistent_metadata(\n        self,\n        engine,\n        filters,\n        range_index_start,\n        range_index_step,\n        range_index_name,\n        tmp_path,\n    ):\n        num_cols = DATASET_SIZE_DICT.get(\n            TestDatasetSize.get(), DATASET_SIZE_DICT[\"Small\"]\n        )\n        df = pandas.DataFrame(\n            {f\"col{x + 1}\": np.arange(0, 500) for x in range(num_cols)},\n        )\n        index = pandas.RangeIndex(\n            start=range_index_start,\n            stop=range_index_start + (len(df) * range_index_step),\n            step=range_index_step,\n            name=range_index_name,\n        )\n        if (\n            range_index_start == 0\n            and range_index_step == 1\n            and range_index_name is None\n        ):\n            assert df.index.equals(index)\n        else:\n            df.index = index\n\n        path = get_unique_filename(extension=None, data_dir=tmp_path)\n\n        table = pa.Table.from_pandas(df)\n        pyarrow.dataset.write_dataset(\n            table,\n            path,\n            format=\"parquet\",\n            max_rows_per_group=35,\n            max_rows_per_file=100,\n        )\n\n        # There are specific files that PyArrow will try to ignore by default\n        # in a parquet directory. One example are files that start with '_'. 
Our\n        # previous implementation tried to read all files in a parquet directory,\n        # but we now make use of PyArrow to ensure the directory is valid.\n        with open(os.path.join(path, \"_committed_file\"), \"w+\") as f:\n            f.write(\"testingtesting\")\n\n        eval_io(\n            fn_name=\"read_parquet\",\n            # read_parquet kwargs\n            engine=engine,\n            path=path,\n            filters=filters,\n        )\n\n    @pytest.mark.parametrize(\"columns\", [None, [\"col1\"]])\n    @pytest.mark.parametrize(\n        \"filters\",\n        [None, [], [(\"col1\", \"==\", 5)], [(\"col1\", \"<=\", 215), (\"col2\", \">=\", 35)]],\n    )\n    @pytest.mark.parametrize(\n        \"range_index_start\",\n        [0, 5_000],\n    )\n    @pytest.mark.parametrize(\n        \"range_index_step\",\n        [1, 10],\n    )\n    def test_read_parquet_partitioned_directory(\n        self,\n        tmp_path,\n        make_parquet_file,\n        columns,\n        filters,\n        range_index_start,\n        range_index_step,\n        engine,\n    ):\n        unique_filename = get_unique_filename(extension=None, data_dir=tmp_path)\n        make_parquet_file(\n            filename=unique_filename,\n            partitioned_columns=[\"col1\"],\n            range_index_start=range_index_start,\n            range_index_step=range_index_step,\n            range_index_name=\"my_index\",\n        )\n\n        expected_exception = None\n        if filters == [] and engine == \"pyarrow\":\n            expected_exception = ValueError(\"Malformed filters\")\n        eval_io(\n            fn_name=\"read_parquet\",\n            # read_parquet kwargs\n            engine=engine,\n            path=unique_filename,\n            columns=columns,\n            filters=filters,\n            expected_exception=expected_exception,\n        )\n\n    @pytest.mark.parametrize(\n        \"filters\",\n        [\n            None,\n            [],\n            [(\"B\", 
\"==\", \"a\")],\n            [\n                (\"B\", \"==\", \"a\"),\n                (\"A\", \">=\", 50_000),\n                (\"idx\", \"<=\", 30_000),\n                (\"idx_categorical\", \"==\", \"y\"),\n            ],\n        ],\n    )\n    def test_read_parquet_pandas_index(self, engine, filters):\n        if (\n            version.parse(pa.__version__) >= version.parse(\"12.0.0\")\n            and version.parse(pd.__version__) < version.parse(\"2.0.0\")\n            and engine == \"pyarrow\"\n        ):\n            pytest.xfail(\"incompatible versions; see #6072\")\n        # Ensure modin can read parquet files written by pandas with a non-RangeIndex object\n        pandas_df = pandas.DataFrame(\n            {\n                \"idx\": np.random.randint(0, 100_000, size=2000),\n                \"idx_categorical\": pandas.Categorical([\"y\", \"z\"] * 1000),\n                # Can't do interval index right now because of this bug fix that is planned\n                # to be apart of the pandas 1.5.0 release: https://github.com/pandas-dev/pandas/pull/46034\n                # \"idx_interval\": pandas.interval_range(start=0, end=2000),\n                \"idx_periodrange\": pandas.period_range(\n                    start=\"2017-01-01\", periods=2000\n                ),\n                \"A\": np.random.randint(0, 100_000, size=2000),\n                \"B\": [\"a\", \"b\"] * 1000,\n                \"C\": [\"c\"] * 2000,\n            }\n        )\n        # Older versions of pyarrow do not support Arrow to Parquet\n        # schema conversion for duration[ns]\n        # https://issues.apache.org/jira/browse/ARROW-6780\n        if version.parse(pa.__version__) >= version.parse(\"8.0.0\"):\n            pandas_df[\"idx_timedelta\"] = pandas.timedelta_range(\n                start=\"1 day\", periods=2000\n            )\n\n        # There is a non-deterministic bug in the fastparquet engine when we\n        # try to set the index to the datetime column. 
Please see:\n        # https://github.com/dask/fastparquet/issues/796\n        if engine == \"pyarrow\":\n            pandas_df[\"idx_datetime\"] = pandas.date_range(\n                start=\"1/1/2018\", periods=2000\n            )\n\n        for col in pandas_df.columns:\n            if col.startswith(\"idx\"):\n                # Before this commit, first released in version 2023.1.0, fastparquet relied\n                # on pandas private APIs to handle Categorical indices.\n                # These private APIs broke in pandas 2.\n                # https://github.com/dask/fastparquet/commit/cf60ae0e9a9ca57afc7a8da98d8c0423db1c0c53\n                if (\n                    col == \"idx_categorical\"\n                    and engine == \"fastparquet\"\n                    and version.parse(fastparquet.__version__)\n                    < version.parse(\"2023.1.0\")\n                ):\n                    continue\n\n                with ensure_clean(\".parquet\") as unique_filename:\n                    pandas_df.set_index(col).to_parquet(unique_filename)\n                    # read the same parquet using modin.pandas\n                    eval_io(\n                        \"read_parquet\",\n                        # read_parquet kwargs\n                        path=unique_filename,\n                        engine=engine,\n                        filters=filters,\n                    )\n\n        with ensure_clean(\".parquet\") as unique_filename:\n            pandas_df.set_index([\"idx\", \"A\"]).to_parquet(unique_filename)\n            eval_io(\n                \"read_parquet\",\n                # read_parquet kwargs\n                path=unique_filename,\n                engine=engine,\n                filters=filters,\n            )\n\n    @pytest.mark.parametrize(\n        \"filters\",\n        [\n            None,\n            [],\n            [(\"B\", \"==\", \"a\")],\n            [(\"B\", \"==\", \"a\"), (\"A\", \">=\", 5), (\"idx\", \"<=\", 30_000)],\n      
  ],\n    )\n    def test_read_parquet_pandas_index_partitioned(self, tmp_path, engine, filters):\n        # Ensure modin can read parquet files written by pandas with a non-RangeIndex object\n        pandas_df = pandas.DataFrame(\n            {\n                \"idx\": np.random.randint(0, 100_000, size=2000),\n                \"A\": np.random.randint(0, 10, size=2000),\n                \"B\": [\"a\", \"b\"] * 1000,\n                \"C\": [\"c\"] * 2000,\n            }\n        )\n        unique_filename = get_unique_filename(extension=\"parquet\", data_dir=tmp_path)\n        pandas_df.set_index(\"idx\").to_parquet(unique_filename, partition_cols=[\"A\"])\n        expected_exception = None\n        if filters == [] and engine == \"pyarrow\":\n            expected_exception = ValueError(\"Malformed filters\")\n        # read the same parquet using modin.pandas\n        eval_io(\n            \"read_parquet\",\n            # read_parquet kwargs\n            path=unique_filename,\n            engine=engine,\n            filters=filters,\n            expected_exception=expected_exception,\n        )\n\n    def test_read_parquet_hdfs(self, engine):\n        eval_io(\n            fn_name=\"read_parquet\",\n            # read_parquet kwargs\n            path=\"modin/tests/pandas/data/hdfs.parquet\",\n            engine=engine,\n        )\n\n    @pytest.mark.parametrize(\n        \"path_type\",\n        [\"object\", \"directory\", \"url\"],\n    )\n    def test_read_parquet_s3(self, s3_resource, path_type, engine, s3_storage_options):\n        s3_path = \"s3://modin-test/modin-bugs/test_data.parquet\"\n        if path_type == \"object\":\n            import s3fs\n\n            fs = s3fs.S3FileSystem(\n                endpoint_url=s3_storage_options[\"client_kwargs\"][\"endpoint_url\"]\n            )\n            with fs.open(s3_path, \"rb\") as file_obj:\n                eval_io(\"read_parquet\", path=file_obj, engine=engine)\n        elif path_type == \"directory\":\n   
         s3_path = \"s3://modin-test/modin-bugs/test_data_dir.parquet\"\n            eval_io(\n                \"read_parquet\",\n                path=s3_path,\n                storage_options=s3_storage_options,\n                engine=engine,\n            )\n        else:\n            eval_io(\n                \"read_parquet\",\n                path=s3_path,\n                storage_options=s3_storage_options,\n                engine=engine,\n            )\n\n    @pytest.mark.parametrize(\n        \"filters\",\n        [None, [], [(\"idx\", \"<=\", 30_000)], [(\"idx\", \"<=\", 30_000), (\"A\", \">=\", 5)]],\n    )\n    def test_read_parquet_without_metadata(self, tmp_path, engine, filters):\n        \"\"\"Test that Modin can read parquet files not written by pandas.\"\"\"\n        from pyarrow import csv, parquet\n\n        parquet_fname = get_unique_filename(extension=\"parquet\", data_dir=tmp_path)\n        csv_fname = get_unique_filename(extension=\"parquet\", data_dir=tmp_path)\n        pandas_df = pandas.DataFrame(\n            {\n                \"idx\": np.random.randint(0, 100_000, size=2000),\n                \"A\": np.random.randint(0, 10, size=2000),\n                \"B\": [\"a\", \"b\"] * 1000,\n                \"C\": [\"c\"] * 2000,\n            }\n        )\n        pandas_df.to_csv(csv_fname, index=False)\n        # read into pyarrow table and write it to a parquet file\n        t = csv.read_csv(csv_fname)\n        parquet.write_table(t, parquet_fname)\n\n        expected_exception = None\n        if filters == [] and engine == \"pyarrow\":\n            expected_exception = ValueError(\"Malformed filters\")\n        eval_io(\n            \"read_parquet\",\n            # read_parquet kwargs\n            path=parquet_fname,\n            engine=engine,\n            filters=filters,\n            expected_exception=expected_exception,\n        )\n\n    def test_read_empty_parquet_file(self, tmp_path, engine):\n        test_df = pandas.DataFrame()\n     
   path = tmp_path / \"data\"\n        path.mkdir()\n        test_df.to_parquet(path / \"part-00000.parquet\", engine=engine)\n        eval_io(fn_name=\"read_parquet\", path=path, engine=engine)\n\n    @pytest.mark.parametrize(\n        \"compression_kwargs\",\n        [\n            pytest.param({}, id=\"no_compression_kwargs\"),\n            pytest.param({\"compression\": None}, id=\"compression=None\"),\n            pytest.param({\"compression\": \"gzip\"}, id=\"compression=gzip\"),\n            pytest.param({\"compression\": \"snappy\"}, id=\"compression=snappy\"),\n            pytest.param({\"compression\": \"brotli\"}, id=\"compression=brotli\"),\n        ],\n    )\n    @pytest.mark.parametrize(\"extension\", [\"parquet\", \".gz\", \".bz2\", \".zip\", \".xz\"])\n    def test_to_parquet(self, tmp_path, engine, compression_kwargs, extension):\n        modin_df, pandas_df = create_test_dfs(TEST_DATA)\n        parquet_eval_to_file(\n            tmp_path,\n            modin_obj=modin_df,\n            pandas_obj=pandas_df,\n            fn=\"to_parquet\",\n            extension=extension,\n            engine=engine,\n            **compression_kwargs,\n        )\n\n    def test_to_parquet_keep_index(self, tmp_path, engine):\n        data = {\"c0\": [0, 1] * 1000, \"c1\": [2, 3] * 1000}\n        modin_df, pandas_df = create_test_dfs(data)\n        modin_df.index.name = \"foo\"\n        pandas_df.index.name = \"foo\"\n\n        parquet_eval_to_file(\n            tmp_path,\n            modin_obj=modin_df,\n            pandas_obj=pandas_df,\n            fn=\"to_parquet\",\n            extension=\"parquet\",\n            index=True,\n            engine=engine,\n        )\n\n    def test_to_parquet_s3(self, s3_resource, engine, s3_storage_options):\n        # use utils_test_data because it spans multiple partitions\n        modin_path = \"s3://modin-test/modin-dir/modin_df.parquet\"\n        mdf, pdf = create_test_dfs(utils_test_data[\"int_data\"])\n        
pdf.to_parquet(\n            \"s3://modin-test/pandas-dir/pandas_df.parquet\",\n            engine=engine,\n            storage_options=s3_storage_options,\n        )\n        mdf.to_parquet(modin_path, engine=engine, storage_options=s3_storage_options)\n        df_equals(\n            pandas.read_parquet(\n                \"s3://modin-test/pandas-dir/pandas_df.parquet\",\n                storage_options=s3_storage_options,\n            ),\n            pd.read_parquet(modin_path, storage_options=s3_storage_options),\n        )\n        # check we're not creating local file:\n        # https://github.com/modin-project/modin/issues/5888\n        assert not os.path.isdir(modin_path)\n\n    def test_read_parquet_2462(self, tmp_path, engine):\n        test_df = pandas.DataFrame({\"col1\": [[\"ad_1\", \"ad_2\"], [\"ad_3\"]]})\n        path = tmp_path / \"data\"\n        path.mkdir()\n        test_df.to_parquet(path / \"part-00000.parquet\", engine=engine)\n        read_df = pd.read_parquet(path, engine=engine)\n        df_equals(test_df, read_df)\n\n    def test_read_parquet_5767(self, tmp_path, engine):\n        test_df = pandas.DataFrame({\"a\": [1, 2, 3, 4], \"b\": [1, 1, 2, 2]})\n        path = tmp_path / \"data\"\n        path.mkdir()\n        file_name = \"modin_issue#0000.parquet\"\n        test_df.to_parquet(path / file_name, engine=engine, partition_cols=[\"b\"])\n        read_df = pd.read_parquet(path / file_name)\n        # both Modin and pandas read column \"b\" as a category\n        df_equals(test_df, read_df.astype(\"int64\"))\n\n    @pytest.mark.parametrize(\"index\", [False, True])\n    def test_read_parquet_6855(self, tmp_path, engine, index):\n        if engine == \"fastparquet\":\n            pytest.skip(\"integer columns aren't supported\")\n        test_df = pandas.DataFrame(np.random.rand(10**2, 10))\n        path = tmp_path / \"data\"\n        path.mkdir()\n        file_name = \"issue6855.parquet\"\n        test_df.to_parquet(path / file_name, 
index=index, engine=engine)\n        read_df = pd.read_parquet(path / file_name, engine=engine)\n        if not index:\n            # In that case pyarrow cannot preserve index dtype\n            read_df.columns = pandas.Index(read_df.columns).astype(\"int64\").to_list()\n        df_equals(test_df, read_df)\n\n    def test_read_parquet_s3_with_column_partitioning(\n        self, s3_resource, engine, s3_storage_options\n    ):\n        # https://github.com/modin-project/modin/issues/4636\n        s3_path = \"s3://modin-test/modin-bugs/issue5159.parquet\"\n        eval_io(\n            fn_name=\"read_parquet\",\n            path=s3_path,\n            engine=engine,\n            storage_options=s3_storage_options,\n        )\n\n\n# Leave this test apart from the test classes, which skip the default to pandas\n# warning check. We want to make sure we are NOT defaulting to pandas for a\n# path relative to user home.\n# TODO(https://github.com/modin-project/modin/issues/3655): Get rid of this\n# comment once we turn all default to pandas messages into errors.\ndef test_read_parquet_relative_to_user_home(make_parquet_file):\n    with ensure_clean(\".parquet\") as unique_filename:\n        make_parquet_file(filename=unique_filename)\n        _check_relative_io(\"read_parquet\", unique_filename, \"path\")\n\n\n@pytest.mark.filterwarnings(default_to_pandas_ignore_string)\nclass TestJson:\n    @pytest.mark.parametrize(\"pathlike\", [False, True])\n    @pytest.mark.parametrize(\"lines\", [False, True])\n    def test_read_json(self, make_json_file, lines, pathlike):\n        unique_filename = make_json_file(lines=lines)\n        eval_io(\n            fn_name=\"read_json\",\n            # read_json kwargs\n            path_or_buf=Path(unique_filename) if pathlike else unique_filename,\n            lines=lines,\n        )\n\n    @pytest.mark.parametrize(\n        \"dtype_backend\", [lib.no_default, \"numpy_nullable\", \"pyarrow\"]\n    )\n    def 
test_read_json_dtype_backend(self, make_json_file, dtype_backend):\n        def comparator(df1, df2):\n            df_equals(df1, df2)\n            df_equals(df1.dtypes, df2.dtypes)\n\n        eval_io(\n            fn_name=\"read_json\",\n            # read_json kwargs\n            path_or_buf=make_json_file(lines=True),\n            lines=True,\n            dtype_backend=dtype_backend,\n            comparator=comparator,\n        )\n\n    @pytest.mark.parametrize(\n        \"storage_options_extra\",\n        [{\"anon\": False}, {\"anon\": True}, {\"key\": \"123\", \"secret\": \"123\"}],\n    )\n    def test_read_json_s3(self, s3_resource, s3_storage_options, storage_options_extra):\n        s3_path = \"s3://modin-test/modin-bugs/test_data.json\"\n        expected_exception = None\n        if \"anon\" in storage_options_extra:\n            expected_exception = PermissionError(\"Forbidden\")\n        eval_io(\n            fn_name=\"read_json\",\n            path_or_buf=s3_path,\n            lines=True,\n            orient=\"records\",\n            storage_options=s3_storage_options | storage_options_extra,\n            expected_exception=expected_exception,\n        )\n\n    def test_read_json_categories(self):\n        eval_io(\n            fn_name=\"read_json\",\n            # read_json kwargs\n            path_or_buf=\"modin/tests/pandas/data/test_categories.json\",\n            dtype={\"one\": \"int64\", \"two\": \"category\"},\n        )\n\n    def test_read_json_different_columns(self):\n        with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):\n            eval_io(\n                fn_name=\"read_json\",\n                # read_json kwargs\n                path_or_buf=\"modin/tests/pandas/data/test_different_columns_in_rows.json\",\n                lines=True,\n            )\n\n    @pytest.mark.parametrize(\n        \"data\",\n        [json_short_string, json_short_bytes, json_long_string, json_long_bytes],\n    )\n    def 
test_read_json_string_bytes(self, data):\n        with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):\n            modin_df = pd.read_json(data)\n        # For I/O objects we need to rewind to reuse the same object.\n        if hasattr(data, \"seek\"):\n            data.seek(0)\n        df_equals(modin_df, pandas.read_json(data))\n\n    def test_to_json(self, tmp_path):\n        modin_df, pandas_df = create_test_dfs(TEST_DATA)\n        eval_to_file(\n            tmp_path,\n            modin_obj=modin_df,\n            pandas_obj=pandas_df,\n            fn=\"to_json\",\n            extension=\"json\",\n        )\n\n    @pytest.mark.parametrize(\n        \"read_mode\",\n        [\n            \"r\",\n            \"rb\",\n        ],\n    )\n    def test_read_json_file_handle(self, make_json_file, read_mode):\n        with open(make_json_file(), mode=read_mode) as buf:\n            df_pandas = pandas.read_json(buf)\n            buf.seek(0)\n            df_modin = pd.read_json(buf)\n            df_equals(df_pandas, df_modin)\n\n    @pytest.mark.skipif(\n        current_execution_is_native(),\n        reason=\"no partitions\",\n    )\n    def test_read_json_metadata(self, make_json_file):\n        # `lines=True` is for triggering Modin implementation,\n        # `orient=\"records\"` should be set if `lines=True`\n        df = pd.read_json(\n            make_json_file(ncols=80, lines=True), lines=True, orient=\"records\"\n        )\n        parts_width_cached = df._query_compiler._modin_frame._column_widths_cache\n        num_splits = len(df._query_compiler._modin_frame._partitions[0])\n        parts_width_actual = [\n            len(df._query_compiler._modin_frame._partitions[0][i].get().columns)\n            for i in range(num_splits)\n        ]\n\n        assert parts_width_cached == parts_width_actual\n\n\n@pytest.mark.filterwarnings(default_to_pandas_ignore_string)\nclass TestExcel:\n    @check_file_leaks\n    
@pytest.mark.parametrize(\"pathlike\", [False, True])\n    def test_read_excel(self, pathlike, make_excel_file):\n        unique_filename = make_excel_file()\n        eval_io(\n            fn_name=\"read_excel\",\n            # read_excel kwargs\n            io=Path(unique_filename) if pathlike else unique_filename,\n        )\n\n    @check_file_leaks\n    @pytest.mark.parametrize(\"skiprows\", [2, [1, 3], lambda x: x in [0, 2]])\n    def test_read_excel_skiprows(self, skiprows, make_excel_file):\n        eval_io(\n            fn_name=\"read_excel\",\n            # read_excel kwargs\n            io=make_excel_file(),\n            skiprows=skiprows,\n            check_kwargs_callable=False,\n        )\n\n    @check_file_leaks\n    @pytest.mark.parametrize(\n        \"dtype_backend\", [lib.no_default, \"numpy_nullable\", \"pyarrow\"]\n    )\n    def test_read_excel_dtype_backend(self, make_excel_file, dtype_backend):\n        def comparator(df1, df2):\n            df_equals(df1, df2)\n            df_equals(df1.dtypes, df2.dtypes)\n\n        eval_io(\n            fn_name=\"read_excel\",\n            # read_excel kwargs\n            io=make_excel_file(),\n            dtype_backend=dtype_backend,\n            comparator=comparator,\n        )\n\n    @check_file_leaks\n    def test_read_excel_engine(self, make_excel_file):\n        eval_io(\n            fn_name=\"read_excel\",\n            modin_warning=(UserWarning if StorageFormat.get() == \"Pandas\" else None),\n            # read_excel kwargs\n            io=make_excel_file(),\n            engine=\"openpyxl\",\n        )\n\n    @check_file_leaks\n    def test_read_excel_index_col(self, make_excel_file):\n        eval_io(\n            fn_name=\"read_excel\",\n            modin_warning=(UserWarning if StorageFormat.get() == \"Pandas\" else None),\n            # read_excel kwargs\n            io=make_excel_file(),\n            index_col=0,\n        )\n\n    @check_file_leaks\n    def test_read_excel_all_sheets(self, 
make_excel_file):\n        unique_filename = make_excel_file()\n\n        pandas_df = pandas.read_excel(unique_filename, sheet_name=None)\n        modin_df = pd.read_excel(unique_filename, sheet_name=None)\n\n        assert isinstance(pandas_df, dict)\n        assert isinstance(modin_df, type(pandas_df))\n        assert pandas_df.keys() == modin_df.keys()\n\n        for key in pandas_df.keys():\n            df_equals(modin_df.get(key), pandas_df.get(key))\n\n    # TODO: Check pandas gh-#39250 as it was fixed\n    @pytest.mark.xfail(\n        (StorageFormat.get() == \"Pandas\" and Engine.get() != \"Python\"),\n        reason=\"pandas throws the exception. See pandas issue #39250 for more info\",\n    )\n    @check_file_leaks\n    def test_read_excel_sheetname_title(self):\n        eval_io(\n            fn_name=\"read_excel\",\n            # read_excel kwargs\n            io=\"modin/tests/pandas/data/excel_sheetname_title.xlsx\",\n            # FIXME: https://github.com/modin-project/modin/issues/7036\n            expected_exception=False,\n        )\n\n    @check_file_leaks\n    def test_excel_empty_line(self):\n        path = \"modin/tests/pandas/data/test_emptyline.xlsx\"\n        modin_df = pd.read_excel(path)\n        assert str(modin_df)\n\n    @check_file_leaks\n    def test_read_excel_empty_rows(self):\n        # Test parsing empty rows in middle of excel dataframe as NaN values\n        eval_io(\n            fn_name=\"read_excel\",\n            io=\"modin/tests/pandas/data/test_empty_rows.xlsx\",\n        )\n\n    @check_file_leaks\n    def test_read_excel_border_rows(self):\n        # Test parsing border rows as NaN values in excel dataframe\n        eval_io(\n            fn_name=\"read_excel\",\n            io=\"modin/tests/pandas/data/test_border_rows.xlsx\",\n        )\n\n    @check_file_leaks\n    def test_read_excel_every_other_nan(self):\n        # Test for reading excel dataframe with every other row as a NaN value\n        eval_io(\n            
fn_name=\"read_excel\",\n            io=\"modin/tests/pandas/data/every_other_row_nan.xlsx\",\n        )\n\n    @check_file_leaks\n    def test_read_excel_header_none(self):\n        eval_io(\n            fn_name=\"read_excel\",\n            io=\"modin/tests/pandas/data/every_other_row_nan.xlsx\",\n            header=None,\n        )\n\n    @pytest.mark.parametrize(\n        \"sheet_name\",\n        [\n            \"Sheet1\",\n            \"AnotherSpecialName\",\n            \"SpecialName\",\n            \"SecondSpecialName\",\n            0,\n            1,\n            2,\n            3,\n        ],\n    )\n    @check_file_leaks\n    def test_read_excel_sheet_name(self, sheet_name):\n        eval_io(\n            fn_name=\"read_excel\",\n            # read_excel kwargs\n            io=\"modin/tests/pandas/data/modin_error_book.xlsx\",\n            sheet_name=sheet_name,\n            # https://github.com/modin-project/modin/issues/5965\n            comparator_kwargs={\"check_dtypes\": False},\n        )\n\n    def test_ExcelFile(self, make_excel_file):\n        unique_filename = make_excel_file()\n\n        modin_excel_file = pd.ExcelFile(unique_filename)\n        pandas_excel_file = pandas.ExcelFile(unique_filename)\n\n        try:\n            df_equals(modin_excel_file.parse(), pandas_excel_file.parse())\n            assert modin_excel_file.io == unique_filename\n        finally:\n            modin_excel_file.close()\n            pandas_excel_file.close()\n\n    def test_ExcelFile_bytes(self, make_excel_file):\n        unique_filename = make_excel_file()\n        with open(unique_filename, mode=\"rb\") as f:\n            content = f.read()\n\n        modin_excel_file = pd.ExcelFile(content)\n        pandas_excel_file = pandas.ExcelFile(content)\n\n        df_equals(modin_excel_file.parse(), pandas_excel_file.parse())\n\n    def test_read_excel_ExcelFile(self, make_excel_file):\n        unique_filename = make_excel_file()\n        with open(unique_filename, 
mode=\"rb\") as f:\n            content = f.read()\n\n        modin_excel_file = pd.ExcelFile(content)\n        pandas_excel_file = pandas.ExcelFile(content)\n\n        df_equals(pd.read_excel(modin_excel_file), pandas.read_excel(pandas_excel_file))\n\n    @pytest.mark.parametrize(\"use_bytes_io\", [False, True])\n    def test_read_excel_bytes(self, use_bytes_io, make_excel_file):\n        unique_filename = make_excel_file()\n        with open(unique_filename, mode=\"rb\") as f:\n            io_bytes = f.read()\n\n        if use_bytes_io:\n            io_bytes = BytesIO(io_bytes)\n\n        eval_io(\n            fn_name=\"read_excel\",\n            # read_excel kwargs\n            io=io_bytes,\n        )\n\n    def test_read_excel_file_handle(self, make_excel_file):\n        unique_filename = make_excel_file()\n        with open(unique_filename, mode=\"rb\") as f:\n            eval_io(\n                fn_name=\"read_excel\",\n                # read_excel kwargs\n                io=f,\n            )\n\n    @pytest.mark.xfail(strict=False, reason=\"Flaky test, defaults to pandas\")\n    def test_to_excel(self, tmp_path):\n        modin_df, pandas_df = create_test_dfs(TEST_DATA)\n\n        unique_filename_modin = get_unique_filename(extension=\"xlsx\", data_dir=tmp_path)\n        unique_filename_pandas = get_unique_filename(\n            extension=\"xlsx\", data_dir=tmp_path\n        )\n\n        modin_writer = pandas.ExcelWriter(unique_filename_modin)\n        pandas_writer = pandas.ExcelWriter(unique_filename_pandas)\n\n        modin_df.to_excel(modin_writer)\n        pandas_df.to_excel(pandas_writer)\n\n        modin_writer.save()\n        pandas_writer.save()\n\n        assert assert_files_eq(unique_filename_modin, unique_filename_pandas)\n\n    @check_file_leaks\n    def test_read_excel_empty_frame(self, make_excel_file):\n        eval_io(\n            fn_name=\"read_excel\",\n            modin_warning=(UserWarning if StorageFormat.get() == \"Pandas\" else 
None),\n            # read_excel kwargs\n            io=make_excel_file(),\n            usecols=[0],\n            index_col=0,\n        )\n\n\n@pytest.mark.filterwarnings(default_to_pandas_ignore_string)\nclass TestHdf:\n    @pytest.mark.parametrize(\"format\", [None, \"table\"])\n    def test_read_hdf(self, make_hdf_file, format):\n        eval_io(\n            fn_name=\"read_hdf\",\n            # read_hdf kwargs\n            path_or_buf=make_hdf_file(format=format),\n            key=\"df\",\n        )\n\n    def test_HDFStore(self, tmp_path):\n        unique_filename_modin = get_unique_filename(extension=\"hdf\", data_dir=tmp_path)\n        unique_filename_pandas = get_unique_filename(extension=\"hdf\", data_dir=tmp_path)\n\n        modin_store = pd.HDFStore(unique_filename_modin)\n        pandas_store = pandas.HDFStore(unique_filename_pandas)\n\n        modin_df, pandas_df = create_test_dfs(TEST_DATA)\n\n        modin_store[\"foo\"] = modin_df\n        pandas_store[\"foo\"] = pandas_df\n\n        modin_df = modin_store.get(\"foo\")\n        pandas_df = pandas_store.get(\"foo\")\n        df_equals(modin_df, pandas_df)\n\n        modin_store.close()\n        pandas_store.close()\n        modin_df = pandas.read_hdf(unique_filename_modin, key=\"foo\", mode=\"r\")\n        pandas_df = pandas.read_hdf(unique_filename_pandas, key=\"foo\", mode=\"r\")\n        df_equals(modin_df, pandas_df)\n        assert isinstance(modin_store, pd.HDFStore)\n\n        with ensure_clean(\".hdf5\") as hdf_file:\n            with pd.HDFStore(hdf_file, mode=\"w\") as store:\n                store.append(\"data/df1\", pd.DataFrame(np.random.randn(5, 5)))\n                store.append(\"data/df2\", pd.DataFrame(np.random.randn(4, 4)))\n\n            modin_df = pd.read_hdf(hdf_file, key=\"data/df1\", mode=\"r\")\n            pandas_df = pandas.read_hdf(hdf_file, key=\"data/df1\", mode=\"r\")\n        df_equals(modin_df, pandas_df)\n\n    def test_HDFStore_in_read_hdf(self):\n        with 
ensure_clean(\".hdf\") as filename:\n            dfin = pd.DataFrame(np.random.rand(8, 8))\n            dfin.to_hdf(filename, \"/key\")\n\n            with pd.HDFStore(filename) as h:\n                modin_df = pd.read_hdf(h, \"/key\")\n            with pandas.HDFStore(filename) as h:\n                pandas_df = pandas.read_hdf(h, \"/key\")\n        df_equals(modin_df, pandas_df)\n\n\n@pytest.mark.filterwarnings(default_to_pandas_ignore_string)\nclass TestSql:\n    @pytest.mark.parametrize(\"read_sql_engine\", [\"Pandas\", \"Connectorx\"])\n    def test_read_sql(self, tmp_path, make_sql_connection, read_sql_engine):\n        filename = get_unique_filename(\".db\")\n        table = \"test_read_sql\"\n        conn = make_sql_connection(tmp_path / filename, table)\n        query = f\"select * from {table}\"\n\n        eval_io(\n            fn_name=\"read_sql\",\n            # read_sql kwargs\n            sql=query,\n            con=conn,\n        )\n\n        eval_io(\n            fn_name=\"read_sql\",\n            # read_sql kwargs\n            sql=query,\n            con=conn,\n            index_col=\"index\",\n        )\n\n        with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):\n            pd.read_sql_query(query, conn)\n\n        with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):\n            pd.read_sql_table(table, conn)\n\n        # Test SQLAlchemy engine\n        sqlalchemy_engine = sa.create_engine(conn)\n        eval_io(\n            fn_name=\"read_sql\",\n            # read_sql kwargs\n            sql=query,\n            con=sqlalchemy_engine,\n        )\n\n        # Test SQLAlchemy Connection\n        sqlalchemy_connection = sqlalchemy_engine.connect()\n        eval_io(\n            fn_name=\"read_sql\",\n            # read_sql kwargs\n            sql=query,\n            con=sqlalchemy_connection,\n        )\n\n        old_sql_engine = ReadSqlEngine.get()\n        ReadSqlEngine.put(read_sql_engine)\n  
      if ReadSqlEngine.get() == \"Connectorx\":\n            modin_df = pd.read_sql(sql=query, con=conn)\n        else:\n            modin_df = pd.read_sql(\n                sql=query, con=ModinDatabaseConnection(\"sqlalchemy\", conn)\n            )\n        ReadSqlEngine.put(old_sql_engine)\n        pandas_df = pandas.read_sql(sql=query, con=sqlalchemy_connection)\n        df_equals(modin_df, pandas_df)\n\n    @pytest.mark.parametrize(\n        \"dtype_backend\", [lib.no_default, \"numpy_nullable\", \"pyarrow\"]\n    )\n    def test_read_sql_dtype_backend(self, tmp_path, make_sql_connection, dtype_backend):\n        filename = get_unique_filename(extension=\"db\")\n\n        table = \"test_read_sql_dtype_backend\"\n        conn = make_sql_connection(tmp_path / filename, table)\n        query = f\"select * from {table}\"\n\n        def comparator(df1, df2):\n            df_equals(df1, df2)\n            df_equals(df1.dtypes, df2.dtypes)\n\n        eval_io(\n            fn_name=\"read_sql\",\n            # read_sql kwargs\n            sql=query,\n            con=conn,\n            dtype_backend=dtype_backend,\n            comparator=comparator,\n        )\n\n    @pytest.mark.skipif(\n        not TestReadFromSqlServer.get(),\n        reason=\"Skip the test when the test SQL server is not set up.\",\n    )\n    def test_read_sql_from_sql_server(self):\n        table_name = \"test_1000x256\"\n        query = f\"SELECT * FROM {table_name}\"\n        sqlalchemy_connection_string = (\n            \"mssql+pymssql://sa:Strong.Pwd-123@0.0.0.0:1433/master\"\n        )\n        pandas_df_to_read = pandas.DataFrame(\n            np.arange(\n                1000 * 256,\n            ).reshape(1000, 256)\n        ).add_prefix(\"col\")\n        pandas_df_to_read.to_sql(\n            table_name, sqlalchemy_connection_string, if_exists=\"replace\"\n        )\n        modin_df = pd.read_sql(\n            query,\n            ModinDatabaseConnection(\"sqlalchemy\", 
sqlalchemy_connection_string),\n        )\n        pandas_df = pandas.read_sql(query, sqlalchemy_connection_string)\n        df_equals(modin_df, pandas_df)\n\n    @pytest.mark.skipif(\n        not TestReadFromPostgres.get(),\n        reason=\"Skip the test when the postgres server is not set up.\",\n    )\n    def test_read_sql_from_postgres(self):\n        table_name = \"test_1000x256\"\n        query = f\"SELECT * FROM {table_name}\"\n        connection = \"postgresql://sa:Strong.Pwd-123@localhost:2345/postgres\"\n        pandas_df_to_read = pandas.DataFrame(\n            np.arange(\n                1000 * 256,\n            ).reshape(1000, 256)\n        ).add_prefix(\"col\")\n        pandas_df_to_read.to_sql(table_name, connection, if_exists=\"replace\")\n        modin_df = pd.read_sql(\n            query,\n            ModinDatabaseConnection(\"psycopg2\", connection),\n        )\n        pandas_df = pandas.read_sql(query, connection)\n        df_equals(modin_df, pandas_df)\n\n    def test_invalid_modin_database_connections(self):\n        with pytest.raises(UnsupportedDatabaseException):\n            ModinDatabaseConnection(\"unsupported_database\")\n\n    def test_read_sql_with_chunksize(self, make_sql_connection):\n        filename = get_unique_filename(extension=\"db\")\n        table = \"test_read_sql_with_chunksize\"\n        conn = make_sql_connection(filename, table)\n        query = f\"select * from {table}\"\n\n        pandas_gen = pandas.read_sql(query, conn, chunksize=10)\n        modin_gen = pd.read_sql(query, conn, chunksize=10)\n        for modin_df, pandas_df in zip(modin_gen, pandas_gen):\n            df_equals(modin_df, pandas_df)\n\n    @pytest.mark.parametrize(\"index\", [False, True])\n    @pytest.mark.parametrize(\"conn_type\", [\"str\", \"sqlalchemy\", \"sqlalchemy+connect\"])\n    def test_to_sql(self, tmp_path, make_sql_connection, index, conn_type):\n        table_name = f\"test_to_sql_{str(index)}\"\n        modin_df, pandas_df = 
create_test_dfs(TEST_DATA)\n\n        # We do not pass the table name so the fixture won't generate a table\n        conn = make_sql_connection(tmp_path / f\"{table_name}_modin.db\")\n        if conn_type.startswith(\"sqlalchemy\"):\n            conn = sa.create_engine(conn)\n            if conn_type == \"sqlalchemy+connect\":\n                conn = conn.connect()\n        modin_df.to_sql(table_name, conn, index=index)\n        df_modin_sql = pandas.read_sql(\n            table_name, con=conn, index_col=\"index\" if index else None\n        )\n\n        # We do not pass the table name so the fixture won't generate a table\n        conn = make_sql_connection(tmp_path / f\"{table_name}_pandas.db\")\n        if conn_type.startswith(\"sqlalchemy\"):\n            conn = sa.create_engine(conn)\n            if conn_type == \"sqlalchemy+connect\":\n                conn = conn.connect()\n        pandas_df.to_sql(table_name, conn, index=index)\n        df_pandas_sql = pandas.read_sql(\n            table_name, con=conn, index_col=\"index\" if index else None\n        )\n\n        assert df_modin_sql.sort_index().equals(df_pandas_sql.sort_index())\n\n\n@pytest.mark.filterwarnings(default_to_pandas_ignore_string)\nclass TestHtml:\n    @pytest.mark.skipif(\n        platform.system() == \"Windows\",\n        reason=\"https://github.com/modin-project/modin/issues/7497\",\n    )\n    def test_read_html(self, make_html_file):\n        eval_io(fn_name=\"read_html\", io=make_html_file())\n\n    def test_to_html(self, tmp_path):\n        modin_df, pandas_df = create_test_dfs(TEST_DATA)\n\n        eval_to_file(\n            tmp_path,\n            modin_obj=modin_df,\n            pandas_obj=pandas_df,\n            fn=\"to_html\",\n            extension=\"html\",\n        )\n\n\n@pytest.mark.filterwarnings(default_to_pandas_ignore_string)\nclass TestFwf:\n    @pytest.mark.parametrize(\"pathlike\", [False, True])\n    def test_fwf_file(self, make_fwf_file, pathlike):\n        fwf_data = 
(\n            \"id8141  360.242940  149.910199 11950.7\\n\"\n            + \"id1594  444.953632  166.985655 11788.4\\n\"\n            + \"id1849  364.136849  183.628767 11806.2\\n\"\n            + \"id1230  413.836124  184.375703 11916.8\\n\"\n            + \"id1948  502.953953  173.237159 12468.3\\n\"\n        )\n        unique_filename = make_fwf_file(fwf_data=fwf_data)\n\n        colspecs = [(0, 6), (8, 20), (21, 33), (34, 43)]\n        df = pd.read_fwf(\n            Path(unique_filename) if pathlike else unique_filename,\n            colspecs=colspecs,\n            header=None,\n            index_col=0,\n        )\n        assert isinstance(df, pd.DataFrame)\n\n    @pytest.mark.parametrize(\n        \"kwargs\",\n        [\n            {\n                \"colspecs\": [\n                    (0, 11),\n                    (11, 15),\n                    (19, 24),\n                    (27, 32),\n                    (35, 40),\n                    (43, 48),\n                    (51, 56),\n                    (59, 64),\n                    (67, 72),\n                    (75, 80),\n                    (83, 88),\n                    (91, 96),\n                    (99, 104),\n                    (107, 112),\n                ],\n                \"names\": [\"stationID\", \"year\", 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],\n                \"na_values\": [\"-9999\"],\n                \"index_col\": [\"stationID\", \"year\"],\n            },\n            {\n                \"widths\": [20, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8],\n                \"names\": [\"id\", 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],\n                \"index_col\": [0],\n            },\n        ],\n    )\n    def test_fwf_file_colspecs_widths(self, make_fwf_file, kwargs):\n        unique_filename = make_fwf_file()\n\n        modin_df = pd.read_fwf(unique_filename, **kwargs)\n        pandas_df = pd.read_fwf(unique_filename, **kwargs)\n\n        df_equals(modin_df, pandas_df)\n\n    @pytest.mark.parametrize(\n 
       \"usecols\",\n        [\n            [\"a\"],\n            [\"a\", \"b\", \"d\"],\n            [0, 1, 3],\n        ],\n    )\n    def test_fwf_file_usecols(self, make_fwf_file, usecols):\n        fwf_data = (\n            \"a       b           c          d\\n\"\n            + \"id8141  360.242940  149.910199 11950.7\\n\"\n            + \"id1594  444.953632  166.985655 11788.4\\n\"\n            + \"id1849  364.136849  183.628767 11806.2\\n\"\n            + \"id1230  413.836124  184.375703 11916.8\\n\"\n            + \"id1948  502.953953  173.237159 12468.3\\n\"\n        )\n        eval_io(\n            fn_name=\"read_fwf\",\n            # read_fwf kwargs\n            filepath_or_buffer=make_fwf_file(fwf_data=fwf_data),\n            usecols=usecols,\n        )\n\n    @pytest.mark.parametrize(\n        \"dtype_backend\", [lib.no_default, \"numpy_nullable\", \"pyarrow\"]\n    )\n    def test_read_fwf_dtype_backend(self, make_fwf_file, dtype_backend):\n        unique_filename = make_fwf_file()\n\n        def comparator(df1, df2):\n            df_equals(df1, df2)\n            df_equals(df1.dtypes, df2.dtypes)\n\n        eval_io(\n            fn_name=\"read_fwf\",\n            # read_fwf kwargs\n            filepath_or_buffer=unique_filename,\n            dtype_backend=dtype_backend,\n            comparator=comparator,\n        )\n\n    def test_fwf_file_chunksize(self, make_fwf_file):\n        unique_filename = make_fwf_file()\n\n        # Tests __next__ and correctness of reader as an iterator\n        rdf_reader = pd.read_fwf(unique_filename, chunksize=5)\n        pd_reader = pandas.read_fwf(unique_filename, chunksize=5)\n\n        for modin_df, pd_df in zip(rdf_reader, pd_reader):\n            df_equals(modin_df, pd_df)\n\n        # Tests that get_chunk works correctly\n        rdf_reader = pd.read_fwf(unique_filename, chunksize=1)\n        pd_reader = pandas.read_fwf(unique_filename, chunksize=1)\n\n        modin_df = rdf_reader.get_chunk(1)\n        pd_df = 
pd_reader.get_chunk(1)\n\n        df_equals(modin_df, pd_df)\n\n        # Tests that read works correctly\n        rdf_reader = pd.read_fwf(unique_filename, chunksize=1)\n        pd_reader = pandas.read_fwf(unique_filename, chunksize=1)\n\n        modin_df = rdf_reader.read()\n        pd_df = pd_reader.read()\n\n        df_equals(modin_df, pd_df)\n\n    @pytest.mark.parametrize(\"nrows\", [13, None])\n    def test_fwf_file_skiprows(self, make_fwf_file, nrows):\n        unique_filename = make_fwf_file()\n\n        eval_io(\n            fn_name=\"read_fwf\",\n            # read_fwf kwargs\n            filepath_or_buffer=unique_filename,\n            skiprows=2,\n            nrows=nrows,\n        )\n\n        eval_io(\n            fn_name=\"read_fwf\",\n            # read_fwf kwargs\n            filepath_or_buffer=unique_filename,\n            usecols=[0, 4, 7],\n            skiprows=[2, 5],\n            nrows=nrows,\n        )\n\n    def test_fwf_file_index_col(self, make_fwf_file):\n        fwf_data = (\n            \"a       b           c          d\\n\"\n            + \"id8141  360.242940  149.910199 11950.7\\n\"\n            + \"id1594  444.953632  166.985655 11788.4\\n\"\n            + \"id1849  364.136849  183.628767 11806.2\\n\"\n            + \"id1230  413.836124  184.375703 11916.8\\n\"\n            + \"id1948  502.953953  173.237159 12468.3\\n\"\n        )\n        eval_io(\n            fn_name=\"read_fwf\",\n            # read_fwf kwargs\n            filepath_or_buffer=make_fwf_file(fwf_data=fwf_data),\n            index_col=\"c\",\n        )\n\n    def test_fwf_file_skipfooter(self, make_fwf_file):\n        eval_io(\n            fn_name=\"read_fwf\",\n            # read_fwf kwargs\n            filepath_or_buffer=make_fwf_file(),\n            skipfooter=2,\n        )\n\n    def test_fwf_file_parse_dates(self, make_fwf_file):\n        dates = pandas.date_range(\"2000\", freq=\"h\", periods=10)\n        fwf_data = \"col1 col2        col3 col4\"\n        for 
i in range(10, 20):\n            fwf_data = fwf_data + \"\\n{col1}   {col2}  {col3}   {col4}\".format(\n                col1=str(i),\n                col2=str(dates[i - 10].date()),\n                col3=str(i),\n                col4=str(dates[i - 10].time()),\n            )\n        unique_filename = make_fwf_file(fwf_data=fwf_data)\n\n        eval_io(\n            fn_name=\"read_fwf\",\n            # read_fwf kwargs\n            filepath_or_buffer=unique_filename,\n            parse_dates=[[\"col2\", \"col4\"]],\n        )\n\n        eval_io(\n            fn_name=\"read_fwf\",\n            # read_fwf kwargs\n            filepath_or_buffer=unique_filename,\n            parse_dates={\"time\": [\"col2\", \"col4\"]},\n        )\n\n    @pytest.mark.parametrize(\n        \"read_mode\",\n        [\n            \"r\",\n            \"rb\",\n        ],\n    )\n    def test_read_fwf_file_handle(self, make_fwf_file, read_mode):\n        with open(make_fwf_file(), mode=read_mode) as buffer:\n            df_pandas = pandas.read_fwf(buffer)\n            buffer.seek(0)\n            df_modin = pd.read_fwf(buffer)\n            df_equals(df_modin, df_pandas)\n\n    def test_read_fwf_empty_frame(self, make_fwf_file):\n        kwargs = {\n            \"usecols\": [0],\n            \"index_col\": 0,\n        }\n        unique_filename = make_fwf_file()\n\n        modin_df = pd.read_fwf(unique_filename, **kwargs)\n        pandas_df = pandas.read_fwf(unique_filename, **kwargs)\n\n        df_equals(modin_df, pandas_df)\n\n    @pytest.mark.parametrize(\n        \"storage_options_extra\",\n        [{\"anon\": False}, {\"anon\": True}, {\"key\": \"123\", \"secret\": \"123\"}],\n    )\n    def test_read_fwf_s3(self, s3_resource, s3_storage_options, storage_options_extra):\n        expected_exception = None\n        if \"anon\" in storage_options_extra:\n            expected_exception = PermissionError(\"Forbidden\")\n        eval_io(\n            fn_name=\"read_fwf\",\n            
filepath_or_buffer=\"s3://modin-test/modin-bugs/test_data.fwf\",\n            storage_options=s3_storage_options | storage_options_extra,\n            expected_exception=expected_exception,\n        )\n\n\n@pytest.mark.filterwarnings(default_to_pandas_ignore_string)\nclass TestGbq:\n    @pytest.mark.skip(reason=\"Can not pass without GBQ access\")\n    def test_read_gbq(self):\n        # Test API, but do not supply credentials until credits can be secured.\n        with pytest.raises(\n            ValueError, match=\"Could not determine project ID and one was not supplied.\"\n        ):\n            pd.read_gbq(\"SELECT 1\")\n\n    @pytest.mark.skip(reason=\"Can not pass without GBQ access\")\n    def test_to_gbq(self):\n        modin_df, _ = create_test_dfs(TEST_DATA)\n        # Test API, but do not supply credentials until credits can be secured.\n        with pytest.raises(\n            ValueError, match=\"Could not determine project ID and one was not supplied.\"\n        ):\n            modin_df.to_gbq(\"modin.table\")\n\n    def test_read_gbq_mock(self):\n        test_args = (\"fake_query\",)\n        test_kwargs = inspect.signature(pd.read_gbq).parameters.copy()\n        test_kwargs.update(project_id=\"test_id\", dialect=\"standart\")\n        test_kwargs.pop(\"query\", None)\n        with mock.patch(\n            \"pandas.read_gbq\", return_value=pandas.DataFrame([])\n        ) as read_gbq:\n            pd.read_gbq(*test_args, **test_kwargs)\n        read_gbq.assert_called_once_with(*test_args, **test_kwargs)\n\n\n@pytest.mark.filterwarnings(default_to_pandas_ignore_string)\nclass TestStata:\n    def test_read_stata(self, make_stata_file):\n        eval_io(\n            fn_name=\"read_stata\",\n            # read_stata kwargs\n            filepath_or_buffer=make_stata_file(),\n        )\n\n    def test_to_stata(self, tmp_path):\n        modin_df, pandas_df = create_test_dfs(TEST_DATA)\n        eval_to_file(\n            tmp_path,\n            
modin_obj=modin_df,\n            pandas_obj=pandas_df,\n            fn=\"to_stata\",\n            extension=\"stata\",\n        )\n\n\n@pytest.mark.filterwarnings(default_to_pandas_ignore_string)\nclass TestSas:\n    def test_read_sas(self):\n        eval_io(\n            fn_name=\"read_sas\",\n            # read_sas kwargs\n            filepath_or_buffer=\"modin/tests/pandas/data/airline.sas7bdat\",\n        )\n\n\n@pytest.mark.filterwarnings(default_to_pandas_ignore_string)\nclass TestFeather:\n    def test_read_feather(self, make_feather_file):\n        eval_io(\n            fn_name=\"read_feather\",\n            # read_feather kwargs\n            path=make_feather_file(),\n        )\n\n    @pytest.mark.parametrize(\n        \"dtype_backend\", [lib.no_default, \"numpy_nullable\", \"pyarrow\"]\n    )\n    def test_read_feather_dtype_backend(self, make_feather_file, dtype_backend):\n        def comparator(df1, df2):\n            df_equals(df1, df2)\n            df_equals(df1.dtypes, df2.dtypes)\n\n        eval_io(\n            fn_name=\"read_feather\",\n            # read_feather kwargs\n            path=make_feather_file(),\n            dtype_backend=dtype_backend,\n            comparator=comparator,\n        )\n\n    @pytest.mark.parametrize(\n        \"storage_options_extra\",\n        [{\"anon\": False}, {\"anon\": True}, {\"key\": \"123\", \"secret\": \"123\"}],\n    )\n    def test_read_feather_s3(\n        self, s3_resource, s3_storage_options, storage_options_extra\n    ):\n        expected_exception = None\n        if \"anon\" in storage_options_extra:\n            expected_exception = PermissionError(\"Forbidden\")\n        eval_io(\n            fn_name=\"read_feather\",\n            path=\"s3://modin-test/modin-bugs/test_data.feather\",\n            storage_options=s3_storage_options | storage_options_extra,\n            expected_exception=expected_exception,\n        )\n\n    def test_read_feather_path_object(self, make_feather_file):\n        
eval_io(\n            fn_name=\"read_feather\",\n            path=Path(make_feather_file()),\n        )\n\n    def test_to_feather(self, tmp_path):\n        modin_df, pandas_df = create_test_dfs(TEST_DATA)\n        eval_to_file(\n            tmp_path,\n            modin_obj=modin_df,\n            pandas_obj=pandas_df,\n            fn=\"to_feather\",\n            extension=\"feather\",\n        )\n\n    def test_read_feather_with_index_metadata(self, tmp_path):\n        # see: https://github.com/modin-project/modin/issues/6212\n        df = pandas.DataFrame({\"a\": [1, 2, 3]}, index=[0, 1, 2])\n        assert not isinstance(df.index, pandas.RangeIndex)\n\n        path = get_unique_filename(extension=\".feather\", data_dir=tmp_path)\n        df.to_feather(path)\n        eval_io(\n            fn_name=\"read_feather\",\n            path=path,\n        )\n\n\n@pytest.mark.filterwarnings(default_to_pandas_ignore_string)\nclass TestClipboard:\n    @pytest.mark.skip(reason=\"No clipboard in CI\")\n    def test_read_clipboard(self):\n        setup_clipboard()\n\n        eval_io(fn_name=\"read_clipboard\")\n\n    @pytest.mark.skip(reason=\"No clipboard in CI\")\n    def test_to_clipboard(self):\n        modin_df, pandas_df = create_test_dfs(TEST_DATA)\n\n        modin_df.to_clipboard()\n        modin_as_clip = pandas.read_clipboard()\n\n        pandas_df.to_clipboard()\n        pandas_as_clip = pandas.read_clipboard()\n\n        assert modin_as_clip.equals(pandas_as_clip)\n\n\n@pytest.mark.filterwarnings(default_to_pandas_ignore_string)\nclass TestPickle:\n    def test_read_pickle(self, make_pickle_file):\n        eval_io(\n            fn_name=\"read_pickle\",\n            # read_pickle kwargs\n            filepath_or_buffer=make_pickle_file(),\n        )\n\n    def test_to_pickle(self, tmp_path):\n        modin_df, _ = create_test_dfs(TEST_DATA)\n\n        unique_filename_modin = get_unique_filename(extension=\"pkl\", data_dir=tmp_path)\n\n        
modin_df.to_pickle(unique_filename_modin)\n        recreated_modin_df = pd.read_pickle(unique_filename_modin)\n\n        df_equals(modin_df, recreated_modin_df)\n\n\n@pytest.mark.filterwarnings(default_to_pandas_ignore_string)\nclass TestXml:\n    @pytest.mark.skipif(\n        platform.system() == \"Windows\",\n        reason=\"https://github.com/modin-project/modin/issues/7497\",\n    )\n    def test_read_xml(self):\n        # example from pandas\n        data = \"\"\"<?xml version='1.0' encoding='utf-8'?>\n<data xmlns=\"http://example.com\">\n <row>\n   <shape>square</shape>\n   <degrees>360</degrees>\n   <sides>4.0</sides>\n </row>\n <row>\n   <shape>circle</shape>\n   <degrees>360</degrees>\n   <sides/>\n </row>\n <row>\n   <shape>triangle</shape>\n   <degrees>180</degrees>\n   <sides>3.0</sides>\n </row>\n</data>\n\"\"\"\n        eval_io(\"read_xml\", path_or_buffer=data)\n\n\n@pytest.mark.filterwarnings(default_to_pandas_ignore_string)\nclass TestOrc:\n    # It's not easy to add infrastructure for `orc` format.\n    # In case of defaulting to pandas, it's enough\n    # to check that the parameters are passed to pandas.\n    def test_read_orc(self):\n        test_args = (\"fake_path\",)\n        test_kwargs = dict(\n            columns=[\"A\"],\n            dtype_backend=lib.no_default,\n            filesystem=None,\n            fake_kwarg=\"some_pyarrow_parameter\",\n        )\n        with mock.patch(\n            \"pandas.read_orc\", return_value=pandas.DataFrame([])\n        ) as read_orc:\n            pd.read_orc(*test_args, **test_kwargs)\n        read_orc.assert_called_once_with(*test_args, **test_kwargs)\n\n\n@pytest.mark.filterwarnings(default_to_pandas_ignore_string)\nclass TestSpss:\n    # It's not easy to add infrastructure for `spss` format.\n    # In case of defaulting to pandas, it's enough\n    # to check that the parameters are passed to pandas.\n    def test_read_spss(self):\n        test_args = (\"fake_path\",)\n        test_kwargs = dict(\n 
           usecols=[\"A\"], convert_categoricals=False, dtype_backend=lib.no_default\n        )\n        with mock.patch(\n            \"pandas.read_spss\", return_value=pandas.DataFrame([])\n        ) as read_spss:\n            pd.read_spss(*test_args, **test_kwargs)\n        read_spss.assert_called_once_with(*test_args, **test_kwargs)\n\n\n@pytest.mark.filterwarnings(default_to_pandas_ignore_string)\ndef test_json_normalize():\n    # example from pandas\n    data = [\n        {\"id\": 1, \"name\": {\"first\": \"Coleen\", \"last\": \"Volk\"}},\n        {\"name\": {\"given\": \"Mark\", \"family\": \"Regner\"}},\n        {\"id\": 2, \"name\": \"Faye Raker\"},\n    ]\n    eval_io(\"json_normalize\", data=data)\n\n\n@pytest.mark.filterwarnings(default_to_pandas_ignore_string)\ndef test_from_arrow():\n    _, pandas_df = create_test_dfs(TEST_DATA)\n    modin_df = from_arrow(pa.Table.from_pandas(pandas_df))\n    df_equals(modin_df, pandas_df)\n\n\n@pytest.mark.skipif(\n    condition=Engine.get() != \"Ray\",\n    reason=\"Distributed 'from_pandas' is only available for Ray engine\",\n)\n@pytest.mark.parametrize(\"modify_config\", [{AsyncReadMode: True}], indirect=True)\ndef test_distributed_from_pandas(modify_config):\n    pandas_df = pandas.DataFrame({f\"col{i}\": np.arange(200_000) for i in range(64)})\n    modin_df = pd.DataFrame(pandas_df)\n    df_equals(modin_df, pandas_df)\n\n\n@pytest.mark.filterwarnings(default_to_pandas_ignore_string)\ndef test_from_spmatrix():\n    data = sparse.eye(3)\n    with pytest.warns(UserWarning, match=\"defaulting to pandas.*\"):\n        modin_df = pd.DataFrame.sparse.from_spmatrix(data)\n    pandas_df = pandas.DataFrame.sparse.from_spmatrix(data)\n    df_equals(modin_df, pandas_df)\n\n\n@pytest.mark.filterwarnings(default_to_pandas_ignore_string)\ndef test_to_dense():\n    data = {\"col1\": pandas.arrays.SparseArray([0, 1, 0])}\n    modin_df, pandas_df = create_test_dfs(data)\n    df_equals(modin_df.sparse.to_dense(), 
pandas_df.sparse.to_dense())\n\n\n@pytest.mark.filterwarnings(default_to_pandas_ignore_string)\ndef test_to_dict_dataframe():\n    modin_df, _ = create_test_dfs(TEST_DATA)\n    assert modin_df.to_dict() == to_pandas(modin_df).to_dict()\n\n\n@pytest.mark.parametrize(\n    \"kwargs\",\n    [\n        pytest.param({}, id=\"no_kwargs\"),\n        pytest.param({\"into\": dict}, id=\"into_dict\"),\n        pytest.param({\"into\": defaultdict(list)}, id=\"into_defaultdict\"),\n    ],\n)\n@pytest.mark.filterwarnings(default_to_pandas_ignore_string)\ndef test_to_dict_series(kwargs):\n    eval_general(\n        *[df.iloc[:, 0] for df in create_test_dfs(utils_test_data[\"int_data\"])],\n        lambda df: df.to_dict(**kwargs),\n    )\n\n\n@pytest.mark.filterwarnings(default_to_pandas_ignore_string)\ndef test_to_latex():\n    modin_df, _ = create_test_dfs(TEST_DATA)\n    assert modin_df.to_latex() == to_pandas(modin_df).to_latex()\n\n\n@pytest.mark.filterwarnings(default_to_pandas_ignore_string)\n@pytest.mark.skipif(\n    platform.system() == \"Windows\",\n    reason=\"https://github.com/modin-project/modin/issues/7497\",\n)\ndef test_to_xml():\n    # `lxml` is a required dependency for `to_xml`, but optional for Modin.\n    # For some engines we do not install it.\n    pytest.importorskip(\"lxml\")\n    modin_df, _ = create_test_dfs(TEST_DATA)\n    assert modin_df.to_xml() == to_pandas(modin_df).to_xml()\n\n\n@pytest.mark.filterwarnings(default_to_pandas_ignore_string)\ndef test_to_period():\n    index = pandas.DatetimeIndex(\n        pandas.date_range(\"2000\", freq=\"h\", periods=len(TEST_DATA[\"col1\"]))\n    )\n    modin_df, pandas_df = create_test_dfs(TEST_DATA, index=index)\n    df_equals(modin_df.to_period(), pandas_df.to_period())\n\n\n@pytest.mark.xfail(\n    Engine.get() == \"Ray\" and version.parse(ray.__version__) <= version.parse(\"2.9.3\"),\n    reason=\"Ray-2.9.3 has a problem using pandas 2.2.0. 
It will be resolved in the next release of Ray.\",\n)\n@pytest.mark.skipif(\n    condition=Engine.get() != \"Ray\",\n    reason=\"Modin Dataframe can only be converted to a Ray Dataset if Modin uses a Ray engine.\",\n)\n@pytest.mark.filterwarnings(default_to_pandas_ignore_string)\ndef test_df_to_ray():\n    index = pandas.DatetimeIndex(\n        pandas.date_range(\"2000\", freq=\"h\", periods=len(TEST_DATA[\"col1\"]))\n    )\n    modin_df, pandas_df = create_test_dfs(TEST_DATA, index=index)\n    ray_dataset = modin_df.modin.to_ray()\n    df_equals(ray_dataset.to_pandas(), pandas_df)\n\n\n@pytest.mark.xfail(\n    Engine.get() == \"Ray\" and version.parse(ray.__version__) <= version.parse(\"2.9.3\"),\n    reason=\"Ray-2.9.3 has a problem using pandas 2.2.0. It will be resolved in the next release of Ray.\",\n)\n@pytest.mark.skipif(\n    condition=Engine.get() != \"Ray\",\n    reason=\"Modin Dataframe can only be converted to a Ray Dataset if Modin uses a Ray engine.\",\n)\n@pytest.mark.filterwarnings(default_to_pandas_ignore_string)\ndef test_series_to_ray():\n    index = pandas.DatetimeIndex(\n        pandas.date_range(\"2000\", freq=\"h\", periods=len(TEST_DATA[\"col1\"]))\n    )\n    # A Pandas DataFrame with column names of non-str types is not supported by Ray Dataset.\n    index = [str(x) for x in index]\n    pandas_df = pandas.DataFrame(TEST_DATA, index=index)\n    pandas_s = pandas_df.iloc[0]\n    modin_s = pd.Series(pandas_s)\n    ray_dataset = modin_s.modin.to_ray()\n    df_equals(ray_dataset.to_pandas().squeeze(), pandas_s)\n\n\n@pytest.mark.xfail(\n    Engine.get() == \"Ray\" and version.parse(ray.__version__) <= version.parse(\"2.9.3\"),\n    reason=\"Ray-2.9.3 has a problem using pandas 2.2.0. 
It will be resolved in the next release of Ray.\",\n)\n@pytest.mark.skipif(\n    condition=Engine.get() != \"Ray\",\n    reason=\"Ray Dataset can only be converted to a Modin Dataframe if Modin uses a Ray engine.\",\n)\n@pytest.mark.filterwarnings(default_to_pandas_ignore_string)\ndef test_from_ray():\n    index = pandas.DatetimeIndex(\n        pandas.date_range(\"2000\", freq=\"h\", periods=len(TEST_DATA[\"col1\"]))\n    )\n    modin_df, pandas_df = create_test_dfs(TEST_DATA, index=index)\n    ray_df = ray.data.from_pandas(pandas_df)\n    result_df = from_ray(ray_df)\n    df_equals(result_df, modin_df)\n\n\n@pytest.mark.skipif(\n    condition=Engine.get() != \"Dask\",\n    reason=\"Modin DataFrame can only be converted to a Dask DataFrame if Modin uses a Dask engine.\",\n)\n@pytest.mark.filterwarnings(default_to_pandas_ignore_string)\ndef test_df_to_dask():\n    index = pandas.DatetimeIndex(\n        pandas.date_range(\"2000\", freq=\"h\", periods=len(TEST_DATA[\"col1\"]))\n    )\n\n    modin_df, pandas_df = create_test_dfs(TEST_DATA, index=index)\n\n    dask_df = modin_df.modin.to_dask()\n    df_equals(dask_df.compute(), pandas_df)\n\n\n@pytest.mark.skipif(\n    condition=Engine.get() != \"Dask\",\n    reason=\"Modin DataFrame can only be converted to a Dask DataFrame if Modin uses a Dask engine.\",\n)\n@pytest.mark.filterwarnings(default_to_pandas_ignore_string)\ndef test_series_to_dask():\n    modin_s, pandas_s = create_test_series(TEST_DATA[\"col1\"])\n\n    dask_series = modin_s.modin.to_dask()\n    df_equals(dask_series.compute(), pandas_s)\n\n\n@pytest.mark.skipif(\n    condition=Engine.get() != \"Dask\",\n    reason=\"Dask DataFrame can only be converted to a Modin DataFrame if Modin uses a Dask engine.\",\n)\n@pytest.mark.filterwarnings(default_to_pandas_ignore_string)\ndef test_from_dask():\n    import dask.dataframe as dd\n\n    index = pandas.DatetimeIndex(\n        pandas.date_range(\"2000\", freq=\"h\", periods=len(TEST_DATA[\"col1\"]))\n    )\n    
modin_df, pandas_df = create_test_dfs(TEST_DATA, index=index)\n\n    dask_df = dd.from_pandas(pandas_df, npartitions=NPartitions.get())\n\n    result_df = from_dask(dask_df)\n    df_equals(result_df, modin_df)\n\n\n@pytest.mark.skipif(\n    condition=Engine.get() not in (\"Ray\", \"Dask\", \"Unidist\"),\n    reason=\"Modin DataFrame can only be created from map if Modin uses Ray, Dask or MPI engine.\",\n)\n@pytest.mark.filterwarnings(default_to_pandas_ignore_string)\ndef test_from_map():\n    factor = 3\n    data = [1] * factor + [2] * factor + [3] * factor\n    expected_df = pd.DataFrame(data, index=[0, 1, 2] * factor)\n\n    def map_func(x, factor):\n        return [x] * factor\n\n    result_df = from_map(map_func, [1, 2, 3], 3)\n    df_equals(result_df, expected_df)\n"
  },
  {
    "path": "modin/tests/pandas/test_repartition.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport re\n\nimport numpy as np\nimport pytest\n\nimport modin.pandas as pd\nfrom modin.config import context\nfrom modin.core.storage_formats.pandas.native_query_compiler import (\n    _NO_REPARTITION_ON_NATIVE_EXECUTION_EXCEPTION_MESSAGE,\n)\nfrom modin.tests.test_utils import current_execution_is_native\nfrom modin.utils import get_current_execution\n\n\n@pytest.fixture(autouse=True)\ndef set_npartitions():\n    with context(NPartitions=4):\n        yield\n\n\n@pytest.mark.skipif(\n    current_execution_is_native(), reason=\"Native execution does not have partitions.\"\n)\n@pytest.mark.skipif(\n    get_current_execution() == \"BaseOnPython\",\n    reason=\"BaseOnPython chooses partition numbers differently\",\n)\n@pytest.mark.parametrize(\"axis\", [0, 1, None])\n@pytest.mark.parametrize(\"dtype\", [\"DataFrame\", \"Series\"])\ndef test_repartition(axis, dtype):\n    if axis in (1, None) and dtype == \"Series\":\n        # no sense for Series\n        return\n\n    df = pd.DataFrame({\"col1\": [1, 2], \"col2\": [5, 6]})\n    df2 = pd.DataFrame({\"col3\": [9, 4]})\n\n    df = pd.concat([df, df2], axis=1)\n    df = pd.concat([df, df], axis=0)\n\n    obj = df if 
dtype == \"DataFrame\" else df[\"col1\"]\n\n    source_shapes = {\n        \"DataFrame\": (2, 2),\n        \"Series\": (2, 1),\n    }\n    # check that the test makes sense\n    assert obj._query_compiler._modin_frame._partitions.shape == source_shapes[dtype]\n\n    kwargs = {\"axis\": axis} if dtype == \"DataFrame\" else {}\n    obj = obj._repartition(**kwargs)\n\n    if dtype == \"DataFrame\":\n        results = {\n            None: (1, 1),\n            0: (1, 2),\n            1: (2, 1),\n        }\n    else:\n        results = {\n            None: (1, 1),\n            0: (1, 1),\n            1: (2, 1),\n        }\n\n    assert obj._query_compiler._modin_frame._partitions.shape == results[axis]\n\n\n@pytest.mark.skipif(\n    current_execution_is_native(), reason=\"Native execution does not have partitions.\"\n)\ndef test_repartition_7170():\n    with context(MinColumnPartitionSize=102, NPartitions=5):\n        df = pd.DataFrame(np.random.rand(10000, 100))\n        _ = df._repartition(axis=1).to_numpy()\n\n\n@pytest.mark.skipif(\n    not current_execution_is_native(), reason=\"This is a native execution test.\"\n)\ndef test_repartition_not_valid_on_native_execution():\n    df = pd.DataFrame()\n    with pytest.raises(\n        Exception,\n        match=re.escape(_NO_REPARTITION_ON_NATIVE_EXECUTION_EXCEPTION_MESSAGE),\n    ):\n        df._repartition()\n"
  },
  {
    "path": "modin/tests/pandas/test_reshape.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport contextlib\n\nimport numpy as np\nimport pandas\nimport pytest\n\nimport modin.pandas as pd\nfrom modin.config import StorageFormat\nfrom modin.tests.test_utils import (\n    current_execution_is_native,\n    df_or_series_using_native_execution,\n    warns_that_defaulting_to_pandas_if,\n)\n\nfrom .utils import df_equals, test_data_values\n\n\ndef test_get_dummies():\n    s = pd.Series(list(\"abca\"))\n    with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):\n        pd.get_dummies(s)\n\n    s1 = [\"a\", \"b\", np.nan]\n    with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):\n        pd.get_dummies(s1)\n\n    with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):\n        pd.get_dummies(s1, dummy_na=True)\n\n    data = {\"A\": [\"a\", \"b\", \"a\"], \"B\": [\"b\", \"a\", \"c\"], \"C\": [1, 2, 3]}\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n\n    modin_result = pd.get_dummies(modin_df, prefix=[\"col1\", \"col2\"])\n    pandas_result = pandas.get_dummies(pandas_df, prefix=[\"col1\", \"col2\"])\n    df_equals(modin_result, pandas_result)\n    assert 
modin_result._to_pandas().columns.equals(pandas_result.columns)\n    assert modin_result.shape == pandas_result.shape\n\n    modin_result = pd.get_dummies(pd.DataFrame(pd.Series(list(\"abcdeabac\"))))\n    pandas_result = pandas.get_dummies(\n        pandas.DataFrame(pandas.Series(list(\"abcdeabac\")))\n    )\n    df_equals(modin_result, pandas_result)\n    assert modin_result._to_pandas().columns.equals(pandas_result.columns)\n    assert modin_result.shape == pandas_result.shape\n\n    with pytest.raises(NotImplementedError):\n        pd.get_dummies(modin_df, prefix=[\"col1\", \"col2\"], sparse=True)\n\n    with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):\n        pd.get_dummies(pd.Series(list(\"abcaa\")))\n\n    with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):\n        pd.get_dummies(pd.Series(list(\"abcaa\")), drop_first=True)\n\n    with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):\n        pd.get_dummies(pd.Series(list(\"abc\")), dtype=float)\n\n    with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):\n        pd.get_dummies(1)\n\n    # test from #5184\n    pandas_df = pandas.DataFrame({\"a\": [1, 2, 3], \"b\": [4, 5, 6], \"c\": [\"7\", \"8\", \"9\"]})\n    modin_df = pd.DataFrame(pandas_df)\n    pandas_result = pandas.get_dummies(pandas_df, columns=[\"a\", \"b\"])\n    modin_result = pd.get_dummies(modin_df, columns=[\"a\", \"b\"])\n    df_equals(modin_result, pandas_result)\n\n\ndef test_melt():\n    data = test_data_values[0]\n\n    with (\n        pytest.warns(\n            UserWarning, match=r\"`melt` implementation has mismatches with pandas\"\n        )\n        if StorageFormat.get() == \"Pandas\"\n        else contextlib.nullcontext()\n    ):\n        pd.melt(pd.DataFrame(data))\n\n\ndef test_crosstab():\n    a = np.array(\n        [\"foo\", \"foo\", \"foo\", \"foo\", \"bar\", \"bar\", \"bar\", \"bar\", \"foo\", \"foo\", \"foo\"],\n        
dtype=object,\n    )\n    b = np.array(\n        [\"one\", \"one\", \"one\", \"two\", \"one\", \"one\", \"one\", \"two\", \"two\", \"two\", \"one\"],\n        dtype=object,\n    )\n    c = np.array(\n        [\n            \"dull\",\n            \"dull\",\n            \"shiny\",\n            \"dull\",\n            \"dull\",\n            \"shiny\",\n            \"shiny\",\n            \"dull\",\n            \"shiny\",\n            \"shiny\",\n            \"shiny\",\n        ],\n        dtype=object,\n    )\n\n    with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):\n        df = pd.crosstab(a, [b, c], rownames=[\"a\"], colnames=[\"b\", \"c\"])\n        assert isinstance(df, pd.DataFrame)\n\n    foo = pd.Categorical([\"a\", \"b\"], categories=[\"a\", \"b\", \"c\"])\n    bar = pd.Categorical([\"d\", \"e\"], categories=[\"d\", \"e\", \"f\"])\n\n    with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):\n        df = pd.crosstab(foo, bar)\n        assert isinstance(df, pd.DataFrame)\n\n    with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):\n        df = pd.crosstab(foo, bar, dropna=False)\n        assert isinstance(df, pd.DataFrame)\n\n\ndef test_lreshape():\n    data = pd.DataFrame(\n        {\n            \"hr1\": [514, 573],\n            \"hr2\": [545, 526],\n            \"team\": [\"Red Sox\", \"Yankees\"],\n            \"year1\": [2007, 2008],\n            \"year2\": [2008, 2008],\n        }\n    )\n\n    with warns_that_defaulting_to_pandas_if(not current_execution_is_native()):\n        df = pd.lreshape(data, {\"year\": [\"year1\", \"year2\"], \"hr\": [\"hr1\", \"hr2\"]})\n        assert isinstance(df, pd.DataFrame)\n\n    with pytest.raises(ValueError):\n        pd.lreshape(data.to_numpy(), {\"year\": [\"year1\", \"year2\"], \"hr\": [\"hr1\", \"hr2\"]})\n\n\ndef test_wide_to_long():\n    data = pd.DataFrame(\n        {\n            \"hr1\": [514, 573],\n            \"hr2\": [545, 526],\n        
    \"team\": [\"Red Sox\", \"Yankees\"],\n            \"year1\": [2007, 2008],\n            \"year2\": [2008, 2008],\n        }\n    )\n\n    with warns_that_defaulting_to_pandas_if(\n        not df_or_series_using_native_execution(data)\n    ):\n        df = pd.wide_to_long(data, [\"hr\", \"year\"], \"team\", \"index\")\n        assert isinstance(df, pd.DataFrame)\n\n    with pytest.raises(ValueError):\n        pd.wide_to_long(data.to_numpy(), [\"hr\", \"year\"], \"team\", \"index\")\n"
  },
  {
    "path": "modin/tests/pandas/test_rolling.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport numpy as np\nimport pandas\nimport pandas._libs.lib as lib\nimport pytest\n\nimport modin.pandas as pd\nfrom modin.config import NPartitions\n\nfrom .utils import (\n    create_test_dfs,\n    create_test_series,\n    default_to_pandas_ignore_string,\n    df_equals,\n    eval_general,\n    test_data_keys,\n    test_data_values,\n)\n\nNPartitions.put(4)\n\n# Our configuration in pytest.ini requires that we explicitly catch all\n# instances of defaulting to pandas, but some test modules, like this one,\n# have too many such instances.\n# TODO(https://github.com/modin-project/modin/issues/3655): catch all instances\n# of defaulting to pandas.\npytestmark = [\n    pytest.mark.filterwarnings(default_to_pandas_ignore_string),\n    # TO MAKE SURE ALL FUTUREWARNINGS ARE CONSIDERED\n    pytest.mark.filterwarnings(\"error::FutureWarning\"),\n    # ... 
except for this expected Ray warning due to https://github.com/ray-project/ray/issues/54868\n    pytest.mark.filterwarnings(\n        \"ignore:.*In future versions of Ray, Ray will no longer override accelerator visible devices env var if num_gpus=0 or num_gpus=None:FutureWarning\"\n    ),\n    # IGNORE FUTUREWARNINGS MARKS TO CLEANUP OUTPUT\n    pytest.mark.filterwarnings(\n        \"ignore:Support for axis=1 in DataFrame.rolling is deprecated:FutureWarning\"\n    ),\n    # FIXME: these cases are inconsistent between modin and pandas\n    pytest.mark.filterwarnings(\n        \"ignore:.*In a future version of pandas, the provided callable will be used directly.*:FutureWarning\"\n    ),\n]\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"window\", [5, 100])\n@pytest.mark.parametrize(\"min_periods\", [None, 5])\n@pytest.mark.parametrize(\"axis\", [lib.no_default, 1])\n@pytest.mark.parametrize(\n    \"method, kwargs\",\n    [\n        (\"count\", {}),\n        (\"sum\", {}),\n        (\"mean\", {}),\n        (\"var\", {\"ddof\": 0}),\n        (\"std\", {\"ddof\": 0}),\n        (\"min\", {}),\n        (\"max\", {}),\n        (\"skew\", {}),\n        (\"kurt\", {}),\n        (\"apply\", {\"func\": np.sum}),\n        (\"rank\", {}),\n        (\"sem\", {\"ddof\": 0}),\n        (\"quantile\", {\"q\": 0.1}),\n        (\"median\", {}),\n    ],\n)\ndef test_dataframe_rolling(data, window, min_periods, axis, method, kwargs):\n    # Testing of Rolling class\n    modin_df, pandas_df = create_test_dfs(data)\n    if window > len(pandas_df):\n        window = len(pandas_df)\n    eval_general(\n        modin_df,\n        pandas_df,\n        lambda df: getattr(\n            df.rolling(\n                window=window,\n                min_periods=min_periods,\n                win_type=None,\n                center=True,\n                axis=axis,\n            ),\n            method,\n        )(**kwargs),\n    
)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"window\", [5, 100])\n@pytest.mark.parametrize(\"min_periods\", [None, 5])\n@pytest.mark.parametrize(\"axis\", [lib.no_default, 1])\ndef test_dataframe_agg(data, window, min_periods, axis):\n    modin_df, pandas_df = create_test_dfs(data)\n    if window > len(pandas_df):\n        window = len(pandas_df)\n    modin_rolled = modin_df.rolling(\n        window=window, min_periods=min_periods, win_type=None, center=True, axis=axis\n    )\n    pandas_rolled = pandas_df.rolling(\n        window=window, min_periods=min_periods, win_type=None, center=True, axis=axis\n    )\n    df_equals(pandas_rolled.aggregate(np.sum), modin_rolled.aggregate(np.sum))\n    # TODO(https://github.com/modin-project/modin/issues/4260): Once pandas\n    # allows us to rolling aggregate a list of functions over axis 1, test\n    # that, too.\n    if axis != 1:\n        df_equals(\n            pandas_rolled.aggregate([np.sum, np.mean]),\n            modin_rolled.aggregate([np.sum, np.mean]),\n        )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"window\", [5, 100])\n@pytest.mark.parametrize(\"min_periods\", [None, 5])\n@pytest.mark.parametrize(\"axis\", [lib.no_default, 1])\n@pytest.mark.parametrize(\n    \"method, kwargs\",\n    [\n        (\"sum\", {}),\n        (\"mean\", {}),\n        (\"var\", {\"ddof\": 0}),\n        (\"std\", {\"ddof\": 0}),\n    ],\n)\ndef test_dataframe_window(data, window, min_periods, axis, method, kwargs):\n    # Testing of Window class\n    modin_df, pandas_df = create_test_dfs(data)\n    if window > len(pandas_df):\n        window = len(pandas_df)\n    eval_general(\n        modin_df,\n        pandas_df,\n        lambda df: getattr(\n            df.rolling(\n                window=window,\n                min_periods=min_periods,\n                win_type=\"triang\",\n                center=True,\n    
            axis=axis,\n            ),\n            method,\n        )(**kwargs),\n    )\n\n\n@pytest.mark.parametrize(\"axis\", [lib.no_default, \"columns\"])\n@pytest.mark.parametrize(\"on\", [None, \"DateCol\"])\n@pytest.mark.parametrize(\"closed\", [\"both\", \"right\"])\n@pytest.mark.parametrize(\"window\", [3, \"3s\"])\ndef test_dataframe_dt_index(axis, on, closed, window):\n    index = pandas.date_range(\"31/12/2000\", periods=12, freq=\"min\")\n    data = {\"A\": range(12), \"B\": range(12)}\n    pandas_df = pandas.DataFrame(data, index=index)\n    modin_df = pd.DataFrame(data, index=index)\n    if on is not None and axis == lib.no_default and isinstance(window, str):\n        pandas_df[on] = pandas.date_range(\"22/06/1941\", periods=12, freq=\"min\")\n        modin_df[on] = pd.date_range(\"22/06/1941\", periods=12, freq=\"min\")\n    else:\n        on = None\n    if axis == \"columns\":\n        pandas_df = pandas_df.T\n        modin_df = modin_df.T\n    pandas_rolled = pandas_df.rolling(window=window, on=on, axis=axis, closed=closed)\n    modin_rolled = modin_df.rolling(window=window, on=on, axis=axis, closed=closed)\n    if isinstance(window, int):\n        # These functions are very slow on the data from test_rolling\n        df_equals(\n            modin_rolled.corr(modin_df, True), pandas_rolled.corr(pandas_df, True)\n        )\n        df_equals(\n            modin_rolled.corr(modin_df, False), pandas_rolled.corr(pandas_df, False)\n        )\n        df_equals(modin_rolled.cov(modin_df, True), pandas_rolled.cov(pandas_df, True))\n        df_equals(\n            modin_rolled.cov(modin_df, False), pandas_rolled.cov(pandas_df, False)\n        )\n        if axis == lib.no_default:\n            df_equals(\n                modin_rolled.cov(modin_df[modin_df.columns[0]], True),\n                pandas_rolled.cov(pandas_df[pandas_df.columns[0]], True),\n            )\n            df_equals(\n                modin_rolled.corr(modin_df[modin_df.columns[0]], 
True),\n                pandas_rolled.corr(pandas_df[pandas_df.columns[0]], True),\n            )\n    else:\n        df_equals(modin_rolled.count(), pandas_rolled.count())\n        df_equals(modin_rolled.skew(), pandas_rolled.skew())\n        df_equals(\n            modin_rolled.apply(np.sum, raw=True),\n            pandas_rolled.apply(np.sum, raw=True),\n        )\n        df_equals(modin_rolled.aggregate(np.sum), pandas_rolled.aggregate(np.sum))\n        df_equals(modin_rolled.quantile(0.1), pandas_rolled.quantile(0.1))\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"window\", [5, 100])\n@pytest.mark.parametrize(\"min_periods\", [None, 5])\n@pytest.mark.parametrize(\n    \"method, kwargs\",\n    [\n        (\"count\", {}),\n        (\"sum\", {}),\n        (\"mean\", {}),\n        (\"var\", {\"ddof\": 0}),\n        (\"std\", {\"ddof\": 0}),\n        (\"min\", {}),\n        (\"max\", {}),\n        (\"skew\", {}),\n        (\"kurt\", {}),\n        (\"apply\", {\"func\": np.sum}),\n        (\"rank\", {}),\n        (\"sem\", {\"ddof\": 0}),\n        (\"aggregate\", {\"func\": np.sum}),\n        (\"agg\", {\"func\": [np.sum, np.mean]}),\n        (\"quantile\", {\"q\": 0.1}),\n        (\"median\", {}),\n    ],\n)\ndef test_series_rolling(data, window, min_periods, method, kwargs):\n    # Test of Rolling class\n    modin_series, pandas_series = create_test_series(data)\n    if window > len(pandas_series):\n        window = len(pandas_series)\n    eval_general(\n        modin_series,\n        pandas_series,\n        lambda series: getattr(\n            series.rolling(\n                window=window,\n                min_periods=min_periods,\n                win_type=None,\n                center=True,\n            ),\n            method,\n        )(**kwargs),\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"window\", [5, 
100])\n@pytest.mark.parametrize(\"min_periods\", [None, 5])\ndef test_series_corr_cov(data, window, min_periods):\n    modin_series, pandas_series = create_test_series(data)\n    if window > len(pandas_series):\n        window = len(pandas_series)\n    modin_rolled = modin_series.rolling(\n        window=window, min_periods=min_periods, win_type=None, center=True\n    )\n    pandas_rolled = pandas_series.rolling(\n        window=window, min_periods=min_periods, win_type=None, center=True\n    )\n    df_equals(modin_rolled.corr(modin_series), pandas_rolled.corr(pandas_series))\n    df_equals(\n        modin_rolled.cov(modin_series, True), pandas_rolled.cov(pandas_series, True)\n    )\n    df_equals(\n        modin_rolled.cov(modin_series, False), pandas_rolled.cov(pandas_series, False)\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"window\", [5, 100])\n@pytest.mark.parametrize(\"min_periods\", [None, 5])\n@pytest.mark.parametrize(\n    \"method, kwargs\",\n    [\n        (\"sum\", {}),\n        (\"mean\", {}),\n        (\"var\", {\"ddof\": 0}),\n        (\"std\", {\"ddof\": 0}),\n    ],\n)\ndef test_series_window(data, window, min_periods, method, kwargs):\n    # Test of Window class\n    modin_series, pandas_series = create_test_series(data)\n    if window > len(pandas_series):\n        window = len(pandas_series)\n    eval_general(\n        modin_series,\n        pandas_series,\n        lambda series: getattr(\n            series.rolling(\n                window=window,\n                min_periods=min_periods,\n                win_type=\"triang\",\n                center=True,\n            ),\n            method,\n        )(**kwargs),\n    )\n\n\n@pytest.mark.parametrize(\"closed\", [\"both\", \"right\"])\ndef test_series_dt_index(closed):\n    index = pandas.date_range(\"1/1/2000\", periods=12, freq=\"min\")\n    pandas_series = pandas.Series(range(12), index=index)\n    modin_series = 
pd.Series(range(12), index=index)\n\n    pandas_rolled = pandas_series.rolling(\"3s\", closed=closed)\n    modin_rolled = modin_series.rolling(\"3s\", closed=closed)\n    df_equals(modin_rolled.count(), pandas_rolled.count())\n    df_equals(modin_rolled.skew(), pandas_rolled.skew())\n    df_equals(\n        modin_rolled.apply(np.sum, raw=True), pandas_rolled.apply(np.sum, raw=True)\n    )\n    df_equals(modin_rolled.aggregate(np.sum), pandas_rolled.aggregate(np.sum))\n    df_equals(modin_rolled.quantile(0.1), pandas_rolled.quantile(0.1))\n\n\ndef test_api_indexer():\n    modin_df, pandas_df = create_test_dfs(test_data_values[0])\n    indexer = pd.api.indexers.FixedForwardWindowIndexer(window_size=3)\n    pandas_rolled = pandas_df.rolling(window=indexer)\n    modin_rolled = modin_df.rolling(window=indexer)\n    df_equals(modin_rolled.sum(), pandas_rolled.sum())\n\n\ndef test_issue_3512():\n    data = np.random.rand(129)\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n\n    modin_ans = modin_df[0:33].rolling(window=21).mean()\n    pandas_ans = pandas_df[0:33].rolling(window=21).mean()\n\n    df_equals(modin_ans, pandas_ans)\n\n\n### TEST ROLLING WARNINGS ###\n\n\ndef test_rolling_axis_1_depr():\n    index = pandas.date_range(\"31/12/2000\", periods=12, freq=\"min\")\n    data = {\"A\": range(12), \"B\": range(12)}\n    modin_df = pd.DataFrame(data, index=index)\n    with pytest.warns(\n        FutureWarning,\n        match=\"Support for axis=1 in DataFrame.rolling is deprecated\",\n    ):\n        modin_df.rolling(window=3, axis=1)\n"
  },
  {
    "path": "modin/tests/pandas/test_series.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nfrom __future__ import annotations\n\nimport datetime\nimport itertools\nimport json\nimport sys\nimport unittest.mock as mock\nimport warnings\n\nimport matplotlib\nimport numpy as np\nimport pandas\nimport pandas._libs.lib as lib\nimport pytest\nfrom numpy.testing import assert_array_equal\nfrom packaging.version import Version\nfrom pandas.core.indexing import IndexingError\nfrom pandas.errors import PerformanceWarning, SpecificationError\n\nimport modin.pandas as pd\nfrom modin.config import Engine, NPartitions, StorageFormat\nfrom modin.core.storage_formats.pandas.query_compiler_caster import (\n    _assert_casting_functions_wrap_same_implementation,\n)\nfrom modin.pandas.io import to_pandas\nfrom modin.tests.test_utils import (\n    current_execution_is_native,\n    df_or_series_using_native_execution,\n    warns_that_defaulting_to_pandas_if,\n)\nfrom modin.utils import get_current_execution, try_cast_to_pandas\n\nfrom .utils import (\n    RAND_HIGH,\n    RAND_LOW,\n    UNIVERSAL_UNARY_NUMPY_FUNCTIONS_FOR_FLOATS,\n    CustomIntegerForAddition,\n    NonCommutativeMultiplyInteger,\n    agg_func_except_keys,\n    agg_func_except_values,\n    agg_func_keys,\n    
agg_func_values,\n    arg_keys,\n    bool_arg_keys,\n    bool_arg_values,\n    categories_equals,\n    create_test_dfs,\n    create_test_series,\n    default_to_pandas_ignore_string,\n    df_equals,\n    df_equals_with_non_stable_indices,\n    encoding_types,\n    eval_general,\n    generate_multiindex,\n    int_arg_keys,\n    int_arg_values,\n    name_contains,\n    no_numeric_dfs,\n    numeric_dfs,\n    quantiles_keys,\n    quantiles_values,\n    random_state,\n    sort_if_range_partitioning,\n    string_na_rep_keys,\n    string_na_rep_values,\n    string_sep_keys,\n    string_sep_values,\n    test_data,\n    test_data_categorical_keys,\n    test_data_categorical_values,\n    test_data_diff_dtype,\n    test_data_keys,\n    test_data_large_categorical_series_keys,\n    test_data_large_categorical_series_values,\n    test_data_small_keys,\n    test_data_small_values,\n    test_data_values,\n    test_data_with_duplicates_keys,\n    test_data_with_duplicates_values,\n    test_string_data_keys,\n    test_string_data_values,\n    test_string_list_data_keys,\n    test_string_list_data_values,\n)\n\n# Our configuration in pytest.ini requires that we explicitly catch all\n# instances of defaulting to pandas, but some test modules, like this one,\n# have too many such instances.\n# TODO(https://github.com/modin-project/modin/issues/3655): catch all instances\n# of defaulting to pandas.\npytestmark = [\n    pytest.mark.filterwarnings(default_to_pandas_ignore_string),\n    # IGNORE FUTUREWARNINGS MARKS TO CLEANUP OUTPUT\n    pytest.mark.filterwarnings(\n        \"ignore:.*bool is now deprecated and will be removed:FutureWarning\"\n    ),\n    pytest.mark.filterwarnings(\n        \"ignore:first is deprecated and will be removed:FutureWarning\"\n    ),\n    pytest.mark.filterwarnings(\n        \"ignore:last is deprecated and will be removed:FutureWarning\"\n    ),\n]\n\nNPartitions.put(4)\n\n# Force matplotlib to not use any Xwindows backend.\nmatplotlib.use(\"Agg\")\n\n# 
Initialize the environment\npd.DataFrame()\n\n\ndef get_rop(op):\n    if op.startswith(\"__\") and op.endswith(\"__\"):\n        return \"__r\" + op[2:]\n    else:\n        return None\n\n\ndef inter_df_math_helper(\n    modin_series, pandas_series, op, comparator_kwargs=None, expected_exception=None\n):\n    inter_df_math_helper_one_side(\n        modin_series, pandas_series, op, comparator_kwargs, expected_exception\n    )\n    rop = get_rop(op)\n    if rop:\n        inter_df_math_helper_one_side(\n            modin_series, pandas_series, rop, comparator_kwargs, expected_exception\n        )\n\n\ndef inter_df_math_helper_one_side(\n    modin_series,\n    pandas_series,\n    op,\n    comparator_kwargs=None,\n    expected_exception=None,\n):\n    if comparator_kwargs is None:\n        comparator_kwargs = {}\n\n    try:\n        pandas_attr = getattr(pandas_series, op)\n    except Exception as err:\n        with pytest.raises(type(err)):\n            _ = getattr(modin_series, op)\n        return\n    modin_attr = getattr(modin_series, op)\n\n    try:\n        pandas_result = pandas_attr(4)\n    except Exception as err:\n        with pytest.raises(type(err)):\n            try_cast_to_pandas(modin_attr(4))  # force materialization\n    else:\n        modin_result = modin_attr(4)\n        df_equals(modin_result, pandas_result, **comparator_kwargs)\n\n    try:\n        pandas_result = pandas_attr(4.0)\n    except Exception as err:\n        with pytest.raises(type(err)):\n            try_cast_to_pandas(modin_attr(4.0))  # force materialization\n    else:\n        modin_result = modin_attr(4.0)\n        df_equals(modin_result, pandas_result, **comparator_kwargs)\n\n    # These operations don't support non-scalar `other` or have a strange behavior in\n    # the testing environment\n    if op in [\n        \"__divmod__\",\n        \"divmod\",\n        \"rdivmod\",\n        \"floordiv\",\n        \"__floordiv__\",\n        \"rfloordiv\",\n        \"__rfloordiv__\",\n        
\"mod\",\n        \"__mod__\",\n        \"rmod\",\n        \"__rmod__\",\n    ]:\n        return\n\n    eval_general(\n        modin_series,\n        pandas_series,\n        lambda df: (pandas_attr if isinstance(df, pandas.Series) else modin_attr)(df),\n        comparator_kwargs=comparator_kwargs,\n        expected_exception=expected_exception,\n    )\n\n    list_test = random_state.randint(RAND_LOW, RAND_HIGH, size=(modin_series.shape[0]))\n    try:\n        pandas_result = pandas_attr(list_test)\n    except Exception as err:\n        with pytest.raises(type(err)):\n            try_cast_to_pandas(modin_attr(list_test))  # force materialization\n    else:\n        modin_result = modin_attr(list_test)\n        df_equals(modin_result, pandas_result, **comparator_kwargs)\n\n    series_test_modin = pd.Series(list_test, index=modin_series.index)\n    series_test_pandas = pandas.Series(list_test, index=pandas_series.index)\n\n    eval_general(\n        series_test_modin,\n        series_test_pandas,\n        lambda df: (pandas_attr if isinstance(df, pandas.Series) else modin_attr)(df),\n        comparator_kwargs=comparator_kwargs,\n        expected_exception=expected_exception,\n    )\n\n    # Level test\n    new_idx = pandas.MultiIndex.from_tuples(\n        [(i // 4, i // 2, i) for i in modin_series.index]\n    )\n    modin_df_multi_level = modin_series.copy()\n    modin_df_multi_level.index = new_idx\n    # When 'level' parameter is passed, modin's implementation must raise a default-to-pandas warning,\n    # here we first detect whether 'op' takes 'level' parameter at all and only then perform the warning check\n    # reasoning: https://github.com/modin-project/modin/issues/6893\n    try:\n        getattr(modin_df_multi_level, op)(modin_df_multi_level, level=1)\n    except TypeError:\n        # Operation doesn't support 'level' parameter\n        pass\n    else:\n        # Operation supports 'level' parameter, so it makes sense to check for a warning\n        with 
warns_that_defaulting_to_pandas_if(\n            not df_or_series_using_native_execution(modin_df_multi_level)\n        ):\n            getattr(modin_df_multi_level, op)(modin_df_multi_level, level=1)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_to_frame(data):\n    modin_series, pandas_series = create_test_series(data)\n    df_equals(modin_series.to_frame(name=\"miao\"), pandas_series.to_frame(name=\"miao\"))\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_to_list(data):\n    modin_series, pandas_series = create_test_series(data)\n    pd_res = pandas_series.to_list()\n    md_res = modin_series.to_list()\n    assert type(pd_res) is type(md_res)\n    assert np.array_equal(pd_res, md_res, equal_nan=True)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_to_json(data):\n    modin_series, pandas_series = create_test_series(data)\n    pd_res = pandas_series.to_json()\n    md_res = modin_series.to_json()\n    assert type(pd_res) is type(md_res)\n    assert pd_res == md_res\n\n\ndef test_accessing_index_element_as_property():\n    s = pd.Series([10, 20, 30], index=[\"a\", \"b\", \"c\"])\n    assert s.b == 20\n    with pytest.raises(Exception):\n        _ = s.d\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_callable_key_in_getitem(data):\n    modin_series, pandas_series = create_test_series(data)\n    df_equals(\n        modin_series[lambda s: s.index % 2 == 0],\n        pandas_series[lambda s: s.index % 2 == 0],\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_T(data):\n    modin_series, pandas_series = create_test_series(data)\n    df_equals(modin_series.T, pandas_series.T)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test___abs__(data):\n    modin_series, pandas_series = create_test_series(data)\n    
df_equals(modin_series.__abs__(), pandas_series.__abs__())\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test___add__(data):\n    modin_series, pandas_series = create_test_series(data)\n    inter_df_math_helper(modin_series, pandas_series, \"__add__\")\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test___and__(data, request):\n    modin_series, pandas_series = create_test_series(data)\n    expected_exception = None\n    if \"float_nan_data\" in request.node.callspec.id:\n        # FIXME: https://github.com/modin-project/modin/issues/7037\n        expected_exception = False\n    inter_df_math_helper(\n        modin_series,\n        pandas_series,\n        \"__and__\",\n        # https://github.com/modin-project/modin/issues/5966\n        comparator_kwargs={\"check_dtypes\": False},\n        expected_exception=expected_exception,\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"copy_kwargs\", ({\"copy\": True}, {\"copy\": None}, {}))\n@pytest.mark.parametrize(\n    \"get_array, get_array_name\",\n    (\n        (lambda df, copy_kwargs: df.__array__(**copy_kwargs), \"__array__\"),\n        (lambda df, copy_kwargs: np.array(df, **copy_kwargs), \"np.array\"),\n    ),\n)\ndef test___array__(data, copy_kwargs, get_array, get_array_name):\n    if (\n        get_array_name == \"np.array\"\n        and Version(np.__version__) < Version(\"2\")\n        and \"copy\" in copy_kwargs\n        and copy_kwargs[\"copy\"] is None\n    ):\n        pytest.skip(reason=\"np.array does not support copy=None before numpy 2.0\")\n    assert_array_equal(*(get_array(df, copy_kwargs) for df in create_test_series(data)))\n\n\n@pytest.mark.xfail(\n    raises=AssertionError, reason=\"https://github.com/modin-project/modin/issues/4650\"\n)\ndef test___array__copy_false_creates_view():\n    def do_in_place_update_via_copy(series):\n        array = 
np.array(series, copy=False)\n        array[0] += 1\n\n    eval_general(\n        *create_test_series([11]), do_in_place_update_via_copy, __inplace__=True\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test___bool__(data):\n    modin_series, pandas_series = create_test_series(data)\n    try:\n        pandas_result = pandas_series.__bool__()\n    except Exception as err:\n        with pytest.raises(type(err)):\n            modin_series.__bool__()\n    else:\n        modin_result = modin_series.__bool__()\n        df_equals(modin_result, pandas_result)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test___contains__(request, data):\n    modin_series, pandas_series = create_test_series(data)\n\n    result = False\n    key = \"Not Exist\"\n    assert result == modin_series.__contains__(key)\n    assert result == (key in modin_series)\n\n    if \"empty_data\" not in request.node.name:\n        result = True\n        key = pandas_series.keys()[0]\n        assert result == modin_series.__contains__(key)\n        assert result == (key in modin_series)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test___copy__(data):\n    modin_series, pandas_series = create_test_series(data)\n    df_equals(modin_series.copy(), modin_series)\n    df_equals(modin_series.copy(), pandas_series.copy())\n    df_equals(modin_series.copy(), pandas_series)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test___deepcopy__(data):\n    modin_series, pandas_series = create_test_series(data)\n    df_equals(modin_series.__deepcopy__(), modin_series)\n    df_equals(modin_series.__deepcopy__(), pandas_series.__deepcopy__())\n    df_equals(modin_series.__deepcopy__(), pandas_series)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test___delitem__(data):\n    modin_series, pandas_series = create_test_series(data)\n    del 
modin_series[modin_series.index[0]]\n    del pandas_series[pandas_series.index[0]]\n    df_equals(modin_series, pandas_series)\n\n    del modin_series[modin_series.index[-1]]\n    del pandas_series[pandas_series.index[-1]]\n    df_equals(modin_series, pandas_series)\n\n    del modin_series[modin_series.index[0]]\n    del pandas_series[pandas_series.index[0]]\n    df_equals(modin_series, pandas_series)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_divmod(data):\n    modin_series, pandas_series = create_test_series(data)\n    inter_df_math_helper(modin_series, pandas_series, \"divmod\")\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_rdivmod(data):\n    modin_series, pandas_series = create_test_series(data)\n    inter_df_math_helper(modin_series, pandas_series, \"rdivmod\")\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test___eq__(data):\n    modin_series, pandas_series = create_test_series(data)\n    inter_df_math_helper(modin_series, pandas_series, \"__eq__\")\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test___floordiv__(data):\n    modin_series, pandas_series = create_test_series(data)\n    inter_df_math_helper(modin_series, pandas_series, \"__floordiv__\")\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test___ge__(data):\n    modin_series, pandas_series = create_test_series(data)\n    inter_df_math_helper(modin_series, pandas_series, \"__ge__\")\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test___getitem__(data):\n    modin_series, pandas_series = create_test_series(data)\n    df_equals(modin_series[0], pandas_series[0])\n    df_equals(\n        modin_series[modin_series.index[-1]], pandas_series[pandas_series.index[-1]]\n    )\n    modin_series = pd.Series(list(range(1000)))\n    pandas_series = pandas.Series(list(range(1000)))\n    
df_equals(modin_series[:30], pandas_series[:30])\n    df_equals(modin_series[modin_series > 500], pandas_series[pandas_series > 500])\n    df_equals(modin_series[::2], pandas_series[::2])\n    # Test getting an invalid string key\n    # FIXME: https://github.com/modin-project/modin/issues/7038\n    eval_general(\n        modin_series, pandas_series, lambda s: s[\"a\"], expected_exception=False\n    )\n    eval_general(\n        modin_series, pandas_series, lambda s: s[[\"a\"]], expected_exception=False\n    )\n\n    # Test empty series\n    df_equals(pd.Series([])[:30], pandas.Series([])[:30])\n\n\ndef test___getitem__1383():\n    # see #1383 for more details\n    data = [\"\", \"a\", \"b\", \"c\", \"a\"]\n    modin_series = pd.Series(data)\n    pandas_series = pandas.Series(data)\n    df_equals(modin_series[3:7], pandas_series[3:7])\n\n\n@pytest.mark.parametrize(\"start\", [-7, -5, -3, 0, None, 3, 5, 7])\n@pytest.mark.parametrize(\"stop\", [-7, -5, -3, 0, None, 3, 5, 7])\ndef test___getitem_edge_cases(start, stop):\n    data = [\"\", \"a\", \"b\", \"c\", \"a\"]\n    modin_series = pd.Series(data)\n    pandas_series = pandas.Series(data)\n    df_equals(modin_series[start:stop], pandas_series[start:stop])\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test___gt__(data):\n    modin_series, pandas_series = create_test_series(data)\n    inter_df_math_helper(modin_series, pandas_series, \"__gt__\")\n\n\n@pytest.mark.parametrize(\"count_elements\", [0, 1, 10])\ndef test___int__(count_elements):\n    expected_exception = None\n    if count_elements != 1:\n        expected_exception = TypeError(\"cannot convert the series to <class 'int'>\")\n    eval_general(\n        *create_test_series([1.5] * count_elements),\n        int,\n        expected_exception=expected_exception,\n    )\n\n\n@pytest.mark.parametrize(\"count_elements\", [0, 1, 10])\ndef test___float__(count_elements):\n    expected_exception = None\n    if count_elements != 
1:\n        expected_exception = TypeError(\"cannot convert the series to <class 'float'>\")\n    eval_general(\n        *create_test_series([1] * count_elements),\n        float,\n        expected_exception=expected_exception,\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test___invert__(data, request):\n    modin_series, pandas_series = create_test_series(data)\n    expected_exception = None\n    if \"float_nan_data\" in request.node.callspec.id:\n        # FIXME: https://github.com/modin-project/modin/issues/7081\n        expected_exception = False\n    eval_general(\n        modin_series,\n        pandas_series,\n        lambda ser: ser.__invert__(),\n        expected_exception=expected_exception,\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test___iter__(data):\n    modin_series, pandas_series = create_test_series(data)\n    for m, p in zip(modin_series.__iter__(), pandas_series.__iter__()):\n        np.testing.assert_equal(m, p)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test___le__(data):\n    modin_series, pandas_series = create_test_series(data)\n    inter_df_math_helper(modin_series, pandas_series, \"__le__\")\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test___len__(data):\n    modin_series, pandas_series = create_test_series(data)\n    assert len(modin_series) == len(pandas_series)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test___long__(data):\n    modin_series, pandas_series = create_test_series(data)\n    try:\n        pandas_result = pandas_series[0].__long__()\n    except Exception as err:\n        with pytest.raises(type(err)):\n            modin_series[0].__long__()\n    else:\n        assert modin_series[0].__long__() == pandas_result\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test___lt__(data):\n    
modin_series, pandas_series = create_test_series(data)\n    inter_df_math_helper(modin_series, pandas_series, \"__lt__\")\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test___mod__(data):\n    modin_series, pandas_series = create_test_series(data)\n    inter_df_math_helper(modin_series, pandas_series, \"__mod__\")\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test___mul__(data):\n    modin_series, pandas_series = create_test_series(data)\n    inter_df_math_helper(modin_series, pandas_series, \"__mul__\")\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test___ne__(data):\n    modin_series, pandas_series = create_test_series(data)\n    inter_df_math_helper(modin_series, pandas_series, \"__ne__\")\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test___neg__(data):\n    modin_series, pandas_series = create_test_series(data)\n    eval_general(modin_series, pandas_series, lambda ser: ser.__neg__())\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test___or__(data, request):\n    modin_series, pandas_series = create_test_series(data)\n    expected_exception = None\n    if \"float_nan_data\" in request.node.callspec.id:\n        # FIXME: https://github.com/modin-project/modin/issues/7081\n        expected_exception = False\n    inter_df_math_helper(\n        modin_series,\n        pandas_series,\n        \"__or__\",\n        # https://github.com/modin-project/modin/issues/5966\n        comparator_kwargs={\"check_dtypes\": False},\n        expected_exception=expected_exception,\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test___pow__(data):\n    modin_series, pandas_series = create_test_series(data)\n    inter_df_math_helper(modin_series, pandas_series, \"__pow__\")\n\n\n@pytest.mark.parametrize(\"name\", [\"Dates\", None])\n@pytest.mark.parametrize(\n   
 \"dt_index\", [True, False], ids=[\"dt_index_true\", \"dt_index_false\"]\n)\n@pytest.mark.parametrize(\n    \"data\",\n    [*test_data_values, \"empty\"],\n    ids=[*test_data_keys, \"empty\"],\n)\ndef test___repr__(name, dt_index, data):\n    if data == \"empty\":\n        modin_series, pandas_series = pd.Series(), pandas.Series()\n    else:\n        modin_series, pandas_series = create_test_series(data)\n    pandas_series.name = modin_series.name = name\n    if dt_index:\n        index = pandas.date_range(\n            \"1/1/2000\", periods=len(pandas_series.index), freq=\"min\"\n        )\n        pandas_series.index = modin_series.index = index\n\n    assert repr(modin_series) == repr(pandas_series)\n\n\ndef test___repr__4186():\n    modin_series, pandas_series = create_test_series(\n        [\"a\", \"b\", \"c\", \"a\"], dtype=\"category\"\n    )\n    assert repr(modin_series) == repr(pandas_series)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.exclude_in_sanity\ndef test___round__(data):\n    modin_series, pandas_series = create_test_series(data)\n    df_equals(round(modin_series), round(pandas_series))\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.exclude_in_sanity\ndef test___setitem__(data):\n    modin_series, pandas_series = create_test_series(data)\n    for key in modin_series.keys():\n        modin_series[key] = 0\n        pandas_series[key] = 0\n        df_equals(modin_series, pandas_series)\n\n\n@pytest.mark.parametrize(\n    \"key\",\n    [\n        pytest.param(lambda idx: slice(1, 3), id=\"location_based_slice\"),\n        pytest.param(lambda idx: slice(idx[1], idx[-1]), id=\"index_based_slice\"),\n        pytest.param(lambda idx: [idx[0], idx[2], idx[-1]], id=\"list_of_labels\"),\n        pytest.param(\n            lambda idx: [True if i % 2 else False for i in range(len(idx))],\n            id=\"boolean_mask\",\n        ),\n    
],\n)\n@pytest.mark.parametrize(\n    \"index\",\n    [\n        pytest.param(\n            lambda idx_len: [chr(x) for x in range(ord(\"a\"), ord(\"a\") + idx_len)],\n            id=\"str_index\",\n        ),\n        pytest.param(lambda idx_len: list(range(1, idx_len + 1)), id=\"int_index\"),\n    ],\n)\ndef test___setitem___non_hashable(key, index):\n    data = np.arange(5)\n    index = index(len(data))\n    key = key(index)\n    md_sr, pd_sr = create_test_series(data, index=index)\n\n    md_sr[key] = 10\n    pd_sr[key] = 10\n    df_equals(md_sr, pd_sr)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test___sizeof__(data):\n    modin_series, pandas_series = create_test_series(data)\n    with warns_that_defaulting_to_pandas_if(\n        not df_or_series_using_native_execution(modin_series)\n    ):\n        modin_series.__sizeof__()\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test___str__(data):\n    modin_series, pandas_series = create_test_series(data)\n    assert str(modin_series) == str(pandas_series)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test___sub__(data):\n    modin_series, pandas_series = create_test_series(data)\n    inter_df_math_helper(modin_series, pandas_series, \"__sub__\")\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test___truediv__(data):\n    modin_series, pandas_series = create_test_series(data)\n    inter_df_math_helper(modin_series, pandas_series, \"__truediv__\")\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test___xor__(data, request):\n    modin_series, pandas_series = create_test_series(data)\n    expected_exception = None\n    if \"float_nan_data\" in request.node.callspec.id:\n        # FIXME: https://github.com/modin-project/modin/issues/7081\n        expected_exception = False\n    inter_df_math_helper(\n        modin_series,\n        
pandas_series,\n        \"__xor__\",\n        # https://github.com/modin-project/modin/issues/5966\n        comparator_kwargs={\"check_dtypes\": False},\n        expected_exception=expected_exception,\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_abs(data):\n    modin_series, pandas_series = create_test_series(data)\n    df_equals(modin_series.abs(), pandas_series.abs())\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_add(data):\n    modin_series, pandas_series = create_test_series(data)\n    inter_df_math_helper(modin_series, pandas_series, \"add\")\n\n\ndef test_add_does_not_change_original_series_name():\n    # See https://github.com/modin-project/modin/issues/5232\n    s1 = pd.Series(1, name=1)\n    s2 = pd.Series(2, name=2)\n    original_s1 = s1.copy(deep=True)\n    original_s2 = s2.copy(deep=True)\n    _ = s1 + s2\n    df_equals(s1, original_s1)\n    df_equals(s2, original_s2)\n\n\n@pytest.mark.parametrize(\"axis\", [None, 0, 1])\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_add_prefix(data, axis):\n    expected_exception = None\n    if axis:\n        expected_exception = ValueError(\"No axis named 1 for object type Series\")\n    eval_general(\n        *create_test_series(data),\n        lambda df: df.add_prefix(\"PREFIX_ADD_\", axis=axis),\n        expected_exception=expected_exception,\n    )\n\n\n@pytest.mark.parametrize(\"axis\", [None, 0, 1])\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_add_suffix(data, axis):\n    expected_exception = None\n    if axis:\n        expected_exception = ValueError(\"No axis named 1 for object type Series\")\n    eval_general(\n        *create_test_series(data),\n        lambda df: df.add_suffix(\"SUFFIX_ADD_\", axis=axis),\n        expected_exception=expected_exception,\n    )\n\n\ndef test_add_custom_class():\n    # see 
https://github.com/modin-project/modin/issues/5236\n    # Test that we can add any object that is addable to pandas object data\n    # via \"+\".\n    eval_general(\n        *create_test_series(test_data[\"int_data\"]),\n        lambda df: df + CustomIntegerForAddition(4),\n    )\n\n\ndef test_aggregate_alias():\n    # It's optimization. If failed, Series.agg should be tested explicitly\n    _assert_casting_functions_wrap_same_implementation(\n        pd.Series.aggregate, pd.Series.agg\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"func\", agg_func_values, ids=agg_func_keys)\ndef test_aggregate(data, func, request):\n    expected_exception = None\n    if \"should raise AssertionError\" in request.node.callspec.id:\n        # FIXME: https://github.com/modin-project/modin/issues/7031\n        expected_exception = False\n    eval_general(\n        *create_test_series(data),\n        lambda df: df.aggregate(func),\n        expected_exception=expected_exception,\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"func\", agg_func_except_values, ids=agg_func_except_keys)\ndef test_aggregate_except(data, func):\n    # SpecificationError is arisen because we treat a Series as a DataFrame.\n    # See details in pandas issues 36036.\n    with pytest.raises(SpecificationError):\n        eval_general(\n            *create_test_series(data),\n            lambda df: df.aggregate(func),\n        )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_aggregate_error_checking(data):\n    modin_series, pandas_series = create_test_series(data)\n\n    assert pandas_series.aggregate(\"ndim\") == 1\n    assert modin_series.aggregate(\"ndim\") == 1\n\n    eval_general(\n        modin_series,\n        pandas_series,\n        lambda series: series.aggregate(\"cumprod\"),\n    )\n    eval_general(\n        modin_series,\n        
pandas_series,\n        lambda series: series.aggregate(\"NOT_EXISTS\"),\n        expected_exception=AttributeError(\n            \"'NOT_EXISTS' is not a valid function for 'Series' object\"\n        ),\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_align(data):\n    modin_series, _ = create_test_series(data)  # noqa: F841\n    with warns_that_defaulting_to_pandas_if(\n        not df_or_series_using_native_execution(modin_series)\n    ):\n        modin_series.align(modin_series)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"skipna\", [False, True])\ndef test_all(data, skipna):\n    eval_general(*create_test_series(data), lambda df: df.all(skipna=skipna))\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"skipna\", [False, True])\ndef test_any(data, skipna):\n    eval_general(*create_test_series(data), lambda df: df.any(skipna=skipna))\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_append(data):\n    modin_series, pandas_series = create_test_series(data)\n\n    data_to_append = {\"append_a\": 2, \"append_b\": 1000}\n\n    ignore_idx_values = [True, False]\n\n    for ignore in ignore_idx_values:\n        try:\n            pandas_result = pandas_series.append(data_to_append, ignore_index=ignore)\n        except Exception as err:\n            with pytest.raises(type(err)):\n                modin_series.append(data_to_append, ignore_index=ignore)\n        else:\n            modin_result = modin_series.append(data_to_append, ignore_index=ignore)\n            df_equals(modin_result, pandas_result)\n\n    try:\n        pandas_result = pandas_series.append(pandas_series.iloc[-1])\n    except Exception as err:\n        with pytest.raises(type(err)):\n            modin_series.append(modin_series.iloc[-1])\n    else:\n        modin_result = 
modin_series.append(modin_series.iloc[-1])\n        df_equals(modin_result, pandas_result)\n\n    try:\n        pandas_result = pandas_series.append([pandas_series.iloc[-1]])\n    except Exception as err:\n        with pytest.raises(type(err)):\n            modin_series.append([modin_series.iloc[-1]])\n    else:\n        modin_result = modin_series.append([modin_series.iloc[-1]])\n        df_equals(modin_result, pandas_result)\n\n    verify_integrity_values = [True, False]\n\n    for verify_integrity in verify_integrity_values:\n        try:\n            pandas_result = pandas_series.append(\n                [pandas_series, pandas_series], verify_integrity=verify_integrity\n            )\n        except Exception as err:\n            with pytest.raises(type(err)):\n                modin_series.append(\n                    [modin_series, modin_series], verify_integrity=verify_integrity\n                )\n        else:\n            modin_result = modin_series.append(\n                [modin_series, modin_series], verify_integrity=verify_integrity\n            )\n            df_equals(modin_result, pandas_result)\n\n        try:\n            pandas_result = pandas_series.append(\n                pandas_series, verify_integrity=verify_integrity\n            )\n        except Exception as err:\n            with pytest.raises(type(err)):\n                modin_series.append(modin_series, verify_integrity=verify_integrity)\n        else:\n            modin_result = modin_series.append(\n                modin_series, verify_integrity=verify_integrity\n            )\n            df_equals(modin_result, pandas_result)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"func\", agg_func_values, ids=agg_func_keys)\ndef test_apply(data, func, request):\n    expected_exception = None\n    if \"should raise AssertionError\" in request.node.callspec.id:\n        # FIXME: https://github.com/modin-project/modin/issues/7031\n     
   expected_exception = False\n    elif \"df sum\" in request.node.callspec.id:\n        _type = \"int\" if \"int_data\" in request.node.callspec.id else \"float\"\n        expected_exception = AttributeError(f\"'{_type}' object has no attribute 'sum'\")\n    eval_general(\n        *create_test_series(data),\n        lambda df: df.apply(func),\n        expected_exception=expected_exception,\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"func\", agg_func_except_values, ids=agg_func_except_keys)\ndef test_apply_except(data, func):\n    eval_general(\n        *create_test_series(data),\n        lambda df: df.apply(func),\n        expected_exception=pandas.errors.SpecificationError(\n            \"Function names must be unique if there is no new column names assigned\"\n        ),\n    )\n\n\ndef test_apply_external_lib():\n    json_string = \"\"\"\n    {\n        \"researcher\": {\n            \"name\": \"Ford Prefect\",\n            \"species\": \"Betelgeusian\",\n            \"relatives\": [\n                {\n                    \"name\": \"Zaphod Beeblebrox\",\n                    \"species\": \"Betelgeusian\"\n                }\n            ]\n        }\n    }\n    \"\"\"\n    modin_result = pd.DataFrame.from_dict({\"a\": [json_string]}).a.apply(json.loads)\n    pandas_result = pandas.DataFrame.from_dict({\"a\": [json_string]}).a.apply(json.loads)\n    df_equals(modin_result, pandas_result)\n\n\n@pytest.mark.parametrize(\"axis\", [None, 0, 1])\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"func\", [\"count\", \"all\", \"kurt\", \"array\", \"searchsorted\"])\ndef test_apply_text_func(data, func, axis):\n    func_kwargs = {}\n    if func not in (\"count\", \"searchsorted\"):\n        func_kwargs[\"axis\"] = axis\n    elif not axis:\n        # FIXME: https://github.com/modin-project/modin/issues/7000\n        return\n    rows_number = 
len(next(iter(data.values())))  # length of the first data column\n    level_0 = np.random.choice([0, 1, 2], rows_number)\n    level_1 = np.random.choice([3, 4, 5], rows_number)\n    index = pd.MultiIndex.from_arrays([level_0, level_1])\n\n    modin_series, pandas_series = create_test_series(data)\n    modin_series.index = index\n    pandas_series.index = index\n\n    if func == \"searchsorted\":\n        # required parameter\n        func_kwargs[\"value\"] = pandas_series[1]\n\n    eval_general(modin_series, pandas_series, lambda df: df.apply(func, **func_kwargs))\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"skipna\", [True, False])\ndef test_argmax(data, skipna):\n    modin_series, pandas_series = create_test_series(data)\n    df_equals(modin_series.argmax(skipna=skipna), pandas_series.argmax(skipna=skipna))\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"skipna\", [True, False])\ndef test_argmin(data, skipna):\n    modin_series, pandas_series = create_test_series(data)\n    df_equals(modin_series.argmin(skipna=skipna), pandas_series.argmin(skipna=skipna))\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_argsort(data):\n    modin_series, pandas_series = create_test_series(data)\n    with warns_that_defaulting_to_pandas_if(\n        not df_or_series_using_native_execution(modin_series)\n    ):\n        modin_result = modin_series.argsort()\n    df_equals(modin_result, pandas_series.argsort())\n\n\ndef test_asfreq():\n    index = pd.date_range(\"1/1/2000\", periods=4, freq=\"min\")\n    series = pd.Series([0.0, None, 2.0, 3.0], index=index)\n    with warns_that_defaulting_to_pandas_if(\n        not df_or_series_using_native_execution(series)\n    ):\n        # We are only testing that this defaults to pandas, so we will just check for\n        # the warning\n        
series.asfreq(freq=\"30S\")\n\n\n@pytest.mark.parametrize(\n    \"where\",\n    [\n        20,\n        30,\n        [10, 40],\n        [20, 30],\n        [20],\n        25,\n        [25, 45],\n        [25, 30],\n        pandas.Index([20, 30]),\n        pandas.Index([10]),\n    ],\n)\ndef test_asof(where):\n    # With NaN:\n    values = [1, 2, np.nan, 4]\n    index = [10, 20, 30, 40]\n    modin_series, pandas_series = (\n        pd.Series(values, index=index),\n        pandas.Series(values, index=index),\n    )\n    df_equals(modin_series.asof(where), pandas_series.asof(where))\n\n    # No NaN:\n    values = [1, 2, 7, 4]\n    modin_series, pandas_series = (\n        pd.Series(values, index=index),\n        pandas.Series(values, index=index),\n    )\n    df_equals(modin_series.asof(where), pandas_series.asof(where))\n\n\n@pytest.mark.parametrize(\n    \"where\",\n    [20, 30, [10.5, 40.5], [10], pandas.Index([20, 30]), pandas.Index([10.5])],\n)\ndef test_asof_large(where):\n    values = test_data[\"float_nan_data\"][\"col1\"]\n    index = list(range(len(values)))\n    modin_series, pandas_series = (\n        pd.Series(values, index=index),\n        pandas.Series(values, index=index),\n    )\n    df_equals(modin_series.asof(where), pandas_series.asof(where))\n\n\n@pytest.mark.parametrize(\n    \"data\",\n    [\n        test_data[\"int_data\"],\n        test_data[\"float_nan_data\"],\n    ],\n    ids=test_data_keys,\n)\ndef test_astype(data, request):\n    modin_series, pandas_series = create_test_series(data)\n    series_name = \"test_series\"\n    modin_series.name = pandas_series.name = series_name\n\n    eval_general(modin_series, pandas_series, lambda df: df.astype(str))\n    expected_exception = None\n    if \"float_nan_data\" in request.node.callspec.id:\n        expected_exception = pd.errors.IntCastingNaNError(\n            \"Cannot convert non-finite values (NA or inf) to integer\"\n        )\n    eval_general(\n        modin_series,\n        
pandas_series,\n        lambda ser: ser.astype(np.int64),\n        expected_exception=expected_exception,\n    )\n    eval_general(modin_series, pandas_series, lambda ser: ser.astype(np.float64))\n    eval_general(\n        modin_series, pandas_series, lambda ser: ser.astype({series_name: str})\n    )\n    # FIXME: https://github.com/modin-project/modin/issues/7039\n    eval_general(\n        modin_series,\n        pandas_series,\n        lambda ser: ser.astype({\"wrong_name\": str}),\n        expected_exception=False,\n    )\n\n    # TODO(https://github.com/modin-project/modin/issues/4317): Test passing a\n    # dict to astype() for a series with no name.\n\n\n@pytest.mark.parametrize(\"dtype\", [\"int32\", \"float32\"])\ndef test_astype_32_types(dtype):\n    # https://github.com/modin-project/modin/issues/6881\n    assert pd.Series([1, 2, 6]).astype(dtype).dtype == dtype\n\n\n@pytest.mark.parametrize(\n    \"data\", [[\"A\", \"A\", \"B\", \"B\", \"A\"], [1, 1, 2, 1, 2, 2, 3, 1, 2, 1, 2]]\n)\ndef test_astype_categorical(data):\n    modin_df, pandas_df = create_test_series(data)\n\n    modin_result = modin_df.astype(\"category\")\n    pandas_result = pandas_df.astype(\"category\")\n    df_equals(modin_result, pandas_result)\n    assert modin_result.dtype == pandas_result.dtype\n\n    dtype = pd.CategoricalDtype(categories=sorted(set(data)))\n    modin_result = modin_df.astype(dtype)\n    pandas_result = pandas_df.astype(dtype)\n    df_equals(modin_result, pandas_result)\n    assert modin_result.dtype == pandas_result.dtype\n\n\n@pytest.mark.parametrize(\"data\", [[\"a\", \"a\", \"b\", \"c\", \"c\", \"d\", \"b\", \"d\"]])\n@pytest.mark.parametrize(\n    \"set_min_row_partition_size\",\n    [2, 4],\n    ids=[\"four_row_partitions\", \"two_row_partitions\"],\n    indirect=True,\n)\ndef test_astype_categorical_issue5722(data, set_min_row_partition_size):\n    modin_series, pandas_series = create_test_series(data)\n\n    modin_result = 
modin_series.astype(\"category\")\n    pandas_result = pandas_series.astype(\"category\")\n    df_equals(modin_result, pandas_result)\n    assert modin_result.dtype == pandas_result.dtype\n\n    pandas_result1, pandas_result2 = pandas_result.iloc[:4], pandas_result.iloc[4:]\n    modin_result1, modin_result2 = modin_result.iloc[:4], modin_result.iloc[4:]\n\n    # check categories\n    assert pandas_result1.cat.categories.equals(pandas_result2.cat.categories)\n    assert modin_result1.cat.categories.equals(modin_result2.cat.categories)\n    assert pandas_result1.cat.categories.equals(modin_result1.cat.categories)\n    assert pandas_result2.cat.categories.equals(modin_result2.cat.categories)\n\n    # check codes\n    assert_array_equal(pandas_result1.cat.codes.values, modin_result1.cat.codes.values)\n    assert_array_equal(pandas_result2.cat.codes.values, modin_result2.cat.codes.values)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_at(data):\n    modin_series, pandas_series = create_test_series(data)\n    df_equals(\n        modin_series.at[modin_series.index[0]], pandas_series.at[pandas_series.index[0]]\n    )\n    df_equals(\n        modin_series.at[modin_series.index[-1]], pandas_series[pandas_series.index[-1]]\n    )\n\n\ndef test_at_time():\n    i = pd.date_range(\"2008-01-01\", periods=1000, freq=\"12H\")\n    modin_series = pd.Series(list(range(1000)), index=i)\n    pandas_series = pandas.Series(list(range(1000)), index=i)\n    df_equals(modin_series.at_time(\"12:00\"), pandas_series.at_time(\"12:00\"))\n    df_equals(modin_series.at_time(\"3:00\"), pandas_series.at_time(\"3:00\"))\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"lag\", [1, 2, 3])\ndef test_autocorr(data, lag):\n    modin_series, pandas_series = create_test_series(data)\n    modin_result = modin_series.autocorr(lag=lag)\n    pandas_result = pandas_series.autocorr(lag=lag)\n    
df_equals(modin_result, pandas_result)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_axes(data):\n    modin_series, pandas_series = create_test_series(data)\n    assert modin_series.axes[0].equals(pandas_series.axes[0])\n    assert len(modin_series.axes) == len(pandas_series.axes)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_attrs(data):\n    modin_series, pandas_series = create_test_series(data)\n    eval_general(modin_series, pandas_series, lambda df: df.attrs)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_array(data):\n    modin_series, pandas_series = create_test_series(data)\n    eval_general(modin_series, pandas_series, lambda df: df.array)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_between(data):\n    modin_series, pandas_series = create_test_series(data)\n\n    df_equals(\n        modin_series.between(1, 4),\n        pandas_series.between(1, 4),\n    )\n\n\ndef test_between_time():\n    i = pd.date_range(\"2008-01-01\", periods=1000, freq=\"12H\")\n    modin_series = pd.Series(list(range(1000)), index=i)\n    pandas_series = pandas.Series(list(range(1000)), index=i)\n    df_equals(\n        modin_series.between_time(\"12:00\", \"17:00\"),\n        pandas_series.between_time(\"12:00\", \"17:00\"),\n    )\n    df_equals(\n        modin_series.between_time(\"3:00\", \"8:00\"),\n        pandas_series.between_time(\"3:00\", \"8:00\"),\n    )\n    df_equals(\n        modin_series.between_time(\"3:00\", \"8:00\", inclusive=\"right\"),\n        pandas_series.between_time(\"3:00\", \"8:00\", inclusive=\"right\"),\n    )\n\n\ndef test_add_series_to_timedeltaindex():\n    # Make a pandas.core.indexes.timedeltas.TimedeltaIndex\n    deltas = pd.to_timedelta([1], unit=\"h\")\n    test_series = create_test_series(np.datetime64(\"2000-12-12\"))\n    eval_general(*test_series, lambda s: s + deltas)\n    
eval_general(*test_series, lambda s: s - deltas)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_bfill(data):\n    modin_series, pandas_series = create_test_series(data)\n    df_equals(modin_series.bfill(), pandas_series.bfill())\n    # inplace\n    modin_series_cp = modin_series.copy()\n    pandas_series_cp = pandas_series.copy()\n    modin_series_cp.bfill(inplace=True)\n    pandas_series_cp.bfill(inplace=True)\n    df_equals(modin_series_cp, pandas_series_cp)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_bool(data):\n    modin_series, _ = create_test_series(data)\n\n    with pytest.warns(\n        FutureWarning, match=\"bool is now deprecated and will be removed\"\n    ):\n        with pytest.raises(ValueError):\n            modin_series.bool()\n    with pytest.raises(ValueError):\n        modin_series.__bool__()\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"bound_type\", [\"list\", \"series\"], ids=[\"list\", \"series\"])\ndef test_clip_scalar(request, data, bound_type):\n    modin_series, pandas_series = create_test_series(\n        data,\n    )\n\n    if name_contains(request.node.name, numeric_dfs):\n        # set bounds\n        lower, upper = np.sort(random_state.randint(RAND_LOW, RAND_HIGH, 2))\n\n        # test only upper scalar bound\n        modin_result = modin_series.clip(None, upper)\n        pandas_result = pandas_series.clip(None, upper)\n        df_equals(modin_result, pandas_result)\n\n        # test lower and upper scalar bound\n        modin_result = modin_series.clip(lower, upper)\n        pandas_result = pandas_series.clip(lower, upper)\n        df_equals(modin_result, pandas_result)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"bound_type\", [\"list\", \"series\"], ids=[\"list\", \"series\"])\ndef test_clip_sequence(request, data, 
bound_type):\n    modin_series, pandas_series = create_test_series(\n        data,\n    )\n\n    if name_contains(request.node.name, numeric_dfs):\n        lower = random_state.randint(RAND_LOW, RAND_HIGH, len(pandas_series))\n        upper = random_state.randint(RAND_LOW, RAND_HIGH, len(pandas_series))\n\n        if bound_type == \"series\":\n            modin_lower = pd.Series(lower)\n            pandas_lower = pandas.Series(lower)\n            modin_upper = pd.Series(upper)\n            pandas_upper = pandas.Series(upper)\n        else:\n            modin_lower = pandas_lower = lower\n            modin_upper = pandas_upper = upper\n\n        # test lower and upper list bound\n        modin_result = modin_series.clip(modin_lower, modin_upper, axis=0)\n        pandas_result = pandas_series.clip(pandas_lower, pandas_upper)\n        df_equals(modin_result, pandas_result)\n\n        # test only upper list bound\n        modin_result = modin_series.clip(np.nan, modin_upper, axis=0)\n        pandas_result = pandas_series.clip(np.nan, pandas_upper)\n        df_equals(modin_result, pandas_result)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_combine(data):\n    modin_series, _ = create_test_series(data)  # noqa: F841\n    modin_series2 = modin_series % (max(modin_series) // 2)\n    modin_series.combine(modin_series2, lambda s1, s2: s1 if s1 < s2 else s2)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_combine_first(data):\n    modin_series, pandas_series = create_test_series(data)\n    modin_series2 = modin_series % (max(modin_series) // 2)\n    pandas_series2 = pandas_series % (max(pandas_series) // 2)\n    modin_result = modin_series.combine_first(modin_series2)\n    pandas_result = pandas_series.combine_first(pandas_series2)\n    df_equals(modin_result, pandas_result)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_compress(data):\n    
modin_series, pandas_series = create_test_series(data)  # noqa: F841\n    try:\n        pandas_series.compress(pandas_series > 30)\n    except Exception as err:\n        with pytest.raises(type(err)):\n            modin_series.compress(modin_series > 30)\n    else:\n        modin_series.compress(modin_series > 30)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_constructor(data):\n    modin_series, pandas_series = create_test_series(data)\n    df_equals(modin_series, pandas_series)\n    df_equals(pd.Series(modin_series), pandas.Series(pandas_series))\n\n\ndef test_constructor_columns_and_index():\n    modin_series = pd.Series([1, 1, 10], index=[1, 2, 3], name=\"health\")\n    pandas_series = pandas.Series([1, 1, 10], index=[1, 2, 3], name=\"health\")\n    df_equals(modin_series, pandas_series)\n    df_equals(pd.Series(modin_series), pandas.Series(pandas_series))\n    df_equals(\n        pd.Series(modin_series, name=\"max_speed\"),\n        pandas.Series(pandas_series, name=\"max_speed\"),\n    )\n    df_equals(\n        pd.Series(modin_series, index=[1, 2]),\n        pandas.Series(pandas_series, index=[1, 2]),\n    )\n    with pytest.raises(NotImplementedError):\n        pd.Series(modin_series, index=[1, 2, 99999])\n\n\ndef test_constructor_arrow_extension_array():\n    # example from pandas docs\n    pa = pytest.importorskip(\"pyarrow\")\n    array = pd.arrays.ArrowExtensionArray(\n        pa.array(\n            [{\"1\": \"2\"}, {\"10\": \"20\"}, None],\n            type=pa.map_(pa.string(), pa.string()),\n        )\n    )\n    md_ser, pd_ser = create_test_series(array)\n    df_equals(md_ser, pd_ser)\n    df_equals(md_ser.dtypes, pd_ser.dtypes)\n\n\ndef test_pyarrow_backed_constructor():\n    pa = pytest.importorskip(\"pyarrow\")\n    data = list(\"abcd\")\n    df_equals(*create_test_series(data, dtype=\"string[pyarrow]\"))\n    df_equals(*create_test_series(data, dtype=pd.ArrowDtype(pa.string())))\n\n    data = 
[[\"hello\"], [\"there\"]]\n    list_str_type = pa.list_(pa.string())\n    df_equals(*create_test_series(data, dtype=pd.ArrowDtype(list_str_type)))\n\n\ndef test_pyarrow_backed_functions():\n    pytest.importorskip(\"pyarrow\")\n    modin_series, pandas_series = create_test_series(\n        [-1.545, 0.211, None], dtype=\"float32[pyarrow]\"\n    )\n    df_equals(modin_series.mean(), pandas_series.mean())\n\n    def comparator(df1, df2):\n        df_equals(df1, df2)\n        df_equals(df1.dtypes, df2.dtypes)\n\n    eval_general(\n        modin_series,\n        pandas_series,\n        lambda ser: ser\n        + (modin_series if isinstance(ser, pd.Series) else pandas_series),\n        comparator=comparator,\n    )\n\n    eval_general(\n        modin_series,\n        pandas_series,\n        lambda ser: ser > (ser + 1),\n        comparator=comparator,\n    )\n\n    eval_general(\n        modin_series,\n        pandas_series,\n        lambda ser: ser.dropna(),\n        comparator=comparator,\n    )\n\n    eval_general(\n        modin_series,\n        pandas_series,\n        lambda ser: ser.isna(),\n        comparator=comparator,\n    )\n\n    eval_general(\n        modin_series,\n        pandas_series,\n        lambda ser: ser.fillna(0),\n        comparator=comparator,\n    )\n\n\ndef test_pyarrow_array_retrieve():\n    pa = pytest.importorskip(\"pyarrow\")\n    modin_series, pandas_series = create_test_series(\n        [1, 2, None], dtype=\"uint8[pyarrow]\"\n    )\n    eval_general(\n        modin_series,\n        pandas_series,\n        lambda ser: pa.array(ser),\n    )\n\n\ndef test___arrow_array__():\n    # https://github.com/modin-project/modin/issues/6808\n    pa = pytest.importorskip(\"pyarrow\")\n    mpd_df_1 = pd.DataFrame({\"a\": [\"1\", \"2\", \"3\"], \"b\": [\"4\", \"5\", \"6\"]})\n    mpd_df_2 = pd.DataFrame({\"a\": [\"7\", \"8\", \"9\"], \"b\": [\"10\", \"11\", \"12\"]})\n    test_df = pd.concat([mpd_df_1, mpd_df_2])\n\n    res_from_md = 
pa.Table.from_pandas(df=test_df)\n    res_from_pd = pa.Table.from_pandas(df=test_df._to_pandas())\n    assert res_from_md.equals(res_from_pd)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_copy(data):\n    modin_series, pandas_series = create_test_series(data)\n    df_equals(modin_series, modin_series.copy())\n    df_equals(modin_series.copy(), pandas_series)\n    df_equals(modin_series.copy(), pandas_series.copy())\n\n\ndef test_copy_empty_series():\n    ser = pd.Series(range(3))\n    res = ser[:0].copy()\n    assert res.dtype == ser.dtype\n\n\n@pytest.mark.parametrize(\"method\", [\"pearson\", \"kendall\"])\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_corr(data, method):\n    modin_series, pandas_series = create_test_series(data)\n    modin_result = modin_series.corr(modin_series, method=method)\n    pandas_result = pandas_series.corr(pandas_series, method=method)\n    df_equals(modin_result, pandas_result)\n\n\n@pytest.mark.parametrize(\n    \"data\",\n    test_data_values + test_data_large_categorical_series_values,\n    ids=test_data_keys + test_data_large_categorical_series_keys,\n)\ndef test_count(data):\n    modin_series, pandas_series = create_test_series(data)\n    df_equals(modin_series.count(), pandas_series.count())\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_cov(data):\n    modin_series, pandas_series = create_test_series(data)\n    modin_result = modin_series.cov(modin_series)\n    pandas_result = pandas_series.cov(pandas_series)\n    df_equals(modin_result, pandas_result)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"skipna\", [False, True])\ndef test_cummax(data, skipna):\n    modin_series, pandas_series = create_test_series(data)\n    try:\n        pandas_result = pandas_series.cummax(skipna=skipna)\n    except Exception as err:\n        with 
pytest.raises(type(err)):\n            modin_series.cummax(skipna=skipna)\n    else:\n        df_equals(modin_series.cummax(skipna=skipna), pandas_result)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"skipna\", [False, True])\ndef test_cummin(data, skipna):\n    modin_series, pandas_series = create_test_series(data)\n    try:\n        pandas_result = pandas_series.cummin(skipna=skipna)\n    except Exception as err:\n        with pytest.raises(type(err)):\n            modin_series.cummin(skipna=skipna)\n    else:\n        df_equals(modin_series.cummin(skipna=skipna), pandas_result)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"skipna\", [False, True])\ndef test_cumprod(data, skipna):\n    modin_series, pandas_series = create_test_series(data)\n    try:\n        pandas_result = pandas_series.cumprod(skipna=skipna)\n    except Exception as err:\n        with pytest.raises(type(err)):\n            modin_series.cumprod(skipna=skipna)\n    else:\n        df_equals(modin_series.cumprod(skipna=skipna), pandas_result)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"skipna\", [False, True])\ndef test_cumsum(data, skipna):\n    modin_series, pandas_series = create_test_series(data)\n    try:\n        pandas_result = pandas_series.cumsum(skipna=skipna)\n    except Exception as err:\n        with pytest.raises(type(err)):\n            modin_series.cumsum(skipna=skipna)\n    else:\n        df_equals(modin_series.cumsum(skipna=skipna), pandas_result)\n\n\ndef test_cumsum_6771():\n    _ = to_pandas(pd.Series([1, 2, 3], dtype=\"Int64\").cumsum())\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_describe(data):\n    modin_series, pandas_series = create_test_series(data)\n    df_equals(modin_series.describe(), pandas_series.describe())\n    percentiles = [0.10, 0.11, 
0.44, 0.78, 0.99]\n    df_equals(\n        modin_series.describe(percentiles=percentiles),\n        pandas_series.describe(percentiles=percentiles),\n    )\n\n    try:\n        pandas_result = pandas_series.describe(exclude=[np.float64])\n    except Exception as err:\n        with pytest.raises(type(err)):\n            modin_series.describe(exclude=[np.float64])\n    else:\n        modin_result = modin_series.describe(exclude=[np.float64])\n        df_equals(modin_result, pandas_result)\n\n    try:\n        pandas_result = pandas_series.describe(exclude=np.float64)\n    except Exception as err:\n        with pytest.raises(type(err)):\n            modin_series.describe(exclude=np.float64)\n    else:\n        modin_result = modin_series.describe(exclude=np.float64)\n        df_equals(modin_result, pandas_result)\n\n    try:\n        pandas_result = pandas_series.describe(\n            include=[np.timedelta64, np.datetime64, np.object_, np.bool_]\n        )\n    except Exception as err:\n        with pytest.raises(type(err)):\n            modin_series.describe(\n                include=[np.timedelta64, np.datetime64, np.object_, np.bool_]\n            )\n    else:\n        modin_result = modin_series.describe(\n            include=[np.timedelta64, np.datetime64, np.object_, np.bool_]\n        )\n        df_equals(modin_result, pandas_result)\n\n    modin_result = modin_series.describe(include=str(modin_series.dtypes))\n    pandas_result = pandas_series.describe(include=str(pandas_series.dtypes))\n    df_equals(modin_result, pandas_result)\n\n    modin_result = modin_series.describe(include=[np.number])\n    pandas_result = pandas_series.describe(include=[np.number])\n    df_equals(modin_result, pandas_result)\n\n    df_equals(\n        modin_series.describe(include=\"all\"), pandas_series.describe(include=\"all\")\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\n    \"periods\", int_arg_values, 
ids=arg_keys(\"periods\", int_arg_keys)\n)\ndef test_diff(data, periods):\n    modin_series, pandas_series = create_test_series(data)\n\n    try:\n        pandas_result = pandas_series.diff(periods=periods)\n    except Exception as err:\n        with pytest.raises(type(err)):\n            modin_series.diff(periods=periods)\n    else:\n        modin_result = modin_series.diff(periods=periods)\n        df_equals(modin_result, pandas_result)\n\n    try:\n        pandas_result = pandas_series.T.diff(periods=periods)\n    except Exception as err:\n        with pytest.raises(type(err)):\n            modin_series.T.diff(periods=periods)\n    else:\n        modin_result = modin_series.T.diff(periods=periods)\n        df_equals(modin_result, pandas_result)\n\n\ndef test_diff_with_dates():\n    data = pandas.date_range(\"2018-01-01\", periods=15, freq=\"h\").values\n    pandas_series = pandas.Series(data)\n    modin_series = pd.Series(pandas_series)\n\n    # Check that `diff` with datetime types works correctly.\n    pandas_result = pandas_series.diff()\n    modin_result = modin_series.diff()\n    df_equals(modin_result, pandas_result)\n\n    # Check that `diff` with timedelta types works correctly.\n    td_pandas_result = pandas_result.diff()\n    td_modin_result = modin_result.diff()\n    df_equals(td_modin_result, td_pandas_result)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_div(data):\n    modin_series, pandas_series = create_test_series(data)\n    inter_df_math_helper(modin_series, pandas_series, \"div\")\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_divide(data):\n    modin_series, pandas_series = create_test_series(data)\n    inter_df_math_helper(modin_series, pandas_series, \"divide\")\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_dot(data):\n    modin_series, pandas_series = create_test_series(data)\n    ind_len = len(modin_series)\n\n    # 
Test 1D array input\n    arr = np.arange(ind_len)\n    modin_result = modin_series.dot(arr)\n    pandas_result = pandas_series.dot(arr)\n    df_equals(modin_result, pandas_result)\n\n    # Test 2D array input\n    arr = np.arange(ind_len * 2).reshape(ind_len, 2)\n    modin_result = modin_series.dot(arr)\n    pandas_result = pandas_series.dot(arr)\n    assert_array_equal(modin_result, pandas_result)\n\n    # Test bad dimensions\n    with pytest.raises(ValueError):\n        modin_series.dot(np.arange(ind_len + 10))\n\n    # Test dataframe input\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n    modin_result = modin_series.dot(modin_df)\n    pandas_result = pandas_series.dot(pandas_df)\n    df_equals(modin_result, pandas_result)\n\n    # Test series input\n    modin_series_2 = pd.Series(np.arange(ind_len), index=modin_series.index)\n    pandas_series_2 = pandas.Series(np.arange(ind_len), index=pandas_series.index)\n    modin_result = modin_series.dot(modin_series_2)\n    pandas_result = pandas_series.dot(pandas_series_2)\n    df_equals(modin_result, pandas_result)\n\n    # Test when input series index doesn't line up with columns\n    with pytest.raises(ValueError):\n        modin_series.dot(\n            pd.Series(\n                np.arange(ind_len), index=[\"a\" for _ in range(len(modin_series.index))]\n            )\n        )\n\n    # Test case when left series has size (1 x 1)\n    # and right dataframe has size (1 x n)\n    modin_result = pd.Series([1]).dot(pd.DataFrame(modin_series).T)\n    pandas_result = pandas.Series([1]).dot(pandas.DataFrame(pandas_series).T)\n    df_equals(modin_result, pandas_result)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_matmul(data):\n    modin_series, pandas_series = create_test_series(data)  # noqa: F841\n    ind_len = len(modin_series)\n\n    # Test 1D array input\n    arr = np.arange(ind_len)\n    modin_result = modin_series @ arr\n    pandas_result = 
pandas_series @ arr\n    df_equals(modin_result, pandas_result)\n\n    # Test 2D array input\n    arr = np.arange(ind_len * 2).reshape(ind_len, 2)\n    modin_result = modin_series @ arr\n    pandas_result = pandas_series @ arr\n    assert_array_equal(modin_result, pandas_result)\n\n    # Test bad dimensions\n    with pytest.raises(ValueError):\n        modin_series @ np.arange(ind_len + 10)\n\n    # Test dataframe input\n    modin_df = pd.DataFrame(data)\n    pandas_df = pandas.DataFrame(data)\n    modin_result = modin_series @ modin_df\n    pandas_result = pandas_series @ pandas_df\n    df_equals(modin_result, pandas_result)\n\n    # Test series input\n    modin_series_2 = pd.Series(np.arange(ind_len), index=modin_series.index)\n    pandas_series_2 = pandas.Series(np.arange(ind_len), index=pandas_series.index)\n    modin_result = modin_series @ modin_series_2\n    pandas_result = pandas_series @ pandas_series_2\n    df_equals(modin_result, pandas_result)\n\n    # Test when input series index doesn't line up with columns\n    with pytest.raises(ValueError):\n        modin_series @ pd.Series(\n            np.arange(ind_len), index=[\"a\" for _ in range(len(modin_series.index))]\n        )\n\n\n@pytest.mark.xfail(reason=\"Using pandas Series.\")\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_drop(data):\n    modin_series = create_test_series(data)\n\n    with pytest.raises(NotImplementedError):\n        modin_series.drop(None, None, None, None)\n\n\n@pytest.mark.parametrize(\n    \"data\", test_data_with_duplicates_values, ids=test_data_with_duplicates_keys\n)\n@pytest.mark.parametrize(\n    \"keep\", [\"last\", \"first\", False], ids=[\"last\", \"first\", \"False\"]\n)\n@pytest.mark.parametrize(\"inplace\", [True, False], ids=[\"True\", \"False\"])\ndef test_drop_duplicates(data, keep, inplace):\n    modin_series, pandas_series = create_test_series(data)\n    modin_res = modin_series.drop_duplicates(keep=keep, inplace=inplace)\n  
  pandas_res = pandas_series.drop_duplicates(keep=keep, inplace=inplace)\n    if inplace:\n        sort_if_range_partitioning(modin_series, pandas_series)\n    else:\n        sort_if_range_partitioning(modin_res, pandas_res)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"how\", [\"any\", \"all\"], ids=[\"any\", \"all\"])\ndef test_dropna(data, how):\n    modin_series, pandas_series = create_test_series(data)\n    modin_result = modin_series.dropna(how=how)\n    pandas_result = pandas_series.dropna(how=how)\n    df_equals(modin_result, pandas_result)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_dropna_inplace(data):\n    modin_series, pandas_series = create_test_series(data)\n    pandas_result = pandas_series.dropna()\n    modin_series.dropna(inplace=True)\n    df_equals(modin_series, pandas_result)\n\n    modin_series, pandas_series = create_test_series(data)\n    pandas_series.dropna(how=\"any\", inplace=True)\n    modin_series.dropna(how=\"any\", inplace=True)\n    df_equals(modin_series, pandas_series)\n\n\ndef test_dtype_empty():\n    modin_series, pandas_series = pd.Series(), pandas.Series()\n    assert modin_series.dtype == pandas_series.dtype\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_dtype(data):\n    modin_series, pandas_series = create_test_series(data)\n    df_equals(modin_series.dtype, modin_series.dtypes)\n    df_equals(modin_series.dtype, pandas_series.dtype)\n    df_equals(modin_series.dtype, pandas_series.dtypes)\n\n\n# Bug https://github.com/modin-project/modin/issues/4436 in\n# Series.dt.to_pydatetime is only reproducible when the date range out of which\n# the frame is created has timezone None, so that its dtype is datetime64[ns]\n# as opposed to, e.g. datetime64[ns, Europe/Berlin]. 
To reproduce that bug, we\n# use timezones None and Europe/Berlin.\n@pytest.mark.parametrize(\n    \"timezone\",\n    [\n        pytest.param(None),\n        pytest.param(\"Europe/Berlin\"),\n    ],\n)\ndef test_dt(timezone):\n    data = pd.date_range(\"2016-12-31\", periods=128, freq=\"D\", tz=timezone)\n    modin_series = pd.Series(data)\n    pandas_series = pandas.Series(data)\n\n    df_equals(modin_series.dt.date, pandas_series.dt.date)\n    df_equals(modin_series.dt.time, pandas_series.dt.time)\n    df_equals(modin_series.dt.timetz, pandas_series.dt.timetz)\n    df_equals(modin_series.dt.year, pandas_series.dt.year)\n    df_equals(modin_series.dt.month, pandas_series.dt.month)\n    df_equals(modin_series.dt.day, pandas_series.dt.day)\n    df_equals(modin_series.dt.hour, pandas_series.dt.hour)\n    df_equals(modin_series.dt.minute, pandas_series.dt.minute)\n    df_equals(modin_series.dt.second, pandas_series.dt.second)\n    df_equals(modin_series.dt.microsecond, pandas_series.dt.microsecond)\n    df_equals(modin_series.dt.nanosecond, pandas_series.dt.nanosecond)\n    df_equals(modin_series.dt.dayofweek, pandas_series.dt.dayofweek)\n    df_equals(modin_series.dt.day_of_week, pandas_series.dt.day_of_week)\n    df_equals(modin_series.dt.weekday, pandas_series.dt.weekday)\n    df_equals(modin_series.dt.dayofyear, pandas_series.dt.dayofyear)\n    df_equals(modin_series.dt.day_of_year, pandas_series.dt.day_of_year)\n    df_equals(modin_series.dt.unit, pandas_series.dt.unit)\n    df_equals(modin_series.dt.as_unit(\"s\"), pandas_series.dt.as_unit(\"s\"))\n    df_equals(modin_series.dt.isocalendar(), pandas_series.dt.isocalendar())\n    df_equals(modin_series.dt.quarter, pandas_series.dt.quarter)\n    df_equals(modin_series.dt.is_month_start, pandas_series.dt.is_month_start)\n    df_equals(modin_series.dt.is_month_end, pandas_series.dt.is_month_end)\n    df_equals(modin_series.dt.is_quarter_start, pandas_series.dt.is_quarter_start)\n    
df_equals(modin_series.dt.is_quarter_end, pandas_series.dt.is_quarter_end)\n    df_equals(modin_series.dt.is_year_start, pandas_series.dt.is_year_start)\n    df_equals(modin_series.dt.is_year_end, pandas_series.dt.is_year_end)\n    df_equals(modin_series.dt.is_leap_year, pandas_series.dt.is_leap_year)\n    df_equals(modin_series.dt.daysinmonth, pandas_series.dt.daysinmonth)\n    df_equals(modin_series.dt.days_in_month, pandas_series.dt.days_in_month)\n    assert modin_series.dt.tz == pandas_series.dt.tz\n    assert modin_series.dt.freq == pandas_series.dt.freq\n    df_equals(modin_series.dt.to_period(\"W\"), pandas_series.dt.to_period(\"W\"))\n    assert_array_equal(\n        modin_series.dt.to_pydatetime(), pandas_series.dt.to_pydatetime()\n    )\n    df_equals(\n        modin_series.dt.tz_localize(None),\n        pandas_series.dt.tz_localize(None),\n    )\n    if timezone:\n        df_equals(\n            modin_series.dt.tz_convert(tz=\"Europe/Berlin\"),\n            pandas_series.dt.tz_convert(tz=\"Europe/Berlin\"),\n        )\n\n    df_equals(modin_series.dt.normalize(), pandas_series.dt.normalize())\n    df_equals(\n        modin_series.dt.strftime(\"%B %d, %Y, %r\"),\n        pandas_series.dt.strftime(\"%B %d, %Y, %r\"),\n    )\n    df_equals(modin_series.dt.round(\"h\"), pandas_series.dt.round(\"h\"))\n    df_equals(modin_series.dt.floor(\"h\"), pandas_series.dt.floor(\"h\"))\n    df_equals(modin_series.dt.ceil(\"h\"), pandas_series.dt.ceil(\"h\"))\n    df_equals(modin_series.dt.month_name(), pandas_series.dt.month_name())\n    df_equals(modin_series.dt.day_name(), pandas_series.dt.day_name())\n\n    modin_series = pd.Series(pd.to_timedelta(np.arange(128), unit=\"d\"))\n    pandas_series = pandas.Series(pandas.to_timedelta(np.arange(128), unit=\"d\"))\n\n    assert_array_equal(\n        modin_series.dt.to_pytimedelta(), pandas_series.dt.to_pytimedelta()\n    )\n    df_equals(modin_series.dt.total_seconds(), pandas_series.dt.total_seconds())\n    
df_equals(modin_series.dt.days, pandas_series.dt.days)\n    df_equals(modin_series.dt.seconds, pandas_series.dt.seconds)\n    df_equals(modin_series.dt.microseconds, pandas_series.dt.microseconds)\n    df_equals(modin_series.dt.nanoseconds, pandas_series.dt.nanoseconds)\n    df_equals(modin_series.dt.components, pandas_series.dt.components)\n\n    data_per = pd.date_range(\"1/1/2012\", periods=128, freq=\"M\")\n    pandas_series = pandas.Series(data_per, index=data_per).dt.to_period()\n    modin_series = pd.Series(data_per, index=data_per).dt.to_period()\n\n    df_equals(modin_series.dt.qyear, pandas_series.dt.qyear)\n    df_equals(modin_series.dt.start_time, pandas_series.dt.start_time)\n    df_equals(modin_series.dt.end_time, pandas_series.dt.end_time)\n    df_equals(modin_series.dt.to_timestamp(), pandas_series.dt.to_timestamp())\n\n    def dt_with_empty_partition(lib):\n        # For context, see https://github.com/modin-project/modin/issues/5112\n        df = (\n            pd.concat(\n                [pd.DataFrame([None]), pd.DataFrame([pd.to_timedelta(1)])], axis=1\n            )\n            .dropna(axis=1)\n            .squeeze(1)\n        )\n        # BaseOnPython had a single partition after the concat, and it\n        # maintains that partition after dropna and squeeze. 
In other execution modes,\n        # the series should have two column partitions, one of which is empty.\n        if isinstance(df, pd.DataFrame) and get_current_execution() != \"BaseOnPython\":\n            assert df._query_compiler._modin_frame._partitions.shape == (1, 2)\n        return df.dt.days\n\n    eval_general(pd, pandas, dt_with_empty_partition)\n\n    if timezone is None:\n        data = pd.period_range(\"2016-12-31\", periods=128, freq=\"D\")\n        modin_series = pd.Series(data)\n        pandas_series = pandas.Series(data)\n        df_equals(modin_series.dt.asfreq(\"min\"), pandas_series.dt.asfreq(\"min\"))\n\n\n@pytest.mark.parametrize(\n    \"data\", test_data_with_duplicates_values, ids=test_data_with_duplicates_keys\n)\n@pytest.mark.parametrize(\n    \"keep\", [\"last\", \"first\", False], ids=[\"last\", \"first\", \"False\"]\n)\ndef test_duplicated(data, keep):\n    modin_series, pandas_series = create_test_series(data)\n    modin_result = modin_series.duplicated(keep=keep)\n    df_equals(modin_result, pandas_series.duplicated(keep=keep))\n\n\ndef test_duplicated_keeps_name_issue_7375():\n    # Ensure that the name property of a series is preserved across duplicated\n    modin_series, pandas_series = create_test_series([1, 2, 3, 1], name=\"a\")\n    df_equals(modin_series.duplicated(), pandas_series.duplicated())\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_empty(data):\n    modin_series, pandas_series = create_test_series(data)\n    assert modin_series.empty == pandas_series.empty\n\n\ndef test_empty_series():\n    modin_series = pd.Series()\n    assert modin_series.empty\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_eq(data):\n    modin_series, pandas_series = create_test_series(data)\n    inter_df_math_helper(modin_series, pandas_series, \"eq\")\n\n\n@pytest.mark.parametrize(\n    \"series1_data,series2_data,expected_pandas_equals\",\n    [\n        
pytest.param([1], [0], False, id=\"single_unequal_values\"),\n        pytest.param([None], [None], True, id=\"single_none_values\"),\n        pytest.param(\n            pandas.Series(1, name=\"series1\"),\n            pandas.Series(1, name=\"series2\"),\n            True,\n            id=\"different_names\",\n        ),\n        pytest.param(\n            pandas.Series([1], index=[1]),\n            pandas.Series([1], index=[1.0]),\n            True,\n            id=\"different_index_types\",\n        ),\n        pytest.param(\n            pandas.Series([1], index=[1]),\n            pandas.Series([1], index=[2]),\n            False,\n            id=\"different_index_values\",\n        ),\n        pytest.param([1], [1.0], False, id=\"different_value_types\"),\n        pytest.param(\n            [1, 2],\n            [1, 2],\n            True,\n            id=\"equal_series_of_length_two\",\n        ),\n        pytest.param(\n            [1, 2],\n            [1, 3],\n            False,\n            id=\"unequal_series_of_length_two\",\n        ),\n        pytest.param(\n            [[1, 2]],\n            [[1]],\n            False,\n            id=\"different_lengths\",\n        ),\n    ],\n)\ndef test_equals(series1_data, series2_data, expected_pandas_equals):\n    modin_series1, pandas_df1 = create_test_series(series1_data)\n    modin_series2, pandas_df2 = create_test_series(series2_data)\n\n    pandas_equals = pandas_df1.equals(pandas_df2)\n    assert pandas_equals == expected_pandas_equals, (\n        \"Test expected pandas to say the series were\"\n        + f\"{'' if expected_pandas_equals else ' not'} equal, but they were\"\n        + f\"{' not' if expected_pandas_equals else ''} equal.\"\n    )\n    assert modin_series1.equals(modin_series2) == pandas_equals\n    assert modin_series1.equals(pandas_df2) == pandas_equals\n\n\ndef test_equals_several_partitions():\n    modin_series1 = pd.concat([pd.Series([0, 1]), pd.Series([None, 1])])\n    modin_series2 = 
pd.concat([pd.Series([0, 1]), pd.Series([1, None])])\n    assert not modin_series1.equals(modin_series2)\n\n\ndef test_equals_with_nans():\n    ser1 = pd.Series([0, 1, None], dtype=\"uint8[pyarrow]\")\n    ser2 = pd.Series([None, None, None], dtype=\"uint8[pyarrow]\")\n    assert not ser1.equals(ser2)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_ewm(data):\n    modin_series, _ = create_test_series(data)  # noqa: F841\n    with warns_that_defaulting_to_pandas_if(\n        not df_or_series_using_native_execution(modin_series)\n    ):\n        modin_series.ewm(halflife=6)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_expanding(data):\n    modin_series, pandas_series = create_test_series(data)  # noqa: F841\n    df_equals(modin_series.expanding().sum(), pandas_series.expanding().sum())\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_factorize(data):\n    modin_series, _ = create_test_series(data)  # noqa: F841\n    with warns_that_defaulting_to_pandas_if(\n        not df_or_series_using_native_execution(modin_series)\n    ):\n        modin_series.factorize()\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_ffill(data):\n    modin_series, pandas_series = create_test_series(data)\n    df_equals(modin_series.ffill(), pandas_series.ffill())\n    # inplace\n    modin_series_cp = modin_series.copy()\n    pandas_series_cp = pandas_series.copy()\n    modin_series_cp.ffill(inplace=True)\n    pandas_series_cp.ffill(inplace=True)\n    df_equals(modin_series_cp, pandas_series_cp)\n\n\n@pytest.mark.parametrize(\"limit_area\", [None, \"inside\", \"outside\"])\n@pytest.mark.parametrize(\"method\", [\"ffill\", \"bfill\"])\ndef test_ffill_bfill_limit_area(method, limit_area):\n    modin_ser, pandas_ser = create_test_series([1, None, 2, None])\n    eval_general(\n        modin_ser, pandas_ser, lambda ser: getattr(ser, 
method)(limit_area=limit_area)\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"reindex\", [None, 2, -2])\n@pytest.mark.parametrize(\"limit\", [None, 1, 2, 0.5, -1, -2, 1.5])\n@pytest.mark.exclude_in_sanity\ndef test_fillna(data, reindex, limit):\n    modin_series, pandas_series = create_test_series(data)\n    index = pandas_series.index\n    pandas_replace_series = index.to_series().sample(frac=1)\n    modin_replace_series = pd.Series(pandas_replace_series)\n    replace_dict = pandas_replace_series.to_dict()\n\n    if reindex is not None:\n        if reindex > 0:\n            pandas_series = pandas_series[:reindex].reindex(index)\n        else:\n            pandas_series = pandas_series[reindex:].reindex(index)\n        # Because of bug #3178 modin Series has to be created from pandas\n        # Series instead of performing the same slice and reindex operations.\n        modin_series = pd.Series(pandas_series)\n\n    if isinstance(limit, float):\n        limit = int(len(modin_series) * limit)\n    if limit is not None and limit < 0:\n        limit = len(modin_series) + limit\n\n    df_equals(modin_series.fillna(0, limit=limit), pandas_series.fillna(0, limit=limit))\n    df_equals(\n        modin_series.fillna(method=\"bfill\", limit=limit),\n        pandas_series.fillna(method=\"bfill\", limit=limit),\n    )\n    df_equals(\n        modin_series.fillna(method=\"ffill\", limit=limit),\n        pandas_series.fillna(method=\"ffill\", limit=limit),\n    )\n    df_equals(\n        modin_series.fillna(modin_replace_series, limit=limit),\n        pandas_series.fillna(pandas_replace_series, limit=limit),\n    )\n    df_equals(\n        modin_series.fillna(replace_dict, limit=limit),\n        pandas_series.fillna(replace_dict, limit=limit),\n    )\n\n\n@pytest.mark.xfail(reason=\"Using pandas Series.\")\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_filter(data):\n    
modin_series = create_test_series(data)\n\n    with pytest.raises(NotImplementedError):\n        modin_series.filter(None, None, None)\n\n\ndef test_first():\n    i = pd.date_range(\"2010-04-09\", periods=400, freq=\"2D\")\n    modin_series = pd.Series(list(range(400)), index=i)\n    pandas_series = pandas.Series(list(range(400)), index=i)\n    with pytest.warns(FutureWarning, match=\"first is deprecated and will be removed\"):\n        modin_result = modin_series.first(\"3D\")\n    df_equals(modin_result, pandas_series.first(\"3D\"))\n    df_equals(modin_series.first(\"20D\"), pandas_series.first(\"20D\"))\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_first_valid_index(data):\n    modin_series, pandas_series = create_test_series(data)\n    df_equals(modin_series.first_valid_index(), pandas_series.first_valid_index())\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_floordiv(data):\n    modin_series, pandas_series = create_test_series(data)\n    inter_df_math_helper(modin_series, pandas_series, \"floordiv\")\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_ge(data):\n    modin_series, pandas_series = create_test_series(data)\n    inter_df_math_helper(modin_series, pandas_series, \"ge\")\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_get(data):\n    modin_series, pandas_series = create_test_series(data)\n    for key in modin_series.keys():\n        df_equals(modin_series.get(key), pandas_series.get(key))\n    df_equals(\n        modin_series.get(\"NO_EXIST\", \"DEFAULT\"),\n        pandas_series.get(\"NO_EXIST\", \"DEFAULT\"),\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_gt(data):\n    modin_series, pandas_series = create_test_series(data)\n    inter_df_math_helper(modin_series, pandas_series, \"gt\")\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, 
ids=test_data_keys)\ndef test_hasnans(data):\n    modin_series, pandas_series = create_test_series(data)\n    assert modin_series.hasnans == pandas_series.hasnans\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"n\", int_arg_values, ids=arg_keys(\"n\", int_arg_keys))\ndef test_head(data, n):\n    modin_series, pandas_series = create_test_series(data)\n\n    df_equals(modin_series.head(n), pandas_series.head(n))\n    df_equals(\n        modin_series.head(len(modin_series)), pandas_series.head(len(pandas_series))\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_hist(data):\n    modin_series, _ = create_test_series(data)  # noqa: F841\n    with warns_that_defaulting_to_pandas_if(\n        not df_or_series_using_native_execution(modin_series)\n    ):\n        modin_series.hist(None)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_iat(data):\n    modin_series, pandas_series = create_test_series(data)\n    df_equals(modin_series.iat[0], pandas_series.iat[0])\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"skipna\", [False, True])\ndef test_idxmax(data, skipna):\n    modin_series, pandas_series = create_test_series(data)\n    pandas_result = pandas_series.idxmax(skipna=skipna)\n    modin_result = modin_series.idxmax(skipna=skipna)\n    df_equals(modin_result, pandas_result)\n\n    pandas_result = pandas_series.T.idxmax(skipna=skipna)\n    modin_result = modin_series.T.idxmax(skipna=skipna)\n    df_equals(modin_result, pandas_result)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"skipna\", [False, True])\ndef test_idxmin(data, skipna):\n    modin_series, pandas_series = create_test_series(data)\n    pandas_result = pandas_series.idxmin(skipna=skipna)\n    modin_result = modin_series.idxmin(skipna=skipna)\n    
df_equals(modin_result, pandas_result)\n\n    pandas_result = pandas_series.T.idxmin(skipna=skipna)\n    modin_result = modin_series.T.idxmin(skipna=skipna)\n    df_equals(modin_result, pandas_result)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_iloc(request, data):\n    modin_series, pandas_series = create_test_series(data)\n\n    if not name_contains(request.node.name, [\"empty_data\"]):\n        # Scalar\n        np.testing.assert_equal(modin_series.iloc[0], pandas_series.iloc[0])\n\n        # Series\n        df_equals(modin_series.iloc[1:], pandas_series.iloc[1:])\n        df_equals(modin_series.iloc[1:2], pandas_series.iloc[1:2])\n        df_equals(modin_series.iloc[[1, 2]], pandas_series.iloc[[1, 2]])\n\n        # Write Item\n        modin_series.iloc[[1, 2]] = 42\n        pandas_series.iloc[[1, 2]] = 42\n        df_equals(modin_series, pandas_series)\n        with pytest.raises(IndexingError):\n            modin_series.iloc[1:, 1]\n    else:\n        with pytest.raises(IndexError):\n            modin_series.iloc[0]\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_index(data):\n    modin_series, pandas_series = create_test_series(data)\n    df_equals(modin_series.index, pandas_series.index)\n    with pytest.raises(ValueError):\n        modin_series.index = list(modin_series.index) + [999]\n\n    modin_series.index = modin_series.index.map(str)\n    pandas_series.index = pandas_series.index.map(str)\n    df_equals(modin_series.index, pandas_series.index)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_interpolate(data):\n    modin_series, _ = create_test_series(data)  # noqa: F841\n    with warns_that_defaulting_to_pandas_if(\n        not df_or_series_using_native_execution(modin_series)\n    ):\n        modin_series.interpolate()\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef 
test_is_monotonic_decreasing(data):\n    modin_series, pandas_series = create_test_series(data)\n    assert modin_series.is_monotonic_decreasing == pandas_series.is_monotonic_decreasing\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_is_monotonic_increasing(data):\n    modin_series, pandas_series = create_test_series(data)\n    assert modin_series.is_monotonic_increasing == pandas_series.is_monotonic_increasing\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_is_unique(data):\n    modin_series, pandas_series = create_test_series(data)\n    assert modin_series.is_unique == pandas_series.is_unique\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_isin(data):\n    modin_series, pandas_series = create_test_series(data)\n    val = [1, 2, 3, 4]\n    pandas_result = pandas_series.isin(val)\n    modin_result = modin_series.isin(val)\n    df_equals(modin_result, pandas_result)\n\n\ndef test_isin_with_series():\n    modin_series1, pandas_series1 = create_test_series([1, 2, 3])\n    modin_series2, pandas_series2 = create_test_series([1, 2, 3, 4, 5])\n\n    eval_general(\n        (modin_series1, modin_series2),\n        (pandas_series1, pandas_series2),\n        lambda srs: srs[0].isin(srs[1]),\n    )\n\n    # Verify that Series actually behaves like Series and ignores unmatched indices on '.isin'\n    modin_series1, pandas_series1 = create_test_series([1, 2, 3], index=[10, 11, 12])\n\n    eval_general(\n        (modin_series1, modin_series2),\n        (pandas_series1, pandas_series2),\n        lambda srs: srs[0].isin(srs[1]),\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_isnull(data):\n    modin_series, pandas_series = create_test_series(data)\n    df_equals(modin_series.isnull(), pandas_series.isnull())\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_items(data):\n    
modin_series, pandas_series = create_test_series(data)\n\n    modin_items = modin_series.items()\n    pandas_items = pandas_series.items()\n    for modin_item, pandas_item in zip(modin_items, pandas_items):\n        modin_index, modin_scalar = modin_item\n        pandas_index, pandas_scalar = pandas_item\n        df_equals(modin_scalar, pandas_scalar)\n        assert pandas_index == modin_index\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_keys(data):\n    modin_series, pandas_series = create_test_series(data)\n    df_equals(modin_series.keys(), pandas_series.keys())\n\n\ndef test_kurtosis_alias():\n    # This is an optimization. If it fails, Series.kurt should be tested explicitly\n    # in tests: `test_kurt_kurtosis`, `test_kurt_kurtosis_level`.\n    _assert_casting_functions_wrap_same_implementation(\n        pd.Series.kurt, pd.Series.kurtosis\n    )\n\n\n@pytest.mark.parametrize(\"axis\", [0, 1])\n@pytest.mark.parametrize(\"skipna\", [False, True])\ndef test_kurtosis(axis, skipna):\n    expected_exception = None\n    if axis:\n        expected_exception = ValueError(\"No axis named 1 for object type Series\")\n    eval_general(\n        *create_test_series(test_data[\"float_nan_data\"]),\n        lambda df: df.kurtosis(axis=axis, skipna=skipna),\n        expected_exception=expected_exception,\n    )\n\n\n@pytest.mark.parametrize(\"axis\", [\"rows\", \"columns\"])\n@pytest.mark.parametrize(\"numeric_only\", [False, True])\ndef test_kurtosis_numeric_only(axis, numeric_only):\n    expected_exception = None\n    if axis:\n        expected_exception = ValueError(\"No axis named columns for object type Series\")\n    eval_general(\n        *create_test_series(test_data_diff_dtype),\n        lambda df: df.kurtosis(axis=axis, numeric_only=numeric_only),\n        expected_exception=expected_exception,\n    )\n\n\ndef test_last():\n    modin_index = pd.date_range(\"2010-04-09\", periods=400, freq=\"2D\")\n    pandas_index = 
pandas.date_range(\"2010-04-09\", periods=400, freq=\"2D\")\n    modin_series = pd.Series(list(range(400)), index=modin_index)\n    pandas_series = pandas.Series(list(range(400)), index=pandas_index)\n    with pytest.warns(FutureWarning, match=\"last is deprecated and will be removed\"):\n        modin_result = modin_series.last(\"3D\")\n    df_equals(modin_result, pandas_series.last(\"3D\"))\n    df_equals(modin_series.last(\"20D\"), pandas_series.last(\"20D\"))\n\n\n@pytest.mark.parametrize(\"func\", [\"all\", \"any\", \"count\"])\ndef test_index_order(func):\n    # see #1708 and #1869 for details\n    s_modin, s_pandas = create_test_series(test_data[\"float_nan_data\"])\n    rows_number = len(s_modin.index)\n    level_0 = np.random.choice([x for x in range(10)], rows_number)\n    level_1 = np.random.choice([x for x in range(10)], rows_number)\n    index = pandas.MultiIndex.from_arrays([level_0, level_1])\n\n    s_modin.index = index\n    s_pandas.index = index\n\n    # The result of the operation is not a Series, `.index` is missed\n    df_equals(\n        getattr(s_modin, func)(),\n        getattr(s_pandas, func)(),\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_last_valid_index(data):\n    modin_series, pandas_series = create_test_series(data)\n    assert modin_series.last_valid_index() == (pandas_series.last_valid_index())\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_le(data):\n    modin_series, pandas_series = create_test_series(data)\n    inter_df_math_helper(modin_series, pandas_series, \"le\")\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_loc(data):\n    modin_series, pandas_series = create_test_series(data)\n    for v in modin_series.index:\n        df_equals(modin_series.loc[v], pandas_series.loc[v])\n        df_equals(modin_series.loc[v:], pandas_series.loc[v:])\n\n    indices = [True if i % 3 == 0 else False for i in 
range(len(modin_series.index))]\n    modin_result = modin_series.loc[indices]\n    pandas_result = pandas_series.loc[indices]\n    df_equals(modin_result, pandas_result)\n\n    # From issue #1988\n    index = pd.MultiIndex.from_product([np.arange(10), np.arange(10)], names=[\"f\", \"s\"])\n    data = np.arange(100)\n    modin_series = pd.Series(data, index=index).sort_index()\n    pandas_series = pandas.Series(data, index=index).sort_index()\n    modin_result = modin_series.loc[\n        (slice(None), 1),\n    ]  # fmt: skip\n    pandas_result = pandas_series.loc[\n        (slice(None), 1),\n    ]  # fmt: skip\n    df_equals(modin_result, pandas_result)\n\n\ndef test_loc_with_boolean_series():\n    modin_series, pandas_series = create_test_series([1, 2, 3])\n    modin_mask, pandas_mask = create_test_series([True, False, False])\n    modin_result = modin_series.loc[modin_mask]\n    pandas_result = pandas_series.loc[pandas_mask]\n    df_equals(modin_result, pandas_result)\n\n\n# This tests the bug from https://github.com/modin-project/modin/issues/3736\ndef test_loc_setting_categorical_series():\n    modin_series = pd.Series([\"a\", \"b\", \"c\"], dtype=\"category\")\n    pandas_series = pandas.Series([\"a\", \"b\", \"c\"], dtype=\"category\")\n    modin_series.loc[1:3] = \"a\"\n    pandas_series.loc[1:3] = \"a\"\n    df_equals(modin_series, pandas_series)\n\n\n# This tests the bug from https://github.com/modin-project/modin/issues/3736\ndef test_iloc_assigning_scalar_none_to_string_series():\n    data = [\"A\"]\n    modin_series, pandas_series = create_test_series(data, dtype=\"string\")\n    modin_series.iloc[0] = None\n    pandas_series.iloc[0] = None\n    df_equals(modin_series, pandas_series)\n\n\ndef test_set_ordered_categorical_column():\n    data = {\"a\": [1, 2, 3], \"b\": [4, 5, 6]}\n    mdf = pd.DataFrame(data)\n    pdf = pandas.DataFrame(data)\n    mdf[\"a\"] = pd.Categorical(mdf[\"a\"], ordered=True)\n    pdf[\"a\"] = pandas.Categorical(pdf[\"a\"], 
ordered=True)\n    df_equals(mdf, pdf)\n\n    modin_categories = mdf[\"a\"].dtype\n    pandas_categories = pdf[\"a\"].dtype\n    assert modin_categories == pandas_categories\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_lt(data):\n    modin_series, pandas_series = create_test_series(data)\n    inter_df_math_helper(modin_series, pandas_series, \"lt\")\n\n\n@pytest.mark.parametrize(\"na_values\", [\"ignore\", None], ids=[\"na_ignore\", \"na_none\"])\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_map(data, na_values):\n    modin_series, pandas_series = create_test_series(data)\n    df_equals(\n        modin_series.map(str, na_action=na_values),\n        pandas_series.map(str, na_action=na_values),\n    )\n    mapper = {i: str(i) for i in range(100)}\n    df_equals(\n        modin_series.map(mapper, na_action=na_values),\n        pandas_series.map(mapper, na_action=na_values),\n        # https://github.com/modin-project/modin/issues/5967\n        check_dtypes=False,\n    )\n\n    # Return list objects\n    modin_series_lists = modin_series.map(lambda s: [s, s, s])\n    pandas_series_lists = pandas_series.map(lambda s: [s, s, s])\n    df_equals(modin_series_lists, pandas_series_lists)\n\n    # Index into list objects\n    df_equals(\n        modin_series_lists.map(lambda lst: lst[0]),\n        pandas_series_lists.map(lambda lst: lst[0]),\n    )\n\n\ndef test_mask():\n    modin_series = pd.Series(np.arange(10))\n    m = modin_series % 3 == 0\n    with warns_that_defaulting_to_pandas_if(not df_or_series_using_native_execution(m)):\n        try:\n            modin_series.mask(~m, -modin_series)\n        except ValueError:\n            pass\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"skipna\", [False, True])\ndef test_max(data, skipna):\n    eval_general(*create_test_series(data), lambda df: 
df.max(skipna=skipna))\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"skipna\", [False, True])\ndef test_mean(data, skipna):\n    eval_general(*create_test_series(data), lambda df: df.mean(skipna=skipna))\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"skipna\", [False, True])\ndef test_median(data, skipna):\n    eval_general(*create_test_series(data), lambda df: df.median(skipna=skipna))\n\n\n@pytest.mark.parametrize(\n    \"method\", [\"median\", \"skew\", \"std\", \"sum\", \"var\", \"prod\", \"sem\"]\n)\ndef test_median_skew_std_sum_var_prod_sem_1953(method):\n    # See #1953 for details\n    data = [3, 3, 3, 3, 3, 3, 3, 3, 3]\n    arrays = [\n        [\"1\", \"1\", \"1\", \"2\", \"2\", \"2\", \"3\", \"3\", \"3\"],\n        [\"1\", \"2\", \"3\", \"4\", \"5\", \"6\", \"7\", \"8\", \"9\"],\n    ]\n    modin_s = pd.Series(data, index=arrays)\n    pandas_s = pandas.Series(data, index=arrays)\n    eval_general(modin_s, pandas_s, lambda s: getattr(s, method)())\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"index\", [True, False], ids=[\"True\", \"False\"])\ndef test_memory_usage(data, index):\n    modin_series, pandas_series = create_test_series(data)\n    df_equals(\n        modin_series.memory_usage(index=index), pandas_series.memory_usage(index=index)\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"skipna\", [False, True])\ndef test_min(data, skipna):\n    eval_general(*create_test_series(data), lambda df: df.min(skipna=skipna))\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_mod(data):\n    modin_series, pandas_series = create_test_series(data)\n    inter_df_math_helper(modin_series, pandas_series, \"mod\")\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, 
ids=test_data_keys)\ndef test_mode(data):\n    modin_series, pandas_series = create_test_series(data)\n    df_equals(modin_series.mode(), pandas_series.mode())\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_mul(data):\n    modin_series, pandas_series = create_test_series(data)\n    inter_df_math_helper(modin_series, pandas_series, \"mul\")\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_multiply(data):\n    modin_series, pandas_series = create_test_series(data)\n    inter_df_math_helper(modin_series, pandas_series, \"multiply\")\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_name(data):\n    modin_series, pandas_series = create_test_series(data)\n    assert modin_series.name == pandas_series.name\n    modin_series.name = pandas_series.name = \"New_name\"\n    assert modin_series.name == pandas_series.name\n    assert modin_series._query_compiler.columns == [\"New_name\"]\n\n\ndef test_tuple_name():\n    names = [(\"a\", 1), (\"a\", \"b\", \"c\"), \"flat\"]\n    s = pd.Series(name=names[0])\n    # The internal representation of the Series stores the name as a column label.\n    # When it is a tuple, this label is a MultiIndex object, and this test ensures that\n    # the Series's name property remains a tuple.\n    assert s.name == names[0]\n    assert isinstance(s.name, tuple)\n    # Setting the name to a tuple of a different level or a non-tuple should not error.\n    s.name = names[1]\n    assert s.name == names[1]\n    assert isinstance(s.name, tuple)\n    s.name = names[2]\n    assert s.name == names[2]\n    assert isinstance(s.name, str)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_nbytes(data):\n    modin_series, pandas_series = create_test_series(data)\n    assert modin_series.nbytes == pandas_series.nbytes\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef 
test_ndim(data):\n    modin_series, _ = create_test_series(data)  # noqa: F841\n    assert modin_series.ndim == 1\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_ne(data):\n    modin_series, pandas_series = create_test_series(data)\n    inter_df_math_helper(modin_series, pandas_series, \"ne\")\n\n\n@pytest.mark.xfail(reason=\"Using pandas Series.\")\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_nlargest(data):\n    modin_series = create_test_series(data)\n\n    with pytest.raises(NotImplementedError):\n        modin_series.nlargest(None)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_notnull(data):\n    modin_series, pandas_series = create_test_series(data)\n    df_equals(modin_series.notnull(), pandas_series.notnull())\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_nsmallest(data):\n    modin_series, pandas_series = create_test_series(data)\n    df_equals(\n        modin_series.nsmallest(n=5, keep=\"first\"),\n        pandas_series.nsmallest(n=5, keep=\"first\"),\n    )\n    df_equals(\n        modin_series.nsmallest(n=10, keep=\"first\"),\n        pandas_series.nsmallest(n=10, keep=\"first\"),\n    )\n    df_equals(\n        modin_series.nsmallest(n=10, keep=\"last\"),\n        pandas_series.nsmallest(n=10, keep=\"last\"),\n    )\n    df_equals(modin_series.nsmallest(keep=\"all\"), pandas_series.nsmallest(keep=\"all\"))\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"dropna\", [True, False], ids=[\"True\", \"False\"])\ndef test_nunique(data, dropna):\n    modin_series, pandas_series = create_test_series(data)\n    df_equals(modin_series.nunique(dropna=dropna), pandas_series.nunique(dropna=dropna))\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_pct_change(data):\n    modin_series, pandas_series = 
create_test_series(data)\n    with warns_that_defaulting_to_pandas_if(\n        not df_or_series_using_native_execution(modin_series)\n    ):\n        modin_series.pct_change()\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_pipe(data):\n    modin_series, pandas_series = create_test_series(data)\n    n = len(modin_series.index)\n    a, b, c = 2 % n, 0, 3 % n\n\n    def h(x):\n        return x.dropna()\n\n    def g(x, arg1=0):\n        for _ in range(arg1):\n            x = (pd if isinstance(x, pd.Series) else pandas).concat((x, x))\n        return x\n\n    def f(x, arg2=0, arg3=0):\n        return x.drop(x.index[[arg2, arg3]])\n\n    df_equals(\n        f(g(h(modin_series), arg1=a), arg2=b, arg3=c),\n        (modin_series.pipe(h).pipe(g, arg1=a).pipe(f, arg2=b, arg3=c)),\n    )\n    df_equals(\n        (modin_series.pipe(h).pipe(g, arg1=a).pipe(f, arg2=b, arg3=c)),\n        (pandas_series.pipe(h).pipe(g, arg1=a).pipe(f, arg2=b, arg3=c)),\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_plot(request, data):\n    modin_series, pandas_series = create_test_series(data)\n\n    if name_contains(request.node.name, numeric_dfs):\n        # We have to test this way because equality in plots means same object.\n        zipped_plot_lines = zip(modin_series.plot().lines, pandas_series.plot().lines)\n        for left, right in zipped_plot_lines:\n            if isinstance(left.get_xdata(), np.ma.core.MaskedArray) and isinstance(\n                right.get_xdata(), np.ma.core.MaskedArray\n            ):\n                assert all((left.get_xdata() == right.get_xdata()).data)\n            else:\n                assert np.array_equal(left.get_xdata(), right.get_xdata())\n            if isinstance(left.get_ydata(), np.ma.core.MaskedArray) and isinstance(\n                right.get_ydata(), np.ma.core.MaskedArray\n            ):\n                assert all((left.get_ydata() == 
right.get_ydata()).data)\n            else:\n                assert np.array_equal(left.get_xdata(), right.get_xdata())\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_pop(data):\n    modin_series, pandas_series = create_test_series(data)\n\n    for key in modin_series.keys():\n        df_equals(modin_series.pop(key), pandas_series.pop(key))\n        df_equals(modin_series, pandas_series)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_pow(data):\n    modin_series, pandas_series = create_test_series(data)\n    inter_df_math_helper(modin_series, pandas_series, \"pow\")\n\n\ndef test_product_alias():\n    _assert_casting_functions_wrap_same_implementation(\n        pd.Series.prod, pd.Series.product\n    )\n\n\n@pytest.mark.parametrize(\"axis\", [0, 1])\n@pytest.mark.parametrize(\"skipna\", [False, True])\ndef test_prod(axis, skipna):\n    expected_exception = None\n    if axis:\n        expected_exception = ValueError(\"No axis named 1 for object type Series\")\n    eval_general(\n        *create_test_series(test_data[\"float_nan_data\"]),\n        lambda s: s.prod(axis=axis, skipna=skipna),\n        expected_exception=expected_exception,\n    )\n\n\n@pytest.mark.parametrize(\"numeric_only\", [False, True])\n@pytest.mark.parametrize(\n    \"min_count\", int_arg_values, ids=arg_keys(\"min_count\", int_arg_keys)\n)\ndef test_prod_specific(min_count, numeric_only):\n    eval_general(\n        *create_test_series(test_data_diff_dtype),\n        lambda df: df.prod(min_count=min_count, numeric_only=numeric_only),\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"q\", quantiles_values, ids=quantiles_keys)\ndef test_quantile(request, data, q):\n    modin_series, pandas_series = create_test_series(data)\n    if not name_contains(request.node.name, no_numeric_dfs):\n        df_equals(modin_series.quantile(q), 
pandas_series.quantile(q))\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_radd(data):\n    modin_series, pandas_series = create_test_series(data)\n    inter_df_math_helper(modin_series, pandas_series, \"radd\")\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\n    \"na_option\", [\"keep\", \"top\", \"bottom\"], ids=[\"keep\", \"top\", \"bottom\"]\n)\ndef test_rank(data, na_option):\n    modin_series, pandas_series = create_test_series(data)\n    try:\n        pandas_result = pandas_series.rank(na_option=na_option)\n    except Exception as err:\n        with pytest.raises(type(err)):\n            modin_series.rank(na_option=na_option)\n    else:\n        modin_result = modin_series.rank(na_option=na_option)\n        df_equals(modin_result, pandas_result)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"order\", [None, \"C\", \"F\", \"A\", \"K\"])\ndef test_ravel(data, order):\n    modin_series, pandas_series = create_test_series(data)\n    np.testing.assert_equal(\n        modin_series.ravel(order=order), pandas_series.ravel(order=order)\n    )\n\n\n@pytest.mark.parametrize(\n    \"data\",\n    [\n        pandas.Categorical(np.arange(1000), ordered=True),\n        pandas.Categorical(np.arange(1000), ordered=False),\n        pandas.Categorical(np.arange(1000), categories=np.arange(500), ordered=True),\n        pandas.Categorical(np.arange(1000), categories=np.arange(500), ordered=False),\n    ],\n)\n@pytest.mark.parametrize(\"order\", [None, \"C\", \"F\", \"A\", \"K\"])\ndef test_ravel_category(data, order):\n    modin_series, pandas_series = create_test_series(data)\n    categories_equals(modin_series.ravel(order=order), pandas_series.ravel(order=order))\n\n\n@pytest.mark.parametrize(\n    \"data\",\n    [\n        pandas.Categorical(np.arange(10), ordered=True),\n        pandas.Categorical(np.arange(10), 
ordered=False),\n        pandas.Categorical(np.arange(10), categories=np.arange(5), ordered=True),\n        pandas.Categorical(np.arange(10), categories=np.arange(5), ordered=False),\n    ],\n)\n@pytest.mark.parametrize(\"order\", [None, \"C\", \"F\", \"A\", \"K\"])\ndef test_ravel_simple_category(data, order):\n    modin_series, pandas_series = create_test_series(data)\n    categories_equals(modin_series.ravel(order=order), pandas_series.ravel(order=order))\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_rdiv(data):\n    modin_series, pandas_series = create_test_series(data)\n    inter_df_math_helper(modin_series, pandas_series, \"rdiv\")\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_reindex(data):\n    modin_series, pandas_series = create_test_series(data)\n    pandas_result = pandas_series.reindex(\n        list(pandas_series.index) + [\"_A_NEW_ROW\"], fill_value=0\n    )\n    modin_result = modin_series.reindex(\n        list(modin_series.index) + [\"_A_NEW_ROW\"], fill_value=0\n    )\n    df_equals(pandas_result, modin_result)\n\n    frame_data = {\n        \"col1\": [0, 1, 2, 3],\n        \"col2\": [4, 5, 6, 7],\n        \"col3\": [8, 9, 10, 11],\n        \"col4\": [12, 13, 14, 15],\n        \"col5\": [0, 0, 0, 0],\n    }\n    pandas_df = pandas.DataFrame(frame_data)\n    modin_df = pd.DataFrame(frame_data)\n\n    for col in pandas_df.columns:\n        modin_series = modin_df[col]\n        pandas_series = pandas_df[col]\n        df_equals(\n            modin_series.reindex([0, 3, 2, 1]), pandas_series.reindex([0, 3, 2, 1])\n        )\n        df_equals(modin_series.reindex([0, 6, 2]), pandas_series.reindex([0, 6, 2]))\n        df_equals(\n            modin_series.reindex(index=[0, 1, 5]),\n            pandas_series.reindex(index=[0, 1, 5]),\n        )\n\n    # MultiIndex\n    modin_series, pandas_series = create_test_series(data)\n    modin_series.index, pandas_series.index = 
[\n        generate_multiindex(len(pandas_series))\n    ] * 2\n    pandas_result = pandas_series.reindex(list(reversed(pandas_series.index)))\n    modin_result = modin_series.reindex(list(reversed(modin_series.index)))\n    df_equals(pandas_result, modin_result)\n\n\ndef test_reindex_like():\n    o_data = [\n        [24.3, 75.7, \"high\"],\n        [31, 87.8, \"high\"],\n        [22, 71.6, \"medium\"],\n        [35, 95, \"medium\"],\n    ]\n    o_columns = [\"temp_celsius\", \"temp_fahrenheit\", \"windspeed\"]\n    o_index = pd.date_range(start=\"2014-02-12\", end=\"2014-02-15\", freq=\"D\")\n    new_data = [[28, \"low\"], [30, \"low\"], [35.1, \"medium\"]]\n    new_columns = [\"temp_celsius\", \"windspeed\"]\n    new_index = pd.DatetimeIndex([\"2014-02-12\", \"2014-02-13\", \"2014-02-15\"])\n    modin_df1 = pd.DataFrame(o_data, columns=o_columns, index=o_index)\n    modin_df2 = pd.DataFrame(new_data, columns=new_columns, index=new_index)\n    modin_result = modin_df2[\"windspeed\"].reindex_like(modin_df1[\"windspeed\"])\n\n    pandas_df1 = pandas.DataFrame(o_data, columns=o_columns, index=o_index)\n    pandas_df2 = pandas.DataFrame(new_data, columns=new_columns, index=new_index)\n    pandas_result = pandas_df2[\"windspeed\"].reindex_like(pandas_df1[\"windspeed\"])\n    df_equals(modin_result, pandas_result)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_rename(data):\n    modin_series, pandas_series = create_test_series(data)\n    new_name = \"NEW_NAME\"\n    df_equals(modin_series.rename(new_name), pandas_series.rename(new_name))\n\n    modin_series_cp = modin_series.copy()\n    pandas_series_cp = pandas_series.copy()\n    modin_series_cp.rename(new_name, inplace=True)\n    pandas_series_cp.rename(new_name, inplace=True)\n    df_equals(modin_series_cp, pandas_series_cp)\n\n    modin_result = modin_series.rename(\"{}__\".format)\n    pandas_result = pandas_series.rename(\"{}__\".format)\n    df_equals(modin_result, 
pandas_result)\n\n\ndef test_reorder_levels():\n    data = np.random.randint(1, 100, 12)\n    modin_series = pd.Series(\n        data,\n        index=pd.MultiIndex.from_tuples(\n            [\n                (num, letter, color)\n                for num in range(1, 3)\n                for letter in [\"a\", \"b\", \"c\"]\n                for color in [\"Red\", \"Green\"]\n            ],\n            names=[\"Number\", \"Letter\", \"Color\"],\n        ),\n    )\n    pandas_series = pandas.Series(\n        data,\n        index=pandas.MultiIndex.from_tuples(\n            [\n                (num, letter, color)\n                for num in range(1, 3)\n                for letter in [\"a\", \"b\", \"c\"]\n                for color in [\"Red\", \"Green\"]\n            ],\n            names=[\"Number\", \"Letter\", \"Color\"],\n        ),\n    )\n    modin_result = modin_series.reorder_levels([\"Letter\", \"Color\", \"Number\"])\n    pandas_result = pandas_series.reorder_levels([\"Letter\", \"Color\", \"Number\"])\n    df_equals(modin_result, pandas_result)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\n    \"repeats\", [0, 2, 3, 4], ids=[\"repeats_{}\".format(i) for i in [0, 2, 3, 4]]\n)\ndef test_repeat(data, repeats):\n    eval_general(pd.Series(data), pandas.Series(data), lambda df: df.repeat(repeats))\n\n\n@pytest.mark.parametrize(\"data\", [np.arange(256)])\n@pytest.mark.parametrize(\n    \"repeats\",\n    [\n        0,\n        2,\n        [2],\n        np.arange(256),\n        [0] * 64 + [2] * 64 + [3] * 32 + [4] * 32 + [5] * 64,\n        [2] * 257,\n    ],\n    ids=[\"0_case\", \"scalar\", \"one-elem-list\", \"array\", \"list\", \"wrong_list\"],\n)\ndef test_repeat_lists(data, repeats, request):\n    expected_exception = None\n    if \"wrong_list\" in request.node.callspec.id:\n        expected_exception = ValueError(\n            \"operands could not be broadcast together with shape (256,) (257,)\"\n      
  )\n    eval_general(\n        *create_test_series(data),\n        lambda df: df.repeat(repeats),\n        expected_exception=expected_exception,\n    )\n\n\ndef test_clip_4485():\n    modin_result = pd.Series([1]).clip([3])\n    pandas_result = pandas.Series([1]).clip([3])\n    df_equals(modin_result, pandas_result)\n\n\ndef test_replace():\n    modin_series = pd.Series([0, 1, 2, 3, 4])\n    pandas_series = pandas.Series([0, 1, 2, 3, 4])\n    modin_result = modin_series.replace(0, 5)\n    pandas_result = pandas_series.replace(0, 5)\n    df_equals(modin_result, pandas_result)\n\n    modin_result = modin_series.replace([1, 2], method=\"bfill\")\n    pandas_result = pandas_series.replace([1, 2], method=\"bfill\")\n    df_equals(modin_result, pandas_result)\n\n\n@pytest.mark.parametrize(\"closed\", [\"left\", \"right\"])\n@pytest.mark.parametrize(\"label\", [\"right\", \"left\"])\n@pytest.mark.parametrize(\"level\", [None, 1])\n@pytest.mark.exclude_in_sanity\ndef test_resample(closed, label, level):\n    rule = \"5min\"\n    freq = \"h\"\n\n    index = pandas.date_range(\"1/1/2000\", periods=12, freq=freq)\n    pandas_series = pandas.Series(range(12), index=index)\n    modin_series = pd.Series(range(12), index=index)\n\n    if level is not None:\n        index = pandas.MultiIndex.from_product(\n            [[\"a\", \"b\", \"c\"], pandas.date_range(\"31/12/2000\", periods=4, freq=freq)]\n        )\n        pandas_series.index = index\n        modin_series.index = index\n    pandas_resampler = pandas_series.resample(\n        rule, closed=closed, label=label, level=level\n    )\n    modin_resampler = modin_series.resample(\n        rule, closed=closed, label=label, level=level\n    )\n\n    df_equals(modin_resampler.count(), pandas_resampler.count())\n    df_equals(modin_resampler.var(0), pandas_resampler.var(0))\n    df_equals(modin_resampler.sum(), pandas_resampler.sum())\n    df_equals(modin_resampler.std(), pandas_resampler.std())\n    
df_equals(modin_resampler.sem(), pandas_resampler.sem())\n    df_equals(modin_resampler.size(), pandas_resampler.size())\n    df_equals(modin_resampler.prod(), pandas_resampler.prod())\n    df_equals(modin_resampler.ohlc(), pandas_resampler.ohlc())\n    df_equals(modin_resampler.min(), pandas_resampler.min())\n    df_equals(modin_resampler.median(), pandas_resampler.median())\n    df_equals(modin_resampler.mean(), pandas_resampler.mean())\n    df_equals(modin_resampler.max(), pandas_resampler.max())\n    df_equals(modin_resampler.last(), pandas_resampler.last())\n    df_equals(modin_resampler.first(), pandas_resampler.first())\n    df_equals(modin_resampler.nunique(), pandas_resampler.nunique())\n    df_equals(\n        modin_resampler.pipe(lambda x: x.max() - x.min()),\n        pandas_resampler.pipe(lambda x: x.max() - x.min()),\n    )\n    df_equals(\n        modin_resampler.transform(lambda x: (x - x.mean()) / x.std()),\n        pandas_resampler.transform(lambda x: (x - x.mean()) / x.std()),\n    )\n    df_equals(\n        modin_resampler.aggregate(\"max\"),\n        pandas_resampler.aggregate(\"max\"),\n    )\n    df_equals(\n        modin_resampler.apply(\"sum\"),\n        pandas_resampler.apply(\"sum\"),\n    )\n    df_equals(\n        modin_resampler.get_group(name=list(modin_resampler.groups)[0]),\n        pandas_resampler.get_group(name=list(pandas_resampler.groups)[0]),\n    )\n    assert pandas_resampler.indices == modin_resampler.indices\n    assert pandas_resampler.groups == modin_resampler.groups\n    df_equals(modin_resampler.quantile(), pandas_resampler.quantile())\n    # Upsampling from level= or on= selection is not supported\n    if level is None:\n        df_equals(\n            modin_resampler.interpolate(),\n            pandas_resampler.interpolate(),\n        )\n        df_equals(modin_resampler.asfreq(), pandas_resampler.asfreq())\n        df_equals(\n            modin_resampler.fillna(method=\"nearest\"),\n            
pandas_resampler.fillna(method=\"nearest\"),\n        )\n        df_equals(modin_resampler.nearest(), pandas_resampler.nearest())\n        df_equals(modin_resampler.bfill(), pandas_resampler.bfill())\n        df_equals(modin_resampler.ffill(), pandas_resampler.ffill())\n    df_equals(\n        modin_resampler.apply([\"sum\", \"mean\", \"max\"]),\n        pandas_resampler.apply([\"sum\", \"mean\", \"max\"]),\n    )\n    df_equals(\n        modin_resampler.aggregate([\"sum\", \"mean\", \"max\"]),\n        pandas_resampler.aggregate([\"sum\", \"mean\", \"max\"]),\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"drop\", [True, False], ids=[\"True\", \"False\"])\n@pytest.mark.parametrize(\"name\", [lib.no_default, \"Custom name\"])\n@pytest.mark.parametrize(\"inplace\", [True, False])\ndef test_reset_index(data, drop, name, inplace):\n    expected_exception = None\n    if inplace and not drop:\n        expected_exception = TypeError(\n            \"Cannot reset_index inplace on a Series to create a DataFrame\"\n        )\n    eval_general(\n        *create_test_series(data),\n        lambda df, *args, **kwargs: df.reset_index(*args, **kwargs),\n        drop=drop,\n        name=name,\n        inplace=inplace,\n        __inplace__=inplace,\n        expected_exception=expected_exception,\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_rfloordiv(data):\n    modin_series, pandas_series = create_test_series(data)\n    inter_df_math_helper(modin_series, pandas_series, \"rfloordiv\")\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_rmod(data):\n    modin_series, pandas_series = create_test_series(data)\n    inter_df_math_helper(modin_series, pandas_series, \"rmod\")\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_rmul(data):\n    modin_series, pandas_series = create_test_series(data)\n    
inter_df_math_helper(modin_series, pandas_series, \"rmul\")\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_round(data):\n    modin_series, pandas_series = create_test_series(data)\n    df_equals(modin_series.round(), pandas_series.round())\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_rpow(data):\n    modin_series, pandas_series = create_test_series(data)\n    inter_df_math_helper(modin_series, pandas_series, \"rpow\")\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_rsub(data):\n    modin_series, pandas_series = create_test_series(data)\n    inter_df_math_helper(modin_series, pandas_series, \"rsub\")\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_rtruediv(data):\n    modin_series, pandas_series = create_test_series(data)\n    inter_df_math_helper(modin_series, pandas_series, \"rtruediv\")\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_sample(data):\n    modin_series, pandas_series = create_test_series(data)\n    try:\n        pandas_result = pandas_series.sample(frac=0.5, random_state=21019)\n    except Exception as err:\n        with pytest.raises(type(err)):\n            modin_series.sample(frac=0.5, random_state=21019)\n    else:\n        modin_result = modin_series.sample(frac=0.5, random_state=21019)\n        df_equals(pandas_result, modin_result)\n\n    try:\n        pandas_result = pandas_series.sample(n=12, random_state=21019)\n    except Exception as err:\n        with pytest.raises(type(err)):\n            modin_series.sample(n=12, random_state=21019)\n    else:\n        modin_result = modin_series.sample(n=12, random_state=21019)\n        df_equals(pandas_result, modin_result)\n\n    with warns_that_defaulting_to_pandas_if(\n        not df_or_series_using_native_execution(modin_series)\n    ):\n        df_equals(\n            modin_series.sample(n=0, 
random_state=21019),\n            pandas_series.sample(n=0, random_state=21019),\n        )\n    with pytest.raises(ValueError):\n        modin_series.sample(n=-3)\n\n\n@pytest.mark.parametrize(\"single_value_data\", [True, False])\n@pytest.mark.parametrize(\"use_multiindex\", [True, False])\n@pytest.mark.parametrize(\"sorter\", [True, None])\n@pytest.mark.parametrize(\"values_number\", [1, 2, 5])\n@pytest.mark.parametrize(\"side\", [\"left\", \"right\"])\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.exclude_in_sanity\ndef test_searchsorted(\n    data, side, values_number, sorter, use_multiindex, single_value_data\n):\n    data = data if not single_value_data else data[next(iter(data.keys()))][0]\n    if not sorter:\n        modin_series, pandas_series = create_test_series(vals=data, sort=True)\n    else:\n        modin_series, pandas_series = create_test_series(vals=data)\n        sorter = np.argsort(list(modin_series))\n\n    if use_multiindex:\n        rows_number = len(modin_series.index)\n        level_0_series = random_state.choice([0, 1], rows_number)\n        level_1_series = random_state.choice([2, 3], rows_number)\n        index_series = pd.MultiIndex.from_arrays(\n            [level_0_series, level_1_series], names=[\"first\", \"second\"]\n        )\n        modin_series.index = index_series\n        pandas_series.index = index_series\n\n    min_sample = modin_series.min(skipna=True)\n    max_sample = modin_series.max(skipna=True)\n\n    if single_value_data:\n        values = [data]\n    else:\n        values = []\n        values.append(pandas_series.sample(n=values_number, random_state=random_state))\n        values.append(\n            random_state.uniform(low=min_sample, high=max_sample, size=values_number)\n        )\n        values.append(\n            random_state.uniform(\n                low=max_sample, high=2 * max_sample, size=values_number\n            )\n        )\n        values.append(\n            
random_state.uniform(\n                low=min_sample - max_sample, high=min_sample, size=values_number\n            )\n        )\n        pure_float = random_state.uniform(float(min_sample), float(max_sample))\n        pure_int = int(pure_float)\n        values.append(pure_float)\n        values.append(pure_int)\n\n    test_cases = [\n        modin_series.searchsorted(value=value, side=side, sorter=sorter)\n        == pandas_series.searchsorted(value=value, side=side, sorter=sorter)\n        for value in values\n    ]\n    test_cases = [\n        case.all() if not isinstance(case, bool) else case for case in test_cases\n    ]\n\n    for case in test_cases:\n        assert case\n\n\n@pytest.mark.parametrize(\"skipna\", [False, True])\n@pytest.mark.parametrize(\"ddof\", int_arg_values, ids=arg_keys(\"ddof\", int_arg_keys))\ndef test_sem_float_nan_only(skipna, ddof):\n    eval_general(\n        *create_test_series(test_data[\"float_nan_data\"]),\n        lambda df: df.sem(skipna=skipna, ddof=ddof),\n    )\n\n\n@pytest.mark.parametrize(\"ddof\", int_arg_values, ids=arg_keys(\"ddof\", int_arg_keys))\ndef test_sem_int_only(ddof):\n    eval_general(\n        *create_test_series(test_data[\"int_data\"]),\n        lambda df: df.sem(ddof=ddof),\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_set_axis(data):\n    modin_series, _ = create_test_series(data)  # noqa: F841\n    modin_series.set_axis(labels=[\"{}_{}\".format(i, i + 1) for i in modin_series.index])\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_shape(data):\n    modin_series, pandas_series = create_test_series(data)\n    assert modin_series.shape == pandas_series.shape\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_size(data):\n    modin_series, pandas_series = create_test_series(data)\n    assert modin_series.size == pandas_series.size\n\n\n@pytest.mark.parametrize(\"data\", 
test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"skipna\", [False, True])\ndef test_skew(data, skipna):\n    eval_general(*create_test_series(data), lambda df: df.skew(skipna=skipna))\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"index\", [\"default\", \"ndarray\", \"has_duplicates\"])\n@pytest.mark.parametrize(\"periods\", [0, 1, -1, 10, -10, 1000000000, -1000000000])\n@pytest.mark.parametrize(\"name\", [None, \"foo\"])\ndef test_shift(data, index, periods, name):\n    modin_series, pandas_series = create_test_series(data, name=name)\n    if index == \"ndarray\":\n        data_column_length = len(data[next(iter(data))])\n        modin_series.index = pandas_series.index = np.arange(2, data_column_length + 2)\n    elif index == \"has_duplicates\":\n        modin_series.index = pandas_series.index = list(modin_series.index[:-3]) + [\n            0,\n            1,\n            2,\n        ]\n\n    df_equals(\n        modin_series.shift(periods=periods),\n        pandas_series.shift(periods=periods),\n    )\n    df_equals(\n        modin_series.shift(periods=periods, fill_value=777),\n        pandas_series.shift(periods=periods, fill_value=777),\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"ascending\", [False, True])\n@pytest.mark.parametrize(\n    \"sort_remaining\", bool_arg_values, ids=arg_keys(\"sort_remaining\", bool_arg_keys)\n)\n@pytest.mark.parametrize(\"na_position\", [\"first\", \"last\"], ids=[\"first\", \"last\"])\ndef test_sort_index(data, ascending, sort_remaining, na_position):\n    modin_series, pandas_series = create_test_series(data)\n    eval_general(\n        modin_series,\n        pandas_series,\n        lambda df: df.sort_index(\n            ascending=ascending,\n            sort_remaining=sort_remaining,\n            na_position=na_position,\n        ),\n    )\n\n    eval_general(\n        
modin_series.copy(),\n        pandas_series.copy(),\n        lambda df: df.sort_index(\n            ascending=ascending,\n            sort_remaining=sort_remaining,\n            na_position=na_position,\n            inplace=True,\n        ),\n        __inplace__=True,\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"ascending\", [True, False])\n@pytest.mark.parametrize(\"na_position\", [\"first\", \"last\"], ids=[\"first\", \"last\"])\ndef test_sort_values(data, ascending, na_position):\n    modin_series, pandas_series = create_test_series(data)\n    modin_result = modin_series.sort_values(\n        ascending=ascending, na_position=na_position\n    )\n    pandas_result = pandas_series.sort_values(\n        ascending=ascending, na_position=na_position\n    )\n    # Note: For `ascending=False` only\n    # For some reason, the indexing of Series and DataFrame differ in the underlying\n    # algorithm. The order of values is the same, but the index values are shuffled.\n    # Since we use `DataFrame.sort_values` even for Series, the index can be different\n    # between `pandas.Series.sort_values`. 
For this reason, we check that the values are\n    # identical instead of the index as well.\n    if ascending:\n        df_equals_with_non_stable_indices(modin_result, pandas_result)\n    else:\n        np.testing.assert_equal(modin_result.values, pandas_result.values)\n\n    modin_series_cp = modin_series.copy()\n    pandas_series_cp = pandas_series.copy()\n    modin_series_cp.sort_values(\n        ascending=ascending, na_position=na_position, inplace=True\n    )\n    pandas_series_cp.sort_values(\n        ascending=ascending, na_position=na_position, inplace=True\n    )\n    # See above about `ascending=False`\n    if ascending:\n        df_equals_with_non_stable_indices(modin_series_cp, pandas_series_cp)\n    else:\n        np.testing.assert_equal(modin_series_cp.values, pandas_series_cp.values)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_squeeze(data):\n    modin_series, pandas_series = create_test_series(data)\n    df_equals(modin_series.squeeze(None), pandas_series.squeeze(None))\n    df_equals(modin_series.squeeze(0), pandas_series.squeeze(0))\n    with pytest.raises(ValueError):\n        modin_series.squeeze(1)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"skipna\", [False, True])\n@pytest.mark.parametrize(\"ddof\", int_arg_values, ids=arg_keys(\"ddof\", int_arg_keys))\ndef test_std(request, data, skipna, ddof):\n    modin_series, pandas_series = create_test_series(data)\n    try:\n        pandas_result = pandas_series.std(skipna=skipna, ddof=ddof)\n    except Exception as err:\n        with pytest.raises(type(err)):\n            modin_series.std(skipna=skipna, ddof=ddof)\n    else:\n        modin_result = modin_series.std(skipna=skipna, ddof=ddof)\n        df_equals(modin_result, pandas_result)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_sub(data):\n    modin_series, pandas_series = create_test_series(data)\n    
inter_df_math_helper(modin_series, pandas_series, \"sub\")\n\n\ndef test_6782():\n    datetime_scalar = datetime.datetime(1970, 1, 1, 0, 0)\n    match = \"Adding/subtracting object-dtype array to DatetimeArray not vectorized\"\n    with warnings.catch_warnings():\n        warnings.filterwarnings(\"error\", match, PerformanceWarning)\n        pd.Series([datetime.datetime(2000, 1, 1)]) - datetime_scalar\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_subtract(data):\n    modin_series, pandas_series = create_test_series(data)\n    inter_df_math_helper(modin_series, pandas_series, \"subtract\")\n\n\n@pytest.mark.parametrize(\n    \"data\",\n    test_data_values + test_data_small_values,\n    ids=test_data_keys + test_data_small_keys,\n)\n@pytest.mark.parametrize(\"skipna\", [False, True])\n@pytest.mark.parametrize(\"numeric_only\", [False, True])\n@pytest.mark.parametrize(\n    \"min_count\", int_arg_values, ids=arg_keys(\"min_count\", int_arg_keys)\n)\n@pytest.mark.exclude_in_sanity\ndef test_sum(data, skipna, numeric_only, min_count):\n    eval_general(\n        *create_test_series(data),\n        lambda df, *args, **kwargs: df.sum(*args, **kwargs),\n        skipna=skipna,\n        numeric_only=numeric_only,\n        min_count=min_count,\n    )\n\n\n@pytest.mark.parametrize(\"operation\", [\"sum\", \"shift\"])\ndef test_sum_axis_1_except(operation):\n    eval_general(\n        *create_test_series(test_data[\"int_data\"]),\n        lambda df, *args, **kwargs: getattr(df, operation)(*args, **kwargs),\n        axis=1,\n        expected_exception=ValueError(\"No axis named 1 for object type Series\"),\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"axis1\", [0, 1, \"columns\", \"index\"])\n@pytest.mark.parametrize(\"axis2\", [0, 1, \"columns\", \"index\"])\ndef test_swapaxes(data, axis1, axis2):\n    modin_series, pandas_series = create_test_series(data)\n    try:\n   
     pandas_result = pandas_series.swapaxes(axis1, axis2)\n    except Exception as err:\n        with pytest.raises(type(err)):\n            modin_series.swapaxes(axis1, axis2)\n    else:\n        modin_result = modin_series.swapaxes(axis1, axis2)\n        df_equals(modin_result, pandas_result)\n\n\ndef test_swaplevel():\n    data = np.random.randint(1, 100, 12)\n    modin_s = pd.Series(\n        data,\n        index=pd.MultiIndex.from_tuples(\n            [\n                (num, letter, color)\n                for num in range(1, 3)\n                for letter in [\"a\", \"b\", \"c\"]\n                for color in [\"Red\", \"Green\"]\n            ],\n            names=[\"Number\", \"Letter\", \"Color\"],\n        ),\n    )\n    pandas_s = pandas.Series(\n        data,\n        index=pandas.MultiIndex.from_tuples(\n            [\n                (num, letter, color)\n                for num in range(1, 3)\n                for letter in [\"a\", \"b\", \"c\"]\n                for color in [\"Red\", \"Green\"]\n            ],\n            names=[\"Number\", \"Letter\", \"Color\"],\n        ),\n    )\n    df_equals(\n        modin_s.swaplevel(\"Number\", \"Color\"), pandas_s.swaplevel(\"Number\", \"Color\")\n    )\n    df_equals(modin_s.swaplevel(), pandas_s.swaplevel())\n    df_equals(modin_s.swaplevel(1, 0), pandas_s.swaplevel(1, 0))\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"n\", int_arg_values, ids=arg_keys(\"n\", int_arg_keys))\ndef test_tail(data, n):\n    modin_series, pandas_series = create_test_series(data)\n    df_equals(modin_series.tail(n), pandas_series.tail(n))\n    df_equals(\n        modin_series.tail(len(modin_series)), pandas_series.tail(len(pandas_series))\n    )\n\n\ndef test_take():\n    modin_s = pd.Series([\"falcon\", \"parrot\", \"lion\", \"cat\"], index=[0, 2, 3, 1])\n    pandas_s = pandas.Series([\"falcon\", \"parrot\", \"lion\", \"cat\"], index=[0, 2, 3, 1])\n    a = 
modin_s.take([0, 3])\n    df_equals(a, pandas_s.take([0, 3]))\n    try:\n        pandas_s.take([2], axis=1)\n    except Exception as err:\n        with pytest.raises(type(err)):\n            modin_s.take([2], axis=1)\n\n\n@pytest.mark.parametrize(\n    \"ignore_index\", bool_arg_values, ids=arg_keys(\"ignore_index\", bool_arg_keys)\n)\ndef test_explode(ignore_index):\n    # Some items in this test data are lists that explode() should expand.\n    data = [[1, 2, 3], \"foo\", [], [3, 4]]\n    modin_series, pandas_series = create_test_series(data)\n    df_equals(\n        modin_series.explode(ignore_index=ignore_index),\n        pandas_series.explode(ignore_index=ignore_index),\n    )\n\n\ndef test_to_period():\n    idx = pd.date_range(\"1/1/2012\", periods=5, freq=\"M\")\n    series = pd.Series(np.random.randint(0, 100, size=(len(idx))), index=idx)\n    with warns_that_defaulting_to_pandas_if(\n        not df_or_series_using_native_execution(series)\n    ):\n        series.to_period()\n\n\n@pytest.mark.parametrize(\n    \"data\",\n    test_data_values + test_data_large_categorical_series_values,\n    ids=test_data_keys + test_data_large_categorical_series_keys,\n)\ndef test_to_numpy(data):\n    modin_series, pandas_series = create_test_series(data)\n    assert_array_equal(modin_series.to_numpy(), pandas_series.to_numpy())\n\n\ndef test_to_numpy_dtype():\n    modin_series, pandas_series = create_test_series(test_data[\"float_nan_data\"])\n    assert_array_equal(\n        modin_series.to_numpy(dtype=\"int64\"),\n        pandas_series.to_numpy(dtype=\"int64\"),\n        strict=True,\n    )\n\n\n@pytest.mark.parametrize(\n    \"data\",\n    test_data_values + test_data_large_categorical_series_values,\n    ids=test_data_keys + test_data_large_categorical_series_keys,\n)\ndef test_series_values(data):\n    modin_series, pandas_series = create_test_series(data)\n    assert_array_equal(modin_series.values, pandas_series.values)\n\n\ndef test_series_empty_values():\n    
modin_series, pandas_series = pd.Series(), pandas.Series()\n    assert_array_equal(modin_series.values, pandas_series.values)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_to_string(request, data):\n    eval_general(\n        *create_test_series(data),\n        lambda df: df.to_string(),\n    )\n\n\ndef test_to_timestamp():\n    idx = pd.date_range(\"1/1/2012\", periods=5, freq=\"M\")\n    series = pd.Series(np.random.randint(0, 100, size=(len(idx))), index=idx)\n    with warns_that_defaulting_to_pandas_if(\n        not df_or_series_using_native_execution(series)\n    ):\n        series.to_period().to_timestamp()\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_to_xarray(data):\n    modin_series, _ = create_test_series(data)  # noqa: F841\n    with warns_that_defaulting_to_pandas_if(\n        not df_or_series_using_native_execution(modin_series)\n    ):\n        modin_series.to_xarray()\n\n\ndef test_to_xarray_mock():\n    modin_series = pd.Series([])\n\n    with mock.patch(\"pandas.Series.to_xarray\") as to_xarray:\n        modin_series.to_xarray()\n    to_xarray.assert_called_once()\n    assert len(to_xarray.call_args[0]) == 1\n    df_equals(modin_series, to_xarray.call_args[0][0])\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_tolist(data):\n    modin_series, _ = create_test_series(data)  # noqa: F841\n    with warns_that_defaulting_to_pandas_if(\n        not df_or_series_using_native_execution(modin_series)\n    ):\n        modin_series.tolist()\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\n    \"func\", [lambda x: x + 1, [np.sqrt, np.exp]], ids=[\"lambda\", \"list_udfs\"]\n)\ndef test_transform(data, func, request):\n    if \"list_udfs\" in request.node.callspec.id:\n        pytest.xfail(reason=\"https://github.com/modin-project/modin/issues/6998\")\n    eval_general(\n        
*create_test_series(data),\n        lambda df: df.transform(func),\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"func\", agg_func_except_values, ids=agg_func_except_keys)\ndef test_transform_except(data, func):\n    eval_general(\n        *create_test_series(data),\n        lambda df: df.transform(func),\n        expected_exception=ValueError(\"Function did not transform\"),\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_transpose(data):\n    modin_series, pandas_series = create_test_series(data)\n    df_equals(modin_series.transpose(), modin_series)\n    df_equals(modin_series.transpose(), pandas_series.transpose())\n    df_equals(modin_series.transpose(), pandas_series)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_truediv(data):\n    modin_series, pandas_series = create_test_series(data)\n    inter_df_math_helper(modin_series, pandas_series, \"truediv\")\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_truncate(data):\n    modin_series, pandas_series = create_test_series(data)\n\n    before = 1\n    after = len(modin_series) - 3\n    df_equals(\n        modin_series.truncate(before, after), pandas_series.truncate(before, after)\n    )\n\n    before = 1\n    after = 3\n    df_equals(\n        modin_series.truncate(before, after), pandas_series.truncate(before, after)\n    )\n\n    before = None\n    after = None\n    df_equals(\n        modin_series.truncate(before, after), pandas_series.truncate(before, after)\n    )\n\n\ndef test_tz_convert():\n    modin_idx = pd.date_range(\n        \"1/1/2012\", periods=400, freq=\"2D\", tz=\"America/Los_Angeles\"\n    )\n    pandas_idx = pandas.date_range(\n        \"1/1/2012\", periods=400, freq=\"2D\", tz=\"America/Los_Angeles\"\n    )\n    data = np.random.randint(0, 100, size=len(modin_idx))\n    modin_series = pd.Series(data, 
index=modin_idx)\n    pandas_series = pandas.Series(data, index=pandas_idx)\n    modin_result = modin_series.tz_convert(\"UTC\", axis=0)\n    pandas_result = pandas_series.tz_convert(\"UTC\", axis=0)\n    df_equals(modin_result, pandas_result)\n\n    modin_multi = pd.MultiIndex.from_arrays([modin_idx, range(len(modin_idx))])\n    pandas_multi = pandas.MultiIndex.from_arrays([pandas_idx, range(len(modin_idx))])\n    modin_series = pd.Series(data, index=modin_multi)\n    pandas_series = pandas.Series(data, index=pandas_multi)\n    df_equals(\n        modin_series.tz_convert(\"UTC\", axis=0, level=0),\n        pandas_series.tz_convert(\"UTC\", axis=0, level=0),\n    )\n\n\ndef test_tz_localize():\n    idx = pd.date_range(\"1/1/2012\", periods=400, freq=\"2D\")\n    data = np.random.randint(0, 100, size=len(idx))\n    modin_series = pd.Series(data, index=idx)\n    pandas_series = pandas.Series(data, index=idx)\n    df_equals(\n        modin_series.tz_localize(\"America/Los_Angeles\"),\n        pandas_series.tz_localize(\"America/Los_Angeles\"),\n    )\n    df_equals(\n        modin_series.tz_localize(\"UTC\"),\n        pandas_series.tz_localize(\"UTC\"),\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_unique(data):\n    comparator = lambda *args: sort_if_range_partitioning(  # noqa: E731\n        *args, comparator=assert_array_equal\n    )\n\n    modin_series, pandas_series = create_test_series(data)\n    modin_result = modin_series.unique()\n    pandas_result = pandas_series.unique()\n    comparator(modin_result, pandas_result)\n    assert modin_result.shape == pandas_result.shape\n    assert type(modin_result) is type(pandas_result)\n\n    modin_result = pd.Series([2, 1, 3, 3], name=\"A\").unique()\n    pandas_result = pandas.Series([2, 1, 3, 3], name=\"A\").unique()\n    comparator(modin_result, pandas_result)\n    assert modin_result.shape == pandas_result.shape\n    assert type(modin_result) is type(pandas_result)\n\n 
   modin_result = pd.Series([pd.Timestamp(\"2016-01-01\") for _ in range(3)]).unique()\n    pandas_result = pandas.Series(\n        [pd.Timestamp(\"2016-01-01\") for _ in range(3)]\n    ).unique()\n    comparator(modin_result, pandas_result)\n    assert modin_result.shape == pandas_result.shape\n    assert type(modin_result) is type(pandas_result)\n\n    modin_result = pd.Series(\n        [pd.Timestamp(\"2016-01-01\", tz=\"US/Eastern\") for _ in range(3)]\n    ).unique()\n    pandas_result = pandas.Series(\n        [pd.Timestamp(\"2016-01-01\", tz=\"US/Eastern\") for _ in range(3)]\n    ).unique()\n    comparator(modin_result, pandas_result)\n    assert modin_result.shape == pandas_result.shape\n    assert type(modin_result) is type(pandas_result)\n\n    modin_result = pd.Series(pd.Categorical(list(\"baabc\"))).unique()\n    pandas_result = pandas.Series(pd.Categorical(list(\"baabc\"))).unique()\n    comparator(modin_result, pandas_result)\n    assert modin_result.shape == pandas_result.shape\n    assert type(modin_result) is type(pandas_result)\n\n    modin_result = pd.Series(\n        pd.Categorical(list(\"baabc\"), categories=list(\"abc\"), ordered=True)\n    ).unique()\n    pandas_result = pandas.Series(\n        pd.Categorical(list(\"baabc\"), categories=list(\"abc\"), ordered=True)\n    ).unique()\n    comparator(modin_result, pandas_result)\n    assert modin_result.shape == pandas_result.shape\n    assert type(modin_result) is type(pandas_result)\n\n\ndef test_unique_pyarrow_dtype():\n    # See #6227 for details\n    modin_series, pandas_series = create_test_series(\n        [1, 0, pd.NA], dtype=\"uint8[pyarrow]\"\n    )\n\n    def comparator(df1, df2):\n        # Perform our own non-strict version of dtypes equality check\n        df_equals(df1, df2)\n        # to be sure `unique` returns `ArrowExtensionArray`\n        assert type(df1) is type(df2)\n\n    eval_general(\n        modin_series, pandas_series, lambda df: df.unique(), comparator=comparator\n    
)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_unstack(data):\n    modin_series, pandas_series = create_test_series(data)\n    index = generate_multiindex(len(pandas_series), nlevels=4, is_tree_like=True)\n\n    modin_series = pd.Series(data[next(iter(data.keys()))], index=index)\n    pandas_series = pandas.Series(data[next(iter(data.keys()))], index=index)\n\n    df_equals(modin_series.unstack(), pandas_series.unstack())\n    df_equals(modin_series.unstack(level=0), pandas_series.unstack(level=0))\n    df_equals(modin_series.unstack(level=[0, 1]), pandas_series.unstack(level=[0, 1]))\n    df_equals(\n        modin_series.unstack(level=[0, 1, 2]), pandas_series.unstack(level=[0, 1, 2])\n    )\n\n\ndef test_unstack_error_no_multiindex():\n    modin_series = pd.Series([0, 1, 2])\n    with pytest.raises(ValueError, match=\"index must be a MultiIndex to unstack\"):\n        modin_series.unstack()\n\n\n@pytest.mark.parametrize(\n    \"data, other_data\",\n    [([1, 2, 3], [4, 5, 6]), ([1, 2, 3], [4, 5, 6, 7, 8]), ([1, 2, 3], [4, np.nan, 6])],\n)\ndef test_update(data, other_data):\n    modin_series, pandas_series = pd.Series(data), pandas.Series(data)\n    modin_series.update(pd.Series(other_data))\n    pandas_series.update(pandas.Series(other_data))\n    df_equals(modin_series, pandas_series)\n\n\n@pytest.mark.parametrize(\"sort\", bool_arg_values, ids=bool_arg_keys)\n@pytest.mark.parametrize(\"normalize\", bool_arg_values, ids=bool_arg_keys)\n@pytest.mark.parametrize(\"bins\", [3, None])\n@pytest.mark.parametrize(\n    \"dropna\",\n    [\n        pytest.param(None),\n        pytest.param(False),\n        pytest.param(True),\n    ],\n)\n@pytest.mark.parametrize(\"ascending\", [True, False])\n@pytest.mark.exclude_in_sanity\ndef test_value_counts(sort, normalize, bins, dropna, ascending):\n    def sort_sensitive_comparator(df1, df2):\n        # We sort indices for Modin and pandas result because of issue #1650\n        return 
(\n            df_equals_with_non_stable_indices(df1, df2)\n            if sort\n            else df_equals(df1.sort_index(), df2.sort_index())\n        )\n\n    eval_general(\n        *create_test_series(test_data_values[0]),\n        lambda df: df.value_counts(\n            sort=sort,\n            bins=bins,\n            normalize=normalize,\n            dropna=dropna,\n            ascending=ascending,\n        ),\n        comparator=sort_sensitive_comparator,\n    )\n\n    # from issue #2365\n    arr = np.random.rand(2**6)\n    arr[::10] = np.nan\n    eval_general(\n        *create_test_series(arr),\n        lambda df: df.value_counts(\n            sort=sort,\n            bins=bins,\n            normalize=normalize,\n            dropna=dropna,\n            ascending=ascending,\n        ),\n        comparator=sort_sensitive_comparator,\n    )\n\n\ndef test_value_counts_categorical():\n    # from issue #3571\n    data = np.array([\"a\"] * 50000 + [\"b\"] * 10000 + [\"c\"] * 1000)\n    random_state = np.random.RandomState(seed=42)\n    random_state.shuffle(data)\n    eval_general(\n        *create_test_series(data, dtype=\"category\"),\n        lambda df: df.value_counts(),\n        comparator=df_equals,\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_values(data):\n    modin_series, pandas_series = create_test_series(data)\n\n    np.testing.assert_equal(modin_series.values, pandas_series.values)\n\n\ndef test_values_non_numeric():\n    data = [\"str{0}\".format(i) for i in range(0, 10**3)]\n    modin_series, pandas_series = create_test_series(data)\n\n    modin_series = modin_series.astype(\"category\")\n    pandas_series = pandas_series.astype(\"category\")\n\n    df_equals(modin_series.values, pandas_series.values)\n\n\ndef test_values_ea():\n    data = pandas.arrays.SparseArray(np.arange(10, dtype=\"int64\"))\n    modin_series, pandas_series = create_test_series(data)\n    modin_values = modin_series.values\n    
pandas_values = pandas_series.values\n\n    assert modin_values.dtype == pandas_values.dtype\n    df_equals(modin_values, pandas_values)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\n@pytest.mark.parametrize(\"skipna\", [False, True])\n@pytest.mark.parametrize(\"ddof\", int_arg_values, ids=arg_keys(\"ddof\", int_arg_keys))\ndef test_var(data, skipna, ddof):\n    modin_series, pandas_series = create_test_series(data)\n\n    try:\n        pandas_result = pandas_series.var(skipna=skipna, ddof=ddof)\n    except Exception as err:\n        with pytest.raises(type(err)):\n            modin_series.var(skipna=skipna, ddof=ddof)\n    else:\n        modin_result = modin_series.var(skipna=skipna, ddof=ddof)\n        df_equals(modin_result, pandas_result)\n\n\ndef test_view():\n    modin_series = pd.Series([-2, -1, 0, 1, 2], dtype=\"int8\")\n    pandas_series = pandas.Series([-2, -1, 0, 1, 2], dtype=\"int8\")\n    modin_result = modin_series.view(dtype=\"uint8\")\n    pandas_result = pandas_series.view(dtype=\"uint8\")\n    df_equals(modin_result, pandas_result)\n\n    modin_series = pd.Series([-20, -10, 0, 10, 20], dtype=\"int32\")\n    pandas_series = pandas.Series([-20, -10, 0, 10, 20], dtype=\"int32\")\n    modin_result = modin_series.view(dtype=\"float32\")\n    pandas_result = pandas_series.view(dtype=\"float32\")\n    df_equals(modin_result, pandas_result)\n\n    modin_series = pd.Series([-200, -100, 0, 100, 200], dtype=\"int64\")\n    pandas_series = pandas.Series([-200, -100, 0, 100, 200], dtype=\"int64\")\n    modin_result = modin_series.view(dtype=\"float64\")\n    pandas_result = pandas_series.view(dtype=\"float64\")\n    df_equals(modin_result, pandas_result)\n\n\ndef test_where():\n    frame_data = random_state.randn(100)\n    pandas_series = pandas.Series(frame_data)\n    modin_series = pd.Series(frame_data)\n    pandas_cond_series = pandas_series % 5 < 2\n    modin_cond_series = modin_series % 5 < 2\n\n    pandas_result = 
pandas_series.where(pandas_cond_series, -pandas_series)\n    modin_result = modin_series.where(modin_cond_series, -modin_series)\n    assert all((to_pandas(modin_result) == pandas_result))\n\n    other_data = random_state.randn(100)\n    modin_other, pandas_other = pd.Series(other_data), pandas.Series(other_data)\n    pandas_result = pandas_series.where(pandas_cond_series, pandas_other, axis=0)\n    modin_result = modin_series.where(modin_cond_series, modin_other, axis=0)\n    assert all(to_pandas(modin_result) == pandas_result)\n\n    pandas_result = pandas_series.where(pandas_series < 2, True)\n    modin_result = modin_series.where(modin_series < 2, True)\n    assert all(to_pandas(modin_result) == pandas_result)\n\n\n@pytest.mark.parametrize(\"data\", test_string_data_values, ids=test_string_data_keys)\n@pytest.mark.parametrize(\n    \"key\",\n    [0, slice(0, len(test_string_data_values) / 2)],\n    ids=[\"single_key\", \"slice_key\"],\n)\ndef test_str___getitem__(data, key):\n    modin_series, pandas_series = create_test_series(data)\n    modin_result = modin_series.str[key]\n    pandas_result = pandas_series.str[key]\n    df_equals(\n        modin_result,\n        pandas_result,\n        # https://github.com/modin-project/modin/issues/5968\n        check_dtypes=False,\n    )\n\n\n# Test str operations\n@pytest.mark.parametrize(\n    \"others\",\n    [[\"abC|DeF,Hik\", \"gSaf,qWer|Gre\", \"asd3,4sad|\", np.nan], None],\n    ids=[\"list\", \"None\"],\n)\ndef test_str_cat(others):\n    data = [\"abC|DeF,Hik\", \"gSaf,qWer|Gre\", \"asd3,4sad|\", np.nan]\n    eval_general(*create_test_series(data), lambda s: s.str.cat(others=others))\n\n\n@pytest.mark.parametrize(\"data\", test_string_data_values, ids=test_string_data_keys)\n@pytest.mark.parametrize(\"pat\", string_sep_values, ids=string_sep_keys)\n@pytest.mark.parametrize(\"n\", int_arg_values, ids=int_arg_keys)\n@pytest.mark.parametrize(\"expand\", [False, True])\ndef test_str_split(data, pat, n, expand):\n    
eval_general(\n        *create_test_series(data),\n        lambda series: series.str.split(pat, n=n, expand=expand),\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_string_data_values, ids=test_string_data_keys)\n@pytest.mark.parametrize(\"pat\", string_sep_values, ids=string_sep_keys)\n@pytest.mark.parametrize(\"n\", int_arg_values, ids=int_arg_keys)\n@pytest.mark.parametrize(\"expand\", [False, True])\ndef test_str_rsplit(data, pat, n, expand):\n    eval_general(\n        *create_test_series(data),\n        lambda series: series.str.rsplit(pat, n=n, expand=expand),\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_string_data_values, ids=test_string_data_keys)\n@pytest.mark.parametrize(\"i\", int_arg_values, ids=int_arg_keys)\ndef test_str_get(data, i):\n    modin_series, pandas_series = create_test_series(data)\n    eval_general(modin_series, pandas_series, lambda series: series.str.get(i))\n\n\n@pytest.mark.parametrize(\n    \"data\", test_string_list_data_values, ids=test_string_list_data_keys\n)\n@pytest.mark.parametrize(\"sep\", string_sep_values, ids=string_sep_keys)\ndef test_str_join(data, sep):\n    modin_series, pandas_series = create_test_series(data)\n    eval_general(modin_series, pandas_series, lambda series: series.str.join(sep))\n\n\n@pytest.mark.parametrize(\n    \"data\", test_string_list_data_values, ids=test_string_list_data_keys\n)\n@pytest.mark.parametrize(\"sep\", string_sep_values, ids=string_sep_keys)\ndef test_str_get_dummies(data, sep):\n    modin_series, pandas_series = create_test_series(data)\n\n    if sep:\n        with warns_that_defaulting_to_pandas_if(\n            not df_or_series_using_native_execution(modin_series)\n        ):\n            # We are only testing that this defaults to pandas, so we will just check for\n            # the warning\n            modin_series.str.get_dummies(sep)\n\n\n@pytest.mark.parametrize(\"data\", test_string_data_values, ids=test_string_data_keys)\n@pytest.mark.parametrize(\"pat\", 
string_sep_values, ids=string_sep_keys)\n@pytest.mark.parametrize(\"case\", bool_arg_values, ids=bool_arg_keys)\n@pytest.mark.parametrize(\"na\", string_na_rep_values, ids=string_na_rep_keys)\ndef test_str_contains(data, pat, case, na):\n    modin_series, pandas_series = create_test_series(data)\n    eval_general(\n        modin_series,\n        pandas_series,\n        lambda series: series.str.contains(pat, case=case, na=na, regex=False),\n        # https://github.com/modin-project/modin/issues/5969\n        comparator_kwargs={\"check_dtypes\": False},\n    )\n\n    # Test regex\n    pat = \",|b\"\n    eval_general(\n        modin_series,\n        pandas_series,\n        lambda series: series.str.contains(pat, case=case, na=na, regex=True),\n        # https://github.com/modin-project/modin/issues/5969\n        comparator_kwargs={\"check_dtypes\": False},\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_string_data_values, ids=test_string_data_keys)\n@pytest.mark.parametrize(\"pat\", string_sep_values, ids=string_sep_keys)\n@pytest.mark.parametrize(\"repl\", string_sep_values, ids=string_sep_keys)\n@pytest.mark.parametrize(\"n\", int_arg_values, ids=int_arg_keys)\n@pytest.mark.parametrize(\"case\", bool_arg_values, ids=bool_arg_keys)\ndef test_str_replace(data, pat, repl, n, case):\n    eval_general(\n        *create_test_series(data),\n        lambda series: series.str.replace(pat, repl, n=n, case=case, regex=False),\n        # https://github.com/modin-project/modin/issues/5970\n        comparator_kwargs={\"check_dtypes\": pat is not None},\n    )\n    # Test regex\n    eval_general(\n        *create_test_series(data),\n        lambda series: series.str.replace(\n            pat=\",|b\", repl=repl, n=n, case=case, regex=True\n        ),\n        # https://github.com/modin-project/modin/issues/5970\n        comparator_kwargs={\"check_dtypes\": pat is not None},\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_string_data_values, 
ids=test_string_data_keys)\n@pytest.mark.parametrize(\"repeats\", int_arg_values, ids=int_arg_keys)\ndef test_str_repeat(data, repeats):\n    modin_series, pandas_series = create_test_series(data)\n    eval_general(modin_series, pandas_series, lambda series: series.str.repeat(repeats))\n\n\n@pytest.mark.parametrize(\"data\", test_string_data_values, ids=test_string_data_keys)\ndef test_str_removeprefix(data):\n    modin_series, pandas_series = create_test_series(data)\n    prefix = \"test_prefix\"\n    eval_general(\n        modin_series,\n        pandas_series,\n        lambda series: (prefix + series).str.removeprefix(prefix),\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_string_data_values, ids=test_string_data_keys)\ndef test_str_removesuffix(data):\n    modin_series, pandas_series = create_test_series(data)\n    suffix = \"test_suffix\"\n    eval_general(\n        modin_series,\n        pandas_series,\n        lambda series: (series + suffix).str.removesuffix(suffix),\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_string_data_values, ids=test_string_data_keys)\n@pytest.mark.parametrize(\"width\", [-1, 0, 5])\n@pytest.mark.parametrize(\n    \"side\", [\"left\", \"right\", \"both\"], ids=[\"left\", \"right\", \"both\"]\n)\n@pytest.mark.parametrize(\"fillchar\", string_sep_values, ids=string_sep_keys)\ndef test_str_pad(data, width, side, fillchar):\n    modin_series, pandas_series = create_test_series(data)\n    eval_general(\n        modin_series,\n        pandas_series,\n        lambda series: series.str.pad(width, side=side, fillchar=fillchar),\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_string_data_values, ids=test_string_data_keys)\n@pytest.mark.parametrize(\"width\", [-1, 0, 5])\n@pytest.mark.parametrize(\"fillchar\", string_sep_values, ids=string_sep_keys)\ndef test_str_center(data, width, fillchar):\n    modin_series, pandas_series = create_test_series(data)\n    eval_general(\n        modin_series,\n        pandas_series,\n        
lambda series: series.str.center(width, fillchar=fillchar),\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_string_data_values, ids=test_string_data_keys)\n@pytest.mark.parametrize(\"width\", [-1, 0, 5])\n@pytest.mark.parametrize(\"fillchar\", string_sep_values, ids=string_sep_keys)\ndef test_str_ljust(data, width, fillchar):\n    modin_series, pandas_series = create_test_series(data)\n    eval_general(\n        modin_series,\n        pandas_series,\n        lambda series: series.str.ljust(width, fillchar=fillchar),\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_string_data_values, ids=test_string_data_keys)\n@pytest.mark.parametrize(\"width\", [-1, 0, 5])\n@pytest.mark.parametrize(\"fillchar\", string_sep_values, ids=string_sep_keys)\ndef test_str_rjust(data, width, fillchar):\n    modin_series, pandas_series = create_test_series(data)\n    eval_general(\n        modin_series,\n        pandas_series,\n        lambda series: series.str.rjust(width, fillchar=fillchar),\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_string_data_values, ids=test_string_data_keys)\n@pytest.mark.parametrize(\"width\", [-1, 0, 5])\ndef test_str_zfill(data, width):\n    modin_series, pandas_series = create_test_series(data)\n    eval_general(modin_series, pandas_series, lambda series: series.str.zfill(width))\n\n\n@pytest.mark.parametrize(\"data\", test_string_data_values, ids=test_string_data_keys)\n@pytest.mark.parametrize(\"width\", [-1, 0, 5])\ndef test_str_wrap(data, width):\n    expected_exception = None\n    if width != 5:\n        expected_exception = ValueError(f\"invalid width {width} (must be > 0)\")\n    modin_series, pandas_series = create_test_series(data)\n    eval_general(\n        modin_series,\n        pandas_series,\n        lambda series: series.str.wrap(width),\n        expected_exception=expected_exception,\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_string_data_values, ids=test_string_data_keys)\n@pytest.mark.parametrize(\"start\", 
int_arg_values, ids=int_arg_keys)\n@pytest.mark.parametrize(\"stop\", int_arg_values, ids=int_arg_keys)\n@pytest.mark.parametrize(\"step\", [-2, 1, 3])\ndef test_str_slice(data, start, stop, step):\n    modin_series, pandas_series = create_test_series(data)\n    eval_general(\n        modin_series,\n        pandas_series,\n        lambda series: series.str.slice(start=start, stop=stop, step=step),\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_string_data_values, ids=test_string_data_keys)\n@pytest.mark.parametrize(\"start\", int_arg_values, ids=int_arg_keys)\n@pytest.mark.parametrize(\"stop\", int_arg_values, ids=int_arg_keys)\n@pytest.mark.parametrize(\"repl\", string_sep_values, ids=string_sep_keys)\ndef test_str_slice_replace(data, start, stop, repl):\n    modin_series, pandas_series = create_test_series(data)\n    eval_general(\n        modin_series,\n        pandas_series,\n        lambda series: series.str.slice_replace(start=start, stop=stop, repl=repl),\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_string_data_values, ids=test_string_data_keys)\n@pytest.mark.parametrize(\"pat\", string_sep_values, ids=string_sep_keys)\ndef test_str_count(data, pat):\n    modin_series, pandas_series = create_test_series(data)\n    eval_general(modin_series, pandas_series, lambda series: series.str.count(pat))\n\n\n@pytest.mark.parametrize(\"data\", test_string_data_values, ids=test_string_data_keys)\n@pytest.mark.parametrize(\"pat\", string_sep_values, ids=string_sep_keys)\n@pytest.mark.parametrize(\"na\", string_na_rep_values, ids=string_na_rep_keys)\ndef test_str_startswith(data, pat, na):\n    modin_series, pandas_series = create_test_series(data)\n    eval_general(\n        modin_series,\n        pandas_series,\n        lambda series: series.str.startswith(pat, na=na),\n        # https://github.com/modin-project/modin/issues/5969\n        comparator_kwargs={\"check_dtypes\": False},\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_string_data_values, 
ids=test_string_data_keys)\n@pytest.mark.parametrize(\"pat\", string_sep_values, ids=string_sep_keys)\n@pytest.mark.parametrize(\"na\", string_na_rep_values, ids=string_na_rep_keys)\ndef test_str_endswith(data, pat, na):\n    modin_series, pandas_series = create_test_series(data)\n    eval_general(\n        modin_series,\n        pandas_series,\n        lambda series: series.str.endswith(pat, na=na),\n        # https://github.com/modin-project/modin/issues/5969\n        comparator_kwargs={\"check_dtypes\": False},\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_string_data_values, ids=test_string_data_keys)\n@pytest.mark.parametrize(\"pat\", string_sep_values, ids=string_sep_keys)\ndef test_str_findall(data, pat):\n    modin_series, pandas_series = create_test_series(data)\n    eval_general(modin_series, pandas_series, lambda series: series.str.findall(pat))\n\n\n@pytest.mark.parametrize(\"data\", test_string_data_values, ids=test_string_data_keys)\n@pytest.mark.parametrize(\"pat\", string_sep_values, ids=string_sep_keys)\ndef test_str_fullmatch(data, pat):\n    modin_series, pandas_series = create_test_series(data)\n    eval_general(modin_series, pandas_series, lambda series: series.str.fullmatch(pat))\n\n\n@pytest.mark.parametrize(\"data\", test_string_data_values, ids=test_string_data_keys)\n@pytest.mark.parametrize(\"pat\", string_sep_values, ids=string_sep_keys)\n@pytest.mark.parametrize(\"case\", bool_arg_values, ids=bool_arg_keys)\n@pytest.mark.parametrize(\"na\", string_na_rep_values, ids=string_na_rep_keys)\ndef test_str_match(data, pat, case, na):\n    modin_series, pandas_series = create_test_series(data)\n    eval_general(\n        modin_series,\n        pandas_series,\n        lambda series: series.str.match(pat, case=case, na=na),\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_string_data_values, ids=test_string_data_keys)\n@pytest.mark.parametrize(\"expand\", [False, True])\n@pytest.mark.parametrize(\"pat\", [r\"([ab])\", 
r\"([ab])(\\d)\"])\ndef test_str_extract(data, expand, pat):\n    modin_series, pandas_series = create_test_series(data)\n\n    eval_general(\n        modin_series,\n        pandas_series,\n        lambda series: series.str.extract(pat, expand=expand),\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_string_data_values, ids=test_string_data_keys)\ndef test_str_extractall(data):\n    modin_series, pandas_series = create_test_series(data)\n\n    with warns_that_defaulting_to_pandas_if(\n        not df_or_series_using_native_execution(modin_series)\n    ):\n        # We are only testing that this defaults to pandas, so we will just check for\n        # the warning\n        modin_series.str.extractall(r\"([ab])(\\d)\")\n\n\n@pytest.mark.parametrize(\"data\", test_string_data_values, ids=test_string_data_keys)\ndef test_str_len(data):\n    modin_series, pandas_series = create_test_series(data)\n    eval_general(modin_series, pandas_series, lambda series: series.str.len())\n\n\n@pytest.mark.parametrize(\"data\", test_string_data_values, ids=test_string_data_keys)\n@pytest.mark.parametrize(\"to_strip\", string_sep_values, ids=string_sep_keys)\ndef test_str_strip(data, to_strip):\n    modin_series, pandas_series = create_test_series(data)\n    eval_general(\n        modin_series, pandas_series, lambda series: series.str.strip(to_strip=to_strip)\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_string_data_values, ids=test_string_data_keys)\n@pytest.mark.parametrize(\"to_strip\", string_sep_values, ids=string_sep_keys)\ndef test_str_rstrip(data, to_strip):\n    modin_series, pandas_series = create_test_series(data)\n    eval_general(\n        modin_series, pandas_series, lambda series: series.str.rstrip(to_strip=to_strip)\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_string_data_values, ids=test_string_data_keys)\n@pytest.mark.parametrize(\"to_strip\", string_sep_values, ids=string_sep_keys)\ndef test_str_lstrip(data, to_strip):\n    modin_series, pandas_series = 
create_test_series(data)\n    eval_general(\n        modin_series, pandas_series, lambda series: series.str.lstrip(to_strip=to_strip)\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_string_data_values, ids=test_string_data_keys)\n@pytest.mark.parametrize(\"sep\", string_sep_values, ids=string_sep_keys)\n@pytest.mark.parametrize(\"expand\", [False, True])\ndef test_str_partition(data, sep, expand):\n    modin_series, pandas_series = create_test_series(data)\n    eval_general(\n        modin_series,\n        pandas_series,\n        lambda series: series.str.partition(sep, expand=expand),\n        # https://github.com/modin-project/modin/issues/5971\n        comparator_kwargs={\"check_dtypes\": sep is not None},\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_string_data_values, ids=test_string_data_keys)\n@pytest.mark.parametrize(\"sep\", string_sep_values, ids=string_sep_keys)\n@pytest.mark.parametrize(\"expand\", [False, True])\ndef test_str_rpartition(data, sep, expand):\n    modin_series, pandas_series = create_test_series(data)\n    eval_general(\n        modin_series,\n        pandas_series,\n        lambda series: series.str.rpartition(sep, expand=expand),\n        # https://github.com/modin-project/modin/issues/5971\n        comparator_kwargs={\"check_dtypes\": sep is not None},\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_string_data_values, ids=test_string_data_keys)\ndef test_str_lower(data):\n    modin_series, pandas_series = create_test_series(data)\n    eval_general(modin_series, pandas_series, lambda series: series.str.lower())\n\n\n@pytest.mark.parametrize(\"data\", test_string_data_values, ids=test_string_data_keys)\ndef test_str_upper(data):\n    modin_series, pandas_series = create_test_series(data)\n    eval_general(modin_series, pandas_series, lambda series: series.str.upper())\n\n\n@pytest.mark.parametrize(\"data\", test_string_data_values, ids=test_string_data_keys)\ndef test_str_title(data):\n    modin_series, pandas_series = 
create_test_series(data)\n    eval_general(modin_series, pandas_series, lambda series: series.str.title())\n\n\n@pytest.mark.parametrize(\"data\", test_string_data_values, ids=test_string_data_keys)\n@pytest.mark.parametrize(\"sub\", string_sep_values, ids=string_sep_keys)\n@pytest.mark.parametrize(\"start\", int_arg_values, ids=int_arg_keys)\n@pytest.mark.parametrize(\"end\", int_arg_values, ids=int_arg_keys)\ndef test_str_find(data, sub, start, end):\n    modin_series, pandas_series = create_test_series(data)\n    eval_general(\n        modin_series,\n        pandas_series,\n        lambda series: series.str.find(sub, start=start, end=end),\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_string_data_values, ids=test_string_data_keys)\n@pytest.mark.parametrize(\"sub\", string_sep_values, ids=string_sep_keys)\n@pytest.mark.parametrize(\"start\", int_arg_values, ids=int_arg_keys)\n@pytest.mark.parametrize(\"end\", int_arg_values, ids=int_arg_keys)\ndef test_str_rfind(data, sub, start, end):\n    modin_series, pandas_series = create_test_series(data)\n    eval_general(\n        modin_series,\n        pandas_series,\n        lambda series: series.str.rfind(sub, start=start, end=end),\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_string_data_values, ids=test_string_data_keys)\n@pytest.mark.parametrize(\"sub\", string_sep_values, ids=string_sep_keys)\n@pytest.mark.parametrize(\n    \"start, end\",\n    [(0, None), (1, -1), (1, 3)],\n    ids=[\"default\", \"non_default_working\", \"exception\"],\n)\ndef test_str_index(data, sub, start, end, request):\n    modin_series, pandas_series = create_test_series(data)\n    expected_exception = None\n    if \"exception-comma sep\" in request.node.callspec.id:\n        expected_exception = ValueError(\"substring not found\")\n    eval_general(\n        modin_series,\n        pandas_series,\n        lambda series: series.str.index(sub, start=start, end=end),\n        expected_exception=expected_exception,\n    
)\n\n\n@pytest.mark.parametrize(\"data\", test_string_data_values, ids=test_string_data_keys)\n@pytest.mark.parametrize(\"sub\", string_sep_values, ids=string_sep_keys)\n@pytest.mark.parametrize(\n    \"start, end\",\n    [(0, None), (1, -1), (1, 3)],\n    ids=[\"default\", \"non_default_working\", \"exception\"],\n)\ndef test_str_rindex(data, sub, start, end, request):\n    modin_series, pandas_series = create_test_series(data)\n    expected_exception = None\n    if \"exception-comma sep\" in request.node.callspec.id:\n        expected_exception = ValueError(\"substring not found\")\n    eval_general(\n        modin_series,\n        pandas_series,\n        lambda series: series.str.rindex(sub, start=start, end=end),\n        expected_exception=expected_exception,\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_string_data_values, ids=test_string_data_keys)\ndef test_str_capitalize(data):\n    modin_series, pandas_series = create_test_series(data)\n    eval_general(modin_series, pandas_series, lambda series: series.str.capitalize())\n\n\n@pytest.mark.parametrize(\"data\", test_string_data_values, ids=test_string_data_keys)\ndef test_str_swapcase(data):\n    modin_series, pandas_series = create_test_series(data)\n    eval_general(modin_series, pandas_series, lambda series: series.str.swapcase())\n\n\n@pytest.mark.parametrize(\"data\", test_string_data_values, ids=test_string_data_keys)\n@pytest.mark.parametrize(\n    \"form\", [\"NFC\", \"NFKC\", \"NFD\", \"NFKD\"], ids=[\"NFC\", \"NFKC\", \"NFD\", \"NFKD\"]\n)\ndef test_str_normalize(data, form):\n    modin_series, pandas_series = create_test_series(data)\n    eval_general(modin_series, pandas_series, lambda series: series.str.normalize(form))\n\n\n@pytest.mark.parametrize(\"data\", test_string_data_values, ids=test_string_data_keys)\n@pytest.mark.parametrize(\"pat\", string_sep_values, ids=string_sep_keys)\ndef test_str_translate(data, pat):\n    modin_series, pandas_series = create_test_series(data)\n\n    # 
Test none table\n    eval_general(\n        modin_series,\n        pandas_series,\n        lambda series: series.str.translate(None),\n        # https://github.com/modin-project/modin/issues/5970\n        comparator_kwargs={\"check_dtypes\": False},\n    )\n\n    # Translation dictionary\n    table = {pat: \"DDD\"}\n    eval_general(\n        modin_series, pandas_series, lambda series: series.str.translate(table)\n    )\n\n    # Translation table with maketrans (python3 only)\n    if pat is not None:\n        table = str.maketrans(pat, \"d\" * len(pat))\n        eval_general(\n            modin_series, pandas_series, lambda series: series.str.translate(table)\n        )\n\n\n@pytest.mark.parametrize(\"data\", test_string_data_values, ids=test_string_data_keys)\ndef test_str_isalnum(data):\n    modin_series, pandas_series = create_test_series(data)\n    eval_general(\n        modin_series,\n        pandas_series,\n        lambda series: series.str.isalnum(),\n        # https://github.com/modin-project/modin/issues/5969\n        comparator_kwargs={\"check_dtypes\": False},\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_string_data_values, ids=test_string_data_keys)\ndef test_str_isalpha(data):\n    modin_series, pandas_series = create_test_series(data)\n    eval_general(\n        modin_series,\n        pandas_series,\n        lambda series: series.str.isalpha(),\n        # https://github.com/modin-project/modin/issues/5969\n        comparator_kwargs={\"check_dtypes\": False},\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_string_data_values, ids=test_string_data_keys)\ndef test_str_isdigit(data):\n    modin_series, pandas_series = create_test_series(data)\n    eval_general(\n        modin_series,\n        pandas_series,\n        lambda series: series.str.isdigit(),\n        # https://github.com/modin-project/modin/issues/5969\n        comparator_kwargs={\"check_dtypes\": False},\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_string_data_values, 
ids=test_string_data_keys)\ndef test_str_isspace(data):\n    modin_series, pandas_series = create_test_series(data)\n    eval_general(\n        modin_series,\n        pandas_series,\n        lambda series: series.str.isspace(),\n        # https://github.com/modin-project/modin/issues/5969\n        comparator_kwargs={\"check_dtypes\": False},\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_string_data_values, ids=test_string_data_keys)\ndef test_str_islower(data):\n    modin_series, pandas_series = create_test_series(data)\n    eval_general(\n        modin_series,\n        pandas_series,\n        lambda series: series.str.islower(),\n        # https://github.com/modin-project/modin/issues/5969\n        comparator_kwargs={\"check_dtypes\": False},\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_string_data_values, ids=test_string_data_keys)\ndef test_str_isupper(data):\n    modin_series, pandas_series = create_test_series(data)\n    eval_general(\n        modin_series,\n        pandas_series,\n        lambda series: series.str.isupper(),\n        # https://github.com/modin-project/modin/issues/5969\n        comparator_kwargs={\"check_dtypes\": False},\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_string_data_values, ids=test_string_data_keys)\ndef test_str_istitle(data):\n    modin_series, pandas_series = create_test_series(data)\n    eval_general(\n        modin_series,\n        pandas_series,\n        lambda series: series.str.istitle(),\n        # https://github.com/modin-project/modin/issues/5969\n        comparator_kwargs={\"check_dtypes\": False},\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_string_data_values, ids=test_string_data_keys)\ndef test_str_isnumeric(data):\n    modin_series, pandas_series = create_test_series(data)\n    eval_general(\n        modin_series,\n        pandas_series,\n        lambda series: series.str.isnumeric(),\n        # https://github.com/modin-project/modin/issues/5969\n        
comparator_kwargs={\"check_dtypes\": False},\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_string_data_values, ids=test_string_data_keys)\ndef test_str_isdecimal(data):\n    modin_series, pandas_series = create_test_series(data)\n    eval_general(\n        modin_series,\n        pandas_series,\n        lambda series: series.str.isdecimal(),\n        # https://github.com/modin-project/modin/issues/5969\n        comparator_kwargs={\"check_dtypes\": False},\n    )\n\n\n@pytest.mark.parametrize(\"data\", test_string_data_values, ids=test_string_data_keys)\ndef test_casefold(data):\n    modin_series, pandas_series = create_test_series(data)\n    eval_general(modin_series, pandas_series, lambda series: series.str.casefold())\n\n\n@pytest.fixture\ndef str_encode_decode_test_data() -> list[str]:\n    return [\n        \"abC|DeF,Hik\",\n        \"234,3245.67\",\n        \"gSaf,qWer|Gre\",\n        \"asd3,4sad|\",\n        np.nan,\n        None,\n        # add a string that we can't encode in ascii, and whose utf-8 encoding\n        # we cannot decode in ascii\n        \"ക\",\n    ]\n\n\n@pytest.mark.parametrize(\"encoding\", encoding_types)\n@pytest.mark.parametrize(\"errors\", [\"strict\", \"ignore\", \"replace\"])\ndef test_str_encode(encoding, errors, str_encode_decode_test_data):\n    expected_exception = None\n    if errors == \"strict\" and encoding == \"ascii\":\n        # quite safe to check only types\n        expected_exception = False\n    eval_general(\n        *create_test_series(str_encode_decode_test_data),\n        lambda s: s.str.encode(encoding, errors=errors),\n        expected_exception=expected_exception,\n    )\n\n\n@pytest.mark.parametrize(\n    \"encoding\",\n    encoding_types,\n)\n@pytest.mark.parametrize(\"errors\", [\"strict\", \"ignore\", \"replace\"])\ndef test_str_decode(encoding, errors, str_encode_decode_test_data):\n    expected_exception = None\n    if errors == \"strict\":\n        # it's quite safe here to check only types of 
exceptions\n        expected_exception = False\n    eval_general(\n        *create_test_series(\n            [\n                s.encode(\"utf-8\") if isinstance(s, str) else s\n                for s in str_encode_decode_test_data\n            ]\n        ),\n        lambda s: s.str.decode(encoding, errors=errors),\n        expected_exception=expected_exception,\n    )\n\n\ndef test_list_general():\n    pa = pytest.importorskip(\"pyarrow\")\n\n    # Copied from pandas examples\n    modin_series, pandas_series = create_test_series(\n        [\n            [1, 2, 3],\n            [3],\n        ],\n        dtype=pd.ArrowDtype(pa.list_(pa.int64())),\n    )\n    eval_general(modin_series, pandas_series, lambda series: series.list.flatten())\n    eval_general(modin_series, pandas_series, lambda series: series.list.len())\n    eval_general(modin_series, pandas_series, lambda series: series.list[0])\n\n\ndef test_struct_general():\n    pa = pytest.importorskip(\"pyarrow\")\n\n    # Copied from pandas examples\n    modin_series, pandas_series = create_test_series(\n        [\n            {\"version\": 1, \"project\": \"pandas\"},\n            {\"version\": 2, \"project\": \"pandas\"},\n            {\"version\": 1, \"project\": \"numpy\"},\n        ],\n        dtype=pd.ArrowDtype(\n            pa.struct([(\"version\", pa.int64()), (\"project\", pa.string())])\n        ),\n    )\n    eval_general(modin_series, pandas_series, lambda series: series.struct.dtypes)\n    eval_general(\n        modin_series, pandas_series, lambda series: series.struct.field(\"project\")\n    )\n    eval_general(modin_series, pandas_series, lambda series: series.struct.explode())\n\n    # nested struct types\n    version_type = pa.struct(\n        [\n            (\"major\", pa.int64()),\n            (\"minor\", pa.int64()),\n        ]\n    )\n    modin_series, pandas_series = create_test_series(\n        [\n            {\"version\": {\"major\": 1, \"minor\": 5}, \"project\": \"pandas\"},\n            
{\"version\": {\"major\": 2, \"minor\": 1}, \"project\": \"pandas\"},\n            {\"version\": {\"major\": 1, \"minor\": 26}, \"project\": \"numpy\"},\n        ],\n        dtype=pd.ArrowDtype(\n            pa.struct([(\"version\", version_type), (\"project\", pa.string())])\n        ),\n    )\n    eval_general(\n        modin_series,\n        pandas_series,\n        lambda series: series.struct.field([\"version\", \"minor\"]),\n    )\n\n\ndef _case_when_caselists():\n    def permutations(values):\n        return [\n            p\n            for r in range(1, len(values) + 1)\n            for p in itertools.permutations(values, r)\n        ]\n\n    conditions = permutations(\n        [\n            [True, False, False, False] * 10,\n            pandas.Series([True, False, False, False] * 10),\n            pandas.Series([True, False, False, False] * 10, index=range(78, -2, -2)),\n            lambda df: df.gt(0),\n        ]\n    )\n    replacements = permutations([[0, 3, 4, 5] * 10, 0, lambda df: 1])\n    caselists = []\n    for c in conditions:\n        for r in replacements:\n            if len(c) == len(r):\n                caselists.append(list(zip(c, r)))\n    return caselists\n\n\n@pytest.mark.parametrize(\n    \"base\",\n    [\n        pandas.Series(range(40)),\n        pandas.Series([0, 7, 8, 9] * 10, name=\"c\", index=range(0, 80, 2)),\n    ],\n)\n@pytest.mark.parametrize(\n    \"caselist\",\n    _case_when_caselists(),\n)\n@pytest.mark.skipif(\n    Engine.get() == \"Dask\",\n    reason=\"https://github.com/modin-project/modin/issues/7148\",\n)\ndef test_case_when(base, caselist):\n    pandas_result = base.case_when(caselist)\n    modin_bases = [pd.Series(base)]\n\n    # 'base' and serieses from 'caselist' must have equal lengths, however in this test we want\n    # to verify that 'case_when' works correctly even if partitioning of 'base' and 'caselist' isn't equal.\n    # BaseOnPython always uses a single partition, thus skipping this test for them.\n    
if not (\n        f\"{StorageFormat.get()}On{Engine.get()}\" == \"BaseOnPython\"\n        or current_execution_is_native()\n    ):\n        # we can only import this function for partitioned execution modes.\n        from modin.tests.core.storage_formats.pandas.test_internals import (\n            construct_modin_df_by_scheme,\n        )\n\n        modin_base_repart = construct_modin_df_by_scheme(\n            base.to_frame(),\n            partitioning_scheme={\"row_lengths\": [14, 14, 12], \"column_widths\": [1]},\n        ).squeeze(axis=1)\n        assert (\n            modin_bases[0]._query_compiler._modin_frame._partitions.shape\n            != modin_base_repart._query_compiler._modin_frame._partitions.shape\n        )\n        modin_base_repart.name = base.name\n        modin_bases.append(modin_base_repart)\n\n    for modin_base in modin_bases:\n        df_equals(pandas_result, modin_base.case_when(caselist))\n        if any(\n            isinstance(data, pandas.Series)\n            for case_tuple in caselist\n            for data in case_tuple\n        ):\n            caselist = [\n                tuple(\n                    pd.Series(data) if isinstance(data, pandas.Series) else data\n                    for data in case_tuple\n                )\n                for case_tuple in caselist\n            ]\n            df_equals(pandas_result, modin_base.case_when(caselist))\n\n\n@pytest.mark.parametrize(\"data\", test_string_data_values, ids=test_string_data_keys)\ndef test_non_commutative_add_string_to_series(data):\n    # This test checks that add and radd do different things when addition is\n    # not commutative, e.g. for adding a string to a string. 
For context see\n    # https://github.com/modin-project/modin/issues/4908\n    eval_general(*create_test_series(data), lambda s: \"string\" + s)\n    eval_general(*create_test_series(data), lambda s: s + \"string\")\n\n\ndef test_non_commutative_multiply_pandas():\n    # The non commutative integer class implementation is tricky. Check that\n    # multiplying such an integer with a pandas series is really not\n    # commutative.\n    pandas_series = pandas.Series(1, dtype=int)\n    integer = NonCommutativeMultiplyInteger(2)\n    assert not (integer * pandas_series).equals(pandas_series * integer)\n\n\ndef test_non_commutative_multiply():\n    # This test checks that mul and rmul do different things when\n    # multiplication is not commutative, e.g. for NonCommutativeMultiplyInteger.\n    # For context see https://github.com/modin-project/modin/issues/5238\n    modin_series, pandas_series = create_test_series(1, dtype=int)\n    integer = NonCommutativeMultiplyInteger(2)\n    eval_general(modin_series, pandas_series, lambda s: integer * s)\n    eval_general(modin_series, pandas_series, lambda s: s * integer)\n\n\n@pytest.mark.parametrize(\n    \"is_sparse_data\", [True, False], ids=[\"is_sparse\", \"is_not_sparse\"]\n)\ndef test_hasattr_sparse(is_sparse_data):\n    modin_df, pandas_df = (\n        create_test_series(\n            pandas.arrays.SparseArray(test_data[\"float_nan_data\"].values())\n        )\n        if is_sparse_data\n        else create_test_series(test_data[\"float_nan_data\"])\n    )\n    eval_general(modin_df, pandas_df, lambda df: hasattr(df, \"sparse\"))\n\n\n@pytest.mark.parametrize(\n    \"data\", test_data_categorical_values, ids=test_data_categorical_keys\n)\ndef test_cat_categories(data):\n    modin_series, pandas_series = create_test_series(data.copy())\n    df_equals(modin_series.cat.categories, pandas_series.cat.categories)\n\n    def set_categories(ser):\n        ser.cat.categories = list(\"qwert\")\n        return ser\n\n    # pandas 
2.0.0: Removed setting Categorical.categories directly (GH47834)\n    # Just check the exception\n    expected_exception = AttributeError(\"can't set attribute\")\n    if sys.version_info >= (3, 10):\n        # The exception message varies across different versions of Python\n        expected_exception = False\n    eval_general(\n        modin_series,\n        pandas_series,\n        set_categories,\n        expected_exception=expected_exception,\n    )\n\n\n@pytest.mark.parametrize(\n    \"data\", test_data_categorical_values, ids=test_data_categorical_keys\n)\ndef test_cat_ordered(data):\n    modin_series, pandas_series = create_test_series(data.copy())\n    assert modin_series.cat.ordered == pandas_series.cat.ordered\n\n\n@pytest.mark.parametrize(\n    \"data\", test_data_categorical_values, ids=test_data_categorical_keys\n)\ndef test_cat_codes(data):\n    modin_series, pandas_series = create_test_series(data.copy())\n    pandas_result = pandas_series.cat.codes\n    modin_result = modin_series.cat.codes\n    df_equals(modin_result, pandas_result)\n\n\n@pytest.mark.parametrize(\n    \"set_min_row_partition_size\",\n    [1, 2],\n    ids=[\"four_row_partitions\", \"two_row_partitions\"],\n    indirect=True,\n)\ndef test_cat_codes_issue5650(set_min_row_partition_size):\n    data = {\"name\": [\"abc\", \"def\", \"ghi\", \"jkl\"]}\n    pandas_df = pandas.DataFrame(data)\n    pandas_df = pandas_df.astype(\"category\")\n    modin_df = pd.DataFrame(data)\n    modin_df = modin_df.astype(\"category\")\n    eval_general(\n        modin_df,\n        pandas_df,\n        lambda df: df[\"name\"].cat.codes,\n        comparator_kwargs={\"check_dtypes\": True},\n    )\n\n\n@pytest.mark.parametrize(\n    \"data\", test_data_categorical_values, ids=test_data_categorical_keys\n)\ndef test_cat_rename_categories(data):\n    modin_series, pandas_series = create_test_series(data.copy())\n    pandas_result = pandas_series.cat.rename_categories(list(\"qwert\"))\n    modin_result = 
modin_series.cat.rename_categories(list(\"qwert\"))\n    df_equals(modin_series, pandas_series)\n    df_equals(modin_result, pandas_result)\n\n\n@pytest.mark.parametrize(\n    \"data\", test_data_categorical_values, ids=test_data_categorical_keys\n)\n@pytest.mark.parametrize(\"ordered\", bool_arg_values, ids=bool_arg_keys)\ndef test_cat_reorder_categories(data, ordered):\n    modin_series, pandas_series = create_test_series(data.copy())\n    pandas_result = pandas_series.cat.reorder_categories(list(\"tades\"), ordered=ordered)\n    modin_result = modin_series.cat.reorder_categories(list(\"tades\"), ordered=ordered)\n    df_equals(modin_series, pandas_series)\n    df_equals(modin_result, pandas_result)\n\n\n@pytest.mark.parametrize(\n    \"data\", test_data_categorical_values, ids=test_data_categorical_keys\n)\ndef test_cat_add_categories(data):\n    modin_series, pandas_series = create_test_series(data.copy())\n    pandas_result = pandas_series.cat.add_categories(list(\"qw\"))\n    modin_result = modin_series.cat.add_categories(list(\"qw\"))\n    df_equals(modin_series, pandas_series)\n    df_equals(modin_result, pandas_result)\n\n\n@pytest.mark.parametrize(\n    \"data\", test_data_categorical_values, ids=test_data_categorical_keys\n)\ndef test_cat_remove_categories(data):\n    modin_series, pandas_series = create_test_series(data.copy())\n    pandas_result = pandas_series.cat.remove_categories(list(\"at\"))\n    modin_result = modin_series.cat.remove_categories(list(\"at\"))\n    df_equals(modin_series, pandas_series)\n    df_equals(modin_result, pandas_result)\n\n\n@pytest.mark.parametrize(\n    \"data\", test_data_categorical_values, ids=test_data_categorical_keys\n)\ndef test_cat_remove_unused_categories(data):\n    modin_series, pandas_series = create_test_series(data.copy())\n    pandas_series[1] = np.nan\n    pandas_result = pandas_series.cat.remove_unused_categories()\n    modin_series[1] = np.nan\n    modin_result = 
modin_series.cat.remove_unused_categories()\n    df_equals(modin_series, pandas_series)\n    df_equals(modin_result, pandas_result)\n\n\n@pytest.mark.parametrize(\n    \"data\", test_data_categorical_values, ids=test_data_categorical_keys\n)\n@pytest.mark.parametrize(\"ordered\", bool_arg_values, ids=bool_arg_keys)\n@pytest.mark.parametrize(\"rename\", [True, False])\ndef test_cat_set_categories(data, ordered, rename):\n    modin_series, pandas_series = create_test_series(data.copy())\n    pandas_result = pandas_series.cat.set_categories(\n        list(\"qwert\"), ordered=ordered, rename=rename\n    )\n    modin_result = modin_series.cat.set_categories(\n        list(\"qwert\"), ordered=ordered, rename=rename\n    )\n    df_equals(modin_series, pandas_series)\n    df_equals(modin_result, pandas_result)\n\n\n@pytest.mark.parametrize(\n    \"data\", test_data_categorical_values, ids=test_data_categorical_keys\n)\ndef test_cat_as_ordered(data):\n    modin_series, pandas_series = create_test_series(data.copy())\n    pandas_result = pandas_series.cat.as_ordered()\n    modin_result = modin_series.cat.as_ordered()\n    df_equals(modin_series, pandas_series)\n    df_equals(modin_result, pandas_result)\n\n\n@pytest.mark.parametrize(\n    \"data\", test_data_categorical_values, ids=test_data_categorical_keys\n)\ndef test_cat_as_unordered(data):\n    modin_series, pandas_series = create_test_series(data.copy())\n    pandas_result = pandas_series.cat.as_unordered()\n    modin_result = modin_series.cat.as_unordered()\n    df_equals(modin_series, pandas_series)\n    df_equals(modin_result, pandas_result)\n\n\ndef test_peculiar_callback():\n    def func(val):\n        if not isinstance(val, tuple):\n            raise BaseException(\"Urgh...\")\n        return val\n\n    pandas_df = pandas.DataFrame({\"col\": [(0, 1)]})\n    pandas_series = pandas_df[\"col\"].apply(func)\n\n    modin_df = pd.DataFrame({\"col\": [(0, 1)]})\n    modin_series = modin_df[\"col\"].apply(func)\n\n    
df_equals(modin_series, pandas_series)\n\n\n@pytest.mark.parametrize(\"data\", test_data_values, ids=test_data_keys)\ndef test_apply_return_df(data):\n    modin_series, pandas_series = create_test_series(data)\n    eval_general(\n        modin_series,\n        pandas_series,\n        lambda series: series.apply(\n            lambda x: pandas.Series([x + i for i in range(100)])\n        ),\n    )\n\n\n@pytest.mark.parametrize(\n    \"apply_function\",\n    (\n        lambda series, function: function(series),\n        lambda series, function: series.apply(function),\n        lambda series, function: series.map(function),\n    ),\n)\n@pytest.mark.parametrize(\"function\", UNIVERSAL_UNARY_NUMPY_FUNCTIONS_FOR_FLOATS)\ndef test_unary_numpy_universal_function_issue_6483_and_7645(function, apply_function):\n    eval_general(\n        *create_test_series(test_data[\"float_nan_data\"]),\n        lambda series: apply_function(series, function),\n    )\n\n\ndef test_binary_numpy_universal_function_issue_6483():\n    eval_general(\n        *create_test_series(test_data[\"float_nan_data\"]),\n        lambda series: np.arctan2(series, np.sin(series)),\n    )\n\n\ndef test__reduce__():\n    # `Series.__reduce__` will be called implicitly when lambda expressions are\n    # pre-processed for the distributed engine.\n    series_data = [\"Major League Baseball\", \"National Basketball Association\"]\n    abbr_md, abbr_pd = create_test_series(series_data, index=[\"MLB\", \"NBA\"])\n\n    dataframe_data = {\n        \"name\": [\"Mariners\", \"Lakers\"] * 500,\n        \"league_abbreviation\": [\"MLB\", \"NBA\"] * 500,\n    }\n    teams_md, teams_pd = create_test_dfs(dataframe_data)\n\n    result_md = (\n        teams_md.set_index(\"name\")\n        .league_abbreviation.apply(lambda abbr: abbr_md.loc[abbr])\n        .rename(\"league\")\n    )\n\n    result_pd = (\n        teams_pd.set_index(\"name\")\n        .league_abbreviation.apply(lambda abbr: abbr_pd.loc[abbr])\n        
.rename(\"league\")\n    )\n    df_equals(result_md, result_pd)\n\n\n@pytest.mark.parametrize(\n    \"op\",\n    [\n        \"add\",\n        \"radd\",\n        \"divmod\",\n        \"eq\",\n        \"floordiv\",\n        \"ge\",\n        \"gt\",\n        \"le\",\n        \"lt\",\n        \"mod\",\n        \"mul\",\n        \"rmul\",\n        \"ne\",\n        \"pow\",\n        \"rdivmod\",\n        \"rfloordiv\",\n        \"rmod\",\n        \"rpow\",\n        \"rsub\",\n        \"rtruediv\",\n        \"sub\",\n        \"truediv\",\n    ],\n)\ndef test_binary_with_fill_value_issue_7381(op):\n    # Ensures that series binary operations respect the fill_value flag\n    series_md, series_pd = create_test_series([0, 1, 2, 3])\n    rhs_md, rhs_pd = create_test_series([0])\n    result_md = getattr(series_md, op)(rhs_md, fill_value=2)\n    result_pd = getattr(series_pd, op)(rhs_pd, fill_value=2)\n    df_equals(result_md, result_pd)\n\n\n@pytest.mark.parametrize(\"op\", [\"eq\", \"ge\", \"gt\", \"le\", \"lt\", \"ne\"])\ndef test_logical_binary_with_list(op):\n    series_md, series_pd = create_test_series([0, 1, 2])\n    rhs = [2, 1, 0]\n    result_md = getattr(series_md, op)(rhs)\n    result_pd = getattr(series_pd, op)(rhs)\n    df_equals(result_md, result_pd)\n\n\n@pytest.mark.parametrize(\"op\", [\"argmax\", \"argmin\"])\ndef test_argmax_argmin_7413(op):\n    # Ensures that argmin/argmax use positional index, not the actual index value\n    series_md, series_pd = create_test_series([1, 2, 3], index=[\"b\", \"a\", \"c\"])\n    result_md = getattr(series_md, op)()\n    result_pd = getattr(series_pd, op)()\n    assert result_md == result_pd\n\n\ndef test_rename_axis():\n    series_md, series_pd = create_test_series([0, 1, 2])\n    eval_general(series_md, series_pd, lambda ser: ser.rename_axis(\"name\"))\n    eval_general(\n        series_md,\n        series_pd,\n        lambda ser: ser.rename_axis(\"new_name\", inplace=True),\n        __inplace__=True,\n    )\n    # axis=1 
is invalid for series\n    eval_general(\n        series_md,\n        series_pd,\n        lambda ser: ser.rename_axis(\"newer_name\", axis=1),\n        expected_exception=ValueError(\"No axis named 1 for object type Series\"),\n    )\n"
  },
  {
    "path": "modin/tests/pandas/utils.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nfrom __future__ import annotations\n\nimport csv\nimport functools\nimport itertools\nimport math\nimport os\nimport re\nfrom contextlib import contextmanager\nfrom io import BytesIO\nfrom pathlib import Path\nfrom string import ascii_letters\nfrom typing import Union\n\nimport numpy as np\nimport pandas\nimport psutil\nimport pytest\nfrom pandas.core.dtypes.common import (\n    is_bool_dtype,\n    is_datetime64_any_dtype,\n    is_list_like,\n    is_numeric_dtype,\n    is_object_dtype,\n    is_string_dtype,\n    is_timedelta64_dtype,\n)\n\nimport modin.pandas as pd\nfrom modin import set_execution\nfrom modin.config import (\n    Backend,\n    Engine,\n    MinColumnPartitionSize,\n    MinRowPartitionSize,\n    NativePandasDeepCopy,\n    NPartitions,\n    RangePartitioning,\n    StorageFormat,\n    TestDatasetSize,\n    TrackFileLeaks,\n)\nfrom modin.pandas.io import to_pandas\nfrom modin.pandas.testing import (\n    assert_extension_array_equal,\n    assert_frame_equal,\n    assert_index_equal,\n    assert_series_equal,\n)\nfrom modin.utils import try_cast_to_pandas\n\nrandom_state = np.random.RandomState(seed=42)\n\nDATASET_SIZE_DICT = {\n    \"Small\": (2**6, 
2**6),\n    \"Normal\": (2**6, 2**8),\n    \"Big\": (2**7, 2**12),\n}\n\n# Size of test dataframes\nNCOLS, NROWS = DATASET_SIZE_DICT.get(TestDatasetSize.get(), DATASET_SIZE_DICT[\"Normal\"])\nNGROUPS = 10\n\n# Range for values for test data\nRAND_LOW = 0\nRAND_HIGH = 100\n\n# Input data and functions for the tests\n# The test data that we will test our code against\ntest_data = {\n    # \"empty_data\": {},\n    # \"columns_only\": {\"col1\": [], \"col2\": [], \"col3\": [], \"col4\": [], \"col5\": []},\n    \"int_data\": {\n        \"col{}\".format(int((i - NCOLS / 2) % NCOLS + 1)): random_state.randint(\n            RAND_LOW, RAND_HIGH, size=(NROWS)\n        )\n        for i in range(NCOLS)\n    },\n    \"float_nan_data\": {\n        \"col{}\".format(int((i - NCOLS / 2) % NCOLS + 1)): [\n            (\n                x\n                if (j % 4 == 0 and i > NCOLS // 2) or (j != i and i <= NCOLS // 2)\n                else np.nan\n            )\n            for j, x in enumerate(\n                random_state.uniform(RAND_LOW, RAND_HIGH, size=(NROWS))\n            )\n        ]\n        for i in range(NCOLS)\n    },\n    # \"int_float_object_data\": {\n    #     \"col3\": [1, 2, 3, 4],\n    #     \"col4\": [4, 5, 6, 7],\n    #     \"col1\": [8.0, 9.4, 10.1, 11.3],\n    #     \"col2\": [\"a\", \"b\", \"c\", \"d\"],\n    # },\n    # \"datetime_timedelta_data\": {\n    #     \"col3\": [\n    #         np.datetime64(\"2010\"),\n    #         np.datetime64(\"2011\"),\n    #         np.datetime64(\"2011-06-15T00:00\"),\n    #         np.datetime64(\"2009-01-01\"),\n    #     ],\n    #     \"col4\": [\n    #         np.datetime64(\"2010\"),\n    #         np.datetime64(\"2011\"),\n    #         np.datetime64(\"2011-06-15T00:00\"),\n    #         np.datetime64(\"2009-01-01\"),\n    #     ],\n    #     \"col1\": [\n    #         np.timedelta64(1, \"M\"),\n    #         np.timedelta64(2, \"D\"),\n    #         np.timedelta64(3, \"Y\"),\n    #         np.timedelta64(20, 
\"D\"),\n    #     ],\n    #     \"col2\": [\n    #         np.timedelta64(1, \"M\"),\n    #         np.timedelta64(2, \"D\"),\n    #         np.timedelta64(3, \"Y\"),\n    #         np.timedelta64(20, \"D\"),\n    #     ],\n    # },\n    # \"all_data\": {\n    #     \"col3\": 1.0,\n    #     \"col4\": np.datetime64(\"2011-06-15T00:00\"),\n    #     \"col5\": np.array([3] * 4, dtype=\"int32\"),\n    #     \"col1\": \"foo\",\n    #     \"col2\": True,\n    # },\n}\n# The parse_dates param can take several different types and combinations of\n# types. Use the following values to test date parsing on a CSV created for\n# that purpose at `time_parsing_csv_path`\nparse_dates_values_by_id = {\n    \"bool\": False,\n    \"list_of_single_int\": [0],\n    \"list_of_single_string\": [\"timestamp\"],\n    \"list_of_list_of_strings\": [[\"year\", \"month\", \"date\"]],\n    \"list_of_string_and_list_of_strings\": [\"timestamp\", [\"year\", \"month\", \"date\"]],\n    \"list_of_list_of_ints\": [[1, 2, 3]],\n    \"list_of_list_of_strings_and_ints\": [[\"year\", 2, \"date\"]],\n    \"empty_list\": [],\n    \"dict\": {\"year_and_month\": [1, 2], \"day\": [\"date\"]},\n    \"nonexistent_string_column\": [\"z\"],\n    \"nonexistent_int_column\": [99],\n}\n\n# See details in #1403\ntest_data[\"int_data\"][\"index\"] = test_data[\"int_data\"].pop(\n    \"col{}\".format(int(NCOLS / 2))\n)\n\nfor col in test_data[\"float_nan_data\"]:\n    for row in range(NROWS // 2):\n        if row % 16 == 0:\n            test_data[\"float_nan_data\"][col][row] = np.nan\n\ntest_data_values = list(test_data.values())\ntest_data_keys = list(test_data.keys())\n\ntest_bool_data = {\n    \"col{}\".format(int((i - NCOLS / 2) % NCOLS + 1)): random_state.choice(\n        [True, False], size=(NROWS)\n    )\n    for i in range(NCOLS)\n}\n\ntest_groupby_data = {f\"col{i}\": np.arange(NCOLS) % NGROUPS for i in range(NROWS)}\n\ntest_data_resample = {\n    \"data\": {\n        f\"col{i}\": 
random_state.randint(RAND_LOW, RAND_HIGH, size=NROWS)\n        for i in range(10)\n    },\n    \"index\": pandas.date_range(\"31/12/2000\", periods=NROWS, freq=\"h\"),\n}\n\ntest_data_with_duplicates = {\n    \"no_duplicates\": {\n        \"col{}\".format(int((i - NCOLS / 2) % NCOLS + 1)): range(NROWS)\n        for i in range(NCOLS)\n    },\n    \"all_duplicates\": {\n        \"col{}\".format(int((i - NCOLS / 2) % NCOLS + 1)): [\n            float(i) for _ in range(NROWS)\n        ]\n        for i in range(NCOLS)\n    },\n    \"some_duplicates\": {\n        \"col{}\".format(int((i - NCOLS / 2) % NCOLS + 1)): [\n            i if j % 7 == 0 else x for j, x in enumerate(range(NROWS))\n        ]\n        for i in range(NCOLS)\n    },\n    \"has_name_column\": {\n        \"name\": [\"one\", \"two\", \"two\", \"three\"],\n        \"col1\": [1, 2, 2, 3],\n        \"col3\": [10, 20, 20, 3],\n        \"col7\": [100, 201, 200, 300],\n    },\n    \"str_columns\": {\n        \"col_str{}\".format(int((i - NCOLS / 2) % NCOLS + 1)): [\n            \"s\" + str(x % 5) for x in range(NROWS)\n        ]\n        for i in range(NCOLS)\n    },\n}\n\ntest_data_with_duplicates[\"float_nan\"] = test_data[\"float_nan_data\"]\n\ntest_data_small = {\n    \"small\": {\n        \"col0\": [1, 2, 3, 4],\n        \"col1\": [8.0, 9.4, 10.1, 11.3],\n        \"col2\": [4, 5, 6, 7],\n    }\n}\n\ntest_data_diff_dtype = {\n    \"int_col\": [-5, 2, 7, 16],\n    \"float_col\": [np.nan, -9.4, 10.1, np.nan],\n    \"str_col\": [\"a\", np.nan, \"c\", \"d\"],\n    \"bool_col\": [False, True, True, False],\n}\n\ntest_data_small_values = list(test_data_small.values())\ntest_data_small_keys = list(test_data_small.keys())\n\ntest_data_with_duplicates_values = list(test_data_with_duplicates.values())\ntest_data_with_duplicates_keys = list(test_data_with_duplicates.keys())\n\ntest_data_categorical = {\n    \"ordered\": pandas.Categorical(list(\"testdata\"), ordered=True),\n    \"unordered\": 
pandas.Categorical(list(\"testdata\"), ordered=False),\n}\n\ntest_data_categorical_values = list(test_data_categorical.values())\ntest_data_categorical_keys = list(test_data_categorical.keys())\n\n# Fully fill all of the partitions used in tests.\ntest_data_large_categorical_dataframe = {\n    i: pandas.Categorical(np.arange(NPartitions.get() * MinRowPartitionSize.get()))\n    for i in range(NPartitions.get() * MinColumnPartitionSize.get())\n}\ntest_data_large_categorical_series_values = [\n    pandas.Categorical(np.arange(NPartitions.get() * MinRowPartitionSize.get()))\n]\ntest_data_large_categorical_series_keys = [\"categorical_series\"]\n\nnumeric_dfs = [\n    \"empty_data\",\n    \"columns_only\",\n    \"int_data\",\n    \"float_nan_data\",\n    \"with_index_column\",\n]\n\nno_numeric_dfs = [\"datetime_timedelta_data\"]\n\n# String test data\ntest_string_data = {\n    \"separator data\": [\n        \"abC|DeF,Hik\",\n        \"234,3245.67\",\n        \"gSaf,qWer|Gre\",\n        \"asd3,4sad|\",\n        np.nan,\n    ]\n}\n\ntest_string_data_values = list(test_string_data.values())\ntest_string_data_keys = list(test_string_data.keys())\n\n# List of strings test data\ntest_string_list_data = {\"simple string\": [[\"a\"], [\"CdE\"], [\"jDf\"], [\"werB\"]]}\n\ntest_string_list_data_values = list(test_string_list_data.values())\ntest_string_list_data_keys = list(test_string_list_data.keys())\n\nstring_seperators = {\"comma sep\": \",\"}\n\nstring_sep_values = list(string_seperators.values())\nstring_sep_keys = list(string_seperators.keys())\n\nstring_na_rep = {\"None na_rep\": None, \"- na_rep\": \"-\", \"nan na_rep\": np.nan}\n\nstring_na_rep_values = list(string_na_rep.values())\nstring_na_rep_keys = list(string_na_rep.keys())\n\njoin_type = {\"left\": \"left\", \"right\": \"right\", \"inner\": \"inner\", \"outer\": \"outer\"}\n\njoin_type_keys = list(join_type.keys())\njoin_type_values = list(join_type.values())\n\n\nUNIVERSAL_UNARY_NUMPY_FUNCTIONS_FOR_FLOATS = (\n 
   np.negative,\n    np.abs,\n    np.sin,\n    np.positive,\n    np.absolute,\n    np.fabs,\n    np.rint,\n    np.sign,\n    np.conj,\n    np.conjugate,\n    np.exp,\n    np.exp2,\n    np.log,\n    np.log2,\n    np.log10,\n    np.expm1,\n    np.log1p,\n    np.sqrt,\n    np.square,\n    np.cbrt,\n    np.reciprocal,\n    np.sin,\n    np.cos,\n    np.tan,\n    np.arcsin,\n    np.arccos,\n    np.arctan,\n    np.sinh,\n    np.cosh,\n    np.tanh,\n    np.arcsinh,\n    np.arccosh,\n    np.arctanh,\n    np.degrees,\n    np.radians,\n    np.deg2rad,\n    np.rad2deg,\n    np.logical_not,\n    np.isfinite,\n    np.isinf,\n    np.isnan,\n    np.fabs,\n    np.signbit,\n    np.spacing,\n    np.floor,\n    np.ceil,\n    np.trunc,\n)\n\n# Test functions for applymap\ntest_func = {\n    \"plus one\": lambda x: x + 1,\n    \"convert to string\": str,\n    \"square\": lambda x: x * x,\n    \"identity\": lambda x: x,\n    \"return false\": lambda x: False,\n    **{func.__name__: func for func in UNIVERSAL_UNARY_NUMPY_FUNCTIONS_FOR_FLOATS},\n}\ntest_func_keys = list(test_func.keys())\ntest_func_values = list(test_func.values())\n\nnumeric_test_funcs = [\"plus one\", \"square\"]\n\n# Test functions for query\nquery_func = {\n    \"col1 < col2\": \"col1 < col2\",\n    \"col3 > col4\": \"col3 > col4\",\n    \"col1 == col2\": \"col1 == col2\",\n    \"(col2 > col1) and (col1 < col3)\": \"(col2 > col1) and (col1 < col3)\",\n    # this is how to query for values of an unnamed index per\n    # https://pandas.pydata.org/docs/user_guide/indexing.html#multiindex-query-syntax\n    \"ilevel_0 % 2 == 1\": \"ilevel_0 % 2 == 1\",\n}\nquery_func_keys = list(query_func.keys())\nquery_func_values = list(query_func.values())\n\n# Test agg functions for apply, agg, and aggregate\nagg_func = {\n    \"sum\": \"sum\",\n    \"df sum\": lambda df: df.sum(),\n    \"str\": str,\n    \"sum mean\": [\"sum\", \"mean\"],\n    \"sum df sum\": [\"sum\", lambda df: df.sum()],\n    # The case verifies that returning a 
scalar that is based on a frame's data doesn't cause a problem\n    \"sum of certain elements\": lambda axis: (\n        axis.iloc[0] + axis.iloc[-1] if isinstance(axis, pandas.Series) else axis + axis\n    ),\n    \"should raise AssertionError\": 1,\n}\nagg_func_keys = list(agg_func.keys())\nagg_func_values = list(agg_func.values())\n\n# For this sort of parameters pandas throws an exception.\n# See details in pandas issue 36036.\nagg_func_except = {\n    \"sum sum\": [\"sum\", \"sum\"],\n}\nagg_func_except_keys = list(agg_func_except.keys())\nagg_func_except_values = list(agg_func_except.values())\n\nnumeric_agg_funcs = [\"sum mean\", \"sum sum\", \"sum df sum\"]\n\nudf_func = {\n    \"return self\": lambda x, *args, **kwargs: type(x)(x.values),\n    \"change index\": lambda x, *args, **kwargs: pandas.Series(\n        x.values, index=np.arange(-1, len(x.index) - 1)\n    ),\n    \"return none\": lambda x, *args, **kwargs: None,\n    \"return empty\": lambda x, *args, **kwargs: pandas.Series(),\n    \"access self\": lambda x, other, *args, **kwargs: pandas.Series(\n        x.values, index=other.index\n    ),\n}\nudf_func_keys = list(udf_func.keys())\nudf_func_values = list(udf_func.values())\n\n# Test q values for quantiles\nquantiles = {\n    \"0.25\": 0.25,\n    \"0.5\": 0.5,\n    \"0.75\": 0.75,\n    \"0.66\": 0.66,\n    \"0.01\": 0.01,\n    \"list\": [0.25, 0.5, 0.75, 0.66, 0.01],\n}\nquantiles_keys = list(quantiles.keys())\nquantiles_values = list(quantiles.values())\n\n# Test indices for get, set_index, __contains__, insert\nindices = {\n    \"col1\": \"col1\",\n    \"col2\": \"col2\",\n    \"A\": \"A\",\n    \"B\": \"B\",\n    \"does not exist\": \"does not exist\",\n}\nindices_keys = list(indices.keys())\nindices_values = list(indices.values())\n\n# Test functions for groupby apply\ngroupby_apply_func = {\"sum\": lambda df: df.sum(), \"negate\": lambda df: -df}\ngroupby_apply_func_keys = list(groupby_apply_func.keys())\ngroupby_apply_func_values = 
list(groupby_apply_func.values())\n\n# Test functions for groupby agg\ngroupby_agg_func = {\"min\": \"min\", \"max\": \"max\"}\ngroupby_agg_func_keys = list(groupby_agg_func.keys())\ngroupby_agg_func_values = list(groupby_agg_func.values())\n\n# Test functions for groupby transform\ngroupby_transform_func = {\n    \"add 4\": lambda df: df + 4,\n    \"negatie and minus 10\": lambda df: -df - 10,\n}\ngroupby_transform_func_keys = list(groupby_transform_func.keys())\ngroupby_transform_func_values = list(groupby_transform_func.values())\n\n# Test functions for groupby pipe\ngroupby_pipe_func = {\"sum\": lambda df: df.sum()}\ngroupby_pipe_func_keys = list(groupby_pipe_func.keys())\ngroupby_pipe_func_values = list(groupby_pipe_func.values())\n\n# END Test input data and functions\n\n# Parametrizations of common kwargs\naxis = {\n    \"over_rows_int\": 0,\n    \"over_rows_str\": \"rows\",\n    \"over_columns_int\": 1,\n    \"over_columns_str\": \"columns\",\n}\naxis_keys = list(axis.keys())\naxis_values = list(axis.values())\n\nbool_arg = {\"True\": True, \"False\": False, \"None\": None}\nbool_arg_keys = list(bool_arg.keys())\nbool_arg_values = list(bool_arg.values())\n\nint_arg = {\"-5\": -5, \"-1\": -1, \"0\": 0, \"1\": 1, \"5\": 5}\nint_arg_keys = list(int_arg.keys())\nint_arg_values = list(int_arg.values())\n\n# END parametrizations of common kwargs\n\njson_short_string = \"\"\"[{\"project\": \"modin\"}]\"\"\"\njson_long_string = \"\"\"{\n        \"quiz\": {\n            \"sport\": {\n                \"q1\": {\n                    \"question\": \"Which one is correct team name in NBA?\",\n                    \"options\": [\n                        \"New York Bulls\",\n                        \"Los Angeles Kings\",\n                        \"Golden State Warriros\",\n                        \"Huston Rocket\"\n                    ],\n                    \"answer\": \"Huston Rocket\"\n                }\n            },\n            \"maths\": {\n                \"q1\": 
{\n                    \"question\": \"5 + 7 = ?\",\n                    \"options\": [\n                        \"10\",\n                        \"11\",\n                        \"12\",\n                        \"13\"\n                    ],\n                    \"answer\": \"12\"\n                },\n                \"q2\": {\n                    \"question\": \"12 - 8 = ?\",\n                    \"options\": [\n                        \"1\",\n                        \"2\",\n                        \"3\",\n                        \"4\"\n                    ],\n                    \"answer\": \"4\"\n                }\n            }\n        }\n    }\"\"\"\njson_long_bytes = BytesIO(json_long_string.encode(encoding=\"UTF-8\"))\njson_short_bytes = BytesIO(json_short_string.encode(encoding=\"UTF-8\"))\n\n\n# Text encoding types\nencoding_types = [\n    \"ascii\",\n    \"utf_32\",\n    \"utf_32_be\",\n    \"utf_32_le\",\n    \"utf_16\",\n    \"utf_16_be\",\n    \"utf_16_le\",\n    \"utf_7\",\n    \"utf_8\",\n    \"utf_8_sig\",\n]\n\ndefault_to_pandas_ignore_string = \"default:.*defaulting to pandas.*:UserWarning\"\n\n# Files compression to extension mapping\nCOMP_TO_EXT = {\"gzip\": \"gz\", \"bz2\": \"bz2\", \"xz\": \"xz\", \"zip\": \"zip\"}\n\n\ntime_parsing_csv_path = \"modin/tests/pandas/data/test_time_parsing.csv\"\n\n\nclass CustomIntegerForAddition:\n    def __init__(self, value: int):\n        self.value = value\n\n    def __add__(self, other):\n        return self.value + other\n\n    def __radd__(self, other):\n        return other + self.value\n\n\nclass NonCommutativeMultiplyInteger:\n    \"\"\"int-like class with non-commutative multiply operation.\n\n    We need to test that rmul and mul do different things even when\n    multiplication is not commutative, but almost all multiplication is\n    commutative. 
This class' fake multiplication overloads are not commutative\n    when you multiply an instance of this class with pandas.series, which\n    does not know how to __mul__ with this class. e.g.\n\n    NonCommutativeMultiplyInteger(2) * pd.Series(1, dtype=int) == pd.Series(2, dtype=int)\n    pd.Series(1, dtype=int) * NonCommutativeMultiplyInteger(2) == pd.Series(3, dtype=int)\n    \"\"\"\n\n    def __init__(self, value: int):\n        if not isinstance(value, int):\n            raise TypeError(\n                f\"must initialize with integer, but got {value} of type {type(value)}\"\n            )\n        self.value = value\n\n    def __mul__(self, other):\n        # Note that we need to check other is an int, otherwise when we (left) mul\n        # this with a series, we'll just multiply self.value by the series, whereas\n        # we want to make the series do an rmul instead.\n        if not isinstance(other, int):\n            return NotImplemented\n        return self.value * other\n\n    def __rmul__(self, other):\n        return self.value * other + 1\n\n\ndef categories_equals(left, right):\n    assert (left.ordered and right.ordered) or (not left.ordered and not right.ordered)\n    assert_extension_array_equal(left, right)\n\n\ndef df_categories_equals(df1, df2):\n    if not hasattr(df1, \"select_dtypes\"):\n        if isinstance(df1, pandas.CategoricalDtype):\n            categories_equals(df1, df2)\n        elif isinstance(getattr(df1, \"dtype\"), pandas.CategoricalDtype) and isinstance(\n            getattr(df2, \"dtype\"), pandas.CategoricalDtype\n        ):\n            categories_equals(df1.dtype, df2.dtype)\n        return True\n\n    df1_categorical = df1.select_dtypes(include=\"category\")\n    df2_categorical = df2.select_dtypes(include=\"category\")\n    assert df1_categorical.columns.equals(df2_categorical.columns)\n    # Use an index instead of a column name to iterate through columns. There\n    # may be duplicate colum names. e.g. 
if two columns are named col1,\n    # selecting df1_categorical[\"col1\"] gives a dataframe of width 2 instead of a series.\n    for i in range(len(df1_categorical.columns)):\n        assert_extension_array_equal(\n            df1_categorical.iloc[:, i].values,\n            df2_categorical.iloc[:, i].values,\n            check_dtype=False,\n        )\n\n\ndef assert_empty_frame_equal(df1, df2):\n    \"\"\"\n    Test if df1 and df2 are empty.\n\n    Parameters\n    ----------\n    df1 : pandas.DataFrame or pandas.Series\n    df2 : pandas.DataFrame or pandas.Series\n\n    Raises\n    ------\n    AssertionError\n        If check fails.\n    \"\"\"\n\n    if (df1.empty and not df2.empty) or (df2.empty and not df1.empty):\n        assert False, \"One of the passed frames is empty, when other isn't\"\n    elif df1.empty and df2.empty and type(df1) is not type(df2):\n        assert False, f\"Empty frames have different types: {type(df1)} != {type(df2)}\"\n\n\ndef assert_all_act_same(condition, *objs):\n    \"\"\"\n    Assert that all of the objs give the same boolean result for the passed condition (either all True or all False).\n\n    Parameters\n    ----------\n    condition : callable(obj) -> bool\n        Condition to run on the passed objects.\n    *objs :\n        Objects to pass to the condition.\n\n    Returns\n    -------\n    bool\n        Result of the condition.\n    \"\"\"\n    results = [condition(obj) for obj in objs]\n    if len(results) < 2:\n        return results[0] if len(results) else None\n\n    assert all(results[0] == res for res in results[1:])\n    return results[0]\n\n\ndef assert_dtypes_equal(df1, df2):\n    \"\"\"\n    Assert that the two passed DataFrame/Series objects have equal dtypes.\n\n    The function doesn't require that the dtypes are identical, it has the following reliefs:\n        1. The dtypes are not required to be in the same order\n           (e.g. 
{\"col1\": int, \"col2\": float} == {\"col2\": float, \"col1\": int})\n        2. The dtypes are only required to be in the same class\n           (e.g. both numerical, both categorical, etc...)\n\n    Parameters\n    ----------\n    df1 : DataFrame or Series\n    df2 : DataFrame or Series\n    \"\"\"\n    if not isinstance(\n        df1, (pandas.Series, pd.Series, pandas.DataFrame, pd.DataFrame)\n    ) or not isinstance(\n        df2, (pandas.Series, pd.Series, pandas.DataFrame, pd.DataFrame)\n    ):\n        return\n\n    if isinstance(df1.dtypes, (pandas.Series, pd.Series)):\n        dtypes1 = df1.dtypes\n        dtypes2 = df2.dtypes\n    else:\n        # Case when `dtypes` is a scalar\n        dtypes1 = pandas.Series({\"col\": df1.dtypes})\n        dtypes2 = pandas.Series({\"col\": df2.dtypes})\n\n    # Don't require for dtypes to be in the same order\n    assert len(dtypes1.index.difference(dtypes2.index)) == 0\n    assert len(dtypes1) == len(dtypes2)\n\n    dtype_comparators = (\n        is_numeric_dtype,\n        lambda obj: is_object_dtype(obj) or is_string_dtype(obj),\n        is_bool_dtype,\n        lambda obj: isinstance(obj, pandas.CategoricalDtype),\n        is_datetime64_any_dtype,\n        is_timedelta64_dtype,\n        lambda obj: isinstance(obj, pandas.PeriodDtype),\n    )\n\n    for idx in range(len(dtypes1)):\n        for comparator in dtype_comparators:\n            if assert_all_act_same(comparator, dtypes1.iloc[idx], dtypes2.iloc[idx]):\n                # We met a dtype that both types satisfy, so we can stop iterating\n                # over comparators and compare next dtypes\n                break\n\n\ndef assert_set_of_rows_identical(df1, df2):\n    \"\"\"\n    Assert that the set of rows for the passed dataframes is identical.\n\n    Works much slower than ``df1.equals(df2)``, so it's recommended to use this\n    function only in exceptional cases.\n    \"\"\"\n    # replacing NaN with None to pass the comparison: 'NaN == NaN -> false; 
None == None -> True'\n    df1, df2 = map(\n        lambda df: (df.to_frame() if df.ndim == 1 else df).replace({np.nan: None}),\n        (df1, df2),\n    )\n    rows1 = set((idx, *row.tolist()) for idx, row in df1.iterrows())\n    rows2 = set((idx, *row.tolist()) for idx, row in df2.iterrows())\n    assert rows1 == rows2\n\n\ndef sort_data(data):\n    \"\"\"Sort the passed sequence.\"\"\"\n    if isinstance(data, (pandas.DataFrame, pd.DataFrame)):\n        return data.sort_values(data.columns.to_list(), ignore_index=True)\n    elif isinstance(data, (pandas.Series, pd.Series)):\n        return data.sort_values()\n    else:\n        return np.sort(data)\n\n\ndef sort_if_range_partitioning(df1, df2, comparator=None, force=False):\n    \"\"\"Sort the passed objects if 'RangePartitioning' is enabled and compare the sorted results.\"\"\"\n    if comparator is None:\n        comparator = df_equals\n\n    if force or RangePartitioning.get():\n        df1, df2 = sort_data(df1), sort_data(df2)\n\n    comparator(df1, df2)\n\n\ndef df_equals(df1, df2, check_dtypes=True):\n    \"\"\"Tests if df1 and df2 are equal.\n\n    Args:\n        df1: (pandas or modin DataFrame or series) dataframe to test if equal.\n        df2: (pandas or modin DataFrame or series) dataframe to test if equal.\n\n    Returns:\n        True if df1 is equal to df2.\n    \"\"\"\n    # Gets AttributError if modin's groupby object is not import like this\n    from modin.pandas.groupby import DataFrameGroupBy\n\n    groupby_types = (pandas.core.groupby.DataFrameGroupBy, DataFrameGroupBy)\n\n    # The typing behavior of how pandas treats its index is not consistent when the\n    # length of the DataFrame or Series is 0, so we just verify that the contents are\n    # the same.\n    if (\n        hasattr(df1, \"index\")\n        and hasattr(df2, \"index\")\n        and len(df1) == 0\n        and len(df2) == 0\n    ):\n        if type(df1).__name__ == type(df2).__name__:\n            if hasattr(df1, \"name\") and 
hasattr(df2, \"name\") and df1.name == df2.name:\n                return\n            if (\n                hasattr(df1, \"columns\")\n                and hasattr(df2, \"columns\")\n                and df1.columns.equals(df2.columns)\n            ):\n                return\n        assert False\n\n    if isinstance(df1, (list, tuple)) and all(\n        isinstance(d, (pd.DataFrame, pd.Series, pandas.DataFrame, pandas.Series))\n        for d in df1\n    ):\n        assert isinstance(df2, type(df1)), \"Different type of collection\"\n        assert len(df1) == len(df2), \"Different length result\"\n        return (df_equals(d1, d2) for d1, d2 in zip(df1, df2))\n\n    if check_dtypes:\n        assert_dtypes_equal(df1, df2)\n\n    # Convert to pandas\n    if isinstance(df1, (pd.DataFrame, pd.Series)):\n        df1 = to_pandas(df1)\n    if isinstance(df2, (pd.DataFrame, pd.Series)):\n        df2 = to_pandas(df2)\n\n    if isinstance(df1, pandas.DataFrame) and isinstance(df2, pandas.DataFrame):\n        assert_empty_frame_equal(df1, df2)\n\n    if isinstance(df1, pandas.DataFrame) and isinstance(df2, pandas.DataFrame):\n        assert_frame_equal(\n            df1,\n            df2,\n            check_dtype=False,\n            check_datetimelike_compat=True,\n            check_index_type=False,\n            check_column_type=False,\n            check_categorical=False,\n        )\n        df_categories_equals(df1, df2)\n    elif isinstance(df1, pandas.Index) and isinstance(df2, pandas.Index):\n        assert_index_equal(df1, df2)\n    elif isinstance(df1, pandas.Series) and isinstance(df2, pandas.Series):\n        assert_series_equal(df1, df2, check_dtype=False, check_series_type=False)\n    elif (\n        hasattr(df1, \"dtype\")\n        and hasattr(df2, \"dtype\")\n        and isinstance(df1.dtype, pandas.core.dtypes.dtypes.ExtensionDtype)\n        and isinstance(df2.dtype, pandas.core.dtypes.dtypes.ExtensionDtype)\n    ):\n        assert_extension_array_equal(df1, 
df2)\n    elif isinstance(df1, groupby_types) and isinstance(df2, groupby_types):\n        for g1, g2 in zip(df1, df2):\n            assert g1[0] == g2[0]\n            df_equals(g1[1], g2[1])\n    elif (\n        isinstance(df1, pandas.Series)\n        and isinstance(df2, pandas.Series)\n        and df1.empty\n        and df2.empty\n    ):\n        assert all(df1.index == df2.index)\n        assert df1.dtypes == df2.dtypes\n    elif isinstance(df1, pandas.core.arrays.NumpyExtensionArray):\n        assert isinstance(df2, pandas.core.arrays.NumpyExtensionArray)\n        assert df1 == df2\n    elif isinstance(df1, np.recarray) and isinstance(df2, np.recarray):\n        np.testing.assert_array_equal(df1, df2)\n    else:\n        res = df1 != df2\n        if res.any() if isinstance(res, np.ndarray) else res:\n            np.testing.assert_almost_equal(df1, df2)\n\n\ndef modin_df_almost_equals_pandas(modin_df, pandas_df, max_diff=0.0001):\n    df_categories_equals(modin_df._to_pandas(), pandas_df)\n\n    modin_df = to_pandas(modin_df)\n\n    if hasattr(modin_df, \"select_dtypes\"):\n        modin_df = modin_df.select_dtypes(exclude=[\"category\"])\n    if hasattr(pandas_df, \"select_dtypes\"):\n        pandas_df = pandas_df.select_dtypes(exclude=[\"category\"])\n\n    if modin_df.equals(pandas_df):\n        return\n\n    isna = modin_df.isna().all()\n    if isinstance(isna, bool):\n        if isna:\n            assert pandas_df.isna().all()\n            return\n    elif isna.all():\n        assert pandas_df.isna().all().all()\n        return\n\n    diff = (modin_df - pandas_df).abs()\n    diff /= pandas_df.abs()\n    diff_max = diff.max() if isinstance(diff, pandas.Series) else diff.max().max()\n    assert diff_max < max_diff, f\"{diff_max} >= {max_diff}\"\n\n\ndef try_modin_df_almost_equals_compare(df1, df2):\n    \"\"\"Compare two dataframes as nearly equal if possible, otherwise compare as completely equal.\"\"\"\n    # `modin_df_almost_equals_pandas` is numeric-only 
comparator\n    dtypes1, dtypes2 = [\n        dtype if is_list_like(dtype := df.dtypes) else [dtype] for df in (df1, df2)\n    ]\n    if all(map(is_numeric_dtype, dtypes1)) and all(map(is_numeric_dtype, dtypes2)):\n        modin_df_almost_equals_pandas(df1, df2)\n    else:\n        df_equals(df1, df2)\n\n\ndef df_is_empty(df):\n    \"\"\"Tests if df is empty.\n\n    Args:\n        df: (pandas or modin DataFrame) dataframe to test if empty.\n\n    Returns:\n        True if df is empty.\n    \"\"\"\n    assert df.size == 0 and df.empty\n    assert df.shape[0] == 0 or df.shape[1] == 0\n\n\ndef arg_keys(arg_name, keys):\n    \"\"\"Appends arg_name to the front of all values in keys.\n\n    Args:\n        arg_name: (string) String containing argument name.\n        keys: (list of strings) Possible inputs of argument.\n\n    Returns:\n        List of strings with arg_name append to front of keys.\n    \"\"\"\n    return [\"{0}_{1}\".format(arg_name, key) for key in keys]\n\n\ndef name_contains(test_name, vals):\n    \"\"\"Determines if any string in vals is a substring of test_name.\n\n    Args:\n        test_name: (string) String to determine if contains substrings.\n        vals: (list of strings) List of substrings to test for.\n\n    Returns:\n        True if a substring in vals is in test_name, else False.\n    \"\"\"\n    return any(val in test_name for val in vals)\n\n\ndef check_df_columns_have_nans(df, cols):\n    \"\"\"Checks if there are NaN values in specified columns of a dataframe.\n\n    :param df: Dataframe to check.\n    :param cols: One column name or list of column names.\n    :return:\n        True if specified columns of dataframe contains NaNs.\n    \"\"\"\n    return (\n        pandas.api.types.is_list_like(cols)\n        and (\n            any(isinstance(x, str) and x in df.columns and df[x].hasnans for x in cols)\n            or any(\n                isinstance(x, pd.Series) and x._parent is df and x.hasnans for x in cols\n            )\n        
)\n    ) or (\n        not pandas.api.types.is_list_like(cols)\n        and cols in df.columns\n        and df[cols].hasnans\n    )\n\n\nclass NoModinException(Exception):\n    pass\n\n\ndef eval_general(\n    modin_df,\n    pandas_df,\n    operation,\n    comparator=df_equals,\n    __inplace__=False,\n    expected_exception=None,\n    check_kwargs_callable=True,\n    md_extra_kwargs=None,\n    comparator_kwargs=None,\n    check_for_execution_propagation=True,\n    no_check_for_execution_propagation_reason=None,\n    **kwargs,\n):\n    md_kwargs, pd_kwargs = {}, {}\n\n    if isinstance(modin_df, (pd.DataFrame, pd.Series)):\n        original_engine = modin_df._query_compiler.engine\n        original_storage_format = modin_df._query_compiler.storage_format\n    else:\n        original_engine = None\n        original_storage_format = None\n\n    def execute_callable(fn, inplace=False, md_kwargs={}, pd_kwargs={}):\n        try:\n            pd_result = fn(pandas_df, **pd_kwargs)\n        except Exception as pd_e:\n            try:\n                if inplace:\n                    _ = fn(modin_df, **md_kwargs)\n                    try_cast_to_pandas(modin_df)  # force materialization\n                else:\n                    try_cast_to_pandas(\n                        fn(modin_df, **md_kwargs)\n                    )  # force materialization\n            except Exception as md_e:\n                assert isinstance(\n                    md_e, type(pd_e)\n                ), \"Got Modin Exception type {}, but pandas Exception type {} was expected\".format(\n                    type(md_e), type(pd_e)\n                )\n                if expected_exception:\n                    if Engine.get() == \"Ray\":\n                        from ray.exceptions import RayTaskError\n\n                        # unwrap ray exceptions from remote worker\n                        if isinstance(md_e, RayTaskError):\n                            md_e = md_e.args[0]\n                    
assert (\n                        type(md_e) is type(expected_exception)\n                        and md_e.args == expected_exception.args\n                    ), f\"not acceptable Modin's exception: [{repr(md_e)}]\"\n                    assert (\n                        pd_e.args == expected_exception.args\n                    ), f\"not acceptable Pandas' exception: [{repr(pd_e)}]\"\n                elif expected_exception is False:\n                    # The only way to disable exception message checking.\n                    pass\n                else:\n                    # It’s not enough that Modin and pandas have the same types of exceptions;\n                    # we need to explicitly specify the instance of an exception\n                    # (using `expected_exception`) in tests so that we can check exception messages.\n                    # This allows us to eliminate situations where exceptions are thrown\n                    # that we don't expect, which could hide different bugs.\n                    raise pd_e\n            else:\n                raise NoModinException(\n                    f\"Modin doesn't throw an exception, while pandas does: [{repr(pd_e)}]\"\n                )\n        else:\n            md_result = fn(modin_df, **md_kwargs)\n            return (md_result, pd_result) if not inplace else (modin_df, pandas_df)\n\n    for key, value in kwargs.items():\n        if check_kwargs_callable and callable(value):\n            values = execute_callable(value)\n            # that means, that callable raised an exception\n            if values is None:\n                return\n            else:\n                md_value, pd_value = values\n        else:\n            md_value, pd_value = value, value\n\n        md_kwargs[key] = md_value\n        pd_kwargs[key] = pd_value\n\n        if md_extra_kwargs:\n            assert isinstance(md_extra_kwargs, dict)\n            md_kwargs.update(md_extra_kwargs)\n\n    values = execute_callable(\n        
operation, md_kwargs=md_kwargs, pd_kwargs=pd_kwargs, inplace=__inplace__\n    )\n    if values is not None:\n        assert isinstance(values, tuple) and len(values) == 2\n        modin_result, pandas_result = values\n        if (\n            isinstance(modin_result, (pd.DataFrame, pd.Series))\n            and original_engine is not None\n            and original_storage_format is not None\n        ):\n            if check_for_execution_propagation:\n                assert modin_result._query_compiler.engine == original_engine, (\n                    f\"Result engine {modin_result._query_compiler.engine} does \"\n                    + f\"not match expected engine {original_engine}\"\n                )\n                assert (\n                    modin_result._query_compiler.storage_format\n                    == original_storage_format\n                ), (\n                    \"Result storage format \"\n                    + f\"{modin_result._query_compiler.storage_format} does \"\n                    + f\"not match expected storage format {original_storage_format}\"\n                )\n            else:\n                assert (\n                    isinstance(no_check_for_execution_propagation_reason, str)\n                    and len(no_check_for_execution_propagation_reason) > 0\n                ), (\n                    \"Must provide a reason for not expecting the operation to \"\n                    + \"propagate dataframe/series engine.\"\n                )\n        comparator(modin_result, pandas_result, **(comparator_kwargs or {}))\n\n\ndef eval_io(\n    fn_name,\n    comparator=df_equals,\n    cast_to_str=False,\n    expected_exception=None,\n    check_kwargs_callable=True,\n    modin_warning=None,\n    modin_warning_str_match=None,\n    md_extra_kwargs=None,\n    *args,\n    **kwargs,\n):\n    \"\"\"Evaluate I/O operation outputs equality check.\n\n    Parameters\n    ----------\n    fn_name: str\n        I/O operation name (\"read_csv\" for 
example).\n    comparator: obj\n        Function to perform comparison.\n    cast_to_str: bool\n        There could be some mismatches in dtypes, so we're\n        casting the whole frame to `str` before comparison.\n        See issue #1931 for details.\n    expected_exception: Exception\n        Exception that should be raised even if it is raised\n        both by Pandas and Modin.\n    modin_warning: obj\n        Warning that should be raised by Modin.\n    modin_warning_str_match: str\n        If `modin_warning` is set, checks that the raised warning matches this string.\n    md_extra_kwargs: dict\n        Modin operation specific kwargs.\n    \"\"\"\n\n    def applyier(module, *args, **kwargs):\n        result = getattr(module, fn_name)(*args, **kwargs)\n        if cast_to_str:\n            result = result.astype(str)\n        if isinstance(result, (pd.DataFrame, pd.Series)):\n            # Input methods that return a dataframe, e.g. read_csv, should\n            # return a dataframe with engine and storage_format that match\n            # the default Engine and StorageFormat, respectively.\n            assert result._query_compiler.engine == Engine.get()\n            assert result._query_compiler.storage_format == StorageFormat.get()\n        return result\n\n    def call_eval_general():\n        eval_general(\n            pd,\n            pandas,\n            applyier,\n            comparator=comparator,\n            expected_exception=expected_exception,\n            check_kwargs_callable=check_kwargs_callable,\n            md_extra_kwargs=md_extra_kwargs,\n            *args,\n            **kwargs,\n        )\n\n    warn_match = modin_warning_str_match if modin_warning is not None else None\n    if modin_warning:\n        with pytest.warns(modin_warning, match=warn_match):\n            call_eval_general()\n    else:\n        call_eval_general()\n\n\ndef eval_io_from_str(csv_str: str, unique_filename: str, **kwargs):\n    \"\"\"Evaluate I/O operation outputs 
equality check by using `csv_str`\n    data passed as python str (csv test file will be created from `csv_str`).\n\n    Parameters\n    ----------\n    csv_str: str\n        Test data for storing to csv file.\n    unique_filename: str\n        csv file name.\n    \"\"\"\n    with open(unique_filename, \"w\") as f:\n        f.write(csv_str)\n\n    eval_io(\n        filepath_or_buffer=unique_filename,\n        fn_name=\"read_csv\",\n        **kwargs,\n    )\n\n\ndef create_test_dfs(\n    *args, post_fn=None, backend=None, **kwargs\n) -> tuple[pd.DataFrame, pandas.DataFrame]:\n    if post_fn is None:\n        post_fn = lambda df: (  # noqa: E731\n            df.convert_dtypes(dtype_backend=backend) if backend is not None else df\n        )\n    elif backend is not None:\n        post_fn = lambda df: post_fn(df).convert_dtypes(  # noqa: E731\n            dtype_backend=backend\n        )\n    return tuple(\n        map(post_fn, [pd.DataFrame(*args, **kwargs), pandas.DataFrame(*args, **kwargs)])\n    )\n\n\ndef create_test_series(\n    vals, sort=False, backend=None, **kwargs\n) -> tuple[pd.Series, pandas.Series]:\n    if isinstance(vals, dict):\n        modin_series = pd.Series(vals[next(iter(vals.keys()))], **kwargs)\n        pandas_series = pandas.Series(vals[next(iter(vals.keys()))], **kwargs)\n    else:\n        modin_series = pd.Series(vals, **kwargs)\n        pandas_series = pandas.Series(vals, **kwargs)\n    if sort:\n        modin_series = modin_series.sort_values().reset_index(drop=True)\n        pandas_series = pandas_series.sort_values().reset_index(drop=True)\n\n    if backend is not None:\n        modin_series = modin_series.convert_dtypes(dtype_backend=backend)\n        pandas_series = pandas_series.convert_dtypes(dtype_backend=backend)\n    return modin_series, pandas_series\n\n\ndef generate_dfs():\n    df = pandas.DataFrame(\n        {\n            \"col1\": [0, 1, 2, 3],\n            \"col2\": [4, 5, 6, 7],\n            \"col3\": [8, 9, 10, 11],\n      
      \"col4\": [12, 13, 14, 15],\n            \"col5\": [0, 0, 0, 0],\n        }\n    )\n\n    df2 = pandas.DataFrame(\n        {\n            \"col1\": [0, 1, 2, 3],\n            \"col2\": [4, 5, 6, 7],\n            \"col3\": [8, 9, 10, 11],\n            \"col6\": [12, 13, 14, 15],\n            \"col7\": [0, 0, 0, 0],\n        }\n    )\n    return df, df2\n\n\ndef generate_multiindex_dfs(axis=1):\n    def generate_multiindex(index):\n        return pandas.MultiIndex.from_tuples(\n            [(\"a\", x) for x in index.values], names=[\"name1\", \"name2\"]\n        )\n\n    df1, df2 = generate_dfs()\n    df1.axes[axis], df2.axes[axis] = map(\n        generate_multiindex, [df1.axes[axis], df2.axes[axis]]\n    )\n    return df1, df2\n\n\ndef generate_multiindex(elements_number, nlevels=2, is_tree_like=False):\n    def generate_level(length, nlevel):\n        src = [\"bar\", \"baz\", \"foo\", \"qux\"]\n        return [src[i % len(src)] + f\"-{nlevel}-{i}\" for i in range(length)]\n\n    if is_tree_like:\n        for penalty_level in [0, 1]:\n            lvl_len_f, lvl_len_d = math.modf(\n                round(elements_number ** (1 / (nlevels - penalty_level)), 12)\n            )\n            if lvl_len_d >= 2 and lvl_len_f == 0:\n                break\n\n        if lvl_len_d < 2 or lvl_len_f != 0:\n            raise RuntimeError(\n                f\"Can't generate Tree-like MultiIndex with lenght: {elements_number} and number of levels: {nlevels}\"\n            )\n\n        lvl_len = int(lvl_len_d)\n        result = pd.MultiIndex.from_product(\n            [generate_level(lvl_len, i) for i in range(nlevels - penalty_level)],\n            names=[f\"level-{i}\" for i in range(nlevels - penalty_level)],\n        )\n        if penalty_level:\n            result = pd.MultiIndex.from_tuples(\n                [(\"base_level\", *ml_tuple) for ml_tuple in result],\n                names=[f\"level-{i}\" for i in range(nlevels)],\n            )\n        return 
result.sort_values()\n    else:\n        base_level = [\"first\"] * (elements_number // 2 + elements_number % 2) + [\n            \"second\"\n        ] * (elements_number // 2)\n        primary_levels = [generate_level(elements_number, i) for i in range(1, nlevels)]\n        arrays = [base_level] + primary_levels\n        return pd.MultiIndex.from_tuples(\n            list(zip(*arrays)), names=[f\"level-{i}\" for i in range(nlevels)]\n        ).sort_values()\n\n\ndef generate_none_dfs():\n    df = pandas.DataFrame(\n        {\n            \"col1\": [0, 1, 2, 3],\n            \"col2\": [4, 5, None, 7],\n            \"col3\": [8, 9, 10, 11],\n            \"col4\": [12, 13, 14, 15],\n            \"col5\": [None, None, None, None],\n        }\n    )\n\n    df2 = pandas.DataFrame(\n        {\n            \"col1\": [0, 1, 2, 3],\n            \"col2\": [4, 5, 6, 7],\n            \"col3\": [8, 9, 10, 11],\n            \"col6\": [12, 13, 14, 15],\n            \"col7\": [0, 0, 0, 0],\n        }\n    )\n    return df, df2\n\n\ndef get_unique_filename(\n    test_name: str = \"test\",\n    kwargs: dict = {},\n    extension: str = \"csv\",\n    data_dir: Union[str, Path] = \"\",\n    suffix: str = \"\",\n    debug_mode=False,\n):\n    \"\"\"Returns unique file name with specified parameters.\n\n    Parameters\n    ----------\n    test_name: str\n        name of the test for which the unique file name is needed.\n    kwargs: dict\n        Unique combination of test parameters for creation of unique name.\n    extension: str, default: \"csv\"\n        Extension of unique file.\n    data_dir: Union[str, Path]\n        Data directory where test files will be created.\n    suffix: str\n        String to append to the resulted name.\n    debug_mode: bool, default: False\n        Get unique filename containing kwargs values.\n        Otherwise kwargs values will be replaced with hash equivalent.\n\n    Returns\n    -------\n        Unique file name.\n    \"\"\"\n    suffix_part 
= f\"_{suffix}\" if suffix else \"\"\n    extension_part = f\".{extension}\" if extension else \"\"\n    if debug_mode:\n        # shortcut if kwargs parameter are not provided\n        if len(kwargs) == 0 and extension == \"csv\" and suffix == \"\":\n            return os.path.join(data_dir, (test_name + suffix_part + f\".{extension}\"))\n\n        assert \".\" not in extension, \"please provide pure extension name without '.'\"\n        prohibited_chars = ['\"', \"\\n\"]\n        non_prohibited_char = \"np_char\"\n        char_counter = 0\n        kwargs_name = dict(kwargs)\n        for key, value in kwargs_name.items():\n            for char in prohibited_chars:\n                if isinstance(value, str) and char in value or callable(value):\n                    kwargs_name[key] = non_prohibited_char + str(char_counter)\n                    char_counter += 1\n        parameters_values = \"_\".join(\n            [\n                (\n                    str(value)\n                    if not isinstance(value, (list, tuple))\n                    else \"_\".join([str(x) for x in value])\n                )\n                for value in kwargs_name.values()\n            ]\n        )\n        return os.path.join(\n            data_dir, test_name + parameters_values + suffix_part + extension_part\n        )\n    else:\n        import uuid\n\n        return os.path.join(data_dir, uuid.uuid1().hex + suffix_part + extension_part)\n\n\ndef get_random_string():\n    random_string = \"\".join(\n        random_state.choice([x for x in ascii_letters], size=10).tolist()\n    )\n    return random_string\n\n\ndef insert_lines_to_csv(\n    csv_name: str,\n    lines_positions: list,\n    lines_type: str = \"blank\",\n    encoding: str = None,\n    **csv_reader_writer_params,\n):\n    \"\"\"Insert lines to \".csv\" file.\n\n    Parameters\n    ----------\n    csv_name: str\n        \".csv\" file that should be modified.\n    lines_positions: list of ints\n        Lines positions that 
should be modified (serial number\n        of line - begins from 0, ends in <rows_number> - 1).\n    lines_type: str\n        Lines types that should be inserted to \".csv\" file. Possible types:\n        \"blank\" - empty line without any delimiters/separators,\n        \"bad\" - lines with len(lines_data) > cols_number\n    encoding: str\n        Encoding type that should be used during file reading and writing.\n    \"\"\"\n    if lines_type == \"blank\":\n        lines_data = []\n    elif lines_type == \"bad\":\n        cols_number = len(pandas.read_csv(csv_name, nrows=1).columns)\n        lines_data = [x for x in range(cols_number + 1)]\n    else:\n        raise ValueError(\n            f\"acceptable values for  parameter are ['blank', 'bad'], actually passed {lines_type}\"\n        )\n    lines = []\n    with open(csv_name, \"r\", encoding=encoding, newline=\"\") as read_file:\n        try:\n            dialect = csv.Sniffer().sniff(read_file.read())\n            read_file.seek(0)\n        except Exception:\n            dialect = None\n\n        reader = csv.reader(\n            read_file,\n            dialect=dialect if dialect is not None else \"excel\",\n            **csv_reader_writer_params,\n        )\n        counter = 0\n        for row in reader:\n            if counter in lines_positions:\n                lines.append(lines_data)\n            else:\n                lines.append(row)\n            counter += 1\n    with open(csv_name, \"w\", encoding=encoding, newline=\"\") as write_file:\n        writer = csv.writer(\n            write_file,\n            dialect=dialect if dialect is not None else \"excel\",\n            **csv_reader_writer_params,\n        )\n        writer.writerows(lines)\n\n\ndef _get_open_files():\n    \"\"\"\n    psutil open_files() can return a lot of extra information that we can allow to\n    be different, like file position; for simplicity we care about path and fd only.\n    \"\"\"\n    return sorted((info.path, info.fd) 
for info in psutil.Process().open_files())\n\n\ndef check_file_leaks(func):\n    \"\"\"\n    A decorator that ensures that no *newly* opened file handles are left\n    after decorated function is finished.\n    \"\"\"\n    if not TrackFileLeaks.get():\n        return func\n\n    @functools.wraps(func)\n    def check(*a, **kw):\n        fstart = _get_open_files()\n        try:\n            return func(*a, **kw)\n        finally:\n            leaks = []\n            for item in _get_open_files():\n                try:\n                    fstart.remove(item)\n                except ValueError:\n                    # Ignore files in /proc/, as they have nothing to do with\n                    # modin reading any data (and this is what we care about).\n                    if item[0].startswith(\"/proc/\"):\n                        continue\n                    # Ignore files in /tmp/ray/session_*/logs (ray session logs)\n                    # because Ray intends to keep these logs open even after\n                    # work has been done.\n                    if re.search(r\"/tmp/ray/session_.*/logs\", item[0]):\n                        continue\n                    leaks.append(item)\n\n            assert (\n                not leaks\n            ), f\"Unexpected open handles left for: {', '.join(item[0] for item in leaks)}\"\n\n    return check\n\n\ndef dummy_decorator():\n    \"\"\"A problematic decorator that does not use `functools.wraps`. This introduces unwanted local variables for\n    inspect.currentframe. 
This decorator is used in test_io to test `read_csv` and `read_table`\n    \"\"\"\n\n    def wrapper(method):\n        def wrapped_function(self, *args, **kwargs):\n            result = method(self, *args, **kwargs)\n            return result\n\n        return wrapped_function\n\n    return wrapper\n\n\ndef generate_dataframe(row_size=NROWS, additional_col_values=None, idx_name=None):\n    dates = pandas.date_range(\"2000\", freq=\"h\", periods=row_size)\n    data = {\n        \"col1\": np.arange(row_size) * 10,\n        \"col2\": [str(x.date()) for x in dates],\n        \"col3\": np.arange(row_size) * 10,\n        \"col4\": [str(x.time()) for x in dates],\n        \"col5\": [get_random_string() for _ in range(row_size)],\n        \"col6\": random_state.uniform(low=0.0, high=10000.0, size=row_size),\n    }\n    index = None if idx_name is None else pd.RangeIndex(0, row_size, name=idx_name)\n\n    if additional_col_values is not None:\n        assert isinstance(additional_col_values, (list, tuple))\n        data.update({\"col7\": random_state.choice(additional_col_values, size=row_size)})\n    return pandas.DataFrame(data, index=index)\n\n\ndef _make_csv_file(data_dir):\n    def _csv_file_maker(\n        filename=None,\n        row_size=NROWS,\n        force=True,\n        delimiter=\",\",\n        encoding=None,\n        compression=\"infer\",\n        additional_col_values=None,\n        remove_randomness=False,\n        add_blank_lines=False,\n        add_bad_lines=False,\n        add_nan_lines=False,\n        thousands_separator=None,\n        decimal_separator=None,\n        comment_col_char=None,\n        quoting=csv.QUOTE_MINIMAL,\n        quotechar='\"',\n        doublequote=True,\n        escapechar=None,\n        lineterminator=None,\n    ):\n        if filename is None:\n            filename = get_unique_filename(data_dir=data_dir)\n        if os.path.exists(filename) and not force:\n            return None\n        else:\n            df = 
generate_dataframe(row_size, additional_col_values)\n            if remove_randomness:\n                df = df[[\"col1\", \"col2\", \"col3\", \"col4\"]]\n            if add_nan_lines:\n                for i in range(0, row_size, row_size // (row_size // 10)):\n                    df.loc[i] = pandas.Series()\n            if comment_col_char:\n                char = comment_col_char if isinstance(comment_col_char, str) else \"#\"\n                df.insert(\n                    loc=0,\n                    column=\"col_with_comments\",\n                    value=[char if (x + 2) == 0 else x for x in range(row_size)],\n                )\n\n            if thousands_separator is not None:\n                for col_id in [\"col1\", \"col3\"]:\n                    df[col_id] = df[col_id].apply(\n                        lambda x: f\"{x:,d}\".replace(\",\", thousands_separator)\n                    )\n                df[\"col6\"] = df[\"col6\"].apply(\n                    lambda x: f\"{x:,f}\".replace(\",\", thousands_separator)\n                )\n            filename = (\n                f\"{filename}.{COMP_TO_EXT[compression]}\"\n                if compression != \"infer\"\n                else filename\n            )\n            df.to_csv(\n                filename,\n                sep=delimiter,\n                encoding=encoding,\n                compression=compression,\n                index=False,\n                decimal=decimal_separator if decimal_separator else \".\",\n                lineterminator=lineterminator,\n                quoting=quoting,\n                quotechar=quotechar,\n                doublequote=doublequote,\n                escapechar=escapechar,\n            )\n            csv_reader_writer_params = {\n                \"delimiter\": delimiter,\n                \"doublequote\": doublequote,\n                \"escapechar\": escapechar,\n                \"lineterminator\": lineterminator if lineterminator else os.linesep,\n                
\"quotechar\": quotechar,\n                \"quoting\": quoting,\n            }\n            if add_blank_lines:\n                insert_lines_to_csv(\n                    csv_name=filename,\n                    lines_positions=[\n                        x for x in range(5, row_size, row_size // (row_size // 10))\n                    ],\n                    lines_type=\"blank\",\n                    encoding=encoding,\n                    **csv_reader_writer_params,\n                )\n            if add_bad_lines:\n                insert_lines_to_csv(\n                    csv_name=filename,\n                    lines_positions=[\n                        x for x in range(6, row_size, row_size // (row_size // 10))\n                    ],\n                    lines_type=\"bad\",\n                    encoding=encoding,\n                    **csv_reader_writer_params,\n                )\n            return filename\n\n    return _csv_file_maker\n\n\ndef sort_index_for_equal_values(df, ascending=True):\n    \"\"\"Sort `df` indices of equal rows.\"\"\"\n    if df.index.dtype == np.float64:\n        # HACK: workaround for pandas bug:\n        # https://github.com/pandas-dev/pandas/issues/34455\n        df.index = df.index.astype(\"str\")\n    res = df.groupby(by=df if df.ndim == 1 else df.columns, sort=False).apply(\n        lambda df: df.sort_index(ascending=ascending)\n    )\n    if res.index.nlevels > df.index.nlevels:\n        # Sometimes GroupBy adds an extra level with 'by' to the result index.\n        # GroupBy is very inconsistent about when it's doing this, so that's\n        # why this clumsy if-statement is used.\n        res.index = res.index.droplevel(0)\n    # GroupBy overwrites original index names with 'by', so the following line restores original names\n    res.index.names = df.index.names\n    return res\n\n\ndef df_equals_with_non_stable_indices(df1, df2):\n    \"\"\"Assert equality of two frames regardless of the index order for equal values.\"\"\"\n  
  df1, df2 = map(try_cast_to_pandas, (df1, df2))\n    np.testing.assert_array_equal(df1.values, df2.values)\n    sorted1, sorted2 = map(sort_index_for_equal_values, (df1, df2))\n    df_equals(sorted1, sorted2)\n\n\ndef rotate_decimal_digits_or_symbols(value):\n    if value.dtype == object:\n        # When dtype is object, we assume that it is actually strings from MultiIndex level names\n        return [x[-1] + x[:-1] for x in value]\n    else:\n        tens = value // 10\n        ones = value % 10\n        return tens + ones * 10\n\n\ndef make_default_file(file_type: str, data_dir: str):\n    \"\"\"Helper function for pytest fixtures.\"\"\"\n\n    def _create_file(filename, force, nrows, ncols, func: str, func_kw=None):\n        \"\"\"\n        Helper function that creates a dataframe before writing it to a file.\n\n        Eliminates the duplicate code that is needed before of output functions calls.\n\n        Notes\n        -----\n        Importantly, names of created files are added to `filenames` variable for\n        their further automatic deletion. 
Without this step, files created by\n        `pytest` fixtures will not be deleted.\n        \"\"\"\n        if force or not os.path.exists(filename):\n            df = pandas.DataFrame(\n                {f\"col{x + 1}\": np.arange(nrows) for x in range(ncols)}\n            )\n            getattr(df, func)(filename, **func_kw if func_kw else {})\n\n    file_type_to_extension = {\n        \"excel\": \"xlsx\",\n        \"fwf\": \"txt\",\n        \"pickle\": \"pkl\",\n    }\n    extension = file_type_to_extension.get(file_type, file_type)\n\n    def _make_default_file(nrows=NROWS, ncols=2, force=True, **kwargs):\n        filename = get_unique_filename(extension=extension, data_dir=data_dir)\n\n        if file_type == \"json\":\n            lines = kwargs.get(\"lines\")\n            func_kw = {\"lines\": lines, \"orient\": \"records\"} if lines else {}\n            _create_file(filename, force, nrows, ncols, \"to_json\", func_kw)\n        elif file_type in (\"html\", \"excel\", \"feather\", \"stata\", \"pickle\"):\n            _create_file(filename, force, nrows, ncols, f\"to_{file_type}\")\n        elif file_type == \"hdf\":\n            func_kw = {\"key\": \"df\", \"format\": kwargs.get(\"format\")}\n            _create_file(filename, force, nrows, ncols, \"to_hdf\", func_kw)\n        elif file_type == \"fwf\":\n            if force or not os.path.exists(filename):\n                fwf_data = kwargs.get(\"fwf_data\")\n                if fwf_data is None:\n                    with open(\"modin/tests/pandas/data/test_data.fwf\", \"r\") as fwf_file:\n                        fwf_data = fwf_file.read()\n                with open(filename, \"w\") as f:\n                    f.write(fwf_data)\n        else:\n            raise ValueError(f\"Unsupported file type: {file_type}\")\n        return filename\n\n    return _make_default_file\n\n\ndef value_equals(obj1, obj2):\n    \"\"\"Check whether two scalar or list-like values are equal and raise an ``AssertionError`` if they 
aren't.\"\"\"\n    if is_list_like(obj1):\n        np.testing.assert_array_equal(obj1, obj2)\n    else:\n        assert (obj1 == obj2) or (np.isnan(obj1) and np.isnan(obj2))\n\n\ndef dict_equals(dict1, dict2):\n    \"\"\"Check whether two dictionaries are equal and raise an ``AssertionError`` if they aren't.\"\"\"\n    for key1, key2 in itertools.zip_longest(sorted(dict1), sorted(dict2)):\n        value_equals(key1, key2)\n        value_equals(dict1[key1], dict2[key2])\n\n\n@contextmanager\ndef switch_execution(engine: str, storage_format: str):\n    old_engine = Engine.get()\n    old_storage = StorageFormat.get()\n    try:\n        set_execution(engine, storage_format)\n        yield\n    finally:\n        set_execution(old_engine, old_storage)\n\n\ndef is_native_shallow_copy() -> bool:\n    \"\"\"Return if the current configuration uses native pandas execution and performs shallow copies.\"\"\"\n    return (\n        Backend.get() == \"Pandas\"\n        and not NativePandasDeepCopy.get()\n        and not pandas.get_option(\"mode.copy_on_write\")\n    )\n"
  },
  {
    "path": "modin/tests/polars/test_dataframe.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport polars\nimport polars.testing\n\nimport modin.polars as pl\n\n\ndef test_init_roundtrip():\n    data = {\"a\": [1, 2, 3], \"b\": [4, 5, 6]}\n    df = pl.DataFrame(data)\n    polars_df = polars.DataFrame(data)\n    to_polars = polars.from_pandas(df._query_compiler.to_pandas())\n    polars.testing.assert_frame_equal(polars_df, to_polars)\n"
  },
  {
    "path": "modin/tests/test_dataframe_api_standard.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport pytest\n\nimport modin.pandas\n\n\ndef test_dataframe_api_standard() -> None:\n    \"\"\"\n    Test some basic methods of the dataframe consortium standard.\n\n    Full testing is done at https://github.com/data-apis/dataframe-api-compat,\n    this is just to check that the entry point works as expected.\n    \"\"\"\n    pytest.importorskip(\"dataframe_api_compat\")\n    df_pd = modin.pandas.DataFrame({\"a\": [1, 2, 3], \"b\": [4, 5, 6]})\n    df = df_pd.__dataframe_consortium_standard__()\n    result_1 = df.get_column_names()\n    expected_1 = [\"a\", \"b\"]\n    assert result_1 == expected_1\n\n    ser = modin.pandas.Series([1, 2, 3])\n    col = ser.__column_consortium_standard__()\n    result_2 = col.get_value(1)\n    expected_2 = 2\n    assert result_2 == expected_2\n"
  },
  {
    "path": "modin/tests/test_docstring_urls.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport importlib\nimport pkgutil\nfrom concurrent.futures import ThreadPoolExecutor\nfrom urllib.error import HTTPError\nfrom urllib.request import urlopen\n\nimport pytest\n\nimport modin.pandas\nfrom modin.utils import PANDAS_API_URL_TEMPLATE\n\n\n@pytest.fixture\ndef doc_urls(get_generated_doc_urls):\n    # ensure all docstring are generated - import _everything_ under 'modin.pandas'\n    for modinfo in pkgutil.walk_packages(modin.pandas.__path__, \"modin.pandas.\"):\n        try:\n            importlib.import_module(modinfo.name)\n        except ModuleNotFoundError:\n            # some optional 3rd-party dep missing, ignore\n            pass\n    return sorted(get_generated_doc_urls())\n\n\ndef test_all_urls_exist(doc_urls):\n    broken = []\n    # TODO: remove the hack after pandas fixes it\n    methods_with_broken_urls = (\n        \"pandas.DataFrame.flags\",\n        \"pandas.Series.info\",\n        \"pandas.DataFrame.isetitem\",\n        \"pandas.Series.swapaxes\",\n        \"pandas.DataFrame.to_numpy\",\n        \"pandas.Series.axes\",\n        \"pandas.Series.divmod\",\n        \"pandas.Series.rdivmod\",\n    )\n    for broken_method in 
methods_with_broken_urls:\n        doc_urls.remove(PANDAS_API_URL_TEMPLATE.format(broken_method))\n\n    def _test_url(url):\n        try:\n            with urlopen(url):\n                pass\n        except HTTPError:\n            broken.append(url)\n\n    with ThreadPoolExecutor(32) as pool:\n        pool.map(_test_url, doc_urls)\n\n    assert not broken, \"Invalid URLs detected\"\n"
  },
  {
    "path": "modin/tests/test_envvar_catcher.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport os\n\nimport pytest\n\n\n@pytest.fixture\ndef nameset():\n    name = \"hey_i_am_an_env_var\"\n    os.environ[name] = \"i am a value\"\n    yield name\n    del os.environ[name]\n\n\ndef test_envvar_catcher(nameset):\n    with pytest.raises(AssertionError):\n        os.environ.get(\"Modin_FOO\", \"bar\")\n    with pytest.raises(AssertionError):\n        \"modin_qux\" not in os.environ\n    assert \"yay_random_name\" not in os.environ\n    assert os.environ[nameset]\n"
  },
  {
    "path": "modin/tests/test_envvar_npartitions.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport numpy as np\nimport pytest\n\nimport modin.pandas as pd\nfrom modin.config import NPartitions\n\n\n@pytest.mark.parametrize(\"num_partitions\", [2, 4, 6, 8, 10])\ndef test_set_npartitions(num_partitions):\n    NPartitions.put(num_partitions)\n    data = np.random.randint(0, 100, size=(2**16, 2**8))\n    df = pd.DataFrame(data)\n    part_shape = df._query_compiler._modin_frame._partitions.shape\n    assert part_shape[0] == num_partitions and part_shape[1] == min(num_partitions, 8)\n\n\n@pytest.mark.parametrize(\"left_num_partitions\", [2, 4, 6, 8, 10])\n@pytest.mark.parametrize(\"right_num_partitions\", [2, 4, 6, 8, 10])\ndef test_runtime_change_npartitions(left_num_partitions, right_num_partitions):\n    NPartitions.put(left_num_partitions)\n    data = np.random.randint(0, 100, size=(2**16, 2**8))\n    left_df = pd.DataFrame(data)\n    part_shape = left_df._query_compiler._modin_frame._partitions.shape\n    assert part_shape[0] == left_num_partitions and part_shape[1] == min(\n        left_num_partitions, 8\n    )\n\n    NPartitions.put(right_num_partitions)\n    right_df = pd.DataFrame(data)\n    part_shape = 
right_df._query_compiler._modin_frame._partitions.shape\n    assert part_shape[0] == right_num_partitions and part_shape[1] == min(\n        right_num_partitions, 8\n    )\n"
  },
  {
    "path": "modin/tests/test_executions_api.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport pytest\n\nfrom modin.core.storage_formats import BaseQueryCompiler, PandasQueryCompiler\n\nBASE_EXECUTION = BaseQueryCompiler\nEXECUTIONS = [PandasQueryCompiler]\n\n\ndef test_base_abstract_methods():\n    allowed_abstract_methods = [\n        \"__init__\",\n        \"free\",\n        \"finalize\",\n        \"execute\",\n        \"to_pandas\",\n        \"from_pandas\",\n        \"from_arrow\",\n        \"default_to_pandas\",\n        \"from_interchange_dataframe\",\n        \"to_interchange_dataframe\",\n        \"engine\",\n        \"storage_format\",\n    ]\n\n    not_implemented_methods = BASE_EXECUTION.__abstractmethods__.difference(\n        allowed_abstract_methods\n    )\n\n    # sorting for beauty output in error\n    not_implemented_methods = list(not_implemented_methods)\n    not_implemented_methods.sort()\n\n    assert (\n        len(not_implemented_methods) == 0\n    ), f\"{BASE_EXECUTION} has not implemented abstract methods: {not_implemented_methods}\"\n\n\n@pytest.mark.parametrize(\"execution\", EXECUTIONS)\ndef test_api_consistent(execution):\n    base_methods = set(BASE_EXECUTION.__dict__)\n    custom_methods = set(\n        [key for key in 
execution.__dict__.keys() if not key.startswith(\"_\")]\n    )\n\n    extra_methods = custom_methods.difference(base_methods)\n    # checking that custom execution do not implements extra api methods\n    assert (\n        len(extra_methods) == 0\n    ), f\"{execution} implement these extra methods: {extra_methods}\"\n"
  },
  {
    "path": "modin/tests/test_headers.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport os\nfrom os.path import abspath, dirname\n\n# This is the python file root directory (modin/modin)\nrootdir = dirname(dirname(abspath(__file__)))\nexclude_files = [\"_version.py\"]\n\n\ndef test_headers():\n    with open(\"{}{}\".format(dirname(rootdir), \"/LICENSE_HEADER\"), \"r\") as f:\n        # Lines to check each line individually\n        header_lines = f.readlines()\n\n    for subdir, dirs, files in os.walk(rootdir):\n        for file in files:\n            filepath = os.path.join(subdir, file)\n            if file.endswith(\".py\") and file not in exclude_files:\n                with open(filepath, \"r\", encoding=\"utf8\") as f:\n                    # Lines for line by line comparison\n                    py_file_lines = f.readlines()\n                    for left, right in zip(\n                        header_lines, py_file_lines[: len(header_lines)]\n                    ):\n                        assert left == right\n\n\ndef test_line_endings():\n    # This is the project root\n    rootdir = dirname(dirname(abspath(__file__)))\n    for subdir, dirs, files in os.walk(rootdir):\n        if any(i in subdir for i in [\".git\", \".idea\", 
\"__pycache__\"]):\n            continue\n        for file in files:\n            if file.endswith(\".parquet\"):\n                continue\n            filepath = os.path.join(subdir, file)\n            with open(filepath, \"rb+\") as f:\n                file_contents = f.read()\n                new_contents = file_contents.replace(b\"\\r\\n\", b\"\\n\")\n                assert new_contents == file_contents, \"File has CRLF: {}\".format(\n                    filepath\n                )\n"
  },
  {
    "path": "modin/tests/test_logging.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport collections\nimport logging\n\nimport pytest\n\nimport modin.logging\nfrom modin.config import LogMode\n\n\nclass _FakeLogger:\n    _loggers = {}\n\n    def __init__(self, namespace):\n        self.messages = collections.defaultdict(list)\n        self.namespace = namespace\n\n    def log(self, log_level, message, *args, **kw):\n        self.messages[log_level].append(message.format(*args, **kw))\n\n    def exception(self, message, *args, **kw):\n        self.messages[\"exception\"].append(message.format(*args, **kw))\n\n    @classmethod\n    def make(cls, namespace):\n        return cls._loggers.setdefault(namespace, cls(namespace))\n\n    @classmethod\n    def get(cls, namespace=\"modin.logger.default\"):\n        return cls._loggers[namespace].messages\n\n    @classmethod\n    def clear(cls):\n        cls._loggers = {}\n\n\ndef _get_logger(namespace=\"modin.logger.default\"):\n    return _FakeLogger.make(namespace)\n\n\ndef mock_get_logger(ctx):\n    ctx.setattr(logging, \"getLogger\", _get_logger)\n\n\n@pytest.fixture\ndef get_log_messages():\n    old = LogMode.get()\n    LogMode.enable()\n    modin.logging.get_logger()  # initialize the logging pior to 
mocking getLogger()\n\n    yield _FakeLogger.get\n\n    _FakeLogger.clear()\n    LogMode.put(old)\n\n\ndef test_function_decorator(monkeypatch, get_log_messages):\n    @modin.logging.enable_logging\n    def func(do_raise):\n        if do_raise:\n            raise ValueError()\n\n    with monkeypatch.context() as ctx:\n        # NOTE: we cannot patch in the fixture as mockin logger.getLogger()\n        # without monkeypatch.context() breaks pytest\n        mock_get_logger(ctx)\n\n        func(do_raise=False)\n        with pytest.raises(ValueError):\n            func(do_raise=True)\n\n    assert \"func\" in get_log_messages()[logging.INFO][0]\n    assert \"START\" in get_log_messages()[logging.INFO][0]\n    assert get_log_messages(\"modin.logger.errors\")[\"exception\"] == [\n        \"STOP::PANDAS-API::func\"\n    ]\n\n\ndef test_function_decorator_on_outer_function_6237(monkeypatch, get_log_messages):\n    @modin.logging.enable_logging\n    def inner_func():\n        raise ValueError()\n\n    @modin.logging.enable_logging\n    def outer_func():\n        inner_func()\n\n    with monkeypatch.context() as ctx:\n        # NOTE: we cannot patch in the fixture as mockin logger.getLogger()\n        # without monkeypatch.context() breaks pytest\n        mock_get_logger(ctx)\n\n        with pytest.raises(ValueError):\n            outer_func()\n\n    assert get_log_messages(\"modin.logger.errors\")[\"exception\"] == [\n        \"STOP::PANDAS-API::inner_func\"\n    ]\n\n\ndef test_class_decorator(monkeypatch, get_log_messages):\n    @modin.logging.enable_logging(\"CUSTOM\")\n    class Foo:\n        def method1(self):\n            pass\n\n        @classmethod\n        def method2(cls):\n            pass\n\n        @staticmethod\n        def method3():\n            pass\n\n    class Bar(Foo):\n        def method4(self):\n            pass\n\n    with monkeypatch.context() as ctx:\n        mock_get_logger(ctx)\n        Foo().method1()\n        Foo.method2()\n        
Foo.method3()\n\n        Bar().method1()\n        Bar().method4()\n\n    assert get_log_messages()[logging.INFO] == [\n        \"START::CUSTOM::Foo.method1\",\n        \"STOP::CUSTOM::Foo.method1\",\n        \"START::CUSTOM::Foo.method2\",\n        \"STOP::CUSTOM::Foo.method2\",\n        \"START::CUSTOM::Foo.method3\",\n        \"STOP::CUSTOM::Foo.method3\",\n        \"START::CUSTOM::Foo.method1\",\n        \"STOP::CUSTOM::Foo.method1\",\n    ]\n\n\ndef test_class_inheritance(monkeypatch, get_log_messages):\n    class Foo(modin.logging.ClassLogger, modin_layer=\"CUSTOM\"):\n        def method1(self):\n            pass\n\n    class Bar(Foo):\n        def method2(self):\n            pass\n\n    with monkeypatch.context() as ctx:\n        mock_get_logger(ctx)\n        Foo().method1()\n        Bar().method1()\n        Bar().method2()\n\n    assert get_log_messages()[logging.INFO] == [\n        \"START::CUSTOM::Foo.method1\",\n        \"STOP::CUSTOM::Foo.method1\",\n        \"START::CUSTOM::Foo.method1\",\n        \"STOP::CUSTOM::Foo.method1\",\n        \"START::CUSTOM::Bar.method2\",\n        \"STOP::CUSTOM::Bar.method2\",\n    ]\n"
  },
  {
    "path": "modin/tests/test_metrics.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nfrom typing import Union\n\nimport pytest\n\nimport modin.logging\nimport modin.pandas as pd\nfrom modin.config import MetricsMode\nfrom modin.logging.metrics import (\n    _metric_handlers,\n    add_metric_handler,\n    clear_metric_handler,\n    emit_metric,\n)\n\n\nclass FakeTelemetryClient:\n\n    def __init__(self):\n        self._metrics = {}\n        self._metric_handler = None\n\n    def metric_handler_fail(self, name: str, value: Union[int, float]):\n        raise KeyError(\"Poorly implemented metric handler\")\n\n    def metric_handler_pass(self, name: str, value: Union[int, float]):\n        self._metrics[name] = value\n\n\n@modin.logging.enable_logging\ndef func(do_raise):\n    if do_raise:\n        raise ValueError()\n\n\n@pytest.fixture()\ndef metric_client():\n    MetricsMode.enable()\n    client = FakeTelemetryClient()\n    yield client\n    clear_metric_handler(client._metric_handler)\n    MetricsMode.disable()\n\n\ndef test_metrics_api_timings(metric_client):\n    assert len(_metric_handlers) == 0\n    metric_client._metric_handler = metric_client.metric_handler_pass\n    add_metric_handler(metric_client._metric_handler)\n    assert 
len(_metric_handlers) == 1\n    assert _metric_handlers[0] == metric_client._metric_handler\n    func(do_raise=False)\n    assert len(metric_client._metrics) == 1\n    assert metric_client._metrics[\"modin.pandas-api.func\"] is not None\n    assert metric_client._metrics[\"modin.pandas-api.func\"] > 0.0\n\n\ndef test_df_metrics(metric_client):\n    metric_client._metric_handler = metric_client.metric_handler_pass\n    add_metric_handler(metric_client._metric_handler)\n    df = pd.DataFrame({\"a\": [1, 2], \"b\": [3, 4]})\n    df.sum()\n    assert len(metric_client._metrics) == 54\n    assert metric_client._metrics[\"modin.pandas-api.dataframe.sum\"] is not None\n    assert metric_client._metrics[\"modin.pandas-api.dataframe.sum\"] > 0.0\n\n\ndef test_metrics_handler_fails(metric_client):\n    assert len(metric_client._metrics) == 0\n    metric_client._metric_handler = metric_client.metric_handler_fail\n    add_metric_handler(metric_client._metric_handler)\n    assert len(_metric_handlers) == 1\n    func(do_raise=False)\n    assert len(_metric_handlers) == 0\n    assert len(metric_client._metrics) == 0\n\n\ndef test_emit_name_enforced():\n    MetricsMode.enable()\n    with pytest.raises(KeyError):\n        emit_metric(\"Not::A::Valid::Metric::Name\", 1.0)\n\n\ndef test_metrics_can_be_opt_out(metric_client):\n    MetricsMode.enable()\n    assert len(metric_client._metrics) == 0\n    metric_client._metric_handler = metric_client.metric_handler_pass\n    add_metric_handler(metric_client._metric_handler)\n    # If Metrics are disabled after the addition of a handler\n    # no metrics are emitted\n    MetricsMode.disable()\n    assert len(_metric_handlers) == 1\n    func(do_raise=False)\n    assert len(metric_client._metrics) == 0\n"
  },
  {
    "path": "modin/tests/test_partition_api.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport numpy as np\nimport pandas\nimport pytest\n\nimport modin.pandas as pd\nfrom modin.config import Engine, NPartitions\nfrom modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher\nfrom modin.distributed.dataframe.pandas import from_partitions, unwrap_partitions\nfrom modin.pandas.indexing import compute_sliced_len\nfrom modin.tests.pandas.utils import df_equals, test_data\n\nPartitionClass = (\n    FactoryDispatcher.get_factory().io_cls.frame_cls._partition_mgr_cls._partition_class\n)\n\nif Engine.get() == \"Ray\":\n    from modin.core.execution.ray.common import RayWrapper\n    from modin.core.execution.ray.common.utils import ObjectIDType\n\n    put_func = RayWrapper.put\n    get_func = RayWrapper.materialize\n    is_future = lambda obj: isinstance(obj, ObjectIDType)  # noqa: E731\nelif Engine.get() == \"Dask\":\n    from distributed import Future\n\n    from modin.core.execution.dask.common import DaskWrapper\n\n    # Looks like there is a key collision;\n    # https://github.com/dask/distributed/issues/3703#issuecomment-619446739\n    # recommends to use `hash=False`. 
Perhaps this should be the default value of `put`.\n    put_func = lambda obj: DaskWrapper.put(obj, hash=False)  # noqa: E731\n    get_func = DaskWrapper.materialize\n    is_future = lambda obj: isinstance(obj, Future)  # noqa: E731\nelif Engine.get() == \"Unidist\":\n    from unidist import is_object_ref\n\n    from modin.core.execution.unidist.common import UnidistWrapper\n\n    put_func = UnidistWrapper.put\n    get_func = UnidistWrapper.materialize\n    is_future = is_object_ref\nelif Engine.get() == \"Python\":\n    put_func = lambda x: x  # noqa: E731\n    get_func = lambda x: x  # noqa: E731\n    is_future = lambda obj: isinstance(obj, object)  # noqa: E731\nelse:\n    raise NotImplementedError(\n        f\"'{Engine.get()}' engine is not supported by these test suites\"\n    )\n\nNPartitions.put(4)\n# HACK: implicit engine initialization (Modin issue #2989)\npd.DataFrame([])\n\n\n@pytest.mark.parametrize(\"axis\", [None, 0, 1])\n@pytest.mark.parametrize(\"reverse_index\", [True, False])\n@pytest.mark.parametrize(\"reverse_columns\", [True, False])\ndef test_unwrap_partitions(axis, reverse_index, reverse_columns):\n    data = test_data[\"int_data\"]\n\n    def get_df(lib, data):\n        df = lib.DataFrame(data)\n        if reverse_index:\n            df.index = df.index[::-1]\n        if reverse_columns:\n            df.columns = df.columns[::-1]\n        return df\n\n    df = get_df(pd, data)\n    # `df` should not have propagated the index and column updates to its\n    # partitions yet. 
The partitions of `expected_df` should have the updated\n    # metadata because we construct `expected_df` directly from the updated\n    # pandas dataframe.\n    expected_df = pd.DataFrame(get_df(pandas, data))\n    expected_partitions = expected_df._query_compiler._modin_frame._partitions\n    if axis is None:\n        actual_partitions = np.array(unwrap_partitions(df, axis=axis))\n        assert expected_partitions.shape == actual_partitions.shape\n        for row_idx in range(expected_partitions.shape[0]):\n            for col_idx in range(expected_partitions.shape[1]):\n                df_equals(\n                    get_func(expected_partitions[row_idx][col_idx].list_of_blocks[0]),\n                    get_func(actual_partitions[row_idx][col_idx]),\n                )\n    else:\n        expected_axis_partitions = (\n            expected_df._query_compiler._modin_frame._partition_mgr_cls.axis_partition(\n                expected_partitions, axis ^ 1\n            )\n        )\n        expected_axis_partitions = [\n            axis_partition.force_materialization().unwrap(squeeze=True)\n            for axis_partition in expected_axis_partitions\n        ]\n        actual_axis_partitions = unwrap_partitions(df, axis=axis)\n        assert len(expected_axis_partitions) == len(actual_axis_partitions)\n        for item_idx in range(len(expected_axis_partitions)):\n            if Engine.get() in [\"Ray\", \"Dask\", \"Unidist\"]:\n                df_equals(\n                    get_func(expected_axis_partitions[item_idx]),\n                    get_func(actual_axis_partitions[item_idx]),\n                )\n\n\ndef test_unwrap_virtual_partitions():\n    # see #5164 for details\n    data = test_data[\"int_data\"]\n    df = pd.DataFrame(data)\n    virtual_partitioned_df = pd.concat([df] * 10)\n    actual_partitions = np.array(unwrap_partitions(virtual_partitioned_df, axis=None))\n    expected_df = pd.concat([pd.DataFrame(data)] * 10)\n    expected_partitions = 
expected_df._query_compiler._modin_frame._partitions\n    assert expected_partitions.shape == actual_partitions.shape\n\n    for row_idx in range(expected_partitions.shape[0]):\n        for col_idx in range(expected_partitions.shape[1]):\n            df_equals(\n                get_func(\n                    expected_partitions[row_idx][col_idx]\n                    .force_materialization()\n                    .list_of_blocks[0]\n                ),\n                get_func(actual_partitions[row_idx][col_idx]),\n            )\n\n\n@pytest.mark.parametrize(\"column_widths\", [None, \"column_widths\"])\n@pytest.mark.parametrize(\"row_lengths\", [None, \"row_lengths\"])\n@pytest.mark.parametrize(\"columns\", [None, \"columns\"])\n@pytest.mark.parametrize(\"index\", [None, \"index\"])\n@pytest.mark.parametrize(\"axis\", [None, 0, 1])\ndef test_from_partitions(axis, index, columns, row_lengths, column_widths):\n    data = test_data[\"int_data\"]\n    df1, df2 = pandas.DataFrame(data), pandas.DataFrame(data)\n    num_rows, num_cols = df1.shape\n    expected_df = pandas.concat([df1, df2], axis=1 if axis is None else axis)\n\n    index = expected_df.index if index == \"index\" else None\n    columns = expected_df.columns if columns == \"columns\" else None\n    row_lengths = (\n        None\n        if row_lengths is None\n        else [num_rows, num_rows] if axis == 0 else [num_rows]\n    )\n    column_widths = (\n        None\n        if column_widths is None\n        else [num_cols] if axis == 0 else [num_cols, num_cols]\n    )\n    futures = []\n    if axis is None:\n        futures = [[put_func(df1), put_func(df2)]]\n    else:\n        futures = [put_func(df1), put_func(df2)]\n    actual_df = from_partitions(\n        futures,\n        axis,\n        index=index,\n        columns=columns,\n        row_lengths=row_lengths,\n        column_widths=column_widths,\n    )\n    df_equals(expected_df, actual_df)\n\n\n@pytest.mark.parametrize(\"columns\", [\"original_col\", 
\"new_col\"])\n@pytest.mark.parametrize(\"index\", [\"original_idx\", \"new_idx\"])\n@pytest.mark.parametrize(\"axis\", [None, 0, 1])\ndef test_from_partitions_mismatched_labels(axis, index, columns):\n    expected_df = pd.DataFrame(test_data[\"int_data\"])\n    partitions = unwrap_partitions(expected_df, axis=axis)\n\n    index = (\n        expected_df.index\n        if index == \"original_idx\"\n        else [f\"row{i}\" for i in expected_df.index]\n    )\n    columns = (\n        expected_df.columns\n        if columns == \"original_col\"\n        else [f\"col{i}\" for i in expected_df.columns]\n    )\n\n    expected_df.index = index\n    expected_df.columns = columns\n    actual_df = from_partitions(partitions, axis=axis, index=index, columns=columns)\n    df_equals(expected_df, actual_df)\n\n\n@pytest.mark.parametrize(\"row_labels\", [[0, 2], slice(None)])\n@pytest.mark.parametrize(\"col_labels\", [[0, 2], slice(None)])\n@pytest.mark.parametrize(\"is_length_future\", [False, True])\n@pytest.mark.parametrize(\"is_width_future\", [False, True])\ndef test_mask_preserve_cache(row_labels, col_labels, is_length_future, is_width_future):\n    def deserialize(obj):\n        if is_future(obj):\n            return get_func(obj)\n        return obj\n\n    def compute_length(indices, length):\n        if not isinstance(indices, slice):\n            return len(indices)\n        return compute_sliced_len(indices, length)\n\n    df = pandas.DataFrame({\"a\": [1, 2, 3, 4], \"b\": [5, 6, 7, 8], \"c\": [9, 10, 11, 12]})\n    obj_id = put_func(df)\n\n    partition_shape = [\n        put_func(len(df)) if is_length_future else len(df),\n        put_func(len(df.columns)) if is_width_future else len(df.columns),\n    ]\n\n    source_partition = PartitionClass(obj_id, *partition_shape)\n    masked_partition = source_partition.mask(\n        row_labels=row_labels, col_labels=col_labels\n    )\n\n    expected_length = compute_length(row_labels, len(df))\n    expected_width = 
compute_length(col_labels, len(df.columns))\n\n    # Check that the cache is preserved\n    assert expected_length == deserialize(masked_partition._length_cache)\n    assert expected_width == deserialize(masked_partition._width_cache)\n    # Check that the cache is interpreted properly\n    assert expected_length == masked_partition.length()\n    assert expected_width == masked_partition.width()\n    # Recompute shape explicitly to check that the cached data was correct\n    expected_length, expected_width = [\n        masked_partition._length_cache,\n        masked_partition._width_cache,\n    ]\n    masked_partition._length_cache = None\n    masked_partition._width_cache = None\n    assert expected_length == masked_partition.length()\n    assert expected_width == masked_partition.width()\n"
  },
  {
    "path": "modin/tests/test_utils.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport contextlib\nimport json\nfrom textwrap import dedent, indent\nfrom typing import Optional, Union\nfrom unittest.mock import Mock, patch\n\nimport numpy as np\nimport pandas\nimport pytest\n\nimport modin.pandas as pd\nimport modin.utils\nfrom modin.config import Engine, StorageFormat\nfrom modin.error_message import ErrorMessage\nfrom modin.tests.pandas.utils import create_test_dfs\n\n\n# Note: classes below are used for purely testing purposes - they\n# simulate real-world use cases for _inherit_docstring\nclass BaseParent:\n    def method(self):\n        \"\"\"ordinary method (base)\"\"\"\n\n    def base_method(self):\n        \"\"\"ordinary method in base only\"\"\"\n\n    @property\n    def prop(self):\n        \"\"\"property\"\"\"\n\n    @staticmethod\n    def static():\n        \"\"\"static method\"\"\"\n\n    @classmethod\n    def clsmtd(cls):\n        \"\"\"class method\"\"\"\n\n\nclass BaseChild(BaseParent):\n    \"\"\"this is class docstring\"\"\"\n\n    def method(self):\n        \"\"\"ordinary method (child)\"\"\"\n\n    def own_method(self):\n        \"\"\"own method\"\"\"\n\n    def no_overwrite(self):\n        \"\"\"another own 
method\"\"\"\n\n    F = property(method)\n\n\n@pytest.fixture(scope=\"module\")\ndef wrapped_cls():\n    @modin.utils._inherit_docstrings(BaseChild)\n    class Wrapped:\n        def method(self):\n            pass\n\n        def base_method(self):\n            pass\n\n        def own_method(self):\n            pass\n\n        def no_overwrite(self):\n            \"\"\"not overwritten doc\"\"\"\n\n        @property\n        def prop(self):\n            return None\n\n        @staticmethod\n        def static():\n            pass\n\n        @classmethod\n        def clsmtd(cls):\n            pass\n\n        F = property(method)\n\n    return Wrapped\n\n\ndef _check_doc(wrapped, orig):\n    assert wrapped.__doc__ == orig.__doc__\n    if isinstance(wrapped, property):\n        assert wrapped.fget.__doc_inherited__\n    else:\n        assert wrapped.__doc_inherited__\n\n\ndef test_doc_inherit_clslevel(wrapped_cls):\n    _check_doc(wrapped_cls, BaseChild)\n\n\ndef test_doc_inherit_methods(wrapped_cls):\n    _check_doc(wrapped_cls.method, BaseChild.method)\n    _check_doc(wrapped_cls.base_method, BaseParent.base_method)\n    _check_doc(wrapped_cls.own_method, BaseChild.own_method)\n    assert wrapped_cls.no_overwrite.__doc__ != BaseChild.no_overwrite.__doc__\n    assert not getattr(wrapped_cls.no_overwrite, \"__doc_inherited__\", False)\n\n\ndef test_doc_inherit_special(wrapped_cls):\n    _check_doc(wrapped_cls.static, BaseChild.static)\n    _check_doc(wrapped_cls.clsmtd, BaseChild.clsmtd)\n\n\ndef test_doc_inherit_props(wrapped_cls):\n    assert type(wrapped_cls.method) == type(BaseChild.method)  # noqa: E721\n    _check_doc(wrapped_cls.prop, BaseChild.prop)\n    _check_doc(wrapped_cls.F, BaseChild.F)\n\n\ndef test_doc_inherit_prop_builder():\n    def builder(name):\n        return property(lambda self: name)\n\n    class Parent:\n        prop = builder(\"Parent\")\n\n    @modin.utils._inherit_docstrings(Parent)\n    class Child(Parent):\n        prop = 
builder(\"Child\")\n\n    assert Parent().prop == \"Parent\"\n    assert Child().prop == \"Child\"\n\n\n@pytest.mark.parametrize(\n    \"source_doc,to_append,expected\",\n    [\n        (\n            \"One-line doc.\",\n            \"One-line message.\",\n            \"One-line doc.One-line message.\",\n        ),\n        (\n            \"\"\"\n            Regular doc-string\n                With the setted indent style.\n            \"\"\",\n            \"\"\"\n                    Doc-string having different indents\n                        in comparison with the regular one.\n            \"\"\",\n            \"\"\"\n            Regular doc-string\n                With the setted indent style.\n\n            Doc-string having different indents\n                in comparison with the regular one.\n            \"\"\",\n        ),\n    ],\n)\ndef test_append_to_docstring(source_doc, to_append, expected):\n    def source_fn():\n        pass\n\n    source_fn.__doc__ = source_doc\n    result_fn = modin.utils.append_to_docstring(to_append)(source_fn)\n\n    answer = dedent(result_fn.__doc__)\n    expected = dedent(expected)\n\n    assert answer == expected\n\n\ndef test_align_indents():\n    source = \"\"\"\n    Source string that sets\n        the indent pattern.\"\"\"\n    target = indent(source, \" \" * 5)\n    result = modin.utils.align_indents(source, target)\n    assert source == result\n\n\ndef test_format_string():\n    template = \"\"\"\n            Source template string that has some {inline_placeholder}s.\n            Placeholder1:\n            {new_line_placeholder1}\n            Placeholder2:\n            {new_line_placeholder2}\n            Placeholder3:\n            {new_line_placeholder3}\n            Placeholder4:\n            {new_line_placeholder4}Text text:\n                Placeholder5:\n                {new_line_placeholder5}\n    \"\"\"\n\n    singleline_value = \"Single-line value\"\n    multiline_value = \"\"\"\n        Some string\n           
 Having different indentation\n        From the source one.\"\"\"\n    multiline_value_new_line_at_the_end = multiline_value + \"\\n\"\n    multiline_value_new_line_at_the_begin = \"\\n\" + multiline_value\n\n    expected = \"\"\"\n            Source template string that has some Single-line values.\n            Placeholder1:\n            Some string\n                Having different indentation\n            From the source one.\n            Placeholder2:\n            Single-line value\n            Placeholder3:\n            \n            Some string\n                Having different indentation\n            From the source one.\n            Placeholder4:\n            Some string\n                Having different indentation\n            From the source one.\n            Text text:\n                Placeholder5:\n                Some string\n                    Having different indentation\n                From the source one.\n    \"\"\"  # noqa: W293\n    answer = modin.utils.format_string(\n        template,\n        inline_placeholder=singleline_value,\n        new_line_placeholder1=multiline_value,\n        new_line_placeholder2=singleline_value,\n        new_line_placeholder3=multiline_value_new_line_at_the_begin,\n        new_line_placeholder4=multiline_value_new_line_at_the_end,\n        new_line_placeholder5=multiline_value,\n    )\n    assert answer == expected\n\n\ndef warns_that_defaulting_to_pandas_if(\n    condition: bool, prefix: Optional[str] = None, suffix: Optional[str] = None\n):\n    \"\"\"\n    Get a context manager that checks for a default to pandas warning if `condition`  is True.\n\n    Parameters\n    ----------\n    condition : bool\n        Whether to check for the default to pandas warning.\n    prefix : Optional[str]\n        If specified, checks that the start of the warning message matches this argument\n        before \"[Dd]efaulting to pandas\".\n    suffix : Optional[str]\n        If specified, checks that the end of the warning 
message matches this argument\n        after \"[Dd]efaulting to pandas\".\n\n    Returns\n    -------\n    pytest.recwarn.WarningsChecker or contextlib.nullcontext\n        If ``condition`` is True, ``WarningsChecker`` is returned, which will check for a\n        ``UserWarning`` indicating that Modin is defaulting to Pandas.\n        If it is False, a ``nullcontext`` is returned to avoid checking for the warning about\n        defaulting to Pandas.\n    \"\"\"\n    assert isinstance(condition, bool)\n    return (\n        warns_that_defaulting_to_pandas(prefix=prefix, suffix=suffix)\n        if condition\n        else contextlib.nullcontext()\n    )\n\n\ndef warns_that_defaulting_to_pandas(prefix=None, suffix=None):\n    \"\"\"\n    Assert that code warns that it's defaulting to pandas.\n\n    Parameters\n    ----------\n    prefix : Optional[str]\n        If specified, checks that the start of the warning message matches this argument\n        before \"[Dd]efaulting to pandas\".\n    suffix : Optional[str]\n        If specified, checks that the end of the warning message matches this argument\n        after \"[Dd]efaulting to pandas\".\n\n    Returns\n    -------\n    pytest.recwarn.WarningsChecker\n    \"\"\"\n    match = \"[Dd]efaulting to pandas\"\n    if prefix:\n        # Message may be separated by newlines\n        match = match + \"(.|\\\\n)+\"\n    if suffix:\n        match += \"(.|\\\\n)+\" + suffix\n    return pytest.warns(UserWarning, match=match)\n\n\n@pytest.mark.parametrize(\"as_json\", [True, False])\ndef test_show_versions(as_json, capsys):\n    modin.utils.show_versions(as_json=as_json)\n    versions = capsys.readouterr().out\n    assert modin.__version__ in versions\n\n    if as_json:\n        versions = json.loads(versions)\n        assert versions[\"modin dependencies\"][\"modin\"] == modin.__version__\n\n\ndef test_warns_that_defaulting_to_pandas():\n    with warns_that_defaulting_to_pandas():\n        ErrorMessage.default_to_pandas()\n\n    
with warns_that_defaulting_to_pandas():\n        ErrorMessage.default_to_pandas(message=\"Function name\")\n\n\ndef test_warns_that_defaulting_to_pandas_if_false():\n    with pytest.raises(UserWarning):\n        with warns_that_defaulting_to_pandas_if(False):\n            ErrorMessage.default_to_pandas()\n\n\ndef test_warns_that_defaulting_to_pandas_if_true():\n    with warns_that_defaulting_to_pandas_if(True):\n        ErrorMessage.default_to_pandas()\n\n\ndef test_warns_that_defaulting_to_pandas_if_non_bool():\n    with pytest.raises(AssertionError):\n        warns_that_defaulting_to_pandas_if(3)\n\n\ndef test_assert_dtypes_equal():\n    \"\"\"Verify that `assert_dtypes_equal` from test utils works correctly (raises an error when it has to).\"\"\"\n    from modin.tests.pandas.utils import assert_dtypes_equal\n\n    # Serieses with equal dtypes\n    sr1, sr2 = pd.Series([1.0]), pandas.Series([1.0])\n    assert sr1.dtype == sr2.dtype == \"float\"\n    assert_dtypes_equal(sr1, sr2)  # shouldn't raise an error since dtypes are equal\n\n    # Serieses with different dtypes belonging to the same class\n    sr1 = sr1.astype(\"int\")\n    assert sr1.dtype != sr2.dtype and sr1.dtype == \"int\"\n    assert_dtypes_equal(sr1, sr2)  # shouldn't raise an error since both are numeric\n\n    # Serieses with different dtypes not belonging to the same class\n    sr2 = sr2.astype(\"str\")\n    assert sr1.dtype != sr2.dtype and sr2.dtype == \"object\"\n    with pytest.raises(AssertionError):\n        assert_dtypes_equal(sr1, sr2)\n\n    # Dfs with equal dtypes\n    df1, df2 = create_test_dfs({\"a\": [1], \"b\": [1.0]})\n    assert_dtypes_equal(df1, df2)  # shouldn't raise an error since dtypes are equal\n\n    # Dfs with different dtypes belonging to the same class\n    df1 = df1.astype({\"a\": \"float\"})\n    assert df1.dtypes[\"a\"] != df2.dtypes[\"a\"]\n    assert_dtypes_equal(df1, df2)  # shouldn't raise an error since both are numeric\n\n    # Dfs with different dtypes\n    
df2 = df2.astype(\"str\")\n    with pytest.raises(AssertionError):\n        assert_dtypes_equal(sr1, sr2)\n\n    # Dfs with categorical dtypes\n    df1 = df1.astype(\"category\")\n    df2 = df2.astype(\"category\")\n    assert_dtypes_equal(df1, df2)  # shouldn't raise an error since both are categorical\n\n    # Dfs with different dtypes (categorical and str)\n    df1 = df1.astype({\"a\": \"str\"})\n    with pytest.raises(AssertionError):\n        assert_dtypes_equal(df1, df2)\n\n\ndef test_execute():\n    data = np.random.rand(100, 64)\n    modin_df, pandas_df = create_test_dfs(data)\n    partitions = modin_df._query_compiler._modin_frame._partitions.flatten()\n    mgr_cls = modin_df._query_compiler._modin_frame._partition_mgr_cls\n\n    # check modin case\n    with patch.object(mgr_cls, \"wait_partitions\", new=Mock()):\n        modin.utils.execute(modin_df)\n        mgr_cls.wait_partitions.assert_called_once()\n        assert (mgr_cls.wait_partitions.call_args[0] == partitions).all()\n\n    # check pandas case without error\n    with patch.object(mgr_cls, \"wait_partitions\", new=Mock()):\n        modin.utils.execute(pandas_df)\n        mgr_cls.wait_partitions.assert_not_called()\n\n    with patch.object(mgr_cls, \"wait_partitions\", new=Mock()):\n        modin.utils.execute(modin_df)\n        mgr_cls.wait_partitions.assert_called_once()\n\n    # check several modin dataframes\n    with patch.object(mgr_cls, \"wait_partitions\", new=Mock()):\n        modin.utils.execute(modin_df, modin_df[modin_df.columns[:4]])\n        mgr_cls.wait_partitions.assert_called\n        assert mgr_cls.wait_partitions.call_count == 2\n\n\ndef current_execution_is_native() -> bool:\n    \"\"\"Whether the current global execution mode is native.\"\"\"\n    return StorageFormat.get() == \"Native\" and Engine.get() == \"Native\"\n\n\ndef df_or_series_using_native_execution(df: Union[pd.DataFrame, pd.Series]) -> bool:\n    \"\"\"Whether this Modin DataFrame or Series is using native 
execution.\"\"\"\n    return (\n        df._query_compiler.engine == \"Native\"\n        and df._query_compiler.storage_format == \"Native\"\n    )\n"
  },
  {
    "path": "modin/utils.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"Collection of general utility functions, mostly for internal use.\"\"\"\n\nimport codecs\nimport functools\nimport importlib\nimport inspect\nimport json\nimport os\nimport re\nimport sys\nimport types\nimport warnings\nfrom pathlib import Path\nfrom textwrap import dedent, indent\nfrom typing import (\n    Any,\n    Callable,\n    Iterable,\n    List,\n    Mapping,\n    Optional,\n    Protocol,\n    TypeVar,\n    Union,\n    runtime_checkable,\n)\n\nimport numpy as np\nimport pandas\nfrom packaging import version\nfrom pandas._typing import JSONSerializable\nfrom pandas.util._decorators import Appender  # type: ignore\nfrom pandas.util._print_versions import (  # type: ignore[attr-defined]\n    _get_dependency_info,\n    _get_sys_info,\n)\n\nfrom modin._version import get_versions\nfrom modin.config import DocModule, Engine, StorageFormat\n\n# Similar to pandas, sentinel value to use as kwarg in place of None when None has\n# special meaning and needs to be distinguished from a user explicitly passing None.\nsentinel = object()\n\nT = TypeVar(\"T\")\n\"\"\"Generic type parameter\"\"\"\n\nFn = TypeVar(\"Fn\", bound=Callable)\n\"\"\"Function type parameter 
(used in decorators that don't change a function's signature)\"\"\"\n\n\n@runtime_checkable\nclass SupportsPublicToPandas(Protocol):  # noqa: PR01\n    \"\"\"Structural type for objects with a ``to_pandas`` method (without a leading underscore).\"\"\"\n\n    def to_pandas(self) -> Any:  # noqa: GL08\n        pass\n\n\n@runtime_checkable\nclass SupportsPublicToNumPy(Protocol):  # noqa: PR01\n    \"\"\"Structural type for objects with a ``to_numpy`` method (without a leading underscore).\"\"\"\n\n    def to_numpy(self) -> Any:  # noqa: GL08\n        pass\n\n\n@runtime_checkable\nclass SupportsPrivateToNumPy(Protocol):  # noqa: PR01\n    \"\"\"Structural type for objects with a ``_to_numpy`` method (note the leading underscore).\"\"\"\n\n    def _to_numpy(self) -> Any:  # noqa: GL08\n        pass\n\n\nMIN_RAY_VERSION = version.parse(\"2.10.0\")\nMIN_DASK_VERSION = version.parse(\"2.22.0\")\nMIN_UNIDIST_VERSION = version.parse(\"0.2.1\")\n\nPANDAS_API_URL_TEMPLATE = f\"https://pandas.pydata.org/pandas-docs/version/{pandas.__version__}/reference/api/{{}}.html\"\n\n# The '__reduced__' name is used internally by the query compiler as a column name to\n# represent pandas Series objects that are not explicitly assigned a name, so as to\n# distinguish between an N-element series and 1xN dataframe.\nMODIN_UNNAMED_SERIES_LABEL = \"__reduced__\"\n\n\ndef _make_api_url(token: str) -> str:\n    \"\"\"\n    Generate the link to pandas documentation.\n\n    Parameters\n    ----------\n    token : str\n        Part of URL to use for generation.\n\n    Returns\n    -------\n    str\n        URL to pandas doc.\n\n    Notes\n    -----\n    This function is extracted for better testability.\n    \"\"\"\n    return PANDAS_API_URL_TEMPLATE.format(token)\n\n\ndef _get_indent(doc: str) -> int:\n    \"\"\"\n    Compute indentation in docstring.\n\n    Parameters\n    ----------\n    doc : str\n        The docstring to compute indentation for.\n\n    Returns\n    -------\n    int\n        
Minimal indent (excluding empty lines).\n    \"\"\"\n    indents = _get_indents(doc)\n    return min(indents) if indents else 0\n\n\ndef _get_indents(source: Union[list, str]) -> list:\n    \"\"\"\n    Compute indentation for each line of the source string.\n\n    Parameters\n    ----------\n    source : str or list of str\n        String to compute indents for. Passed list considered\n        as a list of lines of the source string.\n\n    Returns\n    -------\n    list of ints\n        List containing computed indents for each line.\n    \"\"\"\n    indents = []\n\n    if not isinstance(source, list):\n        source = source.splitlines()\n\n    for line in source:\n        if not line.strip():\n            continue\n        for pos, ch in enumerate(line):\n            if ch != \" \":\n                break\n        indents.append(pos)\n    return indents\n\n\ndef format_string(template: str, **kwargs: str) -> str:\n    \"\"\"\n    Insert passed values at the corresponding placeholders of the specified template.\n\n    In contrast with the regular ``str.format()`` this function computes proper\n    indents for the placeholder values.\n\n    Parameters\n    ----------\n    template : str\n        Template to substitute values in.\n    **kwargs : dict\n        Dictionary that maps placeholder names with values.\n\n    Returns\n    -------\n    str\n        Formated string.\n    \"\"\"\n    # We want to change indentation only for those values which placeholders are located\n    # at the start of the line, in that case the placeholder sets an indentation\n    # that the filling value has to obey.\n    # RegExp determining placeholders located at the beginning of the line.\n    regex = r\"^( *)\\{(\\w+)\\}\"\n    for line in template.splitlines():\n        if line.strip() == \"\":\n            continue\n        match = re.search(regex, line)\n        if match is None:\n            continue\n        nspaces = len(match.group(1))\n        key = match.group(2)\n\n       
 value = kwargs.get(key)\n        if not value:\n            continue\n        value = dedent(value)\n\n        # Since placeholder is located at the beginning of a new line,\n        # it already has '\\n' before it, so to avoid double new lines\n        # we want to discard the first leading '\\n' at the value line,\n        # the others leading '\\n' are considered as being put on purpose\n        if value[0] == \"\\n\":\n            value = value[1:]\n        # `.splitlines()` doesn't preserve last empty line,\n        # so we have to restore it further\n        value_lines = value.splitlines()\n        # We're not indenting the first line of the value, since it's already indented\n        # properly because of the placeholder indentation.\n        indented_lines = [\n            indent(line, \" \" * nspaces) if line != \"\\n\" else line\n            for line in value_lines[1:]\n        ]\n        # If necessary, restoring the last line dropped by `.splitlines()`\n        if value[-1] == \"\\n\":\n            indented_lines += [\" \" * nspaces]\n\n        indented_value = \"\\n\".join([value_lines[0], *indented_lines])\n        kwargs[key] = indented_value\n\n    return template.format(**kwargs)\n\n\ndef align_indents(source: str, target: str) -> str:\n    \"\"\"\n    Align indents of two strings.\n\n    Parameters\n    ----------\n    source : str\n        Source string to align indents with.\n    target : str\n        Target string to align indents.\n\n    Returns\n    -------\n    str\n        Target string with indents aligned with the source.\n    \"\"\"\n    source_indent = _get_indent(source)\n    target = dedent(target)\n    return indent(target, \" \" * source_indent)\n\n\ndef append_to_docstring(message: str) -> Callable[[Fn], Fn]:\n    \"\"\"\n    Create a decorator which appends passed message to the function's docstring.\n\n    Parameters\n    ----------\n    message : str\n        Message to append.\n\n    Returns\n    -------\n    callable\n    
\"\"\"\n\n    def decorator(func: Fn) -> Fn:\n        to_append = align_indents(func.__doc__ or \"\", message)\n        return Appender(to_append)(func)\n\n    return decorator\n\n\ndef _replace_doc(\n    source_obj: object,\n    target_obj: object,\n    overwrite: bool,\n    apilink: Optional[Union[str, List[str]]],\n    parent_cls: Optional[Fn] = None,\n    attr_name: Optional[str] = None,\n) -> None:\n    \"\"\"\n    Replace docstring in `target_obj`, possibly taking from `source_obj` and augmenting.\n\n    Can append the link to pandas API online documentation.\n\n    Parameters\n    ----------\n    source_obj : object\n        Any object from which to take docstring from.\n    target_obj : object\n        The object which docstring to replace.\n    overwrite : bool\n        Forces replacing the docstring with the one from `source_obj` even\n        if `target_obj` has its own non-empty docstring.\n    apilink : str | List[str], optional\n        If non-empty, insert the link(s) to pandas API documentation.\n        Should be the prefix part in the URL template, e.g. 
\"pandas.DataFrame\".\n    parent_cls : class, optional\n        If `target_obj` is an attribute of a class, `parent_cls` should be that class.\n        This is used for generating the API URL as well as for handling special cases\n        like `target_obj` being a property or a cached_property.\n    attr_name : str, optional\n        Gives the name to `target_obj` if it's an attribute of `parent_cls`.\n        Needed to handle some special cases and in most cases could be determined automatically.\n    \"\"\"\n    if isinstance(target_obj, (staticmethod, classmethod)):\n        # we cannot replace docs on decorated objects, we must replace them\n        # on original functions instead\n        target_obj = target_obj.__func__\n\n    source_doc = source_obj.__doc__ or \"\"\n    target_doc = target_obj.__doc__ or \"\"\n    overwrite = overwrite or not target_doc\n    doc = source_doc if overwrite else target_doc\n    if doc == \"\":\n        # Empty docstrings do not need to be inherited\n        return\n\n    if parent_cls and not attr_name:\n        if isinstance(target_obj, property):\n            attr_name = target_obj.fget.__name__  # type: ignore[union-attr]\n        elif isinstance(target_obj, functools.cached_property):\n            attr_name = target_obj.func.__name__\n        elif isinstance(target_obj, (staticmethod, classmethod)):\n            attr_name = target_obj.__func__.__name__\n        else:\n            attr_name = target_obj.__name__  # type: ignore[attr-defined]\n\n    if (\n        source_doc.strip()\n        and apilink\n        and \"pandas API documentation for \" not in target_doc\n        and (not (attr_name or \"\").startswith(\"_\"))\n    ):\n        apilink_l = [apilink] if not isinstance(apilink, list) and apilink else apilink\n        links = []\n        for link in apilink_l:\n            if attr_name:\n                token = f\"{link}.{attr_name}\"\n            else:\n                token = link\n            url = 
_make_api_url(token)\n            links.append(f\"`{token} <{url}>`_\")\n\n        indent_line = \" \" * _get_indent(doc)\n        notes_section = f\"\\n{indent_line}Notes\\n{indent_line}-----\\n\"\n\n        url_line = f\"{indent_line}See pandas API documentation for {', '.join(links)} for more.\\n\"\n        notes_section_with_url = notes_section + url_line\n\n        if notes_section in doc:\n            doc = doc.replace(notes_section, notes_section_with_url)\n        else:\n            doc += notes_section_with_url\n\n    if parent_cls and isinstance(target_obj, property):\n        if overwrite:\n            target_obj.fget.__doc_inherited__ = True  # type: ignore[union-attr]\n        assert attr_name is not None\n        setattr(\n            parent_cls,\n            attr_name,\n            property(target_obj.fget, target_obj.fset, target_obj.fdel, doc),\n        )\n    elif parent_cls and isinstance(target_obj, functools.cached_property):\n        if overwrite:\n            target_obj.func.__doc_inherited__ = True  # type: ignore[attr-defined]\n        assert attr_name is not None\n        target_obj.func.__doc__ = doc\n        setattr(\n            parent_cls,\n            attr_name,\n            functools.cached_property(target_obj.func),\n        )\n        # otherwise: `TypeError: Cannot use cached_property instance without calling __set_name__ on it.`\n        getattr(parent_cls, attr_name).__set_name__(parent_cls, attr_name)\n    else:\n        if overwrite:\n            target_obj.__doc_inherited__ = True  # type: ignore[attr-defined]\n        target_obj.__doc__ = doc\n\n\n# This is a map from objects whose docstrings we are overriding to functions that\n# take a DocModule string and override the docstring according to the\n# DocModule. 
When we update DocModule, we can use this map to update all\n# inherited docstrings.\n_docstring_inheritance_calls: list[Callable[[str], None]] = []\n\n# This is a set of (class, attribute_name) pairs whose docstrings we have\n# already replaced since we last updated DocModule. Note that we don't store\n# the attributes themselves since we replace property attributes instead of\n# modifying them in place:\n# https://github.com/modin-project/modin/blob/e9dbcc127913db77473a83936e8b6bb94ef84f0d/modin/utils.py#L353\n_attributes_with_docstrings_replaced: set[tuple[type, str]] = set()\n\n\ndef _documentable_obj(obj: object) -> bool:\n    \"\"\"\n    Check whether we can replace the docstring of `obj`.\n\n    Parameters\n    ----------\n    obj : object\n        Object whose docstring we want to replace.\n\n    Returns\n    -------\n    bool\n        Whether we can replace the docstring.\n    \"\"\"\n    return bool(\n        callable(obj)\n        and not inspect.isclass(obj)\n        or (isinstance(obj, property) and obj.fget)\n        or (isinstance(obj, functools.cached_property))\n        or (isinstance(obj, (staticmethod, classmethod)) and obj.__func__)\n    )\n\n\ndef _update_inherited_docstrings(doc_module: DocModule) -> None:\n    \"\"\"\n    Update all inherited docstrings.\n\n    Parameters\n    ----------\n    doc_module : DocModule\n        The current DocModule.\n    \"\"\"\n    _attributes_with_docstrings_replaced.clear()\n    _doc_module = doc_module.get()\n    for doc_inheritance_call in _docstring_inheritance_calls:\n        doc_inheritance_call(doc_module=_doc_module)  # type: ignore[call-arg]\n\n\ndef _inherit_docstrings_in_place(\n    cls_or_func: Fn,\n    doc_module: str,\n    parent: object,\n    excluded: List[object],\n    overwrite_existing: bool = False,\n    apilink: Optional[Union[str, List[str]]] = None,\n) -> None:\n    \"\"\"\n    Replace `cls_or_func` docstrings with `parent` docstrings in place.\n\n    Parameters\n    ----------\n    
cls_or_func : Fn\n        The class or function whose docstrings we need to update.\n    doc_module : str\n        The docs module.\n    parent : object\n        Parent object from which the decorated object inherits __doc__.\n    excluded : list, default: []\n        List of parent objects from which the class does not\n        inherit docstrings.\n    overwrite_existing : bool, default: False\n        Allow overwriting docstrings that already exist in\n        the decorated class.\n    apilink : str | List[str], optional\n        If non-empty, insert the link(s) to pandas API documentation.\n        Should be the prefix part in the URL template, e.g. \"pandas.DataFrame\".\n    \"\"\"\n    # Import the docs module and get the class (e.g. `DataFrame`).\n    imported_doc_module = importlib.import_module(doc_module)\n    # Set the default parent so we can use it in case some docs are missing from\n    # parent module.\n    default_parent = parent\n    # Try to get the parent object from the doc module, and if it isn't there,\n    # get it from parent instead. We only do this if we are overriding pandas\n    # documentation. We don't touch other docs.\n    if doc_module != DocModule.default and \"pandas\" in str(\n        getattr(parent, \"__module__\", \"\")\n    ):\n        parent_name = (\n            # DocModule should use the class BasePandasDataset to override the\n            # docstrings of BasePandasDataset, even if BasePandasDataset\n            # normally inherits docstrings from a different `parent`.\n            \"BasePandasDataset\"\n            if getattr(cls_or_func, \"__name__\", \"\") == \"BasePandasDataset\"\n            # For other classes, override docstrings with the class that has the\n            # same name as the `parent` class, e.g. 
DataFrame inherits\n            # docstrings from doc_module.DataFrame.\n            else getattr(parent, \"__name__\", \"\")\n        )\n        parent = getattr(imported_doc_module, parent_name, parent)\n    if parent != default_parent:\n        # Reset API link in case the docs are overridden.\n        apilink = None\n        overwrite_existing = True\n\n    if parent not in excluded:\n        _replace_doc(parent, cls_or_func, overwrite_existing, apilink)\n\n    if not isinstance(cls_or_func, types.FunctionType):\n        seen = set()\n        for base in cls_or_func.__mro__:  # type: ignore[attr-defined]\n            if base is object:\n                continue\n            for attr, obj in base.__dict__.items():\n                # only replace docstrings once to prevent https://github.com/modin-project/modin/issues/7113\n                if attr in seen or (base, attr) in _attributes_with_docstrings_replaced:\n                    continue\n                seen.add(attr)\n                if hasattr(obj, \"_wrapped_superclass_method\"):\n                    # If this method originally comes from a superclass, we get\n                    # docstrings directly from the wrapped superclass method\n                    # rather than inheriting docstrings from the usual parent.\n                    # For example, for BasePandasDataset and Series, the behavior is:\n                    # - If Series inherits a method from BasePandasDataset, then\n                    #   it gets the docstring from that method in BasePandasDataset.\n                    # - If Series overrides a method or defines its own method\n                    #   that's not present in BasePandasDataset, it follows the usual\n                    #   inheritance hierarchy of `parent` and `default_parent`.\n                    parent_obj = obj._wrapped_superclass_method\n                else:\n                    # Try to get the attribute from the docs class first, then\n                    # from the 
default parent (pandas), and if it's not in either,\n                    # set `parent_obj` to `None`.\n                    parent_obj = getattr(\n                        parent, attr, getattr(default_parent, attr, None)\n                    )\n                    if (\n                        parent_obj in excluded\n                        or not _documentable_obj(parent_obj)\n                        or not _documentable_obj(obj)\n                    ):\n                        continue\n\n                _replace_doc(\n                    parent_obj,\n                    obj,\n                    overwrite_existing,\n                    apilink,\n                    parent_cls=base,\n                    attr_name=attr,\n                )\n\n                _attributes_with_docstrings_replaced.add((base, attr))\n\n\ndef _inherit_docstrings(\n    parent: object,\n    excluded: List[object] = [],\n    overwrite_existing: bool = False,\n    apilink: Optional[Union[str, List[str]]] = None,\n) -> Callable[[Fn], Fn]:\n    \"\"\"\n    Create a decorator which overwrites decorated object docstring(s).\n\n    It takes `parent` __doc__ attribute. Also overwrites __doc__ of\n    methods and properties defined in the target or its ancestors if it's a class\n    with the __doc__ of matching methods and properties from the `parent`.\n\n    Parameters\n    ----------\n    parent : object\n        Parent object from which the decorated object inherits __doc__.\n    excluded : list, default: []\n        List of parent objects from which the class does not\n        inherit docstrings.\n    overwrite_existing : bool, default: False\n        Allow overwriting docstrings that already exist in\n        the decorated class.\n    apilink : str | List[str], optional\n        If non-empty, insert the link(s) to pandas API documentation.\n        Should be the prefix part in the URL template, e.g. 
\"pandas.DataFrame\".\n\n    Returns\n    -------\n    callable\n        Decorator which replaces the decorated object's documentation with `parent` documentation.\n\n    Notes\n    -----\n    Keep in mind that the function will override docstrings even for attributes which\n    are not defined in target class (but are defined in the ancestor class),\n    which means that ancestor class attribute docstrings could also change.\n    \"\"\"\n\n    def decorator(cls_or_func: Fn) -> Fn:\n        inherit_docstring_in_place = functools.partial(\n            _inherit_docstrings_in_place,\n            cls_or_func=cls_or_func,\n            parent=parent,\n            excluded=excluded,\n            overwrite_existing=overwrite_existing,\n            apilink=apilink,\n        )\n        inherit_docstring_in_place(doc_module=DocModule.get())\n        _docstring_inheritance_calls.append(inherit_docstring_in_place)\n        return cls_or_func\n\n    return decorator\n\n\nDocModule.subscribe(_update_inherited_docstrings)\n\n\ndef expanduser_path_arg(argname: str) -> Callable[[Fn], Fn]:\n    \"\"\"\n    Decorate a function replacing its path argument with \"user-expanded\" value.\n\n    Parameters\n    ----------\n    argname : str\n        Name of the argument which is containing a path to be expanded.\n\n    Returns\n    -------\n    callable\n        Decorator which performs the replacement.\n    \"\"\"\n\n    def decorator(func: Fn) -> Fn:\n        signature = inspect.signature(func)\n        assert (\n            getattr(signature.parameters.get(argname), \"name\", None) == argname\n        ), f\"Function {func} does not take '{argname}' as argument\"\n\n        @functools.wraps(func)\n        def wrapped(*args: tuple, **kw: dict) -> Any:\n            params = signature.bind(*args, **kw)\n            if patharg := params.arguments.get(argname, None):\n                if isinstance(patharg, str) and patharg.startswith(\"~\"):\n                    params.arguments[argname] = 
os.path.expanduser(patharg)\n                elif isinstance(patharg, Path):\n                    params.arguments[argname] = patharg.expanduser()\n                return func(*params.args, **params.kwargs)\n            return func(*args, **kw)\n\n        return wrapped  # type: ignore[return-value]\n\n    return decorator\n\n\ndef func_from_deprecated_location(\n    func_name: str, module: str, deprecation_message: str\n) -> Callable:\n    \"\"\"\n    Create a function that decorates a function ``module.func_name`` with a ``FutureWarning``.\n\n    Parameters\n    ----------\n    func_name : str\n        Function name to decorate.\n    module : str\n        Module where the function is located.\n    deprecation_message : str\n        Message to print in a future warning.\n\n    Returns\n    -------\n    callable\n    \"\"\"\n\n    def deprecated_func(*args: tuple[Any], **kwargs: dict[Any, Any]) -> Any:\n        \"\"\"Call deprecated function.\"\"\"\n        func = getattr(importlib.import_module(module), func_name)\n        # using 'FutureWarning' as 'DeprecationWarnings' are filtered out by default\n        warnings.warn(deprecation_message, FutureWarning)\n        return func(*args, **kwargs)\n\n    return deprecated_func\n\n\ndef hashable(obj: bool) -> bool:\n    \"\"\"\n    Return whether the `obj` is hashable.\n\n    Parameters\n    ----------\n    obj : object\n        The object to check.\n\n    Returns\n    -------\n    bool\n    \"\"\"\n    # Happy path: if there's no __hash__ method, the object definitely isn't hashable\n    if not hasattr(obj, \"__hash__\"):\n        return False\n    # Otherwise, we may still need to check for type errors, as in the case of `hash(([],))`.\n    # (e.g. 
an unhashable object inside a tuple)\n    try:\n        hash(obj)\n    except TypeError:\n        return False\n    return True\n\n\ndef try_cast_to_pandas(obj: Any, squeeze: bool = False) -> Any:\n    \"\"\"\n    Convert `obj` and all nested objects from Modin to pandas if it is possible.\n\n    If no convertion possible return `obj`.\n\n    Parameters\n    ----------\n    obj : object\n        Object to convert from Modin to pandas.\n    squeeze : bool, default: False\n        Squeeze the converted object(s) before returning them.\n\n    Returns\n    -------\n    object\n        Converted object.\n    \"\"\"\n    if isinstance(obj, SupportsPublicToPandas) or hasattr(obj, \"modin\"):\n        result = obj.modin.to_pandas() if hasattr(obj, \"modin\") else obj.to_pandas()\n        if squeeze:\n            result = result.squeeze(axis=1)\n\n        # QueryCompiler/low-level ModinFrame case, it doesn't have logic about convertion to Series\n        if (\n            isinstance(getattr(result, \"name\", None), str)\n            and result.name == MODIN_UNNAMED_SERIES_LABEL\n        ):\n            result.name = None\n        return result\n    if isinstance(obj, (list, tuple)):\n        return type(obj)([try_cast_to_pandas(o, squeeze=squeeze) for o in obj])\n    if isinstance(obj, dict):\n        return {k: try_cast_to_pandas(v, squeeze=squeeze) for k, v in obj.items()}\n    if callable(obj):\n        module_hierarchy = getattr(obj, \"__module__\", \"\").split(\".\")\n        fn_name = getattr(obj, \"__name__\", None)\n        if fn_name and module_hierarchy[0] == \"modin\":\n            return (\n                getattr(pandas.DataFrame, fn_name, obj)\n                if module_hierarchy[-1] == \"dataframe\"\n                else getattr(pandas.Series, fn_name, obj)\n            )\n    return obj\n\n\ndef execute(*objs: Iterable[Any]) -> None:\n    \"\"\"\n    Trigger the lazy computations for each obj in `objs`, if any, and wait for them to complete.\n\n    
Parameters\n    ----------\n    *objs : Iterable[Any]\n        A collection of objects to trigger lazy computations.\n    \"\"\"\n    for obj in objs:\n        if not hasattr(obj, \"_query_compiler\"):\n            continue\n        query_compiler = obj._query_compiler\n        query_compiler.execute()\n\n\ndef wrap_into_list(*args: Any, skipna: bool = True) -> List[Any]:\n    \"\"\"\n    Wrap a sequence of passed values in a flattened list.\n\n    If some value is a list by itself the function appends its values\n    to the result one by one instead inserting the whole list object.\n\n    Parameters\n    ----------\n    *args : tuple\n        Objects to wrap into a list.\n    skipna : bool, default: True\n        Whether or not to skip nan or None values.\n\n    Returns\n    -------\n    list\n        Passed values wrapped in a list.\n    \"\"\"\n\n    def isnan(o: Any) -> bool:\n        return o is None or (isinstance(o, float) and np.isnan(o))\n\n    res = []\n    for o in args:\n        if skipna and isnan(o):\n            continue\n        if isinstance(o, list):\n            res.extend(o)\n        else:\n            res.append(o)\n    return res\n\n\ndef wrap_udf_function(func: Callable) -> Callable:\n    \"\"\"\n    Create a decorator that makes `func` return pandas objects instead of Modin.\n\n    Parameters\n    ----------\n    func : callable\n        Function to wrap.\n\n    Returns\n    -------\n    callable\n    \"\"\"\n\n    def wrapper(*args: Any, **kwargs: Any) -> Any:\n        result = func(*args, **kwargs)\n        # if user accidently returns modin DataFrame or Series\n        # casting it back to pandas to properly process\n        return try_cast_to_pandas(result)\n\n    wrapper.__name__ = func.__name__\n    return wrapper\n\n\ndef get_current_execution() -> str:\n    \"\"\"\n    Return current execution name as a string.\n\n    Returns\n    -------\n    str\n        Returns <StorageFormat>On<Engine>-like string.\n    \"\"\"\n    return 
f\"{StorageFormat.get()}On{Engine.get()}\"\n\n\ndef instancer(_class: Callable[[], T]) -> T:\n    \"\"\"\n    Create a dummy instance each time this is imported.\n\n    This serves the purpose of allowing us to use all of pandas plotting methods\n    without aliasing and writing each of them ourselves.\n\n    Parameters\n    ----------\n    _class : object\n\n    Returns\n    -------\n    object\n        Instance of `_class`.\n    \"\"\"\n    return _class()\n\n\ndef import_optional_dependency(name: str, message: str) -> types.ModuleType:\n    \"\"\"\n    Import an optional dependecy.\n\n    Parameters\n    ----------\n    name : str\n        The module name.\n    message : str\n        Additional text to include in the ImportError message.\n\n    Returns\n    -------\n    module : ModuleType\n        The imported module.\n    \"\"\"\n    try:\n        return importlib.import_module(name)\n    except ImportError:\n        raise ImportError(\n            f\"Missing optional dependency '{name}'. 
{message} \"\n            + f\"Use pip or conda to install {name}.\"\n        ) from None\n\n\ndef _get_modin_deps_info() -> Mapping[str, Optional[JSONSerializable]]:\n    \"\"\"\n    Return Modin-specific dependencies information as a JSON serializable dictionary.\n\n    Returns\n    -------\n    Mapping[str, Optional[pandas.JSONSerializable]]\n        The dictionary of Modin dependencies and their versions.\n    \"\"\"\n    import modin  # delayed import so modin.__init__ is fully initialized\n\n    result = {\"modin\": modin.__version__}\n\n    for pkg_name, pkg_version in [\n        (\"ray\", MIN_RAY_VERSION),\n        (\"dask\", MIN_DASK_VERSION),\n        (\"distributed\", MIN_DASK_VERSION),\n    ]:\n        try:\n            pkg = importlib.import_module(pkg_name)\n        except ImportError:\n            result[pkg_name] = None\n        else:\n            result[pkg_name] = pkg.__version__ + (\n                f\" (outdated; >={pkg_version} required)\"\n                if version.parse(pkg.__version__) < pkg_version\n                else \"\"\n            )\n    return result\n\n\ndef show_versions(as_json: Union[str, bool] = False) -> None:\n    \"\"\"\n    Provide useful information, important for bug reports.\n\n    It comprises info about hosting operation system, pandas version,\n    and versions of other installed relative packages.\n\n    Parameters\n    ----------\n    as_json : str or bool, default: False\n        * If False, outputs info in a human readable form to the console.\n        * If str, it will be considered as a path to a file.\n          Info will be written to that file in JSON format.\n        * If True, outputs info in JSON format to the console.\n\n    Notes\n    -----\n    This is mostly a copy of pandas.show_versions() but adds separate listing\n    of Modin-specific dependencies.\n    \"\"\"\n    sys_info = _get_sys_info()\n    sys_info[\"commit\"] = get_versions()[\"full-revisionid\"]\n    modin_deps = _get_modin_deps_info()\n  
  deps = _get_dependency_info()\n\n    if as_json:\n        j = {\n            \"system\": sys_info,\n            \"modin dependencies\": modin_deps,\n            \"dependencies\": deps,\n        }\n\n        if as_json is True:\n            sys.stdout.writelines(json.dumps(j, indent=2))\n        else:\n            assert isinstance(as_json, str)  # needed for mypy\n            with codecs.open(as_json, \"wb\", encoding=\"utf8\") as f:\n                json.dump(j, f, indent=2)\n\n    else:\n        assert isinstance(sys_info[\"LOCALE\"], dict)  # needed for mypy\n        language_code = sys_info[\"LOCALE\"][\"language-code\"]\n        encoding = sys_info[\"LOCALE\"][\"encoding\"]\n        sys_info[\"LOCALE\"] = f\"{language_code}.{encoding}\"\n\n        maxlen = max(max(len(x) for x in d) for d in (deps, modin_deps))\n        print(\"\\nINSTALLED VERSIONS\\n------------------\")  # noqa: T201\n        for k, v in sys_info.items():\n            print(f\"{k:<{maxlen}}: {v}\")  # noqa: T201\n        for name, d in ((\"Modin\", modin_deps), (\"pandas\", deps)):\n            print(f\"\\n{name} dependencies\\n{'-' * (len(name) + 13)}\")  # noqa: T201\n            for k, v in d.items():\n                print(f\"{k:<{maxlen}}: {v}\")  # noqa: T201\n\n\nclass ModinAssumptionError(Exception):\n    \"\"\"An exception that allows us defaults to pandas if any assumption fails.\"\"\"\n\n    pass\n\n\ndef _maybe_warn_on_default(message: str = \"\", *, reason: str = \"\") -> None:\n    \"\"\"\n    Raise a warning on an operation that defaults to pandas if necessary.\n\n    This checks the query compiler used by the current configured active backend, and prints\n    a warning message about defaulting to pandas if needed.\n\n    Parameters\n    ----------\n    message : str, default: \"\"\n        The message to show.\n    reason : str, default: \"\"\n        The reason for defaulting.\n    \"\"\"\n    # Avoids a module-level circular import\n    from 
modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher\n\n    FactoryDispatcher.get_factory().io_cls.query_compiler_cls._maybe_warn_on_default(\n        message=message, reason=reason\n    )\n\n\nclass classproperty:\n    \"\"\"\n    Decorator that allows creating read-only class properties.\n\n    Parameters\n    ----------\n    func : method\n\n    Examples\n    --------\n    >>> class A:\n    ...     field = 10\n    ...     @classproperty\n    ...     def field_x2(cls):\n    ...             return cls.field * 2\n    ...\n    >>> print(A.field_x2)\n    20\n    \"\"\"\n\n    def __init__(self, func: Any):\n        self.fget = func\n\n    def __get__(self, instance: Any, owner: Any) -> Any:  # noqa: GL08\n        return self.fget(owner)\n\n\ndef reload_modin() -> None:\n    \"\"\"\n    Reload all previously imported Modin modules.\n\n    The call to this function is required\n    if an execution engine has been shut down and\n    is going to be started up once again.\n    \"\"\"\n    modules = sys.modules.copy()\n    for name, module in modules.items():\n        if name.startswith(\"modin\"):\n            importlib.reload(module)\n"
  },
  {
    "path": "modin-autoimport-pandas.pth",
    "content": "import os; os.environ.get(\"__MODIN_AUTOIMPORT_PANDAS__\", None) and __import__(\"pandas\")\n"
  },
  {
    "path": "mypy.ini",
    "content": "[mypy]\n# Ignoring missing imports can be dangerous, should do this at module-by-module level\nignore_missing_imports = True\nshow_error_codes = True\nshow_column_numbers = True\ncheck_untyped_defs = True\nfollow_imports = silent\n\n# be strict\ndisallow_untyped_calls=True\ndisallow_untyped_defs=True\nstrict_optional=True\nwarn_no_return=True\nwarn_redundant_casts=True\nwarn_unused_ignores=True\ndisallow_any_generics=False\nwarn_unreachable=True\n\n# We will add more files over time to increase coverage\nfiles =\n        modin/config/,\n        modin/core/dataframe/base/,\n        modin/logging/,\n        modin/distributed/,\n        modin/*.py\n\nexclude = .*/tests/.*\n"
  },
  {
    "path": "requirements/env_unidist_linux.yml",
    "content": "name: modin_on_unidist\nchannels:\n  - conda-forge\ndependencies:\n  - pip\n\n  # required dependencies\n  - pandas>=2.2,<2.4\n  - numpy>=1.22.4\n  - unidist-mpi>=0.2.1\n  - mpich\n  - fsspec>=2022.11.0\n  - packaging>=21.0\n  - psutil>=5.8.0\n\n  # optional dependencies\n  # NOTE Keep the ray and dask dependencies in sync with the Windows Unidist\n  # environment and the general environment-dev.yml.\n  # We include the ray and dask dependencies here because we want to test\n  # switching dataframe backends to ray or dask.\n  - ray-core>=2.10.0,<3\n  # workaround for https://github.com/conda/conda/issues/11744\n  - grpcio!=1.45.*\n  - grpcio!=1.46.*\n  - dask>=2.22.0\n  - pyarrow>=10.0.1\n  - xarray>=2022.12.0\n  - jinja2>=3.1.2\n  - scipy>=1.10.0\n  - s3fs>=2022.11.0\n  - lxml>=4.9.2\n  - openpyxl>=3.1.0\n  - xlrd>=2.0.1\n  - matplotlib>=3.6.3\n  - sqlalchemy>=2.0.0\n  - pandas-gbq>=0.19.0\n  - pytables>=3.8.0\n  # pymssql==2.2.8 broken: https://github.com/modin-project/modin/issues/6429\n  - pymssql>=2.1.5,!=2.2.8\n  - psycopg2>=2.9.6\n  - fastparquet>=2022.12.0\n  - tqdm>=4.60.0\n  - numexpr>=2.8.4\n\n  # dependencies for making release\n  - pygithub>=v1.58.0\n  - pygit2>=1.9.2\n\n  # test dependencies\n  - coverage>=7.1.0\n  - moto>=4.1.0\n  - pytest>=7.3.2\n  - pytest-cov>=4.0.0\n  - pytest-xdist>=3.2.0\n  - typing_extensions\n\n  # code linters\n  - black>=24.1.0\n  - flake8>=6.0.0\n  - flake8-no-implicit-concat>=0.3.4\n  - flake8-print>=5.0.0\n  - mypy>=1.0.0\n  - pandas-stubs>=2.0.0\n\n  - pip:\n      # Fixes breaking ipywidgets changes, but didn't release yet.\n      - git+https://github.com/modin-project/modin-spreadsheet.git@49ffd89f683f54c311867d602c55443fb11bf2a5\n      - connectorx>=0.2.6a4\n      # The `numpydoc` version should match the version installed in the `lint-pydocstyle` job of the CI.\n      - numpydoc==1.6.0\n"
  },
  {
    "path": "requirements/env_unidist_win.yml",
    "content": "name: modin_on_unidist\nchannels:\n  - conda-forge\ndependencies:\n  - pip\n\n  # required dependencies\n  - pandas>=2.2,<2.4\n  - numpy>=1.22.4\n  - unidist-mpi>=0.2.1\n  - msmpi\n  - fsspec>=2022.11.0\n  - packaging>=21.0\n  - psutil>=5.8.0\n\n  # optional dependencies\n  # NOTE Keep the ray and dask dependencies in sync with the Linux Unidist\n  # environment and the general environment-dev.yml.\n  # We include the ray and dask dependencies here because we want to test\n  # switching dataframe backends to ray or dask.\n  - ray-core>=2.10.0,<3\n  # workaround for https://github.com/conda/conda/issues/11744\n  - grpcio!=1.45.*\n  - grpcio!=1.46.*\n  - dask>=2.22.0\n  - pyarrow>=10.0.1\n  - xarray>=2022.12.0\n  - jinja2>=3.1.2\n  - scipy>=1.10.0\n  - s3fs>=2022.11.0\n  - lxml>=4.9.2\n  - openpyxl>=3.1.0\n  - xlrd>=2.0.1\n  - matplotlib>=3.6.3\n  - sqlalchemy>=2.0.0\n  - pandas-gbq>=0.19.0\n  - pytables>=3.8.0\n  # pymssql==2.2.8 broken: https://github.com/modin-project/modin/issues/6429\n  - pymssql>=2.1.5,!=2.2.8\n  - psycopg2>=2.9.6\n  - fastparquet>=2022.12.0\n  - tqdm>=4.60.0\n  - numexpr>=2.8.4\n\n  # dependencies for making release\n  - pygithub>=v1.58.0\n  - pygit2>=1.9.2\n\n  # test dependencies\n  - coverage>=7.1.0\n  - moto>=4.1.0\n  - pytest>=7.3.2\n  - pytest-cov>=4.0.0\n  - pytest-xdist>=3.2.0\n  - typing_extensions\n\n  # code linters\n  - black>=24.1.0\n  - flake8>=6.0.0\n  - flake8-no-implicit-concat>=0.3.4\n  - flake8-print>=5.0.0\n  - mypy>=1.0.0\n  - pandas-stubs>=2.0.0\n\n  - pip:\n      - dataframe-api-compat>=0.2.7\n      # Fixes breaking ipywidgets changes, but didn't release yet.\n      - git+https://github.com/modin-project/modin-spreadsheet.git@49ffd89f683f54c311867d602c55443fb11bf2a5\n      - connectorx>=0.2.6a4\n      # The `numpydoc` version should match the version installed in the `lint-pydocstyle` job of the CI.\n      - numpydoc==1.6.0\n"
  },
  {
    "path": "requirements/requirements-no-engine.yml",
    "content": "channels:\n  - conda-forge\ndependencies:\n  - pip\n\n  # required dependencies\n  - pandas>=2.2,<2.4\n  - numpy>=1.22.4\n  - fsspec>=2022.11.0\n  - packaging>=21.0\n  - psutil>=5.8.0\n\n  # optional dependencies\n  - pyarrow>=10.0.1\n  - xarray>=2022.12.0\n  - jinja2>=3.1.2\n  - scipy>=1.10.0\n  - s3fs>=2022.11.0\n  - lxml>=4.9.2\n  - openpyxl>=3.1.0\n  - xlrd>=2.0.1\n  - matplotlib>=3.6.3\n  - sqlalchemy>=2.0.0\n  - pandas-gbq>=0.19.0\n  - pytables>=3.8.0\n  - tqdm>=4.60.0\n  - numexpr>=2.8.4\n\n  # dependencies for making release\n  - pygithub>=v1.58.0\n  - pygit2>=1.9.2\n\n  # test dependencies\n  - coverage>=7.1.0\n  - moto>=4.1.0\n  - pytest>=7.3.2\n  - pytest-cov>=4.0.0\n  - pytest-xdist>=3.2.0\n  - typing_extensions\n\n  # code linters\n  - black>=24.1.0\n  - flake8>=6.0.0\n  - flake8-no-implicit-concat>=0.3.4\n  - flake8-print>=5.0.0\n\n  - pip:\n      - dataframe-api-compat>=0.2.7\n      - asv==0.5.1\n      # no conda package for windows\n      - connectorx>=0.2.6a4\n      # Fixes breaking ipywidgets changes, but didn't release yet.\n      - git+https://github.com/modin-project/modin-spreadsheet.git@49ffd89f683f54c311867d602c55443fb11bf2a5\n      # The `numpydoc` version should match the version installed in the `lint-pydocstyle` job of the CI.\n      - numpydoc==1.6.0\n"
  },
  {
    "path": "requirements-dev.txt",
    "content": "## required dependencies\npandas>=2.2,<2.4\nnumpy>=1.22.4\nfsspec>=2022.11.0\npackaging>=21.0\npsutil>=5.8.0\n\n## optional dependencies\nray>=2.10.0,<3\npyarrow>=10.0.1\ndask[complete]>=2.22.0\ndistributed>=2.22.0\nxarray>=2022.12.0\nJinja2>=3.1.2\nscipy>=1.10.0\ns3fs>=2022.11.0\nlxml>=4.9.2\nopenpyxl>=3.1.0\nxlrd>=2.0.1\nmatplotlib>=3.6.3\nsqlalchemy>=2.0.0\npandas-gbq>=0.19.0\ntables>=3.7.0\n# pymssql==2.2.8 broken: https://github.com/modin-project/modin/issues/6429\npymssql>=2.1.5,!=2.2.8\n# psycopg devs recommend the other way of installation for production\n# but this is ok for testing and development\npsycopg2-binary>=2.9.3\nconnectorx>=0.2.6a4\nfastparquet>=2022.12.0\nflask-cors\ntqdm>=4.60.0\nnumexpr>=2.8.4\n# Latest modin-spreadsheet with widget fix\ngit+https://github.com/modin-project/modin-spreadsheet.git@49ffd89f683f54c311867d602c55443fb11bf2a5\ndataframe-api-compat>=0.2.7\n\n## dependencies for making release\nPyGithub>=1.58.0\npygit2>=1.9.2\n\n## test dependencies\nasv==0.5.1\ncoverage>=7.1.0\nfuzzydata>=0.0.11\n# The `numpydoc` version should match the version installed in the `lint-pydocstyle` job of the CI.\nnumpydoc==1.1.0\nmoto>=4.1.0\npytest>=7.3.2\npytest-benchmark>=4.0.0\npytest-cov>=4.0.0\npytest-xdist>=3.2.0\ntyping_extensions\n\n## code linters\nblack>=24.1.0\nflake8>=6.0.0\nflake8-no-implicit-concat>=0.3.4\nflake8-print>=5.0.0\nmypy>=1.0.0\npandas-stubs>=2.0.0\nisort>=5.12\n"
  },
  {
    "path": "scripts/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n"
  },
  {
    "path": "scripts/doc_checker.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n\"\"\"\nValidate docstrings using pydocstyle and numpydoc.\n\nExample usage:\npython scripts/doc_checker.py asv_bench/benchmarks/utils.py modin/pandas\n\"\"\"\n\nimport argparse\nimport ast\nimport functools\nimport inspect\nimport logging\nimport os\nimport pathlib\nimport re\nimport shutil\nimport subprocess\nimport sys\nfrom typing import List\n\nfrom numpydoc.docscrape import NumpyDocString, get_doc_object\nfrom numpydoc.validate import Validator\n\n# Let the other modules to know that the doc checker is running.\nos.environ[\"_MODIN_DOC_CHECKER_\"] = \"1\"\n\nlogging.basicConfig(\n    stream=sys.stdout, format=\"%(levelname)s:%(message)s\", level=logging.INFO\n)\n\nMODIN_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), \"..\"))\nsys.path.insert(0, MODIN_PATH)\n\n# error codes that pandas test in CI\n# https://numpydoc.readthedocs.io/en/latest/validation.html#built-in-validation-checks\nNUMPYDOC_BASE_ERROR_CODES = {\n    *(\"GL01\", \"GL02\", \"GL03\", \"GL05\", \"GL06\", \"GL07\", \"GL08\", \"GL09\", \"GL10\"),\n    *(\"SS02\", \"SS03\", \"SS04\", \"SS05\", \"PR01\", \"PR02\", \"PR03\", \"PR04\", \"PR05\"),\n    *(\"PR08\", \"PR09\", \"PR10\", 
\"RT01\", \"RT04\", \"RT05\", \"SA02\", \"SA03\"),\n}\n\nMODIN_ERROR_CODES = {\n    \"MD01\": \"'{parameter}' description should be '[type], default: [value]', found: '{found}'\",\n    \"MD02\": \"Spelling error in line: {line}, found: '{word}', reference: '{reference}'\",\n    \"MD03\": \"Section contents is over-indented (in section '{section}')\",\n}\n\n\ndef get_optional_args(doc: Validator) -> dict:\n    \"\"\"\n    Get optional parameters for the object for which the docstring is checked.\n\n    Parameters\n    ----------\n    doc : numpydoc.validate.Validator\n        Validator handler.\n\n    Returns\n    -------\n    dict\n        Dict with default argument names and its values.\n    \"\"\"\n    obj = doc.obj\n    if not callable(obj) or inspect.isclass(obj):\n        return {}\n    signature = inspect.signature(obj)\n    return {\n        k: v.default\n        for k, v in signature.parameters.items()\n        if v.default is not inspect.Parameter.empty\n    }\n\n\ndef check_optional_args(doc: Validator) -> list:\n    \"\"\"\n    Check type description of optional arguments.\n\n    Parameters\n    ----------\n    doc : numpydoc.validate.Validator\n\n    Returns\n    -------\n    list\n        List of tuples with Modin error code and its description.\n    \"\"\"\n    # `not doc.raw_doc and doc.clean_doc` - means that docstring was\n    # automatically generated by numpydoc with help of `pydoc.getdoc`.\n    if not doc.doc_parameters or (not doc.raw_doc and doc.clean_doc):\n        return []\n    optional_args = get_optional_args(doc)\n    if not optional_args:\n        return []\n\n    errors = []\n    for parameter in optional_args:\n        # case when not all parameters are listed in \"Parameters\" section;\n        # it's handled by numpydoc itself\n        if parameter not in doc.doc_parameters:\n            continue\n        type_line = doc.doc_parameters[parameter][0]\n        has_default = \"default: \" in type_line\n        has_optional = 
\"optional\" in type_line\n        if not (has_default ^ has_optional):\n            errors.append(\n                (\n                    \"MD01\",\n                    MODIN_ERROR_CODES[\"MD01\"].format(\n                        parameter=parameter,\n                        found=type_line,\n                    ),\n                )\n            )\n    return errors\n\n\ndef check_spelling_words(doc: Validator) -> list:\n    \"\"\"\n    Check spelling of chosen words in doc.\n\n    Parameters\n    ----------\n    doc : numpydoc.validate.Validator\n        Validator handler.\n\n    Returns\n    -------\n    list\n        List of tuples with Modin error code and its description.\n\n    Notes\n    -----\n    Any special words enclosed in apostrophes(\") are treated as python string\n    constants and are not checked for spelling.\n    \"\"\"\n    if not doc.raw_doc:\n        return []\n    components = set(\n        [\"Modin\", \"pandas\", \"NumPy\", \"Ray\", \"Dask\"] + [\"PyArrow\", \"XGBoost\", \"Plasma\"]\n    )\n    check_words = \"|\".join(x.lower() for x in components)\n\n    # comments work only with re.VERBOSE\n    pattern = r\"\"\"\n    (?:                     # non-capturing group\n        [^-\\\\\\w\\/]          # any symbol except: '-', '\\', '/' and any from [a-zA-Z0-9_]\n        | ^                 # or line start\n    )\n    ({check_words})         # words to check, example - \"modin|pandas|numpy\"\n    (?:                     # non-capturing group\n        [^-\"\\.\\/\\w\\\\]       # any symbol except: '-', '\"', '.', '\\', '/' and any from [a-zA-Z0-9_]\n        | \\.\\s              # or '.' and any whitespace\n        | \\.$               # or '.' 
and line end\n        | $                 # or line end\n    )\n    \"\"\".format(\n        check_words=check_words\n    )\n    results = [\n        set(re.findall(pattern, line, re.I | re.VERBOSE)) - components\n        for line in doc.raw_doc.splitlines()\n    ]\n\n    docstring_start_line = None\n    for idx, line in enumerate(inspect.getsourcelines(doc.code_obj)[0]):\n        if '\"\"\"' in line or \"'''\" in line:\n            docstring_start_line = doc.source_file_def_line + idx\n            break\n\n    errors = []\n    for line_idx, words_in_line in enumerate(results):\n        for word in words_in_line:\n            reference = [x for x in components if x.lower() == word.lower()][0]\n            errors.append(\n                (\n                    \"MD02\",\n                    MODIN_ERROR_CODES[\"MD02\"].format(\n                        line=docstring_start_line + line_idx,\n                        word=word,\n                        reference=reference,\n                    ),\n                )\n            )\n    return errors\n\n\ndef check_docstring_indention(doc: Validator) -> list:\n    \"\"\"\n    Check indention of docstring since numpydoc reports weird results.\n\n    Parameters\n    ----------\n    doc : numpydoc.validate.Validator\n        Validator handler.\n\n    Returns\n    -------\n    list\n        List of tuples with Modin error code and its description.\n    \"\"\"\n    from modin.utils import _get_indent\n\n    numpy_docstring = NumpyDocString(doc.clean_doc)\n    numpy_docstring._doc.reset()\n    numpy_docstring._parse_summary()\n    sections = list(numpy_docstring._read_sections())\n    errors = []\n    for section in sections:\n        description = \"\\n\".join(section[1])\n        if _get_indent(description) != 0:\n            errors.append(\n                (\"MD03\", MODIN_ERROR_CODES[\"MD03\"].format(section=section[0]))\n            )\n    return errors\n\n\ndef validate_modin_error(doc: Validator, results: dict) -> list:\n  
  \"\"\"\n    Validate custom Modin errors.\n\n    Parameters\n    ----------\n    doc : numpydoc.validate.Validator\n        Validator handler.\n    results : dict\n        Dictionary that numpydoc.validate.validate return.\n\n    Returns\n    -------\n    dict\n        Updated dict with Modin custom errors.\n    \"\"\"\n    errors = check_optional_args(doc)\n    errors += check_spelling_words(doc)\n    errors += check_docstring_indention(doc)\n    results[\"errors\"].extend(errors)\n    return results\n\n\ndef skip_check_if_noqa(doc: Validator, err_code: str, noqa_checks: list) -> bool:\n    \"\"\"\n    Skip the check that matches `err_code` if `err_code` found in noqa string.\n\n    Parameters\n    ----------\n    doc : numpydoc.validate.Validator\n        Validator handler.\n    err_code : str\n        Error code found by numpydoc.\n    noqa_checks : list\n        Found noqa checks.\n\n    Returns\n    -------\n    bool\n        Return True if 'noqa' found.\n    \"\"\"\n    if noqa_checks == [\"all\"]:\n        return True\n\n    # GL08 - missing docstring in an arbitary object; numpydoc code\n    if err_code == \"GL08\":\n        name = doc.name.split(\".\")[-1]\n        # Numpydoc recommends to add docstrings of __init__ method in class docstring.\n        # So there is no error if docstring is missing in __init__\n        if name == \"__init__\":\n            return True\n    return err_code in noqa_checks\n\n\ndef get_noqa_checks(doc: Validator) -> list:\n    \"\"\"\n    Get codes after `# noqa`.\n\n    Parameters\n    ----------\n    doc : numpydoc.validate.Validator\n        Validator handler.\n\n    Returns\n    -------\n    list\n        List with codes.\n\n    Notes\n    -----\n    If noqa doesn't have any codes - returns [\"all\"].\n    \"\"\"\n    source = doc.method_source\n    if not source:\n        return []\n\n    noqa_str = \"\"\n    if not inspect.ismodule(doc.obj):\n        # find last line of obj definition\n        for line in 
source.split(\"\\n\"):\n            if \")\" in line and \":\" in line.split(\")\", 1)[1]:\n                noqa_str = line\n                break\n    else:\n        # noqa string is defined as the first line before the docstring\n        if not doc.raw_doc:\n            # noqa string is meaningless if there is no docstring in module\n            return []\n        lines = source.split(\"\\n\")\n        for idx, line in enumerate(lines):\n            if '\"\"\"' in line or \"'''\" in line:\n                noqa_str = lines[idx - 1]\n                break\n\n    if \"# noqa:\" in noqa_str:\n        noqa_checks = noqa_str.split(\"# noqa:\", 1)[1].split(\",\")\n    elif \"# noqa\" in noqa_str:\n        noqa_checks = [\"all\"]\n    else:\n        noqa_checks = []\n    return [check.strip() for check in noqa_checks]\n\n\ndef construct_validator(import_path: str) -> Validator:  # noqa: GL08\n    # helper function\n    return Validator(get_doc_object(Validator._load_obj(import_path)))\n\n\n# code snippet from numpydoc\ndef validate_object(import_path: str) -> list:\n    \"\"\"\n    Check docstrings of an entity that can be imported.\n\n    Parameters\n    ----------\n    import_path : str\n        Python-like import path.\n\n    Returns\n    -------\n    errors : list\n        List with string representations of errors.\n    \"\"\"\n    from numpydoc.validate import validate\n\n    errors = []\n    doc = construct_validator(import_path)\n    if (\n        getattr(doc.obj, \"__doc_inherited__\", False)\n        or (\n            isinstance(doc.obj, property)\n            and getattr(doc.obj.fget, \"__doc_inherited__\", False)\n        )\n        or (\n            isinstance(doc.obj, functools.cached_property)\n            and getattr(doc.obj.func, \"__doc_inherited__\", False)\n        )\n    ):\n        # do not check inherited docstrings\n        return errors\n    results = validate(import_path)\n    results = validate_modin_error(doc, results)\n    noqa_checks = 
get_noqa_checks(doc)\n    for err_code, err_desc in results[\"errors\"]:\n        if (\n            err_code not in NUMPYDOC_BASE_ERROR_CODES\n            and err_code not in MODIN_ERROR_CODES\n        ) or skip_check_if_noqa(doc, err_code, noqa_checks):\n            continue\n        errors.append(\n            \":\".join([import_path, str(results[\"file_line\"]), err_code, err_desc])\n        )\n    return errors\n\n\ndef numpydoc_validate(path: pathlib.Path) -> bool:\n    \"\"\"\n    Perform numpydoc checks.\n\n    Parameters\n    ----------\n    path : pathlib.Path\n        Filename or directory path for check.\n\n    Returns\n    -------\n    is_successfull : bool\n        Return True if all checks are successful.\n    \"\"\"\n    is_successfull = True\n\n    if path.is_file():\n        walker = ((str(path.parent), [], [path.name]),)\n    else:\n        walker = os.walk(path)\n\n    for root, _, files in walker:\n        if \"__pycache__\" in root:\n            continue\n        for _file in files:\n            if not _file.endswith(\".py\"):\n                continue\n\n            current_path = os.path.join(root, _file)\n            # get importable name\n            module_name = current_path.replace(\"/\", \".\").replace(\"\\\\\", \".\")\n            # remove \".py\"\n            module_name = os.path.splitext(module_name)[0]\n\n            with open(current_path) as fd:\n                file_contents = fd.read()\n\n            # using static parsing for collecting module, functions, classes and their methods\n            module = ast.parse(file_contents)\n\n            def is_public_func(node):\n                return isinstance(node, ast.FunctionDef) and (\n                    not node.name.startswith(\"__\") or node.name.endswith(\"__\")\n                )\n\n            functions = [node for node in module.body if is_public_func(node)]\n            classes = [node for node in module.body if isinstance(node, ast.ClassDef)]\n            methods = [\n    
            f\"{module_name}.{_class.name}.{node.name}\"\n                for _class in classes\n                for node in _class.body\n                if is_public_func(node)\n            ]\n\n            # numpydoc docstrings validation\n            # docstrings are taken dynamically\n            to_validate = (\n                [module_name]\n                + [f\"{module_name}.{x.name}\" for x in (functions + classes)]\n                + methods\n            )\n            results = list(map(validate_object, to_validate))\n            is_successfull_file = not any(results)\n            if not is_successfull_file:\n                logging.info(f\"NUMPYDOC OUTPUT FOR {current_path}\")\n            [logging.error(error) for errors in results for error in errors]\n            is_successfull &= is_successfull_file\n    return is_successfull\n\n\ndef pydocstyle_validate(\n    path: pathlib.Path, add_ignore: List[str], use_numpydoc: bool\n) -> int:\n    \"\"\"\n    Perform pydocstyle checks.\n\n    Parameters\n    ----------\n    path : pathlib.Path\n        Filename or directory path for check.\n    add_ignore : List[int]\n        `pydocstyle` error codes which are not verified.\n    use_numpydoc : bool\n        Disable duplicate `pydocstyle` checks if `numpydoc` is in use.\n\n    Returns\n    -------\n    bool\n        Return True if all pydocstyle checks are successful.\n    \"\"\"\n    pydocstyle = \"pydocstyle\"\n    if not shutil.which(pydocstyle):\n        raise ValueError(f\"{pydocstyle} not found in PATH\")\n    # These check can be done with numpydoc tool, so disable them for pydocstyle.\n    if use_numpydoc:\n        add_ignore.extend([\"D100\", \"D101\", \"D102\", \"D103\", \"D104\", \"D105\"])\n    result = subprocess.run(\n        [\n            pydocstyle,\n            \"--convention\",\n            \"numpy\",\n            \"--add-ignore\",\n            \",\".join(add_ignore),\n            str(path),\n        ],\n        text=True,\n        
capture_output=True,\n    )\n    if result.returncode:\n        logging.info(f\"PYDOCSTYLE OUTPUT FOR {path}\")\n        logging.error(result.stdout)\n        logging.error(result.stderr)\n    return True if result.returncode == 0 else False\n\n\ndef monkeypatching():\n    \"\"\"Monkeypatch not installed modules and decorators which change __doc__ attribute.\"\"\"\n    from unittest.mock import Mock\n\n    import ray\n\n    import modin.utils\n\n    def monkeypatch(*args, **kwargs):\n        if len(args) == 1 and len(kwargs) == 0 and callable(args[0]):\n            # This is the case where the decorator is just @ray.remote without parameters.\n            return args[0]\n        return lambda cls_or_func: cls_or_func\n\n    ray.remote = monkeypatch\n\n    # We are mocking packages we don't need for docs checking in order to avoid import errors\n    sys.modules[\"sqlalchemy\"] = Mock()\n\n    modin.utils.instancer = functools.wraps(modin.utils.instancer)(lambda cls: cls)\n\n    # monkey-patch numpydoc for working correctly with properties\n    # until https://github.com/numpy/numpydoc/issues/551 is fixed\n    def load_obj(name, old_load_obj=Validator._load_obj):\n        obj = old_load_obj(name)\n        if isinstance(obj, property):\n            obj = obj.fget\n        elif isinstance(obj, functools.cached_property):\n            obj = obj.func\n        return obj\n\n    Validator._load_obj = staticmethod(load_obj)\n\n    # enable docs testing on windows\n    sys.getdlopenflags = Mock()\n    sys.setdlopenflags = Mock()\n    xgboost_mock = Mock()\n\n    class Booster:\n        pass\n\n    xgboost_mock.Booster = Booster\n    sys.modules[\"xgboost\"] = xgboost_mock\n\n\ndef validate(\n    paths: List[pathlib.Path], add_ignore: List[str], use_numpydoc: bool\n) -> bool:\n    \"\"\"\n    Perform pydocstyle and numpydoc checks.\n\n    Parameters\n    ----------\n    paths : List[pathlib.Path]\n        Filenames of directories for check.\n    add_ignore : List[str]\n       
 `pydocstyle` error codes which are not verified.\n    use_numpydoc : bool\n        Determine if numpydoc checks are needed.\n\n    Returns\n    -------\n    is_successfull : bool\n        Return True if all checks are successful.\n    \"\"\"\n    is_successfull = True\n    for path in paths:\n        if not pydocstyle_validate(path, add_ignore, use_numpydoc):\n            is_successfull = False\n        if use_numpydoc:\n            if not numpydoc_validate(path):\n                is_successfull = False\n    return is_successfull\n\n\ndef check_args(args: argparse.Namespace):\n    \"\"\"\n    Check the obtained values for correctness.\n\n    Parameters\n    ----------\n    args : argparse.Namespace\n        Parser arguments.\n\n    Raises\n    ------\n    ValueError\n        Occurs in case of non-existent files or directories.\n    \"\"\"\n    for path in args.paths:\n        if not path.exists():\n            raise ValueError(f\"{path} does not exist\")\n        abs_path = os.path.abspath(path)\n        if not abs_path.startswith(MODIN_PATH):\n            raise ValueError(\n                \"it is unsupported to use this script on files from another \"\n                + f\"repository; script' repo '{MODIN_PATH}', \"\n                + f\"input path '{abs_path}'\"\n            )\n\n\ndef get_args() -> argparse.Namespace:\n    \"\"\"\n    Get args from cli with validation.\n\n    Returns\n    -------\n    argparse.Namespace\n    \"\"\"\n    parser = argparse.ArgumentParser(\n        description=\"Check docstrings by using pydocstyle and numpydoc\"\n    )\n    parser.add_argument(\n        \"paths\",\n        nargs=\"+\",\n        type=pathlib.Path,\n        help=\"Filenames or directories; in case of direstories perform recursive check\",\n    )\n    parser.add_argument(\n        \"--add-ignore\",\n        nargs=\"*\",\n        default=[],\n        help=\"Pydocstyle error codes; for example: D100,D100,D102\",\n    )\n    parser.add_argument(\n        
\"--disable-numpydoc\",\n        default=False,\n        action=\"store_true\",\n        help=\"Determine if numpydoc checks are not needed\",\n    )\n    args = parser.parse_args()\n    check_args(args)\n    return args\n\n\nif __name__ == \"__main__\":\n    args = get_args()\n    monkeypatching()\n    if not validate(args.paths, args.add_ignore, not args.disable_numpydoc):\n        logging.error(\"INVALID DOCUMENTATION FOUND\")\n        exit(1)\n    logging.info(\"SUCCESSFUL CHECK\")\n"
  },
  {
    "path": "scripts/release.py",
    "content": "import argparse\nimport atexit\nimport collections\nimport json\nimport re\nimport sys\nfrom pathlib import Path\n\nimport github\nimport pygit2\nfrom packaging import version\n\n\nclass GithubUserResolver:\n    def __init__(self, email2commit, token):\n        self.__cache_file = Path(__file__).parent / \"gh-users-cache.json\"\n        self.__cache = (\n            json.loads(self.__cache_file.read_text())\n            if self.__cache_file.exists()\n            else {}\n        )\n        # filter unknown users hoping we'd be able to find them this time\n        self.__cache = {key: value for key, value in self.__cache.items() if value}\n        # using anonymous access if token not specified\n        self.__github = github.Github(token or None)\n        self.__modin_repo = self.__github.get_repo(\"modin-project/modin\")\n        self.__email2commit = email2commit\n        atexit.register(self.__save)\n\n    def __search_commits(self, term):\n        if commit := self.__email2commit.get(term):\n            gh_commit = self.__modin_repo.get_commit(str(commit))\n            return gh_commit.author.login\n        return None\n\n    @staticmethod\n    def __is_email(term):\n        return re.match(r\".*@.*\\..*\", term)\n\n    def __search_github(self, term):\n        search = f\"in:email {term}\" if self.__is_email(term) else f\"fullname:{term}\"\n        match = [user.login for user in self.__github.search_users(search)]\n        return match[0] if len(match) == 1 else None\n\n    def __try_user(self, term):\n        if self.__is_email(term):\n            return None\n        try:\n            return self.__github.get_user(term).login\n        except github.GithubException as ex:\n            if ex.status != 404:\n                raise\n            return None\n\n    def __resolve_single(self, term):\n        return (\n            self.__search_commits(term)\n            or self.__search_github(term)\n            or self.__try_user(term)\n        
)\n\n    def __resolve_cache(self, name, email):\n        return self.__cache.get(f\"{name} <{email}>\", None)\n\n    def __register(self, name, email, match):\n        self.__cache[f\"{name} <{email}>\"] = match\n\n    def resolve(self, people):\n        logins, unknowns = set(), set()\n\n        for name, email in people:\n            if match := self.__resolve_cache(name, email):\n                logins.add(match)\n            elif match := self.__resolve_single(email):\n                self.__register(name, email, match)\n                logins.add(match)\n            else:\n                if match := self.__resolve_single(name):\n                    logins.add(match)\n                else:\n                    unknowns.add((name, email))\n                self.__register(name, email, match)\n\n        return logins, unknowns\n\n    def resolve_by_reviews(self, unknowns, email2pr):\n        logins, new_unknowns = set(), set()\n        for name, email in unknowns:\n            commit = self.__modin_repo.get_commit(str(email2pr[email]))\n            found = set()\n            for pull in commit.get_pulls():\n                for review in pull.get_reviews():\n                    user = review.user\n                    if user.name == name and (not user.email or user.email == email):\n                        found.add(user.login)\n\n            if len(found) == 1:\n                self.__register(name, email, list(found)[0])\n                logins |= found\n            else:\n                new_unknowns.add((name, email))\n\n        return logins, new_unknowns\n\n    def __save(self):\n        self.__cache_file.write_text(json.dumps(self.__cache, indent=4, sort_keys=True))\n\n\nclass GitWrapper:\n    def __init__(self):\n        self.repo = pygit2.Repository(Path(__file__).parent)\n\n    def is_on_main(self):\n        return self.repo.references[\"refs/heads/main\"] == self.repo.head\n\n    @staticmethod\n    def __get_tag_version(entry):\n        try:\n          
  return version.parse(entry.lstrip(\"refs/tags/\"))\n        except version.InvalidVersion as ex:\n            return f'<bad version \"{entry}\": {ex}>'\n\n    def get_previous_release(self, rel_type):\n        tags = [\n            (entry, self.__get_tag_version(entry))\n            for entry in self.repo.references\n            if entry.startswith(\"refs/tags/\")\n        ]\n        # filter away legacy versions (which aren't following the proper naming schema);\n        # also skip pre-releases\n        tags = [\n            (entry, ver)\n            for entry, ver in tags\n            if isinstance(ver, version.Version) and not ver.pre\n        ]\n        if rel_type == \"minor\":\n            # leave only minor releases\n            tags = [(entry, ver) for entry, ver in tags if ver.micro == 0]\n        else:\n            assert rel_type == \"patch\"\n        prev_ref, prev_ver = max(tags, key=lambda pair: pair[1])\n        return prev_ref, self.repo.references[prev_ref].peel(), prev_ver\n\n    def get_commits_upto(self, stop_commit):\n        history = []\n        for obj in self.repo.walk(self.repo.head.target):\n            if obj.id == stop_commit.id:\n                break\n            history.append(obj)\n        else:\n            raise ValueError(\"Current HEAD is not derived from previous release\")\n        return history\n\n    def ensure_title_link(self, obj: pygit2.Commit):\n        title = obj.message.splitlines()[0]\n        if not re.match(r\".*\\(#(\\d+)\\)$\", title):\n            title += f\" ({obj.short_id})\"\n        return title\n\n\ndef make_notes(args):\n    wrapper = GitWrapper()\n    release_type = \"minor\" if wrapper.is_on_main() else \"patch\"\n    sys.stderr.write(f\"Detected release type: {release_type}\\n\")\n\n    prev_ref, prev_commit, prev_ver = wrapper.get_previous_release(release_type)\n    sys.stderr.write(f\"Previous {release_type} release: {prev_ref}\\n\")\n\n    next_major, next_minor, next_patch = prev_ver.release\n  
  if release_type == \"minor\":\n        next_minor += 1\n    elif release_type == \"patch\":\n        next_patch += 1\n    else:\n        raise ValueError(f\"Unexpected release type: {release_type}\")\n    next_ver = version.Version(f\"{next_major}.{next_minor}.{next_patch}\")\n\n    sys.stderr.write(f\"Computing release notes for {prev_ver} -> {next_ver}...\\n\")\n    try:\n        history = wrapper.get_commits_upto(prev_commit)\n    except ValueError as ex:\n        sys.stderr.write(\n            f\"{ex}: did you forget to checkout correct branch or pull tags?\"\n        )\n        return 1\n    if not history:\n        sys.stderr.write(f\"No commits since {prev_ver} found, nothing to generate!\\n\")\n        return 1\n\n    titles = collections.defaultdict(list)\n    people = set()\n    email2commit, email2pr = {}, {}\n    for obj in history:\n        title = obj.message.splitlines()[0]\n        titles[title.split(\"-\")[0]].append(obj)\n        new_people = set(\n            re.findall(\n                r\"(?:(?:Signed-off-by|Co-authored-by):\\s*)([\\w\\s,]+?)\\s*<([^>]+)>\",\n                obj.message,\n            )\n        )\n        for _, email in new_people:\n            email2pr[email] = obj.id\n        people |= new_people\n        email2commit[obj.author.email] = obj.id\n    sys.stderr.write(f\"Found {len(history)} commit(s) since {prev_ref}\\n\")\n\n    sys.stderr.write(\"Resolving contributors...\\n\")\n    user_resolver = GithubUserResolver(email2commit, args.token)\n    logins, unknowns = user_resolver.resolve(people)\n    new_logins, unknowns = user_resolver.resolve_by_reviews(unknowns, email2pr)\n    logins |= new_logins\n    sys.stderr.write(f\"Found {len(logins)} GitHub usernames.\\n\")\n    if unknowns:\n        sys.stderr.write(\n            f\"Warning! 
Failed to resolve {len(unknowns)} usernames, please resolve them manually!\\n\"\n        )\n\n    sections = [\n        (\"Stability and Bugfixes\", \"FIX\"),\n        (\"Performance enhancements\", \"PERF\"),\n        (\"Refactor Codebase\", \"REFACTOR\"),\n        (\"Update testing suite\", \"TEST\"),\n        (\"Documentation improvements\", \"DOCS\"),\n        (\"New Features\", \"FEAT\"),\n    ]\n\n    notes = rf\"\"\"Modin {next_ver}\n\n<Please fill in short release summary>\n\nKey Features and Updates Since {prev_ver}\n-------------------------------{'-' * len(str(prev_ver))}\n\"\"\"\n\n    def _add_section(section, prs):\n        nonlocal notes\n        if prs:\n            notes += f\"* {section}\\n\"\n            notes += \"\\n\".join(\n                [\n                    f\"  * {wrapper.ensure_title_link(obj)}\"\n                    for obj in sorted(prs, key=lambda obj: obj.message)\n                ]\n            )\n            notes += \"\\n\"\n\n    for section, key in sections:\n        _add_section(section, titles.pop(key, None))\n\n    uncategorized = sum(titles.values(), [])\n    _add_section(\"Uncategorized improvements\", uncategorized)\n\n    notes += r\"\"\"\nContributors\n------------\n\"\"\"\n    notes += \"\\n\".join(f\"@{login}\" for login in sorted(logins)) + \"\\n\"\n    notes += (\n        \"\\n\".join(\n            f\"<unknown-login> {name} <{email}>\" for name, email in sorted(unknowns)\n        )\n        + \"\\n\"\n    )\n\n    sys.stdout.write(notes)\n\n\ndef main():\n    parse = argparse.ArgumentParser()\n    parse.add_argument(\n        \"--token\",\n        type=str,\n        default=\"\",\n        help=\"GitHub token for queries (optional, bumps up rate limit)\",\n    )\n    parse.set_defaults(func=lambda _: parse.print_usage())\n    subparsers = parse.add_subparsers()\n\n    notes = subparsers.add_parser(\"notes\", help=\"Generate release notes\")\n    notes.set_defaults(func=make_notes)\n\n    args = parse.parse_args()\n  
  sys.exit(args.func(args))\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "scripts/test/__init__.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n"
  },
  {
    "path": "scripts/test/examples.py",
    "content": "# noqa: MD01\n# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\n# noqa: MD02\n\"\"\"Function examples for docstring testing.\"\"\"\n\n\nclass weakdict(dict):  # noqa: GL08\n    __slots__ = (\"__weakref__\",)\n\n\ndef optional_square(number: int = 5) -> int:  # noqa\n    \"\"\"\n    Square `number`.\n\n    The function from Modin.\n\n    Parameters\n    ----------\n    number : int\n        Some number.\n\n    Notes\n    -----\n    The `optional_square` Modin function from modin/scripts/examples.py.\n    \"\"\"\n    return number**2\n\n\ndef optional_square_empty_parameters(number: int = 5) -> int:\n    \"\"\"\n    Parameters\n    ----------\n    \"\"\"\n    return number**2\n\n\ndef square_summary(number: int) -> int:  # noqa: PR01, GL08\n    \"\"\"\n    Square `number`.\n\n    See https://github.com/ray-project/ray.\n\n    Examples\n    --------\n    The function that will never be used in modin.pandas.DataFrame same as in\n    pandas or NumPy.\n    \"\"\"\n    return number**2\n"
  },
  {
    "path": "scripts/test/test_doc_checker.py",
    "content": "# Licensed to Modin Development Team under one or more contributor license agreements.\n# See the NOTICE file distributed with this work for additional information regarding\n# copyright ownership.  The Modin Development Team licenses this file to you under the\n# Apache License, Version 2.0 (the \"License\"); you may not use this file except in\n# compliance with the License.  You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software distributed under\n# the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF\n# ANY KIND, either express or implied. See the License for the specific language\n# governing permissions and limitations under the License.\n\nimport pytest\n\nfrom scripts.doc_checker import (\n    MODIN_ERROR_CODES,\n    check_optional_args,\n    check_spelling_words,\n    construct_validator,\n    get_noqa_checks,\n    get_optional_args,\n)\n\n\n@pytest.mark.parametrize(\n    \"import_path, result\",\n    [\n        (\"scripts.test.examples.optional_square\", {\"number\": 5}),\n        (\"scripts.test.examples.optional_square_empty_parameters\", {\"number\": 5}),\n        (\"scripts.test.examples.square_summary\", {}),\n        (\"scripts.test.examples.weakdict\", {}),\n        (\"scripts.test.examples\", {}),\n    ],\n)\ndef test_get_optional_args(import_path, result):\n    optional_args = get_optional_args(construct_validator(import_path))\n    assert optional_args == result\n\n\n@pytest.mark.parametrize(\n    \"import_path, result\",\n    [\n        (\n            \"scripts.test.examples.optional_square\",\n            [\n                (\n                    \"MD01\",\n                    MODIN_ERROR_CODES[\"MD01\"].format(parameter=\"number\", found=\"int\"),\n                )\n            ],\n        ),\n        (\"scripts.test.examples.optional_square_empty_parameters\", []),\n        
(\"scripts.test.examples.square_summary\", []),\n        (\"scripts.test.examples.weakdict\", []),\n        (\"scripts.test.examples\", []),\n    ],\n)\ndef test_check_optional_args(import_path, result):\n    errors = check_optional_args(construct_validator(import_path))\n    assert errors == result\n\n\n@pytest.mark.parametrize(\n    \"import_path, result\",\n    [\n        (\"scripts.test.examples.optional_square\", []),\n        (\n            \"scripts.test.examples.square_summary\",\n            [\n                (\"MD02\", 57, \"Pandas\", \"pandas\"),\n                (\"MD02\", 57, \"Numpy\", \"NumPy\"),\n            ],\n        ),\n        (\"scripts.test.examples.optional_square_empty_parameters\", []),\n        (\"scripts.test.examples.weakdict\", []),\n        (\"scripts.test.examples\", []),\n    ],\n)\ndef test_check_spelling_words(import_path, result):\n    result_errors = []\n    for code, line, word, reference in result:\n        result_errors.append(\n            (\n                code,\n                MODIN_ERROR_CODES[code].format(\n                    line=line, word=word, reference=reference\n                ),\n            )\n        )\n    errors = check_spelling_words(construct_validator(import_path))\n    # the order of incorrect words found on the same line is not guaranteed\n    for error in errors:\n        assert error in result_errors\n\n\n@pytest.mark.parametrize(\n    \"import_path, result\",\n    [\n        (\"scripts.test.examples.optional_square\", [\"all\"]),\n        (\"scripts.test.examples.optional_square_empty_parameters\", []),\n        (\"scripts.test.examples.square_summary\", [\"PR01\", \"GL08\"]),\n        (\"scripts.test.examples.weakdict\", [\"GL08\"]),\n        (\"scripts.test.examples\", [\"MD02\"]),\n    ],\n)\ndef test_get_noqa_checks(import_path, result):\n    noqa_checks = get_noqa_checks(construct_validator(import_path))\n    assert noqa_checks == result\n"
  },
  {
    "path": "setup.cfg",
    "content": "\n# See the docstring in versioneer.py for instructions. Note that you must\n# re-run 'versioneer.py setup' after changing this section, and commit the\n# resulting files.\n\n[versioneer]\nVCS = git\nstyle = pep440\nversionfile_source = modin/_version.py\nversionfile_build = modin/_version.py\ntag_prefix =\nparentdir_prefix = modin-\n\n[tool:pytest]\naddopts = --cov-config=setup.cfg --cov=modin --cov-append --cov-report= -m \"not exclude_by_default\"\nxfail_strict=true\nmarkers =\n    exclude_in_sanity\n    exclude_by_default\nfilterwarnings =\n    error:.*defaulting to pandas.*:UserWarning\n\n[isort]\nprofile = black\n\n[flake8]\nmax-line-length = 88\nignore = E203, E266, E501, W503\nselect = B,C,E,F,W,T,B9,NIC\nper-file-ignores =\n    modin/pandas/__init__.py:E402,F401\n    stress_tests/kaggle/*:E402\n    modin/experimental/pandas/__init__.py:E402\n    modin/_version.py:T201\n    modin/tests/*:E402\n\n[coverage:run]\nsource =\n    # modin sources\n    modin/*\nomit =\n    # These are not covered by any test because it is an experimental API\n    modin/sql/*\n    modin/experimental/sql*\n    # This is not used yet\n    modin/pandas/index/*\n    # Skip tests\n    modin/tests/*\n    # Plotting is not tested\n    modin/pandas/plotting.py\n    # Skip CLI part\n    modin/__main__.py\n    # Skip third-party stuff\n    modin/_version.py\nparallel = True\n# The use of this feature is one of the recommendations of codecov if the\n# tests are run in different environments (for example, on different operating\n# systems): https://coverage.readthedocs.io/en/stable/config.html#run-relative-files\nrelative_files = true\n\n[coverage:report]\nexclude_lines =\n    # Have to re-enable the standard pragma\n    pragma: no cover\n    # Don't complain if tests don't hit defensive assertion code:\n    raise AssertionError\n    raise NotImplementedError\n    raise ImportError\n    assert\n    pass\n"
  },
  {
    "path": "setup.py",
    "content": "from setuptools import find_packages, setup\n\nimport versioneer\n\nwith open(\"README.md\", \"r\", encoding=\"utf-8\") as fh:\n    long_description = fh.read()\n\ndask_deps = [\"dask>=2.22.0\", \"distributed>=2.22.0\"]\nray_deps = [\"ray>=2.10.0,<3\", \"pyarrow>=10.0.1\"]\nmpi_deps = [\"unidist[mpi]>=0.2.1\"]\nconsortium_standard_deps = [\"dataframe-api-compat>=0.2.7\"]\nspreadsheet_deps = [\"modin-spreadsheet>=0.1.0\"]\n# Currently, Modin does not include `mpi` option in `all`.\n# Otherwise, installation of modin[all] would fail because\n# users need to have a working MPI implementation and\n# certain software installed beforehand.\nall_deps = dask_deps + ray_deps + spreadsheet_deps + consortium_standard_deps\n\n# Distribute 'modin-autoimport-pandas.pth' along with binary and source distributions.\n# This file provides the \"import pandas before Ray init\" feature if specific\n# environment variable is set (see https://github.com/modin-project/modin/issues/4564).\ncmdclass = versioneer.get_cmdclass()\nextra_files = [\"modin-autoimport-pandas.pth\"]\n\n\nclass AddPthFileBuild(cmdclass[\"build_py\"]):\n    def _get_data_files(self):\n        return (super()._get_data_files() or []) + [\n            (\".\", \".\", self.build_lib, extra_files)\n        ]\n\n\nclass AddPthFileSDist(cmdclass[\"sdist\"]):\n    def make_distribution(self):\n        self.filelist.extend(extra_files)\n        return super().make_distribution()\n\n\ncmdclass[\"build_py\"] = AddPthFileBuild\ncmdclass[\"sdist\"] = AddPthFileSDist\n\nsetup(\n    name=\"modin\",\n    version=versioneer.get_version(),\n    cmdclass=cmdclass,\n    description=\"Modin: Make your pandas code run faster by changing one line of code.\",\n    packages=find_packages(exclude=[\"scripts\", \"scripts.*\"]),\n    include_package_data=True,\n    license=\"Apache 2\",\n    url=\"https://github.com/modin-project/modin\",\n    long_description=long_description,\n    
long_description_content_type=\"text/markdown\",\n    install_requires=[\n        \"pandas>=2.2,<2.4\",\n        \"packaging>=21.0\",\n        \"numpy>=1.22.4\",\n        \"fsspec>=2022.11.0\",\n        \"psutil>=5.8.0\",\n        \"typing-extensions\",\n    ],\n    extras_require={\n        # can be installed by pip install modin[dask]\n        \"dask\": dask_deps,\n        \"ray\": ray_deps,\n        \"mpi\": mpi_deps,\n        \"consortium-standard\": consortium_standard_deps,\n        \"spreadsheet\": spreadsheet_deps,\n        \"all\": all_deps,\n    },\n    python_requires=\">=3.9\",\n)\n"
  },
  {
    "path": "stress_tests/kaggle/kaggle10.py",
    "content": "import matplotlib\n\nmatplotlib.use(\"PS\")\nimport warnings\n\nimport matplotlib.pyplot as plt\nimport numpy as np  # linear algebra\nimport seaborn as sns\n\nimport modin.pandas as pd  # data processing, CSV file I/O (e.g. pd.read_csv)\n\nwarnings.filterwarnings(\"ignore\")\ndata = pd.read_csv(\"column_2C_weka.csv\")\nprint(plt.style.available)  # look at available plot styles\nplt.style.use(\"ggplot\")\ndata.head()\ndata.info()\ndata.describe()\ncolor_list = [\"red\" if i == \"Abnormal\" else \"green\" for i in data.loc[:, \"class\"]]\npd.plotting.scatter_matrix(\n    data.loc[:, data.columns != \"class\"],\n    c=color_list,\n    figsize=[15, 15],\n    diagonal=\"hist\",\n    alpha=0.5,\n    s=200,\n    marker=\"*\",\n    edgecolor=\"black\",\n)\nplt.show()\nsns.countplot(x=\"class\", data=data)\ndata.loc[:, \"class\"].value_counts()\nfrom sklearn.neighbors import KNeighborsClassifier\n\nknn = KNeighborsClassifier(n_neighbors=3)\nx, y = data.loc[:, data.columns != \"class\"], data.loc[:, \"class\"]\nknn.fit(x, y)\nprediction = knn.predict(x)\nprint(\"Prediction: {}\".format(prediction))\nfrom sklearn.model_selection import train_test_split\n\nx_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=1)\nknn = KNeighborsClassifier(n_neighbors=3)\nx, y = data.loc[:, data.columns != \"class\"], data.loc[:, \"class\"]\nknn.fit(x_train, y_train)\nprediction = knn.predict(x_test)\nprint(\"With KNN (K=3) accuracy is: \", knn.score(x_test, y_test))  # accuracy\nneig = np.arange(1, 25)\ntrain_accuracy = []\ntest_accuracy = []\nfor i, k in enumerate(neig):\n    knn = KNeighborsClassifier(n_neighbors=k)\n    knn.fit(x_train, y_train)\n    train_accuracy.append(knn.score(x_train, y_train))\n    test_accuracy.append(knn.score(x_test, y_test))\nplt.figure(figsize=[13, 8])\nplt.plot(neig, test_accuracy, label=\"Testing Accuracy\")\nplt.plot(neig, train_accuracy, label=\"Training Accuracy\")\nplt.legend()\nplt.title(\"-value VS 
Accuracy\")\nplt.xlabel(\"Number of Neighbors\")\nplt.ylabel(\"Accuracy\")\nplt.xticks(neig)\nplt.savefig(\"graph.png\")\nplt.show()\nprint(\n    \"Best accuracy is {} with K = {}\".format(\n        np.max(test_accuracy), 1 + test_accuracy.index(np.max(test_accuracy))\n    )\n)\ndata1 = data[data[\"class\"] == \"A\"]\nx = np.array(data1.loc[:, \"pelvic_incidence\"]).reshape(-1, 1)\ny = np.array(data1.loc[:, \"sacral_slope\"]).reshape(-1, 1)\nplt.figure(figsize=[10, 10])\nplt.scatter(x=x, y=y)\nplt.xlabel(\"pelvic_incidence\")\nplt.ylabel(\"sacral_slope\")\nplt.show()\nfrom sklearn.linear_model import LinearRegression\n\nreg = LinearRegression()\npredict_space = np.linspace(min(x), max(x)).reshape(-1, 1)\nreg.fit(x, y)\npredicted = reg.predict(predict_space)\nprint(\"R^2 score: \", reg.score(x, y))\nplt.plot(predict_space, predicted, color=\"black\", linewidth=3)\nplt.scatter(x=x, y=y)\nplt.xlabel(\"pelvic_incidence\")\nplt.ylabel(\"sacral_slope\")\nplt.show()\nfrom sklearn.model_selection import cross_val_score\n\nreg = LinearRegression()\nk = 5\ncv_result = cross_val_score(reg, x, y, cv=k)  # uses R^2 as score\nprint(\"CV Scores: \", cv_result)\nprint(\"CV scores average: \", np.sum(cv_result) / k)\nfrom sklearn.linear_model import Ridge\n\nx_train, x_test, y_train, y_test = train_test_split(x, y, random_state=2, test_size=0.3)\nridge = Ridge(alpha=0.1, normalize=True)\nridge.fit(x_train, y_train)\nridge_predict = ridge.predict(x_test)\nprint(\"Ridge score: \", ridge.score(x_test, y_test))\nfrom sklearn.linear_model import Lasso\n\nx = np.array(\n    data1.loc[\n        :,\n        [\n            \"pelvic_incidence\",\n            \"pelvic_tilt numeric\",\n            \"lumbar_lordosis_angle\",\n            \"pelvic_radius\",\n        ],\n    ]\n)\nx_train, x_test, y_train, y_test = train_test_split(x, y, random_state=3, test_size=0.3)\nlasso = Lasso(alpha=0.1, normalize=True)\nlasso.fit(x_train, y_train)\nridge_predict = lasso.predict(x_test)\nprint(\"Lasso 
score: \", lasso.score(x_test, y_test))\nprint(\"Lasso coefficients: \", lasso.coef_)\nfrom sklearn.ensemble import RandomForestClassifier\nfrom sklearn.metrics import classification_report, confusion_matrix\n\nx, y = data.loc[:, data.columns != \"class\"], data.loc[:, \"class\"]\nx_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=1)\nrf = RandomForestClassifier(random_state=4)\nrf.fit(x_train, y_train)\ny_pred = rf.predict(x_test)\ncm = confusion_matrix(y_test, y_pred)\nprint(\"Confusion matrix: \\n\", cm)\nprint(\"Classification report: \\n\", classification_report(y_test, y_pred))\nsns.heatmap(cm, annot=True, fmt=\"d\")\nplt.show()\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn.metrics import classification_report, confusion_matrix, roc_curve\n\ndata[\"class_binary\"] = [1 if i == \"Abnormal\" else 0 for i in data.loc[:, \"class\"]]\nx, y = (\n    data.loc[:, (data.columns != \"class\") & (data.columns != \"class_binary\")],\n    data.loc[:, \"class_binary\"],\n)\nx_train, x_test, y_train, y_test = train_test_split(\n    x, y, test_size=0.3, random_state=42\n)\nlogreg = LogisticRegression()\nlogreg.fit(x_train, y_train)\ny_pred_prob = logreg.predict_proba(x_test)[:, 1]\nfpr, tpr, thresholds = roc_curve(y_test, y_pred_prob)\nplt.plot([0, 1], [0, 1], \"k--\")\nplt.plot(fpr, tpr)\nplt.xlabel(\"False Positive Rate\")\nplt.ylabel(\"True Positive Rate\")\nplt.title(\"ROC\")\nplt.show()\nfrom sklearn.model_selection import GridSearchCV\n\ngrid = {\"n_neighbors\": np.arange(1, 50)}\nknn = KNeighborsClassifier()\nknn_cv = GridSearchCV(knn, grid, cv=3)  # GridSearchCV\nknn_cv.fit(x, y)  # Fit\nprint(\"Tuned hyperparameter k: {}\".format(knn_cv.best_params_))\nprint(\"Best score: {}\".format(knn_cv.best_score_))\nparam_grid = {\"C\": np.logspace(-3, 3, 7), \"penalty\": [\"l1\", \"l2\"]}\nx_train, x_test, y_train, y_test = train_test_split(\n    x, y, test_size=0.3, random_state=12\n)\nlogreg = 
LogisticRegression()\nlogreg_cv = GridSearchCV(logreg, param_grid, cv=3)\nlogreg_cv.fit(x_train, y_train)\nprint(\"Tuned hyperparameters : {}\".format(logreg_cv.best_params_))\nprint(\"Best Accuracy: {}\".format(logreg_cv.best_score_))\ndata = pd.read_csv(\"column_2C_weka.csv\")\ndf = pd.get_dummies(data)\ndf.head(10)\ndf.drop(\"class_Normal\", axis=1, inplace=True)\ndf.head(10)\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.svm import SVC\n\nsteps = [(\"scalar\", StandardScaler()), (\"SVM\", SVC())]\npipeline = Pipeline(steps)\nparameters = {\"SVM__C\": [1, 10, 100], \"SVM__gamma\": [0.1, 0.01]}\nx_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=1)\ncv = GridSearchCV(pipeline, param_grid=parameters, cv=3)\ncv.fit(x_train, y_train)\ny_pred = cv.predict(x_test)\nprint(\"Accuracy: {}\".format(cv.score(x_test, y_test)))\nprint(\"Tuned Model Parameters: {}\".format(cv.best_params_))\ndata = pd.read_csv(\"column_2C_weka.csv\")\nplt.scatter(data[\"pelvic_radius\"], data[\"degree_spondylolisthesis\"])\nplt.xlabel(\"pelvic_radius\")\nplt.ylabel(\"degree_spondylolisthesis\")\nplt.show()\ndata2 = data.loc[:, [\"degree_spondylolisthesis\", \"pelvic_radius\"]]\nfrom sklearn.cluster import KMeans\n\nkmeans = KMeans(n_clusters=2)\nkmeans.fit(data2)\nlabels = kmeans.predict(data2)\nplt.scatter(data[\"pelvic_radius\"], data[\"degree_spondylolisthesis\"], c=labels)\nplt.xlabel(\"pelvic_radius\")\nplt.xlabel(\"degree_spondylolisthesis\")\nplt.show()\ndf = pd.DataFrame({\"labels\": labels, \"class\": data[\"class\"]})\nct = pd.crosstab(df[\"labels\"], df[\"class\"])\nprint(ct)\ninertia_list = np.empty(8)\nfor i in range(1, 8):\n    kmeans = KMeans(n_clusters=i)\n    kmeans.fit(data2)\n    inertia_list[i] = kmeans.inertia_\nplt.plot(range(0, 8), inertia_list, \"-o\")\nplt.xlabel(\"Number of cluster\")\nplt.ylabel(\"Inertia\")\nplt.show()\ndata = pd.read_csv(\"column_2C_weka.csv\")\ndata3 = 
data.drop(\"class\", axis=1)\nfrom sklearn.pipeline import make_pipeline\nfrom sklearn.preprocessing import StandardScaler\n\nscalar = StandardScaler()\nkmeans = KMeans(n_clusters=2)\npipe = make_pipeline(scalar, kmeans)\npipe.fit(data3)\nlabels = pipe.predict(data3)\ndf = pd.DataFrame({\"labels\": labels, \"class\": data[\"class\"]})\nct = pd.crosstab(df[\"labels\"], df[\"class\"])\nprint(ct)\nfrom scipy.cluster.hierarchy import dendrogram, linkage\n\nmerg = linkage(data3.iloc[200:220, :], method=\"single\")\ndendrogram(merg, leaf_rotation=90, leaf_font_size=6)\nplt.show()\nfrom sklearn.manifold import TSNE\n\nmodel = TSNE(learning_rate=100)\ntransformed = model.fit_transform(data2)\nx = transformed[:, 0]\ny = transformed[:, 1]\nplt.scatter(x, y, c=color_list)\nplt.xlabel(\"pelvic_radius\")\nplt.xlabel(\"degree_spondylolisthesis\")\nplt.show()\nfrom sklearn.decomposition import PCA\n\nmodel = PCA()\nmodel.fit(data3)\ntransformed = model.transform(data3)\nprint(\"Principle components: \", model.components_)\nscaler = StandardScaler()\npca = PCA()\npipeline = make_pipeline(scaler, pca)\npipeline.fit(data3)\nplt.bar(range(pca.n_components_), pca.explained_variance_)\nplt.xlabel(\"PCA feature\")\nplt.ylabel(\"variance\")\nplt.show()\npca = PCA(n_components=2)\npca.fit(data3)\ntransformed = pca.transform(data3)\nx = transformed[:, 0]\ny = transformed[:, 1]\nplt.scatter(x, y, c=color_list)\nplt.show()\n"
  },
  {
    "path": "stress_tests/kaggle/kaggle12.py",
    "content": "import matplotlib\n\nmatplotlib.use(\"PS\")\nfrom collections import Counter\n\nimport matplotlib.pyplot as plt\nimport numpy as np\nimport seaborn as sns\nfrom sklearn.discriminant_analysis import LinearDiscriminantAnalysis\nfrom sklearn.ensemble import (\n    AdaBoostClassifier,\n    ExtraTreesClassifier,\n    GradientBoostingClassifier,\n    RandomForestClassifier,\n    VotingClassifier,\n)\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn.model_selection import (\n    GridSearchCV,\n    StratifiedKFold,\n    cross_val_score,\n    learning_curve,\n)\nfrom sklearn.neighbors import KNeighborsClassifier\nfrom sklearn.neural_network import MLPClassifier\nfrom sklearn.svm import SVC\nfrom sklearn.tree import DecisionTreeClassifier\n\nimport modin.pandas as pd\n\nsns.set(style=\"white\", context=\"notebook\", palette=\"deep\")\ntrain = pd.read_csv(\"train.csv\")\ntest = pd.read_csv(\"test.csv\")\nIDtest = test[\"PassengerId\"]\n\n\ndef detect_outliers(df, n, features):\n    \"\"\"Return the index labels of rows lying outside 1.5 * IQR in more than n of the given features.\"\"\"\n    outlier_indices = []\n    for col in features:\n        Q1 = np.percentile(df[col], 25)\n        Q3 = np.percentile(df[col], 75)\n        IQR = Q3 - Q1\n        outlier_step = 1.5 * IQR\n        outlier_list_col = df[\n            (df[col] < Q1 - outlier_step) | (df[col] > Q3 + outlier_step)\n        ].index\n        outlier_indices.extend(outlier_list_col)\n    outlier_indices = Counter(outlier_indices)\n    multiple_outliers = [k for k, v in outlier_indices.items() if v > n]\n    return multiple_outliers\n\n\nOutliers_to_drop = detect_outliers(train, 2, [\"Age\", \"SibSp\", \"Parch\", \"Fare\"])\ntrain.loc[Outliers_to_drop]  # Show the outliers rows\ntrain = train.drop(Outliers_to_drop, axis=0).reset_index(drop=True)\ntrain_len = len(train)\ndataset = pd.concat(objs=[train, test], axis=0).reset_index(drop=True)\ndataset = dataset.fillna(np.nan)\ndataset.isnull().sum()\ntrain.info()\ntrain.isnull().sum()\ntrain.head()\ntrain.dtypes\ntrain.describe()\ng = sns.heatmap(\n    train[[\"Survived\", \"SibSp\", \"Parch\", \"Age\", \"Fare\"]].corr(),\n    annot=True,\n    fmt=\".2f\",\n    cmap=\"coolwarm\",\n)\ng = sns.factorplot(\n    x=\"SibSp\", y=\"Survived\", data=train, kind=\"bar\", size=6, palette=\"muted\"\n)\ng.despine(left=True)\ng = g.set_ylabels(\"survival probability\")\ng = sns.factorplot(\n    x=\"Parch\", y=\"Survived\", data=train, kind=\"bar\", size=6, palette=\"muted\"\n)\ng.despine(left=True)\ng = g.set_ylabels(\"survival probability\")\ndataset[\"Fare\"].isnull().sum()\ndataset[\"Fare\"] = dataset[\"Fare\"].fillna(dataset[\"Fare\"].median())\ng = sns.distplot(\n    dataset[\"Fare\"], color=\"m\", label=\"Skewness : %.2f\" % (dataset[\"Fare\"].skew())\n)\ng = g.legend(loc=\"best\")\ndataset[\"Fare\"] = dataset[\"Fare\"].map(lambda i: np.log(i) if i > 0 else 0)\ng = sns.distplot(\n    dataset[\"Fare\"], color=\"b\", label=\"Skewness : %.2f\" % (dataset[\"Fare\"].skew())\n)\ng = g.legend(loc=\"best\")\ng = sns.barplot(x=\"Sex\", y=\"Survived\", data=train)\ng = g.set_ylabel(\"Survival Probability\")\ntrain[[\"Sex\", \"Survived\"]].groupby(\"Sex\").mean()\ng = sns.factorplot(\n    x=\"Pclass\", y=\"Survived\", data=train, kind=\"bar\", size=6, palette=\"muted\"\n)\ng.despine(left=True)\ng = g.set_ylabels(\"survival probability\")\ng = sns.factorplot(\n    x=\"Pclass\", y=\"Survived\", hue=\"Sex\", data=train, size=6, kind=\"bar\", palette=\"muted\"\n)\ng.despine(left=True)\ng = g.set_ylabels(\"survival probability\")\ndataset[\"Embarked\"].isnull().sum()\ndataset[\"Embarked\"] = dataset[\"Embarked\"].fillna(\"S\")\ng = sns.factorplot(\n    x=\"Embarked\", y=\"Survived\", data=train, size=6, kind=\"bar\", palette=\"muted\"\n)\ng.despine(left=True)\ng = g.set_ylabels(\"survival probability\")\ng = sns.factorplot(\n    \"Pclass\", col=\"Embarked\", data=train, size=6, kind=\"count\", palette=\"muted\"\n)\ng.despine(left=True)\ng = g.set_ylabels(\"Count\")\ng = sns.factorplot(y=\"Age\", x=\"Sex\", data=dataset, kind=\"box\")\ng = sns.factorplot(y=\"Age\", x=\"Sex\", hue=\"Pclass\", data=dataset, kind=\"box\")\ng = sns.factorplot(y=\"Age\", x=\"Parch\", data=dataset, kind=\"box\")\ng = sns.factorplot(y=\"Age\", x=\"SibSp\", data=dataset, kind=\"box\")\ndataset[\"Sex\"] = dataset[\"Sex\"].map({\"male\": 0, \"female\": 1})\ng = sns.heatmap(\n    dataset[[\"Age\", \"Sex\", \"SibSp\", \"Parch\", \"Pclass\"]].corr(), cmap=\"BrBG\", annot=True\n)\nindex_NaN_age = list(dataset[\"Age\"][dataset[\"Age\"].isnull()].index)\nfor i in index_NaN_age:\n    age_med = dataset[\"Age\"].median()\n    age_pred = dataset[\"Age\"][\n        (\n            (dataset[\"SibSp\"] == dataset.iloc[i][\"SibSp\"])\n            & (dataset[\"Parch\"] == dataset.iloc[i][\"Parch\"])\n            & (dataset[\"Pclass\"] == dataset.iloc[i][\"Pclass\"])\n        )\n    ].median()\n    # Assign through .loc on the frame itself; a chained dataset[...].iloc[i] write can land on a temporary copy.\n    if not np.isnan(age_pred):\n        dataset.loc[i, \"Age\"] = age_pred\n    else:\n        dataset.loc[i, \"Age\"] = age_med\ng = sns.factorplot(x=\"Survived\", y=\"Age\", data=train, kind=\"box\")\ng = sns.factorplot(x=\"Survived\", y=\"Age\", data=train, kind=\"violin\")\ndataset[\"Name\"].head()\ndataset_title = [i.split(\",\")[1].split(\".\")[0].strip() for i in dataset[\"Name\"]]\ndataset[\"Title\"] = pd.Series(dataset_title)\ndataset[\"Title\"].head()\ng = sns.countplot(x=\"Title\", data=dataset)\ng = plt.setp(g.get_xticklabels(), rotation=45)\ndataset[\"Title\"] = dataset[\"Title\"].replace(\n    [\n        \"Lady\",\n        \"the Countess\",\n        \"Countess\",\n        \"Capt\",\n        \"Col\",\n        \"Don\",\n        \"Dr\",\n        \"Major\",\n        \"Rev\",\n        \"Sir\",\n        \"Jonkheer\",\n        \"Dona\",\n    ],\n    \"Rare\",\n)\ndataset[\"Title\"] = dataset[\"Title\"].map(\n    {\"Master\": 0, \"Miss\": 1, \"Ms\": 1, \"Mme\": 1, \"Mlle\": 1, \"Mrs\": 1, \"Mr\": 2, \"Rare\": 3}\n)\ndataset[\"Title\"] = dataset[\"Title\"].astype(int)\ng = sns.countplot(dataset[\"Title\"])\ng = g.set_xticklabels([\"Master\", \"Miss/Ms/Mme/Mlle/Mrs\", \"Mr\", \"Rare\"])\ng = sns.factorplot(x=\"Title\", y=\"Survived\", data=dataset, kind=\"bar\")\ng = g.set_xticklabels([\"Master\", \"Miss-Mrs\", \"Mr\", \"Rare\"])\ng = g.set_ylabels(\"survival probability\")\ndataset.drop(labels=[\"Name\"], axis=1, inplace=True)\ndataset[\"Fsize\"] = dataset[\"SibSp\"] + dataset[\"Parch\"] + 1\ng = sns.factorplot(x=\"Fsize\", y=\"Survived\", data=dataset)\ng = g.set_ylabels(\"Survival Probability\")\ndataset[\"Single\"] = dataset[\"Fsize\"].map(lambda s: 1 if s == 1 else 0)\ndataset[\"SmallF\"] = dataset[\"Fsize\"].map(lambda s: 1 if s == 2 else 0)\ndataset[\"MedF\"] = dataset[\"Fsize\"].map(lambda s: 1 if 3 <= s <= 4 else 0)\ndataset[\"LargeF\"] = dataset[\"Fsize\"].map(lambda s: 1 if s >= 5 else 0)\ng = sns.factorplot(x=\"Single\", y=\"Survived\", data=dataset, kind=\"bar\")\ng = g.set_ylabels(\"Survival Probability\")\ng = sns.factorplot(x=\"SmallF\", y=\"Survived\", data=dataset, kind=\"bar\")\ng = g.set_ylabels(\"Survival Probability\")\ng = sns.factorplot(x=\"MedF\", y=\"Survived\", data=dataset, kind=\"bar\")\ng = g.set_ylabels(\"Survival Probability\")\ng = sns.factorplot(x=\"LargeF\", y=\"Survived\", data=dataset, kind=\"bar\")\ng = g.set_ylabels(\"Survival Probability\")\ndataset = pd.get_dummies(dataset, columns=[\"Title\"])\ndataset = pd.get_dummies(dataset, columns=[\"Embarked\"], prefix=\"Em\")\ndataset.head()\ndataset[\"Cabin\"].head()\ndataset[\"Cabin\"].describe()\ndataset[\"Cabin\"].isnull().sum()\ndataset[\"Cabin\"][dataset[\"Cabin\"].notnull()].head()\ndataset[\"Cabin\"] = pd.Series(\n    [i[0] if not pd.isnull(i) else \"X\" for i in dataset[\"Cabin\"]]\n)\ng = sns.countplot(dataset[\"Cabin\"], order=[\"A\", \"B\", \"C\", \"D\", \"E\", \"F\", \"G\", \"T\", \"X\"])\ng = sns.factorplot(\n    y=\"Survived\",\n    x=\"Cabin\",\n    data=dataset,\n    kind=\"bar\",\n    order=[\"A\", \"B\", \"C\", \"D\", \"E\", \"F\", \"G\", \"T\", \"X\"],\n)\ng = g.set_ylabels(\"Survival Probability\")\ndataset = pd.get_dummies(dataset, columns=[\"Cabin\"], prefix=\"Cabin\")\ndataset[\"Ticket\"].head()\nTicket = []\nfor i in list(dataset.Ticket):\n    if not i.isdigit():\n        Ticket.append(\n            i.replace(\".\", \"\").replace(\"/\", \"\").strip().split(\" \")[0]\n        )  # Take prefix\n    else:\n        Ticket.append(\"X\")\ndataset[\"Ticket\"] = Ticket\ndataset[\"Ticket\"].head()\ndataset = pd.get_dummies(dataset, columns=[\"Ticket\"], prefix=\"T\")\ndataset[\"Pclass\"] = dataset[\"Pclass\"].astype(\"category\")\ndataset = pd.get_dummies(dataset, columns=[\"Pclass\"], prefix=\"Pc\")\ndataset.drop(labels=[\"PassengerId\"], axis=1, inplace=True)\ndataset.head()\ntrain = dataset[:train_len]\ntest = dataset[train_len:]\ntest.drop(labels=[\"Survived\"], axis=1, inplace=True)\ntrain[\"Survived\"] = train[\"Survived\"].astype(int)\nY_train = train[\"Survived\"]\nX_train = train.drop(labels=[\"Survived\"], axis=1)\nkfold = StratifiedKFold(n_splits=10)\nrandom_state = 2\nclassifiers = []\nclassifiers.append(SVC(random_state=random_state))\nclassifiers.append(DecisionTreeClassifier(random_state=random_state))\nclassifiers.append(\n    AdaBoostClassifier(\n        DecisionTreeClassifier(random_state=random_state),\n        random_state=random_state,\n        learning_rate=0.1,\n    )\n)\nclassifiers.append(RandomForestClassifier(random_state=random_state))\nclassifiers.append(ExtraTreesClassifier(random_state=random_state))\nclassifiers.append(GradientBoostingClassifier(random_state=random_state))\nclassifiers.append(MLPClassifier(random_state=random_state))\nclassifiers.append(KNeighborsClassifier())\nclassifiers.append(LogisticRegression(random_state=random_state))\nclassifiers.append(LinearDiscriminantAnalysis())\ncv_results = []\nfor classifier in classifiers:\n    cv_results.append(\n        cross_val_score(\n            classifier, X_train, y=Y_train, scoring=\"accuracy\", cv=kfold, n_jobs=4\n        )\n    )\ncv_means = []\ncv_std = []\nfor cv_result in cv_results:\n    cv_means.append(cv_result.mean())\n    cv_std.append(cv_result.std())\ncv_res = pd.DataFrame(\n    {\n        \"CrossValMeans\": cv_means,\n        \"CrossValerrors\": cv_std,\n        \"Algorithm\": [\n            \"SVC\",\n            \"DecisionTree\",\n            \"AdaBoost\",\n            \"RandomForest\",\n            \"ExtraTrees\",\n            \"GradientBoosting\",\n            \"MultipleLayerPerceptron\",\n            \"KNeighboors\",\n            \"LogisticRegression\",\n            \"LinearDiscriminantAnalysis\",\n        ],\n    }\n)\ng = sns.barplot(\n    \"CrossValMeans\",\n    \"Algorithm\",\n    data=cv_res,\n    palette=\"Set3\",\n    orient=\"h\",\n    **{\"xerr\": cv_std}\n)\ng.set_xlabel(\"Mean Accuracy\")\ng = g.set_title(\"Cross validation scores\")\nDTC = DecisionTreeClassifier()\nadaDTC = AdaBoostClassifier(DTC, random_state=7)\nada_param_grid = {\n    \"base_estimator__criterion\": [\"gini\", \"entropy\"],\n    \"base_estimator__splitter\": [\"best\", \"random\"],\n    \"algorithm\": [\"SAMME\", \"SAMME.R\"],\n    \"n_estimators\": [1, 2],\n    \"learning_rate\": [0.0001, 0.001, 0.01, 0.1, 0.2, 0.3, 1.5],\n}\ngsadaDTC = GridSearchCV(\n    adaDTC, param_grid=ada_param_grid, cv=kfold, scoring=\"accuracy\", n_jobs=4, verbose=1\n)\ngsadaDTC.fit(X_train, Y_train)\nada_best = gsadaDTC.best_estimator_\ngsadaDTC.best_score_\nExtC = ExtraTreesClassifier()\nex_param_grid = {\n    \"max_depth\": [None],\n    \"max_features\": [1, 3, 10],\n    \"min_samples_split\": [2, 3, 10],\n    \"min_samples_leaf\": [1, 3, 10],\n    \"bootstrap\": [False],\n    \"n_estimators\": [100, 300],\n    \"criterion\": [\"gini\"],\n}\ngsExtC = GridSearchCV(\n    ExtC, param_grid=ex_param_grid, cv=kfold, scoring=\"accuracy\", n_jobs=4, verbose=1\n)\ngsExtC.fit(X_train, Y_train)\nExtC_best = gsExtC.best_estimator_\ngsExtC.best_score_\nRFC = RandomForestClassifier()\nrf_param_grid = {\n    \"max_depth\": [None],\n    \"max_features\": [1, 3, 10],\n    \"min_samples_split\": [2, 3, 10],\n    \"min_samples_leaf\": [1, 3, 10],\n    \"bootstrap\": [False],\n    \"n_estimators\": [100, 300],\n    \"criterion\": [\"gini\"],\n}\ngsRFC = GridSearchCV(\n    RFC, param_grid=rf_param_grid, cv=kfold, scoring=\"accuracy\", n_jobs=4, verbose=1\n)\ngsRFC.fit(X_train, Y_train)\nRFC_best = gsRFC.best_estimator_\ngsRFC.best_score_\nGBC = GradientBoostingClassifier()\ngb_param_grid = {\n    \"loss\": [\"deviance\"],\n    \"n_estimators\": [100, 200, 300],\n    \"learning_rate\": [0.1, 0.05, 0.01],\n    \"max_depth\": [4, 8],\n    \"min_samples_leaf\": [100, 150],\n    \"max_features\": [0.3, 0.1],\n}\ngsGBC = GridSearchCV(\n    GBC, param_grid=gb_param_grid, cv=kfold, scoring=\"accuracy\", n_jobs=4, verbose=1\n)\ngsGBC.fit(X_train, Y_train)\nGBC_best = gsGBC.best_estimator_\ngsGBC.best_score_\nSVMC = SVC(probability=True)\nsvc_param_grid = {\n    \"kernel\": [\"rbf\"],\n    \"gamma\": [0.001, 0.01, 0.1, 1],\n    \"C\": [1, 10, 50, 100, 200, 300, 1000],\n}\ngsSVMC = GridSearchCV(\n    SVMC, param_grid=svc_param_grid, cv=kfold, scoring=\"accuracy\", n_jobs=4, verbose=1\n)\ngsSVMC.fit(X_train, Y_train)\nSVMC_best = gsSVMC.best_estimator_\ngsSVMC.best_score_\n\n\ndef plot_learning_curve(\n    estimator,\n    title,\n    X,\n    y,\n    ylim=None,\n    cv=None,\n    n_jobs=-1,\n    train_sizes=np.linspace(0.1, 1.0, 5),\n):\n    \"\"\"Generate a simple plot of the test and training learning curve\"\"\"\n    plt.figure()\n    plt.title(title)\n    if ylim is not None:\n        plt.ylim(*ylim)\n    plt.xlabel(\"Training examples\")\n    plt.ylabel(\"Score\")\n    train_sizes, train_scores, test_scores = learning_curve(\n        estimator, X, y, cv=cv, n_jobs=n_jobs, train_sizes=train_sizes\n    )\n    train_scores_mean = np.mean(train_scores, axis=1)\n    train_scores_std = np.std(train_scores, axis=1)\n    test_scores_mean = np.mean(test_scores, axis=1)\n    test_scores_std = np.std(test_scores, axis=1)\n    plt.grid()\n    plt.fill_between(\n        train_sizes,\n        train_scores_mean - train_scores_std,\n        train_scores_mean + train_scores_std,\n        alpha=0.1,\n        color=\"r\",\n    )\n    plt.fill_between(\n        train_sizes,\n        test_scores_mean - test_scores_std,\n        test_scores_mean + test_scores_std,\n        alpha=0.1,\n        color=\"g\",\n    )\n    plt.plot(train_sizes, train_scores_mean, \"o-\", color=\"r\", label=\"Training score\")\n    plt.plot(\n        train_sizes, test_scores_mean, \"o-\", color=\"g\", label=\"Cross-validation score\"\n    )\n    plt.legend(loc=\"best\")\n    return plt\n\n\ng = plot_learning_curve(\n    gsRFC.best_estimator_, \"RF learning curves\", X_train, Y_train, cv=kfold\n)\ng = plot_learning_curve(\n    gsExtC.best_estimator_, \"ExtraTrees learning curves\", X_train, Y_train, cv=kfold\n)\ng = plot_learning_curve(\n    gsSVMC.best_estimator_, \"SVC learning curves\", X_train, Y_train, cv=kfold\n)\ng = plot_learning_curve(\n    gsadaDTC.best_estimator_, \"AdaBoost learning curves\", X_train, Y_train, cv=kfold\n)\ng = plot_learning_curve(\n    gsGBC.best_estimator_,\n    \"GradientBoosting learning curves\",\n    X_train,\n    Y_train,\n    cv=kfold,\n)\nnrows = ncols = 2\nfig, axes = plt.subplots(nrows=nrows, ncols=ncols, sharex=\"all\", figsize=(15, 15))\nnames_classifiers = [\n    (\"AdaBoosting\", ada_best),\n    (\"ExtraTrees\", ExtC_best),\n    (\"RandomForest\", RFC_best),\n    (\"GradientBoosting\", GBC_best),\n]\nnclassifier = 0\nfor row in range(nrows):\n    for col in range(ncols):\n        name = names_classifiers[nclassifier][0]\n        classifier = names_classifiers[nclassifier][1]\n        indices = np.argsort(classifier.feature_importances_)[::-1][:40]\n        g = sns.barplot(\n            y=X_train.columns[indices][:40],\n            x=classifier.feature_importances_[indices][:40],\n            orient=\"h\",\n            ax=axes[row][col],\n        )\n        g.set_xlabel(\"Relative importance\", fontsize=12)\n        g.set_ylabel(\"Features\", fontsize=12)\n        g.tick_params(labelsize=9)\n        g.set_title(name + \" feature importance\")\n        nclassifier += 1\ntest_Survived_RFC = pd.Series(RFC_best.predict(test), name=\"RFC\")\ntest_Survived_ExtC = pd.Series(ExtC_best.predict(test), name=\"ExtC\")\ntest_Survived_SVMC = pd.Series(SVMC_best.predict(test), name=\"SVC\")\ntest_Survived_AdaC = pd.Series(ada_best.predict(test), name=\"Ada\")\ntest_Survived_GBC = pd.Series(GBC_best.predict(test), name=\"GBC\")\nensemble_results = pd.concat(\n    [\n        test_Survived_RFC,\n        test_Survived_ExtC,\n        test_Survived_AdaC,\n        test_Survived_GBC,\n        test_Survived_SVMC,\n    ],\n    axis=1,\n)\ng = sns.heatmap(ensemble_results.corr(), annot=True)\nvotingC = VotingClassifier(\n    estimators=[\n        (\"rfc\", RFC_best),\n        (\"extc\", ExtC_best),\n        (\"svc\", SVMC_best),\n        (\"adac\", ada_best),\n        (\"gbc\", GBC_best),\n    ],\n    voting=\"soft\",\n    n_jobs=4,\n)\nvotingC = votingC.fit(X_train, Y_train)\ntest_Survived = pd.Series(votingC.predict(test), name=\"Survived\")\nresults = pd.concat([IDtest, test_Survived], axis=1)\nresults.to_csv(\"ensemble_python_voting.csv\", index=False)\n"
  },
  {
    "path": "stress_tests/kaggle/kaggle13.py",
    "content": "#!/usr/bin/env python\nimport matplotlib\n\nmatplotlib.use(\"PS\")\nimport warnings  # current version of seaborn generates a bunch of warnings that we'll ignore\n\nimport modin.pandas as pd\n\nwarnings.filterwarnings(\"ignore\")\nimport matplotlib.pyplot as plt\nimport seaborn as sns\n\nsns.set(style=\"white\", color_codes=True)\niris = pd.read_csv(\"Iris.csv\")  # the iris dataset is now a Pandas DataFrame\niris.head()\niris[\"Species\"].value_counts()\niris.plot(kind=\"scatter\", x=\"SepalLengthCm\", y=\"SepalWidthCm\")\nsns.jointplot(x=\"SepalLengthCm\", y=\"SepalWidthCm\", data=iris, size=5)\nsns.FacetGrid(iris, hue=\"Species\", size=5).map(\n    plt.scatter, \"SepalLengthCm\", \"SepalWidthCm\"\n).add_legend()\nsns.boxplot(x=\"Species\", y=\"PetalLengthCm\", data=iris)\nax = sns.boxplot(x=\"Species\", y=\"PetalLengthCm\", data=iris)\nax = sns.stripplot(\n    x=\"Species\", y=\"PetalLengthCm\", data=iris, jitter=True, edgecolor=\"gray\"\n)\nsns.violinplot(x=\"Species\", y=\"PetalLengthCm\", data=iris, size=6)\nsns.FacetGrid(iris, hue=\"Species\", size=6).map(\n    sns.kdeplot, \"PetalLengthCm\"\n).add_legend()\niris.drop(\"Id\", axis=1).boxplot(by=\"Species\", figsize=(12, 6))\n# These helpers live in pandas.plotting; the old pandas.tools.plotting path no longer exists in current pandas.\nfrom pandas.plotting import andrews_curves\n\nandrews_curves(iris.drop(\"Id\", axis=1), \"Species\")\nfrom pandas.plotting import parallel_coordinates\n\nparallel_coordinates(iris.drop(\"Id\", axis=1), \"Species\")\nfrom pandas.plotting import radviz\n\nradviz(iris.drop(\"Id\", axis=1), \"Species\")\n"
  },
  {
    "path": "stress_tests/kaggle/kaggle14.py",
    "content": "import matplotlib\n\nmatplotlib.use(\"PS\")\nimport matplotlib.pyplot as plt\nimport seaborn as sns\n\nimport modin.pandas as pd\n\nplt.style.use(\"fivethirtyeight\")\nimport warnings\n\nwarnings.filterwarnings(\"ignore\")\ndata = pd.read_csv(\"train.csv\")\ndata.head()\ndata.isnull().sum()  # checking for total null values\ndata.groupby([\"Sex\", \"Survived\"])[\"Survived\"].count()\nf, ax = plt.subplots(1, 2, figsize=(18, 8))\ndata[[\"Sex\", \"Survived\"]].groupby([\"Sex\"]).mean().plot.bar(ax=ax[0])\nax[0].set_title(\"Survived vs Sex\")\nsns.countplot(\"Sex\", hue=\"Survived\", data=data, ax=ax[1])\nax[1].set_title(\"Sex:Survived vs Dead\")\nplt.show()\npd.crosstab(data.Pclass, data.Survived, margins=True).style.background_gradient(\n    cmap=\"summer_r\"\n)\nf, ax = plt.subplots(1, 2, figsize=(18, 8))\ndata[\"Pclass\"].value_counts().plot.bar(\n    color=[\"#CD7F32\", \"#FFDF00\", \"#D3D3D3\"], ax=ax[0]\n)\nax[0].set_title(\"Number Of Passengers By Pclass\")\nax[0].set_ylabel(\"Count\")\nsns.countplot(\"Pclass\", hue=\"Survived\", data=data, ax=ax[1])\nax[1].set_title(\"Pclass:Survived vs Dead\")\nplt.show()\npd.crosstab(\n    [data.Sex, data.Survived], data.Pclass, margins=True\n).style.background_gradient(cmap=\"summer_r\")\nsns.factorplot(\"Pclass\", \"Survived\", hue=\"Sex\", data=data)\nplt.show()\nprint(\"Oldest Passenger was of:\", data[\"Age\"].max(), \"Years\")\nprint(\"Youngest Passenger was of:\", data[\"Age\"].min(), \"Years\")\nprint(\"Average Age on the ship:\", data[\"Age\"].mean(), \"Years\")\nf, ax = plt.subplots(1, 2, figsize=(18, 8))\nsns.violinplot(\"Pclass\", \"Age\", hue=\"Survived\", data=data, split=True, ax=ax[0])\nax[0].set_title(\"Pclass and Age vs Survived\")\nax[0].set_yticks(range(0, 110, 10))\nsns.violinplot(\"Sex\", \"Age\", hue=\"Survived\", data=data, split=True, ax=ax[1])\nax[1].set_title(\"Sex and Age vs Survived\")\nax[1].set_yticks(range(0, 110, 10))\nplt.show()\ndata[\"Initial\"] = 0\nfor i in data:\n    
data[\"Initial\"] = data.Name.str.extract(\n        r\"([A-Za-z]+)\\.\"  # noqa: W605\n    )  # lets extract the Salutations\npd.crosstab(data.Initial, data.Sex).T.style.background_gradient(\n    cmap=\"summer_r\"\n)  # Checking the Initials with the Sex\ndata[\"Initial\"].replace(\n    [\n        \"Mlle\",\n        \"Mme\",\n        \"Ms\",\n        \"Dr\",\n        \"Major\",\n        \"Lady\",\n        \"Countess\",\n        \"Jonkheer\",\n        \"Col\",\n        \"Rev\",\n        \"Capt\",\n        \"Sir\",\n        \"Don\",\n    ],\n    [\n        \"Miss\",\n        \"Miss\",\n        \"Miss\",\n        \"Mr\",\n        \"Mr\",\n        \"Mrs\",\n        \"Mrs\",\n        \"Other\",\n        \"Other\",\n        \"Other\",\n        \"Mr\",\n        \"Mr\",\n        \"Mr\",\n    ],\n    inplace=True,\n)\ndata.groupby(\"Initial\")[\"Age\"].mean()  # lets check the average age by Initials\ndata.loc[(data.Age.isnull()) & (data.Initial == \"Mr\"), \"Age\"] = 33\ndata.loc[(data.Age.isnull()) & (data.Initial == \"Mrs\"), \"Age\"] = 36\ndata.loc[(data.Age.isnull()) & (data.Initial == \"Master\"), \"Age\"] = 5\ndata.loc[(data.Age.isnull()) & (data.Initial == \"Miss\"), \"Age\"] = 22\ndata.loc[(data.Age.isnull()) & (data.Initial == \"Other\"), \"Age\"] = 46\ndata.Age.isnull().any()  # So no null values left finally\nf, ax = plt.subplots(1, 2, figsize=(20, 10))\ndata[data[\"Survived\"] == 0].Age.plot.hist(\n    ax=ax[0], bins=20, edgecolor=\"black\", color=\"red\"\n)\nax[0].set_title(\"Survived= 0\")\nx1 = list(range(0, 85, 5))\nax[0].set_xticks(x1)\ndata[data[\"Survived\"] == 1].Age.plot.hist(\n    ax=ax[1], color=\"green\", bins=20, edgecolor=\"black\"\n)\nax[1].set_title(\"Survived= 1\")\nx2 = list(range(0, 85, 5))\nax[1].set_xticks(x2)\nplt.show()\nsns.factorplot(\"Pclass\", \"Survived\", col=\"Initial\", data=data)\nplt.show()\npd.crosstab(\n    [data.Embarked, data.Pclass], [data.Sex, data.Survived], 
margins=True\n).style.background_gradient(cmap=\"summer_r\")\nsns.factorplot(\"Embarked\", \"Survived\", data=data)\nfig = plt.gcf()\nfig.set_size_inches(5, 3)\nplt.show()\nf, ax = plt.subplots(2, 2, figsize=(20, 15))\nsns.countplot(\"Embarked\", data=data, ax=ax[0, 0])\nax[0, 0].set_title(\"No. Of Passengers Boarded\")\nsns.countplot(\"Embarked\", hue=\"Sex\", data=data, ax=ax[0, 1])\nax[0, 1].set_title(\"Male-Female Split for Embarked\")\nsns.countplot(\"Embarked\", hue=\"Survived\", data=data, ax=ax[1, 0])\nax[1, 0].set_title(\"Embarked vs Survived\")\nsns.countplot(\"Embarked\", hue=\"Pclass\", data=data, ax=ax[1, 1])\nax[1, 1].set_title(\"Embarked vs Pclass\")\nplt.subplots_adjust(wspace=0.2, hspace=0.5)\nplt.show()\nsns.factorplot(\"Pclass\", \"Survived\", hue=\"Sex\", col=\"Embarked\", data=data)\nplt.show()\ndata[\"Embarked\"].fillna(\"S\", inplace=True)\ndata.Embarked.isnull().any()  # Finally No NaN values\npd.crosstab([data.SibSp], data.Survived).style.background_gradient(cmap=\"summer_r\")\nf, ax = plt.subplots(1, 2, figsize=(20, 8))\nsns.barplot(\"SibSp\", \"Survived\", data=data, ax=ax[0])\nax[0].set_title(\"SibSp vs Survived\")\nsns.factorplot(\"SibSp\", \"Survived\", data=data, ax=ax[1])\nax[1].set_title(\"SibSp vs Survived\")\nplt.close(2)\nplt.show()\npd.crosstab(data.SibSp, data.Pclass).style.background_gradient(cmap=\"summer_r\")\npd.crosstab(data.Parch, data.Pclass).style.background_gradient(cmap=\"summer_r\")\nf, ax = plt.subplots(1, 2, figsize=(20, 8))\nsns.barplot(\"Parch\", \"Survived\", data=data, ax=ax[0])\nax[0].set_title(\"Parch vs Survived\")\nsns.factorplot(\"Parch\", \"Survived\", data=data, ax=ax[1])\nax[1].set_title(\"Parch vs Survived\")\nplt.close(2)\nplt.show()\nprint(\"Highest Fare was:\", data[\"Fare\"].max())\nprint(\"Lowest Fare was:\", data[\"Fare\"].min())\nprint(\"Average Fare was:\", data[\"Fare\"].mean())\nf, ax = plt.subplots(1, 3, figsize=(20, 8))\nsns.distplot(data[data[\"Pclass\"] == 1].Fare, 
ax=ax[0])\nax[0].set_title(\"Fares in Pclass 1\")\nsns.distplot(data[data[\"Pclass\"] == 2].Fare, ax=ax[1])\nax[1].set_title(\"Fares in Pclass 2\")\nsns.distplot(data[data[\"Pclass\"] == 3].Fare, ax=ax[2])\nax[2].set_title(\"Fares in Pclass 3\")\nplt.show()\nsns.heatmap(\n    data.corr(), annot=True, cmap=\"RdYlGn\", linewidths=0.2\n)  # data.corr()-->correlation matrix\nfig = plt.gcf()\nfig.set_size_inches(10, 8)\nplt.show()\ndata[\"Age_band\"] = 0\ndata.loc[data[\"Age\"] <= 16, \"Age_band\"] = 0\ndata.loc[(data[\"Age\"] > 16) & (data[\"Age\"] <= 32), \"Age_band\"] = 1\ndata.loc[(data[\"Age\"] > 32) & (data[\"Age\"] <= 48), \"Age_band\"] = 2\ndata.loc[(data[\"Age\"] > 48) & (data[\"Age\"] <= 64), \"Age_band\"] = 3\ndata.loc[data[\"Age\"] > 64, \"Age_band\"] = 4\ndata.head(2)\ndata[\"Age_band\"].value_counts().to_frame().style.background_gradient(\n    cmap=\"summer\"\n)  # checking the number of passenegers in each band\nsns.factorplot(\"Age_band\", \"Survived\", data=data, col=\"Pclass\")\nplt.show()\ndata[\"Family_Size\"] = 0\ndata[\"Family_Size\"] = data[\"Parch\"] + data[\"SibSp\"]  # family size\ndata[\"Alone\"] = 0\ndata.loc[data.Family_Size == 0, \"Alone\"] = 1  # Alone\nf, ax = plt.subplots(1, 2, figsize=(18, 6))\nsns.factorplot(\"Family_Size\", \"Survived\", data=data, ax=ax[0])\nax[0].set_title(\"Family_Size vs Survived\")\nsns.factorplot(\"Alone\", \"Survived\", data=data, ax=ax[1])\nax[1].set_title(\"Alone vs Survived\")\nplt.close(2)\nplt.close(3)\nplt.show()\nsns.factorplot(\"Alone\", \"Survived\", data=data, hue=\"Sex\", col=\"Pclass\")\nplt.show()\ndata[\"Fare_Range\"] = pd.qcut(data[\"Fare\"], 4)\ndata.groupby([\"Fare_Range\"])[\"Survived\"].mean().to_frame().style.background_gradient(\n    cmap=\"summer_r\"\n)\ndata[\"Fare_cat\"] = 0\ndata.loc[data[\"Fare\"] <= 7.91, \"Fare_cat\"] = 0\ndata.loc[(data[\"Fare\"] > 7.91) & (data[\"Fare\"] <= 14.454), \"Fare_cat\"] = 1\ndata.loc[(data[\"Fare\"] > 14.454) & (data[\"Fare\"] <= 31), \"Fare_cat\"] = 
2\ndata.loc[(data[\"Fare\"] > 31) & (data[\"Fare\"] <= 513), \"Fare_cat\"] = 3\nsns.factorplot(\"Fare_cat\", \"Survived\", data=data, hue=\"Sex\")\nplt.show()\ndata[\"Sex\"].replace([\"male\", \"female\"], [0, 1], inplace=True)\ndata[\"Embarked\"].replace([\"S\", \"C\", \"Q\"], [0, 1, 2], inplace=True)\ndata[\"Initial\"].replace(\n    [\"Mr\", \"Mrs\", \"Miss\", \"Master\", \"Other\"], [0, 1, 2, 3, 4], inplace=True\n)\ndata.drop(\n    [\"Name\", \"Age\", \"Ticket\", \"Fare\", \"Cabin\", \"Fare_Range\", \"PassengerId\"],\n    axis=1,\n    inplace=True,\n)\nsns.heatmap(\n    data.corr(), annot=True, cmap=\"RdYlGn\", linewidths=0.2, annot_kws={\"size\": 20}\n)\nfig = plt.gcf()\nfig.set_size_inches(18, 15)\nplt.xticks(fontsize=14)\nplt.yticks(fontsize=14)\nplt.show()\nfrom sklearn import metrics  # accuracy measure\nfrom sklearn import svm  # support vector Machine\nfrom sklearn.ensemble import RandomForestClassifier  # Random Forest\nfrom sklearn.linear_model import LogisticRegression  # logistic regression\nfrom sklearn.metrics import confusion_matrix  # for confusion matrix\nfrom sklearn.model_selection import train_test_split  # training and testing data split\nfrom sklearn.naive_bayes import GaussianNB  # Naive bayes\nfrom sklearn.neighbors import KNeighborsClassifier  # KNN\nfrom sklearn.tree import DecisionTreeClassifier  # Decision Tree\n\ntrain, test = train_test_split(\n    data, test_size=0.3, random_state=0, stratify=data[\"Survived\"]\n)\ntrain_X = train[train.columns[1:]]\ntrain_Y = train[train.columns[:1]]\ntest_X = test[test.columns[1:]]\ntest_Y = test[test.columns[:1]]\nX = data[data.columns[1:]]\nY = data[\"Survived\"]\nmodel = svm.SVC(kernel=\"rbf\", C=1, gamma=0.1)\nmodel.fit(train_X, train_Y)\nprediction1 = model.predict(test_X)\nprint(\"Accuracy for rbf SVM is \", metrics.accuracy_score(prediction1, test_Y))\nmodel = svm.SVC(kernel=\"linear\", C=0.1, gamma=0.1)\nmodel.fit(train_X, train_Y)\nprediction2 = model.predict(test_X)\nprint(\"Accuracy for 
linear SVM is\", metrics.accuracy_score(prediction2, test_Y))\nmodel = LogisticRegression()\nmodel.fit(train_X, train_Y)\nprediction3 = model.predict(test_X)\nprint(\n    \"The accuracy of the Logistic Regression is\",\n    metrics.accuracy_score(prediction3, test_Y),\n)\nmodel = DecisionTreeClassifier()\nmodel.fit(train_X, train_Y)\nprediction4 = model.predict(test_X)\nprint(\n    \"The accuracy of the Decision Tree is\", metrics.accuracy_score(prediction4, test_Y)\n)\nmodel = KNeighborsClassifier()\nmodel.fit(train_X, train_Y)\nprediction5 = model.predict(test_X)\nprint(\"The accuracy of the KNN is\", metrics.accuracy_score(prediction5, test_Y))\na_index = list(range(1, 11))\na = pd.Series()\nx = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]\nfor i in list(range(1, 11)):\n    model = KNeighborsClassifier(n_neighbors=i)\n    model.fit(train_X, train_Y)\n    prediction = model.predict(test_X)\n    a = a.append(pd.Series(metrics.accuracy_score(prediction, test_Y)))\nplt.plot(a_index, a)\nplt.xticks(x)\nfig = plt.gcf()\nfig.set_size_inches(12, 6)\nplt.show()\nprint(\n    \"Accuracies for different values of n are:\",\n    a.values,\n    \"with the max value as \",\n    a.values.max(),\n)\nmodel = GaussianNB()\nmodel.fit(train_X, train_Y)\nprediction6 = model.predict(test_X)\nprint(\"The accuracy of the NaiveBayes is\", metrics.accuracy_score(prediction6, test_Y))\nmodel = RandomForestClassifier(n_estimators=100)\nmodel.fit(train_X, train_Y)\nprediction7 = model.predict(test_X)\nprint(\n    \"The accuracy of the Random Forests is\", metrics.accuracy_score(prediction7, test_Y)\n)\nfrom sklearn.model_selection import KFold  # for K-fold cross validation\nfrom sklearn.model_selection import cross_val_predict  # prediction\nfrom sklearn.model_selection import cross_val_score  # score evaluation\n\nkfold = KFold(n_splits=10, random_state=22)  # k=10, split the data into 10 equal parts\nxyz = []\naccuracy = []\nstd = []\nclassifiers = [\n    \"Linear Svm\",\n    \"Radial Svm\",\n    
\"Logistic Regression\",\n    \"KNN\",\n    \"Decision Tree\",\n    \"Naive Bayes\",\n    \"Random Forest\",\n]\nmodels = [\n    svm.SVC(kernel=\"linear\"),\n    svm.SVC(kernel=\"rbf\"),\n    LogisticRegression(),\n    KNeighborsClassifier(n_neighbors=9),\n    DecisionTreeClassifier(),\n    GaussianNB(),\n    RandomForestClassifier(n_estimators=100),\n]\nfor i in models:\n    model = i\n    cv_result = cross_val_score(model, X, Y, cv=kfold, scoring=\"accuracy\")\n    xyz.append(cv_result.mean())\n    std.append(cv_result.std())\n    accuracy.append(cv_result)\nnew_models_dataframe2 = pd.DataFrame({\"CV Mean\": xyz, \"Std\": std}, index=classifiers)\nnew_models_dataframe2\nplt.subplots(figsize=(12, 6))\nbox = pd.DataFrame(accuracy, index=[classifiers])\nbox.T.boxplot()\nnew_models_dataframe2[\"CV Mean\"].plot.barh(width=0.8)\nplt.title(\"Average CV Mean Accuracy\")\nfig = plt.gcf()\nfig.set_size_inches(8, 5)\nplt.show()\nf, ax = plt.subplots(3, 3, figsize=(12, 10))\ny_pred = cross_val_predict(svm.SVC(kernel=\"rbf\"), X, Y, cv=10)\nsns.heatmap(confusion_matrix(Y, y_pred), ax=ax[0, 0], annot=True, fmt=\"2.0f\")\nax[0, 0].set_title(\"Matrix for rbf-SVM\")\ny_pred = cross_val_predict(svm.SVC(kernel=\"linear\"), X, Y, cv=10)\nsns.heatmap(confusion_matrix(Y, y_pred), ax=ax[0, 1], annot=True, fmt=\"2.0f\")\nax[0, 1].set_title(\"Matrix for Linear-SVM\")\ny_pred = cross_val_predict(KNeighborsClassifier(n_neighbors=9), X, Y, cv=10)\nsns.heatmap(confusion_matrix(Y, y_pred), ax=ax[0, 2], annot=True, fmt=\"2.0f\")\nax[0, 2].set_title(\"Matrix for KNN\")\ny_pred = cross_val_predict(RandomForestClassifier(n_estimators=100), X, Y, cv=10)\nsns.heatmap(confusion_matrix(Y, y_pred), ax=ax[1, 0], annot=True, fmt=\"2.0f\")\nax[1, 0].set_title(\"Matrix for Random-Forests\")\ny_pred = cross_val_predict(LogisticRegression(), X, Y, cv=10)\nsns.heatmap(confusion_matrix(Y, y_pred), ax=ax[1, 1], annot=True, fmt=\"2.0f\")\nax[1, 1].set_title(\"Matrix for Logistic Regression\")\ny_pred = 
cross_val_predict(DecisionTreeClassifier(), X, Y, cv=10)\nsns.heatmap(confusion_matrix(Y, y_pred), ax=ax[1, 2], annot=True, fmt=\"2.0f\")\nax[1, 2].set_title(\"Matrix for Decision Tree\")\ny_pred = cross_val_predict(GaussianNB(), X, Y, cv=10)\nsns.heatmap(confusion_matrix(Y, y_pred), ax=ax[2, 0], annot=True, fmt=\"2.0f\")\nax[2, 0].set_title(\"Matrix for Naive Bayes\")\nplt.subplots_adjust(hspace=0.2, wspace=0.2)\nplt.show()\nfrom sklearn.model_selection import GridSearchCV\n\nC = [0.05, 0.1, 0.2, 0.3, 0.25, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]\ngamma = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]\nkernel = [\"rbf\", \"linear\"]\nhyper = {\"kernel\": kernel, \"C\": C, \"gamma\": gamma}\ngd = GridSearchCV(estimator=svm.SVC(), param_grid=hyper, verbose=True)\ngd.fit(X, Y)\nprint(gd.best_score_)\nprint(gd.best_estimator_)\nn_estimators = range(100, 1000, 100)\nhyper = {\"n_estimators\": n_estimators}\ngd = GridSearchCV(\n    estimator=RandomForestClassifier(random_state=0), param_grid=hyper, verbose=True\n)\ngd.fit(X, Y)\nprint(gd.best_score_)\nprint(gd.best_estimator_)\nfrom sklearn.ensemble import VotingClassifier\n\nensemble_lin_rbf = VotingClassifier(\n    estimators=[\n        (\"KNN\", KNeighborsClassifier(n_neighbors=10)),\n        (\"RBF\", svm.SVC(probability=True, kernel=\"rbf\", C=0.5, gamma=0.1)),\n        (\"RFor\", RandomForestClassifier(n_estimators=500, random_state=0)),\n        (\"LR\", LogisticRegression(C=0.05)),\n        (\"DT\", DecisionTreeClassifier(random_state=0)),\n        (\"NB\", GaussianNB()),\n        (\"svm\", svm.SVC(kernel=\"linear\", probability=True)),\n    ],\n    voting=\"soft\",\n).fit(train_X, train_Y)\nprint(\"The accuracy for ensembled model is:\", ensemble_lin_rbf.score(test_X, test_Y))\ncross = cross_val_score(ensemble_lin_rbf, X, Y, cv=10, scoring=\"accuracy\")\nprint(\"The cross validated score is\", cross.mean())\nfrom sklearn.ensemble import BaggingClassifier\n\nmodel = BaggingClassifier(\n    
base_estimator=KNeighborsClassifier(n_neighbors=3), random_state=0, n_estimators=700\n)\nmodel.fit(train_X, train_Y)\nprediction = model.predict(test_X)\nprint(\"The accuracy for bagged KNN is:\", metrics.accuracy_score(prediction, test_Y))\nresult = cross_val_score(model, X, Y, cv=10, scoring=\"accuracy\")\nprint(\"The cross validated score for bagged KNN is:\", result.mean())\nmodel = BaggingClassifier(\n    base_estimator=DecisionTreeClassifier(), random_state=0, n_estimators=100\n)\nmodel.fit(train_X, train_Y)\nprediction = model.predict(test_X)\nprint(\n    \"The accuracy for bagged Decision Tree is:\",\n    metrics.accuracy_score(prediction, test_Y),\n)\nresult = cross_val_score(model, X, Y, cv=10, scoring=\"accuracy\")\nprint(\"The cross validated score for bagged Decision Tree is:\", result.mean())\nfrom sklearn.ensemble import AdaBoostClassifier\n\nada = AdaBoostClassifier(n_estimators=200, random_state=0, learning_rate=0.1)\nresult = cross_val_score(ada, X, Y, cv=10, scoring=\"accuracy\")\nprint(\"The cross validated score for AdaBoost is:\", result.mean())\nfrom sklearn.ensemble import GradientBoostingClassifier\n\ngrad = GradientBoostingClassifier(n_estimators=500, random_state=0, learning_rate=0.1)\nresult = cross_val_score(grad, X, Y, cv=10, scoring=\"accuracy\")\nprint(\"The cross validated score for Gradient Boosting is:\", result.mean())\nimport xgboost as xg\n\nxgboost = xg.XGBClassifier(n_estimators=900, learning_rate=0.1)\nresult = cross_val_score(xgboost, X, Y, cv=10, scoring=\"accuracy\")\nprint(\"The cross validated score for XGBoost is:\", result.mean())\nn_estimators = list(range(100, 1100, 100))\nlearn_rate = [0.05, 0.1, 0.2, 0.3, 0.25, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]\nhyper = {\"n_estimators\": n_estimators, \"learning_rate\": learn_rate}\ngd = GridSearchCV(estimator=AdaBoostClassifier(), param_grid=hyper, verbose=True)\ngd.fit(X, Y)\nprint(gd.best_score_)\nprint(gd.best_estimator_)\nada = AdaBoostClassifier(n_estimators=200, 
random_state=0, learning_rate=0.05)\nresult = cross_val_predict(ada, X, Y, cv=10)\nsns.heatmap(confusion_matrix(Y, result), cmap=\"winter\", annot=True, fmt=\"2.0f\")\nplt.show()\nf, ax = plt.subplots(2, 2, figsize=(15, 12))\nmodel = RandomForestClassifier(n_estimators=500, random_state=0)\nmodel.fit(X, Y)\npd.Series(model.feature_importances_, X.columns).sort_values(ascending=True).plot.barh(\n    width=0.8, ax=ax[0, 0]\n)\nax[0, 0].set_title(\"Feature Importance in Random Forests\")\nmodel = AdaBoostClassifier(n_estimators=200, learning_rate=0.05, random_state=0)\nmodel.fit(X, Y)\npd.Series(model.feature_importances_, X.columns).sort_values(ascending=True).plot.barh(\n    width=0.8, ax=ax[0, 1], color=\"#ddff11\"\n)\nax[0, 1].set_title(\"Feature Importance in AdaBoost\")\nmodel = GradientBoostingClassifier(n_estimators=500, learning_rate=0.1, random_state=0)\nmodel.fit(X, Y)\npd.Series(model.feature_importances_, X.columns).sort_values(ascending=True).plot.barh(\n    width=0.8, ax=ax[1, 0], cmap=\"RdYlGn_r\"\n)\nax[1, 0].set_title(\"Feature Importance in Gradient Boosting\")\nmodel = xg.XGBClassifier(n_estimators=900, learning_rate=0.1)\nmodel.fit(X, Y)\npd.Series(model.feature_importances_, X.columns).sort_values(ascending=True).plot.barh(\n    width=0.8, ax=ax[1, 1], color=\"#FD0F00\"\n)\nax[1, 1].set_title(\"Feature Importance in XgBoost\")\nplt.show()\n"
  },
  {
    "path": "stress_tests/kaggle/kaggle17.py",
    "content": "import modin.pandas as pd\n\nmelbourne_file_path = \"melb_data.csv\"\nmelbourne_data = pd.read_csv(melbourne_file_path)\nprint(melbourne_data.columns)\nmelbourne_price_data = melbourne_data.Price\nprint(melbourne_price_data.head())\ncolumns_of_interest = [\"Landsize\", \"BuildingArea\"]\ntwo_columns_of_data = melbourne_data[columns_of_interest]\ntwo_columns_of_data.describe()\n"
  },
  {
    "path": "stress_tests/kaggle/kaggle18.py",
    "content": "#!/usr/bin/env python  # noqa: E902\nimport matplotlib\n\nmatplotlib.use(\"PS\")\nimport re\nimport string\n\nimport matplotlib.pyplot as plt\nimport nltk\nimport numpy as np\nimport pandas as pd\nimport seaborn as sns\n\nsns.set(style=\"white\")\nimport warnings\nfrom collections import Counter\n\nimport bokeh.plotting as bp\nimport plotly.graph_objs as go\nimport plotly.offline as py\nfrom bokeh.models import HoverTool  # BoxSelectTool\nfrom bokeh.models import ColumnDataSource\nfrom bokeh.plotting import output_notebook, show  # figure\nfrom nltk.corpus import stopwords\nfrom nltk.tokenize import sent_tokenize, word_tokenize\nfrom sklearn.decomposition import LatentDirichletAllocation\nfrom sklearn.feature_extraction import stop_words\nfrom sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer\nfrom wordcloud import WordCloud\n\nwarnings.filterwarnings(\"ignore\")\nimport logging\n\nlogging.getLogger(\"lda\").setLevel(logging.WARNING)\nnltk.download(\"punkt\")\nnltk.download(\"stopwords\")\ntrain = pd.read_csv(\"train.csv\")\ntest = pd.read_csv(\"test.csv\")\nprint(train.shape)\nprint(test.shape)\ntrain.dtypes\ntrain.head()\ntrain.price.describe()\nplt.subplot(1, 2, 1)\n(train[\"price\"]).plot.hist(bins=50, figsize=(20, 10), edgecolor=\"white\", range=[0, 250])\nplt.xlabel(\"price+\", fontsize=17)\nplt.ylabel(\"frequency\", fontsize=17)\nplt.tick_params(labelsize=15)\nplt.title(\"Price Distribution - Training Set\", fontsize=17)\nplt.subplot(1, 2, 2)\nnp.log(train[\"price\"] + 1).plot.hist(bins=50, figsize=(20, 10), edgecolor=\"white\")\nplt.xlabel(\"log(price+1)\", fontsize=17)\nplt.ylabel(\"frequency\", fontsize=17)\nplt.tick_params(labelsize=15)\nplt.title(\"Log(Price) Distribution - Training Set\", fontsize=17)\nplt.show()\ntrain.shipping.value_counts() / len(train)\nprc_shipBySeller = train.loc[train.shipping == 1, \"price\"]\nprc_shipByBuyer = train.loc[train.shipping == 0, \"price\"]\nfig, ax = plt.subplots(figsize=(20, 
10))\nax.hist(\n    np.log(prc_shipBySeller + 1),\n    color=\"#8CB4E1\",\n    alpha=1.0,\n    bins=50,\n    label=\"Price when Seller pays Shipping\",\n)\nax.hist(\n    np.log(prc_shipByBuyer + 1),\n    color=\"#007D00\",\n    alpha=0.7,\n    bins=50,\n    label=\"Price when Buyer pays Shipping\",\n)\nax.set(title=\"Histogram Comparison\", ylabel=\"% of Dataset in Bin\")\nplt.xlabel(\"log(price+1)\", fontsize=17)\nplt.ylabel(\"frequency\", fontsize=17)\nplt.title(\"Price Distribution by Shipping Type\", fontsize=17)\nplt.tick_params(labelsize=15)\nplt.show()\nprint(\n    \"There are %d unique values in the category column.\"\n    % train[\"category_name\"].nunique()\n)\ntrain[\"category_name\"].value_counts()[:5]\nprint(\n    \"There are %d items that do not have a label.\"\n    % train[\"category_name\"].isnull().sum()\n)\n\n\ndef split_cat(text):\n    try:\n        return text.split(\"/\")\n    except Exception:\n        return (\"No Label\", \"No Label\", \"No Label\")\n\n\ntrain[\"general_cat\"], train[\"subcat_1\"], train[\"subcat_2\"] = zip(\n    *train[\"category_name\"].apply(lambda x: split_cat(x))\n)\ntrain.head()\ntest[\"general_cat\"], test[\"subcat_1\"], test[\"subcat_2\"] = zip(\n    *test[\"category_name\"].apply(lambda x: split_cat(x))\n)\nprint(\"There are %d unique first sub-categories.\" % train[\"subcat_1\"].nunique())\nprint(\"There are %d unique second sub-categories.\" % train[\"subcat_2\"].nunique())\nx = train[\"general_cat\"].value_counts().index.values.astype(\"str\")\ny = train[\"general_cat\"].value_counts().values\npct = [(\"%.2f\" % (v * 100)) + \"%\" for v in (y / len(train))]\ntrace1 = go.Bar(x=x, y=y, text=pct)\nlayout = {\n    \"title\": \"Number of Items by Main Category\",\n    \"yaxis\": {\"title\": \"Count\"},\n    \"xaxis\": {\"title\": \"Category\"},\n}\nfig = {\"data\": [trace1], \"layout\": layout}\npy.iplot(fig)\nx = train[\"subcat_1\"].value_counts().index.values.astype(\"str\")[:15]\ny = 
train[\"subcat_1\"].value_counts().values[:15]\npct = [(\"%.2f\" % (v * 100)) + \"%\" for v in (y / len(train))][:15]\ntrace1 = go.Bar(\n    x=x,\n    y=y,\n    text=pct,\n    marker={\n        \"color\": y,\n        \"colorscale\": \"Portland\",\n        \"showscale\": True,\n        \"reversescale\": False,\n    },\n)\nlayout = {\n    \"title\": \"Number of Items by Sub Category (Top 15)\",\n    \"yaxis\": {\"title\": \"Count\"},\n    \"xaxis\": {\"title\": \"SubCategory\"},\n}\nfig = {\"data\": [trace1], \"layout\": layout}\npy.iplot(fig)\ngeneral_cats = train[\"general_cat\"].unique()\nx = [train.loc[train[\"general_cat\"] == cat, \"price\"] for cat in general_cats]\ndata = [\n    go.Box(x=np.log(x[i] + 1), name=general_cats[i]) for i in range(len(general_cats))\n]\nlayout = {\n    \"title\": \"Price Distribution by General Category\",\n    \"yaxis\": {\"title\": \"Frequency\"},\n    \"xaxis\": {\"title\": \"Category\"},\n}\nfig = {\"data\": data, \"layout\": layout}\npy.iplot(fig)\nprint(\n    \"There are %d unique brand names in the training dataset.\"\n    % train[\"brand_name\"].nunique()\n)\nx = train[\"brand_name\"].value_counts().index.values.astype(\"str\")[:10]\ny = train[\"brand_name\"].value_counts().values[:10]\n\n\ndef wordCount(text):\n    try:\n        text = text.lower()\n        regex = re.compile(\"[\" + re.escape(string.punctuation) + \"0-9\\\\r\\\\t\\\\n]\")\n        txt = regex.sub(\" \", text)\n        words = [\n            w\n            for w in txt.split(\" \")\n            if w not in stop_words.ENGLISH_STOP_WORDS and len(w) > 3\n        ]\n        return len(words)\n    except Exception:\n        return 0\n\n\ntrain[\"desc_len\"] = train[\"item_description\"].apply(lambda x: wordCount(x))\ntest[\"desc_len\"] = test[\"item_description\"].apply(lambda x: wordCount(x))\ntrain.head()\ndf = train.groupby(\"desc_len\")[\"price\"].mean().reset_index()\ntrace1 = go.Scatter(\n    x=df[\"desc_len\"],\n    y=np.log(df[\"price\"] + 1),\n    
mode=\"lines+markers\",\n    name=\"lines+markers\",\n)\nlayout = {\n    \"title\": \"Average Log(Price) by Description Length\",\n    \"yaxis\": {\"title\": \"Average Log(Price)\"},\n    \"xaxis\": {\"title\": \"Description Length\"},\n}\nfig = {\"data\": [trace1], \"layout\": layout}\npy.iplot(fig)\ntrain.item_description.isnull().sum()\ntrain = train[pd.notnull(train[\"item_description\"])]\nstop = set(stopwords.words(\"english\"))\n\n\ndef tokenize(text):\n    \"\"\"\n    sent_tokenize(): segment text into sentences\n    word_tokenize(): break sentences into words\n    \"\"\"\n    try:\n        regex = re.compile(\"[\" + re.escape(string.punctuation) + \"0-9\\\\r\\\\t\\\\n]\")\n        text = regex.sub(\" \", text)  # remove punctuation\n        tokens_ = [word_tokenize(s) for s in sent_tokenize(text)]\n        tokens = []\n        for token_by_sent in tokens_:\n            tokens += token_by_sent\n        tokens = list(filter(lambda t: t.lower() not in stop, tokens))\n        filtered_tokens = [w for w in tokens if re.search(\"[a-zA-Z]\", w)]\n        filtered_tokens = [w.lower() for w in filtered_tokens if len(w) >= 3]\n        return filtered_tokens\n    except TypeError as err:\n        print(text, err)\n\n\ncat_desc = {}\nfor cat in general_cats:\n    text = \" \".join(train.loc[train[\"general_cat\"] == cat, \"item_description\"].values)\n    cat_desc[cat] = tokenize(text)\nflat_lst = [item for sublist in list(cat_desc.values()) for item in sublist]\nallWordsCount = Counter(flat_lst)\nall_top10 = allWordsCount.most_common(20)\nx = [w[0] for w in all_top10]\ny = [w[1] for w in all_top10]\ntrace1 = go.Bar(x=x, y=y, text=pct)\nlayout = {\n    \"title\": \"Word Frequency\",\n    \"yaxis\": {\"title\": \"Count\"},\n    \"xaxis\": {\"title\": \"Word\"},\n}\nfig = {\"data\": [trace1], \"layout\": layout}\npy.iplot(fig)\nstop = set(stopwords.words(\"english\"))\n\n\ndef tokenize(text):\n    try:\n        regex = re.compile(\"[\" + re.escape(string.punctuation) + 
\"0-9\\\\r\\\\t\\\\n]\")\n        text = regex.sub(\" \", text)  # remove punctuation\n        tokens_ = [word_tokenize(s) for s in sent_tokenize(text)]\n        tokens = []\n        for token_by_sent in tokens_:\n            tokens += token_by_sent\n        tokens = list(filter(lambda t: t.lower() not in stop, tokens))\n        filtered_tokens = [w for w in tokens if re.search(\"[a-zA-Z]\", w)]\n        filtered_tokens = [w.lower() for w in filtered_tokens if len(w) >= 3]\n        return filtered_tokens\n    except TypeError as err:\n        print(text, err)\n\n\ntrain[\"tokens\"] = train[\"item_description\"].map(tokenize)\ntest[\"tokens\"] = test[\"item_description\"].map(tokenize)\ntrain.reset_index(drop=True, inplace=True)\ntest.reset_index(drop=True, inplace=True)\nfor description, tokens in zip(\n    train[\"item_description\"].head(), train[\"tokens\"].head()\n):\n    print(\"description:\", description)\n    print(\"tokens:\", tokens)\n    print()\ncat_desc = {}\nfor cat in general_cats:\n    text = \" \".join(train.loc[train[\"general_cat\"] == cat, \"item_description\"].values)\n    cat_desc[cat] = tokenize(text)\nimport sys\n\nsys.exit()\nwomen100 = Counter(cat_desc[\"Women\"]).most_common(100)\nbeauty100 = Counter(cat_desc[\"Beauty\"]).most_common(100)\nkids100 = Counter(cat_desc[\"Kids\"]).most_common(100)\nelectronics100 = Counter(cat_desc[\"Electronics\"]).most_common(100)\n\n\ndef generate_wordcloud(tup):\n    wordcloud = WordCloud(\n        background_color=\"white\", max_words=50, max_font_size=40, random_state=42\n    ).generate(str(tup))\n    return wordcloud\n\n\nfig, axes = plt.subplots(2, 2, figsize=(30, 15))\nax = axes[0, 0]\nax.imshow(generate_wordcloud(women100), interpolation=\"bilinear\")\nax.axis(\"off\")\nax.set_title(\"Women Top 100\", fontsize=30)\nax = axes[0, 1]\nax.imshow(generate_wordcloud(beauty100))\nax.axis(\"off\")\nax.set_title(\"Beauty Top 100\", fontsize=30)\nax = axes[1, 
0]\nax.imshow(generate_wordcloud(kids100))\nax.axis(\"off\")\nax.set_title(\"Kids Top 100\", fontsize=30)\nax = axes[1, 1]\nax.imshow(generate_wordcloud(electronics100))\nax.axis(\"off\")\nax.set_title(\"Electronic Top 100\", fontsize=30)\nvectorizer = TfidfVectorizer(\n    min_df=10, max_features=180000, tokenizer=tokenize, ngram_range=(1, 2)\n)\nall_desc = np.append(train[\"item_description\"].values, test[\"item_description\"].values)\nvz = vectorizer.fit_transform(list(all_desc))\ntfidf = dict(zip(vectorizer.get_feature_names(), vectorizer.idf_))\ntfidf = pd.DataFrame(columns=[\"tfidf\"]).from_dict(dict(tfidf), orient=\"index\")\ntfidf.columns = [\"tfidf\"]\ntfidf.sort_values(by=[\"tfidf\"], ascending=True).head(10)\ntfidf.sort_values(by=[\"tfidf\"], ascending=False).head(10)\ntrn = train.copy()\ntst = test.copy()\ntrn[\"is_train\"] = 1\ntst[\"is_train\"] = 0\nsample_sz = 15000\ncombined_df = pd.concat([trn, tst])\ncombined_sample = combined_df.sample(n=sample_sz)\nvz_sample = vectorizer.fit_transform(list(combined_sample[\"item_description\"]))\nfrom sklearn.decomposition import TruncatedSVD\n\nn_comp = 30\nsvd = TruncatedSVD(n_components=n_comp, random_state=42)\nsvd_tfidf = svd.fit_transform(vz_sample)\nfrom sklearn.manifold import TSNE\n\ntsne_model = TSNE(n_components=2, verbose=1, random_state=42, n_iter=500)\ntsne_tfidf = tsne_model.fit_transform(svd_tfidf)\noutput_notebook()\nplot_tfidf = bp.figure(\n    plot_width=700,\n    plot_height=600,\n    title=\"tf-idf clustering of the item description\",\n    tools=\"pan,wheel_zoom,box_zoom,reset,hover,previewsave\",\n    x_axis_type=None,\n    y_axis_type=None,\n    min_border=1,\n)\ncombined_sample.reset_index(inplace=True, drop=True)\ntfidf_df = pd.DataFrame(tsne_tfidf, columns=[\"x\", \"y\"])\ntfidf_df[\"description\"] = combined_sample[\"item_description\"]\ntfidf_df[\"tokens\"] = combined_sample[\"tokens\"]\ntfidf_df[\"category\"] = combined_sample[\"general_cat\"]\nplot_tfidf.scatter(x=\"x\", y=\"y\", 
source=tfidf_df, alpha=0.7)\nhover = plot_tfidf.select({\"type\": HoverTool})\nhover.tooltips = {\n    \"description\": \"@description\",\n    \"tokens\": \"@tokens\",\n    \"category\": \"@category\",\n}\nshow(plot_tfidf)\nfrom sklearn.cluster import MiniBatchKMeans\n\nnum_clusters = 30  # need to be selected wisely\nkmeans_model = MiniBatchKMeans(\n    n_clusters=num_clusters,\n    init=\"k-means++\",\n    n_init=1,\n    init_size=1000,\n    batch_size=1000,\n    verbose=0,\n    max_iter=1000,\n)\nkmeans = kmeans_model.fit(vz)\nkmeans_clusters = kmeans.predict(vz)\nkmeans_distances = kmeans.transform(vz)\nsorted_centroids = kmeans.cluster_centers_.argsort()[:, ::-1]\nterms = vectorizer.get_feature_names()\nfor i in range(num_clusters):\n    print(\"Cluster %d:\" % i)\n    aux = \"\"\n    for j in sorted_centroids[i, :10]:\n        aux += terms[j] + \" | \"\n    print(aux)\n    print()\nkmeans = kmeans_model.fit(vz_sample)\nkmeans_clusters = kmeans.predict(vz_sample)\nkmeans_distances = kmeans.transform(vz_sample)\ntsne_kmeans = tsne_model.fit_transform(kmeans_distances)\ncolormap = np.array(\n    [\n        \"#6d8dca\",\n        \"#69de53\",\n        \"#723bca\",\n        \"#c3e14c\",\n        \"#c84dc9\",\n        \"#68af4e\",\n        \"#6e6cd5\",\n        \"#e3be38\",\n        \"#4e2d7c\",\n        \"#5fdfa8\",\n        \"#d34690\",\n        \"#3f6d31\",\n        \"#d44427\",\n        \"#7fcdd8\",\n        \"#cb4053\",\n        \"#5e9981\",\n        \"#803a62\",\n        \"#9b9e39\",\n        \"#c88cca\",\n        \"#e1c37b\",\n        \"#34223b\",\n        \"#bdd8a3\",\n        \"#6e3326\",\n        \"#cfbdce\",\n        \"#d07d3c\",\n        \"#52697d\",\n        \"#194196\",\n        \"#d27c88\",\n        \"#36422b\",\n        \"#b68f79\",\n    ]\n)\nkmeans_df = pd.DataFrame(tsne_kmeans, columns=[\"x\", \"y\"])\nkmeans_df[\"cluster\"] = kmeans_clusters\nkmeans_df[\"description\"] = combined_sample[\"item_description\"]\nkmeans_df[\"category\"] = 
combined_sample[\"general_cat\"]\nplot_kmeans = bp.figure(\n    plot_width=700,\n    plot_height=600,\n    title=\"KMeans clustering of the description\",\n    tools=\"pan,wheel_zoom,box_zoom,reset,hover,previewsave\",\n    x_axis_type=None,\n    y_axis_type=None,\n    min_border=1,\n)\nsource = ColumnDataSource(\n    data={\n        \"x\": kmeans_df[\"x\"],\n        \"y\": kmeans_df[\"y\"],\n        \"color\": colormap[kmeans_clusters],\n        \"description\": kmeans_df[\"description\"],\n        \"category\": kmeans_df[\"category\"],\n        \"cluster\": kmeans_df[\"cluster\"],\n    }\n)\nplot_kmeans.scatter(x=\"x\", y=\"y\", color=\"color\", source=source)\nhover = plot_kmeans.select({\"type\": HoverTool})\nhover.tooltips = {\n    \"description\": \"@description\",\n    \"category\": \"@category\",\n    \"cluster\": \"@cluster\",\n}\nshow(plot_kmeans)\ncvectorizer = CountVectorizer(\n    min_df=4, max_features=180000, tokenizer=tokenize, ngram_range=(1, 2)\n)\ncvz = cvectorizer.fit_transform(combined_sample[\"item_description\"])\nlda_model = LatentDirichletAllocation(\n    n_components=20, learning_method=\"online\", max_iter=20, random_state=42\n)\nX_topics = lda_model.fit_transform(cvz)\nn_top_words = 10\ntopic_summaries = []\ntopic_word = lda_model.components_  # get the topic words\nvocab = cvectorizer.get_feature_names()\nfor i, topic_dist in enumerate(topic_word):\n    topic_words = np.array(vocab)[np.argsort(topic_dist)][: -(n_top_words + 1) : -1]\n    topic_summaries.append(\" \".join(topic_words))\n    print(\"Topic {}: {}\".format(i, \" | \".join(topic_words)))\ntsne_lda = tsne_model.fit_transform(X_topics)\nunnormalized = np.matrix(X_topics)\ndoc_topic = unnormalized / unnormalized.sum(axis=1)\nlda_keys = []\nfor i, tweet in enumerate(combined_sample[\"item_description\"]):\n    lda_keys += [doc_topic[i].argmax()]\nlda_df = pd.DataFrame(tsne_lda, columns=[\"x\", \"y\"])\nlda_df[\"description\"] = 
combined_sample[\"item_description\"]\nlda_df[\"category\"] = combined_sample[\"general_cat\"]\nlda_df[\"topic\"] = lda_keys\nlda_df[\"topic\"] = lda_df[\"topic\"].map(int)\nplot_lda = bp.figure(\n    plot_width=700,\n    plot_height=600,\n    title=\"LDA topic visualization\",\n    tools=\"pan,wheel_zoom,box_zoom,reset,hover,previewsave\",\n    x_axis_type=None,\n    y_axis_type=None,\n    min_border=1,\n)\nsource = ColumnDataSource(\n    data={\n        \"x\": lda_df[\"x\"],\n        \"y\": lda_df[\"y\"],\n        \"color\": colormap[lda_keys],\n        \"description\": lda_df[\"description\"],\n        \"topic\": lda_df[\"topic\"],\n        \"category\": lda_df[\"category\"],\n    }\n)\nplot_lda.scatter(source=source, x=\"x\", y=\"y\", color=\"color\")\nhover = plot_kmeans.select({\"type\": HoverTool})\nhover = plot_lda.select({\"type\": HoverTool})\nhover.tooltips = {\n    \"description\": \"@description\",\n    \"topic\": \"@topic\",\n    \"category\": \"@category\",\n}\nshow(plot_lda)\n\n\ndef prepareLDAData():\n    data = {\n        \"vocab\": vocab,\n        \"doc_topic_dists\": doc_topic,\n        \"doc_lengths\": list(lda_df[\"len_docs\"]),\n        \"term_frequency\": cvectorizer.vocabulary_,\n        \"topic_term_dists\": lda_model.components_,\n    }\n    return data\n\n\nimport pyLDAvis\n\nlda_df[\"len_docs\"] = combined_sample[\"tokens\"].map(len)\nldadata = prepareLDAData()\npyLDAvis.enable_notebook()\nprepared_data = pyLDAvis.prepare(**ldadata)\n"
  },
  {
    "path": "stress_tests/kaggle/kaggle19.py",
    "content": "#!/usr/bin/env python\n# coding: utf-8\nimport matplotlib\n\nmatplotlib.use(\"PS\")\nimport warnings\n\nimport matplotlib.pyplot as plt\nimport numpy as np\nimport pandas as pd\nimport seaborn as sns\n\nwarnings.filterwarnings(\"ignore\")\ntrain = pd.read_csv(\"train.csv\")\ntrain.info()\ntrain.head()\nprint(\n    \"The average person kills {:.4f} players, 99% of people have {} kills or less, while the most kills ever recorded is {}.\".format(\n        train[\"kills\"].mean(), train[\"kills\"].quantile(0.99), train[\"kills\"].max()\n    )\n)\ndata = train.copy()\ndata.loc[data[\"kills\"] > data[\"kills\"].quantile(0.99)] = \"8+\"\nplt.figure(figsize=(15, 10))\nsns.countplot(data[\"kills\"].astype(\"str\").sort_values())\nplt.title(\"Kill Count\", fontsize=15)\nplt.show()\ndata = train.copy()\ndata = data[data[\"kills\"] == 0]\nplt.figure(figsize=(15, 10))\nplt.title(\"Damage Dealt by 0 killers\", fontsize=15)\nplt.show()\nprint(\n    \"{} players ({:.4f}%) have won without a single kill!\".format(\n        len(data[data[\"winPlacePerc\"] == 1]),\n        100 * len(data[data[\"winPlacePerc\"] == 1]) / len(train),\n    )\n)\ndata1 = train[train[\"damageDealt\"] == 0].copy()\nprint(\n    \"{} players ({:.4f}%) have won without dealing damage!\".format(\n        len(data1[data1[\"winPlacePerc\"] == 1]),\n        100 * len(data1[data1[\"winPlacePerc\"] == 1]) / len(train),\n    )\n)\nkills = train.copy()\nkills[\"killsCategories\"] = pd.cut(\n    kills[\"kills\"],\n    [-1, 0, 2, 5, 10, 60],\n    labels=[\"0_kills\", \"1-2_kills\", \"3-5_kills\", \"6-10_kills\", \"10+_kills\"],\n)\nplt.figure(figsize=(15, 8))\nsns.boxplot(x=\"killsCategories\", y=\"winPlacePerc\", data=kills)\nplt.show()\nprint(\n    \"The average person walks for {:.1f}m, 99% of people have walked {}m or less, while the marathoner champion walked for {}m.\".format(\n        train[\"walkDistance\"].mean(),\n        train[\"walkDistance\"].quantile(0.99),\n        
train[\"walkDistance\"].max(),\n    )\n)\ndata = train.copy()\ndata = data[data[\"walkDistance\"] < train[\"walkDistance\"].quantile(0.99)]\nplt.figure(figsize=(15, 10))\nplt.title(\"Walking Distance Distribution\", fontsize=15)\nsns.distplot(data[\"walkDistance\"])\nplt.show()\nprint(\n    \"{} players ({:.4f}%) walked 0 meters. This means that they die before even taking a step or they are afk (more possible).\".format(\n        len(data[data[\"walkDistance\"] == 0]),\n        100 * len(data1[data1[\"walkDistance\"] == 0]) / len(train),\n    )\n)\nprint(\n    \"The average person drives for {:.1f}m, 99% of people have drived {}m or less, while the formula 1 champion drived for {}m.\".format(\n        train[\"rideDistance\"].mean(),\n        train[\"rideDistance\"].quantile(0.99),\n        train[\"rideDistance\"].max(),\n    )\n)\ndata = train.copy()\ndata = data[data[\"rideDistance\"] < train[\"rideDistance\"].quantile(0.9)]\nplt.figure(figsize=(15, 10))\nplt.title(\"Ride Distance Distribution\", fontsize=15)\nsns.distplot(data[\"rideDistance\"])\nplt.show()\nprint(\n    \"{} players ({:.4f}%) drived for 0 meters. 
This means that they don't have a driving licence yet.\".format(\n        len(data[data[\"rideDistance\"] == 0]),\n        100 * len(data1[data1[\"rideDistance\"] == 0]) / len(train),\n    )\n)\nf, ax1 = plt.subplots(figsize=(20, 10))\nsns.pointplot(\n    x=\"vehicleDestroys\", y=\"winPlacePerc\", data=data, color=\"#606060\", alpha=0.8\n)\nplt.xlabel(\"Number of Vehicle Destroys\", fontsize=15, color=\"blue\")\nplt.ylabel(\"Win Percentage\", fontsize=15, color=\"blue\")\nplt.title(\"Vehicle Destroys/ Win Ratio\", fontsize=20, color=\"blue\")\nplt.grid()\nplt.show()\nprint(\n    \"The average person swims for {:.1f}m, 99% of people have swimemd {}m or less, while the olympic champion swimmed for {}m.\".format(\n        train[\"swimDistance\"].mean(),\n        train[\"swimDistance\"].quantile(0.99),\n        train[\"swimDistance\"].max(),\n    )\n)\ndata = train.copy()\ndata = data[data[\"swimDistance\"] < train[\"swimDistance\"].quantile(0.95)]\nplt.figure(figsize=(15, 10))\nplt.title(\"Swim Distance Distribution\", fontsize=15)\nsns.distplot(data[\"swimDistance\"])\nplt.show()\nswim = train.copy()\nswim[\"swimDistance\"] = pd.cut(\n    swim[\"swimDistance\"], [-1, 0, 5, 20, 5286], labels=[\"0m\", \"1-5m\", \"6-20m\", \"20m+\"]\n)\nplt.figure(figsize=(15, 8))\nsns.boxplot(x=\"swimDistance\", y=\"winPlacePerc\", data=swim)\nplt.show()\nprint(\n    \"The average person uses {:.1f} heal items, 99% of people use {} or less, while the doctor used {}.\".format(\n        train[\"heals\"].mean(), train[\"heals\"].quantile(0.99), train[\"heals\"].max()\n    )\n)\nprint(\n    \"The average person uses {:.1f} boost items, 99% of people use {} or less, while the doctor used {}.\".format(\n        train[\"boosts\"].mean(), train[\"boosts\"].quantile(0.99), train[\"boosts\"].max()\n    )\n)\ndata = train.copy()\ndata = data[data[\"heals\"] < data[\"heals\"].quantile(0.99)]\ndata = data[data[\"boosts\"] < data[\"boosts\"].quantile(0.99)]\nf, ax1 = plt.subplots(figsize=(20, 
10))\nsns.pointplot(x=\"heals\", y=\"winPlacePerc\", data=data, color=\"lime\", alpha=0.8)\nsns.pointplot(x=\"boosts\", y=\"winPlacePerc\", data=data, color=\"blue\", alpha=0.8)\nplt.text(4, 0.6, \"Heals\", color=\"lime\", fontsize=17, style=\"italic\")\nplt.text(4, 0.55, \"Boosts\", color=\"blue\", fontsize=17, style=\"italic\")\nplt.xlabel(\"Number of heal/boost items\", fontsize=15, color=\"blue\")\nplt.ylabel(\"Win Percentage\", fontsize=15, color=\"blue\")\nplt.title(\"Heals vs Boosts\", fontsize=20, color=\"blue\")\nplt.grid()\nplt.show()\nsolos = train[train[\"numGroups\"] > 50]\nduos = train[(train[\"numGroups\"] > 25) & (train[\"numGroups\"] <= 50)]\nsquads = train[train[\"numGroups\"] <= 25]\nprint(\n    \"There are {} ({:.2f}%) solo games, {} ({:.2f}%) duo games and {} ({:.2f}%) squad games.\".format(\n        len(solos),\n        100 * len(solos) / len(train),\n        len(duos),\n        100 * len(duos) / len(train),\n        len(squads),\n        100 * len(squads) / len(train),\n    )\n)\nf, ax1 = plt.subplots(figsize=(20, 10))\nsns.pointplot(x=\"kills\", y=\"winPlacePerc\", data=solos, color=\"black\", alpha=0.8)\nsns.pointplot(x=\"kills\", y=\"winPlacePerc\", data=duos, color=\"#CC0000\", alpha=0.8)\nsns.pointplot(x=\"kills\", y=\"winPlacePerc\", data=squads, color=\"#3399FF\", alpha=0.8)\nplt.text(37, 0.6, \"Solos\", color=\"black\", fontsize=17, style=\"italic\")\nplt.text(37, 0.55, \"Duos\", color=\"#CC0000\", fontsize=17, style=\"italic\")\nplt.text(37, 0.5, \"Squads\", color=\"#3399FF\", fontsize=17, style=\"italic\")\nplt.xlabel(\"Number of kills\", fontsize=15, color=\"blue\")\nplt.ylabel(\"Win Percentage\", fontsize=15, color=\"blue\")\nplt.title(\"Solo vs Duo vs Squad Kills\", fontsize=20, color=\"blue\")\nplt.grid()\nplt.show()\nf, ax1 = plt.subplots(figsize=(20, 10))\nsns.pointplot(x=\"DBNOs\", y=\"winPlacePerc\", data=duos, color=\"#CC0000\", alpha=0.8)\nsns.pointplot(x=\"DBNOs\", y=\"winPlacePerc\", data=squads, color=\"#3399FF\", 
alpha=0.8)\nsns.pointplot(x=\"assists\", y=\"winPlacePerc\", data=duos, color=\"#FF6666\", alpha=0.8)\nsns.pointplot(x=\"assists\", y=\"winPlacePerc\", data=squads, color=\"#CCE5FF\", alpha=0.8)\nsns.pointplot(x=\"revives\", y=\"winPlacePerc\", data=duos, color=\"#660000\", alpha=0.8)\nsns.pointplot(x=\"revives\", y=\"winPlacePerc\", data=squads, color=\"#000066\", alpha=0.8)\nplt.text(14, 0.5, \"Duos - Assists\", color=\"#FF6666\", fontsize=17, style=\"italic\")\nplt.text(14, 0.45, \"Duos - DBNOs\", color=\"#CC0000\", fontsize=17, style=\"italic\")\nplt.text(14, 0.4, \"Duos - Revives\", color=\"#660000\", fontsize=17, style=\"italic\")\nplt.text(14, 0.35, \"Squads - Assists\", color=\"#CCE5FF\", fontsize=17, style=\"italic\")\nplt.text(14, 0.3, \"Squads - DBNOs\", color=\"#3399FF\", fontsize=17, style=\"italic\")\nplt.text(14, 0.25, \"Squads - Revives\", color=\"#000066\", fontsize=17, style=\"italic\")\nplt.xlabel(\"Number of DBNOs/Assits/Revives\", fontsize=15, color=\"blue\")\nplt.ylabel(\"Win Percentage\", fontsize=15, color=\"blue\")\nplt.title(\"Duo vs Squad DBNOs, Assists, and Revives\", fontsize=20, color=\"blue\")\nplt.grid()\nplt.show()\nf, ax = plt.subplots(figsize=(15, 15))\nsns.heatmap(train.corr(), annot=True, linewidths=0.5, fmt=\".1f\", ax=ax)\nplt.show()\nk = 5  # number of variables for heatmap\nf, ax = plt.subplots(figsize=(11, 11))\ncols = train.corr().nlargest(k, \"winPlacePerc\")[\"winPlacePerc\"].index\ncm = np.corrcoef(train[cols].values.T)\nsns.set(font_scale=1.25)\nhm = sns.heatmap(\n    cm,\n    cbar=True,\n    annot=True,\n    square=True,\n    fmt=\".2f\",\n    annot_kws={\"size\": 10},\n    yticklabels=cols.values,\n    xticklabels=cols.values,\n)\nplt.show()\ntrain[\"playersJoined\"] = train.groupby(\"matchId\")[\"matchId\"].transform(\"count\")\ndata = train.copy()\ndata = data[data[\"playersJoined\"] > 49]\ntrain[\"killsNorm\"] = train[\"kills\"] * ((100 - train[\"playersJoined\"]) / 100 + 1)\ntrain[\"damageDealtNorm\"] = 
train[\"damageDealt\"] * (\n    (100 - train[\"playersJoined\"]) / 100 + 1\n)\ntrain[[\"playersJoined\", \"kills\", \"killsNorm\", \"damageDealt\", \"damageDealtNorm\"]][5:8]\ntrain[\"healsAndBoosts\"] = train[\"heals\"] + train[\"boosts\"]\ntrain[\"totalDistance\"] = (\n    train[\"walkDistance\"] + train[\"rideDistance\"] + train[\"swimDistance\"]\n)\ntrain[\"boostsPerWalkDistance\"] = train[\"boosts\"] / (\n    train[\"walkDistance\"] + 1\n)  # The +1 is to avoid infinity, because there are entries where boosts>0 and walkDistance=0. Strange.\ntrain[\"boostsPerWalkDistance\"].fillna(0, inplace=True)\ntrain[\"healsPerWalkDistance\"] = train[\"heals\"] / (\n    train[\"walkDistance\"] + 1\n)  # The +1 is to avoid infinity, because there are entries where heals>0 and walkDistance=0. Strange.\ntrain[\"healsPerWalkDistance\"].fillna(0, inplace=True)\ntrain[\"healsAndBoostsPerWalkDistance\"] = train[\"healsAndBoosts\"] / (\n    train[\"walkDistance\"] + 1\n)  # The +1 is to avoid infinity.\ntrain[\"healsAndBoostsPerWalkDistance\"].fillna(0, inplace=True)\ntrain[\n    [\n        \"walkDistance\",\n        \"boosts\",\n        \"boostsPerWalkDistance\",\n        \"heals\",\n        \"healsPerWalkDistance\",\n        \"healsAndBoosts\",\n        \"healsAndBoostsPerWalkDistance\",\n    ]\n][40:45]\ntrain[\"killsPerWalkDistance\"] = train[\"kills\"] / (\n    train[\"walkDistance\"] + 1\n)  # The +1 is to avoid infinity, because there are entries where kills>0 and walkDistance=0. Strange.\ntrain[\"killsPerWalkDistance\"].fillna(0, inplace=True)\ntrain[\n    [\"kills\", \"walkDistance\", \"rideDistance\", \"killsPerWalkDistance\", \"winPlacePerc\"]\n].sort_values(by=\"killsPerWalkDistance\").tail(10)\ntrain[\"team\"] = [\n    1 if i > 50 else 2 if (i > 25 & i <= 50) else 4 for i in train[\"numGroups\"]\n]\ntrain.head()\n"
  },
  {
    "path": "stress_tests/kaggle/kaggle20.py",
    "content": "import matplotlib\n\nmatplotlib.use(\"PS\")\nimport time\n\nimport matplotlib.pyplot as plt\nimport numpy as np  # linear algebra\nimport seaborn as sns  # data visualization library\n\nimport modin.pandas as pd  # data processing, CSV file I/O (e.g. pd.read_csv)\n\ndata = pd.read_csv(\"data.csv\")\ndata.head()  # head method show only first 5 rows\ncol = data.columns\nprint(col)\ny = data.diagnosis  # M or B\nlist = [\"Unnamed: 32\", \"id\", \"diagnosis\"]\nx = data.drop(list, axis=1)\nx.head()\nax = sns.countplot(y, label=\"Count\")  # M = 212, B = 357\nx.describe()\ndata_dia = y\ndata = x\ndata_n_2 = (data - data.mean()) / (data.std())  # standardization\ndata = pd.concat([y, data_n_2.iloc[:, 0:10]], axis=1)\ndata = pd.melt(data, id_vars=\"diagnosis\", var_name=\"features\", value_name=\"value\")\nplt.figure(figsize=(10, 10))\nsns.violinplot(\n    x=\"features\", y=\"value\", hue=\"diagnosis\", data=data, split=True, inner=\"quart\"\n)\nplt.xticks(rotation=90)\ndata = pd.concat([y, data_n_2.iloc[:, 10:20]], axis=1)\ndata = pd.melt(data, id_vars=\"diagnosis\", var_name=\"features\", value_name=\"value\")\nplt.figure(figsize=(10, 10))\nsns.violinplot(\n    x=\"features\", y=\"value\", hue=\"diagnosis\", data=data, split=True, inner=\"quart\"\n)\nplt.xticks(rotation=90)\ndata = pd.concat([y, data_n_2.iloc[:, 20:31]], axis=1)\ndata = pd.melt(data, id_vars=\"diagnosis\", var_name=\"features\", value_name=\"value\")\nplt.figure(figsize=(10, 10))\nsns.violinplot(\n    x=\"features\", y=\"value\", hue=\"diagnosis\", data=data, split=True, inner=\"quart\"\n)\nplt.xticks(rotation=90)\nplt.figure(figsize=(10, 10))\nsns.boxplot(x=\"features\", y=\"value\", hue=\"diagnosis\", data=data)\nplt.xticks(rotation=90)\nsns.jointplot(\n    x.loc[:, \"concavity_worst\"],\n    x.loc[:, \"concave points_worst\"],\n    kind=\"regg\",\n    color=\"#ce1414\",\n)\nsns.set(style=\"white\")\ndf = x.loc[:, [\"radius_worst\", \"perimeter_worst\", \"area_worst\"]]\ng = 
sns.PairGrid(df, diag_sharey=False)\ng.map_lower(sns.kdeplot, cmap=\"Blues_d\")\ng.map_upper(plt.scatter)\ng.map_diag(sns.kdeplot, lw=3)\nsns.set(style=\"whitegrid\", palette=\"muted\")\ndata_dia = y\ndata = x\ndata_n_2 = (data - data.mean()) / (data.std())  # standardization\ndata = pd.concat([y, data_n_2.iloc[:, 0:10]], axis=1)\ndata = pd.melt(data, id_vars=\"diagnosis\", var_name=\"features\", value_name=\"value\")\nplt.figure(figsize=(10, 10))\ntic = time.time()\nsns.swarmplot(x=\"features\", y=\"value\", hue=\"diagnosis\", data=data)\nplt.xticks(rotation=90)\ndata = pd.concat([y, data_n_2.iloc[:, 10:20]], axis=1)\ndata = pd.melt(data, id_vars=\"diagnosis\", var_name=\"features\", value_name=\"value\")\nplt.figure(figsize=(10, 10))\nsns.swarmplot(x=\"features\", y=\"value\", hue=\"diagnosis\", data=data)\nplt.xticks(rotation=90)\ndata = pd.concat([y, data_n_2.iloc[:, 20:31]], axis=1)\ndata = pd.melt(data, id_vars=\"diagnosis\", var_name=\"features\", value_name=\"value\")\nplt.figure(figsize=(10, 10))\nsns.swarmplot(x=\"features\", y=\"value\", hue=\"diagnosis\", data=data)\ntoc = time.time()\nplt.xticks(rotation=90)\nprint(\"swarm plot time: \", toc - tic, \" s\")\nf, ax = plt.subplots(figsize=(18, 18))\nsns.heatmap(x.corr(), annot=True, linewidths=0.5, fmt=\".1f\", ax=ax)\ndrop_list1 = [\n    \"perimeter_mean\",\n    \"radius_mean\",\n    \"compactness_mean\",\n    \"concave points_mean\",\n    \"radius_se\",\n    \"perimeter_se\",\n    \"radius_worst\",\n    \"perimeter_worst\",\n    \"compactness_worst\",\n    \"concave points_worst\",\n    \"compactness_se\",\n    \"concave points_se\",\n    \"texture_worst\",\n    \"area_worst\",\n]\nx_1 = x.drop(drop_list1, axis=1)  # do not modify x, we will use it later\nx_1.head()\nf, ax = plt.subplots(figsize=(14, 14))\nsns.heatmap(x_1.corr(), annot=True, linewidths=0.5, fmt=\".1f\", ax=ax)\nfrom sklearn.ensemble import RandomForestClassifier\nfrom sklearn.metrics import confusion_matrix  # f1_score\nfrom 
sklearn.metrics import accuracy_score\nfrom sklearn.model_selection import train_test_split\n\nx_train, x_test, y_train, y_test = train_test_split(\n    x_1, y, test_size=0.3, random_state=42\n)\nclf_rf = RandomForestClassifier(random_state=43)\nclr_rf = clf_rf.fit(x_train, y_train)\nac = accuracy_score(y_test, clf_rf.predict(x_test))\nprint(\"Accuracy is: \", ac)\ncm = confusion_matrix(y_test, clf_rf.predict(x_test))\nsns.heatmap(cm, annot=True, fmt=\"d\")\nfrom sklearn.feature_selection import SelectKBest, chi2\n\nselect_feature = SelectKBest(chi2, k=5).fit(x_train, y_train)\nprint(\"Score list:\", select_feature.scores_)\nprint(\"Feature list:\", x_train.columns)\nx_train_2 = select_feature.transform(x_train)\nx_test_2 = select_feature.transform(x_test)\nclf_rf_2 = RandomForestClassifier()\nclr_rf_2 = clf_rf_2.fit(x_train_2, y_train)\nac_2 = accuracy_score(y_test, clf_rf_2.predict(x_test_2))\nprint(\"Accuracy is: \", ac_2)\ncm_2 = confusion_matrix(y_test, clf_rf_2.predict(x_test_2))\nsns.heatmap(cm_2, annot=True, fmt=\"d\")\nfrom sklearn.feature_selection import RFE\n\nclf_rf_3 = RandomForestClassifier()\nrfe = RFE(estimator=clf_rf_3, n_features_to_select=5, step=1)\nrfe = rfe.fit(x_train, y_train)\nprint(\"Chosen best 5 feature by rfe:\", x_train.columns[rfe.support_])\nfrom sklearn.feature_selection import RFECV\n\nclf_rf_4 = RandomForestClassifier()\nrfecv = RFECV(\n    estimator=clf_rf_4, step=1, cv=5, scoring=\"accuracy\"\n)  # 5-fold cross-validation\nrfecv = rfecv.fit(x_train, y_train)\nprint(\"Optimal number of features :\", rfecv.n_features_)\nprint(\"Best features :\", x_train.columns[rfecv.support_])\nimport matplotlib.pyplot as plt\n\nplt.figure()\nplt.xlabel(\"Number of features selected\")\nplt.ylabel(\"Cross validation score of number of selected features\")\nplt.plot(range(1, len(rfecv.grid_scores_) + 1), rfecv.grid_scores_)\nplt.show()\nclf_rf_5 = RandomForestClassifier()\nclr_rf_5 = clf_rf_5.fit(x_train, y_train)\nimportances = 
clr_rf_5.feature_importances_\nstd = np.std([tree.feature_importances_ for tree in clf_rf.estimators_], axis=0)\nindices = np.argsort(importances)[::-1]\nprint(\"Feature ranking:\")\nfor f in range(x_train.shape[1]):\n    print(\"%d. feature %d (%f)\" % (f + 1, indices[f], importances[indices[f]]))\nplt.figure(1, figsize=(14, 13))\nplt.title(\"Feature importances\")\nplt.bar(\n    range(x_train.shape[1]),\n    importances[indices],\n    color=\"g\",\n    yerr=std[indices],\n    align=\"center\",\n)\nplt.xticks(range(x_train.shape[1]), x_train.columns[indices], rotation=90)\nplt.xlim([-1, x_train.shape[1]])\nplt.show()\nx_train, x_test, y_train, y_test = train_test_split(\n    x, y, test_size=0.3, random_state=42\n)\nx_train_N = (x_train - x_train.mean()) / (x_train.max() - x_train.min())\nx_test_N = (x_test - x_test.mean()) / (x_test.max() - x_test.min())\nfrom sklearn.decomposition import PCA\n\npca = PCA()\npca.fit(x_train_N)\nplt.figure(1, figsize=(14, 13))\nplt.clf()\nplt.axes([0.2, 0.2, 0.7, 0.7])\nplt.plot(pca.explained_variance_ratio_, linewidth=2)\nplt.axis(\"tight\")\nplt.xlabel(\"n_components\")\nplt.ylabel(\"explained_variance_ratio_\")\n"
  },
  {
    "path": "stress_tests/kaggle/kaggle22.py",
    "content": "import matplotlib\n\nmatplotlib.use(\"PS\")\nimport numpy as np\nfrom sklearn.feature_extraction.text import TfidfVectorizer  # CountVectorizer\nfrom sklearn.linear_model import LogisticRegression\n\nimport modin.pandas as pd\n\ntrain = pd.read_csv(\"train.csv\")\ntest = pd.read_csv(\"test.csv\")\nsubm = pd.read_csv(\"sample_submission.csv\")\ntrain.head()\ntrain[\"comment_text\"][0]\ntrain[\"comment_text\"][2]\nlens = train.comment_text.str.len()\nlens.mean(), lens.std(), lens.max()\nlens.hist()\nlabel_cols = [\"toxic\", \"severe_toxic\", \"obscene\", \"threat\", \"insult\", \"identity_hate\"]\ntrain[\"none\"] = 1 - train[label_cols].max(axis=1)\ntrain.describe()\nlen(train), len(test)\nCOMMENT = \"comment_text\"\ntrain[COMMENT].fillna(\"unknown\", inplace=True)\ntest[COMMENT].fillna(\"unknown\", inplace=True)\nimport re\nimport string\n\nre_tok = re.compile(f\"([{string.punctuation}“”¨«»®´·º½¾¿¡§£₤‘’])\")\n\n\ndef tokenize(s):\n    return re_tok.sub(r\" \\1 \", s).split()\n\n\nn = train.shape[0]\nvec = TfidfVectorizer(\n    ngram_range=(1, 2),\n    tokenizer=tokenize,\n    min_df=3,\n    max_df=0.9,\n    strip_accents=\"unicode\",\n    use_idf=1,\n    smooth_idf=1,\n    sublinear_tf=1,\n)\ntrn_term_doc = vec.fit_transform(train[COMMENT])\ntest_term_doc = vec.transform(test[COMMENT])\ntrn_term_doc, test_term_doc\n\n\ndef pr(y_i, y):\n    p = x[y == y_i].sum(0)\n    return (p + 1) / ((y == y_i).sum() + 1)\n\n\nx = trn_term_doc\ntest_x = test_term_doc\n\n\ndef get_mdl(y):\n    y = y.values\n    r = np.log(pr(1, y) / pr(0, y))\n    m = LogisticRegression(C=4, dual=True)\n    x_nb = x.multiply(r)\n    return m.fit(x_nb, y), r\n\n\npreds = np.zeros((len(test), len(label_cols)))\nfor i, j in enumerate(label_cols):\n    print(\"fit\", j)\n    m, r = get_mdl(train[j])\n    preds[:, i] = m.predict_proba(test_x.multiply(r))[:, 1]\nsubmid = pd.DataFrame({\"id\": subm[\"id\"]})\nsubmission = pd.concat([submid, pd.DataFrame(preds, columns=label_cols)], 
axis=1)\nsubmission.to_csv(\"submission.csv\", index=False)\n"
  },
  {
    "path": "stress_tests/kaggle/kaggle3.py",
    "content": "#!/usr/bin/env python\nimport matplotlib\n\nmatplotlib.use(\"PS\")\nimport matplotlib.pyplot as plt\nimport numpy as np  # linear algebra\nimport seaborn as sns  # visualization tool\n\nimport modin.pandas as pd  # data processing, CSV file I/O (e.g. pd.read_csv)\n\ndata = pd.read_csv(\"pokemon.csv\")\ndata.info()\ndata.corr()\nf, ax = plt.subplots(figsize=(18, 18))\nsns.heatmap(data.corr(), annot=True, linewidths=0.5, fmt=\".1f\", ax=ax)\ndata.head(10)\ndata.columns\ndata.Speed.plot(\n    kind=\"line\",\n    color=\"g\",\n    label=\"Speed\",\n    linewidth=1,\n    alpha=0.5,\n    grid=True,\n    linestyle=\":\",\n)\ndata.Defense.plot(\n    color=\"r\", label=\"Defense\", linewidth=1, alpha=0.5, grid=True, linestyle=\"-.\"\n)\nplt.legend(loc=\"upper right\")  # legend = puts label into plot\nplt.xlabel(\"x axis\")  # label = name of label\nplt.ylabel(\"y axis\")\nplt.title(\"Line Plot\")  # title = title of plot\ndata.plot(kind=\"scatter\", x=\"Attack\", y=\"Defense\", alpha=0.5, color=\"red\")\nplt.xlabel(\"Attack\")  # label = name of label\nplt.ylabel(\"Defence\")\nplt.title(\"Attack Defense Scatter Plot\")  # title = title of plot\ndata.Speed.plot(kind=\"hist\", bins=50, figsize=(12, 12))\ndata.Speed.plot(kind=\"hist\", bins=50)\ndictionary = {\"spain\": \"madrid\", \"usa\": \"vegas\"}\nprint(dictionary.keys())\nprint(dictionary.values())\ndictionary[\"spain\"] = \"barcelona\"  # update existing entry\nprint(dictionary)\ndictionary[\"france\"] = \"paris\"  # Add new entry\nprint(dictionary)\ndel dictionary[\"spain\"]  # remove entry with key 'spain'\nprint(dictionary)\nprint(\"france\" in dictionary)  # check include or not\ndictionary.clear()  # remove all entries in dict\nprint(dictionary)\nprint(dictionary)  # it gives error because dictionary is deleted\ndata = pd.read_csv(\"pokemon.csv\")\nseries = data[\"Defense\"]  # data['Defense'] = series\nprint(type(series))\ndata_frame = data[[\"Defense\"]]  # data[['Defense']] = data 
frame\nprint(type(data_frame))\nprint(3 > 2)\nprint(3 != 2)\nprint(True and False)\nprint(True or False)\nx = (\n    data[\"Defense\"] > 200\n)  # There are only 3 pokemons who have higher defense value than 200\ndata[x]\ndata[np.logical_and(data[\"Defense\"] > 200, data[\"Attack\"] > 100)]\ndata[(data[\"Defense\"] > 200) & (data[\"Attack\"] > 100)]\ni = 0\nwhile i != 5:\n    print(\"i is: \", i)\n    i += 1\nprint(i, \" is equal to 5\")\nlis = [1, 2, 3, 4, 5]\nfor i in lis:\n    print(\"i is: \", i)\nprint(\"\")\nfor index, value in enumerate(lis):\n    print(index, \" : \", value)\nprint(\"\")\ndictionary = {\"spain\": \"madrid\", \"france\": \"paris\"}\nfor key, value in dictionary.items():\n    print(key, \" : \", value)\nprint(\"\")\nfor index, value in data[[\"Attack\"]][0:1].iterrows():\n    print(index, \" : \", value)\n\n\ndef tuble_ex():\n    \"\"\"return defined t tuble\"\"\"\n    t = (1, 2, 3)\n    return t\n\n\na, b, c = tuble_ex()\nprint(a, b, c)\nx = 2\n\n\ndef f():\n    x = 3\n    return x\n\n\nprint(x)  # x = 2 global scope\nprint(f())  # x = 3 local scope\nx = 5\n\n\ndef f():\n    y = 2 * x  # there is no local scope x\n    return y\n\n\nprint(f())  # it uses global scope x\nimport builtins\n\ndir(builtins)\n\n\ndef square():\n    \"\"\"return square of value\"\"\"\n\n    def add():\n        \"\"\"add two local variable\"\"\"\n        x = 2\n        y = 3\n        z = x + y\n        return z\n\n    return add() ** 2\n\n\nprint(square())\n\n\ndef f(a, b=1, c=2):\n    y = a + b + c\n    return y\n\n\nprint(f(5))\nprint(f(5, 4, 3))\n\n\ndef f(*args):\n    for i in args:\n        print(i)\n\n\nf(1)\nprint(\"\")\nf(1, 2, 3, 4)\n\n\ndef f(**kwargs):\n    \"\"\"print key and value of dictionary\"\"\"\n    for (\n        key,\n        value,\n    ) in (\n        kwargs.items()\n    ):  # If you do not understand this part turn for loop part and look at dictionary in for loop\n        print(key, \" \", value)\n\n\nf(country=\"spain\", capital=\"madrid\", 
population=123456)\nnumber_list = [1, 2, 3]\ny = map(lambda x: x**2, number_list)\nprint(list(y))\nname = \"ronaldo\"\nit = iter(name)\nprint(next(it))  # print next iteration\nprint(*it)  # print remaining iteration\nlist1 = [1, 2, 3, 4]\nlist2 = [5, 6, 7, 8]\nz = zip(list1, list2)\nprint(z)\nz_list = list(z)\nprint(z_list)\nun_zip = zip(*z_list)\nun_list1, un_list2 = list(un_zip)  # unzip returns tuble\nprint(un_list1)\nprint(un_list2)\nprint(type(un_list2))\nnum1 = [1, 2, 3]\nnum2 = [i + 1 for i in num1]\nprint(num2)\nnum1 = [5, 10, 15]\nnum2 = [i**2 if i == 10 else i - 5 if i < 7 else i + 5 for i in num1]\nprint(num2)\nthreshold = sum(data.Speed) / len(data.Speed)\ndata[\"speed_level\"] = [\"high\" if i > threshold else \"low\" for i in data.Speed]\ndata.loc[:10, [\"speed_level\", \"Speed\"]]  # we will learn loc more detailed later\ndata = pd.read_csv(\"pokemon.csv\")\ndata.head()  # head shows first 5 rows\ndata.tail()\ndata.columns\ndata.shape\ndata.info()\nprint(\n    data[\"Type 1\"].value_counts(dropna=False)\n)  # if there are nan values that also be counted\ndata.describe()  # ignore null entries\ndata.boxplot(column=\"Attack\", by=\"Legendary\")\ndata_new = data.head()  # I only take 5 rows into new data\ndata_new\nmelted = pd.melt(frame=data_new, id_vars=\"Name\", value_vars=[\"Attack\", \"Defense\"])\nmelted\nmelted.pivot(index=\"Name\", columns=\"variable\", values=\"value\")\ndata1 = data.head()\ndata2 = data.tail()\nconc_data_row = pd.concat(\n    [data1, data2], axis=0, ignore_index=True\n)  # axis = 0 : adds dataframes in row\nconc_data_row\ndata1 = data[\"Attack\"].head()\ndata2 = data[\"Defense\"].head()\nconc_data_col = pd.concat([data1, data2], axis=1)  # axis = 0 : adds dataframes in row\nconc_data_col\ndata.dtypes\ndata[\"Type 1\"] = data[\"Type 1\"].astype(\"category\")\ndata[\"Speed\"] = data[\"Speed\"].astype(\"float\")\ndata.dtypes\ndata.info()\ndata[\"Type 2\"].value_counts(dropna=False)\ndata1 = (\n    data  # also we will use data 
to fill missing value so I assign it to data1 variable\n)\ndata1[\"Type 2\"].dropna(\n    inplace=True\n)  # inplace = True means we do not assign it to new variable. Changes automatically assigned to data\nassert 1 == 1  # return nothing because it is true\nassert data[\"Type 2\"].notnull().all()  # returns nothing because we drop nan values\ndata[\"Type 2\"].fillna(\"empty\", inplace=True)\nassert (\n    data[\"Type 2\"].notnull().all()\n)  # returns nothing because we do not have nan values\ncountry = [\"Spain\", \"France\"]\npopulation = [\"11\", \"12\"]\nlist_label = [\"country\", \"population\"]\nlist_col = [country, population]\nzipped = list(zip(list_label, list_col))\ndata_dict = dict(zipped)\ndf = pd.DataFrame(data_dict)\ndf\ndf[\"capital\"] = [\"madrid\", \"paris\"]\ndf\ndf[\"income\"] = 0  # Broadcasting entire column\ndf\ndata1 = data.loc[:, [\"Attack\", \"Defense\", \"Speed\"]]\ndata1.plot()\ndata1.plot(subplots=True)\nplt.show()\ndata1.plot(kind=\"scatter\", x=\"Attack\", y=\"Defense\")\nplt.show()\ndata1.plot(kind=\"hist\", y=\"Defense\", bins=50, range=(0, 250), normed=True)\nfig, axes = plt.subplots(nrows=2, ncols=1)\ndata1.plot(kind=\"hist\", y=\"Defense\", bins=50, range=(0, 250), normed=True, ax=axes[0])\ndata1.plot(\n    kind=\"hist\",\n    y=\"Defense\",\n    bins=50,\n    range=(0, 250),\n    normed=True,\n    ax=axes[1],\n    cumulative=True,\n)\nplt.savefig(\"graph.png\")\nplt\ndata.describe()\ntime_list = [\"1992-03-08\", \"1992-04-12\"]\nprint(type(time_list[1]))  # As you can see date is string\ndatetime_object = pd.to_datetime(time_list)\nprint(type(datetime_object))\nimport warnings\n\nwarnings.filterwarnings(\"ignore\")\ndata2 = data.head()\ndate_list = [\"1992-01-10\", \"1992-02-10\", \"1992-03-10\", \"1993-03-15\", \"1993-03-16\"]\ndatetime_object = pd.to_datetime(date_list)\ndata2[\"date\"] = datetime_object\ndata2 = 
data2.set_index(\"date\")\ndata2\nprint(data2.loc[\"1993-03-16\"])\nprint(data2.loc[\"1992-03-10\":\"1993-03-16\"])\ndata2.resample(\"A\").mean()\ndata2.resample(\"M\").mean()\ndata2.resample(\"M\").first().interpolate(\"linear\")\ndata2.resample(\"M\").mean().interpolate(\"linear\")\ndata = pd.read_csv(\"pokemon.csv\")\ndata = data.set_index(\"#\")\ndata.head()\ndata[\"HP\"][1]\ndata.HP[1]\ndata.loc[1, [\"HP\"]]\ndata[[\"HP\", \"Attack\"]]\nprint(type(data[\"HP\"]))  # series\nprint(type(data[[\"HP\"]]))  # data frames\ndata.loc[1:10, \"HP\":\"Defense\"]  # 10 and \"Defense\" are inclusive\ndata.loc[10:1:-1, \"HP\":\"Defense\"]\ndata.loc[1:10, \"Speed\":]\nboolean = data.HP > 200\ndata[boolean]\nfirst_filter = data.HP > 150\nsecond_filter = data.Speed > 35\ndata[first_filter & second_filter]\ndata.HP[data.Speed < 15]\n\n\ndef div(n):\n    return n / 2\n\n\ndata.HP.apply(div)\ndata.HP.apply(lambda n: n / 2)\ndata[\"total_power\"] = data.Attack + data.Defense\ndata.head()\nprint(data.index.name)\ndata.index.name = \"index_name\"\ndata.head()\ndata.head()\ndata3 = data.copy()\ndata3.index = range(100, 100 + len(data3.index), 1)\ndata3.head()\ndata = pd.read_csv(\"pokemon.csv\")\ndata.head()\ndata1 = data.set_index([\"Type 1\", \"Type 2\"])\ndata1.head(100)\ndic = {\n    \"treatment\": [\"A\", \"A\", \"B\", \"B\"],\n    \"gender\": [\"F\", \"M\", \"F\", \"M\"],\n    \"response\": [10, 45, 5, 9],\n    \"age\": [15, 4, 72, 65],\n}\ndf = pd.DataFrame(dic)\ndf\ndf.pivot(index=\"treatment\", columns=\"gender\", values=\"response\")\ndf1 = df.set_index([\"treatment\", \"gender\"])\ndf1\ndf1.unstack(level=0)\ndf1.unstack(level=1)\ndf2 = df1.swaplevel(0, 1)\ndf2\ndf\npd.melt(df, id_vars=\"treatment\", value_vars=[\"age\", \"response\"])\ndf\ndf.groupby(\"treatment\").mean()  # mean is aggregation / reduce method\ndf.groupby(\"treatment\").age.max()\ndf.groupby(\"treatment\")[[\"age\", \"response\"]].min()\ndf.info()\n"
  },
  {
    "path": "stress_tests/kaggle/kaggle4.py",
    "content": "import matplotlib\n\nmatplotlib.use(\"PS\")\nimport matplotlib.pyplot as plt  # Matlab-style plotting\nimport numpy as np  # linear algebra\nimport seaborn as sns\n\nimport modin.pandas as pd  # data processing, CSV file I/O (e.g. pd.read_csv)\n\ncolor = sns.color_palette()\nsns.set_style(\"darkgrid\")\nimport warnings\n\n\ndef ignore_warn(*args, **kwargs):\n    pass\n\n\nwarnings.warn = ignore_warn  # ignore annoying warning (from sklearn and seaborn)\nfrom scipy import stats\nfrom scipy.stats import norm, skew  # for some statistics\n\npd.set_option(\n    \"display.float_format\", lambda x: \"{:.3f}\".format(x)\n)  # Limiting floats output to 3 decimal points\ntrain = pd.read_csv(\"train.csv\")\ntest = pd.read_csv(\"test.csv\")\ntrain.head(5)\ntest.head(5)\nprint(\"The train data size before dropping Id feature is : {} \".format(train.shape))\nprint(\"The test data size before dropping Id feature is : {} \".format(test.shape))\ntrain_ID = train[\"Id\"]\ntest_ID = test[\"Id\"]\ntrain.drop(\"Id\", axis=1, inplace=True)\ntest.drop(\"Id\", axis=1, inplace=True)\nprint(\"\\nThe train data size after dropping Id feature is : {} \".format(train.shape))\nprint(\"The test data size after dropping Id feature is : {} \".format(test.shape))\nfig, ax = plt.subplots()\nax.scatter(x=train[\"GrLivArea\"], y=train[\"SalePrice\"])\nplt.ylabel(\"SalePrice\", fontsize=13)\nplt.xlabel(\"GrLivArea\", fontsize=13)\nplt.show()\ntrain = train.drop(\n    train[(train[\"GrLivArea\"] > 4000) & (train[\"SalePrice\"] < 300000)].index\n)\nfig, ax = plt.subplots()\nax.scatter(train[\"GrLivArea\"], train[\"SalePrice\"])\nplt.ylabel(\"SalePrice\", fontsize=13)\nplt.xlabel(\"GrLivArea\", fontsize=13)\nplt.show()\nsns.distplot(train[\"SalePrice\"], fit=norm)\n(mu, sigma) = norm.fit(train[\"SalePrice\"])\nprint(\"\\n mu = {:.2f} and sigma = {:.2f}\\n\".format(mu, sigma))\nplt.legend(\n    [r\"Normal dist. 
($\\mu=$ {:.2f} and $\\sigma=$ {:.2f} )\".format(mu, sigma)],\n    loc=\"best\",  # noqa: W605\n)\nplt.ylabel(\"Frequency\")\nplt.title(\"SalePrice distribution\")\nfig = plt.figure()\nres = stats.probplot(train[\"SalePrice\"], plot=plt)\nplt.show()\ntrain[\"SalePrice\"] = np.log1p(train[\"SalePrice\"])\nsns.distplot(train[\"SalePrice\"], fit=norm)\n(mu, sigma) = norm.fit(train[\"SalePrice\"])\nprint(\"\\n mu = {:.2f} and sigma = {:.2f}\\n\".format(mu, sigma))\nplt.legend(\n    [r\"Normal dist. ($\\mu=$ {:.2f} and $\\sigma=$ {:.2f} )\".format(mu, sigma)],\n    loc=\"best\",  # noqa: W605\n)\nplt.ylabel(\"Frequency\")\nplt.title(\"SalePrice distribution\")\nfig = plt.figure()\nres = stats.probplot(train[\"SalePrice\"], plot=plt)\nplt.show()\nntrain = train.shape[0]\nntest = test.shape[0]\ny_train = train.SalePrice.values\nall_data = pd.concat((train, test)).reset_index(drop=True)\nall_data.drop([\"SalePrice\"], axis=1, inplace=True)\nprint(\"all_data size is : {}\".format(all_data.shape))\nall_data_na = (all_data.isnull().sum() / len(all_data)) * 100\nall_data_na = all_data_na.drop(all_data_na[all_data_na == 0].index).sort_values(\n    ascending=False\n)[:30]\nmissing_data = pd.DataFrame({\"Missing Ratio\": all_data_na})\nmissing_data.head(20)\ncorrmat = train.corr()\nplt.subplots(figsize=(12, 9))\nsns.heatmap(corrmat, vmax=0.9, square=True)\nall_data[\"PoolQC\"] = all_data[\"PoolQC\"].fillna(\"None\")\nall_data[\"MiscFeature\"] = all_data[\"MiscFeature\"].fillna(\"None\")\nall_data[\"Alley\"] = all_data[\"Alley\"].fillna(\"None\")\nall_data[\"Fence\"] = all_data[\"Fence\"].fillna(\"None\")\nall_data[\"FireplaceQu\"] = all_data[\"FireplaceQu\"].fillna(\"None\")\nall_data[\"LotFrontage\"] = all_data.groupby(\"Neighborhood\")[\"LotFrontage\"].transform(\n    lambda x: x.fillna(x.median())\n)\nfor col in (\"GarageType\", \"GarageFinish\", \"GarageQual\", \"GarageCond\"):\n    all_data[col] = all_data[col].fillna(\"None\")\nfor col in (\"GarageYrBlt\", \"GarageArea\", 
\"GarageCars\"):\n    all_data[col] = all_data[col].fillna(0)\nfor col in (\n    \"BsmtFinSF1\",\n    \"BsmtFinSF2\",\n    \"BsmtUnfSF\",\n    \"TotalBsmtSF\",\n    \"BsmtFullBath\",\n    \"BsmtHalfBath\",\n):\n    all_data[col] = all_data[col].fillna(0)\nfor col in (\"BsmtQual\", \"BsmtCond\", \"BsmtExposure\", \"BsmtFinType1\", \"BsmtFinType2\"):\n    all_data[col] = all_data[col].fillna(\"None\")\nall_data[\"MasVnrType\"] = all_data[\"MasVnrType\"].fillna(\"None\")\nall_data[\"MasVnrArea\"] = all_data[\"MasVnrArea\"].fillna(0)\nall_data[\"MSZoning\"] = all_data[\"MSZoning\"].fillna(all_data[\"MSZoning\"].mode()[0])\nall_data = all_data.drop([\"Utilities\"], axis=1)\nall_data[\"Functional\"] = all_data[\"Functional\"].fillna(\"Typ\")\nall_data[\"Electrical\"] = all_data[\"Electrical\"].fillna(all_data[\"Electrical\"].mode()[0])\nall_data[\"KitchenQual\"] = all_data[\"KitchenQual\"].fillna(\n    all_data[\"KitchenQual\"].mode()[0]\n)\nall_data[\"Exterior1st\"] = all_data[\"Exterior1st\"].fillna(\n    all_data[\"Exterior1st\"].mode()[0]\n)\nall_data[\"Exterior2nd\"] = all_data[\"Exterior2nd\"].fillna(\n    all_data[\"Exterior2nd\"].mode()[0]\n)\nall_data[\"SaleType\"] = all_data[\"SaleType\"].fillna(all_data[\"SaleType\"].mode()[0])\nall_data[\"MSSubClass\"] = all_data[\"MSSubClass\"].fillna(\"None\")\nall_data_na = (all_data.isnull().sum() / len(all_data)) * 100\nall_data_na = all_data_na.drop(all_data_na[all_data_na == 0].index).sort_values(\n    ascending=False\n)\nmissing_data = pd.DataFrame({\"Missing Ratio\": all_data_na})\nmissing_data.head()\nall_data[\"MSSubClass\"] = all_data[\"MSSubClass\"].apply(str)\nall_data[\"OverallCond\"] = all_data[\"OverallCond\"].astype(str)\nall_data[\"YrSold\"] = all_data[\"YrSold\"].astype(str)\nall_data[\"MoSold\"] = all_data[\"MoSold\"].astype(str)\nfrom sklearn.preprocessing import LabelEncoder\n\ncols = (\n    \"FireplaceQu\",\n    \"BsmtQual\",\n    \"BsmtCond\",\n    \"GarageQual\",\n    \"GarageCond\",\n    
\"ExterQual\",\n    \"ExterCond\",\n    \"HeatingQC\",\n    \"PoolQC\",\n    \"KitchenQual\",\n    \"BsmtFinType1\",\n    \"BsmtFinType2\",\n    \"Functional\",\n    \"Fence\",\n    \"BsmtExposure\",\n    \"GarageFinish\",\n    \"LandSlope\",\n    \"LotShape\",\n    \"PavedDrive\",\n    \"Street\",\n    \"Alley\",\n    \"CentralAir\",\n    \"MSSubClass\",\n    \"OverallCond\",\n    \"YrSold\",\n    \"MoSold\",\n)\nfor c in cols:\n    lbl = LabelEncoder()\n    lbl.fit(list(all_data[c].values))\n    all_data[c] = lbl.transform(list(all_data[c].values))\nprint(\"Shape all_data: {}\".format(all_data.shape))\nall_data[\"TotalSF\"] = (\n    all_data[\"TotalBsmtSF\"] + all_data[\"1stFlrSF\"] + all_data[\"2ndFlrSF\"]\n)\nnumeric_feats = all_data.dtypes[all_data.dtypes != \"object\"].index\nskewed_feats = (\n    all_data[numeric_feats]\n    .apply(lambda x: skew(x.dropna()))\n    .sort_values(ascending=False)\n)\nprint(\"\\nSkew in numerical features: \\n\")\nskewness = pd.DataFrame({\"Skew\": skewed_feats})\nskewness.head(10)\nskewness = skewness[abs(skewness) > 0.75]\nprint(\n    \"There are {} skewed numerical features to Box Cox transform\".format(\n        skewness.shape[0]\n    )\n)\nfrom scipy.special import boxcox1p\n\nskewed_features = skewness.index\nlam = 0.15\nfor feat in skewed_features:\n    # all_data[feat] += 1\n    all_data[feat] = boxcox1p(all_data[feat], lam)\nall_data = pd.get_dummies(all_data)\nprint(all_data.shape)\ntrain = all_data[:ntrain]\ntest = all_data[ntrain:]\nimport lightgbm as lgb\nimport xgboost as xgb\nfrom sklearn.base import BaseEstimator, RegressorMixin, TransformerMixin, clone\nfrom sklearn.ensemble import GradientBoostingRegressor  # RandomForestRegressor\nfrom sklearn.kernel_ridge import KernelRidge\nfrom sklearn.linear_model import ElasticNet  # BayesianRidge, LassoLarsIC\nfrom sklearn.linear_model import Lasso\nfrom sklearn.metrics import mean_squared_error\nfrom sklearn.model_selection import KFold, cross_val_score  # 
train_test_split\nfrom sklearn.pipeline import make_pipeline\nfrom sklearn.preprocessing import RobustScaler\n\nn_folds = 5\n\n\ndef rmsle_cv(model):\n    kf = KFold(n_folds, shuffle=True, random_state=42).get_n_splits(train.values)\n    rmse = np.sqrt(\n        -cross_val_score(\n            model, train.values, y_train, scoring=\"neg_mean_squared_error\", cv=kf\n        )\n    )\n    return rmse\n\n\nlasso = make_pipeline(RobustScaler(), Lasso(alpha=0.0005, random_state=1))\nENet = make_pipeline(\n    RobustScaler(), ElasticNet(alpha=0.0005, l1_ratio=0.9, random_state=3)\n)\nKRR = KernelRidge(alpha=0.6, kernel=\"polynomial\", degree=2, coef0=2.5)\nGBoost = GradientBoostingRegressor(\n    n_estimators=1,\n    learning_rate=0.05,\n    max_depth=4,\n    max_features=\"sqrt\",\n    min_samples_leaf=15,\n    min_samples_split=10,\n    loss=\"huber\",\n    random_state=5,\n)\nmodel_xgb = xgb.XGBRegressor(\n    colsample_bytree=0.4603,\n    gamma=0.0468,\n    learning_rate=0.05,\n    max_depth=3,\n    min_child_weight=1.7817,\n    n_estimators=1,\n    reg_alpha=0.4640,\n    reg_lambda=0.8571,\n    subsample=0.5213,\n    silent=1,\n    random_state=7,\n    nthread=-1,\n)\nmodel_lgb = lgb.LGBMRegressor(\n    objective=\"regression\",\n    num_leaves=5,\n    learning_rate=0.05,\n    n_estimators=1,\n    max_bin=55,\n    bagging_fraction=0.8,\n    bagging_freq=5,\n    feature_fraction=0.2319,\n    feature_fraction_seed=9,\n    bagging_seed=9,\n    min_data_in_leaf=6,\n    min_sum_hessian_in_leaf=11,\n)\nscore = rmsle_cv(lasso)\nprint(\"\\nLasso score: {:.4f} ({:.4f})\\n\".format(score.mean(), score.std()))\nscore = rmsle_cv(ENet)\nprint(\"ElasticNet score: {:.4f} ({:.4f})\\n\".format(score.mean(), score.std()))\nscore = rmsle_cv(KRR)\nprint(\"Kernel Ridge score: {:.4f} ({:.4f})\\n\".format(score.mean(), score.std()))\nscore = rmsle_cv(GBoost)\nprint(\"Gradient Boosting score: {:.4f} ({:.4f})\\n\".format(score.mean(), score.std()))\nscore = 
rmsle_cv(model_xgb)\nprint(\"Xgboost score: {:.4f} ({:.4f})\\n\".format(score.mean(), score.std()))\nscore = rmsle_cv(model_lgb)\nprint(\"LGBM score: {:.4f} ({:.4f})\\n\".format(score.mean(), score.std()))\n\n\nclass AveragingModels(BaseEstimator, RegressorMixin, TransformerMixin):\n    def __init__(self, models):\n        self.models = models\n\n    def fit(self, X, y):\n        self.models_ = [clone(x) for x in self.models]\n        for model in self.models_:\n            model.fit(X, y)\n        return self\n\n    def predict(self, X):\n        predictions = np.column_stack([model.predict(X) for model in self.models_])\n        return np.mean(predictions, axis=1)\n\n\naveraged_models = AveragingModels(models=(ENet, GBoost, KRR, lasso))\nscore = rmsle_cv(averaged_models)\nprint(\n    \" Averaged base models score: {:.4f} ({:.4f})\\n\".format(score.mean(), score.std())\n)\n\n\nclass StackingAveragedModels(BaseEstimator, RegressorMixin, TransformerMixin):\n    def __init__(self, base_models, meta_model, n_folds=5):\n        self.base_models = base_models\n        self.meta_model = meta_model\n        self.n_folds = n_folds\n\n    def fit(self, X, y):\n        self.base_models_ = [[] for _ in self.base_models]\n        self.meta_model_ = clone(self.meta_model)\n        kfold = KFold(n_splits=self.n_folds, shuffle=True, random_state=156)\n        out_of_fold_predictions = np.zeros((X.shape[0], len(self.base_models)))\n        for i, model in enumerate(self.base_models):\n            for train_index, holdout_index in kfold.split(X, y):\n                instance = clone(model)\n                self.base_models_[i].append(instance)\n                instance.fit(X[train_index], y[train_index])\n                y_pred = instance.predict(X[holdout_index])\n                out_of_fold_predictions[holdout_index, i] = y_pred\n        self.meta_model_.fit(out_of_fold_predictions, y)\n        return self\n\n\ndef predict(self, X):\n    meta_features = np.column_stack(\n        
[\n            np.column_stack([model.predict(X) for model in base_models]).mean(axis=1)\n            for base_models in self.base_models_\n        ]\n    )\n    return self.meta_model_.predict(meta_features)\n\n\nstacked_averaged_models = StackingAveragedModels(\n    base_models=(ENet, GBoost, KRR), meta_model=lasso\n)\nscore = rmsle_cv(stacked_averaged_models)\nprint(\n    \"Stacking Averaged models score: {:.4f} ({:.4f})\".format(score.mean(), score.std())\n)\n\n\ndef rmsle(y, y_pred):\n    return np.sqrt(mean_squared_error(y, y_pred))\n\n\nstacked_averaged_models.fit(train.values, y_train)\nstacked_train_pred = stacked_averaged_models.predict(train.values)\nstacked_pred = np.expm1(stacked_averaged_models.predict(test.values))\nprint(rmsle(y_train, stacked_train_pred))\nmodel_xgb.fit(train, y_train)\nxgb_train_pred = model_xgb.predict(train)\nxgb_pred = np.expm1(model_xgb.predict(test))\nprint(rmsle(y_train, xgb_train_pred))\nmodel_lgb.fit(train, y_train)\nlgb_train_pred = model_lgb.predict(train)\nlgb_pred = np.expm1(model_lgb.predict(test.values))\nprint(rmsle(y_train, lgb_train_pred))\nprint(\"RMSLE score on train data:\")\nprint(\n    rmsle(\n        y_train,\n        stacked_train_pred * 0.70 + xgb_train_pred * 0.15 + lgb_train_pred * 0.15,\n    )\n)\nensemble = stacked_pred * 0.70 + xgb_pred * 0.15 + lgb_pred * 0.15\nsub = pd.DataFrame()\nsub[\"Id\"] = test_ID\nsub[\"SalePrice\"] = ensemble\nsub.to_csv(\"submission.csv\", index=False)\n"
  },
  {
    "path": "stress_tests/kaggle/kaggle5.py",
    "content": "import matplotlib\n\nmatplotlib.use(\"PS\")\nimport matplotlib.pyplot as plt\nimport numpy as np\nimport seaborn as sns\nfrom sklearn.ensemble import RandomForestClassifier\nfrom sklearn.linear_model import LogisticRegression, Perceptron, SGDClassifier\nfrom sklearn.naive_bayes import GaussianNB\nfrom sklearn.neighbors import KNeighborsClassifier\nfrom sklearn.svm import SVC, LinearSVC\nfrom sklearn.tree import DecisionTreeClassifier\n\nimport modin.pandas as pd\n\ntrain_df = pd.read_csv(\"train.csv\")\ntest_df = pd.read_csv(\"test.csv\")\ncombine = [train_df, test_df]\nprint(train_df.columns.values)\ntrain_df.head()\ntrain_df.tail()\ntrain_df.info()\nprint(\"_\" * 40)\ntest_df.info()\ntrain_df.describe()\ntrain_df.describe(include=[\"O\"])\ntrain_df[[\"Pclass\", \"Survived\"]].groupby([\"Pclass\"], as_index=False).mean().sort_values(\n    by=\"Survived\", ascending=False\n)\ntrain_df[[\"Sex\", \"Survived\"]].groupby([\"Sex\"], as_index=False).mean().sort_values(\n    by=\"Survived\", ascending=False\n)\ntrain_df[[\"SibSp\", \"Survived\"]].groupby([\"SibSp\"], as_index=False).mean().sort_values(\n    by=\"Survived\", ascending=False\n)\ntrain_df[[\"Parch\", \"Survived\"]].groupby([\"Parch\"], as_index=False).mean().sort_values(\n    by=\"Survived\", ascending=False\n)\ngrid = sns.FacetGrid(train_df, col=\"Survived\", row=\"Pclass\", size=2.2, aspect=1.6)\ngrid.map(plt.hist, \"Age\", alpha=0.5, bins=20)\ngrid.add_legend()\ngrid = sns.FacetGrid(train_df, row=\"Embarked\", size=2.2, aspect=1.6)\ngrid.map(sns.pointplot, \"Pclass\", \"Survived\", \"Sex\", palette=\"deep\")\ngrid.add_legend()\ngrid = sns.FacetGrid(train_df, row=\"Embarked\", col=\"Survived\", size=2.2, aspect=1.6)\ngrid.map(sns.barplot, \"Sex\", \"Fare\", alpha=0.5, ci=None)\ngrid.add_legend()\nprint(\"Before\", train_df.shape, test_df.shape, combine[0].shape, combine[1].shape)\ntrain_df = train_df.drop([\"Ticket\", \"Cabin\"], axis=1)\ntest_df = test_df.drop([\"Ticket\", \"Cabin\"], 
axis=1)\ncombine = [train_df, test_df]\n\"After\", train_df.shape, test_df.shape, combine[0].shape, combine[1].shape\nfor dataset in combine:\n    dataset[\"Title\"] = dataset.Name.str.extract(\n        r\" ([A-Za-z]+)\\.\", expand=False\n    )  # noqa: W605\npd.crosstab(train_df[\"Title\"], train_df[\"Sex\"])\nfor dataset in combine:\n    dataset[\"Title\"] = dataset[\"Title\"].replace(\n        [\n            \"Lady\",\n            \"Countess\",\n            \"Capt\",\n            \"Col\",\n            \"Don\",\n            \"Dr\",\n            \"Major\",\n            \"Rev\",\n            \"Sir\",\n            \"Jonkheer\",\n            \"Dona\",\n        ],\n        \"Rare\",\n    )\n    dataset[\"Title\"] = dataset[\"Title\"].replace(\"Mlle\", \"Miss\")\n    dataset[\"Title\"] = dataset[\"Title\"].replace(\"Ms\", \"Miss\")\n    dataset[\"Title\"] = dataset[\"Title\"].replace(\"Mme\", \"Mrs\")\ntrain_df[[\"Title\", \"Survived\"]].groupby([\"Title\"], as_index=False).mean()\n\n\ndef title_mapping(string):\n    return np.random.randint(1, high=6)\n\n\nfor dataset in combine:\n    dataset[\"Title\"] = dataset[\"Title\"].map(title_mapping)\n    dataset[\"Title\"] = dataset[\"Title\"].fillna(0)\ntrain_df.head()\ntrain_df = train_df.drop([\"Name\", \"PassengerId\"], axis=1)\ntest_df = test_df.drop([\"Name\"], axis=1)\ncombine = [train_df, test_df]\ntrain_df.shape, test_df.shape\n\n\ndef gender_mapping(string):\n    return np.random.randint(0, high=2)\n\n\nfor dataset in combine:\n    # dataset['Sex'] = dataset['Sex'].map( {'female': 1, 'male': 0} ).astype(int)\n    dataset[\"Sex\"] = dataset[\"Sex\"].map(gender_mapping).astype(int)\ntrain_df.head()\ngrid = sns.FacetGrid(train_df, row=\"Pclass\", col=\"Sex\", size=2.2, aspect=1.6)\ngrid.map(plt.hist, \"Age\", alpha=0.5, bins=20)\ngrid.add_legend()\nguess_ages = np.zeros((2, 3))\nguess_ages\nfor dataset in combine:\n    for i in range(0, 2):\n        for j in range(0, 3):\n            guess_df = 
dataset[(dataset[\"Sex\"] == i) & (dataset[\"Pclass\"] == j + 1)][\n                \"Age\"\n            ].dropna()\n# age_mean = guess_df.mean()\n# age_std = guess_df.std()\n# age_guess = rnd.uniform(age_mean - age_std, age_mean + age_std)\nage_guess = guess_df.median()\n# Convert random age float to nearest .5 age\nguess_ages[i, j] = int(age_guess / 0.5 + 0.5) * 0.5\nfor i in range(0, 2):\n    for j in range(0, 3):\n        dataset.loc[\n            (dataset.Age.isnull()) & (dataset.Sex == i) & (dataset.Pclass == j + 1),\n            \"Age\",\n        ] = guess_ages[i, j]\ndataset[\"Age\"] = dataset[\"Age\"].astype(int)\ntrain_df.head()\ntrain_df[\"AgeBand\"] = pd.cut(train_df[\"Age\"], 5)\ntrain_df[[\"AgeBand\", \"Survived\"]].groupby(\n    [\"AgeBand\"], as_index=False\n).mean().sort_values(by=\"AgeBand\", ascending=True)\nfor dataset in combine:\n    dataset.loc[dataset[\"Age\"] <= 16, \"Age\"] = 0\n    dataset.loc[(dataset[\"Age\"] > 16) & (dataset[\"Age\"] <= 32), \"Age\"] = 1\n    dataset.loc[(dataset[\"Age\"] > 32) & (dataset[\"Age\"] <= 48), \"Age\"] = 2\n    dataset.loc[(dataset[\"Age\"] > 48) & (dataset[\"Age\"] <= 64), \"Age\"] = 3\n    dataset.loc[dataset[\"Age\"] > 64, \"Age\"]\ntrain_df.head()\ntrain_df = train_df.drop([\"AgeBand\"], axis=1)\ncombine = [train_df, test_df]\ntrain_df.head()\nfor dataset in combine:\n    dataset[\"FamilySize\"] = dataset[\"SibSp\"] + dataset[\"Parch\"] + 1\ntrain_df[[\"FamilySize\", \"Survived\"]].groupby(\n    [\"FamilySize\"], as_index=False\n).mean().sort_values(by=\"Survived\", ascending=False)\nfor dataset in combine:\n    dataset[\"IsAlone\"] = 0\n    dataset.loc[dataset[\"FamilySize\"] == 1, \"IsAlone\"] = 1\ntrain_df[[\"IsAlone\", \"Survived\"]].groupby([\"IsAlone\"], as_index=False).mean()\ntrain_df = train_df.drop([\"Parch\", \"SibSp\", \"FamilySize\"], axis=1)\ntest_df = test_df.drop([\"Parch\", \"SibSp\", \"FamilySize\"], axis=1)\ncombine = [train_df, test_df]\ntrain_df.head()\nfor dataset in combine:\n    
dataset[\"Age*Class\"] = dataset.Age * dataset.Pclass\ntrain_df.loc[:, [\"Age*Class\", \"Age\", \"Pclass\"]].head(10)\nfreq_port = train_df.Embarked.dropna().mode()[0]\nfreq_port\nfor dataset in combine:\n    dataset[\"Embarked\"] = dataset[\"Embarked\"].fillna(freq_port)\ntrain_df[[\"Embarked\", \"Survived\"]].groupby(\n    [\"Embarked\"], as_index=False\n).mean().sort_values(by=\"Survived\", ascending=False)\n\n\ndef embarked_mapping(string):\n    return np.random.randint(0, high=3)\n\n\nfor dataset in combine:\n    dataset[\"Embarked\"] = dataset[\"Embarked\"].map({\"S\": 0, \"C\": 1, \"Q\": 2}).astype(int)\ntrain_df.head()\ntest_df[\"Fare\"].fillna(test_df[\"Fare\"].dropna().median(), inplace=True)\ntest_df.head()\ntrain_df[\"FareBand\"] = pd.qcut(train_df[\"Fare\"], 4)\ntrain_df[[\"FareBand\", \"Survived\"]].groupby(\n    [\"FareBand\"], as_index=False\n).mean().sort_values(by=\"FareBand\", ascending=True)\nfor dataset in combine:\n    dataset.loc[dataset[\"Fare\"] <= 7.91, \"Fare\"] = 0\n    dataset.loc[(dataset[\"Fare\"] > 7.91) & (dataset[\"Fare\"] <= 14.454), \"Fare\"] = 1\n    dataset.loc[(dataset[\"Fare\"] > 14.454) & (dataset[\"Fare\"] <= 31), \"Fare\"] = 2\n    dataset.loc[dataset[\"Fare\"] > 31, \"Fare\"] = 3\n    dataset[\"Fare\"] = dataset[\"Fare\"].astype(int)\ntrain_df = train_df.drop([\"FareBand\"], axis=1)\ncombine = [train_df, test_df]\ntrain_df.head(10)\ntest_df.head(10)\nX_train = train_df.drop(\"Survived\", axis=1)\nY_train = train_df[\"Survived\"]\nX_test = test_df.drop(\"PassengerId\", axis=1).copy()\nX_train.shape, Y_train.shape, X_test.shape\nlogreg = LogisticRegression()\nlogreg.fit(X_train, Y_train)\nY_pred = logreg.predict(X_test)\nacc_log = round(logreg.score(X_train, Y_train) * 100, 2)\nacc_log\ncoeff_df = pd.DataFrame(train_df.columns.delete(0))\ncoeff_df.columns = [\"Feature\"]\ncoeff_df[\"Correlation\"] = pd.Series(logreg.coef_[0])\ncoeff_df.sort_values(by=\"Correlation\", ascending=False)\nsvc = SVC()\nsvc.fit(X_train, 
Y_train)\nY_pred = svc.predict(X_test)\nacc_svc = round(svc.score(X_train, Y_train) * 100, 2)\nacc_svc\nknn = KNeighborsClassifier(n_neighbors=3)\nknn.fit(X_train, Y_train)\nY_pred = knn.predict(X_test)\nacc_knn = round(knn.score(X_train, Y_train) * 100, 2)\nacc_knn\ngaussian = GaussianNB()\ngaussian.fit(X_train, Y_train)\nY_pred = gaussian.predict(X_test)\nacc_gaussian = round(gaussian.score(X_train, Y_train) * 100, 2)\nacc_gaussian\nperceptron = Perceptron()\nperceptron.fit(X_train, Y_train)\nY_pred = perceptron.predict(X_test)\nacc_perceptron = round(perceptron.score(X_train, Y_train) * 100, 2)\nacc_perceptron\nlinear_svc = LinearSVC()\nlinear_svc.fit(X_train, Y_train)\nY_pred = linear_svc.predict(X_test)\nacc_linear_svc = round(linear_svc.score(X_train, Y_train) * 100, 2)\nacc_linear_svc\nsgd = SGDClassifier()\nsgd.fit(X_train, Y_train)\nY_pred = sgd.predict(X_test)\nacc_sgd = round(sgd.score(X_train, Y_train) * 100, 2)\nacc_sgd\ndecision_tree = DecisionTreeClassifier()\ndecision_tree.fit(X_train, Y_train)\nY_pred = decision_tree.predict(X_test)\nacc_decision_tree = round(decision_tree.score(X_train, Y_train) * 100, 2)\nacc_decision_tree\nrandom_forest = RandomForestClassifier(n_estimators=1)\nrandom_forest.fit(X_train, Y_train)\nY_pred = random_forest.predict(X_test)\nrandom_forest.score(X_train, Y_train)\nacc_random_forest = round(random_forest.score(X_train, Y_train) * 100, 2)\nacc_random_forest\nmodels = pd.DataFrame(\n    {\n        \"Model\": [\n            \"Support Vector Machines\",\n            \"KNN\",\n            \"Logistic Regression\",\n            \"Random Forest\",\n            \"Naive Bayes\",\n            \"Perceptron\",\n            \"Stochastic Gradient Decent\",\n            \"Linear SVC\",\n            \"Decision Tree\",\n        ],\n        \"Score\": [\n            acc_svc,\n            acc_knn,\n            acc_log,\n            acc_random_forest,\n            acc_gaussian,\n            acc_perceptron,\n            acc_sgd,\n           
 acc_linear_svc,\n            acc_decision_tree,\n        ],\n    }\n)\nmodels.sort_values(by=\"Score\", ascending=False)\nsubmission = pd.DataFrame({\"PassengerId\": test_df[\"PassengerId\"], \"Survived\": Y_pred})\n"
  },
  {
    "path": "stress_tests/kaggle/kaggle6.py",
    "content": "import matplotlib\n\nmatplotlib.use(\"PS\")\nimport matplotlib.pyplot as plt\nimport numpy as np\nimport pandas as pd\nimport seaborn as sns\n\nnp.random.seed(2)\nimport itertools\n\nfrom keras.callbacks import ReduceLROnPlateau\nfrom keras.layers import Conv2D, Dense, Dropout, Flatten, MaxPool2D\nfrom keras.models import Sequential\nfrom keras.optimizers import RMSprop\nfrom keras.preprocessing.image import ImageDataGenerator\nfrom keras.utils.np_utils import to_categorical  # convert to one-hot-encoding\nfrom sklearn.metrics import confusion_matrix\nfrom sklearn.model_selection import train_test_split\n\nsns.set(style=\"white\", context=\"notebook\", palette=\"deep\")\ntrain = pd.read_csv(\"train.csv\")\ntest = pd.read_csv(\"test.csv\")\nY_train = train[\"label\"]\nX_train = train.drop(labels=[\"label\"], axis=1)\ndel train\ng = sns.countplot(Y_train)\nY_train.value_counts()\nX_train.isnull().any().describe()\ntest.isnull().any().describe()\nX_train = X_train / 255.0\ntest = test / 255.0\nX_train = X_train.values.reshape(-1, 28, 28, 1)\ntest = test.values.reshape(-1, 28, 28, 1)\nY_train = to_categorical(Y_train, num_classes=10)\nrandom_seed = 2\nX_train, X_val, Y_train, Y_val = train_test_split(\n    X_train, Y_train, test_size=0.1, random_state=random_seed\n)\ng = plt.imshow(X_train[0][:, :, 0])\nmodel = Sequential()\nmodel.add(\n    Conv2D(\n        filters=32,\n        kernel_size=(5, 5),\n        padding=\"Same\",\n        activation=\"relu\",\n        input_shape=(28, 28, 1),\n    )\n)\nmodel.add(Conv2D(filters=32, kernel_size=(5, 5), padding=\"Same\", activation=\"relu\"))\nmodel.add(MaxPool2D(pool_size=(2, 2)))\nmodel.add(Dropout(0.25))\nmodel.add(Conv2D(filters=64, kernel_size=(3, 3), padding=\"Same\", activation=\"relu\"))\nmodel.add(Conv2D(filters=64, kernel_size=(3, 3), padding=\"Same\", activation=\"relu\"))\nmodel.add(MaxPool2D(pool_size=(2, 2), strides=(2, 2)))\nmodel.add(Dropout(0.25))\nmodel.add(Flatten())\nmodel.add(Dense(256, 
activation=\"relu\"))\nmodel.add(Dropout(0.5))\nmodel.add(Dense(10, activation=\"softmax\"))\noptimizer = RMSprop(lr=0.001, rho=0.9, epsilon=1e-08, decay=0.0)\nmodel.compile(\n    optimizer=optimizer, loss=\"categorical_crossentropy\", metrics=[\"accuracy\"]\n)\nlearning_rate_reduction = ReduceLROnPlateau(\n    monitor=\"val_acc\", patience=3, verbose=1, factor=0.5, min_lr=0.00001\n)\nepochs = 1  # Turn epochs to 30 to get 0.9967 accuracy\nbatch_size = 86\ndatagen = ImageDataGenerator(\n    featurewise_center=False,  # set input mean to 0 over the dataset\n    samplewise_center=False,  # set each sample mean to 0\n    featurewise_std_normalization=False,  # divide inputs by std of the dataset\n    samplewise_std_normalization=False,  # divide each input by its std\n    zca_whitening=False,  # apply ZCA whitening\n    rotation_range=10,  # randomly rotate images in the range (degrees, 0 to 180)\n    zoom_range=0.1,  # Randomly zoom image\n    width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)\n    height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)\n    horizontal_flip=False,  # randomly flip images\n    vertical_flip=False,\n)  # randomly flip images\ndatagen.fit(X_train)\nhistory = model.fit_generator(\n    datagen.flow(X_train, Y_train, batch_size=batch_size),\n    epochs=epochs,\n    validation_data=(X_val, Y_val),\n    verbose=2,\n    steps_per_epoch=X_train.shape[0] // batch_size,\n    callbacks=[learning_rate_reduction],\n)\nfig, ax = plt.subplots(2, 1)\nax[0].plot(history.history[\"loss\"], color=\"b\", label=\"Training loss\")\nax[0].plot(history.history[\"val_loss\"], color=\"r\", label=\"validation loss\", axes=ax[0])\nlegend = ax[0].legend(loc=\"best\", shadow=True)\nax[1].plot(history.history[\"acc\"], color=\"b\", label=\"Training accuracy\")\nax[1].plot(history.history[\"val_acc\"], color=\"r\", label=\"Validation accuracy\")\nlegend = ax[1].legend(loc=\"best\", 
shadow=True)\n\n\ndef plot_confusion_matrix(\n    cm, classes, normalize=False, title=\"Confusion matrix\", cmap=plt.cm.Blues\n):\n    \"\"\"\n    This function prints and plots the confusion matrix.\n    Normalization can be applied by setting `normalize=True`.\n    \"\"\"\n    plt.imshow(cm, interpolation=\"nearest\", cmap=cmap)\n    plt.title(title)\n    plt.colorbar()\n    tick_marks = np.arange(len(classes))\n    plt.xticks(tick_marks, classes, rotation=45)\n    plt.yticks(tick_marks, classes)\n    if normalize:\n        cm = cm.astype(\"float\") / cm.sum(axis=1)[:, np.newaxis]\n        thresh = cm.max() / 2.0\n    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):\n        plt.text(\n            j,\n            i,\n            cm[i, j],\n            horizontalalignment=\"center\",\n            color=\"white\" if cm[i, j] > thresh else \"black\",\n        )\n    plt.tight_layout()\n    plt.ylabel(\"True label\")\n    plt.xlabel(\"Predicted label\")\n\n\nY_pred = model.predict(X_val)\nY_pred_classes = np.argmax(Y_pred, axis=1)\nY_true = np.argmax(Y_val, axis=1)\nconfusion_mtx = confusion_matrix(Y_true, Y_pred_classes)\nplot_confusion_matrix(confusion_mtx, classes=range(10))\nerrors = Y_pred_classes - Y_true != 0\nY_pred_classes_errors = Y_pred_classes[errors]\nY_pred_errors = Y_pred[errors]\nY_true_errors = Y_true[errors]\nX_val_errors = X_val[errors]\n\n\ndef display_errors(errors_index, img_errors, pred_errors, obs_errors):\n    \"\"\"This function shows 6 images with their predicted and real labels\"\"\"\n    n = 0\n    nrows = 2\n    ncols = 3\n    fig, ax = plt.subplots(nrows, ncols, sharex=True, sharey=True)\n    for row in range(nrows):\n        for col in range(ncols):\n            error = errors_index[n]\n            ax[row, col].imshow((img_errors[error]).reshape((28, 28)))\n            ax[row, col].set_title(\n                \"Predicted label :{}\\nTrue label :{}\".format(\n                    pred_errors[error], 
obs_errors[error]\n                )\n            )\n            n += 1\n\n\nY_pred_errors_prob = np.max(Y_pred_errors, axis=1)\ntrue_prob_errors = np.diagonal(np.take(Y_pred_errors, Y_true_errors, axis=1))\ndelta_pred_true_errors = Y_pred_errors_prob - true_prob_errors\nsorted_dela_errors = np.argsort(delta_pred_true_errors)\nmost_important_errors = sorted_dela_errors[-6:]\ndisplay_errors(\n    most_important_errors, X_val_errors, Y_pred_classes_errors, Y_true_errors\n)\nresults = model.predict(test)\nresults = np.argmax(results, axis=1)\nresults = pd.Series(results, name=\"Label\")\nsubmission = pd.concat([pd.Series(range(1, 28001), name=\"ImageId\"), results], axis=1)\nsubmission.to_csv(\"cnn_mnist_datagen.csv\", index=False)\n"
  },
  {
    "path": "stress_tests/kaggle/kaggle7.py",
    "content": "import matplotlib\n\nmatplotlib.use(\"PS\")\nimport warnings\n\nimport numpy as np\nfrom sklearn.preprocessing import LabelEncoder\n\nimport modin.pandas as pd\n\nwarnings.filterwarnings(\"ignore\")\nimport matplotlib.pyplot as plt\nimport seaborn as sns\n\napp_train = pd.read_csv(\"application_train.csv\")\nprint(\"Training data shape: \", app_train.shape)\napp_train.head()\napp_test = pd.read_csv(\"application_test.csv\")\nprint(\"Testing data shape: \", app_test.shape)\napp_test.head()\napp_train[\"TARGET\"].value_counts()\napp_train[\"TARGET\"].astype(int).plot.hist()\n\n\ndef missing_values_table(df):\n    # Total missing values\n    mis_val = df.isnull().sum()\n    mis_val_percent = 100 * df.isnull().sum() / len(df)\n    mis_val_table = pd.concat([mis_val, mis_val_percent], axis=1)\n    mis_val_table_ren_columns = mis_val_table.rename(\n        columns={0: \"Missing Values\", 1: \"% of Total Values\"}\n    )\n    mis_val_table_ren_columns = (\n        mis_val_table_ren_columns[mis_val_table_ren_columns.iloc[:, 1] != 0]\n        .sort_values(\"% of Total Values\", ascending=False)\n        .round(1)\n    )\n    print(\n        \"Your selected dataframe has \" + str(df.shape[1]) + \" columns.\\n\"\n        \"There are \"\n        + str(mis_val_table_ren_columns.shape[0])\n        + \" columns that have missing values.\"\n    )\n    return mis_val_table_ren_columns\n\n\napp_train.dtypes.value_counts()\napp_train.select_dtypes(\"object\").apply(pd.Series.nunique, axis=0)\nle = LabelEncoder()\nle_count = 0\nfor col in app_train:\n    if app_train[col].dtype == \"object\":\n        # If 2 or fewer unique categories\n        if len(list(app_train[col].unique())) <= 2:\n            # Train on the training data\n            le.fit(app_train[col])\n            # Transform both training and testing data\n            app_train[col] = le.transform(app_train[col])\n            app_test[col] = le.transform(app_test[col])\n            le_count += 
1\nprint(\"%d columns were label encoded.\" % le_count)\napp_train = pd.get_dummies(app_train)\napp_test = pd.get_dummies(app_test)\nprint(\"Training Features shape: \", app_train.shape)\nprint(\"Testing Features shape: \", app_test.shape)\ntrain_labels = app_train[\"TARGET\"]\napp_train, app_test = app_train.align(app_test, join=\"inner\", axis=1)\napp_train[\"TARGET\"] = train_labels\nprint(\"Training Features shape: \", app_train.shape)\nprint(\"Testing Features shape: \", app_test.shape)\n(app_train[\"DAYS_BIRTH\"] / -365).describe()\napp_train[\"DAYS_EMPLOYED\"].describe()\napp_train[\"DAYS_EMPLOYED\"].plot.hist(title=\"Days Employment Histogram\")\nplt.xlabel(\"Days Employment\")\nanom = app_train[app_train[\"DAYS_EMPLOYED\"] == 3]\nnon_anom = app_train[app_train[\"DAYS_EMPLOYED\"] != 3]\nprint(\n    \"The non-anomalies default on %0.2f%% of loans\" % (100 * non_anom[\"TARGET\"].mean())\n)\nprint(\"The anomalies default on %0.2f%% of loans\" % (100 * anom[\"TARGET\"].mean()))\nprint(\"There are %d anomalous days of employment\" % len(anom))\napp_train[\"DAYS_EMPLOYED_ANOM\"] = app_train[\"DAYS_EMPLOYED\"] == 3\napp_train[\"DAYS_EMPLOYED\"].replace({3: np.nan}, inplace=True)\napp_train[\"DAYS_EMPLOYED\"].plot.hist(title=\"Days Employment Histogram\")\nplt.xlabel(\"Days Employment\")\napp_test[\"DAYS_EMPLOYED_ANOM\"] = app_test[\"DAYS_EMPLOYED\"] == 3\napp_test[\"DAYS_EMPLOYED\"].replace({3: np.nan}, inplace=True)\nprint(\n    \"There are %d anomalies in the test data out of %d entries\"\n    % (app_test[\"DAYS_EMPLOYED_ANOM\"].sum(), len(app_test))\n)\ncorrelations = app_train.corr()[\"TARGET\"].sort_values()\nprint(\"Most Positive Correlations:\\n\", correlations.tail(15))\nprint(\"\\nMost Negative Correlations:\\n\", correlations.head(15))\napp_train[\"DAYS_BIRTH\"] = abs(app_train[\"DAYS_BIRTH\"])\napp_train[\"DAYS_BIRTH\"].corr(app_train[\"TARGET\"])\nplt.style.use(\"fivethirtyeight\")\nplt.hist(app_train[\"DAYS_BIRTH\"] / 365, edgecolor=\"k\", 
bins=25)\nplt.title(\"Age of Client\")\nplt.xlabel(\"Age (years)\")\nplt.ylabel(\"Count\")\nplt.figure(figsize=(10, 8))\n#\nplt.xlabel(\"Age (years)\")\nplt.ylabel(\"Density\")\nplt.title(\"Distribution of Ages\")\nage_data = app_train[[\"TARGET\", \"DAYS_BIRTH\"]]\nage_data[\"YEARS_BIRTH\"] = age_data[\"DAYS_BIRTH\"] / 365\nage_data[\"YEARS_BINNED\"] = pd.cut(\n    age_data[\"YEARS_BIRTH\"], bins=np.linspace(20, 70, num=11)\n)\nage_data.head(10)\nage_groups = age_data.groupby(\"YEARS_BINNED\").mean()\nage_groups\next_data = app_train[\n    [\"TARGET\", \"EXT_SOURCE_1\", \"EXT_SOURCE_2\", \"EXT_SOURCE_3\", \"DAYS_BIRTH\"]\n]\next_data_corrs = ext_data.corr()\next_data_corrs\nplt.figure(figsize=(8, 6))\nsns.heatmap(ext_data_corrs, cmap=plt.cm.RdYlBu_r, vmin=-0.25, annot=True, vmax=0.6)\nplt.title(\"Correlation Heatmap\")\nplot_data = ext_data.drop(columns=[\"DAYS_BIRTH\"]).copy()\nplot_data[\"YEARS_BIRTH\"] = age_data[\"YEARS_BIRTH\"]\nplot_data = plot_data.dropna().loc[:100000, :]\n\n\ndef corr_func(x, y, **kwargs):\n    r = np.corrcoef(x, y)[0][1]\n    ax = plt.gca()\n    ax.annotate(\"r = {:.2f}\".format(r), xy=(0.2, 0.8), xycoords=ax.transAxes, size=20)\n\n\npoly_features = app_train[\n    [\"EXT_SOURCE_1\", \"EXT_SOURCE_2\", \"EXT_SOURCE_3\", \"DAYS_BIRTH\", \"TARGET\"]\n]\npoly_features_test = app_test[\n    [\"EXT_SOURCE_1\", \"EXT_SOURCE_2\", \"EXT_SOURCE_3\", \"DAYS_BIRTH\"]\n]\nfrom sklearn.preprocessing import Imputer\n\nimputer = Imputer(strategy=\"median\")\npoly_target = poly_features[\"TARGET\"]\npoly_features = poly_features.drop(columns=[\"TARGET\"])\npoly_features = imputer.fit_transform(poly_features)\npoly_features_test = imputer.transform(poly_features_test)\nfrom sklearn.preprocessing import PolynomialFeatures\n\npoly_transformer = PolynomialFeatures(degree=3)\npoly_transformer.fit(poly_features)\npoly_features = poly_transformer.transform(poly_features)\npoly_features_test = poly_transformer.transform(poly_features_test)\nprint(\"Polynomial 
Features shape: \", poly_features.shape)\npoly_transformer.get_feature_names(\n    input_features=[\"EXT_SOURCE_1\", \"EXT_SOURCE_2\", \"EXT_SOURCE_3\", \"DAYS_BIRTH\"]\n)[:15]\npoly_features = pd.DataFrame(\n    poly_features,\n    columns=poly_transformer.get_feature_names(\n        [\"EXT_SOURCE_1\", \"EXT_SOURCE_2\", \"EXT_SOURCE_3\", \"DAYS_BIRTH\"]\n    ),\n)\npoly_features[\"TARGET\"] = poly_target\npoly_corrs = poly_features.corr()[\"TARGET\"].sort_values()\nprint(poly_corrs.head(10))\nprint(poly_corrs.tail(5))\npoly_features_test = pd.DataFrame(\n    poly_features_test,\n    columns=poly_transformer.get_feature_names(\n        [\"EXT_SOURCE_1\", \"EXT_SOURCE_2\", \"EXT_SOURCE_3\", \"DAYS_BIRTH\"]\n    ),\n)\npoly_features[\"SK_ID_CURR\"] = app_train[\"SK_ID_CURR\"]\napp_train_poly = app_train.merge(poly_features, on=\"SK_ID_CURR\", how=\"left\")\npoly_features_test[\"SK_ID_CURR\"] = app_test[\"SK_ID_CURR\"]\napp_test_poly = app_test.merge(poly_features_test, on=\"SK_ID_CURR\", how=\"left\")\napp_train_poly, app_test_poly = app_train_poly.align(\n    app_test_poly, join=\"inner\", axis=1\n)\nprint(\"Training data with polynomial features shape: \", app_train_poly.shape)\nprint(\"Testing data with polynomial features shape:  \", app_test_poly.shape)\napp_train_domain = app_train.copy()\napp_test_domain = app_test.copy()\napp_train_domain[\"CREDIT_INCOME_PERCENT\"] = (\n    app_train_domain[\"AMT_CREDIT\"] / app_train_domain[\"AMT_INCOME_TOTAL\"]\n)\napp_train_domain[\"ANNUITY_INCOME_PERCENT\"] = (\n    app_train_domain[\"AMT_ANNUITY\"] / app_train_domain[\"AMT_INCOME_TOTAL\"]\n)\napp_train_domain[\"CREDIT_TERM\"] = (\n    app_train_domain[\"AMT_ANNUITY\"] / app_train_domain[\"AMT_CREDIT\"]\n)\napp_train_domain[\"DAYS_EMPLOYED_PERCENT\"] = (\n    app_train_domain[\"DAYS_EMPLOYED\"] / app_train_domain[\"DAYS_BIRTH\"]\n)\napp_test_domain[\"CREDIT_INCOME_PERCENT\"] = (\n    app_test_domain[\"AMT_CREDIT\"] / 
app_test_domain[\"AMT_INCOME_TOTAL\"]\n)\napp_test_domain[\"ANNUITY_INCOME_PERCENT\"] = (\n    app_test_domain[\"AMT_ANNUITY\"] / app_test_domain[\"AMT_INCOME_TOTAL\"]\n)\napp_test_domain[\"CREDIT_TERM\"] = (\n    app_test_domain[\"AMT_ANNUITY\"] / app_test_domain[\"AMT_CREDIT\"]\n)\napp_test_domain[\"DAYS_EMPLOYED_PERCENT\"] = (\n    app_test_domain[\"DAYS_EMPLOYED\"] / app_test_domain[\"DAYS_BIRTH\"]\n)\nfrom sklearn.preprocessing import Imputer, MinMaxScaler\n\nif \"TARGET\" in app_train.columns:\n    train = app_train.drop(columns=[\"TARGET\"])\n    # TODO (williamma12): Not sure why this line is necessary but it is\n    app_test = app_test.drop(columns=[\"TARGET\"])\nelse:\n    train = app_train.copy()\nfeatures = list(train.columns)\ntest = app_test.copy()\nimputer = Imputer(strategy=\"median\")\nscaler = MinMaxScaler(feature_range=(0, 1))\nimputer.fit(train)\ntrain = imputer.transform(train)\ntest = imputer.transform(app_test)\nscaler.fit(train)\ntrain = scaler.transform(train)\ntest = scaler.transform(test)\nprint(\"Training data shape: \", train.shape)\nprint(\"Testing data shape: \", test.shape)\nfrom sklearn.linear_model import LogisticRegression\n\nlog_reg = LogisticRegression(C=0.0001)\nlog_reg.fit(train, train_labels)\nlog_reg_pred = log_reg.predict_proba(test)[:, 1]\nsubmit = app_test[[\"SK_ID_CURR\"]]\nsubmit[\"TARGET\"] = log_reg_pred\nsubmit.head()\nsubmit.to_csv(\"log_reg_baseline.csv\", index=False)\nfrom sklearn.ensemble import RandomForestClassifier\n\nrandom_forest = RandomForestClassifier(\n    n_estimators=100, random_state=50, verbose=1, n_jobs=-1\n)\nrandom_forest.fit(train, train_labels)\nfeature_importance_values = random_forest.feature_importances_\nfeature_importances = pd.DataFrame(\n    {\"feature\": features, \"importance\": feature_importance_values}\n)\npredictions = random_forest.predict_proba(test)[:, 1]\nsubmit = app_test[[\"SK_ID_CURR\"]]\nsubmit[\"TARGET\"] = predictions\nsubmit.to_csv(\"random_forest_baseline.csv\", 
index=False)\npoly_features_names = list(app_train_poly.columns)\nimputer = Imputer(strategy=\"median\")\npoly_features = imputer.fit_transform(app_train_poly)\npoly_features_test = imputer.transform(app_test_poly)\nscaler = MinMaxScaler(feature_range=(0, 1))\npoly_features = scaler.fit_transform(poly_features)\npoly_features_test = scaler.transform(poly_features_test)\nrandom_forest_poly = RandomForestClassifier(\n    n_estimators=100, random_state=50, verbose=1, n_jobs=-1\n)\nrandom_forest_poly.fit(poly_features, train_labels)\npredictions = random_forest_poly.predict_proba(poly_features_test)[:, 1]\nsubmit = app_test[[\"SK_ID_CURR\"]]\nsubmit[\"TARGET\"] = predictions\nsubmit.to_csv(\"random_forest_baseline_engineered.csv\", index=False)\napp_train_domain = app_train_domain.drop(columns=\"TARGET\")\napp_test_domain = app_test_domain.drop(columns=\"TARGET\")\ndomain_features_names = list(app_train_domain.columns)\nimputer = Imputer(strategy=\"median\")\ndomain_features = imputer.fit_transform(app_train_domain)\ndomain_features_test = imputer.transform(app_test_domain)\nscaler = MinMaxScaler(feature_range=(0, 1))\ndomain_features = scaler.fit_transform(domain_features)\ndomain_features_test = scaler.transform(domain_features_test)\nrandom_forest_domain = RandomForestClassifier(\n    n_estimators=100, random_state=50, verbose=1, n_jobs=-1\n)\nrandom_forest_domain.fit(domain_features, train_labels)\nfeature_importance_values_domain = random_forest_domain.feature_importances_\nfeature_importances_domain = pd.DataFrame(\n    {\"feature\": domain_features_names, \"importance\": feature_importance_values_domain}\n)\npredictions = random_forest_domain.predict_proba(domain_features_test)[:, 1]\nsubmit = app_test[[\"SK_ID_CURR\"]]\nsubmit[\"TARGET\"] = predictions\nsubmit.to_csv(\"random_forest_baseline_domain.csv\", index=False)\n\n\ndef plot_feature_importances(df):\n    df = df.sort_values(\"importance\", ascending=False).reset_index()\n    df[\"importance_normalized\"] 
= df[\"importance\"] / df[\"importance\"].sum()\n    plt.figure(figsize=(10, 6))\n    ax = plt.subplot()\n    ax.barh(\n        list(reversed(list(df.index[:15]))),\n        df[\"importance_normalized\"].head(15),\n        align=\"center\",\n        edgecolor=\"k\",\n    )\n    ax.set_yticks(list(reversed(list(df.index[:15]))))\n    ax.set_yticklabels(df[\"feature\"].head(15))\n    plt.xlabel(\"Normalized Importance\")\n    plt.title(\"Feature Importances\")\n    return df\n\n\nfeature_importances_sorted = plot_feature_importances(feature_importances)\nfeature_importances_domain_sorted = plot_feature_importances(feature_importances_domain)\nimport gc\n\nimport lightgbm as lgb\nfrom sklearn.metrics import roc_auc_score\nfrom sklearn.model_selection import KFold\n\n\ndef model(features, test_features, encoding=\"ohe\", n_folds=5):\n    test_ids = test_features[\"SK_ID_CURR\"]\n    labels = features[\"TARGET\"]\n    features = features.drop(columns=[\"SK_ID_CURR\", \"TARGET\"])\n    test_features = test_features.drop(columns=[\"SK_ID_CURR\"])\n    if encoding == \"ohe\":\n        features = pd.get_dummies(features)\n        test_features = pd.get_dummies(test_features)\n        features, test_features = features.align(test_features, join=\"inner\", axis=1)\n        cat_indices = \"auto\"\n    elif encoding == \"le\":\n        label_encoder = LabelEncoder()\n        cat_indices = []\n        for i, col in enumerate(features):\n            if features[col].dtype == \"object\":\n                features[col] = label_encoder.fit_transform(\n                    np.array(features[col].astype(str)).reshape((-1,))\n                )\n                test_features[col] = label_encoder.transform(\n                    np.array(test_features[col].astype(str)).reshape((-1,))\n                )\n                cat_indices.append(i)\n    else:\n        raise ValueError(\"Encoding must be either 'ohe' or 'le'\")\n    print(\"Training Data Shape: \", features.shape)\n    
print(\"Testing Data Shape: \", test_features.shape)\n    feature_names = list(features.columns)\n    features = np.array(features)\n    test_features = np.array(test_features)\n    k_fold = KFold(n_splits=n_folds, shuffle=True, random_state=50)\n    feature_importance_values = np.zeros(len(feature_names))\n    test_predictions = np.zeros(test_features.shape[0])\n    out_of_fold = np.zeros(features.shape[0])\n    valid_scores = []\n    train_scores = []\n    for train_indices, valid_indices in k_fold.split(features):\n        train_features, train_labels = features[train_indices], labels[train_indices]\n        valid_features, valid_labels = features[valid_indices], labels[valid_indices]\n        model = lgb.LGBMClassifier(\n            n_estimators=10000,\n            objective=\"binary\",\n            class_weight=\"balanced\",\n            learning_rate=0.05,\n            reg_alpha=0.1,\n            reg_lambda=0.1,\n            subsample=0.8,\n            n_jobs=-1,\n            random_state=50,\n        )\n        model.fit(\n            train_features,\n            train_labels,\n            eval_metric=\"auc\",\n            eval_set=[(valid_features, valid_labels), (train_features, train_labels)],\n            eval_names=[\"valid\", \"train\"],\n            categorical_feature=cat_indices,\n            early_stopping_rounds=100,\n            verbose=200,\n        )\n        best_iteration = model.best_iteration_\n        feature_importance_values += model.feature_importances_ / k_fold.n_splits\n        test_predictions += (\n            model.predict_proba(test_features, num_iteration=best_iteration)[:, 1]\n            / k_fold.n_splits\n        )\n        out_of_fold[valid_indices] = model.predict_proba(\n            valid_features, num_iteration=best_iteration\n        )[:, 1]\n        valid_score = model.best_score_[\"valid\"][\"auc\"]\n        train_score = model.best_score_[\"train\"][\"auc\"]\n        valid_scores.append(valid_score)\n        
train_scores.append(train_score)\n        gc.enable()\n        del model, train_features, valid_features\n        gc.collect()\n    submission = pd.DataFrame({\"SK_ID_CURR\": test_ids, \"TARGET\": test_predictions})\n    feature_importances = pd.DataFrame(\n        {\"feature\": feature_names, \"importance\": feature_importance_values}\n    )\n    valid_auc = roc_auc_score(labels, out_of_fold)\n    valid_scores.append(valid_auc)\n    train_scores.append(np.mean(train_scores))\n    fold_names = list(range(n_folds))\n    fold_names.append(\"overall\")\n    metrics = pd.DataFrame(\n        {\"fold\": fold_names, \"train\": train_scores, \"valid\": valid_scores}\n    )\n    return submission, feature_importances, metrics\n\n\nsubmission, fi, metrics = model(app_train, app_test)\nprint(\"Baseline metrics\")\nprint(metrics)\nfi_sorted = plot_feature_importances(fi)\nsubmission.to_csv(\"baseline_lgb.csv\", index=False)\napp_train_domain[\"TARGET\"] = train_labels\nsubmission_domain, fi_domain, metrics_domain = model(app_train_domain, app_test_domain)\nprint(\"Baseline with domain knowledge features metrics\")\nprint(metrics_domain)\nfi_sorted = plot_feature_importances(fi_domain)\nsubmission_domain.to_csv(\"baseline_lgb_domain_features.csv\", index=False)\n"
  },
  {
    "path": "stress_tests/kaggle/kaggle8.py",
    "content": "from sklearn.ensemble import RandomForestRegressor\n\nimport modin.pandas as pd\n\ntrain = pd.read_csv(\"train.csv\")\ntrain_y = train.SalePrice\npredictor_cols = [\"LotArea\", \"OverallQual\", \"YearBuilt\", \"TotRmsAbvGrd\"]\ntrain_X = train[predictor_cols]\nmy_model = RandomForestRegressor()\nmy_model.fit(train_X, train_y)\ntest = pd.read_csv(\"test.csv\")\ntest_X = test[predictor_cols]\npredicted_prices = my_model.predict(test_X)\nprint(predicted_prices)\nmy_submission = pd.DataFrame({\"Id\": test.Id, \"SalePrice\": predicted_prices})\nmy_submission.to_csv(\"submission.csv\", index=False)\n"
  },
  {
    "path": "stress_tests/kaggle/kaggle9.py",
    "content": "import matplotlib\n\nmatplotlib.use(\"PS\")\nimport matplotlib\nimport matplotlib.pyplot as plt\nimport numpy as np\nfrom scipy.stats import skew\n\nimport modin.pandas as pd\n\ntrain = pd.read_csv(\"train.csv\")\ntest = pd.read_csv(\"test.csv\")\ntrain.head()\nall_data = pd.concat(\n    (\n        train.loc[:, \"MSSubClass\":\"SaleCondition\"],\n        test.loc[:, \"MSSubClass\":\"SaleCondition\"],\n    )\n)\nmatplotlib.rcParams[\"figure.figsize\"] = (12.0, 6.0)\nprices = pd.DataFrame(\n    {\"price\": train[\"SalePrice\"], \"log(price + 1)\": np.log1p(train[\"SalePrice\"])}\n)\nprices.hist()\ntrain[\"SalePrice\"] = np.log1p(train[\"SalePrice\"])\nnumeric_feats = all_data.dtypes[all_data.dtypes != \"object\"].index\nskewed_feats = train[numeric_feats].apply(\n    lambda x: skew(x.dropna())\n)  # compute skewness\nskewed_feats = skewed_feats[skewed_feats > 0.75]\nskewed_feats = skewed_feats.index\nall_data[skewed_feats] = np.log1p(all_data[skewed_feats])\nall_data = pd.get_dummies(all_data)\nall_data = all_data.fillna(all_data.mean())\nX_train = all_data[: train.shape[0]]\nX_test = all_data[train.shape[0] :]\ny = train.SalePrice\nfrom sklearn.linear_model import LassoCV  # RidgeCV, ElasticNet, LassoLarsCV\nfrom sklearn.linear_model import Ridge\nfrom sklearn.model_selection import cross_val_score\n\n\ndef rmse_cv(model):\n    rmse = np.sqrt(\n        -cross_val_score(model, X_train, y, scoring=\"neg_mean_squared_error\", cv=5)\n    )\n    return rmse\n\n\nmodel_ridge = Ridge()\nalphas = [0.05, 0.1, 0.3, 1, 3, 5, 10, 15, 30, 50, 75]\ncv_ridge = [rmse_cv(Ridge(alpha=alpha)).mean() for alpha in alphas]\ncv_ridge = pd.Series(cv_ridge, index=alphas)\ncv_ridge.plot(title=\"Validation - Just Do It\")\nplt.xlabel(\"alpha\")\nplt.ylabel(\"rmse\")\ncv_ridge.min()\nmodel_lasso = LassoCV(alphas=[1, 0.1, 0.001, 0.0005]).fit(X_train, y)\nrmse_cv(model_lasso).mean()\ncoef = pd.Series(model_lasso.coef_, index=X_train.columns)\nprint(\n    \"Lasso picked \"\n    + 
str(sum(coef != 0))\n    + \" variables and eliminated the other \"\n    + str(sum(coef == 0))\n    + \" variables\"\n)\nimp_coef = pd.concat([coef.sort_values().head(10), coef.sort_values().tail(10)])\nmatplotlib.rcParams[\"figure.figsize\"] = (8.0, 10.0)\nimp_coef.plot(kind=\"barh\")\nplt.title(\"Coefficients in the Lasso Model\")\nmatplotlib.rcParams[\"figure.figsize\"] = (6.0, 6.0)\npreds = pd.DataFrame({\"preds\": model_lasso.predict(X_train), \"true\": y})\npreds[\"residuals\"] = preds[\"true\"] - preds[\"preds\"]\npreds.plot(x=\"preds\", y=\"residuals\", kind=\"scatter\")\nimport xgboost as xgb\n\ndtrain = xgb.DMatrix(X_train, label=y)\ndtest = xgb.DMatrix(X_test)\nparams = {\"max_depth\": 2, \"eta\": 0.1}\nmodel = xgb.cv(params, dtrain, num_boost_round=500, early_stopping_rounds=100)\nmodel.loc[30:, [\"test-rmse-mean\", \"train-rmse-mean\"]].plot()\nmodel_xgb = xgb.XGBRegressor(\n    n_estimators=360, max_depth=2, learning_rate=0.1\n)  # the params were tuned using xgb.cv\nmodel_xgb.fit(X_train, y)\nxgb_preds = np.expm1(model_xgb.predict(X_test))\nlasso_preds = np.expm1(model_lasso.predict(X_test))\npredictions = pd.DataFrame({\"xgb\": xgb_preds, \"lasso\": lasso_preds})\npredictions.plot(x=\"xgb\", y=\"lasso\", kind=\"scatter\")\npreds = 0.7 * lasso_preds + 0.3 * xgb_preds\nsolution = pd.DataFrame({\"id\": test.Id, \"SalePrice\": preds})\nsolution.to_csv(\"ridge_sol.csv\", index=False)\nfrom keras.layers import Dense\nfrom keras.models import Sequential\nfrom keras.regularizers import l1\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.preprocessing import StandardScaler\n\nX_train = StandardScaler().fit_transform(X_train)\nX_tr, X_val, y_tr, y_val = train_test_split(X_train, y, random_state=3)\nX_tr.shape\nX_tr\nmodel = Sequential()\nmodel.add(Dense(1, input_dim=X_train.shape[1], W_regularizer=l1(0.001)))\nmodel.compile(loss=\"mse\", optimizer=\"adam\")\nmodel.summary()\nhist = model.fit(X_tr, y_tr, validation_data=(X_val, 
y_val))\npd.Series(model.predict(X_val)[:, 0]).hist()\n"
  },
  {
    "path": "stress_tests/run_stress_tests.sh",
    "content": "#!/usr/bin/env bash\n\n# Show explicitly which commands are currently running.\nset -x\n\n# TODO (williamma12): Once we use clusters, make sure to download latest wheels\n# from s3 bucket instead of building ray\n# Ray directory\nRAY_DIR=${1}\n\nROOT_DIR=$(cd \"$(dirname \"${BASH_SOURCE:-$0}\")\"; pwd)\nRESULT_FILE=$ROOT_DIR/results-$(date '+%Y-%m-%d_%H-%M-%S').log\necho \"Logging to\" $RESULT_FILE\ntouch $RESULT_FILE\n\nsetup_environment(){\n    pushd \"$ROOT_DIR\"\n    # Create a virtual environment for the stress tests\n    python -m virtualenv stress_tests_env >> $RESULT_FILE\n    source stress_tests_env/bin/activate >> $RESULT_FILE\n\n    # Install ray from source if available\n    if [[ ! -z \"$RAY_DIR\" ]]; then\n        pushd \"$RAY_DIR\"\n        pip install -e . --verbose >> $RESULT_FILE\n        popd\n    fi\n\n    # Install modin from source to virtual environment\n    pushd \"$ROOT_DIR/..\"\n    pip install -e . >> $RESULT_FILE\n    popd\n\n    # Install basic data science packages\n    pip install matplotlib numpy seaborn scipy >> $RESULT_FILE\n\n    # Install machine learning packages\n    pip install scikit-learn xgboost lightgbm keras >> $RESULT_FILE\n\n    # Install packages for kaggle18\n    pip install nltk wordcloud plotly bokeh pyLDAvis >> $RESULT_FILE\n\n    popd\n}\n\nteardown_environment(){\n    pushd \"$ROOT_DIR\"\n    rm -rf stress_tests_env >> $RESULT_FILE\n    popd\n}\n\nrun_test(){\n    local test_name=$1\n\n    echo \"Try running $test_name.\"\n    {\n        pytest -vls \"$test_name.py\" >> $RESULT_FILE\n    } || echo \"FAIL: $test_name\" >> $RESULT_FILE\n}\n\npushd \"$ROOT_DIR\"\n    setup_environment\n    run_test test_kaggle_ipynb\n    teardown_environment\npopd\n\ncat $RESULT_FILE\n[ ! -s $RESULT_FILE ] || exit 1\n"
  },
  {
    "path": "stress_tests/test_kaggle_ipynb.py",
    "content": "import logging\nimport os\nimport subprocess\n\nimport numpy as np\nimport pytest\n\nimport modin.pandas as pd\n\n# import ray\n# ray.init(address=\"localhost:6379\")\n\n\nlogger = logging.getLogger(__name__)\n\n# Size for synthetic datasets\nDF_SIZE = 1 * 2**10 * 2**10  # * 2**10 # 1 GiB dataframes\n# This file path\nDIR_PATH = os.path.dirname(os.path.realpath(__file__))\nKAGGLE_DIR_PATH = \"{}/kaggle\".format(DIR_PATH)\n\n\ndef create_dataframe(columns, dtypes, size):\n    def _num_to_str(x):\n        letters = \"\"\n        while x:\n            mod = (x - 1) % 26\n            letters += chr(mod + 65)\n            x = (x - 1) // 26\n        result = \"\".join(reversed(letters))\n        if \"NA\" in result:\n            return _num_to_str(x + 1)\n        else:\n            return result\n\n    result_dict = {}\n    for col, dtype in zip(columns, dtypes):\n        if dtype is str:\n            result_dict[col] = [_num_to_str(x + 1) for x in np.arange(size, dtype=int)]\n        elif dtype is bool:\n            result_dict[col] = [x % 2 == 0 for x in np.arange(size, dtype=int)]\n        else:\n            result_dict[col] = np.arange(size, dtype=dtype)\n    return pd.DataFrame(result_dict)\n\n\n@pytest.fixture\ndef generate_dataset():\n    \"\"\"Generates a synthetic dataset using the given arguments.\n\n    Args:\n        columns (list): Column names of the result\n        dtypes (list): List of dtypes for the corresponding column\n        size (int): Number of rows for result\n\n    Returns:\n        Modin dataframe of synthetic data following arguments.\n    \"\"\"\n    # Record of files generated for a test\n    filenames = []\n\n    def _dataset_builder(filename, columns, dtypes, size=DF_SIZE, files_to_remove=[]):\n        # Add the files generated by the script to be removed\n        for file in files_to_remove:\n            filenames.append(\"{}/{}\".format(KAGGLE_DIR_PATH, file))\n\n        # Update filename to include path\n        filename 
= \"{}/{}\".format(KAGGLE_DIR_PATH, filename)\n\n        # Check that the number of column names is the same as the nubmer of dtypes\n        if len(columns) != len(dtypes):\n            raise ValueError(\"len(columns) != len(dtypes)\")\n\n        # Determine number of rows for synthetic dataset\n        row_size = (\n            create_dataframe(columns, dtypes, 1)\n            .memory_usage(index=False, deep=True)\n            .sum()\n        )\n        result = create_dataframe(columns, dtypes, np.ceil(size / row_size))\n\n        result.to_csv(filename)\n        filenames.append(filename)\n        return result\n\n    # Return dataset builder factory\n    yield _dataset_builder\n\n    # Delete files created\n    for filename in filenames:\n        if os.path.exists(filename):\n            os.remove(filename)\n\n\ndef test_kaggle3(generate_dataset):\n    pokemon_columns = [\n        \"#\",\n        \"Name\",\n        \"Type 1\",\n        \"Type 2\",\n        \"HP\",\n        \"Attack\",\n        \"Defense\",\n        \"Sp. Atk\",\n        \"Sp. 
Def\",\n        \"Speed\",\n        \"Generation\",\n        \"Legendary\",\n    ]\n    pokemon_dtypes = [int, str, str, str, int, int, int, int, int, int, int, bool]\n    generate_dataset(\n        \"pokemon.csv\", pokemon_columns, pokemon_dtypes, files_to_remove=[\"graph.png\"]\n    )\n\n    ipynb = subprocess.Popen(\n        [\"python\", \"kaggle3.py\"],\n        stdout=subprocess.PIPE,\n        stderr=subprocess.PIPE,\n        cwd=KAGGLE_DIR_PATH,\n    )\n    outs, errs = ipynb.communicate()\n\n    if ipynb.returncode:\n        logging.debug(\"Error message\\n-------------\\n %s\", errs.decode(\"utf-8\"))\n\n    logging.info(\"Finished kaggle3\")\n    assert ipynb.returncode == 0\n\n\ndef test_kaggle4(generate_dataset):\n    columns = [\n        \"Id\",\n        \"MSSubClass\",\n        \"MSZoning\",\n        \"LotFrontage\",\n        \"LotArea\",\n        \"Street\",\n        \"Alley\",\n        \"LotShape\",\n        \"LandContour\",\n        \"Utilities\",\n        \"LotConfig\",\n        \"LandSlope\",\n        \"Neighborhood\",\n        \"Condition1\",\n        \"Condition2\",\n        \"BldgType\",\n        \"HouseStyle\",\n        \"OverallQual\",\n        \"OverallCond\",\n        \"YearBuilt\",\n        \"YearRemodAdd\",\n        \"RoofStyle\",\n        \"RoofMatl\",\n        \"Exterior1st\",\n        \"Exterior2nd\",\n        \"MasVnrType\",\n        \"MasVnrArea\",\n        \"ExterQual\",\n        \"ExterCond\",\n        \"Foundation\",\n        \"BsmtQual\",\n        \"BsmtCond\",\n        \"BsmtExposure\",\n        \"BsmtFinType1\",\n        \"BsmtFinSF1\",\n        \"BsmtFinType2\",\n        \"BsmtFinSF2\",\n        \"BsmtUnfSF\",\n        \"TotalBsmtSF\",\n        \"Heating\",\n        \"HeatingQC\",\n        \"CentralAir\",\n        \"Electrical\",\n        \"1stFlrSF\",\n        \"2ndFlrSF\",\n        \"LowQualFinSF\",\n        \"GrLivArea\",\n        \"BsmtFullBath\",\n        \"BsmtHalfBath\",\n        \"FullBath\",\n        \"HalfBath\",\n   
     \"BedroomAbvGr\",\n        \"KitchenAbvGr\",\n        \"KitchenQual\",\n        \"TotRmsAbvGrd\",\n        \"Functional\",\n        \"Fireplaces\",\n        \"FireplaceQu\",\n        \"GarageType\",\n        \"GarageYrBlt\",\n        \"GarageFinish\",\n        \"GarageCars\",\n        \"GarageArea\",\n        \"GarageQual\",\n        \"GarageCond\",\n        \"PavedDrive\",\n        \"WoodDeckSF\",\n        \"OpenPorchSF\",\n        \"EnclosedPorch\",\n        \"3SsnPorch\",\n        \"ScreenPorch\",\n        \"PoolArea\",\n        \"PoolQC\",\n        \"Fence\",\n        \"MiscFeature\",\n        \"MiscVal\",\n        \"MoSold\",\n        \"YrSold\",\n        \"SaleType\",\n        \"SaleCondition\",\n        \"SalePrice\",\n    ]\n    dtypes = [\n        int,\n        int,\n        str,\n        float,\n        int,\n        str,\n        float,\n        str,\n        str,\n        str,\n        str,\n        str,\n        str,\n        str,\n        str,\n        str,\n        str,\n        int,\n        int,\n        int,\n        int,\n        str,\n        str,\n        str,\n        str,\n        str,\n        float,\n        str,\n        str,\n        str,\n        str,\n        str,\n        str,\n        str,\n        int,\n        str,\n        int,\n        int,\n        int,\n        str,\n        str,\n        str,\n        str,\n        int,\n        int,\n        int,\n        int,\n        int,\n        int,\n        int,\n        int,\n        int,\n        int,\n        str,\n        int,\n        str,\n        int,\n        float,\n        str,\n        float,\n        str,\n        int,\n        int,\n        str,\n        str,\n        str,\n        int,\n        int,\n        int,\n        int,\n        int,\n        int,\n        float,\n        float,\n        float,\n        int,\n        int,\n        int,\n        str,\n        str,\n        int,\n    ]\n    generate_dataset(\"train.csv\", columns, dtypes)\n    
generate_dataset(\"test.csv\", columns, dtypes)\n\n    ipynb = subprocess.Popen(\n        [\"python\", \"kaggle4.py\"],\n        stdout=subprocess.PIPE,\n        stderr=subprocess.PIPE,\n        cwd=KAGGLE_DIR_PATH,\n    )\n    outs, errs = ipynb.communicate()\n\n    if ipynb.returncode:\n        logging.debug(\"Error message\\n-------------\\n %s\", errs.decode(\"utf-8\"))\n\n    logging.info(\"Finished kaggle4\")\n    assert ipynb.returncode == 0\n\n\ndef test_kaggle5(generate_dataset):\n    columns = [\n        \"PassengerId\",\n        \"Survived\",\n        \"Pclass\",\n        \"Name\",\n        \"Sex\",\n        \"Age\",\n        \"SibSp\",\n        \"Parch\",\n        \"Ticket\",\n        \"Fare\",\n        \"Cabin\",\n        \"Embarked\",\n    ]\n    dtypes = [int, int, int, str, str, float, int, int, str, float, float, str]\n    generate_dataset(\"train.csv\", columns, dtypes)\n    generate_dataset(\"test.csv\", columns, dtypes)\n\n    ipynb = subprocess.Popen(\n        [\"python\", \"kaggle5.py\"],\n        stdout=subprocess.PIPE,\n        stderr=subprocess.PIPE,\n        cwd=KAGGLE_DIR_PATH,\n    )\n    outs, errs = ipynb.communicate()\n\n    if ipynb.returncode:\n        logging.debug(\"Error message\\n-------------\\n %s\", errs.decode(\"utf-8\"))\n\n    logging.info(\"Finished kaggle5\")\n    assert ipynb.returncode == 0\n\n\n@pytest.mark.skip(\"Missing Original Data Schema\")\ndef test_kaggle6(generate_dataset):\n    columns = []\n    dtypes = []\n    generate_dataset(\"test.csv\", columns, dtypes)\n    generate_dataset(\"train.csv\", columns, dtypes)\n\n    ipynb = subprocess.Popen(\n        [\"python\", \"kaggle6.py\"],\n        stdout=subprocess.PIPE,\n        stderr=subprocess.PIPE,\n        cwd=KAGGLE_DIR_PATH,\n    )\n    outs, errs = ipynb.communicate()\n\n    if ipynb.returncode:\n        logging.debug(\"Error message\\n-------------\\n %s\", errs.decode(\"utf-8\"))\n\n    logging.info(\"Finished kaggle6\")\n    assert ipynb.returncode == 
0\n\n\ndef test_kaggle7(generate_dataset):\n    columns = [\n        \"SK_ID_CURR\",\n        \"TARGET\",\n        \"NAME_CONTRACT_TYPE\",\n        \"CODE_GENDER\",\n        \"FLAG_OWN_CAR\",\n        \"FLAG_OWN_REALTY\",\n        \"CNT_CHILDREN\",\n        \"AMT_INCOME_TOTAL\",\n        \"AMT_CREDIT\",\n        \"AMT_ANNUITY\",\n        \"AMT_GOODS_PRICE\",\n        \"NAME_TYPE_SUITE\",\n        \"NAME_INCOME_TYPE\",\n        \"NAME_EDUCATION_TYPE\",\n        \"NAME_FAMILY_STATUS\",\n        \"NAME_HOUSING_TYPE\",\n        \"REGION_POPULATION_RELATIVE\",\n        \"DAYS_BIRTH\",\n        \"DAYS_EMPLOYED\",\n        \"DAYS_REGISTRATION\",\n        \"DAYS_ID_PUBLISH\",\n        \"OWN_CAR_AGE\",\n        \"FLAG_MOBIL\",\n        \"FLAG_EMP_PHONE\",\n        \"FLAG_WORK_PHONE\",\n        \"FLAG_CONT_MOBILE\",\n        \"FLAG_PHONE\",\n        \"FLAG_EMAIL\",\n        \"OCCUPATION_TYPE\",\n        \"CNT_FAM_MEMBERS\",\n        \"REGION_RATING_CLIENT\",\n        \"REGION_RATING_CLIENT_W_CITY\",\n        \"WEEKDAY_APPR_PROCESS_START\",\n        \"HOUR_APPR_PROCESS_START\",\n        \"REG_REGION_NOT_LIVE_REGION\",\n        \"REG_REGION_NOT_WORK_REGION\",\n        \"LIVE_REGION_NOT_WORK_REGION\",\n        \"REG_CITY_NOT_LIVE_CITY\",\n        \"REG_CITY_NOT_WORK_CITY\",\n        \"LIVE_CITY_NOT_WORK_CITY\",\n        \"ORGANIZATION_TYPE\",\n        \"EXT_SOURCE_1\",\n        \"EXT_SOURCE_2\",\n        \"EXT_SOURCE_3\",\n        \"APARTMENTS_AVG\",\n        \"BASEMENTAREA_AVG\",\n        \"YEARS_BEGINEXPLUATATION_AVG\",\n        \"YEARS_BUILD_AVG\",\n        \"COMMONAREA_AVG\",\n        \"ELEVATORS_AVG\",\n        \"ENTRANCES_AVG\",\n        \"FLOORSMAX_AVG\",\n        \"FLOORSMIN_AVG\",\n        \"LANDAREA_AVG\",\n        \"LIVINGAPARTMENTS_AVG\",\n        \"LIVINGAREA_AVG\",\n        \"NONLIVINGAPARTMENTS_AVG\",\n        \"NONLIVINGAREA_AVG\",\n        \"APARTMENTS_MODE\",\n        \"BASEMENTAREA_MODE\",\n        \"YEARS_BEGINEXPLUATATION_MODE\",\n        
\"YEARS_BUILD_MODE\",\n        \"COMMONAREA_MODE\",\n        \"ELEVATORS_MODE\",\n        \"ENTRANCES_MODE\",\n        \"FLOORSMAX_MODE\",\n        \"FLOORSMIN_MODE\",\n        \"LANDAREA_MODE\",\n        \"LIVINGAPARTMENTS_MODE\",\n        \"LIVINGAREA_MODE\",\n        \"NONLIVINGAPARTMENTS_MODE\",\n        \"NONLIVINGAREA_MODE\",\n        \"APARTMENTS_MEDI\",\n        \"BASEMENTAREA_MEDI\",\n        \"YEARS_BEGINEXPLUATATION_MEDI\",\n        \"YEARS_BUILD_MEDI\",\n        \"COMMONAREA_MEDI\",\n        \"ELEVATORS_MEDI\",\n        \"ENTRANCES_MEDI\",\n        \"FLOORSMAX_MEDI\",\n        \"FLOORSMIN_MEDI\",\n        \"LANDAREA_MEDI\",\n        \"LIVINGAPARTMENTS_MEDI\",\n        \"LIVINGAREA_MEDI\",\n        \"NONLIVINGAPARTMENTS_MEDI\",\n        \"NONLIVINGAREA_MEDI\",\n        \"FONDKAPREMONT_MODE\",\n        \"HOUSETYPE_MODE\",\n        \"TOTALAREA_MODE\",\n        \"WALLSMATERIAL_MODE\",\n        \"EMERGENCYSTATE_MODE\",\n        \"OBS_30_CNT_SOCIAL_CIRCLE\",\n        \"DEF_30_CNT_SOCIAL_CIRCLE\",\n        \"OBS_60_CNT_SOCIAL_CIRCLE\",\n        \"DEF_60_CNT_SOCIAL_CIRCLE\",\n        \"DAYS_LAST_PHONE_CHANGE\",\n        \"FLAG_DOCUMENT_2\",\n        \"FLAG_DOCUMENT_3\",\n        \"FLAG_DOCUMENT_4\",\n        \"FLAG_DOCUMENT_5\",\n        \"FLAG_DOCUMENT_6\",\n        \"FLAG_DOCUMENT_7\",\n        \"FLAG_DOCUMENT_8\",\n        \"FLAG_DOCUMENT_9\",\n        \"FLAG_DOCUMENT_10\",\n        \"FLAG_DOCUMENT_11\",\n        \"FLAG_DOCUMENT_12\",\n        \"FLAG_DOCUMENT_13\",\n        \"FLAG_DOCUMENT_14\",\n        \"FLAG_DOCUMENT_15\",\n        \"FLAG_DOCUMENT_16\",\n        \"FLAG_DOCUMENT_17\",\n        \"FLAG_DOCUMENT_18\",\n        \"FLAG_DOCUMENT_19\",\n        \"FLAG_DOCUMENT_20\",\n        \"FLAG_DOCUMENT_21\",\n        \"AMT_REQ_CREDIT_BUREAU_HOUR\",\n        \"AMT_REQ_CREDIT_BUREAU_DAY\",\n        \"AMT_REQ_CREDIT_BUREAU_WEEK\",\n        \"AMT_REQ_CREDIT_BUREAU_MON\",\n        \"AMT_REQ_CREDIT_BUREAU_QRT\",\n        \"AMT_REQ_CREDIT_BUREAU_YEAR\",\n    ]\n    
dtypes = [\n        int,\n        int,\n        str,\n        str,\n        str,\n        str,\n        int,\n        float,\n        float,\n        float,\n        float,\n        str,\n        str,\n        str,\n        str,\n        str,\n        float,\n        int,\n        int,\n        float,\n        int,\n        float,\n        int,\n        int,\n        int,\n        int,\n        int,\n        int,\n        str,\n        float,\n        int,\n        int,\n        str,\n        int,\n        int,\n        int,\n        int,\n        int,\n        int,\n        int,\n        str,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n        str,\n        str,\n        float,\n        str,\n        str,\n        float,\n        float,\n        float,\n        float,\n        float,\n        int,\n        int,\n        int,\n        int,\n        int,\n        int,\n        int,\n        int,\n        int,\n        int,\n        int,\n        int,\n        int,\n        int,\n        int,\n        int,\n        int,\n        int,\n        int,\n        int,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n    ]\n    generate_dataset(\n        \"application_train.csv\",\n        columns,\n        dtypes,\n        files_to_remove=[\n            
\"log_reg_baseline.csv\",\n            \"random_forest_baseline.csv\",\n            \"random_forest_baseline_engineered.csv\",\n            \"random_forest_baseline_domain.csv\",\n            \"baseline_lgb.csv\",\n            \"baseline_lgb_domain_features.csv\",\n        ],\n    )\n    generate_dataset(\"application_test.csv\", columns, dtypes)\n\n    ipynb = subprocess.Popen(\n        [\"python\", \"kaggle7.py\"],\n        stdout=subprocess.PIPE,\n        stderr=subprocess.PIPE,\n        cwd=KAGGLE_DIR_PATH,\n    )\n    outs, errs = ipynb.communicate()\n\n    if ipynb.returncode:\n        logging.debug(\"Error message\\n-------------\\n %s\", errs.decode(\"utf-8\"))\n\n    logging.info(\"Finished kaggle7\")\n    assert ipynb.returncode == 0\n\n\ndef test_kaggle8(generate_dataset):\n    columns = [\n        \"Id\",\n        \"MSSubClass\",\n        \"MSZoning\",\n        \"LotFrontage\",\n        \"LotArea\",\n        \"Street\",\n        \"Alley\",\n        \"LotShape\",\n        \"LandContour\",\n        \"Utilities\",\n        \"LotConfig\",\n        \"LandSlope\",\n        \"Neighborhood\",\n        \"Condition1\",\n        \"Condition2\",\n        \"BldgType\",\n        \"HouseStyle\",\n        \"OverallQual\",\n        \"OverallCond\",\n        \"YearBuilt\",\n        \"YearRemodAdd\",\n        \"RoofStyle\",\n        \"RoofMatl\",\n        \"Exterior1st\",\n        \"Exterior2nd\",\n        \"MasVnrType\",\n        \"MasVnrArea\",\n        \"ExterQual\",\n        \"ExterCond\",\n        \"Foundation\",\n        \"BsmtQual\",\n        \"BsmtCond\",\n        \"BsmtExposure\",\n        \"BsmtFinType1\",\n        \"BsmtFinSF1\",\n        \"BsmtFinType2\",\n        \"BsmtFinSF2\",\n        \"BsmtUnfSF\",\n        \"TotalBsmtSF\",\n        \"Heating\",\n        \"HeatingQC\",\n        \"CentralAir\",\n        \"Electrical\",\n        \"1stFlrSF\",\n        \"2ndFlrSF\",\n        \"LowQualFinSF\",\n        \"GrLivArea\",\n        \"BsmtFullBath\",\n        
\"BsmtHalfBath\",\n        \"FullBath\",\n        \"HalfBath\",\n        \"BedroomAbvGr\",\n        \"KitchenAbvGr\",\n        \"KitchenQual\",\n        \"TotRmsAbvGrd\",\n        \"Functional\",\n        \"Fireplaces\",\n        \"FireplaceQu\",\n        \"GarageType\",\n        \"GarageYrBlt\",\n        \"GarageFinish\",\n        \"GarageCars\",\n        \"GarageArea\",\n        \"GarageQual\",\n        \"GarageCond\",\n        \"PavedDrive\",\n        \"WoodDeckSF\",\n        \"OpenPorchSF\",\n        \"EnclosedPorch\",\n        \"3SsnPorch\",\n        \"ScreenPorch\",\n        \"PoolArea\",\n        \"PoolQC\",\n        \"Fence\",\n        \"MiscFeature\",\n        \"MiscVal\",\n        \"MoSold\",\n        \"YrSold\",\n        \"SaleType\",\n        \"SaleCondition\",\n        \"SalePrice\",\n    ]\n    dtypes = [\n        int,\n        int,\n        str,\n        float,\n        int,\n        str,\n        float,\n        str,\n        str,\n        str,\n        str,\n        str,\n        str,\n        str,\n        str,\n        str,\n        str,\n        int,\n        int,\n        int,\n        int,\n        str,\n        str,\n        str,\n        str,\n        str,\n        float,\n        str,\n        str,\n        str,\n        str,\n        str,\n        str,\n        str,\n        float,\n        str,\n        float,\n        float,\n        float,\n        str,\n        str,\n        str,\n        str,\n        int,\n        int,\n        int,\n        int,\n        float,\n        float,\n        int,\n        int,\n        int,\n        int,\n        str,\n        int,\n        str,\n        int,\n        float,\n        str,\n        float,\n        str,\n        float,\n        float,\n        str,\n        str,\n        str,\n        int,\n        int,\n        int,\n        int,\n        int,\n        int,\n        float,\n        str,\n        float,\n        int,\n        int,\n        int,\n        str,\n        str,\n        int,\n    
]\n    generate_dataset(\"test.csv\", columns, dtypes, files_to_remove=[\"submission.csv\"])\n    generate_dataset(\"train.csv\", columns, dtypes)\n\n    ipynb = subprocess.Popen(\n        [\"python\", \"kaggle8.py\"],\n        stdout=subprocess.PIPE,\n        stderr=subprocess.PIPE,\n        cwd=KAGGLE_DIR_PATH,\n    )\n    outs, errs = ipynb.communicate()\n\n    if ipynb.returncode:\n        logging.debug(\"Error message\\n-------------\\n %s\", errs.decode(\"utf-8\"))\n\n    logging.info(\"Finished kaggle8\")\n    assert ipynb.returncode == 0\n\n\ndef test_kaggle9(generate_dataset):\n    columns = [\n        \"Id\",\n        \"MSSubClass\",\n        \"MSZoning\",\n        \"LotFrontage\",\n        \"LotArea\",\n        \"Street\",\n        \"Alley\",\n        \"LotShape\",\n        \"LandContour\",\n        \"Utilities\",\n        \"LotConfig\",\n        \"LandSlope\",\n        \"Neighborhood\",\n        \"Condition1\",\n        \"Condition2\",\n        \"BldgType\",\n        \"HouseStyle\",\n        \"OverallQual\",\n        \"OverallCond\",\n        \"YearBuilt\",\n        \"YearRemodAdd\",\n        \"RoofStyle\",\n        \"RoofMatl\",\n        \"Exterior1st\",\n        \"Exterior2nd\",\n        \"MasVnrType\",\n        \"MasVnrArea\",\n        \"ExterQual\",\n        \"ExterCond\",\n        \"Foundation\",\n        \"BsmtQual\",\n        \"BsmtCond\",\n        \"BsmtExposure\",\n        \"BsmtFinType1\",\n        \"BsmtFinSF1\",\n        \"BsmtFinType2\",\n        \"BsmtFinSF2\",\n        \"BsmtUnfSF\",\n        \"TotalBsmtSF\",\n        \"Heating\",\n        \"HeatingQC\",\n        \"CentralAir\",\n        \"Electrical\",\n        \"1stFlrSF\",\n        \"2ndFlrSF\",\n        \"LowQualFinSF\",\n        \"GrLivArea\",\n        \"BsmtFullBath\",\n        \"BsmtHalfBath\",\n        \"FullBath\",\n        \"HalfBath\",\n        \"BedroomAbvGr\",\n        \"KitchenAbvGr\",\n        \"KitchenQual\",\n        \"TotRmsAbvGrd\",\n        \"Functional\",\n        
\"Fireplaces\",\n        \"FireplaceQu\",\n        \"GarageType\",\n        \"GarageYrBlt\",\n        \"GarageFinish\",\n        \"GarageCars\",\n        \"GarageArea\",\n        \"GarageQual\",\n        \"GarageCond\",\n        \"PavedDrive\",\n        \"WoodDeckSF\",\n        \"OpenPorchSF\",\n        \"EnclosedPorch\",\n        \"3SsnPorch\",\n        \"ScreenPorch\",\n        \"PoolArea\",\n        \"PoolQC\",\n        \"Fence\",\n        \"MiscFeature\",\n        \"MiscVal\",\n        \"MoSold\",\n        \"YrSold\",\n        \"SaleType\",\n        \"SaleCondition\",\n        \"SalePrice\",\n    ]\n    dtypes = [\n        int,\n        int,\n        str,\n        float,\n        int,\n        str,\n        float,\n        str,\n        str,\n        str,\n        str,\n        str,\n        str,\n        str,\n        str,\n        str,\n        str,\n        int,\n        int,\n        int,\n        int,\n        str,\n        str,\n        str,\n        str,\n        str,\n        float,\n        str,\n        str,\n        str,\n        str,\n        str,\n        str,\n        str,\n        int,\n        str,\n        int,\n        int,\n        int,\n        str,\n        str,\n        str,\n        str,\n        int,\n        int,\n        int,\n        int,\n        int,\n        int,\n        int,\n        int,\n        int,\n        int,\n        str,\n        int,\n        str,\n        int,\n        float,\n        str,\n        float,\n        str,\n        int,\n        int,\n        str,\n        str,\n        str,\n        int,\n        int,\n        int,\n        int,\n        int,\n        int,\n        float,\n        float,\n        float,\n        int,\n        int,\n        int,\n        str,\n        str,\n        int,\n    ]\n    generate_dataset(\"test.csv\", columns, dtypes, files_to_remove=[\"ridge_sol.csv\"])\n    generate_dataset(\"train.csv\", columns, dtypes)\n\n    ipynb = subprocess.Popen(\n        [\"python\", 
\"kaggle9.py\"],\n        stdout=subprocess.PIPE,\n        stderr=subprocess.PIPE,\n        cwd=KAGGLE_DIR_PATH,\n    )\n    outs, errs = ipynb.communicate()\n\n    if ipynb.returncode:\n        logging.debug(\"Error message\\n-------------\\n %s\", errs.decode(\"utf-8\"))\n\n    logging.info(\"Finished kaggle9\")\n    assert ipynb.returncode == 0\n\n\ndef test_kaggle10(generate_dataset):\n    columns = [\n        \"pelvic_incidence\",\n        \"pelvic_tilt numeric\",\n        \"lumbar_lordosis_angle\",\n        \"sacral_slope\",\n        \"pelvic_radius\",\n        \"degree_spondylolisthesis\",\n        \"class\",\n    ]\n    dtypes = [float, float, float, float, float, float, str]\n    generate_dataset(\n        \"column_2C_weka.csv\", columns, dtypes, files_to_remove=[\"graph.png\"]\n    )\n\n    ipynb = subprocess.Popen(\n        [\"python\", \"kaggle10.py\"],\n        stdout=subprocess.PIPE,\n        stderr=subprocess.PIPE,\n        cwd=KAGGLE_DIR_PATH,\n    )\n    outs, errs = ipynb.communicate()\n\n    if ipynb.returncode:\n        logging.debug(\"Error message\\n-------------\\n %s\", errs.decode(\"utf-8\"))\n\n    logging.info(\"Finished kaggle10\")\n    assert ipynb.returncode == 0\n\n\ndef test_kaggle12(generate_dataset):\n    columns = [\n        \"PassengerId\",\n        \"Survived\",\n        \"Pclass\",\n        \"Name\",\n        \"Sex\",\n        \"Age\",\n        \"SibSp\",\n        \"Parch\",\n        \"Ticket\",\n        \"Fare\",\n        \"Cabin\",\n        \"Embarked\",\n    ]\n    dtypes = [int, int, int, str, str, float, int, int, str, float, float, str]\n    generate_dataset(\n        \"train.csv\", columns, dtypes, files_to_remove=[\"ensemble_python_voting.csv\"]\n    )\n    generate_dataset(\"test.csv\", columns, dtypes)\n\n    ipynb = subprocess.Popen(\n        [\"python\", \"kaggle12.py\"],\n        stdout=subprocess.PIPE,\n        stderr=subprocess.PIPE,\n        cwd=KAGGLE_DIR_PATH,\n    )\n    outs, errs = ipynb.communicate()\n\n   
 if ipynb.returncode:\n        logging.debug(\"Error message\\n-------------\\n %s\", errs.decode(\"utf-8\"))\n\n    logging.info(\"Finished kaggle12\")\n    assert ipynb.returncode == 0\n\n\ndef test_kaggle13(generate_dataset):\n    columns = [\n        \"Id\",\n        \"SepalLengthCm\",\n        \"SepalWidthCm\",\n        \"PetalLengthCm\",\n        \"PetalWidthCm\",\n        \"Species\",\n    ]\n    dtypes = [int, float, float, float, float, str]\n    generate_dataset(\"Iris.csv\", columns, dtypes)\n\n    ipynb = subprocess.Popen(\n        [\"python\", \"kaggle13.py\"],\n        stdout=subprocess.PIPE,\n        stderr=subprocess.PIPE,\n        cwd=KAGGLE_DIR_PATH,\n    )\n    outs, errs = ipynb.communicate()\n\n    if ipynb.returncode:\n        logging.debug(\"Error message\\n-------------\\n %s\", errs.decode(\"utf-8\"))\n\n    logging.info(\"Finished kaggle13\")\n    assert ipynb.returncode == 0\n\n\ndef test_kaggle14(generate_dataset):\n    columns = [\n        \"PassengerId\",\n        \"Survived\",\n        \"Pclass\",\n        \"Name\",\n        \"Sex\",\n        \"Age\",\n        \"SibSp\",\n        \"Parch\",\n        \"Ticket\",\n        \"Fare\",\n        \"Cabin\",\n        \"Embarked\",\n    ]\n    dtypes = [int, int, int, str, str, float, int, int, str, float, float, str]\n    generate_dataset(\"train.csv\", columns, dtypes)\n    generate_dataset(\"test.csv\", columns, dtypes)\n\n    ipynb = subprocess.Popen(\n        [\"python\", \"kaggle14.py\"],\n        stdout=subprocess.PIPE,\n        stderr=subprocess.PIPE,\n        cwd=KAGGLE_DIR_PATH,\n    )\n    outs, errs = ipynb.communicate()\n\n    if ipynb.returncode:\n        logging.debug(\"Error message\\n-------------\\n %s\", errs.decode(\"utf-8\"))\n\n    logging.info(\"Finished kaggle14\")\n    assert ipynb.returncode == 0\n\n\ndef test_kaggle17(generate_dataset):\n    columns = [\n        \"Suburb\",\n        \"Address\",\n        \"Rooms\",\n        \"Type\",\n        \"Price\",\n        
\"Method\",\n        \"SellerG\",\n        \"Date\",\n        \"Distance\",\n        \"Postcode\",\n        \"Bedroom2\",\n        \"Bathroom\",\n        \"Car\",\n        \"Landsize\",\n        \"BuildingArea\",\n        \"YearBuilt\",\n        \"CouncilArea\",\n        \"Lattitude\",\n        \"Longtitude\",\n        \"Regionname\",\n        \"Propertycount\",\n    ]\n    dtypes = [\n        str,\n        str,\n        int,\n        str,\n        float,\n        str,\n        str,\n        str,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n        str,\n        float,\n        float,\n        str,\n        float,\n    ]\n    generate_dataset(\"melb_data.csv\", columns, dtypes)\n\n    ipynb = subprocess.Popen(\n        [\"python\", \"kaggle17.py\"],\n        stdout=subprocess.PIPE,\n        stderr=subprocess.PIPE,\n        cwd=KAGGLE_DIR_PATH,\n    )\n    outs, errs = ipynb.communicate()\n\n    if ipynb.returncode:\n        logging.debug(\"Error message\\n-------------\\n %s\", errs.decode(\"utf-8\"))\n\n    logging.info(\"Finished kaggle17\")\n    assert ipynb.returncode == 0\n\n\ndef test_kaggle18(generate_dataset):\n    columns = [\n        \"train_id\",\n        \"name\",\n        \"item_condition_id\",\n        \"category_name\",\n        \"brand_name\",\n        \"price\",\n        \"shipping\",\n        \"item_description\",\n    ]\n    # TODO (williamma12): \"category_name\" should be strings but original data\n    # that is not currently captured by the data generation\n    dtypes = [int, str, int, int, float, float, int, str]\n    generate_dataset(\"test.csv\", columns, dtypes)\n    generate_dataset(\"train.csv\", columns, dtypes)\n\n    ipynb = subprocess.Popen(\n        [\"python\", \"kaggle18.py\"],\n        stdout=subprocess.PIPE,\n        stderr=subprocess.PIPE,\n        cwd=KAGGLE_DIR_PATH,\n    )\n    outs, errs = ipynb.communicate()\n\n    if ipynb.returncode:\n    
    logging.debug(\"Error message\\n-------------\\n %s\", errs.decode(\"utf-8\"))\n\n    logging.info(\"Finished kaggle18\")\n    assert ipynb.returncode == 0\n\n\ndef test_kaggle19(generate_dataset):\n    columns = [\n        \"Id\",\n        \"groupId\",\n        \"matchId\",\n        \"assists\",\n        \"boosts\",\n        \"damageDealt\",\n        \"DBNOs\",\n        \"headshotKills\",\n        \"heals\",\n        \"killPlace\",\n        \"killPoints\",\n        \"kills\",\n        \"killStreaks\",\n        \"longestKill\",\n        \"matchDuration\",\n        \"matchType\",\n        \"maxPlace\",\n        \"numGroups\",\n        \"rankPoints\",\n        \"revives\",\n        \"rideDistance\",\n        \"roadKills\",\n        \"swimDistance\",\n        \"teamKills\",\n        \"vehicleDestroys\",\n        \"walkDistance\",\n        \"weaponsAcquired\",\n        \"winPoints\",\n        \"winPlacePerc\",\n    ]\n    dtypes = [\n        str,\n        str,\n        str,\n        int,\n        int,\n        float,\n        int,\n        int,\n        int,\n        int,\n        int,\n        int,\n        int,\n        float,\n        int,\n        str,\n        int,\n        int,\n        int,\n        int,\n        float,\n        int,\n        float,\n        int,\n        int,\n        float,\n        int,\n        int,\n        int,\n    ]\n    generate_dataset(\"train.csv\", columns, dtypes)\n\n    ipynb = subprocess.Popen(\n        [\"python\", \"kaggle19.py\"],\n        stdout=subprocess.PIPE,\n        stderr=subprocess.PIPE,\n        cwd=KAGGLE_DIR_PATH,\n    )\n    outs, errs = ipynb.communicate()\n\n    if ipynb.returncode:\n        logging.debug(\"Error message\\n-------------\\n %s\", errs.decode(\"utf-8\"))\n\n    logging.info(\"Finished kaggle19\")\n    assert ipynb.returncode == 0\n\n\ndef test_kaggle20(generate_dataset):\n    columns = [\n        \"id\",\n        \"diagnosis\",\n        \"radius_mean\",\n        \"texture_mean\",\n        
\"perimeter_mean\",\n        \"area_mean\",\n        \"smoothness_mean\",\n        \"compactness_mean\",\n        \"concavity_mean\",\n        \"concave points_mean\",\n        \"symmetry_mean\",\n        \"fractal_dimension_mean\",\n        \"radius_se\",\n        \"texture_se\",\n        \"perimeter_se\",\n        \"area_se\",\n        \"smoothness_se\",\n        \"compactness_se\",\n        \"concavity_se\",\n        \"concave points_se\",\n        \"symmetry_se\",\n        \"fractal_dimension_se\",\n        \"radius_worst\",\n        \"texture_worst\",\n        \"perimeter_worst\",\n        \"area_worst\",\n        \"smoothness_worst\",\n        \"compactness_worst\",\n        \"concavity_worst\",\n        \"concave points_worst\",\n        \"symmetry_worst\",\n        \"fractal_dimension_worst\",\n        \"Unnamed: 32\",\n    ]\n    dtypes = [\n        int,\n        str,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n        float,\n    ]\n    generate_dataset(\"data.csv\", columns, dtypes)\n\n    ipynb = subprocess.Popen(\n        [\"python\", \"kaggle20.py\"],\n        stdout=subprocess.PIPE,\n        stderr=subprocess.PIPE,\n        cwd=KAGGLE_DIR_PATH,\n    )\n    outs, errs = ipynb.communicate()\n\n    if ipynb.returncode:\n        logging.debug(\"Error message\\n-------------\\n %s\", errs.decode(\"utf-8\"))\n\n    logging.info(\"Finished kaggle20\")\n    assert ipynb.returncode == 0\n\n\ndef test_kaggle22(generate_dataset):\n    train_columns = [\n        \"id\",\n        \"comment_text\",\n        \"toxic\",\n        
\"severe_toxic\",\n        \"obscene\",\n        \"threat\",\n        \"insult\",\n        \"identity_hate\",\n    ]\n    train_dtypes = [str, str, float, float, float, float, float, float]\n    test_columns = [\"id\", \"comment_text\"]\n    test_dtypes = [str, str]\n    submission_columns = [\n        \"id\",\n        \"toxic\",\n        \"severe_toxic\",\n        \"obscene\",\n        \"threat\",\n        \"insult\",\n        \"identity_hate\",\n    ]\n    submission_dtypes = [str, float, float, float, float, float, float]\n    generate_dataset(\n        \"train.csv\", train_columns, train_dtypes, files_to_remove=[\"submission.csv\"]\n    )\n    generate_dataset(\"test.csv\", test_columns, test_dtypes)\n    generate_dataset(\"sample_submission.csv\", submission_columns, submission_dtypes)\n\n    ipynb = subprocess.Popen(\n        [\"python\", \"kaggle22.py\"],\n        stdout=subprocess.PIPE,\n        stderr=subprocess.PIPE,\n        cwd=KAGGLE_DIR_PATH,\n    )\n    outs, errs = ipynb.communicate()\n\n    if ipynb.returncode:\n        logging.debug(\"Error message\\n-------------\\n %s\", errs.decode(\"utf-8\"))\n\n    logging.info(\"Finished kaggle22\")\n    assert ipynb.returncode == 0\n"
  },
  {
    "path": "versioneer.py",
    "content": "# Version: 0.29\n\n\"\"\"The Versioneer - like a rocketeer, but for versions.\n\nThe Versioneer\n==============\n\n* like a rocketeer, but for versions!\n* https://github.com/python-versioneer/python-versioneer\n* Brian Warner\n* License: Public Domain (Unlicense)\n* Compatible with: Python 3.7, 3.8, 3.9, 3.10, 3.11 and pypy3\n* [![Latest Version][pypi-image]][pypi-url]\n* [![Build Status][travis-image]][travis-url]\n\nThis is a tool for managing a recorded version number in setuptools-based\npython projects. The goal is to remove the tedious and error-prone \"update\nthe embedded version string\" step from your release process. Making a new\nrelease should be as easy as recording a new tag in your version-control\nsystem, and maybe making new tarballs.\n\n\n## Quick Install\n\nVersioneer provides two installation modes. The \"classic\" vendored mode installs\na copy of versioneer into your repository. The experimental build-time dependency mode\nis intended to allow you to skip this step and simplify the process of upgrading.\n\n### Vendored mode\n\n* `pip install versioneer` to somewhere in your $PATH\n   * A [conda-forge recipe](https://github.com/conda-forge/versioneer-feedstock) is\n     available, so you can also use `conda install -c conda-forge versioneer`\n* add a `[tool.versioneer]` section to your `pyproject.toml` or a\n  `[versioneer]` section to your `setup.cfg` (see [Install](INSTALL.md))\n   * Note that you will need to add `tomli; python_version < \"3.11\"` to your\n     build-time dependencies if you use `pyproject.toml`\n* run `versioneer install --vendor` in your source tree, commit the results\n* verify version information with `python setup.py version`\n\n### Build-time dependency mode\n\n* `pip install versioneer` to somewhere in your $PATH\n   * A [conda-forge recipe](https://github.com/conda-forge/versioneer-feedstock) is\n     available, so you can also use `conda install -c conda-forge versioneer`\n* add a 
`[tool.versioneer]` section to your `pyproject.toml` or a\n  `[versioneer]` section to your `setup.cfg` (see [Install](INSTALL.md))\n* add `versioneer` (with `[toml]` extra, if configuring in `pyproject.toml`)\n  to the `requires` key of the `build-system` table in `pyproject.toml`:\n  ```toml\n  [build-system]\n  requires = [\"setuptools\", \"versioneer[toml]\"]\n  build-backend = \"setuptools.build_meta\"\n  ```\n* run `versioneer install --no-vendor` in your source tree, commit the results\n* verify version information with `python setup.py version`\n\n## Version Identifiers\n\nSource trees come from a variety of places:\n\n* a version-control system checkout (mostly used by developers)\n* a nightly tarball, produced by build automation\n* a snapshot tarball, produced by a web-based VCS browser, like github's\n  \"tarball from tag\" feature\n* a release tarball, produced by \"setup.py sdist\", distributed through PyPI\n\nWithin each source tree, the version identifier (either a string or a number,\nthis tool is format-agnostic) can come from a variety of places:\n\n* ask the VCS tool itself, e.g. \"git describe\" (for checkouts), which knows\n  about recent \"tags\" and an absolute revision-id\n* the name of the directory into which the tarball was unpacked\n* an expanded VCS keyword ($Id$, etc)\n* a `_version.py` created by some earlier build step\n\nFor released software, the version identifier is closely related to a VCS\ntag. Some projects use tag names that include more than just the version\nstring (e.g. \"myproject-1.2\" instead of just \"1.2\"), in which case the tool\nneeds to strip the tag prefix to extract the version identifier. For\nunreleased software (between tags), the version identifier should provide\nenough information to help developers recreate the same tree, while also\ngiving them an idea of roughly how old the tree is (after version 1.2, before\nversion 1.3). 
Many VCS systems can report a description that captures this,\nfor example `git describe --tags --dirty --always` reports things like\n\"0.7-1-g574ab98-dirty\" to indicate that the checkout is one revision past the\n0.7 tag, has a unique revision id of \"574ab98\", and is \"dirty\" (it has\nuncommitted changes).\n\nThe version identifier is used for multiple purposes:\n\n* to allow the module to self-identify its version: `myproject.__version__`\n* to choose a name and prefix for a 'setup.py sdist' tarball\n\n## Theory of Operation\n\nVersioneer works by adding a special `_version.py` file into your source\ntree, where your `__init__.py` can import it. This `_version.py` knows how to\ndynamically ask the VCS tool for version information at import time.\n\n`_version.py` also contains `$Revision$` markers, and the installation\nprocess marks `_version.py` to have this marker rewritten with a tag name\nduring the `git archive` command. As a result, generated tarballs will\ncontain enough information to get the proper version.\n\nTo allow `setup.py` to compute a version too, a `versioneer.py` is added to\nthe top level of your source tree, next to `setup.py` and the `setup.cfg`\nthat configures it. This overrides several distutils/setuptools commands to\ncompute the version when invoked, and changes `setup.py build` and `setup.py\nsdist` to replace `_version.py` with a small static file that contains just\nthe generated version data.\n\n## Installation\n\nSee [INSTALL.md](./INSTALL.md) for detailed installation instructions.\n\n## Version-String Flavors\n\nCode which uses Versioneer can learn about its version string at runtime by\nimporting `_version` from your main `__init__.py` file and running the\n`get_versions()` function. From the \"outside\" (e.g. 
in `setup.py`), you can\nimport the top-level `versioneer.py` and run `get_versions()`.\n\nBoth functions return a dictionary with different flavors of version\ninformation:\n\n* `['version']`: A condensed version string, rendered using the selected\n  style. This is the most commonly used value for the project's version\n  string. The default \"pep440\" style yields strings like `0.11`,\n  `0.11+2.g1076c97`, or `0.11+2.g1076c97.dirty`. See the \"Styles\" section\n  below for alternative styles.\n\n* `['full-revisionid']`: detailed revision identifier. For Git, this is the\n  full SHA1 commit id, e.g. \"1076c978a8d3cfc70f408fe5974aa6c092c949ac\".\n\n* `['date']`: Date and time of the latest `HEAD` commit. For Git, it is the\n  commit date in ISO 8601 format. This will be None if the date is not\n  available.\n\n* `['dirty']`: a boolean, True if the tree has uncommitted changes. Note that\n  this is only accurate if run in a VCS checkout, otherwise it is likely to\n  be False or None\n\n* `['error']`: if the version string could not be computed, this will be set\n  to a string describing the problem, otherwise it will be None. It may be\n  useful to throw an exception in setup.py if this is set, to avoid e.g.\n  creating tarballs with a version string of \"unknown\".\n\nSome variants are more useful than others. Including `full-revisionid` in a\nbug report should allow developers to reconstruct the exact code being tested\n(or indicate the presence of local changes that should be shared with the\ndevelopers). 
`version` is suitable for display in an \"about\" box or a CLI\n`--version` output: it can be easily compared against release notes and lists\nof bugs fixed in various releases.\n\nThe installer adds the following text to your `__init__.py` to place a basic\nversion in `YOURPROJECT.__version__`:\n\n    from ._version import get_versions\n    __version__ = get_versions()['version']\n    del get_versions\n\n## Styles\n\nThe setup.cfg `style=` configuration controls how the VCS information is\nrendered into a version string.\n\nThe default style, \"pep440\", produces a PEP440-compliant string, equal to the\nun-prefixed tag name for actual releases, and containing an additional \"local\nversion\" section with more detail for in-between builds. For Git, this is\nTAG[+DISTANCE.gHEX[.dirty]] , using information from `git describe --tags\n--dirty --always`. For example \"0.11+2.g1076c97.dirty\" indicates that the\ntree is like the \"1076c97\" commit but has uncommitted changes (\".dirty\"), and\nthat this commit is two revisions (\"+2\") beyond the \"0.11\" tag. For released\nsoftware (exactly equal to a known tag), the identifier will only contain the\nstripped tag, e.g. \"0.11\".\n\nOther styles are available. See [details.md](details.md) in the Versioneer\nsource tree for descriptions.\n\n## Debugging\n\nVersioneer tries to avoid fatal errors: if something goes wrong, it will tend\nto return a version of \"0+unknown\". To investigate the problem, run `setup.py\nversion`, which will run the version-lookup code in a verbose mode, and will\ndisplay the full contents of `get_versions()` (including the `error` string,\nwhich may help identify what went wrong).\n\n## Known Limitations\n\nSome situations are known to cause problems for Versioneer. This details the\nmost significant ones. 
More can be found on GitHub\n[issues page](https://github.com/python-versioneer/python-versioneer/issues).\n\n### Subprojects\n\nVersioneer has limited support for source trees in which `setup.py` is not in\nthe root directory (e.g. `setup.py` and `.git/` are *not* siblings). There are\ntwo common reasons why `setup.py` might not be in the root:\n\n* Source trees which contain multiple subprojects, such as\n  [Buildbot](https://github.com/buildbot/buildbot), which contains both\n  \"master\" and \"slave\" subprojects, each with their own `setup.py`,\n  `setup.cfg`, and `tox.ini`. Projects like these produce multiple PyPI\n  distributions (and upload multiple independently-installable tarballs).\n* Source trees whose main purpose is to contain a C library, but which also\n  provide bindings to Python (and perhaps other languages) in subdirectories.\n\nVersioneer will look for `.git` in parent directories, and most operations\nshould get the right version string. However `pip` and `setuptools` have bugs\nand implementation details which frequently cause `pip install .` from a\nsubproject directory to fail to find a correct version string (so it usually\ndefaults to `0+unknown`).\n\n`pip install --editable .` should work correctly. `setup.py install` might\nwork too.\n\nPip-8.1.1 is known to have this problem, but hopefully it will get fixed in\nsome later version.\n\n[Bug #38](https://github.com/python-versioneer/python-versioneer/issues/38) is tracking\nthis issue. 
The discussion in\n[PR #61](https://github.com/python-versioneer/python-versioneer/pull/61) describes the\nissue from the Versioneer side in more detail.\n[pip PR#3176](https://github.com/pypa/pip/pull/3176) and\n[pip PR#3615](https://github.com/pypa/pip/pull/3615) contain work to improve\npip to let Versioneer work correctly.\n\nVersioneer-0.16 and earlier only looked for a `.git` directory next to the\n`setup.cfg`, so subprojects were completely unsupported with those releases.\n\n### Editable installs with setuptools <= 18.5\n\n`setup.py develop` and `pip install --editable .` allow you to install a\nproject into a virtualenv once, then continue editing the source code (and\ntest) without re-installing after every change.\n\n\"Entry-point scripts\" (`setup(entry_points={\"console_scripts\": ..})`) are a\nconvenient way to specify executable scripts that should be installed along\nwith the python package.\n\nThese both work as expected when using modern setuptools. When using\nsetuptools-18.5 or earlier, however, certain operations will cause\n`pkg_resources.DistributionNotFound` errors when running the entrypoint\nscript, which must be resolved by re-installing the package. This happens\nwhen the install happens with one version, then the egg_info data is\nregenerated while a different version is checked out. 
Many setup.py commands\ncause egg_info to be rebuilt (including `sdist`, `wheel`, and installing into\na different virtualenv), so this can be surprising.\n\n[Bug #83](https://github.com/python-versioneer/python-versioneer/issues/83) describes\nthis one, but upgrading to a newer version of setuptools should probably\nresolve it.\n\n\n## Updating Versioneer\n\nTo upgrade your project to a new release of Versioneer, do the following:\n\n* install the new Versioneer (`pip install -U versioneer` or equivalent)\n* edit `setup.cfg` and `pyproject.toml`, if necessary,\n  to include any new configuration settings indicated by the release notes.\n  See [UPGRADING](./UPGRADING.md) for details.\n* re-run `versioneer install --[no-]vendor` in your source tree, to replace\n  `SRC/_version.py`\n* commit any changed files\n\n## Future Directions\n\nThis tool is designed to be easily extended to other version-control\nsystems: all VCS-specific components are in separate directories like\nsrc/git/ . The top-level `versioneer.py` script is assembled from these\ncomponents by running make-versioneer.py . In the future, make-versioneer.py\nwill take a VCS name as an argument, and will construct a version of\n`versioneer.py` that is specific to the given VCS. It might also take the\nconfiguration arguments that are currently provided manually during\ninstallation by editing setup.py . Alternatively, it might go the other\ndirection and include code from all supported VCS systems, reducing the\nnumber of intermediate scripts.\n\n## Similar projects\n\n* [setuptools_scm](https://github.com/pypa/setuptools_scm/) - a non-vendored build-time\n  dependency\n* [miniver](https://github.com/jbweston/miniver) - a lightweight reimplementation of\n  versioneer\n* [versioningit](https://github.com/jwodder/versioningit) - a PEP 518-based setuptools\n  plugin\n\n## License\n\nTo make Versioneer easier to embed, all its code is dedicated to the public\ndomain. 
The `_version.py` that it creates is also in the public domain.\nSpecifically, both are released under the \"Unlicense\", as described in\nhttps://unlicense.org/.\n\n[pypi-image]: https://img.shields.io/pypi/v/versioneer.svg\n[pypi-url]: https://pypi.python.org/pypi/versioneer/\n[travis-image]:\nhttps://img.shields.io/travis/com/python-versioneer/python-versioneer.svg\n[travis-url]: https://travis-ci.com/github/python-versioneer/python-versioneer\n\n\"\"\"\n# pylint:disable=invalid-name,import-outside-toplevel,missing-function-docstring\n# pylint:disable=missing-class-docstring,too-many-branches,too-many-statements\n# pylint:disable=raise-missing-from,too-many-lines,too-many-locals,import-error\n# pylint:disable=too-few-public-methods,redefined-outer-name,consider-using-with\n# pylint:disable=attribute-defined-outside-init,too-many-arguments\n\nimport configparser\nimport errno\nimport functools\nimport json\nimport os\nimport re\nimport subprocess\nimport sys\nfrom pathlib import Path\nfrom typing import Any, Callable, Dict, List, NoReturn, Optional, Tuple, Union, cast\n\nhave_tomllib = True\nif sys.version_info >= (3, 11):\n    import tomllib\nelse:\n    try:\n        import tomli as tomllib\n    except ImportError:\n        have_tomllib = False\n\n\nclass VersioneerConfig:\n    \"\"\"Container for Versioneer configuration parameters.\"\"\"\n\n    VCS: str\n    style: str\n    tag_prefix: str\n    versionfile_source: str\n    versionfile_build: Optional[str]\n    parentdir_prefix: Optional[str]\n    verbose: Optional[bool]\n\n\ndef get_root() -> str:\n    \"\"\"Get the project root directory.\n\n    We require that all commands are run from the project root, i.e. 
the\n    directory that contains setup.py, setup.cfg, and versioneer.py .\n    \"\"\"\n    root = os.path.realpath(os.path.abspath(os.getcwd()))\n    setup_py = os.path.join(root, \"setup.py\")\n    pyproject_toml = os.path.join(root, \"pyproject.toml\")\n    versioneer_py = os.path.join(root, \"versioneer.py\")\n    if not (\n        os.path.exists(setup_py)\n        or os.path.exists(pyproject_toml)\n        or os.path.exists(versioneer_py)\n    ):\n        # allow 'python path/to/setup.py COMMAND'\n        root = os.path.dirname(os.path.realpath(os.path.abspath(sys.argv[0])))\n        setup_py = os.path.join(root, \"setup.py\")\n        pyproject_toml = os.path.join(root, \"pyproject.toml\")\n        versioneer_py = os.path.join(root, \"versioneer.py\")\n    if not (\n        os.path.exists(setup_py)\n        or os.path.exists(pyproject_toml)\n        or os.path.exists(versioneer_py)\n    ):\n        err = (\n            \"Versioneer was unable to run the project root directory. \"\n            \"Versioneer requires setup.py to be executed from \"\n            \"its immediate directory (like 'python setup.py COMMAND'), \"\n            \"or in a way that lets it use sys.argv[0] to find the root \"\n            \"(like 'python path/to/setup.py COMMAND').\"\n        )\n        raise VersioneerBadRootError(err)\n    try:\n        # Certain runtime workflows (setup.py install/develop in a setuptools\n        # tree) execute all dependencies in a single python process, so\n        # \"versioneer\" may be imported multiple times, and python's shared\n        # module-import table will cache the first one. 
So we can't use\n        # os.path.dirname(__file__), as that will find whichever\n        # versioneer.py was first imported, even in later projects.\n        my_path = os.path.realpath(os.path.abspath(__file__))\n        me_dir = os.path.normcase(os.path.splitext(my_path)[0])\n        vsr_dir = os.path.normcase(os.path.splitext(versioneer_py)[0])\n        if me_dir != vsr_dir and \"VERSIONEER_PEP518\" not in globals():\n            print(\n                \"Warning: build in %s is using versioneer.py from %s\"\n                % (os.path.dirname(my_path), versioneer_py)\n            )\n    except NameError:\n        pass\n    return root\n\n\ndef get_config_from_root(root: str) -> VersioneerConfig:\n    \"\"\"Read the project setup.cfg file to determine Versioneer config.\"\"\"\n    # This might raise OSError (if setup.cfg is missing), or\n    # configparser.NoSectionError (if it lacks a [versioneer] section), or\n    # configparser.NoOptionError (if it lacks \"VCS=\"). See the docstring at\n    # the top of versioneer.py for instructions on writing your setup.cfg .\n    root_pth = Path(root)\n    pyproject_toml = root_pth / \"pyproject.toml\"\n    setup_cfg = root_pth / \"setup.cfg\"\n    section: Union[Dict[str, Any], configparser.SectionProxy, None] = None\n    if pyproject_toml.exists() and have_tomllib:\n        try:\n            with open(pyproject_toml, \"rb\") as fobj:\n                pp = tomllib.load(fobj)\n            section = pp[\"tool\"][\"versioneer\"]\n        except (tomllib.TOMLDecodeError, KeyError) as e:\n            print(f\"Failed to load config from {pyproject_toml}: {e}\")\n            print(\"Try to load it from setup.cfg\")\n    if not section:\n        parser = configparser.ConfigParser()\n        with open(setup_cfg) as cfg_file:\n            parser.read_file(cfg_file)\n        parser.get(\"versioneer\", \"VCS\")  # raise error if missing\n\n        section = parser[\"versioneer\"]\n\n    # `cast`` really shouldn't be used, but its 
simplest for the\n    # common VersioneerConfig users at the moment. We verify against\n    # `None` values elsewhere where it matters\n\n    cfg = VersioneerConfig()\n    cfg.VCS = section[\"VCS\"]\n    cfg.style = section.get(\"style\", \"\")\n    cfg.versionfile_source = cast(str, section.get(\"versionfile_source\"))\n    cfg.versionfile_build = section.get(\"versionfile_build\")\n    cfg.tag_prefix = cast(str, section.get(\"tag_prefix\"))\n    if cfg.tag_prefix in (\"''\", '\"\"', None):\n        cfg.tag_prefix = \"\"\n    cfg.parentdir_prefix = section.get(\"parentdir_prefix\")\n    if isinstance(section, configparser.SectionProxy):\n        # Make sure configparser translates to bool\n        cfg.verbose = section.getboolean(\"verbose\")\n    else:\n        cfg.verbose = section.get(\"verbose\")\n\n    return cfg\n\n\nclass NotThisMethod(Exception):\n    \"\"\"Exception raised if a method is not valid for the current scenario.\"\"\"\n\n\n# these dictionaries contain VCS-specific tools\nLONG_VERSION_PY: Dict[str, str] = {}\nHANDLERS: Dict[str, Dict[str, Callable]] = {}\n\n\ndef register_vcs_handler(vcs: str, method: str) -> Callable:  # decorator\n    \"\"\"Create decorator to mark a method as the handler of a VCS.\"\"\"\n\n    def decorate(f: Callable) -> Callable:\n        \"\"\"Store f in HANDLERS[vcs][method].\"\"\"\n        HANDLERS.setdefault(vcs, {})[method] = f\n        return f\n\n    return decorate\n\n\ndef run_command(\n    commands: List[str],\n    args: List[str],\n    cwd: Optional[str] = None,\n    verbose: bool = False,\n    hide_stderr: bool = False,\n    env: Optional[Dict[str, str]] = None,\n) -> Tuple[Optional[str], Optional[int]]:\n    \"\"\"Call the given command(s).\"\"\"\n    assert isinstance(commands, list)\n    process = None\n\n    popen_kwargs: Dict[str, Any] = {}\n    if sys.platform == \"win32\":\n        # This hides the console window if pythonw.exe is used\n        startupinfo = subprocess.STARTUPINFO()\n        
startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW\n        popen_kwargs[\"startupinfo\"] = startupinfo\n\n    for command in commands:\n        try:\n            dispcmd = str([command] + args)\n            # remember shell=False, so use git.cmd on windows, not just git\n            process = subprocess.Popen(\n                [command] + args,\n                cwd=cwd,\n                env=env,\n                stdout=subprocess.PIPE,\n                stderr=(subprocess.PIPE if hide_stderr else None),\n                **popen_kwargs,\n            )\n            break\n        except OSError as e:\n            if e.errno == errno.ENOENT:\n                continue\n            if verbose:\n                print(\"unable to run %s\" % dispcmd)\n                print(e)\n            return None, None\n    else:\n        if verbose:\n            print(\"unable to find command, tried %s\" % (commands,))\n        return None, None\n    stdout = process.communicate()[0].strip().decode()\n    if process.returncode != 0:\n        if verbose:\n            print(\"unable to run %s (error)\" % dispcmd)\n            print(\"stdout was %s\" % stdout)\n        return None, process.returncode\n    return stdout, process.returncode\n\n\nLONG_VERSION_PY[\n    \"git\"\n] = r'''\n# This file helps to compute a version number in source trees obtained from\n# git-archive tarball (such as those provided by githubs download-from-tag\n# feature). 
Distribution tarballs (built by setup.py sdist) and build\n# directories (produced by setup.py build) will contain a much shorter file\n# that just contains the computed version number.\n\n# This file is released into the public domain.\n# Generated by versioneer-0.29\n# https://github.com/python-versioneer/python-versioneer\n\n\"\"\"Git implementation of _version.py.\"\"\"\n\nimport errno\nimport os\nimport re\nimport subprocess\nimport sys\nfrom typing import Any, Callable, Dict, List, Optional, Tuple\nimport functools\n\n\ndef get_keywords() -> Dict[str, str]:\n    \"\"\"Get the keywords needed to look up the version information.\"\"\"\n    # these strings will be replaced by git during git-archive.\n    # setup.py/versioneer.py will grep for the variable names, so they must\n    # each be defined on a line of their own. _version.py will just call\n    # get_keywords().\n    git_refnames = \"%(DOLLAR)sFormat:%%d%(DOLLAR)s\"\n    git_full = \"%(DOLLAR)sFormat:%%H%(DOLLAR)s\"\n    git_date = \"%(DOLLAR)sFormat:%%ci%(DOLLAR)s\"\n    keywords = {\"refnames\": git_refnames, \"full\": git_full, \"date\": git_date}\n    return keywords\n\n\nclass VersioneerConfig:\n    \"\"\"Container for Versioneer configuration parameters.\"\"\"\n\n    VCS: str\n    style: str\n    tag_prefix: str\n    parentdir_prefix: str\n    versionfile_source: str\n    verbose: bool\n\n\ndef get_config() -> VersioneerConfig:\n    \"\"\"Create, populate and return the VersioneerConfig() object.\"\"\"\n    # these strings are filled in when 'setup.py versioneer' creates\n    # _version.py\n    cfg = VersioneerConfig()\n    cfg.VCS = \"git\"\n    cfg.style = \"%(STYLE)s\"\n    cfg.tag_prefix = \"%(TAG_PREFIX)s\"\n    cfg.parentdir_prefix = \"%(PARENTDIR_PREFIX)s\"\n    cfg.versionfile_source = \"%(VERSIONFILE_SOURCE)s\"\n    cfg.verbose = False\n    return cfg\n\n\nclass NotThisMethod(Exception):\n    \"\"\"Exception raised if a method is not valid for the current 
scenario.\"\"\"\n\n\nLONG_VERSION_PY: Dict[str, str] = {}\nHANDLERS: Dict[str, Dict[str, Callable]] = {}\n\n\ndef register_vcs_handler(vcs: str, method: str) -> Callable:  # decorator\n    \"\"\"Create decorator to mark a method as the handler of a VCS.\"\"\"\n    def decorate(f: Callable) -> Callable:\n        \"\"\"Store f in HANDLERS[vcs][method].\"\"\"\n        if vcs not in HANDLERS:\n            HANDLERS[vcs] = {}\n        HANDLERS[vcs][method] = f\n        return f\n    return decorate\n\n\ndef run_command(\n    commands: List[str],\n    args: List[str],\n    cwd: Optional[str] = None,\n    verbose: bool = False,\n    hide_stderr: bool = False,\n    env: Optional[Dict[str, str]] = None,\n) -> Tuple[Optional[str], Optional[int]]:\n    \"\"\"Call the given command(s).\"\"\"\n    assert isinstance(commands, list)\n    process = None\n\n    popen_kwargs: Dict[str, Any] = {}\n    if sys.platform == \"win32\":\n        # This hides the console window if pythonw.exe is used\n        startupinfo = subprocess.STARTUPINFO()\n        startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW\n        popen_kwargs[\"startupinfo\"] = startupinfo\n\n    for command in commands:\n        try:\n            dispcmd = str([command] + args)\n            # remember shell=False, so use git.cmd on windows, not just git\n            process = subprocess.Popen([command] + args, cwd=cwd, env=env,\n                                       stdout=subprocess.PIPE,\n                                       stderr=(subprocess.PIPE if hide_stderr\n                                               else None), **popen_kwargs)\n            break\n        except OSError as e:\n            if e.errno == errno.ENOENT:\n                continue\n            if verbose:\n                print(\"unable to run %%s\" %% dispcmd)\n                print(e)\n            return None, None\n    else:\n        if verbose:\n            print(\"unable to find command, tried %%s\" %% (commands,))\n        return None, 
None\n    stdout = process.communicate()[0].strip().decode()\n    if process.returncode != 0:\n        if verbose:\n            print(\"unable to run %%s (error)\" %% dispcmd)\n            print(\"stdout was %%s\" %% stdout)\n        return None, process.returncode\n    return stdout, process.returncode\n\n\ndef versions_from_parentdir(\n    parentdir_prefix: str,\n    root: str,\n    verbose: bool,\n) -> Dict[str, Any]:\n    \"\"\"Try to determine the version from the parent directory name.\n\n    Source tarballs conventionally unpack into a directory that includes both\n    the project name and a version string. We will also support searching up\n    two directory levels for an appropriately named parent directory\n    \"\"\"\n    rootdirs = []\n\n    for _ in range(3):\n        dirname = os.path.basename(root)\n        if dirname.startswith(parentdir_prefix):\n            return {\"version\": dirname[len(parentdir_prefix):],\n                    \"full-revisionid\": None,\n                    \"dirty\": False, \"error\": None, \"date\": None}\n        rootdirs.append(root)\n        root = os.path.dirname(root)  # up a level\n\n    if verbose:\n        print(\"Tried directories %%s but none started with prefix %%s\" %%\n              (str(rootdirs), parentdir_prefix))\n    raise NotThisMethod(\"rootdir doesn't start with parentdir_prefix\")\n\n\n@register_vcs_handler(\"git\", \"get_keywords\")\ndef git_get_keywords(versionfile_abs: str) -> Dict[str, str]:\n    \"\"\"Extract version information from the given file.\"\"\"\n    # the code embedded in _version.py can just fetch the value of these\n    # keywords. When used from setup.py, we don't want to import _version.py,\n    # so we do it with a regexp instead. 
This function is not used from\n    # _version.py.\n    keywords: Dict[str, str] = {}\n    try:\n        with open(versionfile_abs, \"r\") as fobj:\n            for line in fobj:\n                if line.strip().startswith(\"git_refnames =\"):\n                    mo = re.search(r'=\\s*\"(.*)\"', line)\n                    if mo:\n                        keywords[\"refnames\"] = mo.group(1)\n                if line.strip().startswith(\"git_full =\"):\n                    mo = re.search(r'=\\s*\"(.*)\"', line)\n                    if mo:\n                        keywords[\"full\"] = mo.group(1)\n                if line.strip().startswith(\"git_date =\"):\n                    mo = re.search(r'=\\s*\"(.*)\"', line)\n                    if mo:\n                        keywords[\"date\"] = mo.group(1)\n    except OSError:\n        pass\n    return keywords\n\n\n@register_vcs_handler(\"git\", \"keywords\")\ndef git_versions_from_keywords(\n    keywords: Dict[str, str],\n    tag_prefix: str,\n    verbose: bool,\n) -> Dict[str, Any]:\n    \"\"\"Get version information from git keywords.\"\"\"\n    if \"refnames\" not in keywords:\n        raise NotThisMethod(\"Short version file found\")\n    date = keywords.get(\"date\")\n    if date is not None:\n        # Use only the last line.  Previous lines may contain GPG signature\n        # information.\n        date = date.splitlines()[-1]\n\n        # git-2.2.0 added \"%%cI\", which expands to an ISO-8601 -compliant\n        # datestamp. 
However we prefer \"%%ci\" (which expands to an \"ISO-8601\n        # -like\" string, which we must then edit to make compliant), because\n        # it's been around since git-1.5.3, and it's too difficult to\n        # discover which version we're using, or to work around using an\n        # older one.\n        date = date.strip().replace(\" \", \"T\", 1).replace(\" \", \"\", 1)\n    refnames = keywords[\"refnames\"].strip()\n    if refnames.startswith(\"$Format\"):\n        if verbose:\n            print(\"keywords are unexpanded, not using\")\n        raise NotThisMethod(\"unexpanded keywords, not a git-archive tarball\")\n    refs = {r.strip() for r in refnames.strip(\"()\").split(\",\")}\n    # starting in git-1.8.3, tags are listed as \"tag: foo-1.0\" instead of\n    # just \"foo-1.0\". If we see a \"tag: \" prefix, prefer those.\n    TAG = \"tag: \"\n    tags = {r[len(TAG):] for r in refs if r.startswith(TAG)}\n    if not tags:\n        # Either we're using git < 1.8.3, or there really are no tags. We use\n        # a heuristic: assume all version tags have a digit. The old git %%d\n        # expansion behaves like git log --decorate=short and strips out the\n        # refs/heads/ and refs/tags/ prefixes that would let us distinguish\n        # between branches and tags. By ignoring refnames without digits, we\n        # filter out many common branch names like \"release\" and\n        # \"stabilization\", as well as \"HEAD\" and \"master\".\n        tags = {r for r in refs if re.search(r'\\d', r)}\n        if verbose:\n            print(\"discarding '%%s', no digits\" %% \",\".join(refs - tags))\n    if verbose:\n        print(\"likely tags: %%s\" %% \",\".join(sorted(tags)))\n    for ref in sorted(tags):\n        # sorting will prefer e.g. 
\"2.0\" over \"2.0rc1\"\n        if ref.startswith(tag_prefix):\n            r = ref[len(tag_prefix):]\n            # Filter out refs that exactly match prefix or that don't start\n            # with a number once the prefix is stripped (mostly a concern\n            # when prefix is '')\n            if not re.match(r'\\d', r):\n                continue\n            if verbose:\n                print(\"picking %%s\" %% r)\n            return {\"version\": r,\n                    \"full-revisionid\": keywords[\"full\"].strip(),\n                    \"dirty\": False, \"error\": None,\n                    \"date\": date}\n    # no suitable tags, so version is \"0+unknown\", but full hex is still there\n    if verbose:\n        print(\"no suitable tags, using unknown + full revision id\")\n    return {\"version\": \"0+unknown\",\n            \"full-revisionid\": keywords[\"full\"].strip(),\n            \"dirty\": False, \"error\": \"no suitable tags\", \"date\": None}\n\n\n@register_vcs_handler(\"git\", \"pieces_from_vcs\")\ndef git_pieces_from_vcs(\n    tag_prefix: str,\n    root: str,\n    verbose: bool,\n    runner: Callable = run_command\n) -> Dict[str, Any]:\n    \"\"\"Get version from 'git describe' in the root of the source tree.\n\n    This only gets called if the git-archive 'subst' keywords were *not*\n    expanded, and _version.py hasn't already been rewritten with a short\n    version string, meaning we're inside a checked out source tree.\n    \"\"\"\n    GITS = [\"git\"]\n    if sys.platform == \"win32\":\n        GITS = [\"git.cmd\", \"git.exe\"]\n\n    # GIT_DIR can interfere with correct operation of Versioneer.\n    # It may be intended to be passed to the Versioneer-versioned project,\n    # but that should not change where we get our version from.\n    env = os.environ.copy()\n    env.pop(\"GIT_DIR\", None)\n    runner = functools.partial(runner, env=env)\n\n    _, rc = runner(GITS, [\"rev-parse\", \"--git-dir\"], cwd=root,\n                   
hide_stderr=not verbose)\n    if rc != 0:\n        if verbose:\n            print(\"Directory %%s not under git control\" %% root)\n        raise NotThisMethod(\"'git rev-parse --git-dir' returned error\")\n\n    # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty]\n    # if there isn't one, this yields HEX[-dirty] (no NUM)\n    describe_out, rc = runner(GITS, [\n        \"describe\", \"--tags\", \"--dirty\", \"--always\", \"--long\",\n        \"--match\", f\"{tag_prefix}[[:digit:]]*\"\n    ], cwd=root)\n    # --long was added in git-1.5.5\n    if describe_out is None:\n        raise NotThisMethod(\"'git describe' failed\")\n    describe_out = describe_out.strip()\n    full_out, rc = runner(GITS, [\"rev-parse\", \"HEAD\"], cwd=root)\n    if full_out is None:\n        raise NotThisMethod(\"'git rev-parse' failed\")\n    full_out = full_out.strip()\n\n    pieces: Dict[str, Any] = {}\n    pieces[\"long\"] = full_out\n    pieces[\"short\"] = full_out[:7]  # maybe improved later\n    pieces[\"error\"] = None\n\n    branch_name, rc = runner(GITS, [\"rev-parse\", \"--abbrev-ref\", \"HEAD\"],\n                             cwd=root)\n    # --abbrev-ref was added in git-1.6.3\n    if rc != 0 or branch_name is None:\n        raise NotThisMethod(\"'git rev-parse --abbrev-ref' returned error\")\n    branch_name = branch_name.strip()\n\n    if branch_name == \"HEAD\":\n        # If we aren't exactly on a branch, pick a branch which represents\n        # the current commit. 
If all else fails, we are on a branchless\n        # commit.\n        branches, rc = runner(GITS, [\"branch\", \"--contains\"], cwd=root)\n        # --contains was added in git-1.5.4\n        if rc != 0 or branches is None:\n            raise NotThisMethod(\"'git branch --contains' returned error\")\n        branches = branches.split(\"\\n\")\n\n        # Remove the first line if we're running detached\n        if \"(\" in branches[0]:\n            branches.pop(0)\n\n        # Strip off the leading \"* \" from the list of branches.\n        branches = [branch[2:] for branch in branches]\n        if \"master\" in branches:\n            branch_name = \"master\"\n        elif not branches:\n            branch_name = None\n        else:\n            # Pick the first branch that is returned. Good or bad.\n            branch_name = branches[0]\n\n    pieces[\"branch\"] = branch_name\n\n    # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty]\n    # TAG might have hyphens.\n    git_describe = describe_out\n\n    # look for -dirty suffix\n    dirty = git_describe.endswith(\"-dirty\")\n    pieces[\"dirty\"] = dirty\n    if dirty:\n        git_describe = git_describe[:git_describe.rindex(\"-dirty\")]\n\n    # now we have TAG-NUM-gHEX or HEX\n\n    if \"-\" in git_describe:\n        # TAG-NUM-gHEX\n        mo = re.search(r'^(.+)-(\\d+)-g([0-9a-f]+)$', git_describe)\n        if not mo:\n            # unparsable. 
Maybe git-describe is misbehaving?\n            pieces[\"error\"] = (\"unable to parse git-describe output: '%%s'\"\n                               %% describe_out)\n            return pieces\n\n        # tag\n        full_tag = mo.group(1)\n        if not full_tag.startswith(tag_prefix):\n            if verbose:\n                fmt = \"tag '%%s' doesn't start with prefix '%%s'\"\n                print(fmt %% (full_tag, tag_prefix))\n            pieces[\"error\"] = (\"tag '%%s' doesn't start with prefix '%%s'\"\n                               %% (full_tag, tag_prefix))\n            return pieces\n        pieces[\"closest-tag\"] = full_tag[len(tag_prefix):]\n\n        # distance: number of commits since tag\n        pieces[\"distance\"] = int(mo.group(2))\n\n        # commit: short hex revision ID\n        pieces[\"short\"] = mo.group(3)\n\n    else:\n        # HEX: no tags\n        pieces[\"closest-tag\"] = None\n        out, rc = runner(GITS, [\"rev-list\", \"HEAD\", \"--left-right\"], cwd=root)\n        pieces[\"distance\"] = len(out.split())  # total number of commits\n\n    # commit date: see ISO-8601 comment in git_versions_from_keywords()\n    date = runner(GITS, [\"show\", \"-s\", \"--format=%%ci\", \"HEAD\"], cwd=root)[0].strip()\n    # Use only the last line.  Previous lines may contain GPG signature\n    # information.\n    date = date.splitlines()[-1]\n    pieces[\"date\"] = date.strip().replace(\" \", \"T\", 1).replace(\" \", \"\", 1)\n\n    return pieces\n\n\ndef plus_or_dot(pieces: Dict[str, Any]) -> str:\n    \"\"\"Return a + if we don't already have one, else return a .\"\"\"\n    if \"+\" in pieces.get(\"closest-tag\", \"\"):\n        return \".\"\n    return \"+\"\n\n\ndef render_pep440(pieces: Dict[str, Any]) -> str:\n    \"\"\"Build up version string, with post-release \"local version identifier\".\n\n    Our goal: TAG[+DISTANCE.gHEX[.dirty]] . 
Note that if you\n    get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty\n\n    Exceptions:\n    1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty]\n    \"\"\"\n    if pieces[\"closest-tag\"]:\n        rendered = pieces[\"closest-tag\"]\n        if pieces[\"distance\"] or pieces[\"dirty\"]:\n            rendered += plus_or_dot(pieces)\n            rendered += \"%%d.g%%s\" %% (pieces[\"distance\"], pieces[\"short\"])\n            if pieces[\"dirty\"]:\n                rendered += \".dirty\"\n    else:\n        # exception #1\n        rendered = \"0+untagged.%%d.g%%s\" %% (pieces[\"distance\"],\n                                          pieces[\"short\"])\n        if pieces[\"dirty\"]:\n            rendered += \".dirty\"\n    return rendered\n\n\ndef render_pep440_branch(pieces: Dict[str, Any]) -> str:\n    \"\"\"TAG[[.dev0]+DISTANCE.gHEX[.dirty]] .\n\n    The \".dev0\" means not master branch. Note that .dev0 sorts backwards\n    (a feature branch will appear \"older\" than the master branch).\n\n    Exceptions:\n    1: no tags. 
0[.dev0]+untagged.DISTANCE.gHEX[.dirty]\n    \"\"\"\n    if pieces[\"closest-tag\"]:\n        rendered = pieces[\"closest-tag\"]\n        if pieces[\"distance\"] or pieces[\"dirty\"]:\n            if pieces[\"branch\"] != \"master\":\n                rendered += \".dev0\"\n            rendered += plus_or_dot(pieces)\n            rendered += \"%%d.g%%s\" %% (pieces[\"distance\"], pieces[\"short\"])\n            if pieces[\"dirty\"]:\n                rendered += \".dirty\"\n    else:\n        # exception #1\n        rendered = \"0\"\n        if pieces[\"branch\"] != \"master\":\n            rendered += \".dev0\"\n        rendered += \"+untagged.%%d.g%%s\" %% (pieces[\"distance\"],\n                                          pieces[\"short\"])\n        if pieces[\"dirty\"]:\n            rendered += \".dirty\"\n    return rendered\n\n\ndef pep440_split_post(ver: str) -> Tuple[str, Optional[int]]:\n    \"\"\"Split pep440 version string at the post-release segment.\n\n    Returns the release segments before the post-release and the\n    post-release version number (or -1 if no post-release segment is present).\n    \"\"\"\n    vc = str.split(ver, \".post\")\n    return vc[0], int(vc[1] or 0) if len(vc) == 2 else None\n\n\ndef render_pep440_pre(pieces: Dict[str, Any]) -> str:\n    \"\"\"TAG[.postN.devDISTANCE] -- No -dirty.\n\n    Exceptions:\n    1: no tags. 
0.post0.devDISTANCE\n    \"\"\"\n    if pieces[\"closest-tag\"]:\n        if pieces[\"distance\"]:\n            # update the post release segment\n            tag_version, post_version = pep440_split_post(pieces[\"closest-tag\"])\n            rendered = tag_version\n            if post_version is not None:\n                rendered += \".post%%d.dev%%d\" %% (post_version + 1, pieces[\"distance\"])\n            else:\n                rendered += \".post0.dev%%d\" %% (pieces[\"distance\"])\n        else:\n            # no commits, use the tag as the version\n            rendered = pieces[\"closest-tag\"]\n    else:\n        # exception #1\n        rendered = \"0.post0.dev%%d\" %% pieces[\"distance\"]\n    return rendered\n\n\ndef render_pep440_post(pieces: Dict[str, Any]) -> str:\n    \"\"\"TAG[.postDISTANCE[.dev0]+gHEX] .\n\n    The \".dev0\" means dirty. Note that .dev0 sorts backwards\n    (a dirty tree will appear \"older\" than the corresponding clean one),\n    but you shouldn't be releasing software with -dirty anyways.\n\n    Exceptions:\n    1: no tags. 0.postDISTANCE[.dev0]\n    \"\"\"\n    if pieces[\"closest-tag\"]:\n        rendered = pieces[\"closest-tag\"]\n        if pieces[\"distance\"] or pieces[\"dirty\"]:\n            rendered += \".post%%d\" %% pieces[\"distance\"]\n            if pieces[\"dirty\"]:\n                rendered += \".dev0\"\n            rendered += plus_or_dot(pieces)\n            rendered += \"g%%s\" %% pieces[\"short\"]\n    else:\n        # exception #1\n        rendered = \"0.post%%d\" %% pieces[\"distance\"]\n        if pieces[\"dirty\"]:\n            rendered += \".dev0\"\n        rendered += \"+g%%s\" %% pieces[\"short\"]\n    return rendered\n\n\ndef render_pep440_post_branch(pieces: Dict[str, Any]) -> str:\n    \"\"\"TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] .\n\n    The \".dev0\" means not master branch.\n\n    Exceptions:\n    1: no tags. 
0.postDISTANCE[.dev0]+gHEX[.dirty]\n    \"\"\"\n    if pieces[\"closest-tag\"]:\n        rendered = pieces[\"closest-tag\"]\n        if pieces[\"distance\"] or pieces[\"dirty\"]:\n            rendered += \".post%%d\" %% pieces[\"distance\"]\n            if pieces[\"branch\"] != \"master\":\n                rendered += \".dev0\"\n            rendered += plus_or_dot(pieces)\n            rendered += \"g%%s\" %% pieces[\"short\"]\n            if pieces[\"dirty\"]:\n                rendered += \".dirty\"\n    else:\n        # exception #1\n        rendered = \"0.post%%d\" %% pieces[\"distance\"]\n        if pieces[\"branch\"] != \"master\":\n            rendered += \".dev0\"\n        rendered += \"+g%%s\" %% pieces[\"short\"]\n        if pieces[\"dirty\"]:\n            rendered += \".dirty\"\n    return rendered\n\n\ndef render_pep440_old(pieces: Dict[str, Any]) -> str:\n    \"\"\"TAG[.postDISTANCE[.dev0]] .\n\n    The \".dev0\" means dirty.\n\n    Exceptions:\n    1: no tags. 0.postDISTANCE[.dev0]\n    \"\"\"\n    if pieces[\"closest-tag\"]:\n        rendered = pieces[\"closest-tag\"]\n        if pieces[\"distance\"] or pieces[\"dirty\"]:\n            rendered += \".post%%d\" %% pieces[\"distance\"]\n            if pieces[\"dirty\"]:\n                rendered += \".dev0\"\n    else:\n        # exception #1\n        rendered = \"0.post%%d\" %% pieces[\"distance\"]\n        if pieces[\"dirty\"]:\n            rendered += \".dev0\"\n    return rendered\n\n\ndef render_git_describe(pieces: Dict[str, Any]) -> str:\n    \"\"\"TAG[-DISTANCE-gHEX][-dirty].\n\n    Like 'git describe --tags --dirty --always'.\n\n    Exceptions:\n    1: no tags. 
HEX[-dirty]  (note: no 'g' prefix)\n    \"\"\"\n    if pieces[\"closest-tag\"]:\n        rendered = pieces[\"closest-tag\"]\n        if pieces[\"distance\"]:\n            rendered += \"-%%d-g%%s\" %% (pieces[\"distance\"], pieces[\"short\"])\n    else:\n        # exception #1\n        rendered = pieces[\"short\"]\n    if pieces[\"dirty\"]:\n        rendered += \"-dirty\"\n    return rendered\n\n\ndef render_git_describe_long(pieces: Dict[str, Any]) -> str:\n    \"\"\"TAG-DISTANCE-gHEX[-dirty].\n\n    Like 'git describe --tags --dirty --always -long'.\n    The distance/hash is unconditional.\n\n    Exceptions:\n    1: no tags. HEX[-dirty]  (note: no 'g' prefix)\n    \"\"\"\n    if pieces[\"closest-tag\"]:\n        rendered = pieces[\"closest-tag\"]\n        rendered += \"-%%d-g%%s\" %% (pieces[\"distance\"], pieces[\"short\"])\n    else:\n        # exception #1\n        rendered = pieces[\"short\"]\n    if pieces[\"dirty\"]:\n        rendered += \"-dirty\"\n    return rendered\n\n\ndef render(pieces: Dict[str, Any], style: str) -> Dict[str, Any]:\n    \"\"\"Render the given version pieces into the requested style.\"\"\"\n    if pieces[\"error\"]:\n        return {\"version\": \"unknown\",\n                \"full-revisionid\": pieces.get(\"long\"),\n                \"dirty\": None,\n                \"error\": pieces[\"error\"],\n                \"date\": None}\n\n    if not style or style == \"default\":\n        style = \"pep440\"  # the default\n\n    if style == \"pep440\":\n        rendered = render_pep440(pieces)\n    elif style == \"pep440-branch\":\n        rendered = render_pep440_branch(pieces)\n    elif style == \"pep440-pre\":\n        rendered = render_pep440_pre(pieces)\n    elif style == \"pep440-post\":\n        rendered = render_pep440_post(pieces)\n    elif style == \"pep440-post-branch\":\n        rendered = render_pep440_post_branch(pieces)\n    elif style == \"pep440-old\":\n        rendered = render_pep440_old(pieces)\n    elif style == 
\"git-describe\":\n        rendered = render_git_describe(pieces)\n    elif style == \"git-describe-long\":\n        rendered = render_git_describe_long(pieces)\n    else:\n        raise ValueError(\"unknown style '%%s'\" %% style)\n\n    return {\"version\": rendered, \"full-revisionid\": pieces[\"long\"],\n            \"dirty\": pieces[\"dirty\"], \"error\": None,\n            \"date\": pieces.get(\"date\")}\n\n\ndef get_versions() -> Dict[str, Any]:\n    \"\"\"Get version information or return default if unable to do so.\"\"\"\n    # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have\n    # __file__, we can work backwards from there to the root. Some\n    # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which\n    # case we can only use expanded keywords.\n\n    cfg = get_config()\n    verbose = cfg.verbose\n\n    try:\n        return git_versions_from_keywords(get_keywords(), cfg.tag_prefix,\n                                          verbose)\n    except NotThisMethod:\n        pass\n\n    try:\n        root = os.path.realpath(__file__)\n        # versionfile_source is the relative path from the top of the source\n        # tree (where the .git directory might live) to this file. 
Invert\n        # this to find the root from __file__.\n        for _ in cfg.versionfile_source.split('/'):\n            root = os.path.dirname(root)\n    except NameError:\n        return {\"version\": \"0+unknown\", \"full-revisionid\": None,\n                \"dirty\": None,\n                \"error\": \"unable to find root of source tree\",\n                \"date\": None}\n\n    try:\n        pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose)\n        return render(pieces, cfg.style)\n    except NotThisMethod:\n        pass\n\n    try:\n        if cfg.parentdir_prefix:\n            return versions_from_parentdir(cfg.parentdir_prefix, root, verbose)\n    except NotThisMethod:\n        pass\n\n    return {\"version\": \"0+unknown\", \"full-revisionid\": None,\n            \"dirty\": None,\n            \"error\": \"unable to compute version\", \"date\": None}\n'''\n\n\n@register_vcs_handler(\"git\", \"get_keywords\")\ndef git_get_keywords(versionfile_abs: str) -> Dict[str, str]:\n    \"\"\"Extract version information from the given file.\"\"\"\n    # the code embedded in _version.py can just fetch the value of these\n    # keywords. When used from setup.py, we don't want to import _version.py,\n    # so we do it with a regexp instead. 
This function is not used from\n    # _version.py.\n    keywords: Dict[str, str] = {}\n    try:\n        with open(versionfile_abs, \"r\") as fobj:\n            for line in fobj:\n                if line.strip().startswith(\"git_refnames =\"):\n                    mo = re.search(r'=\\s*\"(.*)\"', line)\n                    if mo:\n                        keywords[\"refnames\"] = mo.group(1)\n                if line.strip().startswith(\"git_full =\"):\n                    mo = re.search(r'=\\s*\"(.*)\"', line)\n                    if mo:\n                        keywords[\"full\"] = mo.group(1)\n                if line.strip().startswith(\"git_date =\"):\n                    mo = re.search(r'=\\s*\"(.*)\"', line)\n                    if mo:\n                        keywords[\"date\"] = mo.group(1)\n    except OSError:\n        pass\n    return keywords\n\n\n@register_vcs_handler(\"git\", \"keywords\")\ndef git_versions_from_keywords(\n    keywords: Dict[str, str],\n    tag_prefix: str,\n    verbose: bool,\n) -> Dict[str, Any]:\n    \"\"\"Get version information from git keywords.\"\"\"\n    if \"refnames\" not in keywords:\n        raise NotThisMethod(\"Short version file found\")\n    date = keywords.get(\"date\")\n    if date is not None:\n        # Use only the last line.  Previous lines may contain GPG signature\n        # information.\n        date = date.splitlines()[-1]\n\n        # git-2.2.0 added \"%cI\", which expands to an ISO-8601 -compliant\n        # datestamp. 
However we prefer \"%ci\" (which expands to an \"ISO-8601\n        # -like\" string, which we must then edit to make compliant), because\n        # it's been around since git-1.5.3, and it's too difficult to\n        # discover which version we're using, or to work around using an\n        # older one.\n        date = date.strip().replace(\" \", \"T\", 1).replace(\" \", \"\", 1)\n    refnames = keywords[\"refnames\"].strip()\n    if refnames.startswith(\"$Format\"):\n        if verbose:\n            print(\"keywords are unexpanded, not using\")\n        raise NotThisMethod(\"unexpanded keywords, not a git-archive tarball\")\n    refs = {r.strip() for r in refnames.strip(\"()\").split(\",\")}\n    # starting in git-1.8.3, tags are listed as \"tag: foo-1.0\" instead of\n    # just \"foo-1.0\". If we see a \"tag: \" prefix, prefer those.\n    TAG = \"tag: \"\n    tags = {r[len(TAG) :] for r in refs if r.startswith(TAG)}\n    if not tags:\n        # Either we're using git < 1.8.3, or there really are no tags. We use\n        # a heuristic: assume all version tags have a digit. The old git %d\n        # expansion behaves like git log --decorate=short and strips out the\n        # refs/heads/ and refs/tags/ prefixes that would let us distinguish\n        # between branches and tags. By ignoring refnames without digits, we\n        # filter out many common branch names like \"release\" and\n        # \"stabilization\", as well as \"HEAD\" and \"master\".\n        tags = {r for r in refs if re.search(r\"\\d\", r)}\n        if verbose:\n            print(\"discarding '%s', no digits\" % \",\".join(refs - tags))\n    if verbose:\n        print(\"likely tags: %s\" % \",\".join(sorted(tags)))\n    for ref in sorted(tags):\n        # sorting will prefer e.g. 
\"2.0\" over \"2.0rc1\"\n        if ref.startswith(tag_prefix):\n            r = ref[len(tag_prefix) :]\n            # Filter out refs that exactly match prefix or that don't start\n            # with a number once the prefix is stripped (mostly a concern\n            # when prefix is '')\n            if not re.match(r\"\\d\", r):\n                continue\n            if verbose:\n                print(\"picking %s\" % r)\n            return {\n                \"version\": r,\n                \"full-revisionid\": keywords[\"full\"].strip(),\n                \"dirty\": False,\n                \"error\": None,\n                \"date\": date,\n            }\n    # no suitable tags, so version is \"0+unknown\", but full hex is still there\n    if verbose:\n        print(\"no suitable tags, using unknown + full revision id\")\n    return {\n        \"version\": \"0+unknown\",\n        \"full-revisionid\": keywords[\"full\"].strip(),\n        \"dirty\": False,\n        \"error\": \"no suitable tags\",\n        \"date\": None,\n    }\n\n\n@register_vcs_handler(\"git\", \"pieces_from_vcs\")\ndef git_pieces_from_vcs(\n    tag_prefix: str, root: str, verbose: bool, runner: Callable = run_command\n) -> Dict[str, Any]:\n    \"\"\"Get version from 'git describe' in the root of the source tree.\n\n    This only gets called if the git-archive 'subst' keywords were *not*\n    expanded, and _version.py hasn't already been rewritten with a short\n    version string, meaning we're inside a checked out source tree.\n    \"\"\"\n    GITS = [\"git\"]\n    if sys.platform == \"win32\":\n        GITS = [\"git.cmd\", \"git.exe\"]\n\n    # GIT_DIR can interfere with correct operation of Versioneer.\n    # It may be intended to be passed to the Versioneer-versioned project,\n    # but that should not change where we get our version from.\n    env = os.environ.copy()\n    env.pop(\"GIT_DIR\", None)\n    runner = functools.partial(runner, env=env)\n\n    _, rc = runner(GITS, [\"rev-parse\", 
\"--git-dir\"], cwd=root, hide_stderr=not verbose)\n    if rc != 0:\n        if verbose:\n            print(\"Directory %s not under git control\" % root)\n        raise NotThisMethod(\"'git rev-parse --git-dir' returned error\")\n\n    # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty]\n    # if there isn't one, this yields HEX[-dirty] (no NUM)\n    describe_out, rc = runner(\n        GITS,\n        [\n            \"describe\",\n            \"--tags\",\n            \"--dirty\",\n            \"--always\",\n            \"--long\",\n            \"--match\",\n            f\"{tag_prefix}[[:digit:]]*\",\n        ],\n        cwd=root,\n    )\n    # --long was added in git-1.5.5\n    if describe_out is None:\n        raise NotThisMethod(\"'git describe' failed\")\n    describe_out = describe_out.strip()\n    full_out, rc = runner(GITS, [\"rev-parse\", \"HEAD\"], cwd=root)\n    if full_out is None:\n        raise NotThisMethod(\"'git rev-parse' failed\")\n    full_out = full_out.strip()\n\n    pieces: Dict[str, Any] = {}\n    pieces[\"long\"] = full_out\n    pieces[\"short\"] = full_out[:7]  # maybe improved later\n    pieces[\"error\"] = None\n\n    branch_name, rc = runner(GITS, [\"rev-parse\", \"--abbrev-ref\", \"HEAD\"], cwd=root)\n    # --abbrev-ref was added in git-1.6.3\n    if rc != 0 or branch_name is None:\n        raise NotThisMethod(\"'git rev-parse --abbrev-ref' returned error\")\n    branch_name = branch_name.strip()\n\n    if branch_name == \"HEAD\":\n        # If we aren't exactly on a branch, pick a branch which represents\n        # the current commit. 
If all else fails, we are on a branchless\n        # commit.\n        branches, rc = runner(GITS, [\"branch\", \"--contains\"], cwd=root)\n        # --contains was added in git-1.5.4\n        if rc != 0 or branches is None:\n            raise NotThisMethod(\"'git branch --contains' returned error\")\n        branches = branches.split(\"\\n\")\n\n        # Remove the first line if we're running detached\n        if \"(\" in branches[0]:\n            branches.pop(0)\n\n        # Strip off the leading \"* \" from the list of branches.\n        branches = [branch[2:] for branch in branches]\n        if \"master\" in branches:\n            branch_name = \"master\"\n        elif not branches:\n            branch_name = None\n        else:\n            # Pick the first branch that is returned. Good or bad.\n            branch_name = branches[0]\n\n    pieces[\"branch\"] = branch_name\n\n    # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty]\n    # TAG might have hyphens.\n    git_describe = describe_out\n\n    # look for -dirty suffix\n    dirty = git_describe.endswith(\"-dirty\")\n    pieces[\"dirty\"] = dirty\n    if dirty:\n        git_describe = git_describe[: git_describe.rindex(\"-dirty\")]\n\n    # now we have TAG-NUM-gHEX or HEX\n\n    if \"-\" in git_describe:\n        # TAG-NUM-gHEX\n        mo = re.search(r\"^(.+)-(\\d+)-g([0-9a-f]+)$\", git_describe)\n        if not mo:\n            # unparsable. 
Maybe git-describe is misbehaving?\n            pieces[\"error\"] = \"unable to parse git-describe output: '%s'\" % describe_out\n            return pieces\n\n        # tag\n        full_tag = mo.group(1)\n        if not full_tag.startswith(tag_prefix):\n            if verbose:\n                fmt = \"tag '%s' doesn't start with prefix '%s'\"\n                print(fmt % (full_tag, tag_prefix))\n            pieces[\"error\"] = \"tag '%s' doesn't start with prefix '%s'\" % (\n                full_tag,\n                tag_prefix,\n            )\n            return pieces\n        pieces[\"closest-tag\"] = full_tag[len(tag_prefix) :]\n\n        # distance: number of commits since tag\n        pieces[\"distance\"] = int(mo.group(2))\n\n        # commit: short hex revision ID\n        pieces[\"short\"] = mo.group(3)\n\n    else:\n        # HEX: no tags\n        pieces[\"closest-tag\"] = None\n        out, rc = runner(GITS, [\"rev-list\", \"HEAD\", \"--left-right\"], cwd=root)\n        pieces[\"distance\"] = len(out.split())  # total number of commits\n\n    # commit date: see ISO-8601 comment in git_versions_from_keywords()\n    date = runner(GITS, [\"show\", \"-s\", \"--format=%ci\", \"HEAD\"], cwd=root)[0].strip()\n    # Use only the last line.  
Previous lines may contain GPG signature\n    # information.\n    date = date.splitlines()[-1]\n    pieces[\"date\"] = date.strip().replace(\" \", \"T\", 1).replace(\" \", \"\", 1)\n\n    return pieces\n\n\ndef do_vcs_install(versionfile_source: str, ipy: Optional[str]) -> None:\n    \"\"\"Git-specific installation logic for Versioneer.\n\n    For Git, this means creating/changing .gitattributes to mark _version.py\n    for export-subst keyword substitution.\n    \"\"\"\n    GITS = [\"git\"]\n    if sys.platform == \"win32\":\n        GITS = [\"git.cmd\", \"git.exe\"]\n    files = [versionfile_source]\n    if ipy:\n        files.append(ipy)\n    if \"VERSIONEER_PEP518\" not in globals():\n        try:\n            my_path = __file__\n            if my_path.endswith((\".pyc\", \".pyo\")):\n                my_path = os.path.splitext(my_path)[0] + \".py\"\n            versioneer_file = os.path.relpath(my_path)\n        except NameError:\n            versioneer_file = \"versioneer.py\"\n        files.append(versioneer_file)\n    present = False\n    try:\n        with open(\".gitattributes\", \"r\") as fobj:\n            for line in fobj:\n                if line.strip().startswith(versionfile_source):\n                    if \"export-subst\" in line.strip().split()[1:]:\n                        present = True\n                        break\n    except OSError:\n        pass\n    if not present:\n        with open(\".gitattributes\", \"a+\") as fobj:\n            fobj.write(f\"{versionfile_source} export-subst\\n\")\n        files.append(\".gitattributes\")\n    run_command(GITS, [\"add\", \"--\"] + files)\n\n\ndef versions_from_parentdir(\n    parentdir_prefix: str,\n    root: str,\n    verbose: bool,\n) -> Dict[str, Any]:\n    \"\"\"Try to determine the version from the parent directory name.\n\n    Source tarballs conventionally unpack into a directory that includes both\n    the project name and a version string. 
We will also support searching up\n    two directory levels for an appropriately named parent directory\n    \"\"\"\n    rootdirs = []\n\n    for _ in range(3):\n        dirname = os.path.basename(root)\n        if dirname.startswith(parentdir_prefix):\n            return {\n                \"version\": dirname[len(parentdir_prefix) :],\n                \"full-revisionid\": None,\n                \"dirty\": False,\n                \"error\": None,\n                \"date\": None,\n            }\n        rootdirs.append(root)\n        root = os.path.dirname(root)  # up a level\n\n    if verbose:\n        print(\n            \"Tried directories %s but none started with prefix %s\"\n            % (str(rootdirs), parentdir_prefix)\n        )\n    raise NotThisMethod(\"rootdir doesn't start with parentdir_prefix\")\n\n\nSHORT_VERSION_PY = \"\"\"\n# This file was generated by 'versioneer.py' (0.29) from\n# revision-control system data, or from the parent directory name of an\n# unpacked source archive. 
Distribution tarballs contain a pre-generated copy\n# of this file.\n\nimport json\n\nversion_json = '''\n%s\n'''  # END VERSION_JSON\n\n\ndef get_versions():\n    return json.loads(version_json)\n\"\"\"\n\n\ndef versions_from_file(filename: str) -> Dict[str, Any]:\n    \"\"\"Try to determine the version from _version.py if present.\"\"\"\n    try:\n        with open(filename) as f:\n            contents = f.read()\n    except OSError:\n        raise NotThisMethod(\"unable to read _version.py\")\n    mo = re.search(\n        r\"version_json = '''\\n(.*)'''  # END VERSION_JSON\", contents, re.M | re.S\n    )\n    if not mo:\n        mo = re.search(\n            r\"version_json = '''\\r\\n(.*)'''  # END VERSION_JSON\", contents, re.M | re.S\n        )\n    if not mo:\n        raise NotThisMethod(\"no version_json in _version.py\")\n    return json.loads(mo.group(1))\n\n\ndef write_to_version_file(filename: str, versions: Dict[str, Any]) -> None:\n    \"\"\"Write the given version number to the given _version.py file.\"\"\"\n    contents = json.dumps(versions, sort_keys=True, indent=1, separators=(\",\", \": \"))\n    with open(filename, \"w\") as f:\n        f.write(SHORT_VERSION_PY % contents)\n\n    print(\"set %s to '%s'\" % (filename, versions[\"version\"]))\n\n\ndef plus_or_dot(pieces: Dict[str, Any]) -> str:\n    \"\"\"Return a + if we don't already have one, else return a .\"\"\"\n    if \"+\" in pieces.get(\"closest-tag\", \"\"):\n        return \".\"\n    return \"+\"\n\n\ndef render_pep440(pieces: Dict[str, Any]) -> str:\n    \"\"\"Build up version string, with post-release \"local version identifier\".\n\n    Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you\n    get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty\n\n    Exceptions:\n    1: no tags. git_describe was just HEX. 
0+untagged.DISTANCE.gHEX[.dirty]\n    \"\"\"\n    if pieces[\"closest-tag\"]:\n        rendered = pieces[\"closest-tag\"]\n        if pieces[\"distance\"] or pieces[\"dirty\"]:\n            rendered += plus_or_dot(pieces)\n            rendered += \"%d.g%s\" % (pieces[\"distance\"], pieces[\"short\"])\n            if pieces[\"dirty\"]:\n                rendered += \".dirty\"\n    else:\n        # exception #1\n        rendered = \"0+untagged.%d.g%s\" % (pieces[\"distance\"], pieces[\"short\"])\n        if pieces[\"dirty\"]:\n            rendered += \".dirty\"\n    return rendered\n\n\ndef render_pep440_branch(pieces: Dict[str, Any]) -> str:\n    \"\"\"TAG[[.dev0]+DISTANCE.gHEX[.dirty]] .\n\n    The \".dev0\" means not master branch. Note that .dev0 sorts backwards\n    (a feature branch will appear \"older\" than the master branch).\n\n    Exceptions:\n    1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty]\n    \"\"\"\n    if pieces[\"closest-tag\"]:\n        rendered = pieces[\"closest-tag\"]\n        if pieces[\"distance\"] or pieces[\"dirty\"]:\n            if pieces[\"branch\"] != \"master\":\n                rendered += \".dev0\"\n            rendered += plus_or_dot(pieces)\n            rendered += \"%d.g%s\" % (pieces[\"distance\"], pieces[\"short\"])\n            if pieces[\"dirty\"]:\n                rendered += \".dirty\"\n    else:\n        # exception #1\n        rendered = \"0\"\n        if pieces[\"branch\"] != \"master\":\n            rendered += \".dev0\"\n        rendered += \"+untagged.%d.g%s\" % (pieces[\"distance\"], pieces[\"short\"])\n        if pieces[\"dirty\"]:\n            rendered += \".dirty\"\n    return rendered\n\n\ndef pep440_split_post(ver: str) -> Tuple[str, Optional[int]]:\n    \"\"\"Split pep440 version string at the post-release segment.\n\n    Returns the release segments before the post-release and the\n    post-release version number (or -1 if no post-release segment is present).\n    \"\"\"\n    vc = str.split(ver, 
\".post\")\n    return vc[0], int(vc[1] or 0) if len(vc) == 2 else None\n\n\ndef render_pep440_pre(pieces: Dict[str, Any]) -> str:\n    \"\"\"TAG[.postN.devDISTANCE] -- No -dirty.\n\n    Exceptions:\n    1: no tags. 0.post0.devDISTANCE\n    \"\"\"\n    if pieces[\"closest-tag\"]:\n        if pieces[\"distance\"]:\n            # update the post release segment\n            tag_version, post_version = pep440_split_post(pieces[\"closest-tag\"])\n            rendered = tag_version\n            if post_version is not None:\n                rendered += \".post%d.dev%d\" % (post_version + 1, pieces[\"distance\"])\n            else:\n                rendered += \".post0.dev%d\" % (pieces[\"distance\"])\n        else:\n            # no commits, use the tag as the version\n            rendered = pieces[\"closest-tag\"]\n    else:\n        # exception #1\n        rendered = \"0.post0.dev%d\" % pieces[\"distance\"]\n    return rendered\n\n\ndef render_pep440_post(pieces: Dict[str, Any]) -> str:\n    \"\"\"TAG[.postDISTANCE[.dev0]+gHEX] .\n\n    The \".dev0\" means dirty. Note that .dev0 sorts backwards\n    (a dirty tree will appear \"older\" than the corresponding clean one),\n    but you shouldn't be releasing software with -dirty anyways.\n\n    Exceptions:\n    1: no tags. 
0.postDISTANCE[.dev0]\n    \"\"\"\n    if pieces[\"closest-tag\"]:\n        rendered = pieces[\"closest-tag\"]\n        if pieces[\"distance\"] or pieces[\"dirty\"]:\n            rendered += \".post%d\" % pieces[\"distance\"]\n            if pieces[\"dirty\"]:\n                rendered += \".dev0\"\n            rendered += plus_or_dot(pieces)\n            rendered += \"g%s\" % pieces[\"short\"]\n    else:\n        # exception #1\n        rendered = \"0.post%d\" % pieces[\"distance\"]\n        if pieces[\"dirty\"]:\n            rendered += \".dev0\"\n        rendered += \"+g%s\" % pieces[\"short\"]\n    return rendered\n\n\ndef render_pep440_post_branch(pieces: Dict[str, Any]) -> str:\n    \"\"\"TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] .\n\n    The \".dev0\" means not master branch.\n\n    Exceptions:\n    1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty]\n    \"\"\"\n    if pieces[\"closest-tag\"]:\n        rendered = pieces[\"closest-tag\"]\n        if pieces[\"distance\"] or pieces[\"dirty\"]:\n            rendered += \".post%d\" % pieces[\"distance\"]\n            if pieces[\"branch\"] != \"master\":\n                rendered += \".dev0\"\n            rendered += plus_or_dot(pieces)\n            rendered += \"g%s\" % pieces[\"short\"]\n            if pieces[\"dirty\"]:\n                rendered += \".dirty\"\n    else:\n        # exception #1\n        rendered = \"0.post%d\" % pieces[\"distance\"]\n        if pieces[\"branch\"] != \"master\":\n            rendered += \".dev0\"\n        rendered += \"+g%s\" % pieces[\"short\"]\n        if pieces[\"dirty\"]:\n            rendered += \".dirty\"\n    return rendered\n\n\ndef render_pep440_old(pieces: Dict[str, Any]) -> str:\n    \"\"\"TAG[.postDISTANCE[.dev0]] .\n\n    The \".dev0\" means dirty.\n\n    Exceptions:\n    1: no tags. 
0.postDISTANCE[.dev0]\n    \"\"\"\n    if pieces[\"closest-tag\"]:\n        rendered = pieces[\"closest-tag\"]\n        if pieces[\"distance\"] or pieces[\"dirty\"]:\n            rendered += \".post%d\" % pieces[\"distance\"]\n            if pieces[\"dirty\"]:\n                rendered += \".dev0\"\n    else:\n        # exception #1\n        rendered = \"0.post%d\" % pieces[\"distance\"]\n        if pieces[\"dirty\"]:\n            rendered += \".dev0\"\n    return rendered\n\n\ndef render_git_describe(pieces: Dict[str, Any]) -> str:\n    \"\"\"TAG[-DISTANCE-gHEX][-dirty].\n\n    Like 'git describe --tags --dirty --always'.\n\n    Exceptions:\n    1: no tags. HEX[-dirty]  (note: no 'g' prefix)\n    \"\"\"\n    if pieces[\"closest-tag\"]:\n        rendered = pieces[\"closest-tag\"]\n        if pieces[\"distance\"]:\n            rendered += \"-%d-g%s\" % (pieces[\"distance\"], pieces[\"short\"])\n    else:\n        # exception #1\n        rendered = pieces[\"short\"]\n    if pieces[\"dirty\"]:\n        rendered += \"-dirty\"\n    return rendered\n\n\ndef render_git_describe_long(pieces: Dict[str, Any]) -> str:\n    \"\"\"TAG-DISTANCE-gHEX[-dirty].\n\n    Like 'git describe --tags --dirty --always -long'.\n    The distance/hash is unconditional.\n\n    Exceptions:\n    1: no tags. 
HEX[-dirty]  (note: no 'g' prefix)\n    \"\"\"\n    if pieces[\"closest-tag\"]:\n        rendered = pieces[\"closest-tag\"]\n        rendered += \"-%d-g%s\" % (pieces[\"distance\"], pieces[\"short\"])\n    else:\n        # exception #1\n        rendered = pieces[\"short\"]\n    if pieces[\"dirty\"]:\n        rendered += \"-dirty\"\n    return rendered\n\n\ndef render(pieces: Dict[str, Any], style: str) -> Dict[str, Any]:\n    \"\"\"Render the given version pieces into the requested style.\"\"\"\n    if pieces[\"error\"]:\n        return {\n            \"version\": \"unknown\",\n            \"full-revisionid\": pieces.get(\"long\"),\n            \"dirty\": None,\n            \"error\": pieces[\"error\"],\n            \"date\": None,\n        }\n\n    if not style or style == \"default\":\n        style = \"pep440\"  # the default\n\n    if style == \"pep440\":\n        rendered = render_pep440(pieces)\n    elif style == \"pep440-branch\":\n        rendered = render_pep440_branch(pieces)\n    elif style == \"pep440-pre\":\n        rendered = render_pep440_pre(pieces)\n    elif style == \"pep440-post\":\n        rendered = render_pep440_post(pieces)\n    elif style == \"pep440-post-branch\":\n        rendered = render_pep440_post_branch(pieces)\n    elif style == \"pep440-old\":\n        rendered = render_pep440_old(pieces)\n    elif style == \"git-describe\":\n        rendered = render_git_describe(pieces)\n    elif style == \"git-describe-long\":\n        rendered = render_git_describe_long(pieces)\n    else:\n        raise ValueError(\"unknown style '%s'\" % style)\n\n    return {\n        \"version\": rendered,\n        \"full-revisionid\": pieces[\"long\"],\n        \"dirty\": pieces[\"dirty\"],\n        \"error\": None,\n        \"date\": pieces.get(\"date\"),\n    }\n\n\nclass VersioneerBadRootError(Exception):\n    \"\"\"The project root directory is unknown or missing key files.\"\"\"\n\n\ndef get_versions(verbose: bool = False) -> Dict[str, Any]:\n    
\"\"\"Get the project version from whatever source is available.\n\n    Returns dict with two keys: 'version' and 'full'.\n    \"\"\"\n    if \"versioneer\" in sys.modules:\n        # see the discussion in cmdclass.py:get_cmdclass()\n        del sys.modules[\"versioneer\"]\n\n    root = get_root()\n    cfg = get_config_from_root(root)\n\n    assert cfg.VCS is not None, \"please set [versioneer]VCS= in setup.cfg\"\n    handlers = HANDLERS.get(cfg.VCS)\n    assert handlers, \"unrecognized VCS '%s'\" % cfg.VCS\n    verbose = verbose or bool(cfg.verbose)  # `bool()` used to avoid `None`\n    assert (\n        cfg.versionfile_source is not None\n    ), \"please set versioneer.versionfile_source\"\n    assert cfg.tag_prefix is not None, \"please set versioneer.tag_prefix\"\n\n    versionfile_abs = os.path.join(root, cfg.versionfile_source)\n\n    # extract version from first of: _version.py, VCS command (e.g. 'git\n    # describe'), parentdir. This is meant to work for developers using a\n    # source checkout, for users of a tarball created by 'setup.py sdist',\n    # and for users of a tarball/zipball created by 'git archive' or github's\n    # download-from-tag feature or the equivalent in other VCSes.\n\n    get_keywords_f = handlers.get(\"get_keywords\")\n    from_keywords_f = handlers.get(\"keywords\")\n    if get_keywords_f and from_keywords_f:\n        try:\n            keywords = get_keywords_f(versionfile_abs)\n            ver = from_keywords_f(keywords, cfg.tag_prefix, verbose)\n            if verbose:\n                print(\"got version from expanded keyword %s\" % ver)\n            return ver\n        except NotThisMethod:\n            pass\n\n    try:\n        ver = versions_from_file(versionfile_abs)\n        if verbose:\n            print(\"got version from file %s %s\" % (versionfile_abs, ver))\n        return ver\n    except NotThisMethod:\n        pass\n\n    from_vcs_f = handlers.get(\"pieces_from_vcs\")\n    if from_vcs_f:\n        try:\n            
pieces = from_vcs_f(cfg.tag_prefix, root, verbose)\n            ver = render(pieces, cfg.style)\n            if verbose:\n                print(\"got version from VCS %s\" % ver)\n            return ver\n        except NotThisMethod:\n            pass\n\n    try:\n        if cfg.parentdir_prefix:\n            ver = versions_from_parentdir(cfg.parentdir_prefix, root, verbose)\n            if verbose:\n                print(\"got version from parentdir %s\" % ver)\n            return ver\n    except NotThisMethod:\n        pass\n\n    if verbose:\n        print(\"unable to compute version\")\n\n    return {\n        \"version\": \"0+unknown\",\n        \"full-revisionid\": None,\n        \"dirty\": None,\n        \"error\": \"unable to compute version\",\n        \"date\": None,\n    }\n\n\ndef get_version() -> str:\n    \"\"\"Get the short version string for this project.\"\"\"\n    return get_versions()[\"version\"]\n\n\ndef get_cmdclass(cmdclass: Optional[Dict[str, Any]] = None):\n    \"\"\"Get the custom setuptools subclasses used by Versioneer.\n\n    If the package uses a different cmdclass (e.g. one from numpy), it\n    should be provide as an argument.\n    \"\"\"\n    if \"versioneer\" in sys.modules:\n        del sys.modules[\"versioneer\"]\n        # this fixes the \"python setup.py develop\" case (also 'install' and\n        # 'easy_install .'), in which subdependencies of the main project are\n        # built (using setup.py bdist_egg) in the same python process. Assume\n        # a main project A and a dependency B, which use different versions\n        # of Versioneer. A's setup.py imports A's Versioneer, leaving it in\n        # sys.modules by the time B's setup.py is executed, causing B to run\n        # with the wrong versioneer. Setuptools wraps the sub-dep builds in a\n        # sandbox that restores sys.modules to it's pre-build state, so the\n        # parent is protected against the child's \"import versioneer\". 
By\n        # removing ourselves from sys.modules here, before the child build\n        # happens, we protect the child from the parent's versioneer too.\n        # Also see https://github.com/python-versioneer/python-versioneer/issues/52\n\n    cmds = {} if cmdclass is None else cmdclass.copy()\n\n    # we add \"version\" to setuptools\n    from setuptools import Command\n\n    class cmd_version(Command):\n        description = \"report generated version string\"\n        user_options: List[Tuple[str, str, str]] = []\n        boolean_options: List[str] = []\n\n        def initialize_options(self) -> None:\n            pass\n\n        def finalize_options(self) -> None:\n            pass\n\n        def run(self) -> None:\n            vers = get_versions(verbose=True)\n            print(\"Version: %s\" % vers[\"version\"])\n            print(\" full-revisionid: %s\" % vers.get(\"full-revisionid\"))\n            print(\" dirty: %s\" % vers.get(\"dirty\"))\n            print(\" date: %s\" % vers.get(\"date\"))\n            if vers[\"error\"]:\n                print(\" error: %s\" % vers[\"error\"])\n\n    cmds[\"version\"] = cmd_version\n\n    # we override \"build_py\" in setuptools\n    #\n    # most invocation pathways end up running build_py:\n    #  distutils/build -> build_py\n    #  distutils/install -> distutils/build ->..\n    #  setuptools/bdist_wheel -> distutils/install ->..\n    #  setuptools/bdist_egg -> distutils/install_lib -> build_py\n    #  setuptools/install -> bdist_egg ->..\n    #  setuptools/develop -> ?\n    #  pip install:\n    #   copies source tree to a tempdir before running egg_info/etc\n    #   if .git isn't copied too, 'git describe' will fail\n    #   then does setup.py bdist_wheel, or sometimes setup.py install\n    #  setup.py egg_info -> ?\n\n    # pip install -e . 
and setuptool/editable_wheel will invoke build_py\n    # but the build_py command is not expected to copy any files.\n\n    # we override different \"build_py\" commands for both environments\n    if \"build_py\" in cmds:\n        _build_py: Any = cmds[\"build_py\"]\n    else:\n        from setuptools.command.build_py import build_py as _build_py\n\n    class cmd_build_py(_build_py):\n        def run(self) -> None:\n            root = get_root()\n            cfg = get_config_from_root(root)\n            versions = get_versions()\n            _build_py.run(self)\n            if getattr(self, \"editable_mode\", False):\n                # During editable installs `.py` and data files are\n                # not copied to build_lib\n                return\n            # now locate _version.py in the new build/ directory and replace\n            # it with an updated value\n            if cfg.versionfile_build:\n                target_versionfile = os.path.join(self.build_lib, cfg.versionfile_build)\n                print(\"UPDATING %s\" % target_versionfile)\n                write_to_version_file(target_versionfile, versions)\n\n    cmds[\"build_py\"] = cmd_build_py\n\n    if \"build_ext\" in cmds:\n        _build_ext: Any = cmds[\"build_ext\"]\n    else:\n        from setuptools.command.build_ext import build_ext as _build_ext\n\n    class cmd_build_ext(_build_ext):\n        def run(self) -> None:\n            root = get_root()\n            cfg = get_config_from_root(root)\n            versions = get_versions()\n            _build_ext.run(self)\n            if self.inplace:\n                # build_ext --inplace will only build extensions in\n                # build/lib<..> dir with no _version.py to write to.\n                # As in place builds will already have a _version.py\n                # in the module dir, we do not need to write one.\n                return\n            # now locate _version.py in the new build/ directory and replace\n            # it with an 
updated value\n            if not cfg.versionfile_build:\n                return\n            target_versionfile = os.path.join(self.build_lib, cfg.versionfile_build)\n            if not os.path.exists(target_versionfile):\n                print(\n                    f\"Warning: {target_versionfile} does not exist, skipping \"\n                    \"version update. This can happen if you are running build_ext \"\n                    \"without first running build_py.\"\n                )\n                return\n            print(\"UPDATING %s\" % target_versionfile)\n            write_to_version_file(target_versionfile, versions)\n\n    cmds[\"build_ext\"] = cmd_build_ext\n\n    if \"cx_Freeze\" in sys.modules:  # cx_freeze enabled?\n        from cx_Freeze.dist import build_exe as _build_exe  # type: ignore\n\n        # nczeczulin reports that py2exe won't like the pep440-style string\n        # as FILEVERSION, but it can be used for PRODUCTVERSION, e.g.\n        # setup(console=[{\n        #   \"version\": versioneer.get_version().split(\"+\", 1)[0], # FILEVERSION\n        #   \"product_version\": versioneer.get_version(),\n        #   ...\n\n        class cmd_build_exe(_build_exe):\n            def run(self) -> None:\n                root = get_root()\n                cfg = get_config_from_root(root)\n                versions = get_versions()\n                target_versionfile = cfg.versionfile_source\n                print(\"UPDATING %s\" % target_versionfile)\n                write_to_version_file(target_versionfile, versions)\n\n                _build_exe.run(self)\n                os.unlink(target_versionfile)\n                with open(cfg.versionfile_source, \"w\") as f:\n                    LONG = LONG_VERSION_PY[cfg.VCS]\n                    f.write(\n                        LONG\n                        % {\n                            \"DOLLAR\": \"$\",\n                            \"STYLE\": cfg.style,\n                            \"TAG_PREFIX\": 
cfg.tag_prefix,\n                            \"PARENTDIR_PREFIX\": cfg.parentdir_prefix,\n                            \"VERSIONFILE_SOURCE\": cfg.versionfile_source,\n                        }\n                    )\n\n        cmds[\"build_exe\"] = cmd_build_exe\n        del cmds[\"build_py\"]\n\n    if \"py2exe\" in sys.modules:  # py2exe enabled?\n        try:\n            from py2exe.setuptools_buildexe import py2exe as _py2exe  # type: ignore\n        except ImportError:\n            from py2exe.distutils_buildexe import py2exe as _py2exe  # type: ignore\n\n        class cmd_py2exe(_py2exe):\n            def run(self) -> None:\n                root = get_root()\n                cfg = get_config_from_root(root)\n                versions = get_versions()\n                target_versionfile = cfg.versionfile_source\n                print(\"UPDATING %s\" % target_versionfile)\n                write_to_version_file(target_versionfile, versions)\n\n                _py2exe.run(self)\n                os.unlink(target_versionfile)\n                with open(cfg.versionfile_source, \"w\") as f:\n                    LONG = LONG_VERSION_PY[cfg.VCS]\n                    f.write(\n                        LONG\n                        % {\n                            \"DOLLAR\": \"$\",\n                            \"STYLE\": cfg.style,\n                            \"TAG_PREFIX\": cfg.tag_prefix,\n                            \"PARENTDIR_PREFIX\": cfg.parentdir_prefix,\n                            \"VERSIONFILE_SOURCE\": cfg.versionfile_source,\n                        }\n                    )\n\n        cmds[\"py2exe\"] = cmd_py2exe\n\n    # sdist farms its file list building out to egg_info\n    if \"egg_info\" in cmds:\n        _egg_info: Any = cmds[\"egg_info\"]\n    else:\n        from setuptools.command.egg_info import egg_info as _egg_info\n\n    class cmd_egg_info(_egg_info):\n        def find_sources(self) -> None:\n            # egg_info.find_sources builds the 
manifest list and writes it\n            # in one shot\n            super().find_sources()\n\n            # Modify the filelist and normalize it\n            root = get_root()\n            cfg = get_config_from_root(root)\n            self.filelist.append(\"versioneer.py\")\n            if cfg.versionfile_source:\n                # There are rare cases where versionfile_source might not be\n                # included by default, so we must be explicit\n                self.filelist.append(cfg.versionfile_source)\n            self.filelist.sort()\n            self.filelist.remove_duplicates()\n\n            # The write method is hidden in the manifest_maker instance that\n            # generated the filelist and was thrown away\n            # We will instead replicate their final normalization (to unicode,\n            # and POSIX-style paths)\n            from setuptools import unicode_utils\n\n            normalized = [\n                unicode_utils.filesys_decode(f).replace(os.sep, \"/\")\n                for f in self.filelist.files\n            ]\n\n            manifest_filename = os.path.join(self.egg_info, \"SOURCES.txt\")\n            with open(manifest_filename, \"w\") as fobj:\n                fobj.write(\"\\n\".join(normalized))\n\n    cmds[\"egg_info\"] = cmd_egg_info\n\n    # we override different \"sdist\" commands for both environments\n    if \"sdist\" in cmds:\n        _sdist: Any = cmds[\"sdist\"]\n    else:\n        from setuptools.command.sdist import sdist as _sdist\n\n    class cmd_sdist(_sdist):\n        def run(self) -> None:\n            versions = get_versions()\n            self._versioneer_generated_versions = versions\n            # unless we update this, the command will keep using the old\n            # version\n            self.distribution.metadata.version = versions[\"version\"]\n            return _sdist.run(self)\n\n        def make_release_tree(self, base_dir: str, files: List[str]) -> None:\n            root = get_root()\n      
      cfg = get_config_from_root(root)\n            _sdist.make_release_tree(self, base_dir, files)\n            # now locate _version.py in the new base_dir directory\n            # (remembering that it may be a hardlink) and replace it with an\n            # updated value\n            target_versionfile = os.path.join(base_dir, cfg.versionfile_source)\n            print(\"UPDATING %s\" % target_versionfile)\n            write_to_version_file(\n                target_versionfile, self._versioneer_generated_versions\n            )\n\n    cmds[\"sdist\"] = cmd_sdist\n\n    return cmds\n\n\nCONFIG_ERROR = \"\"\"\nsetup.cfg is missing the necessary Versioneer configuration. You need\na section like:\n\n [versioneer]\n VCS = git\n style = pep440\n versionfile_source = src/myproject/_version.py\n versionfile_build = myproject/_version.py\n tag_prefix =\n parentdir_prefix = myproject-\n\nYou will also need to edit your setup.py to use the results:\n\n import versioneer\n setup(version=versioneer.get_version(),\n       cmdclass=versioneer.get_cmdclass(), ...)\n\nPlease read the docstring in ./versioneer.py for configuration instructions,\nedit setup.cfg, and re-run the installer or 'python versioneer.py setup'.\n\"\"\"\n\nSAMPLE_CONFIG = \"\"\"\n# See the docstring in versioneer.py for instructions. Note that you must\n# re-run 'versioneer.py setup' after changing this section, and commit the\n# resulting files.\n\n[versioneer]\n#VCS = git\n#style = pep440\n#versionfile_source =\n#versionfile_build =\n#tag_prefix =\n#parentdir_prefix =\n\n\"\"\"\n\nOLD_SNIPPET = \"\"\"\nfrom ._version import get_versions\n__version__ = get_versions()['version']\ndel get_versions\n\"\"\"\n\nINIT_PY_SNIPPET = \"\"\"\nfrom . 
import {0}\n__version__ = {0}.get_versions()['version']\n\"\"\"\n\n\ndef do_setup() -> int:\n    \"\"\"Do main VCS-independent setup function for installing Versioneer.\"\"\"\n    root = get_root()\n    try:\n        cfg = get_config_from_root(root)\n    except (OSError, configparser.NoSectionError, configparser.NoOptionError) as e:\n        if isinstance(e, (OSError, configparser.NoSectionError)):\n            print(\"Adding sample versioneer config to setup.cfg\", file=sys.stderr)\n            with open(os.path.join(root, \"setup.cfg\"), \"a\") as f:\n                f.write(SAMPLE_CONFIG)\n        print(CONFIG_ERROR, file=sys.stderr)\n        return 1\n\n    print(\" creating %s\" % cfg.versionfile_source)\n    with open(cfg.versionfile_source, \"w\") as f:\n        LONG = LONG_VERSION_PY[cfg.VCS]\n        f.write(\n            LONG\n            % {\n                \"DOLLAR\": \"$\",\n                \"STYLE\": cfg.style,\n                \"TAG_PREFIX\": cfg.tag_prefix,\n                \"PARENTDIR_PREFIX\": cfg.parentdir_prefix,\n                \"VERSIONFILE_SOURCE\": cfg.versionfile_source,\n            }\n        )\n\n    ipy = os.path.join(os.path.dirname(cfg.versionfile_source), \"__init__.py\")\n    maybe_ipy: Optional[str] = ipy\n    if os.path.exists(ipy):\n        try:\n            with open(ipy, \"r\") as f:\n                old = f.read()\n        except OSError:\n            old = \"\"\n        module = os.path.splitext(os.path.basename(cfg.versionfile_source))[0]\n        snippet = INIT_PY_SNIPPET.format(module)\n        if OLD_SNIPPET in old:\n            print(\" replacing boilerplate in %s\" % ipy)\n            with open(ipy, \"w\") as f:\n                f.write(old.replace(OLD_SNIPPET, snippet))\n        elif snippet not in old:\n            print(\" appending to %s\" % ipy)\n            with open(ipy, \"a\") as f:\n                f.write(snippet)\n        else:\n            print(\" %s unmodified\" % ipy)\n    else:\n        print(\" %s 
doesn't exist, ok\" % ipy)\n        maybe_ipy = None\n\n    # Make VCS-specific changes. For git, this means creating/changing\n    # .gitattributes to mark _version.py for export-subst keyword\n    # substitution.\n    do_vcs_install(cfg.versionfile_source, maybe_ipy)\n    return 0\n\n\ndef scan_setup_py() -> int:\n    \"\"\"Validate the contents of setup.py against Versioneer's expectations.\"\"\"\n    found = set()\n    setters = False\n    errors = 0\n    with open(\"setup.py\", \"r\") as f:\n        for line in f.readlines():\n            if \"import versioneer\" in line:\n                found.add(\"import\")\n            if \"versioneer.get_cmdclass()\" in line:\n                found.add(\"cmdclass\")\n            if \"versioneer.get_version()\" in line:\n                found.add(\"get_version\")\n            if \"versioneer.VCS\" in line:\n                setters = True\n            if \"versioneer.versionfile_source\" in line:\n                setters = True\n    if len(found) != 3:\n        print(\"\")\n        print(\"Your setup.py appears to be missing some important items\")\n        print(\"(but I might be wrong). Please make sure it has something\")\n        print(\"roughly like the following:\")\n        print(\"\")\n        print(\" import versioneer\")\n        print(\" setup( version=versioneer.get_version(),\")\n        print(\"        cmdclass=versioneer.get_cmdclass(),  ...)\")\n        print(\"\")\n        errors += 1\n    if setters:\n        print(\"You should remove lines like 'versioneer.VCS = ' and\")\n        print(\"'versioneer.versionfile_source = ' . 
This configuration\")\n        print(\"now lives in setup.cfg, and should be removed from setup.py\")\n        print(\"\")\n        errors += 1\n    return errors\n\n\ndef setup_command() -> NoReturn:\n    \"\"\"Set up Versioneer and exit with appropriate error code.\"\"\"\n    errors = do_setup()\n    errors += scan_setup_py()\n    sys.exit(1 if errors else 0)\n\n\nif __name__ == \"__main__\":\n    cmd = sys.argv[1]\n    if cmd == \"setup\":\n        setup_command()\n"
  }
]