[
  {
    "path": ".clippy.toml",
    "content": "disallowed-macros = [\n  { path = \"std::assert_ne\", reason = \"use `pretty_assertions::assert_ne` instead\" },\n  { path = \"std::assert_eq\", reason = \"use `pretty_assertions::assert_eq` instead\" },\n  { path = \"std::assert_matches\", reason = \"use `pretty_assertions::assert_matches` instead\" },\n]\n"
  },
  {
    "path": ".github/dependabot.yml",
    "content": "version: 2\nupdates:\n  # python\n  - package-ecosystem: \"pip\"\n    directory: \"/\"\n    schedule:\n      interval: \"daily\"\n    labels:\n      - \"dependencies\"\n      - \":snake: python :snake:\"\n  # rust\n  - package-ecosystem: \"cargo\"\n    directory: \"/\"\n    schedule:\n      interval: \"daily\"\n    groups:\n      prod-deps:\n        dependency-type: \"production\"\n      dev-deps:\n        dependency-type: \"development\"\n    labels:\n      - \"dependencies\"\n      - \":crab: rust :crab:\"\n  # actions\n  - package-ecosystem: \"github-actions\"\n    directory: \"/\"\n    schedule:\n      interval: \"daily\"\n"
  },
  {
    "path": ".github/workflows/CI.yml",
    "content": "name: CI\n\non:\n  push:\n    branches:\n      - main\n  pull_request:\n    types: [opened, synchronize, reopened]\n\nenv:\n  MIN_PYTHON_VERSION: \"3.10\"\n\ndefaults:\n  run:\n    # Prevents windows runners from running on powershell\n    shell: bash\n\njobs:\n  lint:\n    runs-on: ubuntu-latest\n    steps:\n      - uses: actions/checkout@v6\n      - name: Set up Python\n        uses: actions/setup-python@v6\n        with:\n          python-version: \"${{ env.MIN_PYTHON_VERSION }}\"\n      - name: Set up rust toolchain\n        uses: dtolnay/rust-toolchain@stable\n        with:\n          components: rustfmt, clippy\n      - name: Set up rustfmt\n        run: rustup component add rustfmt\n\n      - name: install uv\n        uses: astral-sh/setup-uv@v7\n\n      - name: Install dependencies and lint\n        run: |\n          make install\n          make lint\n\n  check-docs:\n    runs-on: ubuntu-latest\n    steps:\n      - uses: actions/checkout@v6\n      - name: Set up Python\n        uses: actions/setup-python@v6\n        with:\n          python-version: \"3.11\"\n      - name: Set up rust toolchain\n        uses: dtolnay/rust-toolchain@stable\n      - name: install uv\n        uses: astral-sh/setup-uv@v7\n      - name: Check documentation\n        run: |\n          make install\n          make doc\n\n  test:\n    runs-on: ${{ matrix.os }}\n    strategy:\n      matrix:\n        python-version: [\"3.10\", \"3.11\", \"3.12\", \"3.13\", \"3.14\", \"3.14t\"]\n        os:\n          - \"ubuntu-latest\"\n          - \"ubuntu-24.04-arm\"\n          - \"macos-14\"\n          - \"windows-latest\"\n          # windows-11-arm excluded: pyarrow is not available for Windows ARM64\n          # https://github.com/apache/arrow/issues/47195\n    steps:\n      - uses: actions/checkout@v6\n      - name: Set up Python\n        uses: actions/setup-python@v6\n        with:\n          python-version: ${{ matrix.python-version }}\n      - name: Set up rust toolchain\n    
    uses: dtolnay/rust-toolchain@stable\n\n      - name: install uv\n        uses: astral-sh/setup-uv@v7\n\n      - name: Install dependencies and test\n        run: |\n          make install\n          make test\n\n      - name: Test with pandas<3\n        run: |\n          uv pip install \"pandas<3\"\n          make test-python\n\n  check-wheel-build:\n    runs-on: ${{ matrix.os }}\n    strategy:\n      matrix:\n        # Only testing the build on the smallest supported Python version for abi3 wheels\n        python-version: [\"3.10\", \"3.14t\"]\n        os: [\"ubuntu-latest\", \"macos-14\", \"windows-latest\"]\n        architecture: [x86-64, aarch64]\n        exclude:\n          # Cross-compiling x86_64 → aarch64 on Windows doesn't work; use windows-11-arm instead\n          - os: windows-latest\n            architecture: aarch64\n        include:\n          # Windows ARM64: build natively on windows-11-arm (3.11 is the minimum available)\n          - os: windows-11-arm\n            python-version: \"3.11\"\n            architecture: aarch64\n          # TODO: re-enable once setup-python supports windows-11-arm + python 3.14t\n          # (setup-python is currently broken with that combination)\n          # - os: windows-11-arm\n          #   python-version: \"3.14t\"\n          #   architecture: aarch64\n    steps:\n      - uses: actions/checkout@v6\n      - uses: dtolnay/rust-toolchain@stable\n      - name: Set Rust target\n        id: target\n        run: |\n          TARGET=${{\n            (matrix.os == 'macos-14' && (matrix.architecture == 'aarch64' && 'aarch64-apple-darwin' || 'x86_64-apple-darwin'))\n            || (matrix.os == 'ubuntu-latest' && (matrix.architecture == 'aarch64' && 'aarch64-unknown-linux-gnu' || 'x86_64-unknown-linux-gnu'))\n            || (matrix.os == 'windows-latest' && 'x86_64-pc-windows-msvc')\n            || (matrix.os == 'windows-11-arm' && 'aarch64-pc-windows-msvc')\n          }}\n          echo \"target=$TARGET\" >> 
$GITHUB_OUTPUT\n      - name: Set up Python\n        uses: actions/setup-python@v6\n        with:\n          python-version: ${{ matrix.python-version }}\n      - name: build (fast)\n        uses: PyO3/maturin-action@v1\n        with:\n          manylinux: auto\n          command: build\n          args: \"-o dist -i python${{ matrix.python-version }}\"\n          target: ${{ steps.target.outputs.target }}\n\n      - name: Upload wheels\n        uses: actions/upload-artifact@v7\n        with:\n          name: \"wheels-${{ matrix.os }}-python-${{ matrix.python-version }}-${{ matrix.architecture }}\"\n          path: dist\n\n  check-wheel-build-musllinux:\n    runs-on: ubuntu-latest\n    strategy:\n      matrix:\n        python-version: [\"3.10\", \"3.14t\"]\n        architecture: [x86-64, aarch64]\n    steps:\n      - uses: actions/checkout@v6\n      - uses: dtolnay/rust-toolchain@stable\n      - name: Set up Python\n        uses: actions/setup-python@v6\n        with:\n          python-version: ${{ matrix.python-version }}\n      - name: build (fast)\n        uses: PyO3/maturin-action@v1\n        with:\n          manylinux: musllinux_1_2\n          command: build\n          args: \"-o dist -i python${{ matrix.python-version }}\"\n          target: ${{ matrix.architecture == 'aarch64' && 'aarch64-unknown-linux-musl' || 'x86_64-unknown-linux-musl' }}\n      - name: Upload wheels\n        uses: actions/upload-artifact@v7\n        with:\n          name: \"wheels-linux-musl-python-${{ matrix.python-version }}-${{ matrix.architecture }}\"\n          path: dist\n\n  check-sdist-build:\n    runs-on: \"ubuntu-latest\"\n    steps:\n      - uses: actions/checkout@v6\n      - uses: dtolnay/rust-toolchain@stable\n      - name: build sdist\n        uses: PyO3/maturin-action@v1\n        with:\n          manylinux: auto\n          command: sdist\n          args: \"-o dist\"\n      - name: upload sdist\n        uses: actions/upload-artifact@v7\n        with:\n          name: sdist\n 
         path: dist\n"
  },
  {
    "path": ".github/workflows/docs.yml",
    "content": "name: Docs\n\non:\n  push:\n    branches:\n      - main\n    tags:\n      - 'v*'\n  workflow_dispatch:\n    inputs:\n      version_tag:\n        description: 'Tag to build docs for (e.g. v0.18.0). Checks out the tag before building.'\n        required: true\n      mark_as_stable:\n        description: 'Mark this version as the stable default (updates root redirect)'\n        type: boolean\n        default: false\n\njobs:\n  build:\n    runs-on: ubuntu-latest\n    steps:\n      - uses: actions/checkout@v6\n        with:\n          fetch-depth: 0\n\n      - name: Checkout tag (workflow_dispatch)\n        if: github.event_name == 'workflow_dispatch'\n        env:\n          VERSION_TAG: ${{ github.event.inputs.version_tag }}\n        run: git checkout \"refs/tags/$VERSION_TAG\"\n\n      - name: Set up Python\n        uses: actions/setup-python@v6\n        with:\n          python-version: \"3.11\"\n\n      - name: Set up rust toolchain\n        uses: dtolnay/rust-toolchain@stable\n\n      - name: install uv\n        uses: astral-sh/setup-uv@v7\n\n      - name: Determine version\n        id: version\n        env:\n          INPUT_VERSION_TAG: ${{ github.event.inputs.version_tag }}\n          INPUT_MARK_AS_STABLE: ${{ github.event.inputs.mark_as_stable }}\n        run: |\n          if [[ \"$GITHUB_EVENT_NAME\" == \"workflow_dispatch\" ]]; then\n            echo \"version=$INPUT_VERSION_TAG\" >> \"$GITHUB_OUTPUT\"\n            echo \"is_stable=$INPUT_MARK_AS_STABLE\" >> \"$GITHUB_OUTPUT\"\n          elif [[ \"${GITHUB_REF}\" == refs/tags/v* ]]; then\n            echo \"version=${GITHUB_REF#refs/tags/}\" >> \"$GITHUB_OUTPUT\"\n            echo \"is_stable=true\" >> \"$GITHUB_OUTPUT\"\n          else\n            echo \"version=latest\" >> \"$GITHUB_OUTPUT\"\n            echo \"is_stable=false\" >> \"$GITHUB_OUTPUT\"\n          fi\n\n      - name: Build docs\n        env:\n          VERSION: ${{ steps.version.outputs.version }}\n        run: |\n          
make install\n          make doc-versioned\n\n      - name: Deploy to gh-pages\n        env:\n          VERSION: ${{ steps.version.outputs.version }}\n          IS_STABLE: ${{ steps.version.outputs.is_stable }}\n        run: |\n          git config user.name github-actions\n          git config user.email github-actions@github.com\n\n          # Stash built docs\n          cp -r \"docs/$VERSION\" /tmp/docs-build\n\n          # Switch to gh-pages (gh-pages exists)\n          git checkout gh-pages\n          git merge -m 'Merge main' origin/main\n\n          # Place versioned docs\n          rm -rf \"docs/$VERSION\"\n          mv /tmp/docs-build \"docs/$VERSION\"\n\n          # Update versions.json and root redirect\n          STABLE_FLAG=\"\"\n          if [[ \"$IS_STABLE\" == \"true\" ]]; then\n            STABLE_FLAG=\"--stable\"\n          fi\n          ./scripts/update_versions.py \\\n            --version \"$VERSION\" \\\n            --docs-dir docs \\\n            $STABLE_FLAG\n\n          git add -f docs\n          git commit -m \"Update docs ($VERSION)\" --allow-empty\n          git push origin gh-pages\n"
  },
  {
    "path": ".github/workflows/release.yml",
    "content": "name: Release\n\non:\n  push:\n    # Sequence of patterns matched against refs/tags\n    tags:\n    - 'v*' # Push events to matching v*, i.e. v1.0, v20.15.10\n\njobs:\n  linux:\n    runs-on: ubuntu-latest\n    strategy:\n      matrix:\n        python-version: [\"3.10\", \"3.14t\"]\n        architecture: [x86-64, aarch64]\n    steps:\n    - uses: actions/checkout@v6\n    - uses: dtolnay/rust-toolchain@stable\n    - uses: actions/setup-python@v6\n      with:\n        python-version: ${{ matrix.python-version }}\n    - name: build (release)\n      uses: PyO3/maturin-action@v1\n      with:\n        manylinux: auto\n        command: build\n        args: \"--release -o dist -i python${{ matrix.python-version }}\"\n        target: ${{ matrix.architecture == 'aarch64' && 'aarch64-unknown-linux-gnu' || null }}\n    - name: Upload wheels\n      uses: actions/upload-artifact@v7\n      with:\n        name: \"wheels-linux-python-${{ matrix.python-version }}-${{ matrix.architecture }}\"\n        path: dist\n\n  linux-musl:\n    runs-on: ubuntu-latest\n    strategy:\n      matrix:\n        python-version: [\"3.10\", \"3.14t\"]\n        architecture: [x86-64, aarch64]\n    steps:\n    - uses: actions/checkout@v6\n    - uses: dtolnay/rust-toolchain@stable\n    - uses: actions/setup-python@v6\n      with:\n        python-version: ${{ matrix.python-version }}\n    - name: build (release)\n      uses: PyO3/maturin-action@v1\n      with:\n        manylinux: musllinux_1_2\n        command: build\n        args: \"--release -o dist -i python${{ matrix.python-version }}\"\n        target: ${{ matrix.architecture == 'aarch64' && 'aarch64-unknown-linux-musl' || 'x86_64-unknown-linux-musl' }}\n    - name: Upload wheels\n      uses: actions/upload-artifact@v7\n      with:\n        name: \"wheels-linux-musl-python-${{ matrix.python-version }}-${{ matrix.architecture }}\"\n        path: dist\n\n  macos:\n    runs-on: macos-14\n    strategy:\n      matrix:\n        python-version: 
[\"3.10\", \"3.14t\"]\n        architecture: [x86-64, aarch64]\n    steps:\n    - uses: actions/checkout@v6\n    - uses: dtolnay/rust-toolchain@stable\n    - uses: actions/setup-python@v6\n      with:\n        python-version: ${{ matrix.python-version }}\n    - name: build (release)\n      uses: PyO3/maturin-action@v1\n      with:\n        command: build\n        args: \"--release -o dist -i python${{ matrix.python-version }}\"\n        target: ${{ matrix.architecture == 'aarch64' && 'aarch64-apple-darwin' || 'x86_64-apple-darwin' }}\n    - name: Upload wheels\n      uses: actions/upload-artifact@v7\n      with:\n        name: \"wheels-macos-python-${{ matrix.python-version }}-${{ matrix.architecture }}\"\n        path: dist\n\n  windows:\n    runs-on: ${{ matrix.os }}\n    strategy:\n      matrix:\n        python-version: [\"3.10\", \"3.14t\"]\n        os: [windows-latest]\n        architecture: [x86-64]\n        include:\n          # Windows ARM64: build natively on windows-11-arm (3.11 is the minimum available)\n          - os: windows-11-arm\n            python-version: \"3.11\"\n            architecture: aarch64\n          # TODO: re-enable once setup-python supports windows-11-arm + python 3.14t\n          # (setup-python is currently broken with that combination)\n          # - os: windows-11-arm\n          #   python-version: \"3.14t\"\n          #   architecture: aarch64\n    steps:\n    - uses: actions/checkout@v6\n    - uses: dtolnay/rust-toolchain@stable\n    - uses: actions/setup-python@v6\n      with:\n        python-version: ${{ matrix.python-version }}\n    - name: build (release)\n      uses: PyO3/maturin-action@v1\n      with:\n        command: build\n        args: \"--release -o dist -i python${{ matrix.python-version }}\"\n        target: ${{ matrix.architecture == 'aarch64' && 'aarch64-pc-windows-msvc' || 'x86_64-pc-windows-msvc' }}\n    - name: Upload wheels\n      uses: actions/upload-artifact@v7\n      with:\n        name: 
\"wheels-windows-python-${{ matrix.python-version }}-${{ matrix.architecture }}\"\n        path: dist\n\n  sdist:\n    runs-on: \"ubuntu-latest\"\n    steps:\n      - uses: actions/checkout@v6\n      - uses: dtolnay/rust-toolchain@stable\n      - uses: actions/setup-python@v6\n        with:\n          python-version: \"3.10\"\n      - name: build (sdist)\n        uses: PyO3/maturin-action@v1\n        with:\n          manylinux: auto\n          command: sdist\n          args: \"-o dist\"\n      - name: Upload sdist\n        uses: actions/upload-artifact@v7\n        with:\n          name: sdist\n          path: dist\n\n\n  # NOTE: Cannot use a matrix here, as we only want a single release\n  release:\n    name: Release\n    runs-on: ubuntu-latest\n    needs: [linux, linux-musl, macos, windows, sdist]\n    permissions:\n      id-token: write     # Required for OIDC token exchange with crates.io\n      contents: write     # Required to be able to create a GitHub release\n    steps:\n    - uses: actions/checkout@v6\n    - uses: dtolnay/rust-toolchain@stable\n    - uses: rust-lang/crates-io-auth-action@v1\n      id: auth\n\n    - name: Download Linux wheels\n      uses: actions/download-artifact@v8\n      with:\n        pattern: \"wheels-linux-*\"\n        merge-multiple: true\n        path: wheels-linux\n\n    - name: Download MacOS wheels\n      uses: actions/download-artifact@v8\n      with:\n        pattern: \"wheels-macos-*\"\n        merge-multiple: true\n        path: wheels-macos\n\n    - name: Download Windows wheels\n      uses: actions/download-artifact@v8\n      with:\n        pattern: \"wheels-windows-*\"\n        merge-multiple: true\n        path: wheels-windows\n\n    - name: Download sdist\n      uses: actions/download-artifact@v8\n      with:\n        name: \"sdist\"\n        path: sdist\n\n    - name: Publish to PyPI\n      uses: PyO3/maturin-action@v1\n      env:\n        MATURIN_PYPI_TOKEN: ${{ secrets.PYPI_API_TOKEN }}\n      with:\n        command: 
upload\n        args: \"--skip-existing wheels-linux/*.whl wheels-macos/*.whl wheels-windows/*.whl sdist/*.tar.gz\"\n\n    - name: Publish to crates.io\n      run: cargo publish\n      env:\n        CARGO_REGISTRY_TOKEN: ${{ steps.auth.outputs.token }}\n\n    - name: Release\n      uses: softprops/action-gh-release@v3\n      with:\n        generate_release_notes: true\n        files: |\n          wheels-linux/*.whl\n          wheels-macos/*.whl\n          wheels-windows/*.whl\n          sdist/*.tar.gz\n"
  },
  {
    "path": ".gitignore",
    "content": "/target\n\nbigfile.*\n__pycache__\n*.pyc\n*.so\n*.dat\n.DS_Store\n\n.python-version\npyrightconfig.json\n.venv\ndocs\n.vscode\n.idea\n.benchmarks\nnotebooks\n/python/tests/fixtures/~$*.xlsx\n.zed\ndist\n"
  },
  {
    "path": ".pre-commit-config.yaml",
    "content": "# See https://pre-commit.com for more information\n# See https://pre-commit.com/hooks.html for more hooks\nrepos:\n-   repo: https://github.com/pre-commit/pre-commit-hooks\n    rev: v3.2.0\n    hooks:\n    -   id: trailing-whitespace\n    -   id: end-of-file-fixer\n-   repo: https://github.com/doublify/pre-commit-rust\n    rev: v1.0\n    hooks:\n    -   id: cargo-check\n-   repo: local\n    hooks:\n    -   id: lint\n        name: Lint\n        entry: make lint\n        types_or: [python, rust]\n        language: system\n        pass_filenames: false\n    -   id: format\n        name: Format\n        entry: make format\n        types_or: [python, rust]\n        language: system\n        pass_filenames: false\n"
  },
  {
    "path": "Cargo.toml",
    "content": "[package]\nname = \"fastexcel\"\nversion = \"0.20.2\"\ndescription = \"A fast excel reader for Rust and Python\"\nrust-version = \"1.85.0\"\nedition = \"2024\"\nlicense = \"MIT\"\nhomepage = \"https://github.com/ToucanToco/fastexcel\"\nrepository = \"https://github.com/ToucanToco/fastexcel.git\"\nreadme = \"README.md\"\ninclude = [\n    \"/pyproject.toml\",\n    \"/README.md\",\n    \"/LICENSE\",\n    \"/Makefile\",\n    \"/src\",\n    \"/python/fastexcel\",\n    \"!__pycache__\",\n    \"!*.pyc\",\n    \"!*.so\",\n]\n\n# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html\n[lib]\nname = \"fastexcel\"\ncrate-type = [\"cdylib\", \"rlib\"]\n\n[dependencies]\narrow-array = { version = \"^58\", features = [\"ffi\"], optional = true }\narrow-pyarrow = { version = \"^58\", optional = true }\narrow-schema = { version = \"^58\", optional = true }\ncalamine = { version = \"^0.35.0\", features = [\"chrono\"] }\nchrono = { version = \"^0.4.40\", default-features = false }\nlog = \"^0.4\"\npolars-core = { version = \">=0.53\", features = [\n    \"dtype-date\",\n    \"dtype-datetime\",\n    \"dtype-duration\",\n], optional = true }\npyo3 = { version = \"^0.28\", features = [\"abi3-py310\"], optional = true }\npyo3-arrow = { version = \"^0.17\", default-features = false, optional = true }\npyo3-log = { version = \"^0.13.3\", optional = true }\n\n[dev-dependencies]\nanyhow = \"1.0.102\"\npretty_assertions = { version = \"^1.4.1\", features = [\"unstable\"] }\nrstest = { version = \"^0.26.1\", default-features = false }\n\n# NOTE: This is a hack to bypass pyo3 limitations when testing:\n# https://pyo3.rs/v0.22.3/faq.html#i-cant-run-cargo-test-or-i-cant-build-in-a-cargo-workspace-im-having-linker-issues-like-symbol-not-found-or-undefined-reference-to-_pyexc_systemerror\n[features]\ndefault = []\n__arrow = [\"dep:arrow-schema\", \"dep:arrow-array\"]\npython = [\"__arrow\", \"dep:pyo3\", \"dep:pyo3-log\", 
\"dep:pyo3-arrow\"]\nextension-module = [\"pyo3/extension-module\"]\npolars = [\"dep:polars-core\"]\npyarrow = [\"dep:arrow-pyarrow\", \"python\"]\n# Private features for internal usage, should not be used directly as they may\n# change without notice\n__pyo3-tests = [\n    # feature for tests only. This makes Python::with_gil auto-initialize Python\n    # interpreters, which allows us to instantiate Python objects in tests\n    # (see https://pyo3.rs/v0.22.3/features#auto-initialize)\n    \"pyo3/auto-initialize\",\n    \"pyarrow\",\n]\n__rust-tests-standalone = []\n__rust-tests-polars = [\"polars\"]\n# Private feature for maturin usage, should not be used directly\n__maturin = [\"extension-module\", \"pyarrow\"]\n"
  },
  {
    "path": "LICENSE",
    "content": "MIT License\n\nCopyright (c) 2024 ToucanToco\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof this software and associated documentation files (the \"Software\"), to deal\nin the Software without restriction, including without limitation the rights\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Software, and to permit persons to whom the Software is\nfurnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all\ncopies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\nSOFTWARE.\n"
  },
  {
    "path": "Makefile",
    "content": ".DEFAULT_GOAL := all\nsources = python/fastexcel python/tests\n\nexport CARGO_TERM_COLOR=$(shell (test -t 0 && echo always) || echo auto)\n\n.PHONY: .uv  ## Check that uv is installed\n.uv:\n\t@uv -V || echo 'Please install uv: https://docs.astral.sh/uv/getting-started/installation/'\n\n.PHONY: install  ## Install the package & dependencies with debug build\ninstall: .uv\n\tuv sync --frozen --group all\n\tuv run maturin develop --uv -E pyarrow,pandas,polars\n\n.PHONY: install-prod  ## Install the package & dependencies with release build\ninstall-prod: .uv\n\tuv sync --frozen --group all\n\tuv run maturin develop --uv --release -E pyarrow,pandas,polars\n\n.PHONY: setup-dev  ## First-time setup: install + pre-commit hooks\nsetup-dev: install\n\tuv run pre-commit install --install-hooks\n\n.PHONY: rebuild-lockfiles  ## Rebuild lockfiles from scratch, updating all dependencies\nrebuild-lockfiles: .uv\n\tuv lock --upgrade\n\tcargo update\n\n.PHONY: build-dev  ## Build the development version of the package\nbuild-dev:\n\tuv run maturin build\n\n.PHONY: build-wheel  ## Build production wheel and install it\nbuild-wheel:\n\t@rm -rf target/wheels/\n\tuv run maturin build --release\n\t@wheel=$$(ls target/wheels/*.whl); uv pip install --force-reinstall \"$$wheel[pandas,polars]\"\n\n.PHONY: lint-python  ## Lint python source files\nlint-python:\n\tuv run ruff check $(sources)\n\tuv run ruff format --check $(sources)\n\tuv run mypy $(sources)\n\n.PHONY: lint-rust  ## Lint rust source files\nlint-rust:\n\tcargo fmt --all -- --check\n\t# Rust\n\tcargo clippy --tests -- -D warnings\n\t# Python-related code\n\tcargo clippy --features __maturin,__pyo3-tests --tests -- -D warnings\n\t# Rust+polars\n\tcargo clippy --features polars --tests -- -D warnings\n\n.PHONY: lint  ## Lint rust and python source files\nlint: lint-python lint-rust\n\n.PHONY: format-python  ## Auto-format python source files\nformat-python:\n\tuv run ruff check --fix $(sources)\n\tuv run ruff 
format $(sources)\n\n.PHONY: format-rust  ## Auto-format rust source files\nformat-rust:\n\tcargo fmt --all\n\tcargo clippy --all-features --tests --fix --lib -p fastexcel --allow-dirty --allow-staged\n\n.PHONY: format  ## Auto-format python and rust source files\nformat: format-rust format-python\n\n.PHONY: test-python  ## Run python tests\ntest-python: install\n\tuv run pytest\n\n.PHONY: test-rust-pyo3  ## Run PyO3 rust tests\ntest-rust-pyo3:\n\t# --lib to skip integration tests\n\tcargo test --no-default-features --features __pyo3-tests --lib\n\n.PHONY: test-rust-standalone  ## Run standalone rust tests\ntest-rust-standalone:\n\tcargo test --no-default-features --features __rust-tests-standalone\n\n.PHONY: test-rust-polars  ## Run polars rust tests\ntest-rust-polars:\n\tcargo test --no-default-features --features __rust-tests-polars\n\n.PHONY: test-rust  ## Run rust tests\ntest-rust: test-rust-pyo3 test-rust-standalone test-rust-polars\n\n.PHONY: test  ## Run all tests\ntest: test-rust test-python\n\n.PHONY: doc-serve  ## Serve documentation with live reload\ndoc-serve: build-dev\n\tuv run pdoc --template-directory doc-templates python/fastexcel\n\n.PHONY: doc  ## Build documentation\ndoc: build-dev\n\tuv run pdoc --template-directory doc-templates -o docs/latest python/fastexcel\n\tuv run scripts/update_versions.py --version latest --docs-dir docs\n\tcargo doc --no-deps --lib -p fastexcel --features polars\n\n.PHONY: doc-versioned  ## Build versioned documentation (CI usage: VERSION=v0.19.0 [STABLE=1] make doc-versioned)\ndoc-versioned: build-dev\n\t@test -n \"$(VERSION)\" || (echo \"ERROR: VERSION is not set. 
Usage: VERSION=v0.19.0 [STABLE=1] make doc-versioned\" && exit 1)\n\tuv run pdoc --template-directory doc-templates -o docs/$(VERSION) python/fastexcel\n\tuv run scripts/update_versions.py --version $(VERSION) --docs-dir docs $(if $(filter 1,$(STABLE)),--stable,)\n\n.PHONY: all  ## Run the standard set of checks performed in CI\nall: format build-dev lint test\n\n.PHONY: benchmarks  ## Run benchmarks\nbenchmarks: build-wheel\n\tuv run pytest ./python/tests/benchmarks/speed.py\n\n.PHONY: clean  ## Clear local caches and build artifacts\nclean:\n\trm -rf `find . -name __pycache__`\n\trm -f `find . -type f -name '*.py[co]' `\n\trm -f `find . -type f -name '*~' `\n\trm -f `find . -type f -name '.*~' `\n\trm -rf .cache\n\trm -rf htmlcov\n\trm -rf .pytest_cache\n\trm -rf *.egg-info\n\trm -f .coverage\n\trm -f .coverage.*\n\trm -rf build\n\trm -rf perf.data*\n\trm -rf python/fastexcel/*.so\n\n.PHONY: help  ## Display this message\nhelp:\n\t@grep -E \\\n\t\t'^.PHONY: .*?## .*$$' $(MAKEFILE_LIST) | \\\n\t\tsort | \\\n\t\tawk 'BEGIN {FS = \".PHONY: |## \"}; {printf \"\\033[36m%-19s\\033[0m %s\\n\", $$2, $$3}'\n"
  },
  {
    "path": "README.md",
    "content": "# `fastexcel`\n\nA fast excel file reader for Python and Rust.\n\nDocs:\n * [Python](https://fastexcel.toucantoco.dev/).\n * [Rust](https://docs.rs/fastexcel).\n\n## Stability\n\nThe Python library is considered production-ready. The API is mostly stable, and we avoid breaking changes as much as\npossible. v1.0.0 will be released once the [milestone](https://github.com/ToucanToco/fastexcel/milestone/2) is reached.\n\n> ⚠️ The free-threaded build is still considered experimental\n\nThe Rust crate is still experimental, and breaking changes are to be expected.\n\n## Installation\n\n```bash\n# Lightweight installation (no PyArrow dependency)\npip install fastexcel\n\n# With Polars support only (no PyArrow needed)\npip install fastexcel[polars]\n\n# With Pandas support (includes PyArrow)\npip install fastexcel[pandas]\n\n# With PyArrow support\npip install fastexcel[pyarrow]\n\n# With all integrations\npip install fastexcel[pandas,polars]\n```\n\n## Quick Start\n\n### Modern usage (recommended)\n\nFastExcel supports the [Arrow PyCapsule Interface](https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html) for zero-copy data exchange with libraries like Polars, without requiring pyarrow as a dependency.\nUse fastexcel with any Arrow-compatible library without requiring pyarrow.\n\n```python\nimport fastexcel\n\n# Load an Excel file\nreader = fastexcel.read_excel(\"data.xlsx\")\nsheet = reader.load_sheet(0)  # Load first sheet\n\n# Use with Polars (zero-copy, no pyarrow needed)\nimport polars as pl\ndf = pl.DataFrame(sheet)  # Direct PyCapsule interface\nprint(df)\n\n# Or use the to_polars() method (also via PyCapsule)\ndf = sheet.to_polars()\nprint(df)\n\n# Or access the raw Arrow data via PyCapsule interface\nschema = sheet.__arrow_c_schema__()\narray_data = sheet.__arrow_c_array__()\n```\n\n### Traditional usage (with pandas/pyarrow)\n\n```python\nimport fastexcel\n\nreader = fastexcel.read_excel(\"data.xlsx\")\nsheet = 
reader.load_sheet(0)\n\n# Convert to pandas (requires `pandas` extra)\ndf = sheet.to_pandas()\n\n# Or get pyarrow RecordBatch directly\nrecord_batch = sheet.to_arrow()\n```\n\n### Working with tables\n\n```python\nreader = fastexcel.read_excel(\"data.xlsx\")\n\n# List available tables\ntables = reader.table_names()\nprint(f\"Available tables: {tables}\")\n\n# Load a specific table\ntable = reader.load_table(\"MyTable\")\ndf = pl.DataFrame(table)  # Zero-copy via PyCapsule, no pyarrow needed\n```\n\n## Key Features\n\n- **Zero-copy data exchange** via [Arrow PyCapsule Interface](https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html)\n- **Flexible dependencies** - use with Polars (no PyArrow needed) or Pandas (includes PyArrow)\n- **Seamless Polars integration** - `pl.DataFrame(sheet)` and `sheet.to_polars()` work without PyArrow via PyCapsule interface\n- **High performance** - written in Rust with [calamine](https://github.com/tafia/calamine) and [Apache Arrow](https://arrow.apache.org/)\n- **Memory efficient** - lazy loading and optional eager evaluation\n- **Type safety** - automatic type inference with manual override options\n\n## Contributing & Development\n\n### Prerequisites\n\nYou'll need:\n1. **[Rust](https://rustup.rs/)** - Rust stable or nightly\n2. **[uv](https://docs.astral.sh/uv/getting-started/installation/)** - Fast Python package manager (will install Python 3.10+ automatically)\n3. **[git](https://git-scm.com/)** - For version control\n4. **[make](https://www.gnu.org/software/make/)** - For running development commands\n\n**Python Version Management:**\nuv handles Python installation automatically. 
To use a specific Python version:\n```bash\nuv python install 3.13  # Install Python 3.13\nuv python pin 3.13      # Pin project to Python 3.13\n```\n\n### Quick Start\n\n```bash\n# Clone the repository (or from your fork)\ngit clone https://github.com/ToucanToco/fastexcel.git\ncd fastexcel\n\n# First-time setup: install dependencies, build debug version, and setup pre-commit hooks\nmake setup-dev\n```\n\nVerify your installation by running:\n\n```bash\nmake\n```\n\nThis runs a full development cycle: formatting, building, linting, and testing\n\n### Development Commands\n\nRun `make help` to see all available commands, or use these common ones:\n\n```bash\nmake all          # full dev cycle: format, build, lint, test\nmake install      # install with debug build (daily development)\nmake install-prod # install with release build (benchmarking)\nmake test         # to run the tests\nmake lint         # to run the linter\nmake format       # to format python and rust code\nmake doc-serve    # to serve the documentation locally\n```\n\n### Useful Resources\n\n* [`python/fastexcel/_fastexcel.pyi`](./python/fastexcel/_fastexcel.pyi) - Python API types\n* [`python/tests/`](./python/tests) - Comprehensive usage examples\n\n## Benchmarking\n\nFor benchmarking, use `make benchmarks` which automatically builds an optimised wheel.\nThis is required for profiling, as dev mode builds are much slower.\n\n### Speed benchmarks\n```bash\nmake benchmarks\n```\n\n### Memory profiling\n```bash\nmprof run -T 0.01 python python/tests/benchmarks/memory.py python/tests/benchmarks/fixtures/plain_data.xls\n```\n\n## Creating a release\n\n1. Create a PR containing a commit that only updates the version in `Cargo.toml`.\n2. Once it is approved, squash and merge it into main.\n3. Tag the squashed commit, and push it.\n4. 
The `release` GitHub action will take care of the rest.\n\n## Dev tips\n\n* Use `cargo check` to verify that your Rust code compiles; there is no need to go through `maturin` every time\n* `cargo clippy` = 💖\n* Be careful with Arrow constructors: they tend to allocate a lot\n* [`mprof`](https://github.com/pythonprofilers/memory_profiler) and `time` go a long way for perf checks;\n  there is no need to go fancy right from the start\n"
  },
  {
    "path": "doc-templates/module.html.jinja2",
    "content": "{% extends \"default/module.html.jinja2\" %}\n{% block nav_title %}\n    {{ super() }}\n    <div id=\"version-switcher\" style=\"padding: 0.5rem 1.5rem 0.5rem 0;\">\n        <label for=\"version-select\" style=\"font-size: 0.85rem; font-weight: bold;\">Version</label>\n        <select id=\"version-select\"\n                style=\"display: block; width: 100%; margin-top: 0.25rem; padding: 0.25rem 0.4rem;\n                       font-size: 0.85rem; border-radius: 4px; border: 1px solid var(--accent2);\n                       background: var(--bg); color: var(--text);\">\n            <option>loading...</option>\n        </select>\n    </div>\n    <script>\n    (function() {\n        var parts = window.location.pathname.replace(/\\/+$/, '').split('/');\n        // Find the version segment: first path part that looks like a version or \"latest\"\n        var currentVersion = '';\n        for (var i = 1; i < parts.length; i++) {\n            if (parts[i] === 'latest' || /^v\\d/.test(parts[i])) {\n                currentVersion = parts[i];\n                break;\n            }\n        }\n        if (!currentVersion) {\n            var switcher = document.getElementById('version-switcher');\n            if (switcher) switcher.style.display = 'none';\n            return;\n        }\n\n        // Build base URL (everything before the version segment)\n        var idx = window.location.pathname.indexOf(currentVersion);\n        var baseUrl = window.location.pathname.substring(0, idx);\n        // Get the page path after the version segment\n        var pagePath = window.location.pathname.substring(idx + currentVersion.length);\n\n        fetch(baseUrl + 'versions.json')\n            .then(function(r) { return r.json(); })\n            .then(function(versions) {\n                var select = document.getElementById('version-select');\n                select.innerHTML = '';\n                versions.forEach(function(v) {\n                    var opt = 
document.createElement('option');\n                    opt.value = baseUrl + v.path + pagePath + window.location.search + window.location.hash;\n                    opt.textContent = v.label;\n                    if (v.path === currentVersion) opt.selected = true;\n                    select.appendChild(opt);\n                });\n                select.onchange = function() {\n                    if (this.value) window.location.href = this.value;\n                };\n            })\n            .catch(function() {\n                var select = document.getElementById('version-select');\n                var opt = document.createElement('option');\n                select.innerHTML = '';\n                opt.textContent = currentVersion || 'unknown';\n                select.appendChild(opt);\n            });\n    })();\n    </script>\n{% endblock %}\n"
  },
  {
    "path": "pyproject.toml",
    "content": "[build-system]\nrequires = [\"maturin>=1.7.0,<2.0\"]\nbuild-backend = \"maturin\"\n\n[project]\nname = \"fastexcel\"\ndescription = \"A fast excel file reader for Python, written in Rust\"\nreadme = \"README.md\"\nlicense = { file = \"LICENSE\" }\nrequires-python = \">=3.10\"\nclassifiers = [\n    \"Development Status :: 5 - Production/Stable\",\n    \"License :: OSI Approved :: MIT License\",\n    \"Operating System :: OS Independent\",\n    \"Programming Language :: Rust\",\n    \"Programming Language :: Python\",\n    \"Programming Language :: Python :: 3\",\n    \"Programming Language :: Python :: 3 :: Only\",\n    \"Programming Language :: Python :: 3.10\",\n    \"Programming Language :: Python :: 3.11\",\n    \"Programming Language :: Python :: 3.12\",\n    \"Programming Language :: Python :: 3.13\",\n    \"Programming Language :: Python :: 3.14\",\n    \"Programming Language :: Python :: Implementation :: CPython\",\n    \"Programming Language :: Python :: Free Threading :: 1 - Unstable\"\n]\ndependencies = [\"typing-extensions>=4.0.0; python_version<'3.10'\"]\ndynamic = [\"version\"]\n\n[project.optional-dependencies]\npyarrow = [\"pyarrow>=8.0.0\"]\npandas = [\"pandas>=1.4.4\", \"pyarrow>=8.0.0\"]\npolars = [\"polars>=1\"]\n\n[dependency-groups]\ndev = [\"maturin>=1.7.0,<2.0\"]\ntesting = [\n    { include-group = \"dev\" },\n    \"pytest>=7.1.3\",\n    \"pytest-benchmark>=4.0.0,<6\",\n    \"pytest-mock>=3.1\",\n    \"pyarrow>=8.0.0\",\n    \"pandas>=1.4.4\",\n    \"polars>=0.16.14\",\n    \"openpyxl>=3.1.2,<4\",\n    \"xlrd>=2.0.1,<3\",\n]\nlinting = [\n    { include-group = \"dev\" },\n    \"mypy>=2,<3\",\n    \"pre-commit>=2.20.0,<5\",\n    \"ruff>=0.15\",\n]\ndocs = [{ include-group = \"dev\" }, \"pdoc\"]\nall = [\n    { include-group = \"testing\" },\n    { include-group = \"linting\" },\n    { include-group = \"docs\" },\n]\n\n[project.urls]\n\"Source Code\" = \"https://github.com/ToucanToco/fastexcel\"\nIssues = 
\"https://github.com/ToucanToco/fastexcel\"\n\n[tool.maturin]\npython-source = \"python\"\nmodule-name = \"fastexcel._fastexcel\"\nfeatures = [\"__maturin\"]\n\n[tool.mypy]\npython_version = \"3.10\"\nfollow_imports = \"silent\"\nignore_missing_imports = true\n# A few custom options\nshow_error_codes = true\nwarn_no_return = true\nwarn_unused_configs = true\nwarn_unused_ignores = true\n\n[tool.pytest.ini_options]\ntestpaths = \"python/tests\"\nlog_cli = true\nlog_cli_level = \"INFO\"\n\n[tool.ruff]\nline-length = 100\ntarget-version = \"py310\"\n\n[tool.ruff.lint]\n# Enable pycodestyle (`E`), Pyflakes (`F`), isort (`I`), flake8-quotes (`Q`), `FA102` and pyupgrade (`UP`) rules.\nselect = [\"E\", \"F\", \"I\", \"Q\", \"FA102\", \"UP\"]\n\n[tool.uv]\n# this ensures that `uv run` doesn't actually build the package; a `make`\n# command is needed to build\npackage = false\nrequired-version = '>=0.8.4'\n"
  },
  {
    "path": "python/fastexcel/__init__.py",
    "content": "from __future__ import annotations\n\nimport typing\nfrom collections.abc import Callable\nfrom typing import TYPE_CHECKING, Literal, TypeAlias\n\nif TYPE_CHECKING:\n    import pandas as pd\n    import polars as pl\n    import pyarrow as pa\n\nfrom os.path import expanduser\nfrom pathlib import Path\n\ntry:\n    import importlib.util\n\n    importlib.util.find_spec(\"pyarrow\")\n    _PYARROW_AVAILABLE = True\nexcept ImportError:\n    _PYARROW_AVAILABLE = False\n\nfrom ._fastexcel import (\n    ArrowError,\n    CalamineCellError,\n    CalamineError,\n    CannotRetrieveCellDataError,\n    CellError,\n    CellErrors,\n    ColumnInfo,\n    ColumnInfoNoDtype,\n    ColumnNotFoundError,\n    DefinedName,\n    FastExcelError,\n    InvalidParametersError,\n    SheetNotFoundError,\n    UnsupportedColumnTypeCombinationError,\n    __version__,\n    _ExcelReader,\n    _ExcelSheet,\n    _ExcelTable,\n)\nfrom ._fastexcel import read_excel as _read_excel\n\nDType = Literal[\"null\", \"int\", \"float\", \"string\", \"boolean\", \"datetime\", \"date\", \"duration\"]\nDTypeMap: TypeAlias = \"dict[str | int, DType]\"\nColumnNameFrom: TypeAlias = Literal[\"provided\", \"looked_up\", \"generated\"]\nDTypeFrom: TypeAlias = Literal[\n    \"provided_for_all\", \"provided_by_index\", \"provided_by_name\", \"guessed\"\n]\nSheetVisible: TypeAlias = Literal[\"visible\", \"hidden\", \"veryhidden\"]\n\n\nclass ExcelSheet:\n    \"\"\"A class representing a single sheet in an Excel File\"\"\"\n\n    def __init__(self, sheet: _ExcelSheet) -> None:\n        self._sheet = sheet\n\n    @property\n    def name(self) -> str:\n        \"\"\"The name of the sheet\"\"\"\n        return self._sheet.name\n\n    @property\n    def width(self) -> int:\n        \"\"\"The sheet's width\"\"\"\n        return self._sheet.width\n\n    @property\n    def height(self) -> int:\n        \"\"\"The sheet's height, with `skip_rows` and `nrows` applied\"\"\"\n        return self._sheet.height\n\n    
@property\n    def total_height(self) -> int:\n        \"\"\"The sheet's total height\"\"\"\n        return self._sheet.total_height\n\n    @property\n    def selected_columns(self) -> list[ColumnInfo]:\n        \"\"\"The sheet's selected columns\"\"\"\n        return self._sheet.selected_columns\n\n    def available_columns(self) -> list[ColumnInfo]:\n        \"\"\"The columns available for the given sheet\"\"\"\n        return self._sheet.available_columns()\n\n    @property\n    def specified_dtypes(self) -> DTypeMap | None:\n        \"\"\"The dtypes specified for the sheet\"\"\"\n        return self._sheet.specified_dtypes\n\n    @property\n    def visible(self) -> SheetVisible:\n        \"\"\"The visibility of the sheet\"\"\"\n        return self._sheet.visible\n\n    def to_arrow(self) -> pa.RecordBatch:\n        \"\"\"Converts the sheet to a pyarrow `RecordBatch`\n\n        Requires the `pyarrow` extra to be installed.\n        \"\"\"\n        if not _PYARROW_AVAILABLE:\n            raise ImportError(\n                \"pyarrow is required for to_arrow(). Install with: pip install 'fastexcel[pyarrow]'\"\n            )\n        return self._sheet.to_arrow()\n\n    def to_arrow_with_errors(self) -> tuple[pa.RecordBatch, CellErrors | None]:\n        \"\"\"Converts the sheet to a pyarrow `RecordBatch` with error information.\n\n        Stores the positions of any values that cannot be parsed as the specified type and were\n        therefore converted to None.\n\n        Requires the `pyarrow` extra to be installed.\n        \"\"\"\n        if not _PYARROW_AVAILABLE:\n            raise ImportError(\n                \"pyarrow is required for to_arrow_with_errors(). 
Install with: pip install 'fastexcel[pyarrow]'\"  # noqa: E501\n            )\n        rb, cell_errors = self._sheet.to_arrow_with_errors()\n        if not cell_errors.errors:\n            return (rb, None)\n        return (rb, cell_errors)\n\n    def to_pandas(self) -> pd.DataFrame:\n        \"\"\"Converts the sheet to a Pandas `DataFrame`.\n\n        Requires the `pandas` extra to be installed.\n        \"\"\"\n        # Note: pandas PyCapsule interface requires __dataframe__ or __arrow_c_stream__\n        # which we don't implement. Using pyarrow conversion for now.\n        # (see https://pandas.pydata.org/docs/reference/api/pandas.api.interchange.from_dataframe.html)\n        return self.to_arrow().to_pandas()\n\n    def to_polars(self) -> pl.DataFrame:\n        \"\"\"Converts the sheet to a Polars `DataFrame`.\n\n        Uses the Arrow PyCapsule Interface for zero-copy data exchange.\n        Requires the `polars` extra to be installed.\n        \"\"\"\n        import polars as pl\n\n        return pl.DataFrame(self)\n\n    def __arrow_c_schema__(self) -> object:\n        \"\"\"Export the schema as an `ArrowSchema` `PyCapsule`.\n\n        https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html#arrowschema-export\n\n        The Arrow PyCapsule Interface enables zero-copy data exchange with\n        Arrow-compatible libraries without requiring PyArrow as a dependency.\n        \"\"\"\n        return self._sheet.__arrow_c_schema__()\n\n    def __arrow_c_array__(self, requested_schema: object | None = None) -> tuple[object, object]:\n        \"\"\"Export the schema and data as a pair of `ArrowSchema` and `ArrowArray` `PyCapsules`.\n\n        The optional `requested_schema` parameter allows for potential schema conversion.\n\n        https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html#arrowarray-export\n\n        The Arrow PyCapsule Interface enables zero-copy data exchange with\n        Arrow-compatible libraries 
without requiring PyArrow as a dependency.\n        \"\"\"\n        return self._sheet.__arrow_c_array__(requested_schema)\n\n    def __repr__(self) -> str:\n        return self._sheet.__repr__()\n\n\nclass ExcelTable:\n    \"\"\"A class representing a single table in an Excel file\"\"\"\n\n    def __init__(self, table: _ExcelTable) -> None:\n        self._table = table\n\n    @property\n    def name(self) -> str:\n        \"\"\"The name of the table\"\"\"\n        return self._table.name\n\n    @property\n    def sheet_name(self) -> str:\n        \"\"\"The name of the sheet this table belongs to\"\"\"\n        return self._table.sheet_name\n\n    @property\n    def width(self) -> int:\n        \"\"\"The table's width\"\"\"\n        return self._table.width\n\n    @property\n    def height(self) -> int:\n        \"\"\"The table's height\"\"\"\n        return self._table.height\n\n    @property\n    def total_height(self) -> int:\n        \"\"\"The table's total height\"\"\"\n        return self._table.total_height\n\n    @property\n    def offset(self) -> int:\n        \"\"\"The table's offset before data starts\"\"\"\n        return self._table.offset\n\n    @property\n    def selected_columns(self) -> list[ColumnInfo]:\n        \"\"\"The table's selected columns\"\"\"\n        return self._table.selected_columns\n\n    def available_columns(self) -> list[ColumnInfo]:\n        \"\"\"The columns available for the given table\"\"\"\n        return self._table.available_columns()\n\n    @property\n    def specified_dtypes(self) -> DTypeMap | None:\n        \"\"\"The dtypes specified for the table\"\"\"\n        return self._table.specified_dtypes\n\n    def to_arrow(self) -> pa.RecordBatch:\n        \"\"\"Converts the table to a pyarrow `RecordBatch`\n\n        Requires the `pyarrow` extra to be installed.\n        \"\"\"\n        if not _PYARROW_AVAILABLE:\n            raise ImportError(\n                \"pyarrow is required for to_arrow(). 
Install with: pip install 'fastexcel[pyarrow]'\"\n            )\n        return self._table.to_arrow()\n\n    def to_pandas(self) -> pd.DataFrame:\n        \"\"\"Converts the table to a Pandas `DataFrame`.\n\n        Requires the `pandas` extra to be installed.\n        \"\"\"\n        # Note: pandas PyCapsule interface requires __dataframe__ or __arrow_c_stream__\n        # which we don't implement. Using pyarrow conversion for now.\n        # (see https://pandas.pydata.org/docs/reference/api/pandas.api.interchange.from_dataframe.html)\n        return self.to_arrow().to_pandas()\n\n    def to_polars(self) -> pl.DataFrame:\n        \"\"\"Converts the table to a Polars `DataFrame`.\n\n        Uses the Arrow PyCapsule Interface for zero-copy data exchange.\n        Requires the `polars` extra to be installed.\n        \"\"\"\n        import polars as pl\n\n        return pl.DataFrame(self)\n\n    def __arrow_c_schema__(self) -> object:\n        \"\"\"Export the schema as an `ArrowSchema` `PyCapsule`.\n\n        https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html#arrowschema-export\n\n        The Arrow PyCapsule Interface enables zero-copy data exchange with\n        Arrow-compatible libraries without requiring PyArrow as a dependency.\n        \"\"\"\n        return self._table.__arrow_c_schema__()\n\n    def __arrow_c_array__(self, requested_schema: object | None = None) -> tuple[object, object]:\n        \"\"\"Export the schema and data as a pair of `ArrowSchema` and `ArrowArray` `PyCapsules`.\n\n        The optional `requested_schema` parameter allows for potential schema conversion.\n\n        https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html#arrowarray-export\n\n        The Arrow PyCapsule Interface enables zero-copy data exchange with\n        Arrow-compatible libraries without requiring PyArrow as a dependency.\n        \"\"\"\n        return self._table.__arrow_c_array__(requested_schema)\n\n\nclass 
ExcelReader:\n    \"\"\"A class representing an open Excel file and allowing to read its sheets\"\"\"\n\n    def __init__(self, reader: _ExcelReader) -> None:\n        self._reader = reader\n\n    @property\n    def sheet_names(self) -> list[str]:\n        \"\"\"The list of sheet names\"\"\"\n        return self._reader.sheet_names\n\n    @typing.overload\n    def load_sheet(\n        self,\n        idx_or_name: int | str,\n        *,\n        header_row: int | None = 0,\n        column_names: list[str] | None = None,\n        skip_rows: int | list[int] | Callable[[int], bool] | None = None,\n        n_rows: int | None = None,\n        schema_sample_rows: int | None = 1_000,\n        dtype_coercion: Literal[\"coerce\", \"strict\"] = \"coerce\",\n        use_columns: list[str]\n        | list[int]\n        | str\n        | Callable[[ColumnInfoNoDtype], bool]\n        | None = None,\n        dtypes: DType | DTypeMap | None = None,\n        eager: Literal[False] = ...,\n        skip_whitespace_tail_rows: bool = False,\n        whitespace_as_null: bool = False,\n    ) -> ExcelSheet: ...\n\n    @typing.overload\n    def load_sheet(\n        self,\n        idx_or_name: int | str,\n        *,\n        header_row: int | None = 0,\n        column_names: list[str] | None = None,\n        skip_rows: int | list[int] | Callable[[int], bool] | None = None,\n        n_rows: int | None = None,\n        schema_sample_rows: int | None = 1_000,\n        dtype_coercion: Literal[\"coerce\", \"strict\"] = \"coerce\",\n        use_columns: list[str]\n        | list[int]\n        | str\n        | Callable[[ColumnInfoNoDtype], bool]\n        | None = None,\n        dtypes: DType | DTypeMap | None = None,\n        eager: Literal[True] = ...,\n        skip_whitespace_tail_rows: bool = False,\n        whitespace_as_null: bool = False,\n    ) -> pa.RecordBatch: ...\n\n    def load_sheet(\n        self,\n        idx_or_name: int | str,\n        *,\n        header_row: int | None = 0,\n        
column_names: list[str] | None = None,\n        skip_rows: int | list[int] | Callable[[int], bool] | None = None,\n        n_rows: int | None = None,\n        schema_sample_rows: int | None = 1_000,\n        dtype_coercion: Literal[\"coerce\", \"strict\"] = \"coerce\",\n        use_columns: list[str]\n        | list[int]\n        | str\n        | Callable[[ColumnInfoNoDtype], bool]\n        | None = None,\n        dtypes: DType | DTypeMap | None = None,\n        eager: bool = False,\n        skip_whitespace_tail_rows: bool = False,\n        whitespace_as_null: bool = False,\n    ) -> ExcelSheet | pa.RecordBatch:\n        \"\"\"Loads a sheet by index or name.\n\n        :param idx_or_name: The index (starting at 0) or the name of the sheet to load.\n        :param header_row: The index of the row containing the column labels, default index is 0.\n                           If `None`, the sheet does not have any column labels.\n                           Any rows before the `header_row` will be automatically skipped.\n        :param column_names: Overrides headers found in the document.\n                             If `column_names` is used, `header_row` will be ignored.\n        :param n_rows: Specifies how many rows should be loaded.\n                       If `None`, all rows are loaded\n        :param skip_rows: Specifies which rows should be skipped after the `header_row`.\n                          Any rows before the `header_row` are automatically skipped.\n                          It means row indices are relative to data rows, not the sheet!\n                          Can be one of:\n                          - `int`: Skip this many rows after the header row\n                          - `list[int]`: Skip specific row indices (0-based relative to data rows)\n                          - `Callable[[int], bool]`: Function that receives row index (0-based\n                          relative to data rows) and returns True to skip the row\n                        
  - `None`: If `header_row` is None, skips empty rows at beginning\n        :param schema_sample_rows: Specifies how many rows should be used to determine\n                                   the dtype of a column. Cannot be 0. A specific dtype can be\n                                   enforced for some or all columns through the `dtypes` parameter.\n                                   If `None`, all rows will be used.\n        :param dtype_coercion: Specifies how type coercion should behave. `coerce` (the default)\n                               will try to coerce different dtypes in a column to the same one,\n                               whereas `strict` will raise an error in case a column contains\n                               several dtypes. Note that this only applies to columns whose dtype\n                               is guessed, i.e. not specified via `dtypes`.\n        :param use_columns: Specifies the columns to use. Can either be:\n                            - `None` to select all columns\n                            - A list of strings and ints, the column names and/or indices\n                              (starting at 0)\n                            - A string, a comma separated list of Excel column letters and column\n                              ranges (e.g. `\"A:E\"` or `\"A,C,E:F\"`, which would result in\n                              `A,B,C,D,E` and `A,C,E,F`). Also supports open-ended ranges\n                              (e.g. `\"B:\"` to select all columns from B onwards) and from-beginning\n                              ranges (e.g. `\":C\"` to select columns from A to C). These can be\n                              combined for \"except\" patterns (e.g. 
`\":C,E:\"` to select everything\n                              except column D)\n                            - A callable, a function that takes a column and returns a boolean\n                              indicating whether the column should be used\n        :param dtypes: An optional dtype (for all columns)\n                       or dict of dtypes with keys as column indices or names.\n        :param eager: Specifies whether the sheet should be loaded eagerly.\n                      `False` (default) will load the sheet lazily using the `PyCapsule` interface,\n                      whereas `True` will load it eagerly via `pyarrow`.\n\n                      Eager loading requires the `pyarrow` extra to be installed.\n        :param skip_whitespace_tail_rows: Skip rows at the end of the sheet\n                                          containing only whitespace and null values.\n        :param whitespace_as_null: Consider cells containing only whitespace as null values.\n        \"\"\"\n        sheet_or_rb = self._reader.load_sheet(\n            idx_or_name=idx_or_name,\n            header_row=header_row,\n            column_names=column_names,\n            skip_rows=skip_rows,\n            n_rows=n_rows,\n            schema_sample_rows=schema_sample_rows,\n            dtype_coercion=dtype_coercion,\n            use_columns=use_columns,\n            dtypes=dtypes,\n            eager=eager,\n            skip_whitespace_tail_rows=skip_whitespace_tail_rows,\n            whitespace_as_null=whitespace_as_null,\n        )\n        return sheet_or_rb if eager else ExcelSheet(sheet_or_rb)\n\n    def table_names(self, sheet_name: str | None = None) -> list[str]:\n        \"\"\"The list of table names.\n\n        Will return an empty list if no tables are found.\n\n        :param sheet_name: If given, will limit the list to the given sheet, will be faster\n        too.\n        \"\"\"\n        return self._reader.table_names(sheet_name)\n\n    def defined_names(self) -> 
list[DefinedName]:\n        \"\"\"The list of defined names (named ranges) in the workbook.\n\n        Returns a list of DefinedName objects with 'name' and 'formula' attributes.\n        The formula is a string representation of the range or expression.\n\n        Will return an empty list if no defined names are found.\n        \"\"\"\n        return self._reader.defined_names()\n\n    @typing.overload\n    def load_table(\n        self,\n        name: str,\n        *,\n        header_row: int | None = None,\n        column_names: list[str] | None = None,\n        skip_rows: int | None = None,\n        n_rows: int | None = None,\n        schema_sample_rows: int | None = 1_000,\n        dtype_coercion: Literal[\"coerce\", \"strict\"] = \"coerce\",\n        use_columns: list[str]\n        | list[int]\n        | str\n        | Callable[[ColumnInfoNoDtype], bool]\n        | None = None,\n        dtypes: DType | DTypeMap | None = None,\n        eager: Literal[False] = ...,\n        skip_whitespace_tail_rows: bool = False,\n        whitespace_as_null: bool = False,\n    ) -> ExcelTable: ...\n\n    @typing.overload\n    def load_table(\n        self,\n        name: str,\n        *,\n        header_row: int | None = None,\n        column_names: list[str] | None = None,\n        skip_rows: int | None = None,\n        n_rows: int | None = None,\n        schema_sample_rows: int | None = 1_000,\n        dtype_coercion: Literal[\"coerce\", \"strict\"] = \"coerce\",\n        use_columns: list[str]\n        | list[int]\n        | str\n        | Callable[[ColumnInfoNoDtype], bool]\n        | None = None,\n        dtypes: DType | DTypeMap | None = None,\n        eager: Literal[True] = ...,\n        skip_whitespace_tail_rows: bool = False,\n        whitespace_as_null: bool = False,\n    ) -> pa.RecordBatch: ...\n\n    def load_table(\n        self,\n        name: str,\n        *,\n        header_row: int | None = None,\n        column_names: list[str] | None = None,\n        
skip_rows: int | None = None,\n        n_rows: int | None = None,\n        schema_sample_rows: int | None = 1_000,\n        dtype_coercion: Literal[\"coerce\", \"strict\"] = \"coerce\",\n        use_columns: list[str]\n        | list[int]\n        | str\n        | Callable[[ColumnInfoNoDtype], bool]\n        | None = None,\n        dtypes: DType | DTypeMap | None = None,\n        eager: bool = False,\n        skip_whitespace_tail_rows: bool = False,\n        whitespace_as_null: bool = False,\n    ) -> ExcelTable | pa.RecordBatch:\n        \"\"\"Loads a table by name.\n\n        :param name: The name of the table to load.\n        :param header_row: The index of the row containing the column labels.\n                           If `None`, the table's column names will be used.\n                           Any rows before the `header_row` will be automatically skipped.\n        :param column_names: Overrides headers found in the document.\n                             If `column_names` is used, `header_row` will be ignored.\n        :param n_rows: Specifies how many rows should be loaded.\n                       If `None`, all rows are loaded\n        :param skip_rows: Specifies how many rows should be skipped after the `header_row`.\n                          Any rows before the `header_row` are automatically skipped.\n                          If `header_row` is `None`, it skips the number of rows from the\n                          start of the sheet.\n        :param schema_sample_rows: Specifies how many rows should be used to determine\n                                   the dtype of a column. Cannot be 0. A specific dtype can be\n                                   enforced for some or all columns through the `dtypes` parameter.\n                                   If `None`, all rows will be used.\n        :param dtype_coercion: Specifies how type coercion should behave. 
`coerce` (the default)\n                               will try to coerce different dtypes in a column to the same one,\n                               whereas `strict` will raise an error in case a column contains\n                               several dtypes. Note that this only applies to columns whose dtype\n                               is guessed, i.e. not specified via `dtypes`.\n        :param use_columns: Specifies the columns to use. Can either be:\n                            - `None` to select all columns\n                            - A list of strings and ints, the column names and/or indices\n                              (starting at 0)\n                            - A string, a comma separated list of Excel column letters and column\n                              ranges (e.g. `\"A:E\"` or `\"A,C,E:F\"`, which would result in\n                              `A,B,C,D,E` and `A,C,E,F`). Also supports open-ended ranges\n                              (e.g. `\"B:\"` to select all columns from B onwards) and from-beginning\n                              ranges (e.g. `\":C\"` to select columns from A to C). These can be\n                              combined for \"except\" patterns (e.g. 
`\":C,E:\"` to select everything\n                              except column D)\n                            - A callable, a function that takes a column and returns a boolean\n                              indicating whether the column should be used\n        :param dtypes: An optional dtype (for all columns)\n                       or dict of dtypes with keys as column indices or names.\n        :param eager: Specifies whether the table should be loaded eagerly.\n                      `False` (default) will load the table lazily using the `PyCapsule` interface,\n                      whereas `True` will load it eagerly via `pyarrow`.\n\n                      Eager loading requires the `pyarrow` extra to be installed.\n        :param skip_whitespace_tail_rows: Skip rows at the end of the table\n                                          containing only whitespace and null values.\n        :param whitespace_as_null: Consider cells containing only whitespace as null values.\n        \"\"\"\n        if eager:\n            return self._reader.load_table(\n                name=name,\n                header_row=header_row,\n                column_names=column_names,\n                skip_rows=skip_rows,\n                n_rows=n_rows,\n                schema_sample_rows=schema_sample_rows,\n                dtype_coercion=dtype_coercion,\n                use_columns=use_columns,\n                dtypes=dtypes,\n                eager=True,\n                skip_whitespace_tail_rows=skip_whitespace_tail_rows,\n                whitespace_as_null=whitespace_as_null,\n            )\n        else:\n            return ExcelTable(\n                self._reader.load_table(\n                    name=name,\n                    header_row=header_row,\n                    column_names=column_names,\n                    skip_rows=skip_rows,\n                    n_rows=n_rows,\n                    schema_sample_rows=schema_sample_rows,\n                    
dtype_coercion=dtype_coercion,\n                    use_columns=use_columns,\n                    dtypes=dtypes,\n                    eager=False,\n                    skip_whitespace_tail_rows=skip_whitespace_tail_rows,\n                    whitespace_as_null=whitespace_as_null,\n                )\n            )\n\n    def load_sheet_eager(\n        self,\n        idx_or_name: int | str,\n        *,\n        header_row: int | None = 0,\n        column_names: list[str] | None = None,\n        skip_rows: int | list[int] | Callable[[int], bool] | None = None,\n        n_rows: int | None = None,\n        schema_sample_rows: int | None = 1_000,\n        dtype_coercion: Literal[\"coerce\", \"strict\"] = \"coerce\",\n        use_columns: list[str] | list[int] | str | None = None,\n        dtypes: DType | DTypeMap | None = None,\n    ) -> pa.RecordBatch:\n        \"\"\"Loads a sheet eagerly by index or name.\n\n        For xlsx files, this will be faster and more memory-efficient, as it will use\n        `worksheet_range_ref` under the hood, which returns borrowed types.\n\n        Refer to `load_sheet` for parameter documentation\n\n        Requires the `pyarrow` extra to be installed.\n        \"\"\"\n        return self._reader.load_sheet(\n            idx_or_name=idx_or_name,\n            header_row=header_row,\n            column_names=column_names,\n            skip_rows=skip_rows,\n            n_rows=n_rows,\n            schema_sample_rows=schema_sample_rows,\n            dtype_coercion=dtype_coercion,\n            use_columns=use_columns,\n            dtypes=dtypes,\n            eager=True,\n        )\n\n    def load_sheet_by_name(\n        self,\n        name: str,\n        *,\n        header_row: int | None = 0,\n        column_names: list[str] | None = None,\n        skip_rows: int | None = None,\n        n_rows: int | None = None,\n        schema_sample_rows: int | None = 1_000,\n        dtype_coercion: Literal[\"coerce\", \"strict\"] = \"coerce\",\n        
use_columns: list[str]\n        | list[int]\n        | str\n        | Callable[[ColumnInfoNoDtype], bool]\n        | None = None,\n        dtypes: DType | DTypeMap | None = None,\n    ) -> ExcelSheet:\n        \"\"\"Loads a sheet by name.\n\n        Refer to `load_sheet` for parameter documentation\n        \"\"\"\n        return self.load_sheet(\n            name,\n            header_row=header_row,\n            column_names=column_names,\n            skip_rows=skip_rows,\n            n_rows=n_rows,\n            schema_sample_rows=schema_sample_rows,\n            dtype_coercion=dtype_coercion,\n            use_columns=use_columns,\n            dtypes=dtypes,\n        )\n\n    def load_sheet_by_idx(\n        self,\n        idx: int,\n        *,\n        header_row: int | None = 0,\n        column_names: list[str] | None = None,\n        skip_rows: int | None = None,\n        n_rows: int | None = None,\n        schema_sample_rows: int | None = 1_000,\n        dtype_coercion: Literal[\"coerce\", \"strict\"] = \"coerce\",\n        use_columns: list[str]\n        | list[int]\n        | str\n        | Callable[[ColumnInfoNoDtype], bool]\n        | None = None,\n        dtypes: DType | DTypeMap | None = None,\n    ) -> ExcelSheet:\n        \"\"\"Loads a sheet by index.\n\n        Refer to `load_sheet` for parameter documentation\n        \"\"\"\n        return self.load_sheet(\n            idx,\n            header_row=header_row,\n            column_names=column_names,\n            skip_rows=skip_rows,\n            n_rows=n_rows,\n            schema_sample_rows=schema_sample_rows,\n            dtype_coercion=dtype_coercion,\n            use_columns=use_columns,\n            dtypes=dtypes,\n        )\n\n    def __repr__(self) -> str:\n        return self._reader.__repr__()\n\n\ndef read_excel(source: Path | str | bytes) -> ExcelReader:\n    \"\"\"Opens and loads an excel file.\n\n    :param source: The path to a file or its content as bytes\n    \"\"\"\n    if 
isinstance(source, str | Path):\n        source = expanduser(source)\n    return ExcelReader(_read_excel(source))\n\n\n__all__ = (\n    # version\n    \"__version__\",\n    # main entrypoint\n    \"read_excel\",\n    # Python types\n    \"DType\",\n    \"DTypeMap\",\n    # Excel reader\n    \"ExcelReader\",\n    # Excel sheet\n    \"ExcelSheet\",\n    # Excel table\n    \"ExcelTable\",\n    # Column metadata\n    \"DTypeFrom\",\n    \"ColumnNameFrom\",\n    \"ColumnInfo\",\n    # Defined names\n    \"DefinedName\",\n    # Parse error information\n    \"CellError\",\n    \"CellErrors\",\n    # Exceptions\n    \"FastExcelError\",\n    \"CannotRetrieveCellDataError\",\n    \"CalamineCellError\",\n    \"CalamineError\",\n    \"SheetNotFoundError\",\n    \"ColumnNotFoundError\",\n    \"ArrowError\",\n    \"InvalidParametersError\",\n    \"UnsupportedColumnTypeCombinationError\",\n)\n"
  },
  {
    "path": "python/fastexcel/_fastexcel.pyi",
    "content": "from __future__ import annotations\n\nimport typing\nfrom collections.abc import Callable\nfrom typing import TYPE_CHECKING, Literal\n\nif TYPE_CHECKING:\n    import pyarrow as pa\n\nDType = Literal[\"null\", \"int\", \"float\", \"string\", \"boolean\", \"datetime\", \"date\", \"duration\"]\nDTypeMap = dict[str | int, DType]\nColumnNameFrom = Literal[\"provided\", \"looked_up\", \"generated\"]\nDTypeFrom = Literal[\"provided_for_all\", \"provided_by_index\", \"provided_by_name\", \"guessed\"]\nSheetVisible = Literal[\"visible\", \"hidden\", \"veryhidden\"]\n\nclass ColumnInfoNoDtype:\n    def __init__(\n        self,\n        *,\n        name: str,\n        index: int,\n        absolute_index: int,\n        column_name_from: ColumnNameFrom,\n    ) -> None: ...\n    @property\n    def name(self) -> str: ...\n    @property\n    def index(self) -> int: ...\n    @property\n    def absolute_index(self) -> int: ...\n    @property\n    def column_name_from(self) -> ColumnNameFrom: ...\n\nclass ColumnInfo:\n    def __init__(\n        self,\n        *,\n        name: str,\n        index: int,\n        absolute_index: int,\n        column_name_from: ColumnNameFrom,\n        dtype: DType,\n        dtype_from: DTypeFrom,\n    ) -> None: ...\n    @property\n    def name(self) -> str: ...\n    @property\n    def index(self) -> int: ...\n    @property\n    def absolute_index(self) -> int: ...\n    @property\n    def dtype(self) -> DType: ...\n    @property\n    def column_name_from(self) -> ColumnNameFrom: ...\n    @property\n    def dtype_from(self) -> DTypeFrom: ...\n\nclass DefinedName:\n    def __init__(\n        self,\n        *,\n        name: str,\n        formula: str,\n    ) -> None: ...\n    @property\n    def name(self) -> str: ...\n    @property\n    def formula(self) -> str: ...\n\nclass CellError:\n    @property\n    def position(self) -> tuple[int, int]: ...\n    @property\n    def row_offset(self) -> int: ...\n    @property\n    def 
offset_position(self) -> tuple[int, int]: ...\n    @property\n    def detail(self) -> str: ...\n    def __repr__(self) -> str: ...\n\nclass CellErrors:\n    @property\n    def errors(self) -> list[CellError]: ...\n    def __repr__(self) -> str: ...\n\nclass _ExcelSheet:\n    @property\n    def name(self) -> str:\n        \"\"\"The name of the sheet\"\"\"\n    @property\n    def width(self) -> int:\n        \"\"\"The sheet's width\"\"\"\n    @property\n    def height(self) -> int:\n        \"\"\"The sheet's height\"\"\"\n    @property\n    def total_height(self) -> int:\n        \"\"\"The sheet's total height\"\"\"\n    @property\n    def offset(self) -> int:\n        \"\"\"The sheet's offset before data starts\"\"\"\n    @property\n    def selected_columns(self) -> list[ColumnInfo]:\n        \"\"\"The sheet's selected columns\"\"\"\n    def available_columns(self) -> list[ColumnInfo]:\n        \"\"\"The columns available for the given sheet\"\"\"\n    @property\n    def specified_dtypes(self) -> DTypeMap | None:\n        \"\"\"The dtypes specified for the sheet\"\"\"\n    @property\n    def visible(self) -> SheetVisible:\n        \"\"\"The visibility of the sheet\"\"\"\n    def to_arrow(self) -> pa.RecordBatch:\n        \"\"\"Converts the sheet to a pyarrow `RecordBatch`\n\n        Requires the `pyarrow` extra to be installed.\n        \"\"\"\n    def to_arrow_with_errors(self) -> tuple[pa.RecordBatch, CellErrors]:\n        \"\"\"Converts the sheet to a pyarrow `RecordBatch` with error information.\n\n        Stores the positions of any values that cannot be parsed as the specified type and were\n        therefore converted to None.\n\n        Requires the `pyarrow` extra to be installed.\n        \"\"\"\n    def __arrow_c_schema__(self) -> object:\n        \"\"\"Export the schema as an `ArrowSchema` `PyCapsule`.\n\n        https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html#arrowschema-export\n\n        The Arrow PyCapsule Interface enables 
zero-copy data exchange with\n        Arrow-compatible libraries without requiring PyArrow as a dependency.\n        \"\"\"\n    def __arrow_c_array__(self, requested_schema: object = None) -> tuple[object, object]:\n        \"\"\"Export the schema and data as a pair of `ArrowSchema` and `ArrowArray` `PyCapsules`.\n\n        The optional `requested_schema` parameter allows for potential schema conversion.\n\n        https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html#arrowarray-export\n\n        The Arrow PyCapsule Interface enables zero-copy data exchange with\n        Arrow-compatible libraries without requiring PyArrow as a dependency.\n        \"\"\"\n\nclass _ExcelTable:\n    @property\n    def name(self) -> str:\n        \"\"\"The name of the table\"\"\"\n    @property\n    def sheet_name(self) -> str:\n        \"\"\"The name of the sheet this table belongs to\"\"\"\n    @property\n    def width(self) -> int:\n        \"\"\"The table's width\"\"\"\n    @property\n    def height(self) -> int:\n        \"\"\"The table's height\"\"\"\n    @property\n    def total_height(self) -> int:\n        \"\"\"The table's total height\"\"\"\n    @property\n    def offset(self) -> int:\n        \"\"\"The table's offset before data starts\"\"\"\n    @property\n    def selected_columns(self) -> list[ColumnInfo]:\n        \"\"\"The table's selected columns\"\"\"\n    def available_columns(self) -> list[ColumnInfo]:\n        \"\"\"The columns available for the given table\"\"\"\n    @property\n    def specified_dtypes(self) -> DTypeMap | None:\n        \"\"\"The dtypes specified for the table\"\"\"\n    def to_arrow(self) -> pa.RecordBatch:\n        \"\"\"Converts the table to a pyarrow `RecordBatch`\n\n        Requires the `pyarrow` extra to be installed.\n        \"\"\"\n    def __arrow_c_schema__(self) -> object:\n        \"\"\"Export the schema as an `ArrowSchema` `PyCapsule`.\n\n        
https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html#arrowschema-export\n\n        The Arrow PyCapsule Interface enables zero-copy data exchange with\n        Arrow-compatible libraries without requiring PyArrow as a dependency.\n        \"\"\"\n\n    def __arrow_c_array__(self, requested_schema: object = None) -> tuple[object, object]:\n        \"\"\"Export the schema and data as a pair of `ArrowSchema` and `ArrowArray` `PyCapsules`.\n\n        The optional `requested_schema` parameter allows for potential schema conversion.\n\n        https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html#arrowarray-export\n\n        The Arrow PyCapsule Interface enables zero-copy data exchange with\n        Arrow-compatible libraries without requiring PyArrow as a dependency.\n        \"\"\"\n\nclass _ExcelReader:\n    \"\"\"A class representing an open Excel file and allowing to read its sheets\"\"\"\n\n    @typing.overload\n    def load_sheet(\n        self,\n        idx_or_name: str | int,\n        *,\n        header_row: int | None = 0,\n        column_names: list[str] | None = None,\n        skip_rows: int | list[int] | Callable[[int], bool] | None = None,\n        n_rows: int | None = None,\n        schema_sample_rows: int | None = 1_000,\n        dtype_coercion: Literal[\"coerce\", \"strict\"] = \"coerce\",\n        use_columns: list[str]\n        | list[int]\n        | str\n        | Callable[[ColumnInfoNoDtype], bool]\n        | None = None,\n        dtypes: DType | DTypeMap | None = None,\n        eager: Literal[False] = ...,\n        skip_whitespace_tail_rows: bool = False,\n        whitespace_as_null: bool = False,\n    ) -> _ExcelSheet: ...\n    @typing.overload\n    def load_sheet(\n        self,\n        idx_or_name: str | int,\n        *,\n        header_row: int | None = 0,\n        column_names: list[str] | None = None,\n        skip_rows: int | list[int] | Callable[[int], bool] | None = None,\n        n_rows: int | 
None = None,\n        schema_sample_rows: int | None = 1_000,\n        dtype_coercion: Literal[\"coerce\", \"strict\"] = \"coerce\",\n        use_columns: list[str]\n        | list[int]\n        | str\n        | Callable[[ColumnInfoNoDtype], bool]\n        | None = None,\n        dtypes: DType | DTypeMap | None = None,\n        eager: Literal[True] = ...,\n        skip_whitespace_tail_rows: bool = False,\n        whitespace_as_null: bool = False,\n    ) -> pa.RecordBatch: ...\n    @typing.overload\n    def load_sheet(\n        self,\n        idx_or_name: str | int,\n        *,\n        header_row: int | None = 0,\n        column_names: list[str] | None = None,\n        skip_rows: int | list[int] | Callable[[int], bool] | None = None,\n        n_rows: int | None = None,\n        schema_sample_rows: int | None = 1_000,\n        dtype_coercion: Literal[\"coerce\", \"strict\"] = \"coerce\",\n        use_columns: list[str]\n        | list[int]\n        | str\n        | Callable[[ColumnInfoNoDtype], bool]\n        | None = None,\n        dtypes: DType | DTypeMap | None = None,\n        eager: bool = False,\n        skip_whitespace_tail_rows: bool = False,\n        whitespace_as_null: bool = False,\n    ) -> pa.RecordBatch: ...\n    @typing.overload\n    def load_table(\n        self,\n        name: str,\n        *,\n        header_row: int | None = None,\n        column_names: list[str] | None = None,\n        skip_rows: int | list[int] | Callable[[int], bool] | None = None,\n        n_rows: int | None = None,\n        schema_sample_rows: int | None = 1_000,\n        dtype_coercion: Literal[\"coerce\", \"strict\"] = \"coerce\",\n        use_columns: list[str]\n        | list[int]\n        | str\n        | Callable[[ColumnInfoNoDtype], bool]\n        | None = None,\n        dtypes: DType | DTypeMap | None = None,\n        eager: Literal[False] = ...,\n        skip_whitespace_tail_rows: bool = False,\n        whitespace_as_null: bool = False,\n    ) -> _ExcelTable: ...\n   
 @typing.overload\n    def load_table(\n        self,\n        name: str,\n        *,\n        header_row: int | None = None,\n        column_names: list[str] | None = None,\n        skip_rows: int | list[int] | Callable[[int], bool] | None = None,\n        n_rows: int | None = None,\n        schema_sample_rows: int | None = 1_000,\n        dtype_coercion: Literal[\"coerce\", \"strict\"] = \"coerce\",\n        use_columns: list[str]\n        | list[int]\n        | str\n        | Callable[[ColumnInfoNoDtype], bool]\n        | None = None,\n        dtypes: DType | DTypeMap | None = None,\n        eager: Literal[True] = ...,\n        skip_whitespace_tail_rows: bool = False,\n        whitespace_as_null: bool = False,\n    ) -> pa.RecordBatch: ...\n    @property\n    def sheet_names(self) -> list[str]: ...\n    def table_names(self, sheet_name: str | None = None) -> list[str]: ...\n    def defined_names(self) -> list[DefinedName]: ...\n\ndef read_excel(source: str | bytes) -> _ExcelReader:\n    \"\"\"Reads an excel file and returns an ExcelReader\"\"\"\n\n__version__: str\n\n# Exceptions\nclass FastExcelError(Exception): ...\nclass UnsupportedColumnTypeCombinationError(FastExcelError): ...\nclass CannotRetrieveCellDataError(FastExcelError): ...\nclass CalamineCellError(FastExcelError): ...\nclass CalamineError(FastExcelError): ...\nclass SheetNotFoundError(FastExcelError): ...\nclass ColumnNotFoundError(FastExcelError): ...\nclass ArrowError(FastExcelError): ...\nclass InvalidParametersError(FastExcelError): ...\n"
  },
  {
    "path": "python/fastexcel/py.typed",
    "content": ""
  },
  {
    "path": "python/tests/__init__.py",
    "content": ""
  },
  {
    "path": "python/tests/benchmarks/README.md",
    "content": "# Benchmarks\n\nThese benchmarks were generated using `pytest-benchmark`.\n\n> **_NOTE:_**  formulas.xlsx was found [here](https://foss.heptapod.net/openpyxl/openpyxl/-/issues/494); plain_data.xls and plain_data.xlsx can be found [here](https://public.opendatasoft.com/explore/dataset/covid-19-pandemic-worldwide-data/export/?disjunctive.zone&disjunctive.category)\n\nUsing the following command:\n\n```bash\nmake benchmarks\n```\n\nThe results are from my local machine. This is not 100% accurate.\n\n## Speed\n### 'xls': 2 tests\n|Name (time in ms)|Min|Max|Mean|StdDev|Median|IQR|Outliers|OPS|Rounds|Iterations|\n|-----------------|---|---|----|------|------|---|-------|---|-------|----------|\n|test_fastexcel_xls|27.0991 (1.0)|33.7495 (1.0)|29.5819 (1.0)|1.6429 (1.0)|29.3559 (1.0)|2.7158 (1.0)|10;0|33.8044 (1.0)|29|1|\n|test_xlrd|596.5040 (22.01)|628.7964 (18.63)|612.5730 (20.71)|12.9967 (7.91)|615.1620 (20.96)|20.7911 (7.66)|2;0|1.6325 (0.05)|5|1|\n\n\n\n### 'xlsx': 4 tests\n|Name (time in ms)|Min|Max|Mean|StdDev|Median|IQR|Outliers|OPS|Rounds|Iterations|\n|-----------------|---|---|----|------|------|---|--------|---|-------|----------|\n|test_fastexcel_xlsx|437.5810 (1.0)|470.7615 (1.0)|457.9611 (1.0)|13.7401 (1.0)|457.7006 (1.0)|21.0743 (1.25)|1;0|2.1836 (1.0)|5|1|\n|test_fastexcel_with_formulas|3,106.7454 (7.10)|3,150.2050 (6.69)|3,122.5234 (6.82)|16.6031 (1.21)|3,120.9000 (6.82)|16.8614 (1.0)|1;0|0.3203 (0.15)|5|1|\n|test_pyxl|4,780.2341 (10.92)|4,998.7753 (10.62)|4,899.6885 (10.70)|110.4665 (8.04)|4,948.7550 (10.81)|211.6149 (12.55)|2;0|0.2041 (0.09)|5|1|\n|test_pyxl_with_formulas|25,312.8494 (57.85)|26,621.4687 (56.55)|25,808.5418 (56.36)|545.0540 (39.67)|25,748.0901 (56.26)|852.3171 (50.55)|1;0|0.0387 (0.02)|5|1|\n\n\n## Memory usage\n\n| fastexcel memory usage | other memory usage |\n|-|-|\n|![fastexcel xls](memory_profiles/test_xls_fastexcel.png \"fastexcel xls\") |![xlrd xls](memory_profiles/test_xls_xlrd.png \"xlrd xls\")|\n|![fastexcel xlsx](memory_profiles/test_xlsx_fastexcel.png \"fastexcel xlsx\") |![pyxl xlsx](memory_profiles/test_xlsx_openpyxl.png \"pyxl xlsx\")|\n|![fastexcel formulas xlsx](memory_profiles/test_xlsx_formulas_fastexcel.png \"fastexcel formulas xlsx\") |![pyxl formulas xlsx](memory_profiles/test_xlsx_formulas_openpyxl.png \"pyxl formulas xlsx\")|\n"
  },
  {
    "path": "python/tests/benchmarks/memory.py",
    "content": "import argparse\nfrom enum import Enum\n\nfrom .readers import fastexcel_read, pyxl_read, xlrd_read\n\n\nclass Engine(str, Enum):\n    FASTEXCEL = \"fastexcel\"\n    XLRD = \"xlrd\"\n    OPENPYXL = \"pyxl\"\n\n\ndef get_args() -> argparse.Namespace:\n    parser = argparse.ArgumentParser()\n    parser.add_argument(\"-e\", \"--engine\", default=Engine.FASTEXCEL)\n    parser.add_argument(\"file\")\n    return parser.parse_args()\n\n\ndef main():\n    args = get_args()\n    engine = args.engine\n\n    if engine == Engine.FASTEXCEL:\n        fastexcel_read(args.file)\n    elif engine == Engine.XLRD:\n        xlrd_read(args.file)\n    elif engine == Engine.OPENPYXL:\n        pyxl_read(args.file)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "python/tests/benchmarks/readers.py",
    "content": "from fastexcel import read_excel\nfrom openpyxl import load_workbook\nfrom xlrd import open_workbook\n\n\ndef pyxl_read(test_file_path: str):\n    wb = load_workbook(test_file_path, read_only=True, keep_links=False, data_only=True)\n    for ws in wb:\n        rows = ws.iter_rows()\n        rows = ws.values\n        for row in rows:\n            for _ in row:\n                pass\n\n\ndef xlrd_read(test_file_path: str):\n    wb = open_workbook(test_file_path)\n    for ws in wb.sheets():\n        for idx in range(ws.nrows):\n            for _ in ws.row_values(idx):\n                pass\n\n\ndef fastexcel_read(test_file_path: str):\n    reader = read_excel(test_file_path)\n    for sheet_name in reader.sheet_names:\n        sheet = reader.load_sheet_by_name(sheet_name)\n        sheet.to_arrow()\n"
  },
  {
    "path": "python/tests/benchmarks/speed.py",
    "content": "\"\"\"\nCompare read performance with fastexcel, xlrd and different openpyxl options\n\"\"\"\n\nimport pytest\n\nfrom .readers import fastexcel_read, pyxl_read, xlrd_read\n\n\n@pytest.fixture\ndef plain_data_xls():\n    return \"./python/tests/benchmarks/fixtures/plain_data.xls\"\n\n\n@pytest.fixture\ndef plain_data_xlsx():\n    return \"./python/tests/benchmarks/fixtures/plain_data.xlsx\"\n\n\n@pytest.fixture\ndef formula_xlsx():\n    return \"./python/tests/benchmarks/fixtures/formulas.xlsx\"\n\n\n@pytest.mark.benchmark(group=\"xlsx\")\ndef test_pyxl(benchmark, plain_data_xlsx):\n    benchmark(pyxl_read, plain_data_xlsx)\n\n\n@pytest.mark.benchmark(group=\"xls\")\ndef test_xlrd(benchmark, plain_data_xls):\n    benchmark(xlrd_read, plain_data_xls)\n\n\n@pytest.mark.benchmark(group=\"xls\")\ndef test_fastexcel_xls(benchmark, plain_data_xls):\n    benchmark(fastexcel_read, plain_data_xls)\n\n\n@pytest.mark.benchmark(group=\"xlsx\")\ndef test_fastexcel_xlsx(benchmark, plain_data_xlsx):\n    benchmark(fastexcel_read, plain_data_xlsx)\n\n\n@pytest.mark.benchmark(group=\"xlsx\")\ndef test_pyxl_with_formulas(benchmark, formula_xlsx):\n    benchmark(pyxl_read, formula_xlsx)\n\n\n@pytest.mark.benchmark(group=\"xlsx\")\ndef test_fastexcel_with_formulas(benchmark, formula_xlsx):\n    benchmark(fastexcel_read, formula_xlsx)\n"
  },
  {
    "path": "python/tests/conftest.py",
    "content": "from __future__ import annotations\n\nfrom datetime import datetime\nfrom typing import Any\n\nimport pytest\n\n\n@pytest.fixture\ndef expected_data_sheet_null_strings() -> dict[str, list[Any]]:\n    return {\n        \"FIRST_LABEL\": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0],\n        \"SECOND_LABEL\": [\"AA\", \"BB\", \"CC\", \"DD\", \"EE\", \"FF\", \"GG\", \"HH\", \"II\", \"JJ\"],\n        \"DATES_AND_NULLS\": [\n            None,\n            None,\n            None,\n            datetime(2022, 12, 19, 0, 0),\n            datetime(2022, 8, 26, 0, 0),\n            datetime(2023, 5, 6, 0, 0),\n            datetime(2023, 3, 20, 0, 0),\n            datetime(2022, 8, 29, 0, 0),\n            None,\n            None,\n        ],\n        \"TIMESTAMPS_AND_NULLS\": [\n            None,\n            None,\n            datetime(2023, 2, 18, 6, 13, 56, 730000),\n            datetime(2022, 9, 20, 20, 0, 7, 50000),\n            datetime(2022, 9, 24, 17, 4, 31, 236000),\n            None,\n            None,\n            None,\n            datetime(2022, 9, 14, 1, 50, 58, 390000),\n            datetime(2022, 10, 21, 17, 20, 12, 223000),\n        ],\n        \"INTS_AND_NULLS\": [\n            2076.0,\n            2285.0,\n            39323.0,\n            None,\n            None,\n            None,\n            11953.0,\n            None,\n            30192.0,\n            None,\n        ],\n        \"FLOATS_AND_NULLS\": [\n            141.02023312814603,\n            778.0655928608671,\n            None,\n            497.60307287584106,\n            627.446112513911,\n            None,\n            None,\n            None,\n            488.3509486743364,\n            None,\n        ],\n    }\n"
  },
  {
    "path": "python/tests/test_alias_generation.py",
    "content": "from __future__ import annotations\n\nimport fastexcel\nimport pandas as pd\nimport polars as pl\nimport pytest\nfrom pandas.testing import assert_frame_equal as pd_assert_frame_equal\nfrom polars.testing import assert_frame_equal as pl_assert_frame_equal\n\nfrom .utils import path_for_fixture\n\n\n@pytest.mark.parametrize(\n    \"use_columns\", [None, [0, 1, 2], [\"col\", \"col_1\", \"col_2\"], [0, \"col_1\", 2]]\n)\ndef test_alias_generation_with_use_columns(use_columns: list[str] | list[int] | None) -> None:\n    excel_reader = fastexcel.read_excel(\n        path_for_fixture(\"fixture-single-sheet-duplicated-columns.xlsx\")\n    )\n\n    sheet = excel_reader.load_sheet(0, use_columns=use_columns)\n    assert [col.name for col in sheet.available_columns()] == [\"col\", \"col_1\", \"col_2\"]\n\n    pd_assert_frame_equal(\n        sheet.to_pandas(),\n        pd.DataFrame(\n            {\n                \"col\": [1.0, 2.0],\n                \"col_1\": [2019.0, 2020.0],\n                \"col_2\": pd.Series(\n                    [pd.Timestamp(\"2019-02-01 00:01:02\"), pd.Timestamp(\"2014-01-02 06:01:02\")]\n                ).astype(\"datetime64[ms]\"),\n            }\n        ),\n    )\n    pl_assert_frame_equal(\n        sheet.to_polars(),\n        pl.DataFrame(\n            {\n                \"col\": [1.0, 2.0],\n                \"col_1\": [2019.0, 2020.0],\n                \"col_2\": [\"2019-02-01 00:01:02\", \"2014-01-02 06:01:02\"],\n            }\n        ).with_columns(pl.col(\"col_2\").str.strptime(pl.Datetime, \"%F %T\").dt.cast_time_unit(\"ms\")),\n    )\n"
  },
  {
    "path": "python/tests/test_column_selection.py",
    "content": "# ruff: noqa: E501\nfrom __future__ import annotations\n\nimport re\nfrom typing import Any\n\nimport fastexcel\nimport numpy as np\nimport pandas as pd\nimport polars as pl\nimport pytest\nfrom pandas.testing import assert_frame_equal as pd_assert_frame_equal\nfrom polars.testing import assert_frame_equal as pl_assert_frame_equal\n\nfrom .utils import path_for_fixture\n\n\n@pytest.fixture\ndef excel_reader_single_sheet() -> fastexcel.ExcelReader:\n    return fastexcel.read_excel(path_for_fixture(\"fixture-single-sheet.xlsx\"))\n\n\n@pytest.fixture\ndef expected_column_info() -> list[fastexcel.ColumnInfo]:\n    return [\n        fastexcel.ColumnInfo(\n            name=\"Month\",\n            index=0,\n            absolute_index=0,\n            column_name_from=\"looked_up\",\n            dtype=\"float\",\n            dtype_from=\"guessed\",\n        ),\n        fastexcel.ColumnInfo(\n            name=\"Year\",\n            index=1,\n            absolute_index=1,\n            column_name_from=\"looked_up\",\n            dtype=\"float\",\n            dtype_from=\"guessed\",\n        ),\n    ]\n\n\ndef test_single_sheet_all_columns(\n    excel_reader_single_sheet: fastexcel.ExcelReader,\n    expected_column_info: list[fastexcel.ColumnInfo],\n) -> None:\n    sheet = excel_reader_single_sheet.load_sheet(0)\n\n    sheet_explicit_arg = excel_reader_single_sheet.load_sheet(0, use_columns=None)\n    assert sheet.selected_columns == expected_column_info\n    assert sheet.available_columns() == expected_column_info\n\n    expected = {\"Month\": [1.0, 2.0], \"Year\": [2019.0, 2020.0]}\n    expected_pd_df = pd.DataFrame(expected)\n    expected_pl_df = pl.DataFrame(expected)\n\n    pd_df = sheet.to_pandas()\n    pd_assert_frame_equal(pd_df, expected_pd_df)\n    pd_df_explicit_arg = sheet_explicit_arg.to_pandas()\n    pd_assert_frame_equal(pd_df_explicit_arg, expected_pd_df)\n\n    pl_df = sheet.to_polars()\n    pl_assert_frame_equal(pl_df, expected_pl_df)\n    
pl_df_explicit_arg = sheet_explicit_arg.to_polars()\n    pl_assert_frame_equal(pl_df_explicit_arg, expected_pl_df)\n\n\ndef test_single_sheet_subset_by_str(\n    excel_reader_single_sheet: fastexcel.ExcelReader,\n    expected_column_info: list[fastexcel.ColumnInfo],\n) -> None:\n    expected = {\"Month\": [1.0, 2.0], \"Year\": [2019.0, 2020.0]}\n\n    # looks like mypy 1.8 became more stupid\n    sheets: list[str | int] = [0, \"January\"]\n    for sheet_name_or_idx in sheets:\n        for idx, col in enumerate([\"Month\", \"Year\"]):\n            sheet = excel_reader_single_sheet.load_sheet(sheet_name_or_idx, use_columns=[col])\n            assert sheet.selected_columns == [expected_column_info[idx]]\n            assert sheet.available_columns() == expected_column_info\n\n            pd_df = sheet.to_pandas()\n            pd_assert_frame_equal(pd_df, pd.DataFrame({col: expected[col]}))\n\n            pl_df = sheet.to_polars()\n            pl_assert_frame_equal(pl_df, pl.DataFrame({col: expected[col]}))\n\n\ndef test_single_sheet_subset_by_index(\n    excel_reader_single_sheet: fastexcel.ExcelReader,\n    expected_column_info: list[fastexcel.ColumnInfo],\n) -> None:\n    expected = {\"Month\": [1.0, 2.0], \"Year\": [2019.0, 2020.0]}\n\n    sheets: list[str | int] = [0, \"January\"]\n    for sheet_name_or_idx in sheets:\n        for idx, col_name in enumerate([\"Month\", \"Year\"]):\n            sheet = excel_reader_single_sheet.load_sheet(sheet_name_or_idx, use_columns=[idx])\n            assert sheet.selected_columns == [expected_column_info[idx]]\n            assert sheet.available_columns() == expected_column_info\n\n            pd_df = sheet.to_pandas()\n            pd_assert_frame_equal(pd_df, pd.DataFrame({col_name: expected[col_name]}))\n\n            pl_df = sheet.to_polars()\n            pl_assert_frame_equal(pl_df, pl.DataFrame({col_name: expected[col_name]}))\n\n\n@pytest.fixture\ndef excel_reader_single_sheet_with_unnamed_columns() -> 
fastexcel.ExcelReader:\n    return fastexcel.read_excel(path_for_fixture(\"fixture-multi-sheet.xlsx\"))\n\n\n@pytest.fixture\ndef single_sheet_with_unnamed_columns_expected() -> dict[str, list[Any]]:\n    return {\n        \"col1\": [2.0, 3.0],\n        \"__UNNAMED__1\": [1.5, 2.5],\n        \"col3\": [\"hello\", \"world\"],\n        \"__UNNAMED__3\": [-5.0, -6.0],\n        \"col5\": [\"a\", \"b\"],\n    }\n\n\n@pytest.fixture\ndef sheet_with_unnamed_columns_expected_column_info() -> list[fastexcel.ColumnInfo]:\n    return [\n        fastexcel.ColumnInfo(\n            name=\"col1\",\n            index=0,\n            absolute_index=0,\n            column_name_from=\"looked_up\",\n            dtype=\"float\",\n            dtype_from=\"guessed\",\n        ),\n        fastexcel.ColumnInfo(\n            name=\"__UNNAMED__1\",\n            index=1,\n            absolute_index=1,\n            column_name_from=\"generated\",\n            dtype=\"float\",\n            dtype_from=\"guessed\",\n        ),\n        fastexcel.ColumnInfo(\n            name=\"col3\",\n            index=2,\n            absolute_index=2,\n            column_name_from=\"looked_up\",\n            dtype=\"string\",\n            dtype_from=\"guessed\",\n        ),\n        fastexcel.ColumnInfo(\n            name=\"__UNNAMED__3\",\n            index=3,\n            absolute_index=3,\n            column_name_from=\"generated\",\n            dtype=\"float\",\n            dtype_from=\"guessed\",\n        ),\n        fastexcel.ColumnInfo(\n            name=\"col5\",\n            index=4,\n            absolute_index=4,\n            column_name_from=\"looked_up\",\n            dtype=\"string\",\n            dtype_from=\"guessed\",\n        ),\n    ]\n\n\ndef test_single_sheet_with_unnamed_columns(\n    excel_reader_single_sheet_with_unnamed_columns: fastexcel.ExcelReader,\n    single_sheet_with_unnamed_columns_expected: dict[str, list[Any]],\n    sheet_with_unnamed_columns_expected_column_info: 
list[fastexcel.ColumnInfo],\n) -> None:\n    use_columns_str = [\"col1\", \"col3\", \"__UNNAMED__3\"]\n    use_columns_idx = [0, 2, 3]\n    expected = {\n        k: v for k, v in single_sheet_with_unnamed_columns_expected.items() if k in use_columns_str\n    }\n\n    sheet = excel_reader_single_sheet_with_unnamed_columns.load_sheet(\n        \"With unnamed columns\", use_columns=use_columns_str\n    )\n    assert sheet.selected_columns == [\n        sheet_with_unnamed_columns_expected_column_info[0],\n        sheet_with_unnamed_columns_expected_column_info[2],\n        sheet_with_unnamed_columns_expected_column_info[3],\n    ]\n    assert sheet.available_columns() == sheet_with_unnamed_columns_expected_column_info\n\n    pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(expected))\n    pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected))\n\n    sheet = excel_reader_single_sheet_with_unnamed_columns.load_sheet(\n        \"With unnamed columns\", use_columns=use_columns_idx\n    )\n    assert sheet.selected_columns == [\n        sheet_with_unnamed_columns_expected_column_info[0],\n        sheet_with_unnamed_columns_expected_column_info[2],\n        sheet_with_unnamed_columns_expected_column_info[3],\n    ]\n    assert sheet.available_columns() == sheet_with_unnamed_columns_expected_column_info\n\n    pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(expected))\n    pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected))\n\n\ndef test_single_sheet_with_unnamed_columns_and_pagination(\n    excel_reader_single_sheet_with_unnamed_columns: fastexcel.ExcelReader,\n    single_sheet_with_unnamed_columns_expected: dict[str, list[Any]],\n    sheet_with_unnamed_columns_expected_column_info: list[fastexcel.ColumnInfo],\n) -> None:\n    use_columns_str = [\"col1\", \"col3\", \"__UNNAMED__3\"]\n    use_columns_idx = [0, 2, 3]\n\n    # first row only\n    expected = {\n        k: v[:1]\n        for k, v in 
single_sheet_with_unnamed_columns_expected.items()\n        if k in use_columns_str\n    }\n\n    sheet = excel_reader_single_sheet_with_unnamed_columns.load_sheet(\n        \"With unnamed columns\", use_columns=use_columns_str, n_rows=1\n    )\n    assert sheet.available_columns() == sheet_with_unnamed_columns_expected_column_info\n\n    pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(expected))\n    pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected))\n\n    sheet = excel_reader_single_sheet_with_unnamed_columns.load_sheet(\n        \"With unnamed columns\", use_columns=use_columns_idx, n_rows=1\n    )\n    assert sheet.available_columns() == sheet_with_unnamed_columns_expected_column_info\n\n    pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(expected))\n    pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected))\n\n    # second row\n    expected = {\n        k: v[1:]\n        for k, v in single_sheet_with_unnamed_columns_expected.items()\n        if k in use_columns_str\n    }\n\n    sheet = excel_reader_single_sheet_with_unnamed_columns.load_sheet(\n        \"With unnamed columns\", use_columns=use_columns_str, skip_rows=1\n    )\n    assert sheet.available_columns() == sheet_with_unnamed_columns_expected_column_info\n\n    pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(expected))\n    pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected))\n\n    sheet = excel_reader_single_sheet_with_unnamed_columns.load_sheet(\n        \"With unnamed columns\", use_columns=use_columns_idx, skip_rows=1\n    )\n    assert sheet.available_columns() == sheet_with_unnamed_columns_expected_column_info\n\n    pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(expected))\n    pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected))\n\n\ndef test_single_sheet_with_unnamed_columns_and_pagination_and_column_names(\n    excel_reader_single_sheet_with_unnamed_columns: fastexcel.ExcelReader,\n) -> None:\n    use_columns_str = 
[\"col0\", \"col2\", \"col3\"]\n    use_columns_idx = [0, 2, 3]\n    expected: dict[str, list[Any]] = {\n        \"col0\": [2.0, 3.0],\n        \"col1\": [\"hello\", \"world\"],\n        \"col2\": [-5.0, -6.0],\n    }\n    column_names = [f\"col{i}\" for i in range(3)]\n    expected_columns_names = [\"col0\", \"__UNNAMED__1\", \"col1\", \"col2\", \"__UNNAMED__4\"]\n\n    # skipping the header row only\n    with pytest.raises(\n        fastexcel.InvalidParametersError,\n        match='use_columns can only contain integers when used with columns_names, got \"col0\"',\n    ):\n        excel_reader_single_sheet_with_unnamed_columns.load_sheet(\n            \"With unnamed columns\",\n            use_columns=use_columns_str,\n            skip_rows=1,\n            column_names=column_names,\n        )\n\n    sheet = excel_reader_single_sheet_with_unnamed_columns.load_sheet(\n        \"With unnamed columns\", use_columns=use_columns_idx, skip_rows=1, column_names=column_names\n    )\n    assert [col.name for col in sheet.available_columns()] == expected_columns_names\n\n    pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(expected))\n    pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected))\n\n    # skipping the header row + first data row\n    expected_first_row_skipped = {k: v[1:] for k, v in expected.items()}\n\n    sheet = excel_reader_single_sheet_with_unnamed_columns.load_sheet(\n        \"With unnamed columns\", use_columns=use_columns_idx, skip_rows=2, column_names=column_names\n    )\n    assert [col.name for col in sheet.available_columns()] == expected_columns_names\n\n    pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(expected_first_row_skipped))\n    pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected_first_row_skipped))\n\n\ndef test_single_sheet_with_unnamed_columns_and_str_range(\n    excel_reader_single_sheet_with_unnamed_columns: fastexcel.ExcelReader,\n    single_sheet_with_unnamed_columns_expected: dict[str, 
list[Any]],\n    sheet_with_unnamed_columns_expected_column_info: list[fastexcel.ColumnInfo],\n) -> None:\n    use_columns_str = \"A,C:E\"\n    expected = {\n        k: v\n        for k, v in single_sheet_with_unnamed_columns_expected.items()\n        if k in [\"col1\", \"col3\", \"__UNNAMED__3\", \"col5\"]\n    }\n    sheet = excel_reader_single_sheet_with_unnamed_columns.load_sheet(\n        \"With unnamed columns\", use_columns=use_columns_str\n    )\n    assert sheet.selected_columns == (\n        sheet_with_unnamed_columns_expected_column_info[:1]\n        + sheet_with_unnamed_columns_expected_column_info[2:]\n    )\n    assert sheet.available_columns() == sheet_with_unnamed_columns_expected_column_info\n    pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(expected))\n    pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected))\n\n\ndef test_single_sheet_with_unnamed_columns_and_open_ended_range(\n    excel_reader_single_sheet_with_unnamed_columns: fastexcel.ExcelReader,\n    single_sheet_with_unnamed_columns_expected: dict[str, list[Any]],\n    sheet_with_unnamed_columns_expected_column_info: list[fastexcel.ColumnInfo],\n) -> None:\n    # Test B: (should get columns B, C, D, E - indices 1, 2, 3, 4)\n    use_columns_str = \"B:\"\n    expected = {\n        k: v\n        for k, v in single_sheet_with_unnamed_columns_expected.items()\n        if k in [\"__UNNAMED__1\", \"col3\", \"__UNNAMED__3\", \"col5\"]\n    }\n    sheet = excel_reader_single_sheet_with_unnamed_columns.load_sheet(\n        \"With unnamed columns\", use_columns=use_columns_str\n    )\n    assert sheet.selected_columns == sheet_with_unnamed_columns_expected_column_info[1:]\n    assert sheet.available_columns() == sheet_with_unnamed_columns_expected_column_info\n    pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(expected))\n    pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected))\n\n\ndef test_single_sheet_with_unnamed_columns_and_open_ended_range_from_start(\n    
excel_reader_single_sheet_with_unnamed_columns: fastexcel.ExcelReader,\n    single_sheet_with_unnamed_columns_expected: dict[str, list[Any]],\n    sheet_with_unnamed_columns_expected_column_info: list[fastexcel.ColumnInfo],\n) -> None:\n    # Test A: (should get all columns)\n    use_columns_str = \"A:\"\n    expected = single_sheet_with_unnamed_columns_expected\n    sheet = excel_reader_single_sheet_with_unnamed_columns.load_sheet(\n        \"With unnamed columns\", use_columns=use_columns_str\n    )\n    assert sheet.selected_columns == sheet_with_unnamed_columns_expected_column_info\n    assert sheet.available_columns() == sheet_with_unnamed_columns_expected_column_info\n    pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(expected))\n    pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected))\n\n\ndef test_single_sheet_with_unnamed_columns_and_mixed_open_ended_range(\n    excel_reader_single_sheet_with_unnamed_columns: fastexcel.ExcelReader,\n    single_sheet_with_unnamed_columns_expected: dict[str, list[Any]],\n    sheet_with_unnamed_columns_expected_column_info: list[fastexcel.ColumnInfo],\n) -> None:\n    # Test A,C: (should get column A and columns from C onwards - indices 0, 2, 3, 4)\n    use_columns_str = \"A,C:\"\n    expected = {\n        k: v\n        for k, v in single_sheet_with_unnamed_columns_expected.items()\n        if k in [\"col1\", \"col3\", \"__UNNAMED__3\", \"col5\"]\n    }\n    sheet = excel_reader_single_sheet_with_unnamed_columns.load_sheet(\n        \"With unnamed columns\", use_columns=use_columns_str\n    )\n    expected_selected_cols = [\n        sheet_with_unnamed_columns_expected_column_info[0]\n    ] + sheet_with_unnamed_columns_expected_column_info[2:]\n    assert sheet.selected_columns == expected_selected_cols\n    assert sheet.available_columns() == sheet_with_unnamed_columns_expected_column_info\n    pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(expected))\n    pl_assert_frame_equal(sheet.to_polars(), 
pl.DataFrame(expected))\n\n\ndef test_single_sheet_with_unnamed_columns_and_from_beginning_range(\n    excel_reader_single_sheet_with_unnamed_columns: fastexcel.ExcelReader,\n    single_sheet_with_unnamed_columns_expected: dict[str, list[Any]],\n    sheet_with_unnamed_columns_expected_column_info: list[fastexcel.ColumnInfo],\n) -> None:\n    # Test :C (should get columns A, B, C - indices 0, 1, 2)\n    use_columns_str = \":C\"\n    expected = {\n        k: v\n        for k, v in single_sheet_with_unnamed_columns_expected.items()\n        if k in [\"col1\", \"__UNNAMED__1\", \"col3\"]\n    }\n    sheet = excel_reader_single_sheet_with_unnamed_columns.load_sheet(\n        \"With unnamed columns\", use_columns=use_columns_str\n    )\n    assert sheet.selected_columns == sheet_with_unnamed_columns_expected_column_info[:3]\n    assert sheet.available_columns() == sheet_with_unnamed_columns_expected_column_info\n    pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(expected))\n    pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected))\n\n\ndef test_single_sheet_with_unnamed_columns_and_from_beginning_range_single_column(\n    excel_reader_single_sheet_with_unnamed_columns: fastexcel.ExcelReader,\n    single_sheet_with_unnamed_columns_expected: dict[str, list[Any]],\n    sheet_with_unnamed_columns_expected_column_info: list[fastexcel.ColumnInfo],\n) -> None:\n    # Test :A (should get only column A - index 0)\n    use_columns_str = \":A\"\n    expected = {\n        k: v for k, v in single_sheet_with_unnamed_columns_expected.items() if k in [\"col1\"]\n    }\n    sheet = excel_reader_single_sheet_with_unnamed_columns.load_sheet(\n        \"With unnamed columns\", use_columns=use_columns_str\n    )\n    assert sheet.selected_columns == [sheet_with_unnamed_columns_expected_column_info[0]]\n    assert sheet.available_columns() == sheet_with_unnamed_columns_expected_column_info\n    pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(expected))\n    
pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected))\n\n\ndef test_single_sheet_with_unnamed_columns_and_complex_mixed_pattern(\n    excel_reader_single_sheet_with_unnamed_columns: fastexcel.ExcelReader,\n    single_sheet_with_unnamed_columns_expected: dict[str, list[Any]],\n    sheet_with_unnamed_columns_expected_column_info: list[fastexcel.ColumnInfo],\n) -> None:\n    # Test A,:B,D,E: (should get A, A,B again (deduplicated), D, and E)\n    # This effectively becomes A,B,D,E (columns 0,1,3,4)\n    use_columns_str = \"A,:B,D,E:\"\n    expected = {\n        k: v\n        for k, v in single_sheet_with_unnamed_columns_expected.items()\n        if k in [\"col1\", \"__UNNAMED__1\", \"__UNNAMED__3\", \"col5\"]\n    }\n    sheet = excel_reader_single_sheet_with_unnamed_columns.load_sheet(\n        \"With unnamed columns\", use_columns=use_columns_str\n    )\n    # Expected: columns A, A,B (from :B), D, E (from E:)\n    # After deduplication: 0,1,3,4\n    expected_selected_cols = [\n        sheet_with_unnamed_columns_expected_column_info[0],  # A\n        sheet_with_unnamed_columns_expected_column_info[1],  # B\n        sheet_with_unnamed_columns_expected_column_info[3],  # D\n        sheet_with_unnamed_columns_expected_column_info[4],  # E\n    ]\n    assert sheet.selected_columns == expected_selected_cols\n    assert sheet.available_columns() == sheet_with_unnamed_columns_expected_column_info\n    pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(expected))\n    pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected))\n\n\ndef test_single_sheet_invalid_column_indices_negative_integer(\n    excel_reader_single_sheet_with_unnamed_columns: fastexcel.ExcelReader,\n) -> None:\n    expected_message = \"\"\"invalid parameters: expected list[int] | list[str], got [-2]\nContext:\n    0: could not determine selected columns from provided object: [-2]\n    1: expected selected columns to be list[str] | list[int] | str | Callable[[ColumnInfo], bool] | None, 
got Some([-2])\n\"\"\"\n    with pytest.raises(fastexcel.InvalidParametersError, match=re.escape(expected_message)):\n        excel_reader_single_sheet_with_unnamed_columns.load_sheet(0, use_columns=[-2])\n\n\ndef test_single_sheet_invalid_column_indices_empty_list(\n    excel_reader_single_sheet_with_unnamed_columns: fastexcel.ExcelReader,\n) -> None:\n    expected_message = \"\"\"invalid parameters: list of selected columns is empty\nContext:\n    0: could not determine selected columns from provided object: []\n    1: expected selected columns to be list[str] | list[int] | str | Callable[[ColumnInfo], bool] | None, got Some([])\n\"\"\"\n    with pytest.raises(fastexcel.InvalidParametersError, match=re.escape(expected_message)):\n        excel_reader_single_sheet_with_unnamed_columns.load_sheet(0, use_columns=[])\n\n\ndef test_single_sheet_invalid_column_indices_column_does_not_exist_str(\n    excel_reader_single_sheet_with_unnamed_columns: fastexcel.ExcelReader,\n) -> None:\n    expected_message = \"\"\"column with name \\\"nope\\\" not found\nContext:\n    0: available columns are: .*\n\"\"\"\n    with pytest.raises(fastexcel.ColumnNotFoundError, match=expected_message):\n        excel_reader_single_sheet_with_unnamed_columns.load_sheet(0, use_columns=[\"nope\"])\n\n\ndef test_single_sheet_invalid_column_indices_column_does_not_exist_int(\n    excel_reader_single_sheet_with_unnamed_columns: fastexcel.ExcelReader,\n) -> None:\n    expected_message = \"\"\"column at index 42 not found\nContext:\n    0: available columns are: .*\n\"\"\"\n    with pytest.raises(fastexcel.ColumnNotFoundError, match=expected_message):\n        excel_reader_single_sheet_with_unnamed_columns.load_sheet(0, use_columns=[42])\n\n\ndef test_use_columns_with_column_names() -> None:\n    excel_reader = fastexcel.read_excel(path_for_fixture(\"fixture-single-sheet-with-types.xlsx\"))\n\n    sheet = excel_reader.load_sheet(\n        0,\n        use_columns=[1, 2],\n        header_row=None,\n    
    skip_rows=1,\n        column_names=[\"bools_renamed\", \"dates_renamed\"],\n    )\n\n    assert sheet.available_columns() == [\n        fastexcel.ColumnInfo(\n            name=\"__UNNAMED__0\",\n            column_name_from=\"generated\",\n            index=0,\n            absolute_index=0,\n            dtype=\"float\",\n            dtype_from=\"guessed\",\n        ),\n        fastexcel.ColumnInfo(\n            name=\"bools_renamed\",\n            index=1,\n            absolute_index=1,\n            dtype=\"boolean\",\n            dtype_from=\"guessed\",\n            column_name_from=\"provided\",\n        ),\n        fastexcel.ColumnInfo(\n            name=\"dates_renamed\",\n            index=2,\n            absolute_index=2,\n            dtype=\"datetime\",\n            dtype_from=\"guessed\",\n            column_name_from=\"provided\",\n        ),\n        fastexcel.ColumnInfo(\n            name=\"__UNNAMED__3\",\n            index=3,\n            absolute_index=3,\n            dtype=\"float\",\n            dtype_from=\"guessed\",\n            column_name_from=\"generated\",\n        ),\n    ]\n\n    pd_assert_frame_equal(\n        sheet.to_pandas(),\n        pd.DataFrame(\n            {\n                \"bools_renamed\": [True, False, True],\n                \"dates_renamed\": pd.Series([pd.Timestamp(\"2022-03-02 05:43:04\")] * 3).astype(\n                    \"datetime64[ms]\"\n                ),\n            }\n        ),\n    )\n    pl_assert_frame_equal(\n        sheet.to_polars(),\n        pl.DataFrame(\n            {\n                \"bools_renamed\": [True, False, True],\n                \"dates_renamed\": [\"2022-03-02 05:43:04\"] * 3,\n            }\n        ).with_columns(\n            pl.col(\"dates_renamed\").str.strptime(pl.Datetime, \"%F %T\").dt.cast_time_unit(\"ms\")\n        ),\n    )\n\n\ndef test_use_columns_with_callable() -> None:\n    excel_reader = fastexcel.read_excel(path_for_fixture(\"fixture-multi-sheet.xlsx\"))\n\n    sheet = 
excel_reader.load_sheet(2)\n    assert (\n        [(c.name, c.dtype) for c in sheet.available_columns()]\n        == [(c.name, c.dtype) for c in sheet.selected_columns]\n        == [\n            (\"col1\", \"float\"),\n            (\"__UNNAMED__1\", \"float\"),\n            (\"col3\", \"string\"),\n            (\"__UNNAMED__3\", \"float\"),\n            (\"col5\", \"string\"),\n        ]\n    )\n\n    sheet = excel_reader.load_sheet(\n        2,\n        use_columns=lambda col: col.name.startswith(\"col\"),\n    )\n    assert [(c.name, c.dtype) for c in sheet.selected_columns] == [\n        (\"col1\", \"float\"),\n        (\"col3\", \"string\"),\n        (\"col5\", \"string\"),\n    ]\n\n    sheet = excel_reader.load_sheet(\n        2,\n        use_columns=lambda col: col.index % 2 == 1,\n    )\n    assert [(c.name, c.dtype) for c in sheet.selected_columns] == [\n        (\"__UNNAMED__1\", \"float\"),\n        (\"__UNNAMED__3\", \"float\"),\n    ]\n\n\ndef test_use_columns_with_bad_callable() -> None:\n    excel_reader = fastexcel.read_excel(path_for_fixture(\"fixture-multi-sheet.xlsx\"))\n    with pytest.raises(\n        fastexcel.InvalidParametersError,\n        match=re.escape(\"`use_columns` callable could not be called (TypeError: \"),\n    ):\n        excel_reader.load_sheet(\n            2,\n            use_columns=lambda: True,  # type: ignore\n        )\n\n    with pytest.raises(\n        fastexcel.InvalidParametersError, match=\"`use_columns` callable should return a boolean\"\n    ):\n        excel_reader.load_sheet(\n            2,\n            use_columns=lambda _: 42,  # type: ignore\n        )\n\n\ndef test_use_columns_with_eager_loading() -> None:\n    excel_reader = fastexcel.read_excel(path_for_fixture(\"fixture-single-sheet.xlsx\"))\n    expected_months = [1.0, 2.0]\n    expected_years = [2019.0, 2020.0]\n\n    # default\n    rb = excel_reader.load_sheet_eager(0)\n    assert rb.schema.names == [\"Month\", \"Year\"]\n    assert 
rb[\"Year\"].tolist() == expected_years\n    assert rb[\"Month\"].tolist() == expected_months\n\n    # changing order\n    rb = excel_reader.load_sheet_eager(0, use_columns=[\"Year\", \"Month\"])\n    assert rb.schema.names == [\"Year\", \"Month\"]\n    assert rb[\"Year\"].tolist() == expected_years\n    assert rb[\"Month\"].tolist() == expected_months\n\n    # subset\n    rb = excel_reader.load_sheet_eager(0, use_columns=[\"Year\"])\n    assert rb.schema.names == [\"Year\"]\n    assert rb[\"Year\"].tolist() == expected_years\n    assert \"Month\" not in (field.name for field in rb.schema)\n\n\n@pytest.mark.parametrize(\"excel_file\", [\"sheet-null-strings.xlsx\", \"sheet-null-strings-empty.xlsx\"])\ndef test_use_columns_dtypes_eager_loading(\n    excel_file: str, expected_data_sheet_null_strings: dict[str, list[Any]]\n) -> None:\n    expected_pl_df = pl.DataFrame(expected_data_sheet_null_strings).with_columns(\n        pl.col(\"DATES_AND_NULLS\").dt.cast_time_unit(\"ms\"),\n        pl.col(\"TIMESTAMPS_AND_NULLS\").dt.cast_time_unit(\"ms\"),\n    )\n    expected_pd_df = pd.DataFrame(expected_data_sheet_null_strings)\n    expected_pd_df[\"DATES_AND_NULLS\"] = expected_pd_df[\"DATES_AND_NULLS\"].dt.as_unit(\"ms\")\n    expected_pd_df[\"TIMESTAMPS_AND_NULLS\"] = expected_pd_df[\"TIMESTAMPS_AND_NULLS\"].dt.as_unit(\"ms\")\n\n    for use_columns in (\n        list(expected_data_sheet_null_strings.keys()),\n        [key for idx, key in enumerate(expected_data_sheet_null_strings.keys()) if idx % 2],\n        [key for idx, key in enumerate(expected_data_sheet_null_strings.keys()) if idx % 2 == 0],\n        list(reversed(expected_data_sheet_null_strings.keys())),\n        [\n            key\n            for idx, key in enumerate(reversed(expected_data_sheet_null_strings.keys()))\n            if idx % 2\n        ],\n        [\n            key\n            for idx, key in enumerate(reversed(expected_data_sheet_null_strings.keys()))\n            if idx % 2 == 0\n        ],\n   
 ):\n        excel_reader = fastexcel.read_excel(path_for_fixture(excel_file))\n        sheet = excel_reader.load_sheet_eager(0, use_columns=use_columns)\n        pd_df = sheet.to_pandas()\n        pl_df = pl.from_arrow(data=sheet)\n        assert isinstance(pl_df, pl.DataFrame)\n        sheet_lazy = excel_reader.load_sheet(0, use_columns=use_columns)\n        pl_df_lazy = sheet_lazy.to_polars()\n        pd_df_lazy = sheet_lazy.to_pandas()\n\n        pl_assert_frame_equal(pl_df_lazy, pl_df)\n        pd_assert_frame_equal(pd_df_lazy, pd_df)\n\n        pl_assert_frame_equal(expected_pl_df.select(use_columns), pl_df)\n        pd_assert_frame_equal(expected_pd_df[use_columns], pd_df)\n\n        assert pd_df.columns.to_list() == use_columns\n        assert pl_df.columns == use_columns\n\n\ndef test_use_columns_with_table() -> None:\n    excel_reader = fastexcel.read_excel(path_for_fixture(\"sheet-with-tables.xlsx\"))\n\n    table = excel_reader.load_table(\"users\", use_columns=[\"User Id\", \"FirstName\"])\n\n    expected_available_columns = [\n        fastexcel.ColumnInfo(\n            name=\"User Id\",\n            index=0,\n            absolute_index=0,\n            dtype=\"float\",\n            column_name_from=\"provided\",\n            dtype_from=\"guessed\",\n        ),\n        fastexcel.ColumnInfo(\n            name=\"FirstName\",\n            index=1,\n            absolute_index=1,\n            dtype=\"string\",\n            column_name_from=\"provided\",\n            dtype_from=\"guessed\",\n        ),\n        fastexcel.ColumnInfo(\n            name=\"__UNNAMED__2\",\n            index=2,\n            absolute_index=2,\n            dtype=\"string\",\n            column_name_from=\"generated\",\n            dtype_from=\"guessed\",\n        ),\n        fastexcel.ColumnInfo(\n            name=\"__UNNAMED__3\",\n            index=3,\n            absolute_index=3,\n            dtype=\"datetime\",\n            column_name_from=\"generated\",\n            
dtype_from=\"guessed\",\n        ),\n    ]\n\n    expected_selected_columns = [\n        fastexcel.ColumnInfo(\n            name=\"User Id\",\n            index=0,\n            absolute_index=0,\n            dtype=\"float\",\n            column_name_from=\"provided\",\n            dtype_from=\"guessed\",\n        ),\n        fastexcel.ColumnInfo(\n            name=\"FirstName\",\n            index=1,\n            absolute_index=1,\n            dtype=\"string\",\n            column_name_from=\"provided\",\n            dtype_from=\"guessed\",\n        ),\n    ]\n\n    assert table.available_columns() == expected_available_columns\n    assert table.selected_columns == expected_selected_columns\n\n    expected_pl_df = pl.DataFrame(\n        {\"User Id\": [1.0, 2.0, 5.0], \"FirstName\": [\"Peter\", \"John\", \"Hans\"]}\n    )\n    expected_pd_df = pd.DataFrame(\n        {\"User Id\": [1.0, 2.0, 5.0], \"FirstName\": [\"Peter\", \"John\", \"Hans\"]}\n    )\n\n    pl_df = table.to_polars()\n    pl_assert_frame_equal(pl_df, expected_pl_df)\n\n    pd_df = table.to_pandas()\n    pd_assert_frame_equal(pd_df, expected_pd_df)\n\n\ndef test_use_columns_with_table_and_provided_columns() -> None:\n    excel_reader = fastexcel.read_excel(path_for_fixture(\"sheet-with-tables.xlsx\"))\n\n    table = excel_reader.load_table(\n        \"users\", use_columns=[0, 2], column_names=[\"user_id\", \"last_name\"]\n    )\n\n    expected_available_columns = [\n        fastexcel.ColumnInfo(\n            name=\"user_id\",\n            index=0,\n            absolute_index=0,\n            dtype=\"float\",\n            column_name_from=\"provided\",\n            dtype_from=\"guessed\",\n        ),\n        fastexcel.ColumnInfo(\n            name=\"__UNNAMED__1\",\n            index=1,\n            absolute_index=1,\n            dtype=\"string\",\n            column_name_from=\"generated\",\n            dtype_from=\"guessed\",\n        ),\n        fastexcel.ColumnInfo(\n            
name=\"last_name\",\n            index=2,\n            absolute_index=2,\n            dtype=\"string\",\n            column_name_from=\"provided\",\n            dtype_from=\"guessed\",\n        ),\n        fastexcel.ColumnInfo(\n            name=\"__UNNAMED__3\",\n            index=3,\n            absolute_index=3,\n            dtype=\"datetime\",\n            column_name_from=\"generated\",\n            dtype_from=\"guessed\",\n        ),\n    ]\n\n    expected_selected_columns = [\n        fastexcel.ColumnInfo(\n            name=\"user_id\",\n            index=0,\n            absolute_index=0,\n            dtype=\"float\",\n            column_name_from=\"provided\",\n            dtype_from=\"guessed\",\n        ),\n        fastexcel.ColumnInfo(\n            name=\"last_name\",\n            index=2,\n            absolute_index=2,\n            dtype=\"string\",\n            column_name_from=\"provided\",\n            dtype_from=\"guessed\",\n        ),\n    ]\n\n    assert table.available_columns() == expected_available_columns\n    assert table.selected_columns == expected_selected_columns\n\n    expected_pl_df = pl.DataFrame(\n        {\"user_id\": [1.0, 2.0, 5.0], \"last_name\": [\"Müller\", \"Meier\", \"Fricker\"]}\n    )\n    expected_pd_df = pd.DataFrame(\n        {\"user_id\": [1.0, 2.0, 5.0], \"last_name\": [\"Müller\", \"Meier\", \"Fricker\"]}\n    )\n\n    pl_df = table.to_polars()\n    pl_assert_frame_equal(pl_df, expected_pl_df)\n\n    pd_df = table.to_pandas()\n    pd_assert_frame_equal(pd_df, expected_pd_df)\n\n\ndef test_use_column_range_with_offset_without_table() -> None:\n    excel_reader = fastexcel.read_excel(path_for_fixture(\"sheet-and-table-with-offset.xlsx\"))\n\n    sheet = excel_reader.load_sheet(\"without-table\", use_columns=\"H:I\", header_row=9)\n\n    expected_pl_df = pl.DataFrame(\n        {\n            \"Column at H10\": [1.0, 2.0, 3.0],\n            \"Column at I10\": [4.0, 5.0, 6.0],\n        }\n    )\n\n    expected_pd_df = 
pd.DataFrame(\n        {\n            \"Column at H10\": [1.0, 2.0, 3.0],\n            \"Column at I10\": [4.0, 5.0, 6.0],\n        }\n    )\n\n    pl_df = sheet.to_polars()\n    pl_assert_frame_equal(pl_df, expected_pl_df)\n\n    pd_df = sheet.to_pandas()\n    pd_assert_frame_equal(pd_df, expected_pd_df)\n\n\ndef test_use_column_range_with_offset_with_table() -> None:\n    excel_reader = fastexcel.read_excel(path_for_fixture(\"sheet-and-table-with-offset.xlsx\"))\n\n    sheet = excel_reader.load_sheet(\"with-table\", use_columns=\"D:E\", header_row=4)\n\n    expected_pl_df = pl.DataFrame(\n        {\n            \"Column at D5\": [1.0, 2.0, 3.0, 4.0],\n            \"Column at E5\": [4.0, 5.0, 6.0, 8.0],\n        }\n    )\n\n    expected_pd_df = pd.DataFrame(\n        {\n            \"Column at D5\": [1.0, 2.0, 3.0, 4.0],\n            \"Column at E5\": [4.0, 5.0, 6.0, 8.0],\n        }\n    )\n\n    pl_df = sheet.to_polars()\n    pl_assert_frame_equal(pl_df, expected_pl_df)\n\n    pd_df = sheet.to_pandas()\n    pd_assert_frame_equal(pd_df, expected_pd_df)\n\n\ndef test_use_column_names_with_offset_table_by_index_and_name() -> None:\n    \"\"\"Index-based selection should resolve correctly when used with an offset table.\n\n    The selected indices should be absolute, and it should be able to handle both index-based\n    and name-based selection.\n    \"\"\"\n    excel_reader = fastexcel.read_excel(path_for_fixture(\"sheet-and-table-with-offset.xlsx\"))\n\n    # Mix name-based and index-based selection\n    # \"Column at D5\" is at table index 0, absolute index 3\n    # Index 4 is absolute index for column E\n    table = excel_reader.load_table(\"TableAtD5\", use_columns=[\"Column at D5\", 4])  # type:ignore[arg-type]\n\n    expected_selected_columns = [\n        fastexcel.ColumnInfo(\n            name=\"Column at D5\",\n            index=0,\n            absolute_index=3,\n            dtype=\"float\",\n            column_name_from=\"provided\",\n            
dtype_from=\"guessed\",\n        ),\n        fastexcel.ColumnInfo(\n            name=\"Column at E5\",\n            index=1,\n            absolute_index=4,\n            dtype=\"float\",\n            column_name_from=\"provided\",\n            dtype_from=\"guessed\",\n        ),\n    ]\n\n    assert table.selected_columns == expected_selected_columns\n\n    expected_pl_df = pl.DataFrame(\n        {\n            \"Column at D5\": [1.0, 2.0, 3.0, 4.0],\n            \"Column at E5\": [4.0, 5.0, 6.0, 8.0],\n        }\n    )\n    expected_pd_df = pd.DataFrame(\n        {\n            \"Column at D5\": [1.0, 2.0, 3.0, 4.0],\n            \"Column at E5\": [4.0, 5.0, 6.0, 8.0],\n        }\n    )\n\n    pl_df = table.to_polars()\n    pl_assert_frame_equal(pl_df, expected_pl_df)\n\n    pd_df = table.to_pandas()\n    pd_assert_frame_equal(pd_df, expected_pd_df)\n\n\ndef test_use_column_range_with_offset_with_table_and_specified_dtypes() -> None:\n    excel_reader = fastexcel.read_excel(path_for_fixture(\"sheet-and-table-with-offset.xlsx\"))\n\n    table_closed = excel_reader.load_table(\n        \"TableAtD5\", use_columns=\"D:E\", dtypes={3: \"int\", \"Column at E5\": \"string\"}\n    )\n\n    table_open_ended = excel_reader.load_table(\n        \"TableAtD5\", use_columns=\"D:\", dtypes={3: \"int\", \"Column at E5\": \"string\"}\n    )\n\n    expected_data = {\n        # Dtype should be int, looked up by index\n        \"Column at D5\": [1, 2, 3, 4],\n        # Dtype should be string, looked up by name\n        \"Column at E5\": [\"4\", \"5\", \"6\", \"8\"],\n    }\n    expected_column_info = [\n        fastexcel.ColumnInfo(\n            name=\"Column at D5\",\n            index=0,\n            absolute_index=3,\n            dtype=\"int\",\n            dtype_from=\"provided_by_index\",\n            column_name_from=\"provided\",\n        ),\n        fastexcel.ColumnInfo(\n            name=\"Column at E5\",\n            index=1,\n            absolute_index=4,\n            
dtype=\"string\",\n            dtype_from=\"provided_by_name\",\n            column_name_from=\"provided\",\n        ),\n    ]\n\n    assert table_closed.selected_columns == expected_column_info\n    assert table_open_ended.selected_columns == expected_column_info\n\n    expected_pl_df = pl.DataFrame(expected_data)\n    expected_pd_df = pd.DataFrame(expected_data)\n\n    pl_df_closed = table_closed.to_polars()\n    pl_assert_frame_equal(pl_df_closed, expected_pl_df)\n\n    pl_df_open_ended = table_open_ended.to_polars()\n    pl_assert_frame_equal(pl_df_open_ended, expected_pl_df)\n\n    pd_df_closed = table_closed.to_pandas()\n    pd_assert_frame_equal(pd_df_closed, expected_pd_df)\n\n    pd_df_open_ended = table_open_ended.to_pandas()\n    pd_assert_frame_equal(pd_df_open_ended, expected_pd_df)\n\n\ndef test_use_column_range_with_offset_with_sheet_and_specified_dtypes() -> None:\n    excel_reader = fastexcel.read_excel(path_for_fixture(\"sheet-and-table-with-offset.xlsx\"))\n\n    sheet_closed = excel_reader.load_sheet(\n        \"without-table\",\n        use_columns=\"H:K\",\n        header_row=9,\n        dtypes={7: \"int\", \"Column at I10\": \"string\"},\n    )\n\n    sheet_open_ended = excel_reader.load_sheet(\n        \"without-table\",\n        use_columns=\"H:\",\n        header_row=9,\n        dtypes={7: \"int\", \"Column at I10\": \"string\"},\n    )\n\n    expected_data_polars = {\n        # Dtype should be int, looked up by index\n        \"Column at H10\": [1, 2, 3],\n        # Dtype should be string, looked up by name\n        \"Column at I10\": [\"4\", \"5\", \"6\"],\n        \"__UNNAMED__2\": pl.Series([None, None, None], dtype=pl.String),\n        \"Column at K10\": [7.0, 8.0, 9.0],\n    }\n    # In pandas 3, string columns use nan instead of None for missing values\n    pd_version = tuple(int(x) for x in pd.__version__.split(\".\")[:2])\n    na_value = np.nan if pd_version >= (3, 0) else None\n\n    expected_data_pandas = {\n        # Dtype 
should be int, looked up by index\n        \"Column at H10\": [1, 2, 3],\n        # Dtype should be string, looked up by name\n        \"Column at I10\": [\"4\", \"5\", \"6\"],\n        \"__UNNAMED__2\": [na_value, na_value, na_value],\n        \"Column at K10\": [7.0, 8.0, 9.0],\n    }\n    expected_column_info = [\n        fastexcel.ColumnInfo(\n            name=\"Column at H10\",\n            index=0,\n            absolute_index=7,\n            dtype=\"int\",\n            dtype_from=\"provided_by_index\",\n            column_name_from=\"looked_up\",\n        ),\n        fastexcel.ColumnInfo(\n            name=\"Column at I10\",\n            index=1,\n            absolute_index=8,\n            dtype=\"string\",\n            dtype_from=\"provided_by_name\",\n            column_name_from=\"looked_up\",\n        ),\n        fastexcel.ColumnInfo(\n            name=\"__UNNAMED__2\",\n            index=2,\n            absolute_index=9,\n            dtype=\"string\",\n            dtype_from=\"guessed\",\n            column_name_from=\"generated\",\n        ),\n        fastexcel.ColumnInfo(\n            name=\"Column at K10\",\n            index=3,\n            absolute_index=10,\n            dtype=\"float\",\n            dtype_from=\"guessed\",\n            column_name_from=\"looked_up\",\n        ),\n    ]\n\n    assert sheet_closed.selected_columns == expected_column_info\n    assert sheet_open_ended.selected_columns == expected_column_info\n\n    expected_pl_df = pl.DataFrame(expected_data_polars)\n    expected_pd_df = pd.DataFrame(expected_data_pandas)\n\n    pl_df_closed = sheet_closed.to_polars()\n    pl_assert_frame_equal(pl_df_closed, expected_pl_df)\n\n    pl_df_open_ended = sheet_open_ended.to_polars()\n    pl_assert_frame_equal(pl_df_open_ended, expected_pl_df)\n\n    pd_df_closed = sheet_closed.to_pandas()\n    pd_assert_frame_equal(pd_df_closed, expected_pd_df, check_dtype=False)\n\n    pd_df_open_ended = sheet_open_ended.to_pandas()\n    
pd_assert_frame_equal(pd_df_open_ended, expected_pd_df, check_dtype=False)\n"
  },
  {
    "path": "python/tests/test_defined_names.py",
    "content": "import fastexcel\nimport pytest\n\nfrom .utils import path_for_fixture\n\n\n@pytest.mark.parametrize(\"path\", (\"sheet-with-defined-names.xlsx\",))\ndef test_defined_names(path: str) -> None:\n    excel_reader = fastexcel.read_excel(path_for_fixture(path))\n    defined_names = excel_reader.defined_names()\n\n    expected_defined_names = [\n        fastexcel.DefinedName(name=\"AddingValues\", formula=\"SUM(sheet1!$K$5:$K$6)\"),\n        fastexcel.DefinedName(name=\"DefinedRange\", formula=\"sheet1!$A$5:$D$7\"),\n        fastexcel.DefinedName(name=\"NamedConstant\", formula=\"3.4\"),\n    ]\n\n    assert defined_names == expected_defined_names\n"
  },
  {
    "path": "python/tests/test_dtypes.py",
    "content": "from __future__ import annotations\n\nimport logging\nfrom datetime import date, datetime\nfrom typing import Any, Literal\n\nimport fastexcel\nimport numpy as np\nimport pandas as pd\nimport polars as pl\nimport pytest\nfrom pandas.testing import assert_frame_equal as pd_assert_frame_equal\nfrom polars.testing import assert_frame_equal as pl_assert_frame_equal\n\nfrom .utils import get_expected_pandas_dtype, path_for_fixture\n\n\n@pytest.fixture\ndef expected_data() -> dict[str, list[Any]]:\n    return {\n        \"Employee ID\": [\n            \"123456\",\n            \"44333\",\n            \"44333\",\n            \"87878\",\n            \"87878\",\n            \"US00011\",\n            \"135967\",\n            \"IN86868\",\n            \"IN86868\",\n        ],\n        \"Employee Name\": [\n            \"Test1\",\n            \"Test2\",\n            \"Test2\",\n            \"Test3\",\n            \"Test3\",\n            \"Test4\",\n            \"Test5\",\n            \"Test6\",\n            \"Test6\",\n        ],\n        \"Date\": [datetime(2023, 7, 21)] * 9,\n        \"Details\": [\"Healthcare\"] * 7 + [\"Something\"] * 2,\n        \"Asset ID\": [\"84444\"] * 7 + [\"ABC123\"] * 2,\n        \"Mixed dates\": [\"2023-07-21 00:00:00\"] * 6 + [\"July 23rd\"] * 3,\n        \"Mixed bools\": [\"true\"] * 5 + [\"false\"] * 3 + [\"other\"],\n    }\n\n\ndef test_sheet_with_mixed_dtypes(expected_data: dict[str, list[Any]]) -> None:\n    excel_reader = fastexcel.read_excel(path_for_fixture(\"fixture-multi-dtypes-columns.xlsx\"))\n    sheet = excel_reader.load_sheet(0)\n\n    pd_df = sheet.to_pandas()\n    pd_assert_frame_equal(pd_df, pd.DataFrame(expected_data).astype({\"Date\": \"datetime64[ms]\"}))\n\n    pl_df = sheet.to_polars()\n    pl_assert_frame_equal(\n        pl_df, pl.DataFrame(expected_data, schema_overrides={\"Date\": pl.Datetime(time_unit=\"ms\")})\n    )\n\n\ndef test_sheet_with_mixed_dtypes_and_sample_rows(expected_data: dict[str, 
list[Any]]) -> None:\n    excel_reader = fastexcel.read_excel(path_for_fixture(\"fixture-multi-dtypes-columns.xlsx\"))\n\n    # Since we skip rows here, the dtypes should be correctly guessed, even if we only check 5 rows\n    sheet = excel_reader.load_sheet(0, schema_sample_rows=5, skip_rows=5)\n\n    expected_data_subset = {col_name: values[5:] for col_name, values in expected_data.items()}\n    pd_df = sheet.to_pandas()\n    pd_assert_frame_equal(\n        pd_df, pd.DataFrame(expected_data_subset).astype({\"Date\": \"datetime64[ms]\"})\n    )\n\n    pl_df = sheet.to_polars()\n    pl_assert_frame_equal(\n        pl_df,\n        pl.DataFrame(expected_data_subset, schema_overrides={\"Date\": pl.Datetime(time_unit=\"ms\")}),\n    )\n\n    # Guess the sheet's dtypes on 5 rows only\n    sheet = excel_reader.load_sheet(0, schema_sample_rows=5)\n    # String fields should not have been loaded\n    expected_data[\"Employee ID\"] = [\n        123456.0,\n        44333.0,\n        44333.0,\n        87878.0,\n        87878.0,\n        None,\n        135967.0,\n        None,\n        None,\n    ]\n    expected_data[\"Asset ID\"] = [84444.0] * 7 + [None] * 2\n    expected_data[\"Mixed dates\"] = [datetime(2023, 7, 21)] * 6 + [None] * 3\n    expected_data[\"Mixed bools\"] = [True] * 5 + [False] * 3 + [None]\n\n    pd_df = sheet.to_pandas()\n    pd_assert_frame_equal(\n        pd_df,\n        pd.DataFrame(expected_data).astype(\n            {\n                \"Date\": \"datetime64[ms]\",\n                \"Mixed dates\": \"datetime64[ms]\",\n            }\n        ),\n    )\n\n    pl_df = sheet.to_polars()\n    pl_assert_frame_equal(\n        pl_df,\n        pl.DataFrame(\n            expected_data,\n            schema_overrides={\n                \"Date\": pl.Datetime(time_unit=\"ms\"),\n                \"Mixed dates\": pl.Datetime(time_unit=\"ms\"),\n            },\n        ),\n    )\n\n\n@pytest.mark.parametrize(\"dtype_by_index\", (True, False))\n@pytest.mark.parametrize(\n 
   \"dtype,expected_data,expected_pl_dtype\",\n    [\n        (\"int\", [123456, 44333, 44333, 87878, 87878], pl.Int64),\n        (\"float\", [123456.0, 44333.0, 44333.0, 87878.0, 87878.0], pl.Float64),\n        (\"string\", [\"123456\", \"44333\", \"44333\", \"87878\", \"87878\"], pl.Utf8),\n        (\"boolean\", [True] * 5, pl.Boolean),\n        (\n            \"datetime\",\n            [datetime(2238, 1, 3)] + [datetime(2021, 5, 17)] * 2 + [datetime(2140, 8, 6)] * 2,\n            pl.Datetime,\n        ),\n        (\n            \"date\",\n            [date(2238, 1, 3)] + [date(2021, 5, 17)] * 2 + [date(2140, 8, 6)] * 2,\n            pl.Date,\n        ),\n        #  conversion to duration not supported yet\n        (\"duration\", [pd.NaT] * 5, pl.Duration),\n    ],\n)\ndef test_sheet_with_mixed_dtypes_specify_dtypes(\n    dtype_by_index: bool,\n    dtype: fastexcel.DType,\n    expected_data: list[Any],\n    expected_pl_dtype: pl.DataType,\n) -> None:\n    dtypes: fastexcel.DTypeMap = {0: dtype} if dtype_by_index else {\"Employee ID\": dtype}\n    excel_reader = fastexcel.read_excel(path_for_fixture(\"fixture-multi-dtypes-columns.xlsx\"))\n    sheet = excel_reader.load_sheet(0, dtypes=dtypes, n_rows=5)\n    assert sheet.specified_dtypes == dtypes\n\n    pd_df = sheet.to_pandas()\n    expected_pd_dtype = get_expected_pandas_dtype(dtype)\n    assert pd_df[\"Employee ID\"].dtype == expected_pd_dtype\n    assert pd_df[\"Employee ID\"].to_list() == expected_data\n\n    pl_df = sheet.to_polars()\n    assert pl_df[\"Employee ID\"].dtype == expected_pl_dtype\n    assert pl_df[\"Employee ID\"].to_list() == (expected_data if dtype != \"duration\" else [None] * 5)\n\n\n@pytest.mark.parametrize(\n    \"dtypes,expected,fastexcel_dtype,expected_pl_dtype\",\n    [\n        (None, datetime(2023, 7, 21), \"datetime\", pl.Datetime),\n        ({\"Date\": \"datetime\"}, datetime(2023, 7, 21), \"datetime\", pl.Datetime),\n        ({\"Date\": \"date\"}, date(2023, 7, 21), \"date\", 
pl.Date),\n        ({\"Date\": \"string\"}, \"2023-07-21 00:00:00\", \"string\", pl.Utf8),\n        ({2: \"datetime\"}, datetime(2023, 7, 21), \"datetime\", pl.Datetime),\n        ({2: \"date\"}, date(2023, 7, 21), \"date\", pl.Date),\n        ({2: \"string\"}, \"2023-07-21 00:00:00\", \"string\", pl.Utf8),\n    ],\n)\ndef test_sheet_datetime_conversion(\n    dtypes: fastexcel.DTypeMap | None,\n    expected: Any,\n    fastexcel_dtype: str,\n    expected_pl_dtype: pl.DataType,\n) -> None:\n    excel_reader = fastexcel.read_excel(path_for_fixture(\"fixture-multi-dtypes-columns.xlsx\"))\n\n    sheet = excel_reader.load_sheet(0, dtypes=dtypes)\n    assert sheet.specified_dtypes == dtypes\n    pd_df = sheet.to_pandas()\n    expected_pd_dtype = get_expected_pandas_dtype(fastexcel_dtype)\n    assert pd_df[\"Date\"].dtype == expected_pd_dtype\n    assert pd_df[\"Date\"].to_list() == [expected] * 9\n\n    pl_df = sheet.to_polars()\n    assert pl_df[\"Date\"].dtype == expected_pl_dtype\n    assert pl_df[\"Date\"].to_list() == [expected] * 9\n\n\n@pytest.mark.parametrize(\"eager\", [True, False])\n@pytest.mark.parametrize(\"dtype_coercion\", [\"coerce\", None])\ndef test_dtype_coercion_behavior__coerce(\n    dtype_coercion: Literal[\"coerce\"] | None, eager: bool\n) -> None:\n    excel_reader = fastexcel.read_excel(path_for_fixture(\"fixture-multi-dtypes-columns.xlsx\"))\n\n    kwargs = {\"dtype_coercion\": dtype_coercion} if dtype_coercion else {}\n    sheet_or_rb = (\n        excel_reader.load_sheet(0, eager=eager, **kwargs)  # type:ignore[call-overload]\n    )\n    rb = sheet_or_rb if eager else sheet_or_rb.to_arrow()\n\n    pd_df = rb.to_pandas()\n    expected_pd_dtype = get_expected_pandas_dtype(\"string\")\n    assert pd_df[\"Mixed dates\"].dtype == expected_pd_dtype\n    assert pd_df[\"Mixed dates\"].to_list() == [\"2023-07-21 00:00:00\"] * 6 + [\"July 23rd\"] * 3\n\n    pl_df = pl.from_arrow(data=rb)\n    assert isinstance(pl_df, pl.DataFrame)\n    assert 
pl_df[\"Mixed dates\"].dtype == pl.Utf8\n    assert pl_df[\"Mixed dates\"].to_list() == [\"2023-07-21 00:00:00\"] * 6 + [\"July 23rd\"] * 3\n\n\n@pytest.mark.parametrize(\"eager\", [True, False])\ndef test_dtype_coercion_behavior__strict_sampling_eveything(eager: bool) -> None:\n    excel_reader = fastexcel.read_excel(path_for_fixture(\"fixture-multi-dtypes-columns.xlsx\"))\n\n    with pytest.raises(\n        fastexcel.UnsupportedColumnTypeCombinationError, match=\"type coercion is strict\"\n    ):\n        if eager:\n            excel_reader.load_sheet_eager(0, dtype_coercion=\"strict\")\n        else:\n            excel_reader.load_sheet(0, dtype_coercion=\"strict\").to_arrow()\n\n\n@pytest.mark.parametrize(\"eager\", [True, False])\ndef test_dtype_coercion_behavior__strict_sampling_limit(eager: bool) -> None:\n    excel_reader = fastexcel.read_excel(path_for_fixture(\"fixture-multi-dtypes-columns.xlsx\"))\n\n    sheet = (\n        excel_reader.load_sheet_eager(0, dtype_coercion=\"strict\", schema_sample_rows=5)\n        if eager\n        else excel_reader.load_sheet(0, dtype_coercion=\"strict\", schema_sample_rows=5).to_arrow()\n    )\n\n    pd_df = sheet.to_pandas()\n    assert pd_df[\"Mixed dates\"].dtype == \"datetime64[ms]\"\n    assert (\n        pd_df[\"Mixed dates\"].to_list() == [pd.Timestamp(\"2023-07-21 00:00:00\")] * 6 + [pd.NaT] * 3\n    )\n    assert pd_df[\"Asset ID\"].dtype == \"float64\"\n    assert pd_df[\"Asset ID\"].replace(np.nan, None).to_list() == [84444.0] * 7 + [None] * 2\n\n    pl_df = pl.from_arrow(data=sheet)\n    assert isinstance(pl_df, pl.DataFrame)\n    assert pl_df[\"Mixed dates\"].dtype == pl.Datetime\n    assert pl_df[\"Mixed dates\"].to_list() == [datetime(2023, 7, 21)] * 6 + [None] * 3\n    assert pl_df[\"Asset ID\"].dtype == pl.Float64\n    assert pl_df[\"Asset ID\"].to_list() == [84444.0] * 7 + [None] * 2\n\n\ndef test_one_dtype_for_all() -> None:\n    excel_reader = 
fastexcel.read_excel(path_for_fixture(\"fixture-multi-dtypes-columns.xlsx\"))\n    sheet = excel_reader.load_sheet(0, dtypes=\"string\")\n    assert sheet.available_columns() == [\n        fastexcel.ColumnInfo(\n            name=\"Employee ID\",\n            index=0,\n            absolute_index=0,\n            dtype=\"string\",\n            dtype_from=\"provided_for_all\",\n            column_name_from=\"looked_up\",\n        ),\n        fastexcel.ColumnInfo(\n            name=\"Employee Name\",\n            index=1,\n            absolute_index=1,\n            dtype=\"string\",\n            dtype_from=\"provided_for_all\",\n            column_name_from=\"looked_up\",\n        ),\n        fastexcel.ColumnInfo(\n            name=\"Date\",\n            index=2,\n            absolute_index=2,\n            dtype=\"string\",\n            dtype_from=\"provided_for_all\",\n            column_name_from=\"looked_up\",\n        ),\n        fastexcel.ColumnInfo(\n            name=\"Details\",\n            index=3,\n            absolute_index=3,\n            dtype=\"string\",\n            dtype_from=\"provided_for_all\",\n            column_name_from=\"looked_up\",\n        ),\n        fastexcel.ColumnInfo(\n            name=\"Asset ID\",\n            index=4,\n            absolute_index=4,\n            dtype=\"string\",\n            dtype_from=\"provided_for_all\",\n            column_name_from=\"looked_up\",\n        ),\n        fastexcel.ColumnInfo(\n            name=\"Mixed dates\",\n            index=5,\n            absolute_index=5,\n            dtype=\"string\",\n            dtype_from=\"provided_for_all\",\n            column_name_from=\"looked_up\",\n        ),\n        fastexcel.ColumnInfo(\n            name=\"Mixed bools\",\n            index=6,\n            absolute_index=6,\n            dtype=\"string\",\n            dtype_from=\"provided_for_all\",\n            column_name_from=\"looked_up\",\n        ),\n    ]\n    assert sheet.to_polars().dtypes == [pl.String] * 
7\n\n\ndef test_fallback_infer_dtypes(caplog: pytest.LogCaptureFixture) -> None:\n    \"\"\"it should fallback to string if it can't infer the dtype\"\"\"\n\n    excel_reader = fastexcel.read_excel(path_for_fixture(\"infer-dtypes-fallback.xlsx\"))\n    sheet = excel_reader.load_sheet(0)\n\n    # Ensure a warning message was logged to explain the fallback to string\n    assert caplog.record_tuples == [\n        (\n            \"fastexcel.types.dtype\",\n            logging.WARNING,\n            \"Could not determine dtype for column 1, falling back to string\",\n        )\n    ]\n\n    assert sheet.available_columns() == [\n        fastexcel.ColumnInfo(\n            name=\"id\",\n            index=0,\n            absolute_index=0,\n            dtype=\"float\",\n            dtype_from=\"guessed\",\n            column_name_from=\"looked_up\",\n        ),\n        fastexcel.ColumnInfo(\n            name=\"label\",\n            index=1,\n            absolute_index=1,\n            dtype=\"string\",\n            dtype_from=\"guessed\",\n            column_name_from=\"looked_up\",\n        ),\n    ]\n    assert sheet.to_polars().dtypes == [pl.Float64, pl.String]\n\n\n@pytest.mark.parametrize(\n    (\"dtype\", \"expected_data\"),\n    [\n        (\n            \"int\",\n            [None] * 2\n            + [-1.0, 0.0, 1.0, 0.0, 1.0, 1.0, -1.0, 0.0, 1.0, None, 1.0, 0.0]\n            + [None] * 7\n            + [0.0],\n        ),\n        (\n            \"float\",\n            [None] * 2\n            + [-1.0, 0.0, 1.0, 0.0, 1.0, 1.1, -1.0, 0.0, 1.0, 1.1, 1.0, 0.0]\n            + [None] * 7\n            + [0.1],\n        ),\n        (\n            \"string\",\n            [\n                None,\n                \"foo\",\n                \"-1\",\n                \"0\",\n                \"1\",\n                \"0\",\n                \"1\",\n                \"1.1\",\n                \"-1\",\n                \"0\",\n                \"1\",\n                \"1.1\",\n            
    \"true\",\n                \"false\",\n                \"2023-07-21 00:00:00\",\n                \"2023-07-21 12:20:00\",\n                # calamine reads a time as datetimes here, which seems wrong\n                \"1899-12-31 12:20:00\",\n                \"07/21/2023\",\n                \"7/21/2023  12:20:00 PM\",\n                \"July 23rd\",\n                \"12:20:00\",\n                \"0.1\",\n            ],\n        ),\n        (\n            \"boolean\",\n            [None] * 2\n            + [True, False, True, False, True, True]\n            + [None] * 4\n            + [True, False]\n            + [None] * 7\n            + [True],\n        ),\n        (\n            \"datetime\",\n            [pd.NaT] * 2\n            + [\n                pd.Timestamp(\"1899-12-30 00:00:00\"),\n                pd.Timestamp(\"1899-12-31 00:00:00\"),\n                pd.Timestamp(\"1900-01-01 00:00:00\"),\n                pd.Timestamp(\"1899-12-31 00:00:00\"),\n                pd.Timestamp(\"1900-01-01 00:00:00\"),\n                pd.Timestamp(\"1900-01-01 02:24:00\"),\n            ]\n            + [pd.NaT] * 6\n            + [\n                pd.Timestamp(\"2023-7-21 00:00:00\"),\n                pd.Timestamp(\"2023-7-21 12:20:00\"),\n                # calamine currently adds a date to a time, which is\n                # questionable\n                pd.Timestamp(\"1899-12-31 12:20:00\"),\n            ]\n            + [pd.NaT] * 4\n            + [\n                # calamine converts percentages to datetimes (since it does not\n                # distinguish from floats), which seems questionable\n                pd.Timestamp(\"1899-12-31 02:24:00\")\n            ],\n        ),\n        (\n            \"date\",\n            [None] * 2\n            + [\n                pd.Timestamp(\"1899-12-30\").date(),\n                pd.Timestamp(\"1899-12-31\").date(),\n                pd.Timestamp(\"1900-01-01\").date(),\n                
pd.Timestamp(\"1899-12-31\").date(),\n                pd.Timestamp(\"1900-01-01\").date(),\n                pd.Timestamp(\"1900-01-01\").date(),\n            ]\n            + [None] * 6\n            + [\n                pd.Timestamp(\"2023-7-21\").date(),\n                pd.Timestamp(\"2023-7-21\").date(),\n                # calamine converts any time to 1899-12-31, which is\n                # questionable\n                pd.Timestamp(\"1899-12-31\").date(),\n            ]\n            + [None] * 4\n            + [\n                # calamine converts percentages to dates (since it does not\n                # distinguish from floats), which seems questionable\n                pd.Timestamp(\"1899-12-31\").date()\n            ],\n        ),\n        (\n            \"duration\",\n            [pd.NaT] * 14\n            + [\n                # dates/datetimes are converted to durations, which seems\n                # questionable\n                pd.Timedelta(datetime(2023, 7, 21 + 1) - datetime(1899, 12, 31)),\n                pd.Timedelta(datetime(2023, 7, 21 + 1, 12, 20, 0) - datetime(1899, 12, 31)),\n                pd.Timedelta(hours=12, minutes=20),\n            ]\n            + [pd.NaT] * 5,\n        ),\n    ],\n)\ndef test_to_arrow_with_errors(\n    dtype: fastexcel.DType,\n    expected_data: list[Any],\n):\n    excel_reader = fastexcel.read_excel(path_for_fixture(\"fixture-type-errors.xlsx\"))\n    rb, cell_errors = excel_reader.load_sheet(0, dtypes={\"Column\": dtype}).to_arrow_with_errors()\n\n    pd_df = rb.to_pandas()\n    # For string columns in pandas 3, replace pd.NA with None for comparison\n    if dtype == \"string\":\n        column_values = pd_df[\"Column\"].replace([np.nan, pd.NA], None).to_list()\n    else:\n        column_values = pd_df[\"Column\"].replace(np.nan, None).to_list()\n    assert column_values == expected_data\n\n    def item_to_polars(item: Any):\n        if isinstance(item, pd.Timestamp):\n            return item.to_pydatetime()\n   
     if pd.isna(item):\n            return None\n        return item\n\n    pl_df = pl.from_arrow(rb)\n    assert isinstance(pl_df, pl.DataFrame)\n    pl_expected_data = list(map(item_to_polars, expected_data))\n    assert pl_df[\"Column\"].to_list() == pl_expected_data\n\n    # the only empty cell is (0, 0), so all other cells that were read as None\n    # should be errors\n    expected_error_positions = [\n        (i, 0) for i in range(1, len(expected_data)) if expected_data[i] in {None, pd.NaT}\n    ]\n    if expected_error_positions:\n        assert cell_errors is not None\n        error_positions = [err.offset_position for err in cell_errors.errors]\n        assert error_positions == expected_error_positions\n\n\ndef test_guess_dtypes_with_div0_error() -> None:\n    excel_reader = fastexcel.read_excel(path_for_fixture(\"div0.xlsx\"))\n    sheet = excel_reader.load_sheet(0)\n    assert sheet.available_columns() == [\n        fastexcel.ColumnInfo(\n            name=\"dividend\",\n            index=0,\n            absolute_index=0,\n            dtype=\"float\",\n            dtype_from=\"guessed\",\n            column_name_from=\"looked_up\",\n        ),\n        fastexcel.ColumnInfo(\n            name=\"divisor\",\n            index=1,\n            absolute_index=1,\n            dtype=\"float\",\n            dtype_from=\"guessed\",\n            column_name_from=\"looked_up\",\n        ),\n        fastexcel.ColumnInfo(\n            name=\"quotient\",\n            index=2,\n            absolute_index=2,\n            dtype=\"float\",\n            dtype_from=\"guessed\",\n            column_name_from=\"looked_up\",\n        ),\n    ]\n    expected_data = {\n        \"dividend\": [42.0, 43.0, 44.0, 45.0],\n        \"divisor\": [0.0, 1.0, 2.0, 3.0],\n        \"quotient\": [None, 43.0, 22.0, 15.0],\n    }\n\n    pd_df = sheet.to_pandas()\n    pd_expected_data = pd.DataFrame(expected_data)\n    pd_assert_frame_equal(pd_df, pd_expected_data)\n\n    pl_df = 
sheet.to_polars()\n    pl_expected_data = pl.DataFrame(expected_data)\n    pl_assert_frame_equal(pl_df, pl_expected_data)\n"
  },
  {
    "path": "python/tests/test_durations.py",
    "content": "from __future__ import annotations\n\nfrom datetime import date, datetime, timedelta\n\nimport fastexcel\nimport numpy as np\nimport pandas as pd\nimport polars as pl\nfrom pandas.testing import assert_frame_equal as pd_assert_frame_equal\nfrom polars.datatypes import DataType as PolarsDataType\nfrom polars.datatypes import Date as PlDate\nfrom polars.datatypes import Datetime as PlDateTime\nfrom polars.datatypes import Duration as PlDuration\nfrom polars.datatypes import Utf8 as PlUtf8\nfrom polars.testing import assert_frame_equal as pl_assert_frame_equal\n\nfrom .utils import get_expected_pandas_dtype, path_for_fixture\n\n\ndef test_sheet_with_different_time_types() -> None:\n    excel_reader = fastexcel.read_excel(path_for_fixture(\"dates.ods\"))\n    sheet = excel_reader.load_sheet_by_idx(0)\n\n    pd_df = sheet.to_pandas()\n    pl_df = sheet.to_polars()\n\n    ## dtypes\n    assert pd_df[\"date\"].dtype == np.dtype(\"object\")\n    assert pd_df[\"datestr\"].dtype == get_expected_pandas_dtype(\"string\")\n    assert pd_df[\"time\"].dtype == np.dtype(\"timedelta64[ms]\")\n    assert pd_df[\"datetime\"].dtype == np.dtype(\"datetime64[ms]\")\n    expected_pl_dtypes: dict[str, PolarsDataType] = {\n        \"date\": PlDate(),\n        \"datestr\": PlUtf8(),\n        \"time\": PlDuration(time_unit=\"ms\"),\n        \"datetime\": PlDateTime(time_unit=\"ms\", time_zone=None),\n    }\n    assert dict(zip(pl_df.columns, pl_df.dtypes)) == expected_pl_dtypes\n\n    ## Contents\n\n    expected_pd = pd.DataFrame(\n        {\n            \"date\": [date(2023, 6, 1)],\n            \"datestr\": [\"2023-06-01T02:03:04+02:00\"],\n            \"time\": pd.Series([pd.to_timedelta(\"01:02:03\")]).astype(\"timedelta64[ms]\"),\n            \"datetime\": pd.Series([pd.to_datetime(\"2023-06-01 02:03:04\")]).astype(\"datetime64[ms]\"),\n        }\n    )\n    expected_pl = pl.DataFrame(\n        {\n            \"date\": [date(2023, 6, 1)],\n            \"datestr\": 
[\"2023-06-01T02:03:04+02:00\"],\n            \"time\": [timedelta(hours=1, minutes=2, seconds=3)],\n            \"datetime\": [datetime(2023, 6, 1, 2, 3, 4)],\n        },\n        schema=expected_pl_dtypes,\n    )\n    pd_assert_frame_equal(pd_df, expected_pd)\n    pl_assert_frame_equal(pl_df, expected_pl)\n\n\ndef test_sheet_with_offset_header_row_and_durations() -> None:\n    excel_reader = fastexcel.read_excel(path_for_fixture(\"single-sheet-skip-rows-durations.xlsx\"))\n    sheet = excel_reader.load_sheet(0, header_row=10)\n\n    pd_df = sheet.to_pandas()\n    pl_df = sheet.to_polars()\n\n    assert pd_df[\"Tot. Time Away From System\"].dtype == np.dtype(\"timedelta64[ms]\")\n    assert pd_df[\"Tot. Time Away From System\"].tolist() == [\n        pd.Timedelta(\"01:18:43\"),\n        pd.Timedelta(\"07:16:51\"),\n    ]\n\n    assert pl_df[\"Tot. Time Away From System\"].dtype == pl.Duration(time_unit=\"ms\")\n    assert pl_df[\"Tot. Time Away From System\"].to_list() == [\n        timedelta(hours=1, minutes=18, seconds=43),\n        timedelta(hours=7, minutes=16, seconds=51),\n    ]\n"
  },
  {
    "path": "python/tests/test_eagerness.py",
    "content": "from datetime import date, datetime, timedelta\n\nimport fastexcel\nimport polars as pl\nfrom pandas.testing import assert_frame_equal as pd_assert_frame_equal\nfrom polars.testing import assert_frame_equal as pl_assert_frame_equal\nfrom pyarrow import RecordBatch\n\nfrom .utils import path_for_fixture\n\n\ndef test_load_sheet_eager_single_sheet() -> None:\n    excel_reader = fastexcel.read_excel(path_for_fixture(\"fixture-single-sheet.xlsx\"))\n\n    eager_pandas = excel_reader.load_sheet_eager(0).to_pandas()\n    lazy_pandas = excel_reader.load_sheet(0).to_pandas()\n    pd_assert_frame_equal(eager_pandas, lazy_pandas)\n\n    eager_polars = pl.from_arrow(data=excel_reader.load_sheet_eager(0))\n    assert isinstance(eager_polars, pl.DataFrame)\n    lazy_polars = excel_reader.load_sheet(0).to_polars()\n    pl_assert_frame_equal(eager_polars, lazy_polars)\n\n\ndef test_multiple_sheets_with_unnamed_columns():\n    excel_reader = fastexcel.read_excel(path_for_fixture(\"fixture-multi-sheet.xlsx\"))\n\n    eager_pandas = excel_reader.load_sheet_eager(\"With unnamed columns\").to_pandas()\n    lazy_pandas = excel_reader.load_sheet(\"With unnamed columns\").to_pandas()\n    pd_assert_frame_equal(eager_pandas, lazy_pandas)\n\n    eager_polars = pl.from_arrow(data=excel_reader.load_sheet_eager(\"With unnamed columns\"))\n    assert isinstance(eager_polars, pl.DataFrame)\n    lazy_polars = excel_reader.load_sheet(\"With unnamed columns\").to_polars()\n    pl_assert_frame_equal(eager_polars, lazy_polars)\n\n\ndef test_eager_with_an_ods_file_should_return_a_recordbatch() -> None:\n    ods_reader = fastexcel.read_excel(path_for_fixture(\"dates.ods\"))\n\n    record_batch = ods_reader.load_sheet_eager(0)\n    assert isinstance(record_batch, RecordBatch)\n    pl_df = pl.from_arrow(record_batch)\n    assert isinstance(pl_df, pl.DataFrame)\n    pl_assert_frame_equal(\n        pl_df,\n        pl.DataFrame(\n            {\n                \"date\": [date(2023, 6, 
1)],\n                \"datestr\": [\"2023-06-01T02:03:04+02:00\"],\n                \"time\": [timedelta(hours=1, minutes=2, seconds=3)],\n                \"datetime\": [datetime(2023, 6, 1, 2, 3, 4)],\n            }\n        ).with_columns(*(pl.col(col).dt.cast_time_unit(\"ms\") for col in (\"datetime\", \"time\"))),\n    )\n"
  },
  {
    "path": "python/tests/test_empty.py",
    "content": "import fastexcel\nimport pytest\n\nfrom .utils import path_for_fixture\n\n\n@pytest.mark.parametrize(\"path\", (\"empty.ods\", \"empty.xlsx\"))\ndef test_empty(path: str) -> None:\n    excel_reader = fastexcel.read_excel(path_for_fixture(path))\n    sheet = excel_reader.load_sheet_by_idx(0)\n\n    assert sheet.to_pandas().empty\n    assert sheet.to_polars().is_empty()\n"
  },
  {
    "path": "python/tests/test_errors.py",
    "content": "from __future__ import annotations\n\nimport fastexcel\nimport pytest\n\nfrom .utils import path_for_fixture\n\n\ndef test_cell_error_repr() -> None:\n    excel_reader = fastexcel.read_excel(path_for_fixture(\"fixture-type-errors.xlsx\"))\n    _, cell_errors = excel_reader.load_sheet(0, dtypes={\"Column\": \"int\"}).to_arrow_with_errors()\n    assert cell_errors is not None\n    assert (\n        repr(cell_errors.errors[0])\n        == \"\"\"CellError(position=(2, 0), offset_position=(1, 0), row_offset=1, detail=\"Expected int but got 'String(\\\\\"foo\\\\\")'\")\"\"\"  # noqa: E501\n    )\n\n\ndef test_read_excel_bad_type() -> None:\n    expected_message = \"source must be a string or bytes\"\n    with pytest.raises(fastexcel.InvalidParametersError, match=expected_message):\n        fastexcel.read_excel(42)  # type: ignore[arg-type]\n\n\ndef test_does_not_exist() -> None:\n    expected_message = \"\"\"calamine error: Cannot detect file format\nContext:\n    0: Could not open workbook at path_does_not_exist.nope\n    1: could not load excel file at path_does_not_exist.nope\"\"\"\n\n    with pytest.raises(fastexcel.CalamineError, match=expected_message) as exc_info:\n        fastexcel.read_excel(\"path_does_not_exist.nope\")\n\n    assert exc_info.value.__doc__ == \"Generic calamine error\"\n\n    # Should also work with the base error type\n    with pytest.raises(fastexcel.FastExcelError, match=expected_message):\n        fastexcel.read_excel(\"path_does_not_exist.nope\")\n\n\ndef test_sheet_idx_not_found_error() -> None:\n    excel_reader = fastexcel.read_excel(path_for_fixture(\"fixture-single-sheet.xlsx\"))\n    expected_message = \"\"\"sheet at index 42 not found\nContext:\n    0: Sheet index 42 is out of range. 
File has 1 sheets.\"\"\"\n\n    with pytest.raises(fastexcel.SheetNotFoundError, match=expected_message) as exc_info:\n        excel_reader.load_sheet(42)\n\n    assert exc_info.value.__doc__ == \"Sheet was not found\"\n\n    # Should also work with the base error type\n    with pytest.raises(fastexcel.FastExcelError, match=expected_message):\n        excel_reader.load_sheet(42)\n\n\ndef test_sheet_name_not_found_error() -> None:\n    excel_reader = fastexcel.read_excel(path_for_fixture(\"fixture-single-sheet.xlsx\"))\n    expected_message = \"\"\"sheet with name \"idontexist\" not found\nContext:\n    0: Sheet \"idontexist\" not found in file. Available sheets: \"January\".\"\"\"\n\n    with pytest.raises(fastexcel.SheetNotFoundError, match=expected_message) as exc_info:\n        excel_reader.load_sheet(\"idontexist\")\n\n    assert exc_info.value.__doc__ == \"Sheet was not found\"\n\n\n@pytest.mark.parametrize(\n    \"exc_class, expected_docstring\",\n    [\n        (fastexcel.FastExcelError, \"The base class for all fastexcel errors\"),\n        (\n            fastexcel.UnsupportedColumnTypeCombinationError,\n            \"Column contains an unsupported type combination\",\n        ),\n        (fastexcel.CannotRetrieveCellDataError, \"Data for a given cell cannot be retrieved\"),\n        (\n            fastexcel.CalamineCellError,\n            \"calamine returned an error regarding the content of the cell\",\n        ),\n        (fastexcel.CalamineError, \"Generic calamine error\"),\n        (fastexcel.ColumnNotFoundError, \"Column was not found\"),\n        (fastexcel.SheetNotFoundError, \"Sheet was not found\"),\n        (fastexcel.ArrowError, \"Generic arrow error\"),\n        (fastexcel.InvalidParametersError, \"Provided parameters are invalid\"),\n    ],\n)\ndef test_docstrings(exc_class: type[Exception], expected_docstring: str) -> None:\n    assert exc_class.__doc__ == expected_docstring\n\n\ndef test_schema_sample_rows_must_be_nonzero() -> None:\n    
excel_reader = fastexcel.read_excel(path_for_fixture(\"fixture-single-sheet.xlsx\"))\n\n    with pytest.raises(\n        fastexcel.InvalidParametersError,\n        match=\"schema_sample_rows cannot be 0, as it would prevent dtype inferring\",\n    ):\n        excel_reader.load_sheet(0, schema_sample_rows=0)\n\n    with pytest.raises(\n        fastexcel.InvalidParametersError,\n        match=\"schema_sample_rows cannot be 0, as it would prevent dtype inferring\",\n    ):\n        excel_reader.load_table(\"my-table\", schema_sample_rows=0)\n"
  },
  {
    "path": "python/tests/test_fastexcel.py",
    "content": "from __future__ import annotations\n\nfrom datetime import datetime\nfrom typing import Any\n\nimport fastexcel\nimport pandas as pd\nimport polars as pl\nimport pytest\nfrom pandas.testing import assert_frame_equal as pd_assert_frame_equal\nfrom polars.testing import assert_frame_equal as pl_assert_frame_equal\n\nfrom .utils import path_for_fixture\n\n\ndef test_single_sheet():\n    excel_reader = fastexcel.read_excel(path_for_fixture(\"fixture-single-sheet.xlsx\"))\n    assert excel_reader.sheet_names == [\"January\"]\n    sheet_by_name = excel_reader.load_sheet(\"January\")\n    sheet_by_idx = excel_reader.load_sheet(0)\n\n    # Metadata\n    assert sheet_by_name.name == sheet_by_idx.name == \"January\"\n    assert sheet_by_name.height == sheet_by_idx.height == 2\n    assert sheet_by_name.width == sheet_by_idx.width == 2\n\n    expected = {\"Month\": [1.0, 2.0], \"Year\": [2019.0, 2020.0]}\n\n    pd_expected = pd.DataFrame(expected)\n    pd_assert_frame_equal(sheet_by_name.to_pandas(), pd_expected)\n    pd_assert_frame_equal(sheet_by_idx.to_pandas(), pd_expected)\n\n    pl_expected = pl.DataFrame(expected)\n    pl_assert_frame_equal(sheet_by_name.to_polars(), pl_expected)\n    pl_assert_frame_equal(sheet_by_idx.to_polars(), pl_expected)\n\n\ndef test_single_sheet_bytes():\n    with open(path_for_fixture(\"fixture-single-sheet.xlsx\"), \"rb\") as f:\n        excel_reader = fastexcel.read_excel(f.read())\n    assert excel_reader.sheet_names == [\"January\"]\n    sheet_by_name = excel_reader.load_sheet(\"January\")\n    sheet_by_idx = excel_reader.load_sheet(0)\n\n    # Metadata\n    assert sheet_by_name.name == sheet_by_idx.name == \"January\"\n    assert sheet_by_name.height == sheet_by_idx.height == 2\n    assert sheet_by_name.width == sheet_by_idx.width == 2\n\n    expected = {\"Month\": [1.0, 2.0], \"Year\": [2019.0, 2020.0]}\n\n    pd_expected = pd.DataFrame(expected)\n    pd_assert_frame_equal(sheet_by_name.to_pandas(), pd_expected)\n    
pd_assert_frame_equal(sheet_by_idx.to_pandas(), pd_expected)\n\n    pl_expected = pl.DataFrame(expected)\n    pl_assert_frame_equal(sheet_by_name.to_polars(), pl_expected)\n    pl_assert_frame_equal(sheet_by_idx.to_polars(), pl_expected)\n\n\ndef test_single_sheet_with_types():\n    excel_reader = fastexcel.read_excel(path_for_fixture(\"fixture-single-sheet-with-types.xlsx\"))\n    assert excel_reader.sheet_names == [\"Sheet1\"]\n\n    sheet = excel_reader.load_sheet(0)\n    assert sheet.name == \"Sheet1\"\n    assert sheet.height == sheet.total_height == 3\n    assert sheet.width == 4\n\n    pd_assert_frame_equal(\n        sheet.to_pandas(),\n        pd.DataFrame(\n            {\n                \"__UNNAMED__0\": [0.0, 1.0, 2.0],\n                \"bools\": [True, False, True],\n                \"dates\": pd.Series([pd.Timestamp(\"2022-03-02 05:43:04\")] * 3).astype(\n                    \"datetime64[ms]\"\n                ),\n                \"floats\": [12.35, 42.69, 1234567],\n            }\n        ),\n    )\n\n    pl_assert_frame_equal(\n        sheet.to_polars(),\n        pl.DataFrame(\n            {\n                \"__UNNAMED__0\": [0.0, 1.0, 2.0],\n                \"bools\": [True, False, True],\n                \"dates\": [\"2022-03-02 05:43:04\"] * 3,\n                \"floats\": [12.35, 42.69, 1234567],\n            }\n        ).with_columns(pl.col(\"dates\").str.strptime(pl.Datetime, \"%F %T\").dt.cast_time_unit(\"ms\")),\n    )\n\n\ndef test_multiple_sheets():\n    excel_reader = fastexcel.read_excel(path_for_fixture(\"fixture-multi-sheet.xlsx\"))\n    assert excel_reader.sheet_names == [\"January\", \"February\", \"With unnamed columns\"]\n\n    pd_assert_frame_equal(\n        excel_reader.load_sheet_by_idx(0).to_pandas(),\n        pd.DataFrame({\"Month\": [1.0], \"Year\": [2019.0]}),\n    )\n    pd_assert_frame_equal(\n        excel_reader.load_sheet_by_idx(1).to_pandas(),\n        pd.DataFrame({\"Month\": [2.0, 3.0, 4.0], \"Year\": [2019.0, 
2021.0, 2022.0]}),\n    )\n    pd_assert_frame_equal(\n        excel_reader.load_sheet_by_name(\"With unnamed columns\").to_pandas(),\n        pd.DataFrame(\n            {\n                \"col1\": [2.0, 3.0],\n                \"__UNNAMED__1\": [1.5, 2.5],\n                \"col3\": [\"hello\", \"world\"],\n                \"__UNNAMED__3\": [-5.0, -6.0],\n                \"col5\": [\"a\", \"b\"],\n            }\n        ),\n    )\n\n    pl_assert_frame_equal(\n        excel_reader.load_sheet_by_idx(0).to_polars(),\n        pl.DataFrame({\"Month\": [1.0], \"Year\": [2019.0]}),\n    )\n    pl_assert_frame_equal(\n        excel_reader.load_sheet_by_idx(1).to_polars(),\n        pl.DataFrame({\"Month\": [2.0, 3.0, 4.0], \"Year\": [2019.0, 2021.0, 2022.0]}),\n    )\n    pl_assert_frame_equal(\n        excel_reader.load_sheet_by_name(\"With unnamed columns\").to_polars(),\n        pl.DataFrame(\n            {\n                \"col1\": [2.0, 3.0],\n                \"__UNNAMED__1\": [1.5, 2.5],\n                \"col3\": [\"hello\", \"world\"],\n                \"__UNNAMED__3\": [-5.0, -6.0],\n                \"col5\": [\"a\", \"b\"],\n            }\n        ),\n    )\n\n\ndef test_sheets_with_header_line_diff_from_zero():\n    excel_reader = fastexcel.read_excel(path_for_fixture(\"fixture-changing-header-location.xlsx\"))\n    assert excel_reader.sheet_names == [\"Sheet1\", \"Sheet2\", \"Sheet3\"]\n    sheet_by_name = excel_reader.load_sheet(\"Sheet1\", header_row=1)\n    sheet_by_idx = excel_reader.load_sheet(0, header_row=1)\n\n    # Metadata\n    assert sheet_by_name.name == sheet_by_idx.name == \"Sheet1\"\n    assert sheet_by_name.height == sheet_by_idx.height == 2\n    assert sheet_by_name.width == sheet_by_idx.width == 2\n\n    expected = {\"Month\": [1.0, 2.0], \"Year\": [2019.0, 2020.0]}\n\n    pd_expected = pd.DataFrame(expected)\n    pd_assert_frame_equal(sheet_by_name.to_pandas(), pd_expected)\n    pd_assert_frame_equal(sheet_by_idx.to_pandas(), 
pd_expected)\n\n    pl_expected = pl.DataFrame(expected)\n    pl_assert_frame_equal(sheet_by_name.to_polars(), pl_expected)\n    pl_assert_frame_equal(sheet_by_idx.to_polars(), pl_expected)\n\n\ndef test_sheets_with_no_header():\n    excel_reader = fastexcel.read_excel(path_for_fixture(\"fixture-changing-header-location.xlsx\"))\n    assert excel_reader.sheet_names == [\"Sheet1\", \"Sheet2\", \"Sheet3\"]\n    sheet_by_name = excel_reader.load_sheet(\"Sheet2\", header_row=None)\n    sheet_by_idx = excel_reader.load_sheet(1, header_row=None)\n\n    # Metadata\n    assert sheet_by_name.name == sheet_by_idx.name == \"Sheet2\"\n    assert sheet_by_name.height == sheet_by_idx.height == 2\n    assert sheet_by_name.width == sheet_by_idx.width == 3\n\n    expected = {\n        \"__UNNAMED__0\": [1.0, 2.0],\n        \"__UNNAMED__1\": [3.0, 4.0],\n        \"__UNNAMED__2\": [5.0, 6.0],\n    }\n\n    pd_expected = pd.DataFrame(expected)\n    pd_assert_frame_equal(sheet_by_name.to_pandas(), pd_expected)\n    pd_assert_frame_equal(sheet_by_idx.to_pandas(), pd_expected)\n\n    pl_expected = pl.DataFrame(expected)\n    pl_assert_frame_equal(sheet_by_name.to_polars(), pl_expected)\n    pl_assert_frame_equal(sheet_by_idx.to_polars(), pl_expected)\n\n\ndef test_sheets_with_empty_rows_before_header():\n    excel_reader = fastexcel.read_excel(path_for_fixture(\"fixture-changing-header-location.xlsx\"))\n    assert excel_reader.sheet_names == [\"Sheet1\", \"Sheet2\", \"Sheet3\"]\n    sheet_by_name = excel_reader.load_sheet(\"Sheet3\")\n    sheet_by_idx = excel_reader.load_sheet(2)\n\n    # Metadata\n    assert sheet_by_name.name == sheet_by_idx.name == \"Sheet3\"\n    assert sheet_by_name.height == sheet_by_idx.height == 2\n    assert sheet_by_name.width == sheet_by_idx.width == 2\n\n    expected = {\"Month\": [1.0, 2.0], \"Year\": [2019.0, 2020.0]}\n\n    pd_expected = pd.DataFrame(expected)\n    pd_assert_frame_equal(sheet_by_name.to_pandas(), pd_expected)\n    
pd_assert_frame_equal(sheet_by_idx.to_pandas(), pd_expected)\n\n    pl_expected = pl.DataFrame(expected)\n    pl_assert_frame_equal(sheet_by_name.to_polars(), pl_expected)\n    pl_assert_frame_equal(sheet_by_idx.to_polars(), pl_expected)\n\n\ndef test_sheets_with_custom_headers():\n    excel_reader = fastexcel.read_excel(path_for_fixture(\"fixture-changing-header-location.xlsx\"))\n    assert excel_reader.sheet_names == [\"Sheet1\", \"Sheet2\", \"Sheet3\"]\n    sheet_by_name = excel_reader.load_sheet(\n        \"Sheet2\", header_row=None, column_names=[\"foo\", \"bar\", \"baz\"]\n    )\n    sheet_by_idx = excel_reader.load_sheet(1, header_row=None, column_names=[\"foo\", \"bar\", \"baz\"])\n\n    # Metadata\n    assert sheet_by_name.name == sheet_by_idx.name == \"Sheet2\"\n    assert sheet_by_name.height == sheet_by_idx.height == 2\n    assert sheet_by_name.width == sheet_by_idx.width == 3\n\n    expected = {\"foo\": [1.0, 2.0], \"bar\": [3.0, 4.0], \"baz\": [5.0, 6.0]}\n\n    pd_expected = pd.DataFrame(expected)\n    pd_assert_frame_equal(sheet_by_name.to_pandas(), pd_expected)\n    pd_assert_frame_equal(sheet_by_idx.to_pandas(), pd_expected)\n\n    pl_expected = pl.DataFrame(expected)\n    pl_assert_frame_equal(sheet_by_name.to_polars(), pl_expected)\n    pl_assert_frame_equal(sheet_by_idx.to_polars(), pl_expected)\n\n\ndef test_sheets_with_skipping_headers():\n    excel_reader = fastexcel.read_excel(path_for_fixture(\"fixture-changing-header-location.xlsx\"))\n    assert excel_reader.sheet_names == [\"Sheet1\", \"Sheet2\", \"Sheet3\"]\n    sheet_by_name = excel_reader.load_sheet(\"Sheet2\", header_row=None, column_names=[\"Bugs\"])\n    sheet_by_idx = excel_reader.load_sheet(1, header_row=None, column_names=[\"Bugs\"])\n\n    # Metadata\n    assert sheet_by_name.name == sheet_by_idx.name == \"Sheet2\"\n    assert sheet_by_name.height == sheet_by_idx.height == 2\n    assert sheet_by_name.width == sheet_by_idx.width == 3\n\n    expected = {\n        \"Bugs\": 
[1.0, 2.0],\n        \"__UNNAMED__1\": [3.0, 4.0],\n        \"__UNNAMED__2\": [5.0, 6.0],\n    }\n\n    pd_expected = pd.DataFrame(expected)\n    pd_assert_frame_equal(sheet_by_name.to_pandas(), pd_expected)\n    pd_assert_frame_equal(sheet_by_idx.to_pandas(), pd_expected)\n\n    pl_expected = pl.DataFrame(expected)\n    pl_assert_frame_equal(sheet_by_name.to_polars(), pl_expected)\n    pl_assert_frame_equal(sheet_by_idx.to_polars(), pl_expected)\n\n\ndef test_sheet_with_pagination():\n    excel_reader = fastexcel.read_excel(path_for_fixture(\"fixture-single-sheet-with-types.xlsx\"))\n    assert excel_reader.sheet_names == [\"Sheet1\"]\n\n    sheet = excel_reader.load_sheet(0, skip_rows=1, n_rows=1)\n    assert sheet.name == \"Sheet1\"\n    assert sheet.height == 1\n    assert sheet.total_height == 3\n    assert sheet.width == 4\n\n    pd_assert_frame_equal(\n        sheet.to_pandas(),\n        pd.DataFrame(\n            {\n                \"__UNNAMED__0\": [1.0],\n                \"bools\": [False],\n                \"dates\": pd.Series([pd.Timestamp(\"2022-03-02 05:43:04\")]).astype(\"datetime64[ms]\"),\n                \"floats\": [42.69],\n            }\n        ),\n    )\n\n    pl_assert_frame_equal(\n        sheet.to_polars(),\n        pl.DataFrame(\n            {\n                \"__UNNAMED__0\": [1.0],\n                \"bools\": [False],\n                \"dates\": [\"2022-03-02 05:43:04\"],\n                \"floats\": [42.69],\n            }\n        ).with_columns(pl.col(\"dates\").str.strptime(pl.Datetime, \"%F %T\").dt.cast_time_unit(\"ms\")),\n    )\n\n\ndef test_sheet_with_skip_rows():\n    excel_reader = fastexcel.read_excel(path_for_fixture(\"fixture-single-sheet-with-types.xlsx\"))\n    assert excel_reader.sheet_names == [\"Sheet1\"]\n\n    sheet = excel_reader.load_sheet(0, skip_rows=1)\n    assert sheet.name == \"Sheet1\"\n    assert sheet.height == 2\n    assert sheet.width == 4\n\n    pd_assert_frame_equal(\n        sheet.to_pandas(),\n      
  pd.DataFrame(\n            {\n                \"__UNNAMED__0\": [1.0, 2.0],\n                \"bools\": [False, True],\n                \"dates\": pd.Series([pd.Timestamp(\"2022-03-02 05:43:04\")] * 2).astype(\n                    \"datetime64[ms]\"\n                ),\n                \"floats\": [42.69, 1234567],\n            }\n        ),\n    )\n\n    pl_assert_frame_equal(\n        sheet.to_polars(),\n        pl.DataFrame(\n            {\n                \"__UNNAMED__0\": [1.0, 2.0],\n                \"bools\": [False, True],\n                \"dates\": [\"2022-03-02 05:43:04\"] * 2,\n                \"floats\": [42.69, 1234567],\n            }\n        ).with_columns(pl.col(\"dates\").str.strptime(pl.Datetime, \"%F %T\").dt.cast_time_unit(\"ms\")),\n    )\n\n\ndef test_sheet_with_n_rows():\n    excel_reader = fastexcel.read_excel(path_for_fixture(\"fixture-single-sheet-with-types.xlsx\"))\n    assert excel_reader.sheet_names == [\"Sheet1\"]\n\n    sheet = excel_reader.load_sheet(0, n_rows=1)\n    assert sheet.name == \"Sheet1\"\n    assert sheet.height == 1\n    assert sheet.width == 4\n\n    pd_assert_frame_equal(\n        sheet.to_pandas(),\n        pd.DataFrame(\n            {\n                \"__UNNAMED__0\": [0.0],\n                \"bools\": [True],\n                \"dates\": pd.Series([pd.Timestamp(\"2022-03-02 05:43:04\")]).astype(\"datetime64[ms]\"),\n                \"floats\": [12.35],\n            }\n        ),\n    )\n\n    pl_assert_frame_equal(\n        sheet.to_polars(),\n        pl.DataFrame(\n            {\n                \"__UNNAMED__0\": [0.0],\n                \"bools\": [True],\n                \"dates\": [\"2022-03-02 05:43:04\"],\n                \"floats\": [12.35],\n            }\n        ).with_columns(pl.col(\"dates\").str.strptime(pl.Datetime, \"%F %T\").dt.cast_time_unit(\"ms\")),\n    )\n\n\ndef test_sheet_with_pagination_and_without_headers():\n    excel_reader = 
fastexcel.read_excel(path_for_fixture(\"fixture-single-sheet-with-types.xlsx\"))\n    assert excel_reader.sheet_names == [\"Sheet1\"]\n\n    sheet = excel_reader.load_sheet(\n        0,\n        n_rows=1,\n        skip_rows=1,\n        header_row=None,\n        column_names=[\"This\", \"Is\", \"Amazing\", \"Stuff\"],\n    )\n    assert sheet.name == \"Sheet1\"\n    assert sheet.height == 1\n    assert sheet.width == 4\n\n    pd_assert_frame_equal(\n        sheet.to_pandas(),\n        pd.DataFrame(\n            {\n                \"This\": [0.0],\n                \"Is\": [True],\n                \"Amazing\": pd.Series([pd.Timestamp(\"2022-03-02 05:43:04\")]).astype(\n                    \"datetime64[ms]\"\n                ),\n                \"Stuff\": [12.35],\n            }\n        ),\n    )\n\n    pl_assert_frame_equal(\n        sheet.to_polars(),\n        pl.DataFrame(\n            {\n                \"This\": [0.0],\n                \"Is\": [True],\n                \"Amazing\": [\"2022-03-02 05:43:04\"],\n                \"Stuff\": [12.35],\n            }\n        ).with_columns(\n            pl.col(\"Amazing\").str.strptime(pl.Datetime, \"%F %T\").dt.cast_time_unit(\"ms\")\n        ),\n    )\n\n\ndef test_sheet_with_pagination_out_of_bound():\n    excel_reader = fastexcel.read_excel(path_for_fixture(\"fixture-single-sheet-with-types.xlsx\"))\n    assert excel_reader.sheet_names == [\"Sheet1\"]\n\n    with pytest.raises(\n        fastexcel.InvalidParametersError, match=\"Too many rows skipped. 
Max height is 4\"\n    ):\n        excel_reader.load_sheet(\n            0,\n            skip_rows=1000000,\n            header_row=None,\n            column_names=[\"This\", \"Is\", \"Amazing\", \"Stuff\"],\n        )\n\n    sheet = excel_reader.load_sheet(\n        0,\n        n_rows=1000000,\n        skip_rows=1,\n        header_row=None,\n        column_names=[\"This\", \"Is\", \"Amazing\", \"Stuff\"],\n    )\n    assert sheet.name == \"Sheet1\"\n    assert sheet.height == 3\n    assert sheet.width == 4\n\n    pd_assert_frame_equal(\n        sheet.to_pandas(),\n        pd.DataFrame(\n            {\n                \"This\": [0.0, 1.0, 2.0],\n                \"Is\": [True, False, True],\n                \"Amazing\": pd.Series([pd.Timestamp(\"2022-03-02 05:43:04\")] * 3).astype(\n                    \"datetime64[ms]\"\n                ),\n                \"Stuff\": [12.35, 42.69, 1234567],\n            }\n        ),\n    )\n\n    pl_assert_frame_equal(\n        sheet.to_polars(),\n        pl.DataFrame(\n            {\n                \"This\": [0.0, 1.0, 2.0],\n                \"Is\": [True, False, True],\n                \"Amazing\": [\"2022-03-02 05:43:04\"] * 3,\n                \"Stuff\": [12.35, 42.69, 1234567],\n            }\n        ).with_columns(\n            pl.col(\"Amazing\").str.strptime(pl.Datetime, \"%F %T\").dt.cast_time_unit(\"ms\")\n        ),\n    )\n\n\ndef test_sheet_with_na():\n    \"\"\"Test reading a sheet with #N/A cells. 
For now, we consider them as null\"\"\"\n    excel_reader = fastexcel.read_excel(path_for_fixture(\"sheet-with-na.xlsx\"))\n    sheet = excel_reader.load_sheet(0)\n\n    assert sheet.name == \"Sheet1\"\n    assert sheet.height == sheet.total_height == 2\n    assert sheet.width == 2\n\n    expected = {\n        \"Title\": [\"A\", \"B\"],\n        \"Amount\": [None, 100.0],\n    }\n    pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(expected))\n    pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected))\n\n\ndef test_sheet_with_ref():\n    \"\"\"Test reading a sheet with #REF! cells. For now, we consider them as null\"\"\"\n    excel_reader = fastexcel.read_excel(path_for_fixture(\"sheet-with-na.xlsx\"))\n    sheet = excel_reader.load_sheet(\"Broken refs\")\n\n    assert sheet.name == \"Broken refs\"\n    assert sheet.height == sheet.total_height == 2\n    assert sheet.width == 1\n\n    expected = {\"numbers\": [1.0, None]}\n    pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(expected))\n    pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected))\n\n\n@pytest.mark.parametrize(\"excel_file\", [\"sheet-null-strings.xlsx\", \"sheet-null-strings-empty.xlsx\"])\ndef test_null_strings(excel_file: str, expected_data_sheet_null_strings: dict[str, list[Any]]):\n    excel_reader = fastexcel.read_excel(path_for_fixture(excel_file))\n    sheet = excel_reader.load_sheet(0)\n\n    assert sheet.height == sheet.total_height == 10\n    assert sheet.width == 6\n\n    pd_df = pd.DataFrame(expected_data_sheet_null_strings)\n    pd_df[\"DATES_AND_NULLS\"] = pd_df[\"DATES_AND_NULLS\"].dt.as_unit(\"ms\")\n    pd_df[\"TIMESTAMPS_AND_NULLS\"] = pd_df[\"TIMESTAMPS_AND_NULLS\"].dt.as_unit(\"ms\")\n    pd_assert_frame_equal(sheet.to_pandas(), pd_df)\n\n    pl_df = pl.DataFrame(expected_data_sheet_null_strings).with_columns(\n        pl.col(\"DATES_AND_NULLS\").dt.cast_time_unit(\"ms\"),\n        pl.col(\"TIMESTAMPS_AND_NULLS\").dt.cast_time_unit(\"ms\"),\n    )\n   
 pl_assert_frame_equal(sheet.to_polars(), pl_df)\n\n\ndef test_null_values_in_cells() -> None:\n    excel_reader = fastexcel.read_excel(path_for_fixture(\"fixture-invalid-cell-value.xlsx\"))\n    sheet = excel_reader.load_sheet(0)\n\n    expected = {\n        \"Title\": [\"A\", \"B\", \"C\", \"D\"],\n        \"Date\": [None, None, datetime(2021, 1, 1), datetime(2021, 5, 5)],\n    }\n\n    pl_assert_frame_equal(\n        sheet.to_polars(),\n        pl.DataFrame(expected).with_columns(pl.col(\"Date\").dt.cast_time_unit(\"ms\")),\n    )\n    pd_expected = pd.DataFrame(expected)\n    pd_expected[\"Date\"] = pd_expected[\"Date\"].dt.as_unit(\"ms\")\n    pd_assert_frame_equal(sheet.to_pandas(), pd_expected)\n\n\ndef test_invalid_value_num() -> None:\n    excel_reader = fastexcel.read_excel(path_for_fixture(\"fixture-invalid-cell-value-num.xlsx\"))\n    sheet = excel_reader.load_sheet(0)\n\n    expected = {\"Column\": [8.0, None]}\n    pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(expected))\n    pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected))\n\n\ndef test_null_column_is_nullable() -> None:\n    sheet = fastexcel.read_excel(path_for_fixture(\"null-column.xlsx\")).load_sheet(0)\n    assert sheet.to_arrow().schema.field(\"nullonly\").nullable is True\n\n\ndef test_sheet_with_decimal_numbers() -> None:\n    sheet = fastexcel.read_excel(path_for_fixture(\"decimal-numbers.xlsx\")).load_sheet(0)\n    pl_assert_frame_equal(\n        sheet.to_polars(),\n        pl.DataFrame({\"Decimals\": [28.14, 29.02]}),\n    )\n\n    sheet2 = fastexcel.read_excel(path_for_fixture(\"decimal-numbers.xlsx\")).load_sheet(\n        0, dtypes={0: \"string\"}\n    )\n    pl_assert_frame_equal(\n        sheet2.to_polars(),\n        pl.DataFrame({\"Decimals\": [\"28.14\", \"29.02\"]}),\n    )\n\n\n@pytest.mark.parametrize(\n    \"header_row, skip_rows, expected\",\n    [\n        (0, None, {\"a\": [\"b\", \"c\", \"d\", \"e\", \"f\"], \"0\": [1.0, 2.0, 3.0, 4.0, 5.0]}),  # 
default\n        (\n            None,\n            0,\n            {\n                \"__UNNAMED__0\": [None, None, \"a\", \"b\", \"c\", \"d\", \"e\", \"f\"],\n                \"__UNNAMED__1\": [None, None, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0],\n            },\n        ),\n        (\n            None,\n            None,\n            {\n                \"__UNNAMED__0\": [\"a\", \"b\", \"c\", \"d\", \"e\", \"f\"],\n                \"__UNNAMED__1\": [0.0, 1.0, 2.0, 3.0, 4.0, 5.0],\n            },\n        ),\n        (\n            0,\n            0,\n            {\n                \"__UNNAMED__0\": [None, \"a\", \"b\", \"c\", \"d\", \"e\", \"f\"],\n                \"__UNNAMED__1\": [None, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0],\n            },\n        ),\n        (\n            0,\n            1,\n            {\n                \"__UNNAMED__0\": [\"a\", \"b\", \"c\", \"d\", \"e\", \"f\"],\n                \"__UNNAMED__1\": [0.0, 1.0, 2.0, 3.0, 4.0, 5.0],\n            },\n        ),\n        (\n            None,\n            2,\n            {\n                \"__UNNAMED__0\": [\"a\", \"b\", \"c\", \"d\", \"e\", \"f\"],\n                \"__UNNAMED__1\": [0.0, 1.0, 2.0, 3.0, 4.0, 5.0],\n            },\n        ),\n        (\n            None,\n            3,\n            {\"__UNNAMED__0\": [\"b\", \"c\", \"d\", \"e\", \"f\"], \"__UNNAMED__1\": [1.0, 2.0, 3.0, 4.0, 5.0]},\n        ),\n        (\n            1,\n            0,\n            {\n                \"__UNNAMED__0\": [\"a\", \"b\", \"c\", \"d\", \"e\", \"f\"],\n                \"__UNNAMED__1\": [0.0, 1.0, 2.0, 3.0, 4.0, 5.0],\n            },\n        ),\n        (2, 0, {\"a\": [\"b\", \"c\", \"d\", \"e\", \"f\"], \"0\": [1.0, 2.0, 3.0, 4.0, 5.0]}),\n        (2, None, {\"a\": [\"b\", \"c\", \"d\", \"e\", \"f\"], \"0\": [1.0, 2.0, 3.0, 4.0, 5.0]}),\n        (2, 1, {\"a\": [\"c\", \"d\", \"e\", \"f\"], \"0\": [2.0, 3.0, 4.0, 5.0]}),\n        (2, [1, 3], {\"a\": [\"b\", \"d\", \"f\"], \"0\": [1.0, 3.0, 5.0]}),\n        (2, [0], 
{\"a\": [\"c\", \"d\", \"e\", \"f\"], \"0\": [2.0, 3.0, 4.0, 5.0]}),\n        (\n            None,\n            [2, 4],\n            {\n                \"__UNNAMED__0\": [None, None, \"b\", \"d\", \"e\", \"f\"],\n                \"__UNNAMED__1\": [None, None, 1.0, 3.0, 4.0, 5.0],\n            },\n        ),\n        (2, [], {\"a\": [\"b\", \"c\", \"d\", \"e\", \"f\"], \"0\": [1.0, 2.0, 3.0, 4.0, 5.0]}),\n        (2, [0, 1, 2, 3], {\"a\": [\"f\"], \"0\": [5.0]}),\n        (2, lambda x: x % 2 == 0, {\"a\": [\"c\", \"e\"], \"0\": [2.0, 4.0]}),\n        (2, lambda x: x in [0, 4], {\"a\": [\"c\", \"d\", \"e\"], \"0\": [2.0, 3.0, 4.0]}),\n        (2, lambda x: False, {\"a\": [\"b\", \"c\", \"d\", \"e\", \"f\"], \"0\": [1.0, 2.0, 3.0, 4.0, 5.0]}),\n        (2, lambda x: x != 2, {\"a\": [\"d\"], \"0\": [3.0]}),\n    ],\n)\ndef test_header_row_and_skip_rows(\n    header_row: int | None, skip_rows: int, expected: dict[str, Any]\n) -> None:\n    pl_assert_frame_equal(\n        fastexcel.read_excel(path_for_fixture(\"no-header.xlsx\"))\n        .load_sheet(0, header_row=header_row, skip_rows=skip_rows)\n        .to_polars(),\n        pl.DataFrame(expected),\n    )\n\n\ndef test_null_bytes_in_column_names() -> None:\n    \"\"\"https://github.com/ToucanToco/fastexcel/issues/343\"\"\"\n    reader = fastexcel.read_excel(path_for_fixture(\"null-bytes-in-columns-names.xls\"))\n    df = reader.load_sheet(0).to_polars()\n    assert df.shape == (8_763, 11)\n"
  },
  {
    "path": "python/tests/test_pycapsule.py",
    "content": "\"\"\"Tests for the Arrow PyCapsule Interface implementation.\"\"\"\n\nimport fastexcel\nimport pandas as pd\nimport polars as pl\n\nfrom .utils import path_for_fixture\n\n\ndef test_sheet_arrow_c_schema():\n    \"\"\"Test that __arrow_c_schema__ returns a valid PyCapsule.\"\"\"\n    excel_reader = fastexcel.read_excel(path_for_fixture(\"fixture-single-sheet.xlsx\"))\n    sheet = excel_reader.load_sheet(\"January\")\n\n    schema_capsule = sheet.__arrow_c_schema__()\n\n    # Check it's a PyCapsule with the correct name\n    assert hasattr(schema_capsule, \"__class__\")\n    assert \"PyCapsule\" in str(type(schema_capsule))\n\n\ndef test_sheet_arrow_c_array():\n    \"\"\"Test that __arrow_c_array__ returns a tuple of PyCapsules.\"\"\"\n    excel_reader = fastexcel.read_excel(path_for_fixture(\"fixture-single-sheet.xlsx\"))\n    sheet = excel_reader.load_sheet(\"January\")\n\n    schema_capsule, array_capsule = sheet.__arrow_c_array__()\n\n    # Check both are PyCapsules\n    assert \"PyCapsule\" in str(type(schema_capsule))\n    assert \"PyCapsule\" in str(type(array_capsule))\n\n\ndef test_table_arrow_c_schema():\n    \"\"\"Test that table __arrow_c_schema__ returns a valid PyCapsule.\"\"\"\n    excel_reader = fastexcel.read_excel(path_for_fixture(\"sheet-with-tables.xlsx\"))\n    table_names = excel_reader.table_names()\n\n    table = excel_reader.load_table(table_names[0])  # Should be 'users'\n    schema_capsule = table.__arrow_c_schema__()\n\n    # Check it's a PyCapsule\n    assert \"PyCapsule\" in str(type(schema_capsule))\n\n\ndef test_table_arrow_c_array():\n    \"\"\"Test that table __arrow_c_array__ returns a tuple of PyCapsules.\"\"\"\n    excel_reader = fastexcel.read_excel(path_for_fixture(\"sheet-with-tables.xlsx\"))\n    table_names = excel_reader.table_names()\n\n    table = excel_reader.load_table(table_names[0])  # Should be 'users'\n    schema_capsule, array_capsule = table.__arrow_c_array__()\n\n    # Check both are PyCapsules\n  
  assert \"PyCapsule\" in str(type(schema_capsule))\n    assert \"PyCapsule\" in str(type(array_capsule))\n\n\ndef test_pycapsule_interface_with_requested_schema():\n    \"\"\"Test PyCapsule interface methods with requested_schema parameter.\"\"\"\n    excel_reader = fastexcel.read_excel(path_for_fixture(\"fixture-single-sheet.xlsx\"))\n    sheet = excel_reader.load_sheet(\"January\")\n\n    # Test with None (current implementation ignores this)\n    schema_capsule, array_capsule = sheet.__arrow_c_array__(None)\n\n    assert \"PyCapsule\" in str(type(schema_capsule))\n    assert \"PyCapsule\" in str(type(array_capsule))\n\n\ndef test_integration_with_polars():\n    \"\"\"Test that polars can consume our PyCapsule interface.\"\"\"\n    excel_reader = fastexcel.read_excel(path_for_fixture(\"fixture-single-sheet.xlsx\"))\n    sheet = excel_reader.load_sheet(\"January\")\n\n    # Polars should be able to create a DataFrame from our PyCapsule interface\n    # This tests the actual interoperability\n    df = pl.DataFrame(sheet)\n\n    assert len(df) == 2\n    assert df.columns == [\"Month\", \"Year\"]\n\n\ndef test_to_polars_without_pyarrow():\n    \"\"\"Test that to_polars() works via PyCapsule interface without pyarrow.\"\"\"\n    excel_reader = fastexcel.read_excel(path_for_fixture(\"fixture-single-sheet.xlsx\"))\n    sheet = excel_reader.load_sheet(\"January\")\n\n    # This should work via PyCapsule interface, not requiring pyarrow\n    df = sheet.to_polars()\n\n    assert isinstance(df, pl.DataFrame)\n    assert len(df) == 2\n    assert df.columns == [\"Month\", \"Year\"]\n\n    # Test with table as well\n    excel_reader_table = fastexcel.read_excel(path_for_fixture(\"sheet-with-tables.xlsx\"))\n    table_names = excel_reader_table.table_names()\n    table = excel_reader_table.load_table(table_names[0])\n    df_table = table.to_polars()\n    assert isinstance(df_table, pl.DataFrame)\n\n\ndef test_to_pandas_still_requires_pyarrow():\n    \"\"\"Test that to_pandas() 
currently still requires pyarrow.\n\n    Note: pandas PyCapsule interface would require implementing __dataframe__\n    or __arrow_c_stream__, which we don't currently do.\n    \"\"\"\n    excel_reader = fastexcel.read_excel(path_for_fixture(\"fixture-single-sheet.xlsx\"))\n    sheet = excel_reader.load_sheet(\"January\")\n\n    # This still requires pyarrow for now\n    df = sheet.to_pandas()\n\n    assert isinstance(df, pd.DataFrame)\n    assert len(df) == 2\n    assert list(df.columns) == [\"Month\", \"Year\"]\n\n    # Test with table as well\n    excel_reader_table = fastexcel.read_excel(path_for_fixture(\"sheet-with-tables.xlsx\"))\n    table_names = excel_reader_table.table_names()\n    table = excel_reader_table.load_table(table_names[0])\n    df_table = table.to_pandas()\n    assert isinstance(df_table, pd.DataFrame)\n"
  },
  {
    "path": "python/tests/test_sheet_visibility.py",
    "content": "import fastexcel\n\nfrom .utils import path_for_fixture\n\n\ndef test_sheet_visibilities() -> None:\n    file_path = path_for_fixture(\"fixture-sheets-different-visibilities.xlsx\")\n\n    reader = fastexcel.read_excel(file_path)\n\n    assert reader.load_sheet(0).visible == \"visible\"\n    assert reader.load_sheet(1).visible == \"hidden\"\n    assert reader.load_sheet(2).visible == \"veryhidden\"\n"
  },
  {
    "path": "python/tests/test_shifted_data.py",
    "content": "import fastexcel\n\nfrom .utils import path_for_fixture\n\n\ndef test_sheet_with_offset():\n    reader = fastexcel.read_excel(path_for_fixture(\"sheet-and-table-with-offset.xlsx\"))\n    sheet = reader.load_sheet(\"without-table\")\n\n    assert sheet.available_columns() == [\n        fastexcel.ColumnInfo(\n            name=\"Column at H10\",\n            index=0,\n            absolute_index=7,\n            dtype=\"float\",\n            dtype_from=\"guessed\",\n            column_name_from=\"looked_up\",\n        ),\n        fastexcel.ColumnInfo(\n            name=\"Column at I10\",\n            index=1,\n            absolute_index=8,\n            dtype=\"float\",\n            dtype_from=\"guessed\",\n            column_name_from=\"looked_up\",\n        ),\n        fastexcel.ColumnInfo(\n            name=\"__UNNAMED__2\",\n            index=2,\n            absolute_index=9,\n            dtype=\"string\",\n            dtype_from=\"guessed\",\n            column_name_from=\"generated\",\n        ),\n        fastexcel.ColumnInfo(\n            name=\"Column at K10\",\n            index=3,\n            absolute_index=10,\n            dtype=\"float\",\n            dtype_from=\"guessed\",\n            column_name_from=\"looked_up\",\n        ),\n    ]\n\n\ndef test_table_with_offset():\n    reader = fastexcel.read_excel(path_for_fixture(\"sheet-and-table-with-offset.xlsx\"))\n    table = reader.load_table(\"TableAtD5\")\n\n    assert table.available_columns() == [\n        fastexcel.ColumnInfo(\n            name=\"Column at D5\",\n            index=0,\n            absolute_index=3,\n            dtype=\"float\",\n            dtype_from=\"guessed\",\n            column_name_from=\"provided\",\n        ),\n        fastexcel.ColumnInfo(\n            name=\"Column at E5\",\n            index=1,\n            absolute_index=4,\n            dtype=\"float\",\n            dtype_from=\"guessed\",\n            column_name_from=\"provided\",\n        ),\n    ]\n"
  },
  {
    "path": "python/tests/test_tables.py",
    "content": "from datetime import datetime\n\nimport fastexcel\nimport pandas as pd\nimport polars as pl\nimport pytest\nfrom pandas.testing import assert_frame_equal as pd_assert_frame_equal\nfrom polars.testing import assert_frame_equal as pl_assert_frame_equal\n\nfrom .utils import path_for_fixture\n\n\n@pytest.mark.parametrize(\"path\", (\"sheet-with-tables.xlsx\",))\ndef test_table_names(path: str) -> None:\n    excel_reader = fastexcel.read_excel(path_for_fixture(path))\n    table_names = excel_reader.table_names()\n\n    assert table_names == [\"users\"]\n\n\n@pytest.mark.parametrize(\"path\", (\"sheet-with-tables.xlsx\",))\ndef test_table_names_with_sheet_name(path: str) -> None:\n    excel_reader = fastexcel.read_excel(path_for_fixture(path))\n    table_names = excel_reader.table_names(\"sheet1\")\n\n    assert table_names == [\"users\"]\n\n    table_names = excel_reader.table_names(\"sheet2\")\n\n    assert table_names == []\n\n\n@pytest.mark.parametrize(\"path\", (\"sheet-with-tables.xlsx\",))\ndef test_load_table(path: str) -> None:\n    excel_reader = fastexcel.read_excel(path_for_fixture(path))\n    users_tbl = excel_reader.load_table(\"users\")\n\n    assert users_tbl.name == \"users\"\n    assert users_tbl.sheet_name == \"sheet1\"\n    assert users_tbl.specified_dtypes is None\n    assert users_tbl.available_columns() == [\n        fastexcel.ColumnInfo(\n            name=\"User Id\",\n            index=0,\n            absolute_index=0,\n            dtype=\"float\",\n            dtype_from=\"guessed\",\n            column_name_from=\"provided\",\n        ),\n        fastexcel.ColumnInfo(\n            name=\"FirstName\",\n            index=1,\n            absolute_index=1,\n            dtype=\"string\",\n            dtype_from=\"guessed\",\n            column_name_from=\"provided\",\n        ),\n        fastexcel.ColumnInfo(\n            name=\"LastName\",\n            index=2,\n            absolute_index=2,\n            dtype=\"string\",\n         
   dtype_from=\"guessed\",\n            column_name_from=\"provided\",\n        ),\n        fastexcel.ColumnInfo(\n            name=\"Date\",\n            index=3,\n            absolute_index=3,\n            dtype=\"datetime\",\n            dtype_from=\"guessed\",\n            column_name_from=\"provided\",\n        ),\n    ]\n    assert users_tbl.total_height == 3\n    assert users_tbl.offset == 0\n    assert users_tbl.height == 3\n    assert users_tbl.width == 4\n\n    expected_pl = pl.DataFrame(\n        {\n            \"User Id\": [1.0, 2.0, 5.0],\n            \"FirstName\": [\"Peter\", \"John\", \"Hans\"],\n            \"LastName\": [\"Müller\", \"Meier\", \"Fricker\"],\n            \"Date\": [datetime(2020, 1, 1), datetime(2024, 5, 4), datetime(2025, 2, 1)],\n        }\n    ).with_columns(pl.col(\"Date\").dt.cast_time_unit(\"ms\"))\n    pl_assert_frame_equal(users_tbl.to_polars(), expected_pl)\n\n    expected_pd = pd.DataFrame(\n        {\n            \"User Id\": [1.0, 2.0, 5.0],\n            \"FirstName\": [\"Peter\", \"John\", \"Hans\"],\n            \"LastName\": [\"Müller\", \"Meier\", \"Fricker\"],\n            \"Date\": pd.Series(\n                [datetime(2020, 1, 1), datetime(2024, 5, 4), datetime(2025, 2, 1)]\n            ).astype(\"datetime64[ms]\"),\n        }\n    )\n\n    pd_assert_frame_equal(users_tbl.to_pandas(), expected_pd)\n\n    table_eager = excel_reader.load_table(\"users\", eager=True)\n    pl_df = pl.from_arrow(table_eager)\n    assert isinstance(pl_df, pl.DataFrame)\n    pl_assert_frame_equal(pl_df, expected_pl)\n    pd_assert_frame_equal(table_eager.to_pandas(), expected_pd)\n"
  },
  {
    "path": "python/tests/test_whitespace.py",
    "content": "import datetime\n\nimport fastexcel\nimport polars as pl\nfrom pandas.testing import assert_frame_equal as pd_assert_frame_equal\nfrom polars.testing import assert_frame_equal as pl_assert_frame_equal\n\nfrom .utils import path_for_fixture\n\n\ndef test_skip_tail_whitespace_rows() -> None:\n    \"\"\"Test that skip_whitespace_tail_rows option works correctly.\"\"\"\n    excel_reader = fastexcel.read_excel(path_for_fixture(\"sheet-and-table-with-whitespace.xlsx\"))\n\n    # Expected data when NOT skipping whitespace tail rows\n    expected_with_whitespace = pl.DataFrame(\n        {\n            \"Column One\": [\"1\", \"2\", \"3\", None, \"5\", None, None, None, None, \" \"],\n            \"Column Two\": [\"one\", \"two\", None, \"four\", \"five\", None, None, \"\", None, None],\n            \"Column Three\": [\n                datetime.datetime(2025, 11, 19, 14, 34, 2),\n                datetime.datetime(2025, 11, 20, 14, 56, 34),\n                datetime.datetime(2025, 11, 21, 15, 19, 6),\n                None,\n                datetime.datetime(2025, 11, 22, 15, 41, 38),\n                datetime.datetime(2025, 11, 23, 16, 4, 10),\n                None,\n                None,\n                None,\n                None,\n            ],\n        }\n    ).with_columns(pl.col(\"Column Three\").dt.cast_time_unit(\"ms\"))\n\n    # Expected data when skipping whitespace tail rows\n    expected_without_whitespace = pl.DataFrame(\n        {\n            \"Column One\": [1.0, 2.0, 3.0, None, 5.0, None],\n            \"Column Two\": [\"one\", \"two\", None, \"four\", \"five\", None],\n            \"Column Three\": [\n                datetime.datetime(2025, 11, 19, 14, 34, 2),\n                datetime.datetime(2025, 11, 20, 14, 56, 34),\n                datetime.datetime(2025, 11, 21, 15, 19, 6),\n                None,\n                datetime.datetime(2025, 11, 22, 15, 41, 38),\n                datetime.datetime(2025, 11, 23, 16, 4, 10),\n            
],\n        }\n    ).with_columns(pl.col(\"Column Three\").dt.cast_time_unit(\"ms\"))\n\n    # Test sheet without skipping whitespace tail rows\n    sheet_with_whitespace = excel_reader.load_sheet(\"Without Table\")\n    pl_assert_frame_equal(sheet_with_whitespace.to_polars(), expected_with_whitespace)\n\n    # Test table without skipping whitespace tail rows\n    table_with_whitespace = excel_reader.load_table(\"Table_with_whitespace\")\n    pl_assert_frame_equal(table_with_whitespace.to_polars(), expected_with_whitespace)\n\n    # Test sheet with skipping whitespace tail rows\n    sheet_without_whitespace = excel_reader.load_sheet(\n        \"Without Table\", skip_whitespace_tail_rows=True\n    )\n    pl_assert_frame_equal(sheet_without_whitespace.to_polars(), expected_without_whitespace)\n\n    # Test table with skipping whitespace tail rows\n    table_without_whitespace = excel_reader.load_table(\n        \"Table_with_whitespace\", skip_whitespace_tail_rows=True\n    )\n    pl_assert_frame_equal(table_without_whitespace.to_polars(), expected_without_whitespace)\n\n    # Also verify pandas compatibility\n    pd_assert_frame_equal(\n        sheet_without_whitespace.to_pandas(), expected_without_whitespace.to_pandas()\n    )\n    pd_assert_frame_equal(\n        table_without_whitespace.to_pandas(), expected_without_whitespace.to_pandas()\n    )\n\n\ndef test_skip_tail_rows_and_whitespace_as_null_behavior() -> None:\n    excel_reader = fastexcel.read_excel(path_for_fixture(\"sheet-and-table-with-whitespace.xlsx\"))\n\n    # Expected data when converting whitespace to null but not skipping tail rows\n    expected_with_whitespace_as_null = pl.DataFrame(\n        {\n            # All rows should be taken into account but the space in the last row should be\n            # considered null\n            \"Column One\": [1.0, 2.0, 3.0, None, 5.0, None, None, None, None, None],\n            # All rows should be taken into account but the empty string in 8th row should be\n  
          # considered null\n            \"Column Two\": [\"one\", \"two\", None, \"four\", \"five\", None, None, None, None, None],\n            \"Column Three\": [\n                datetime.datetime(2025, 11, 19, 14, 34, 2),\n                datetime.datetime(2025, 11, 20, 14, 56, 34),\n                datetime.datetime(2025, 11, 21, 15, 19, 6),\n                None,\n                datetime.datetime(2025, 11, 22, 15, 41, 38),\n                datetime.datetime(2025, 11, 23, 16, 4, 10),\n                None,\n                None,\n                None,\n                None,\n            ],\n        }\n    ).with_columns(pl.col(\"Column Three\").dt.cast_time_unit(\"ms\"))\n\n    # Expected data when converting whitespace to null and skipping tail rows\n    expected_without_whitespace = pl.DataFrame(\n        {\n            \"Column One\": [1.0, 2.0, 3.0, None, 5.0, None],\n            \"Column Two\": [\"one\", \"two\", None, \"four\", \"five\", None],\n            \"Column Three\": [\n                datetime.datetime(2025, 11, 19, 14, 34, 2),\n                datetime.datetime(2025, 11, 20, 14, 56, 34),\n                datetime.datetime(2025, 11, 21, 15, 19, 6),\n                None,\n                datetime.datetime(2025, 11, 22, 15, 41, 38),\n                datetime.datetime(2025, 11, 23, 16, 4, 10),\n            ],\n        }\n    ).with_columns(pl.col(\"Column Three\").dt.cast_time_unit(\"ms\"))\n\n    # Test sheet with whitespace_as_null but not skipping tail rows\n    sheet_with_whitespace_as_null = excel_reader.load_sheet(\n        \"Without Table\", whitespace_as_null=True\n    )\n    pl_assert_frame_equal(\n        sheet_with_whitespace_as_null.to_polars(), expected_with_whitespace_as_null\n    )\n\n    # Test table with whitespace_as_null but not skipping tail rows\n    table_with_whitespace_as_null = excel_reader.load_table(\n        \"Table_with_whitespace\", whitespace_as_null=True\n    )\n    pl_assert_frame_equal(\n        
table_with_whitespace_as_null.to_polars(), expected_with_whitespace_as_null\n    )\n\n    # Test sheet with both whitespace_as_null and skip_whitespace_tail_rows\n    sheet_without_whitespace = excel_reader.load_sheet(\n        \"Without Table\", whitespace_as_null=True, skip_whitespace_tail_rows=True\n    )\n    pl_assert_frame_equal(sheet_without_whitespace.to_polars(), expected_without_whitespace)\n\n    # Test table with both whitespace_as_null and skip_whitespace_tail_rows\n    table_without_whitespace = excel_reader.load_table(\n        \"Table_with_whitespace\", whitespace_as_null=True, skip_whitespace_tail_rows=True\n    )\n    pl_assert_frame_equal(table_without_whitespace.to_polars(), expected_without_whitespace)\n\n    # Also verify pandas compatibility\n    pd_assert_frame_equal(\n        sheet_without_whitespace.to_pandas(), expected_without_whitespace.to_pandas()\n    )\n    pd_assert_frame_equal(\n        sheet_with_whitespace_as_null.to_pandas(), expected_with_whitespace_as_null.to_pandas()\n    )\n    pd_assert_frame_equal(\n        table_without_whitespace.to_pandas(), expected_without_whitespace.to_pandas()\n    )\n    pd_assert_frame_equal(\n        table_with_whitespace_as_null.to_pandas(), expected_with_whitespace_as_null.to_pandas()\n    )\n"
  },
  {
    "path": "python/tests/utils.py",
    "content": "from __future__ import annotations\n\nfrom pathlib import Path\nfrom typing import Any\n\nimport numpy as np\nimport pandas as pd\n\n\ndef path_for_fixture(fixture_file: str) -> str:\n    return str(Path(__file__).parent.parent.parent / \"tests\" / \"fixtures\" / fixture_file)\n\n\ndef get_expected_pandas_dtype(fastexcel_dtype: str) -> Any:\n    \"\"\"Get the expected pandas dtype for a given fastexcel dtype, accounting for pandas version.\n\n    In pandas < 3.0, string columns use object dtype.\n    In pandas >= 3.0, string columns use StringDtype (with na_value=nan when from Arrow).\n    \"\"\"\n    pd_version = tuple(int(x) for x in pd.__version__.split(\".\")[:2])\n\n    dtype_map = {\n        \"int\": np.dtype(\"int64\"),\n        \"float\": np.dtype(\"float64\"),\n        \"boolean\": np.dtype(\"bool\"),\n        \"datetime\": np.dtype(\"datetime64[ms]\"),\n        \"duration\": np.dtype(\"timedelta64[ms]\"),\n    }\n\n    if fastexcel_dtype in dtype_map:\n        return dtype_map[fastexcel_dtype]\n\n    if fastexcel_dtype == \"string\":\n        if pd_version >= (3, 0):\n            # When converting from Arrow, pandas uses nan as na_value\n            return pd.StringDtype(na_value=np.nan)\n        else:\n            return np.dtype(\"object\")\n\n    if fastexcel_dtype == \"date\":\n        # Date columns are always object dtype\n        return np.dtype(\"object\")\n\n    raise ValueError(f\"Unknown fastexcel dtype: {fastexcel_dtype}\")\n\n\ndef assert_pandas_dtypes(df: pd.DataFrame, expected_dtypes: dict[str, str]) -> None:\n    \"\"\"Assert that a pandas DataFrame has the expected dtypes for each column.\n\n    Args:\n        df: The pandas DataFrame to check\n        expected_dtypes: A dict mapping column names to fastexcel dtype strings\n    \"\"\"\n    for col_name, fastexcel_dtype in expected_dtypes.items():\n        expected_dtype = get_expected_pandas_dtype(fastexcel_dtype)\n        actual_dtype = df[col_name].dtype\n        assert 
actual_dtype == expected_dtype, (\n            f\"Column '{col_name}': expected dtype {expected_dtype}, got {actual_dtype}\"\n        )\n"
  },
  {
    "path": "scripts/update_versions.py",
    "content": "#!/usr/bin/env -S uv run --script\n# /// script\n# requires-python = \">=3.9\"\n# dependencies = []\n# ///\n\"\"\"Manage docs/versions.json and generate the root docs/index.html redirect.\"\"\"\n\nfrom __future__ import annotations\n\nimport argparse\nimport json\nimport re\nfrom pathlib import Path\n\n\ndef parse_semver(version: str) -> tuple[int, ...]:\n    \"\"\"Extract numeric parts from a version string like 'v0.19.0'.\"\"\"\n    return tuple(int(x) for x in re.findall(r\"\\d+\", version))\n\n\ndef sort_versions(versions: list[dict]) -> list[dict]:\n    \"\"\"Sort: stable first, then tags descending by semver, 'latest' last.\"\"\"\n    def sort_key(v: dict) -> tuple[int, tuple[int, ...], str]:\n        path = v[\"path\"]\n        if v.get(\"stable\"):\n            return (0, (), \"\")\n        if path == \"latest\":\n            return (2, (), \"\")\n        return (1, tuple(-x for x in parse_semver(path)), path)\n\n    return sorted(versions, key=sort_key)\n\n\ndef update_versions(docs_dir: Path, version: str, *, stable: bool) -> None:\n    if not re.fullmatch(r\"latest|v\\d+(\\.\\d+)*\", version):\n        raise ValueError(f\"Invalid version '{version}': must be 'latest' or match 'v<semver>' (e.g. 
v0.19.0)\")\n\n    versions_file = docs_dir / \"versions.json\"\n\n    if versions_file.exists():\n        versions = json.loads(versions_file.read_text())\n    else:\n        versions = []\n\n    # Build label\n    if version == \"latest\":\n        label = \"latest (main)\"\n    elif stable:\n        label = f\"{version} (stable)\"\n    else:\n        label = version\n\n    # Remove old entry for this version, and clear stable flag from others if\n    # this one is now stable\n    new_versions = []\n    for v in versions:\n        if v[\"path\"] == version:\n            continue\n        if stable and v.get(\"stable\"):\n            v = {**v, \"stable\": False, \"label\": v[\"path\"]}\n        new_versions.append(v)\n\n    new_versions.append({\"label\": label, \"path\": version, \"stable\": stable})\n    new_versions = sort_versions(new_versions)\n\n    versions_file.write_text(json.dumps(new_versions, indent=2) + \"\\n\")\n\n    # Generate root redirect\n    stable_entry = next((v for v in new_versions if v.get(\"stable\")), None)\n    redirect_path = stable_entry[\"path\"] if stable_entry else version\n    index_html = docs_dir / \"index.html\"\n    index_html.write_text(\n        f\"\"\"\\\n<!doctype html>\n<html>\n<head>\n    <meta charset=\"utf-8\">\n    <meta http-equiv=\"refresh\" content=\"0; url=./{redirect_path}/fastexcel.html\"/>\n</head>\n<body>\n    <p>Redirecting to <a href=\"./{redirect_path}/fastexcel.html\">{redirect_path} documentation</a>...</p>\n</body>\n</html>\n\"\"\"\n    )\n\n\ndef main() -> None:\n    parser = argparse.ArgumentParser(description=\"Update docs versions.json\")\n    parser.add_argument(\"--version\", required=True, help=\"Version name (e.g. 
v0.19.0 or latest)\")\n    parser.add_argument(\"--stable\", action=\"store_true\", help=\"Mark this version as the stable default\")\n    parser.add_argument(\"--docs-dir\", default=\"docs\", help=\"Path to the docs directory\")\n    args = parser.parse_args()\n\n    update_versions(Path(args.docs_dir), args.version, stable=args.stable)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "src/data/cell_extractors.rs",
    "content": "use calamine::{CellType, DataType};\nuse chrono::{NaiveDate, NaiveDateTime, TimeDelta};\n\nuse crate::types::dtype::excel_float_to_string;\n\npub(super) fn extract_boolean<DT: CellType + DataType>(cell: &DT) -> Option<bool> {\n    if let Some(b) = cell.get_bool() {\n        Some(b)\n    } else if let Some(i) = cell.get_int() {\n        Some(i != 0)\n    }\n    // clippy formats else if let Some(blah) = ... { Some(x) } else { None } to the .map form\n    else {\n        cell.get_float().map(|f| f != 0.0)\n    }\n}\n\npub(super) fn extract_int<DT: CellType + DataType>(cell: &DT) -> Option<i64> {\n    cell.as_i64()\n}\n\npub(super) fn extract_float<DT: CellType + DataType>(cell: &DT) -> Option<f64> {\n    cell.as_f64()\n}\n\npub(super) fn extract_string<DT: CellType + DataType>(cell: &DT) -> Option<String> {\n    if cell.is_string() {\n        cell.get_string().map(str::to_string)\n    } else if cell.is_datetime() {\n        cell.get_datetime()\n            .and_then(|dt| dt.as_datetime())\n            .map(|dt| dt.to_string())\n    } else if cell.is_datetime_iso() {\n        cell.get_datetime_iso().map(str::to_string)\n    } else if cell.is_bool() {\n        cell.get_bool().map(|v| v.to_string())\n    } else if cell.is_float() {\n        cell.get_float().map(excel_float_to_string)\n    } else {\n        cell.as_string()\n    }\n}\n\npub(super) fn extract_date<DT: CellType + DataType>(cell: &DT) -> Option<NaiveDate> {\n    cell.as_date()\n}\n\n#[cfg(feature = \"python\")]\nconst EPOCH: NaiveDate = NaiveDate::from_ymd_opt(1970, 1, 1).expect(\"Failed to create EPOCH\");\n\n#[cfg(feature = \"python\")]\npub(super) fn extract_date_as_num_days<DT: CellType + DataType>(cell: &DT) -> Option<i32> {\n    extract_date(cell)\n        .and_then(|date| i32::try_from(date.signed_duration_since(EPOCH).num_days()).ok())\n}\n\npub(super) fn extract_datetime<DT: CellType + DataType>(cell: &DT) -> Option<NaiveDateTime> {\n    cell.as_datetime()\n}\n\n#[cfg(feature = 
\"python\")]\npub(super) fn extract_datetime_as_timestamp_ms<DT: CellType + DataType>(cell: &DT) -> Option<i64> {\n    extract_datetime(cell).map(|dt| dt.and_utc().timestamp_millis())\n}\n\npub(super) fn extract_duration<DT: CellType + DataType>(cell: &DT) -> Option<TimeDelta> {\n    cell.as_duration()\n}\n\n#[cfg(feature = \"python\")]\npub(super) fn extract_duration_as_ms<DT: CellType + DataType>(cell: &DT) -> Option<i64> {\n    extract_duration(cell).map(|d| d.num_milliseconds())\n}\n"
  },
  {
    "path": "src/data/mod.rs",
    "content": "mod cell_extractors;\n#[cfg(feature = \"python\")]\nmod python;\nmod rust;\nuse chrono::{Duration, NaiveDate, NaiveDateTime};\n#[cfg(feature = \"python\")]\npub(crate) use python::*;\n\nuse calamine::{CellType, Data as CalData, DataRef as CalDataRef, DataType, Range};\n\nuse crate::{\n    data::rust::{\n        create_boolean_vec, create_date_vec, create_datetime_vec, create_duration_vec,\n        create_float_vec, create_int_vec, create_string_vec,\n    },\n    error::{FastExcelErrorKind, FastExcelResult},\n    types::{\n        dtype::{DType, DTypeCoercion, get_dtype_for_column},\n        excelsheet::{SkipRows, column_info::ColumnInfo},\n    },\n};\n\n#[derive(Debug)]\npub(crate) enum ExcelSheetData<'r> {\n    Owned(Range<CalData>),\n    Ref(Range<CalDataRef<'r>>),\n}\n\nimpl ExcelSheetData<'_> {\n    pub(crate) fn width(&self) -> usize {\n        match self {\n            ExcelSheetData::Owned(range) => range.width(),\n            ExcelSheetData::Ref(range) => range.width(),\n        }\n    }\n\n    pub(crate) fn height(&self) -> usize {\n        match self {\n            ExcelSheetData::Owned(range) => range.height(),\n            ExcelSheetData::Ref(range) => range.height(),\n        }\n    }\n\n    pub(super) fn get_as_string(&self, pos: (usize, usize)) -> Option<String> {\n        match self {\n            ExcelSheetData::Owned(range) => range.get(pos).and_then(|data| data.as_string()),\n            ExcelSheetData::Ref(range) => range.get(pos).and_then(|data| data.as_string()),\n        }\n    }\n\n    pub(crate) fn dtype_for_column(\n        &self,\n        start_row: usize,\n        end_row: usize,\n        col: usize,\n        dtype_coercion: &DTypeCoercion,\n        whitespace_as_null: bool,\n    ) -> FastExcelResult<DType> {\n        match self {\n            ExcelSheetData::Owned(data) => get_dtype_for_column(\n                data,\n                start_row,\n                end_row,\n                col,\n                
dtype_coercion,\n                whitespace_as_null,\n            ),\n            ExcelSheetData::Ref(data) => get_dtype_for_column(\n                data,\n                start_row,\n                end_row,\n                col,\n                dtype_coercion,\n                whitespace_as_null,\n            ),\n        }\n    }\n\n    pub(crate) fn height_without_tail_whitespace(&self) -> usize {\n        match self {\n            ExcelSheetData::Owned(data) => {\n                height_without_tail_whitespace(data).unwrap_or_else(|| data.height())\n            }\n            ExcelSheetData::Ref(data) => {\n                height_without_tail_whitespace(data).unwrap_or_else(|| data.height())\n            }\n        }\n    }\n\n    pub(crate) fn start(&self) -> Option<(usize, usize)> {\n        let start = match self {\n            ExcelSheetData::Owned(range) => range.start(),\n            ExcelSheetData::Ref(range) => range.start(),\n        };\n        start.map(|(r, c)| (r as usize, c as usize))\n    }\n}\n\nimpl From<Range<CalData>> for ExcelSheetData<'_> {\n    fn from(range: Range<CalData>) -> Self {\n        Self::Owned(range)\n    }\n}\n\nimpl<'a> From<Range<CalDataRef<'a>>> for ExcelSheetData<'a> {\n    fn from(range: Range<CalDataRef<'a>>) -> Self {\n        Self::Ref(range)\n    }\n}\n\ntrait CellIsWhiteSpace {\n    fn is_whitespace(&self) -> bool;\n}\n\nimpl<T> CellIsWhiteSpace for T\nwhere\n    T: DataType,\n{\n    fn is_whitespace(&self) -> bool {\n        if self.is_empty() {\n            true\n        } else if self.is_string()\n            && let Some(s) = self.get_string()\n        {\n            s.trim().is_empty()\n        } else {\n            false\n        }\n    }\n}\n\npub(crate) fn height_without_tail_whitespace<CT: CellType + DataType + std::fmt::Debug>(\n    data: &Range<CT>,\n) -> Option<usize> {\n    let height = data.height();\n    let width = data.width();\n    if height < 1 {\n        return Some(0);\n    }\n    if width < 1 
{\n        return None;\n    }\n    (0..width)\n        .map(|col_idx| {\n            let mut row_idx = height - 1;\n            // Start at the bottom of the column and work upwards until we find a non-empty cell\n            while row_idx > 0\n                && data\n                    .get((row_idx, col_idx))\n                    .map(CellIsWhiteSpace::is_whitespace)\n                    .unwrap_or(true)\n            {\n                row_idx -= 1;\n            }\n            row_idx + 1\n        })\n        .max()\n}\n\n/// A container for a typed vector of values. Used to represent a column of data in an Excel sheet.\n/// These should only be used when you need to work on the raw data. Otherwise, you should use a\n/// `FastExcelColumn`.\n#[derive(Debug, Clone, PartialEq)]\npub enum FastExcelSeries {\n    Null,\n    Bool(Vec<Option<bool>>),\n    String(Vec<Option<String>>),\n    Int(Vec<Option<i64>>),\n    Float(Vec<Option<f64>>),\n    Datetime(Vec<Option<NaiveDateTime>>),\n    Date(Vec<Option<NaiveDate>>),\n    Duration(Vec<Option<Duration>>),\n}\n\nimpl FastExcelSeries {\n    pub fn dtype(&self) -> DType {\n        match self {\n            FastExcelSeries::Null => DType::Null,\n            FastExcelSeries::Bool(_) => DType::Bool,\n            FastExcelSeries::String(_) => DType::String,\n            FastExcelSeries::Int(_) => DType::Int,\n            FastExcelSeries::Float(_) => DType::Float,\n            FastExcelSeries::Datetime(_) => DType::DateTime,\n            FastExcelSeries::Date(_) => DType::Date,\n            FastExcelSeries::Duration(_) => DType::Duration,\n        }\n    }\n\n    pub fn is_null(&self) -> bool {\n        matches!(self, FastExcelSeries::Null)\n    }\n}\n\nmacro_rules! 
impl_series_variant {\n    ($type:ty, $variant:ident, $into_fn:ident) => {\n        impl From<Vec<Option<$type>>> for FastExcelSeries {\n            fn from(vec: Vec<Option<$type>>) -> Self {\n                Self::$variant(vec)\n            }\n        }\n\n        impl<const N: usize> From<[Option<$type>; N]> for FastExcelSeries {\n            fn from(arr: [Option<$type>; N]) -> Self {\n                Self::$variant(arr.to_vec())\n            }\n        }\n\n        impl<const N: usize> From<[$type; N]> for FastExcelSeries {\n            fn from(arr: [$type; N]) -> Self {\n                Self::$variant(arr.into_iter().map(Some).collect())\n            }\n        }\n\n        impl From<&[$type]> for FastExcelSeries {\n            fn from(arr: &[$type]) -> Self {\n                Self::$variant(arr.into_iter().map(|it| Some(it.to_owned())).collect())\n            }\n        }\n\n        impl From<&[Option<$type>]> for FastExcelSeries {\n            fn from(arr: &[Option<$type>]) -> Self {\n                Self::$variant(arr.into_iter().map(ToOwned::to_owned).collect())\n            }\n        }\n\n        // Not implementing is_empty here, because we have no len information for null Series\n        impl FastExcelSeries {\n            pub fn $into_fn(self) -> FastExcelResult<Vec<Option<$type>>> {\n                if let Self::$variant(vec) = self {\n                    Ok(vec)\n                } else {\n                    Err(FastExcelErrorKind::InvalidParameters(format!(\n                        \"{self:?} cannot be converted to {type_name}\",\n                        type_name = std::any::type_name::<$type>()\n                    ))\n                    .into())\n                }\n            }\n        }\n    };\n}\n\nimpl_series_variant!(bool, Bool, into_bools);\nimpl_series_variant!(String, String, into_strings);\nimpl_series_variant!(i64, Int, into_ints);\nimpl_series_variant!(f64, Float, into_floats);\nimpl_series_variant!(NaiveDateTime, Datetime, 
into_datetimes);\nimpl_series_variant!(NaiveDate, Date, into_dates);\nimpl_series_variant!(Duration, Duration, into_durations);\n\n// Conflicting impls when using `From<AsRef<[&str]>>`\nimpl<const N: usize> From<[Option<&str>; N]> for FastExcelSeries {\n    fn from(arr: [Option<&str>; N]) -> Self {\n        Self::String(arr.into_iter().map(|s| s.map(|s| s.to_string())).collect())\n    }\n}\n\nimpl<const N: usize> From<[&str; N]> for FastExcelSeries {\n    fn from(arr: [&str; N]) -> Self {\n        Self::String(arr.into_iter().map(|s| Some(s.to_string())).collect())\n    }\n}\n\n/// A column in a sheet or table. A wrapper around a `FastExcelSeries` and a name.\n#[derive(Debug, Clone, PartialEq)]\npub struct FastExcelColumn {\n    pub name: String,\n    pub(crate) data: FastExcelSeries,\n    len: usize,\n}\n\nimpl FastExcelColumn {\n    pub fn try_new(\n        name: String,\n        data: FastExcelSeries,\n        len: Option<usize>,\n    ) -> FastExcelResult<Self> {\n        let data_len = match &data {\n            FastExcelSeries::Null => None,\n            FastExcelSeries::Bool(v) => Some(v.len()),\n            FastExcelSeries::String(v) => Some(v.len()),\n            FastExcelSeries::Int(v) => Some(v.len()),\n            FastExcelSeries::Float(v) => Some(v.len()),\n            FastExcelSeries::Datetime(v) => Some(v.len()),\n            FastExcelSeries::Date(v) => Some(v.len()),\n            FastExcelSeries::Duration(v) => Some(v.len()),\n        };\n        if let Some(len) = len\n            && let Some(data_len) = data_len\n            && data_len != len\n        {\n            return Err(FastExcelErrorKind::InvalidColumn(format!(\n                \"Column '{name}' has length {data_len} but expected {len}\"\n            ))\n            .into());\n        }\n        let len = len.or(data_len).ok_or_else(|| {\n            FastExcelErrorKind::InvalidColumn(\n                \"`len` is mandatory for `FastExcelSeries::Null`\".to_string(),\n            )\n        
})?;\n        Ok(Self { name, data, len })\n    }\n\n    /// Create a new null series with the given name and length.\n    pub fn new_null<S: Into<String>>(name: S, len: usize) -> Self {\n        Self {\n            name: name.into(),\n            data: FastExcelSeries::Null,\n            len,\n        }\n    }\n\n    pub(crate) fn try_from_column_info<CT: CellType + DataType>(\n        column_info: &ColumnInfo,\n        data: &Range<CT>,\n        offset: usize,\n        limit: usize,\n        whitespace_as_null: bool,\n    ) -> FastExcelResult<Self> {\n        let len = limit.checked_sub(offset).ok_or_else(|| {\n            FastExcelErrorKind::InvalidParameters(format!(\n                \"limit is smaller than offset: {limit} is smaller than {offset}\"\n            ))\n        })?;\n        let data = match column_info.dtype {\n            DType::Null => FastExcelSeries::Null,\n            DType::Int => {\n                FastExcelSeries::Int(create_int_vec(data, column_info.index, offset, limit))\n            }\n            DType::Float => {\n                FastExcelSeries::Float(create_float_vec(data, column_info.index, offset, limit))\n            }\n            DType::String => FastExcelSeries::String(create_string_vec(\n                data,\n                column_info.index,\n                offset,\n                limit,\n                whitespace_as_null,\n            )),\n            DType::Bool => {\n                FastExcelSeries::Bool(create_boolean_vec(data, column_info.index, offset, limit))\n            }\n            DType::DateTime => FastExcelSeries::Datetime(create_datetime_vec(\n                data,\n                column_info.index,\n                offset,\n                limit,\n            )),\n            DType::Date => {\n                FastExcelSeries::Date(create_date_vec(data, column_info.index, offset, limit))\n            }\n            DType::Duration => FastExcelSeries::Duration(create_duration_vec(\n                
data,\n                column_info.index,\n                offset,\n                limit,\n            )),\n        };\n        Ok(Self {\n            name: column_info.name.clone(),\n            data,\n            len,\n        })\n    }\n\n    pub fn len(&self) -> usize {\n        self.len\n    }\n\n    pub fn is_empty(&self) -> bool {\n        self.len == 0\n    }\n\n    pub fn name(&self) -> &str {\n        &self.name\n    }\n\n    pub fn data(&self) -> &FastExcelSeries {\n        &self.data\n    }\n}\n\nimpl From<FastExcelColumn> for FastExcelSeries {\n    fn from(column: FastExcelColumn) -> Self {\n        column.data\n    }\n}\n\n/// Enum for lazy row selection - avoids materializing Vec for simple cases\n#[derive(Debug)]\npub(crate) enum RowSelector {\n    /// Simple range - no Vec allocation needed\n    Range(std::ops::Range<usize>),\n    /// Pre-filtered list of specific row indices\n    Filtered(Vec<usize>),\n}\n\nimpl RowSelector {\n    pub(crate) fn len(&self) -> usize {\n        match self {\n            RowSelector::Range(range) => range.len(),\n            RowSelector::Filtered(vec) => vec.len(),\n        }\n    }\n}\n\n/// Generate row selector based on [`SkipRows`] and range limits\npub(crate) fn generate_row_selector(\n    skip_rows: &SkipRows,\n    offset: usize,\n    limit: usize,\n) -> FastExcelResult<RowSelector> {\n    match skip_rows {\n        SkipRows::Simple(_skip_count) => {\n            // For simple case, the offset has already been adjusted by pagination logic\n            // So we just return the normal range - no Vec allocation!\n            Ok(RowSelector::Range(offset..limit))\n        }\n        SkipRows::SkipEmptyRowsAtBeginning => {\n            // For empty rows at beginning, calamine handles this at the header level\n            // So we just return the normal range - no Vec allocation!\n            Ok(RowSelector::Range(offset..limit))\n        }\n        SkipRows::List(skip_set) => {\n            // Filter out rows that 
are in the skip set\n            // `skip_set` contains data-relative indices, but we need to work with absolute indices\n            let filtered: Vec<usize> = (offset..limit)\n                .enumerate()\n                .filter_map(|(data_row_idx, absolute_row_idx)| {\n                    (!skip_set.contains(&data_row_idx)).then_some(absolute_row_idx)\n                })\n                .collect();\n            Ok(RowSelector::Filtered(filtered))\n        }\n        #[cfg(feature = \"python\")]\n        SkipRows::Callable(_func) => {\n            // Call the Python function for each row to determine if it should be skipped\n            // The callable should receive data-relative row indices (0, 1, 2, ...)\n            pyo3::Python::attach(|py| {\n                Ok(RowSelector::Filtered(\n                    (offset..limit)\n                        .enumerate()\n                        .filter_map(|(data_row_idx, absolute_row_idx)| {\n                            (!skip_rows.should_skip_row(data_row_idx, py).unwrap_or(false))\n                                .then_some(absolute_row_idx)\n                        })\n                        .collect(),\n                ))\n            })\n        }\n    }\n}\n"
  },
  {
    "path": "src/data/python.rs",
    "content": "use std::sync::Arc;\nuse std::{fmt::Debug, ops::Not};\n\nuse arrow_array::{\n    Array, ArrayRef, BooleanArray, Date32Array, DurationMillisecondArray, Float64Array, Int64Array,\n    NullArray, RecordBatch, StringArray, TimestampMillisecondArray,\n};\nuse arrow_schema::{Field, Schema};\nuse calamine::{CellType, DataType, Range};\n\nuse super::cell_extractors;\nuse crate::{\n    data::{ExcelSheetData, RowSelector, generate_row_selector},\n    error::{ErrorContext, FastExcelErrorKind, FastExcelResult},\n    types::{\n        dtype::DType,\n        excelsheet::{CellError, CellErrors, SkipRows, column_info::ColumnInfo},\n    },\n};\n\nmod with_error_impls {\n    use super::*;\n\n    pub(crate) fn create_boolean_array_with_errors<CT: CellType + DataType + Debug>(\n        data: &Range<CT>,\n        col: usize,\n        offset: usize,\n        limit: usize,\n    ) -> (Arc<dyn Array>, Vec<CellError>) {\n        let mut cell_errors = vec![];\n\n        let arr = Arc::new(BooleanArray::from_iter((offset..limit).map(|row| {\n            data.get((row, col)).and_then(|cell| {\n                if cell.is_empty() {\n                    None\n                } else if let Some(b) = cell_extractors::extract_boolean(cell) {\n                    Some(b)\n                } else {\n                    cell_errors.push(CellError {\n                        position: (row, col),\n                        row_offset: offset,\n                        detail: format!(\"Expected boolean but got '{cell:?}\"),\n                    });\n                    None\n                }\n            })\n        })));\n\n        (arr, cell_errors)\n    }\n\n    pub(crate) fn create_int_array_with_errors<CT: CellType + DataType + Debug>(\n        data: &Range<CT>,\n        col: usize,\n        offset: usize,\n        limit: usize,\n    ) -> (Arc<dyn Array>, Vec<CellError>) {\n        let mut cell_errors = vec![];\n\n        let arr = 
Arc::new(Int64Array::from_iter((offset..limit).map(|row| {\n            data.get((row, col)).and_then(|cell| {\n                if cell.is_empty() {\n                    None\n                } else {\n                    match cell_extractors::extract_int(cell) {\n                        Some(value) => Some(value),\n                        None => {\n                            cell_errors.push(CellError {\n                                position: (row, col),\n                                row_offset: offset,\n                                detail: format!(\"Expected int but got '{cell:?}'\"),\n                            });\n                            None\n                        }\n                    }\n                }\n            })\n        })));\n        (arr, cell_errors)\n    }\n\n    pub(crate) fn create_float_array_with_errors<CT: CellType + DataType + Debug>(\n        data: &Range<CT>,\n        col: usize,\n        offset: usize,\n        limit: usize,\n    ) -> (Arc<dyn Array>, Vec<CellError>) {\n        let mut cell_errors = vec![];\n\n        let arr = Arc::new(Float64Array::from_iter((offset..limit).map(|row| {\n            data.get((row, col)).and_then(|cell| {\n                if cell.is_empty() {\n                    None\n                } else {\n                    match cell_extractors::extract_float(cell) {\n                        Some(value) => Some(value),\n                        None => {\n                            cell_errors.push(CellError {\n                                position: (row, col),\n                                row_offset: offset,\n                                detail: format!(\"Expected float but got '{cell:?}'\"),\n                            });\n                            None\n                        }\n                    }\n                }\n            })\n        })));\n        (arr, cell_errors)\n    }\n\n    pub(crate) fn create_string_array_with_errors<CT: CellType + DataType + Debug>(\n    
    data: &Range<CT>,\n        col: usize,\n        offset: usize,\n        limit: usize,\n        whitespace_as_null: bool,\n    ) -> (Arc<dyn Array>, Vec<CellError>) {\n        let mut cell_errors = vec![];\n\n        let arr = Arc::new(StringArray::from_iter((offset..limit).map(|row| {\n            data.get((row, col)).and_then(|cell| {\n                if cell.is_empty() {\n                    None\n                } else {\n                    match cell_extractors::extract_string(cell) {\n                        Some(value) => {\n                            if whitespace_as_null && value.trim().is_empty() {\n                                None\n                            } else {\n                                Some(value)\n                            }\n                        }\n                        None => {\n                            cell_errors.push(CellError {\n                                position: (row, col),\n                                row_offset: offset,\n                                detail: format!(\"Expected string but got '{cell:?}'\"),\n                            });\n                            None\n                        }\n                    }\n                }\n            })\n        })));\n\n        (arr, cell_errors)\n    }\n\n    pub(crate) fn create_date_array_with_errors<CT: CellType + DataType + Debug>(\n        data: &Range<CT>,\n        col: usize,\n        offset: usize,\n        limit: usize,\n    ) -> (Arc<dyn Array>, Vec<CellError>) {\n        let mut cell_errors = vec![];\n\n        let arr = Arc::new(Date32Array::from_iter((offset..limit).map(|row| {\n            data.get((row, col)).and_then(|cell| {\n                if cell.is_empty() {\n                    None\n                } else {\n                    match cell_extractors::extract_date_as_num_days(cell) {\n                        Some(value) => Some(value),\n                        None => {\n                            
cell_errors.push(CellError {\n                                position: (row, col),\n                                row_offset: offset,\n                                detail: format!(\"Expected date but got '{:?}'\", cell),\n                            });\n                            None\n                        }\n                    }\n                }\n            })\n        })));\n\n        (arr, cell_errors)\n    }\n\n    pub(crate) fn create_datetime_array_with_errors<CT: CellType + DataType + Debug>(\n        data: &Range<CT>,\n        col: usize,\n        offset: usize,\n        limit: usize,\n    ) -> (Arc<dyn Array>, Vec<CellError>) {\n        let mut cell_errors = vec![];\n        let arr = Arc::new(TimestampMillisecondArray::from_iter((offset..limit).map(\n            |row| {\n                data.get((row, col)).and_then(|cell| {\n                    if cell.is_empty() {\n                        None\n                    } else {\n                        match cell_extractors::extract_datetime_as_timestamp_ms(cell) {\n                            Some(value) => Some(value),\n                            None => {\n                                cell_errors.push(CellError {\n                                    position: (row, col),\n                                    row_offset: offset,\n                                    detail: format!(\"Expected datetime but got '{:?}'\", cell),\n                                });\n                                None\n                            }\n                        }\n                    }\n                })\n            },\n        )));\n        (arr, cell_errors)\n    }\n\n    pub(crate) fn create_duration_array_with_errors<CT: CellType + DataType + Debug>(\n        data: &Range<CT>,\n        col: usize,\n        offset: usize,\n        limit: usize,\n    ) -> (Arc<dyn Array>, Vec<CellError>) {\n        let mut cell_errors = vec![];\n        let arr = 
Arc::new(DurationMillisecondArray::from_iter((offset..limit).map(\n            |row| {\n                data.get((row, col)).and_then(|cell| {\n                    if cell.is_empty() {\n                        None\n                    } else {\n                        match cell_extractors::extract_duration_as_ms(cell) {\n                            Some(value) => Some(value),\n                            None => {\n                                cell_errors.push(CellError {\n                                    position: (row, col),\n                                    row_offset: offset,\n                                    detail: format!(\"Expected duration but got '{cell:?}'\"),\n                                });\n                                None\n                            }\n                        }\n                    }\n                })\n            },\n        )));\n        (arr, cell_errors)\n    }\n}\n\npub(crate) fn create_boolean_array<CT: CellType + DataType>(\n    data: &Range<CT>,\n    col: usize,\n    row_iter: impl Iterator<Item = usize>,\n) -> Arc<dyn Array> {\n    Arc::new(BooleanArray::from_iter(row_iter.map(|row| {\n        data.get((row, col))\n            .and_then(cell_extractors::extract_boolean)\n    })))\n}\n\npub(crate) fn create_int_array<CT: CellType + DataType>(\n    data: &Range<CT>,\n    col: usize,\n    row_iter: impl Iterator<Item = usize>,\n) -> Arc<dyn Array> {\n    Arc::new(Int64Array::from_iter(row_iter.map(|row| {\n        data.get((row, col)).and_then(cell_extractors::extract_int)\n    })))\n}\n\npub(crate) fn create_float_array<CT: CellType + DataType>(\n    data: &Range<CT>,\n    col: usize,\n    row_iter: impl Iterator<Item = usize>,\n) -> Arc<dyn Array> {\n    Arc::new(Float64Array::from_iter(row_iter.map(|row| {\n        data.get((row, col))\n            .and_then(cell_extractors::extract_float)\n    })))\n}\n\npub(crate) fn create_string_array<CT: CellType + DataType>(\n    data: &Range<CT>,\n    col: 
usize,\n    row_iter: impl Iterator<Item = usize>,\n    whitespace_as_null: bool,\n) -> Arc<dyn Array> {\n    Arc::new(if whitespace_as_null {\n        StringArray::from_iter(row_iter.map(|row| {\n            data.get((row, col))\n                .and_then(cell_extractors::extract_string)\n                // Only return the string if it contains non-whitespace characters\n                .filter(|s| s.trim().is_empty().not())\n        }))\n    } else {\n        StringArray::from_iter(row_iter.map(|row| {\n            data.get((row, col))\n                .and_then(cell_extractors::extract_string)\n        }))\n    })\n}\n\npub(crate) fn create_date_array<CT: CellType + DataType>(\n    data: &Range<CT>,\n    col: usize,\n    row_iter: impl Iterator<Item = usize>,\n) -> Arc<dyn Array> {\n    Arc::new(Date32Array::from_iter(row_iter.map(|row| {\n        data.get((row, col))\n            .and_then(cell_extractors::extract_date_as_num_days)\n    })))\n}\n\npub(crate) fn create_datetime_array<CT: CellType + DataType>(\n    data: &Range<CT>,\n    col: usize,\n    row_iter: impl Iterator<Item = usize>,\n) -> Arc<dyn Array> {\n    Arc::new(TimestampMillisecondArray::from_iter(row_iter.map(|row| {\n        data.get((row, col))\n            .and_then(cell_extractors::extract_datetime_as_timestamp_ms)\n    })))\n}\n\npub(crate) fn create_duration_array<CT: CellType + DataType>(\n    data: &Range<CT>,\n    col: usize,\n    row_iter: impl Iterator<Item = usize>,\n) -> Arc<dyn Array> {\n    Arc::new(DurationMillisecondArray::from_iter(row_iter.map(|row| {\n        data.get((row, col))\n            .and_then(cell_extractors::extract_duration_as_ms)\n    })))\n}\n\nmacro_rules! 
create_array_function_with_errors {\n    ($func_name:ident) => {\n        pub(crate) fn $func_name(\n            data: &ExcelSheetData,\n            col: usize,\n            offset: usize,\n            limit: usize,\n        ) -> (Arc<dyn Array>, Vec<CellError>) {\n            match data {\n                ExcelSheetData::Owned(range) => {\n                    with_error_impls::$func_name(range, col, offset, limit)\n                }\n                ExcelSheetData::Ref(range) => {\n                    with_error_impls::$func_name(range, col, offset, limit)\n                }\n            }\n        }\n    };\n}\n\ncreate_array_function_with_errors!(create_boolean_array_with_errors);\ncreate_array_function_with_errors!(create_int_array_with_errors);\ncreate_array_function_with_errors!(create_float_array_with_errors);\ncreate_array_function_with_errors!(create_date_array_with_errors);\ncreate_array_function_with_errors!(create_datetime_array_with_errors);\ncreate_array_function_with_errors!(create_duration_array_with_errors);\n\npub(crate) fn create_string_array_with_errors(\n    data: &ExcelSheetData,\n    col: usize,\n    offset: usize,\n    limit: usize,\n    whitespace_as_null: bool,\n) -> (Arc<dyn Array>, Vec<CellError>) {\n    match data {\n        ExcelSheetData::Owned(range) => with_error_impls::create_string_array_with_errors(\n            range,\n            col,\n            offset,\n            limit,\n            whitespace_as_null,\n        ),\n        ExcelSheetData::Ref(range) => with_error_impls::create_string_array_with_errors(\n            range,\n            col,\n            offset,\n            limit,\n            whitespace_as_null,\n        ),\n    }\n}\n\n/// Converts a list of ColumnInfo to an arrow Schema\npub(crate) fn selected_columns_to_schema(columns: &[ColumnInfo]) -> Schema {\n    let fields: Vec<_> = columns.iter().map(Into::<Field>::into).collect();\n    Schema::new(fields)\n}\n\n/// Creates an arrow RecordBatch from an Iterator 
over (column_name, column data) tuples and an arrow schema\npub(crate) fn record_batch_from_name_array_iterator<\n    'a,\n    I: Iterator<Item = (&'a str, Arc<dyn Array>)>,\n>(\n    iter: I,\n    schema: Schema,\n) -> FastExcelResult<RecordBatch> {\n    let mut iter = iter.peekable();\n    // If the iterable is empty, try_from_iter returns an Err\n    if iter.peek().is_none() {\n        Ok(RecordBatch::new_empty(Arc::new(schema)))\n    } else {\n        // We use `try_from_iter_with_nullable` because `try_from_iter` relies on `array.null_count() > 0;`\n        // to determine if the array is nullable. This is not the case for `NullArray` which has no nulls.\n        RecordBatch::try_from_iter_with_nullable(iter.map(|(field_name, array)| {\n            let nullable = array.is_nullable();\n            (field_name, array, nullable)\n        }))\n        .map_err(|err| FastExcelErrorKind::ArrowError(err.to_string()).into())\n        .with_context(|| \"could not create RecordBatch from iterable\")\n    }\n}\n\n/// Creates an arrow `RecordBatch` from `ExcelSheetData`. 
Expects the following parameters:\n/// * `columns`: a slice of `ColumnInfo`, representing the columns that should be extracted from the range\n/// * `data`: the sheets data, as an `ExcelSheetData`\n/// * `offset`: the row index at which to start\n/// * `limit`: the row index at which to stop (excluded)\npub(crate) fn record_batch_from_data_and_columns<CT: CellType + DataType>(\n    columns: &[ColumnInfo],\n    data: &Range<CT>,\n    offset: usize,\n    limit: usize,\n    whitespace_as_null: bool,\n) -> FastExcelResult<RecordBatch> {\n    // Use RowSelector::Range for simple offset..limit case - no Vec allocation!\n    let row_selector = RowSelector::Range(offset..limit);\n    record_batch_from_data_and_columns_with_row_selector(\n        columns,\n        data,\n        &row_selector,\n        whitespace_as_null,\n    )\n}\n\npub(crate) fn record_batch_from_data_and_columns_with_skip_rows<CT: CellType + DataType>(\n    columns: &[ColumnInfo],\n    data: &Range<CT>,\n    skip_rows: &SkipRows,\n    offset: usize,\n    limit: usize,\n    whitespace_as_null: bool,\n) -> FastExcelResult<RecordBatch> {\n    // Generate row selector - ranges for simple cases, filtered Vec only when needed\n    let row_selector = generate_row_selector(skip_rows, offset, limit)?;\n    record_batch_from_data_and_columns_with_row_selector(\n        columns,\n        data,\n        &row_selector,\n        whitespace_as_null,\n    )\n}\n\nfn record_batch_from_data_and_columns_with_row_selector<CT: CellType + DataType>(\n    columns: &[ColumnInfo],\n    data: &Range<CT>,\n    row_selector: &RowSelector,\n    whitespace_as_null: bool,\n) -> FastExcelResult<RecordBatch> {\n    let schema = selected_columns_to_schema(columns);\n    let row_count = row_selector.len();\n    let iter = columns.iter().map(|column_info| {\n        let col_idx = column_info.index;\n        let dtype = column_info.dtype;\n        (\n            column_info.name.as_str(),\n            match dtype {\n                
DType::Null => Arc::new(NullArray::new(row_count)),\n                DType::Int => create_int_array(data, col_idx, row_selector.iter()),\n                DType::Float => create_float_array(data, col_idx, row_selector.iter()),\n                DType::String => {\n                    create_string_array(data, col_idx, row_selector.iter(), whitespace_as_null)\n                }\n                DType::Bool => create_boolean_array(data, col_idx, row_selector.iter()),\n                DType::DateTime => create_datetime_array(data, col_idx, row_selector.iter()),\n                DType::Date => create_date_array(data, col_idx, row_selector.iter()),\n                DType::Duration => create_duration_array(data, col_idx, row_selector.iter()),\n            },\n        )\n    });\n\n    record_batch_from_name_array_iterator(iter, schema)\n}\n\npub(crate) fn record_batch_from_data_and_columns_with_errors(\n    columns: &[ColumnInfo],\n    data: &ExcelSheetData,\n    offset: usize,\n    limit: usize,\n    whitespace_as_null: bool,\n) -> FastExcelResult<(RecordBatch, CellErrors)> {\n    let schema = selected_columns_to_schema(columns);\n\n    let mut cell_errors = vec![];\n\n    let iter = columns.iter().map(|column_info| {\n        let col_idx = column_info.index;\n        let dtype = column_info.dtype;\n\n        let (array, new_cell_errors) = match dtype {\n            DType::Null => (Arc::new(NullArray::new(limit - offset)) as ArrayRef, vec![]),\n            DType::Int => create_int_array_with_errors(data, col_idx, offset, limit),\n            DType::Float => create_float_array_with_errors(data, col_idx, offset, limit),\n            DType::String => {\n                create_string_array_with_errors(data, col_idx, offset, limit, whitespace_as_null)\n            }\n            DType::Bool => create_boolean_array_with_errors(data, col_idx, offset, limit),\n            DType::DateTime => create_datetime_array_with_errors(data, col_idx, offset, limit),\n            DType::Date 
=> create_date_array_with_errors(data, col_idx, offset, limit),\n            DType::Duration => create_duration_array_with_errors(data, col_idx, offset, limit),\n        };\n\n        cell_errors.extend(new_cell_errors);\n\n        (column_info.name.as_str(), array)\n    });\n\n    let record_batch = record_batch_from_name_array_iterator(iter, schema)?;\n\n    Ok((\n        record_batch,\n        CellErrors {\n            errors: cell_errors,\n        },\n    ))\n}\n\nimpl RowSelector {\n    pub(crate) fn iter(&self) -> Box<dyn Iterator<Item = usize> + '_> {\n        match self {\n            RowSelector::Range(range) => Box::new(range.clone()),\n            RowSelector::Filtered(vec) => Box::new(vec.iter().copied()),\n        }\n    }\n}\n"
  },
  {
    "path": "src/data/rust.rs",
    "content": "use std::ops::Not;\n\nuse calamine::{CellType, DataType, Range};\nuse chrono::{NaiveDate, NaiveDateTime, TimeDelta};\n\nuse super::cell_extractors;\n\npub(crate) fn create_boolean_vec<CT: CellType + DataType>(\n    data: &Range<CT>,\n    col: usize,\n    offset: usize,\n    limit: usize,\n) -> Vec<Option<bool>> {\n    (offset..limit)\n        .map(|row| {\n            data.get((row, col))\n                .and_then(cell_extractors::extract_boolean)\n        })\n        .collect()\n}\n\npub(crate) fn create_int_vec<CT: CellType + DataType>(\n    data: &Range<CT>,\n    col: usize,\n    offset: usize,\n    limit: usize,\n) -> Vec<Option<i64>> {\n    (offset..limit)\n        .map(|row| data.get((row, col)).and_then(cell_extractors::extract_int))\n        .collect()\n}\n\npub(crate) fn create_float_vec<CT: CellType + DataType>(\n    data: &Range<CT>,\n    col: usize,\n    offset: usize,\n    limit: usize,\n) -> Vec<Option<f64>> {\n    (offset..limit)\n        .map(|row| {\n            data.get((row, col))\n                .and_then(cell_extractors::extract_float)\n        })\n        .collect()\n}\n\npub(crate) fn create_string_vec<CT: CellType + DataType>(\n    data: &Range<CT>,\n    col: usize,\n    offset: usize,\n    limit: usize,\n    whitespace_as_null: bool,\n) -> Vec<Option<String>> {\n    if whitespace_as_null {\n        (offset..limit)\n            .map(|row| {\n                data.get((row, col))\n                    .and_then(cell_extractors::extract_string)\n                    // Only return the string if it contains non-whitespace characters\n                    .filter(|s| s.trim().is_empty().not())\n            })\n            .collect()\n    } else {\n        (offset..limit)\n            .map(|row| {\n                data.get((row, col))\n                    .and_then(cell_extractors::extract_string)\n            })\n            .collect()\n    }\n}\n\npub(crate) fn create_date_vec<CT: CellType + DataType>(\n    data: &Range<CT>,\n    
col: usize,\n    offset: usize,\n    limit: usize,\n) -> Vec<Option<NaiveDate>> {\n    (offset..limit)\n        .map(|row| data.get((row, col)).and_then(cell_extractors::extract_date))\n        .collect()\n}\n\npub(crate) fn create_datetime_vec<CT: CellType + DataType>(\n    data: &Range<CT>,\n    col: usize,\n    offset: usize,\n    limit: usize,\n) -> Vec<Option<NaiveDateTime>> {\n    (offset..limit)\n        .map(|row| {\n            data.get((row, col))\n                .and_then(cell_extractors::extract_datetime)\n        })\n        .collect()\n}\n\npub(crate) fn create_duration_vec<CT: CellType + DataType>(\n    data: &Range<CT>,\n    col: usize,\n    offset: usize,\n    limit: usize,\n) -> Vec<Option<TimeDelta>> {\n    (offset..limit)\n        .map(|row| {\n            data.get((row, col))\n                .and_then(cell_extractors::extract_duration)\n        })\n        .collect()\n}\n"
  },
  {
    "path": "src/error.rs",
    "content": "use crate::types::idx_or_name::IdxOrName;\nuse calamine::XlsxError;\nuse std::{error::Error, fmt::Display};\n\n/// The kind of a fastexcel error.\n#[derive(Debug)]\npub enum FastExcelErrorKind {\n    UnsupportedColumnTypeCombination(String),\n    CannotRetrieveCellData(usize, usize),\n    CalamineCellError(calamine::CellErrorType),\n    CalamineError(calamine::Error),\n    SheetNotFound(IdxOrName),\n    ColumnNotFound(IdxOrName),\n    // Arrow errors can be of several different types (arrow::error::Error, PyError), and having\n    // the actual type has not much value for us, so we just store a string context\n    ArrowError(String),\n    InvalidParameters(String),\n    InvalidColumn(String),\n    Internal(String),\n}\n\nimpl Display for FastExcelErrorKind {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        match self {\n            FastExcelErrorKind::UnsupportedColumnTypeCombination(detail) => {\n                write!(f, \"unsupported column type combination: {detail}\")\n            }\n            FastExcelErrorKind::CannotRetrieveCellData(row, col) => {\n                write!(f, \"cannot retrieve cell data at ({row}, {col})\")\n            }\n            FastExcelErrorKind::CalamineCellError(calamine_error) => {\n                write!(f, \"calamine cell error: {calamine_error}\")\n            }\n            FastExcelErrorKind::CalamineError(calamine_error) => {\n                write!(f, \"calamine error: {calamine_error}\")\n            }\n            FastExcelErrorKind::SheetNotFound(idx_or_name) => {\n                let message = idx_or_name.format_message();\n                write!(f, \"sheet {message} not found\")\n            }\n            FastExcelErrorKind::ColumnNotFound(idx_or_name) => {\n                let message = idx_or_name.format_message();\n                write!(f, \"column {message} not found\")\n            }\n            FastExcelErrorKind::ArrowError(err) => write!(f, \"arrow error: 
{err}\"),\n            FastExcelErrorKind::InvalidParameters(err) => write!(f, \"invalid parameters: {err}\"),\n            FastExcelErrorKind::InvalidColumn(err) => write!(f, \"invalid column: {err}\"),\n            FastExcelErrorKind::Internal(err) => write!(f, \"fastexcel error: {err}\"),\n        }\n    }\n}\n\n/// A `fastexcel` error.\n///\n/// Contains a kind and a context. Use the `Display` trait to format the\n/// error message with its context.\n#[derive(Debug)]\npub struct FastExcelError {\n    pub kind: FastExcelErrorKind,\n    pub context: Vec<String>,\n}\n\npub(crate) trait ErrorContext {\n    fn with_context<S: ToString, F>(self, ctx_fn: F) -> Self\n    where\n        F: FnOnce() -> S;\n}\n\nimpl FastExcelError {\n    pub(crate) fn new(kind: FastExcelErrorKind) -> Self {\n        Self {\n            kind,\n            context: vec![],\n        }\n    }\n}\n\nimpl Display for FastExcelError {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        write!(f, \"{kind}\", kind = self.kind)?;\n        if !self.context.is_empty() {\n            writeln!(f, \"\\nContext:\")?;\n\n            self.context\n                .iter()\n                .enumerate()\n                .try_for_each(|(idx, ctx_value)| writeln!(f, \"    {idx}: {ctx_value}\"))?;\n        }\n        Ok(())\n    }\n}\n\nimpl Error for FastExcelError {}\n\nimpl ErrorContext for FastExcelError {\n    fn with_context<S: ToString, F>(mut self, ctx_fn: F) -> Self\n    where\n        F: FnOnce() -> S,\n    {\n        self.context.push(ctx_fn().to_string());\n        self\n    }\n}\n\nimpl From<FastExcelErrorKind> for FastExcelError {\n    fn from(kind: FastExcelErrorKind) -> Self {\n        FastExcelError::new(kind)\n    }\n}\n\nimpl From<XlsxError> for FastExcelError {\n    fn from(err: XlsxError) -> Self {\n        FastExcelErrorKind::CalamineError(calamine::Error::Xlsx(err)).into()\n    }\n}\n\npub type FastExcelResult<T> = Result<T, FastExcelError>;\n\nimpl<T> 
ErrorContext for FastExcelResult<T> {\n    fn with_context<S: ToString, F>(self, ctx_fn: F) -> Self\n    where\n        F: FnOnce() -> S,\n    {\n        match self {\n            Ok(_) => self,\n            Err(e) => Err(e.with_context(ctx_fn)),\n        }\n    }\n}\n\n/// Contains Python versions of our custom errors\n#[cfg(feature = \"python\")]\npub(crate) mod py_errors {\n    use super::FastExcelErrorKind;\n    use crate::error;\n    use pyo3::{PyErr, PyResult, create_exception, exceptions::PyException};\n\n    // Base fastexcel error\n    create_exception!(\n        _fastexcel,\n        FastExcelError,\n        PyException,\n        \"The base class for all fastexcel errors\"\n    );\n    // Unsupported column type\n    create_exception!(\n        _fastexcel,\n        UnsupportedColumnTypeCombinationError,\n        FastExcelError,\n        \"Column contains an unsupported type combination\"\n    );\n    // Cannot retrieve cell data\n    create_exception!(\n        _fastexcel,\n        CannotRetrieveCellDataError,\n        FastExcelError,\n        \"Data for a given cell cannot be retrieved\"\n    );\n    // Calamine cell error\n    create_exception!(\n        _fastexcel,\n        CalamineCellError,\n        FastExcelError,\n        \"calamine returned an error regarding the content of the cell\"\n    );\n    // Calamine error\n    create_exception!(\n        _fastexcel,\n        CalamineError,\n        FastExcelError,\n        \"Generic calamine error\"\n    );\n    // Sheet not found\n    create_exception!(\n        _fastexcel,\n        SheetNotFoundError,\n        FastExcelError,\n        \"Sheet was not found\"\n    );\n    // Column not found\n    create_exception!(\n        _fastexcel,\n        ColumnNotFoundError,\n        FastExcelError,\n        \"Column was not found\"\n    );\n    // Arrow error\n    create_exception!(\n        _fastexcel,\n        ArrowError,\n        FastExcelError,\n        \"Generic arrow error\"\n    );\n    // Invalid 
parameters\n    create_exception!(\n        _fastexcel,\n        InvalidParametersError,\n        FastExcelError,\n        \"Provided parameters are invalid\"\n    );\n    // Invalid column\n    create_exception!(\n        _fastexcel,\n        InvalidColumnError,\n        FastExcelError,\n        \"Column is invalid\"\n    );\n    // Internal error\n    create_exception!(\n        _fastexcel,\n        InternalError,\n        FastExcelError,\n        \"Internal fastexcel error\"\n    );\n\n    impl From<error::FastExcelError> for PyErr {\n        fn from(err: error::FastExcelError) -> Self {\n            let message = err.to_string();\n            match err.kind {\n                FastExcelErrorKind::UnsupportedColumnTypeCombination(_) => {\n                    UnsupportedColumnTypeCombinationError::new_err(message)\n                }\n                FastExcelErrorKind::CannotRetrieveCellData(_, _) => {\n                    CannotRetrieveCellDataError::new_err(message)\n                }\n                FastExcelErrorKind::CalamineCellError(_) => CalamineCellError::new_err(message),\n                FastExcelErrorKind::CalamineError(_) => CalamineError::new_err(message),\n                FastExcelErrorKind::SheetNotFound(_) => SheetNotFoundError::new_err(message),\n                FastExcelErrorKind::ColumnNotFound(_) => ColumnNotFoundError::new_err(message),\n                FastExcelErrorKind::ArrowError(_) => ArrowError::new_err(message),\n                FastExcelErrorKind::InvalidParameters(_) => {\n                    InvalidParametersError::new_err(message)\n                }\n                FastExcelErrorKind::InvalidColumn(_) => InvalidColumnError::new_err(message),\n                FastExcelErrorKind::Internal(_) => ArrowError::new_err(message),\n            }\n        }\n    }\n\n    pub(crate) trait IntoPyResult {\n        type Inner;\n\n        fn into_pyresult(self) -> PyResult<Self::Inner>;\n    }\n\n    impl<T> IntoPyResult for 
super::FastExcelResult<T> {\n        type Inner = T;\n\n        fn into_pyresult(self) -> PyResult<Self::Inner> {\n            self.map_err(Into::into)\n        }\n    }\n}\n"
  },
  {
    "path": "src/lib.rs",
    "content": "mod data;\nmod error;\nmod types;\nmod utils;\n\nuse std::fmt::Display;\n\n#[cfg(feature = \"python\")]\nuse error::py_errors;\n#[cfg(feature = \"python\")]\nuse pyo3::prelude::*;\n#[cfg(feature = \"python\")]\nuse types::excelsheet::{CellError, CellErrors};\n\npub use data::{FastExcelColumn, FastExcelSeries};\nuse error::ErrorContext;\npub use error::{FastExcelError, FastExcelErrorKind, FastExcelResult};\npub use types::{\n    ColumnInfo, ColumnNameFrom, DType, DTypeCoercion, DTypeFrom, DTypes, DefinedName, ExcelReader,\n    ExcelSheet, ExcelTable, IdxOrName, LoadSheetOrTableOptions, SelectedColumns, SheetVisible,\n    SkipRows,\n};\n\n/// Reads an excel file and returns an object allowing to access its sheets, tables, and a bit of metadata.\n/// This is a wrapper around `ExcelReader::try_from_path`.\npub fn read_excel<S: AsRef<str> + Display>(path: S) -> FastExcelResult<ExcelReader> {\n    ExcelReader::try_from_path(path.as_ref())\n        .with_context(|| format!(\"could not load excel file at {path}\"))\n}\n\n#[cfg(feature = \"python\")]\n/// Reads an excel file and returns an object allowing to access its sheets, tables, and a bit of metadata\n#[pyfunction(name = \"read_excel\")]\nfn py_read_excel<'py>(source: &Bound<'_, PyAny>, py: Python<'py>) -> PyResult<ExcelReader> {\n    use py_errors::IntoPyResult;\n\n    if let Ok(path) = source.extract::<String>() {\n        py.detach(|| ExcelReader::try_from_path(&path))\n            .with_context(|| format!(\"could not load excel file at {path}\"))\n            .into_pyresult()\n    } else if let Ok(bytes) = source.extract::<&[u8]>() {\n        py.detach(|| ExcelReader::try_from(bytes))\n            .with_context(|| \"could not load excel file for those bytes\")\n            .into_pyresult()\n    } else {\n        Err(py_errors::InvalidParametersError::new_err(\n            \"source must be a string or bytes\",\n        ))\n    }\n}\n\n// Taken from pydantic-core:\n// 
https://github.com/pydantic/pydantic-core/blob/main/src/lib.rs#L24\n#[cfg(feature = \"python\")]\nfn get_python_version() -> String {\n    let version = env!(\"CARGO_PKG_VERSION\").to_string();\n    // cargo uses \"1.0-alpha1\" etc. while python uses \"1.0.0a1\", this is not full compatibility,\n    // but it's good enough for now\n    // see https://docs.rs/semver/1.0.9/semver/struct.Version.html#method.parse for rust spec\n    // see https://peps.python.org/pep-0440/ for python spec\n    // it seems the dot after \"alpha/beta\" e.g. \"-alpha.1\" is not necessary, hence why this works\n    version.replace(\"-alpha\", \"a\").replace(\"-beta\", \"b\")\n}\n\n#[cfg(feature = \"python\")]\n#[pymodule(gil_used = false)]\nfn _fastexcel(m: &Bound<'_, PyModule>) -> PyResult<()> {\n    use crate::types::excelsheet::column_info::{ColumnInfo, ColumnInfoNoDtype};\n\n    pyo3_log::init();\n\n    let py = m.py();\n    m.add_function(wrap_pyfunction!(py_read_excel, m)?)?;\n    m.add_class::<ColumnInfo>()?;\n    m.add_class::<ColumnInfoNoDtype>()?;\n    m.add_class::<DefinedName>()?;\n    m.add_class::<CellError>()?;\n    m.add_class::<CellErrors>()?;\n    m.add_class::<ExcelSheet>()?;\n    m.add_class::<ExcelReader>()?;\n    m.add_class::<ExcelTable>()?;\n    m.add(\"__version__\", get_python_version())?;\n\n    // errors\n    [\n        (\"FastExcelError\", py.get_type::<py_errors::FastExcelError>()),\n        (\n            \"UnsupportedColumnTypeCombinationError\",\n            py.get_type::<py_errors::UnsupportedColumnTypeCombinationError>(),\n        ),\n        (\n            \"CannotRetrieveCellDataError\",\n            py.get_type::<py_errors::CannotRetrieveCellDataError>(),\n        ),\n        (\n            \"CalamineCellError\",\n            py.get_type::<py_errors::CalamineCellError>(),\n        ),\n        (\"CalamineError\", py.get_type::<py_errors::CalamineError>()),\n        (\n            \"SheetNotFoundError\",\n            
py.get_type::<py_errors::SheetNotFoundError>(),\n        ),\n        (\n            \"ColumnNotFoundError\",\n            py.get_type::<py_errors::ColumnNotFoundError>(),\n        ),\n        (\"ArrowError\", py.get_type::<py_errors::ArrowError>()),\n        (\n            \"InvalidParametersError\",\n            py.get_type::<py_errors::InvalidParametersError>(),\n        ),\n    ]\n    .into_iter()\n    .try_for_each(|(exc_name, exc_type)| m.add(exc_name, exc_type))\n}\n"
  },
  {
    "path": "src/types/dtype/mod.rs",
    "content": "#[cfg(feature = \"python\")]\nmod python;\n\nuse std::{\n    collections::{HashMap, HashSet},\n    fmt::{Debug, Display},\n    str::FromStr,\n    sync::OnceLock,\n};\n\nuse calamine::{CellErrorType, CellType, DataType, Range};\nuse log::warn;\n#[cfg(feature = \"python\")]\nuse pyo3::{IntoPyObject, IntoPyObjectRef};\n\nuse crate::error::{FastExcelError, FastExcelErrorKind, FastExcelResult};\n\nuse super::idx_or_name::IdxOrName;\n\n/// A column or a cell's data type.\n#[derive(Debug, Clone, PartialEq, Eq, Hash, Copy)]\npub enum DType {\n    Null,\n    Int,\n    Float,\n    String,\n    Bool,\n    DateTime,\n    Date,\n    Duration,\n}\n\nimpl FromStr for DType {\n    type Err = FastExcelError;\n\n    fn from_str(raw_dtype: &str) -> FastExcelResult<Self> {\n        match raw_dtype {\n            \"null\" => Ok(Self::Null),\n            \"int\" => Ok(Self::Int),\n            \"float\" => Ok(Self::Float),\n            \"string\" => Ok(Self::String),\n            \"boolean\" => Ok(Self::Bool),\n            \"datetime\" => Ok(Self::DateTime),\n            \"date\" => Ok(Self::Date),\n            \"duration\" => Ok(Self::Duration),\n            _ => Err(FastExcelErrorKind::InvalidParameters(format!(\n                \"unsupported dtype: \\\"{raw_dtype}\\\"\"\n            ))\n            .into()),\n        }\n    }\n}\n\nimpl Display for DType {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        f.write_str(match self {\n            DType::Null => \"null\",\n            DType::Int => \"int\",\n            DType::Float => \"float\",\n            DType::String => \"string\",\n            DType::Bool => \"boolean\",\n            DType::DateTime => \"datetime\",\n            DType::Date => \"date\",\n            DType::Duration => \"duration\",\n        })\n    }\n}\n\npub type DTypeMap = HashMap<IdxOrName, DType>;\n\n/// Provided data types.\n#[derive(Debug, Clone)]\n#[cfg_attr(feature = \"python\", derive(IntoPyObject, 
IntoPyObjectRef))]\npub enum DTypes {\n    /// Coerce all data types to the given type.\n    All(DType),\n    /// Coerce data types based on the provided map.\n    Map(DTypeMap),\n}\n\nimpl FromStr for DTypes {\n    type Err = FastExcelError;\n\n    fn from_str(dtypes: &str) -> FastExcelResult<Self> {\n        Ok(DTypes::All(DType::from_str(dtypes)?))\n    }\n}\n\n/// Whether data types should be coerced or not.\n#[derive(Debug, Clone, PartialEq, Eq, Hash, Copy, Default)]\npub enum DTypeCoercion {\n    /// Coerce data types (default).\n    #[default]\n    Coerce,\n    /// Strictly enforce data types.\n    Strict,\n}\n\nimpl FromStr for DTypeCoercion {\n    type Err = FastExcelError;\n\n    fn from_str(raw_dtype_coercion: &str) -> FastExcelResult<Self> {\n        match raw_dtype_coercion {\n            \"coerce\" => Ok(Self::Coerce),\n            \"strict\" => Ok(Self::Strict),\n            _ => Err(FastExcelErrorKind::InvalidParameters(format!(\n                \"unsupported dtype_coercion: \\\"{raw_dtype_coercion}\\\"\"\n            ))\n            .into()),\n        }\n    }\n}\n\n/// All the possible string values that should be considered as NULL\nconst NULL_STRING_VALUES: [&str; 19] = [\n    \"\", \"#N/A\", \"#N/A N/A\", \"#NA\", \"-1.#IND\", \"-1.#QNAN\", \"-NaN\", \"-nan\", \"1.#IND\", \"1.#QNAN\",\n    \"<NA>\", \"N/A\", \"NA\", \"NULL\", \"NaN\", \"None\", \"n/a\", \"nan\", \"null\",\n];\n\nfn get_cell_dtype<DT: CellType + Debug + DataType>(\n    data: &Range<DT>,\n    row: usize,\n    col: usize,\n    whitespace_as_null: bool,\n) -> FastExcelResult<DType> {\n    let cell = data\n        .get((row, col))\n        .ok_or(FastExcelErrorKind::CannotRetrieveCellData(row, col))?;\n\n    if cell.is_int() {\n        Ok(DType::Int)\n    } else if cell.is_float() {\n        Ok(DType::Float)\n    } else if cell.is_string() {\n        if NULL_STRING_VALUES.contains(&cell.get_string().unwrap())\n        // If we want to consider whitespace as null and either the cell 
is empty or contains only\n        // whitespace, we return null\n            || (whitespace_as_null\n            && cell\n                .get_string()\n                .is_none_or(|s| s.trim().is_empty()))\n        {\n            Ok(DType::Null)\n        } else {\n            Ok(DType::String)\n        }\n    } else if cell.is_bool() {\n        Ok(DType::Bool)\n    } else if cell.is_datetime() {\n        // Since calamine 0.24.0, a new ExcelDateTime exists for the Datetime type. It can either be\n        // a duration or a datetime\n        let excel_datetime = cell\n            .get_datetime()\n            .expect(\"calamine indicated that cell is a datetime but get_datetime returned None\");\n        Ok(if excel_datetime.is_datetime() {\n            DType::DateTime\n        } else {\n            DType::Duration\n        })\n    }\n    // These types contain an ISO8601 representation of a date/datetime or a duration\n    else if cell.is_datetime_iso() {\n        match cell.as_datetime() {\n            // If we cannot convert the cell to a datetime, we're working on a date\n            Some(_) => Ok(DType::DateTime),\n            // NOTE: not using the Date64 type on purpose, as pyarrow converts it to a datetime\n            // rather than a date\n            None => Ok(DType::Date),\n        }\n    }\n    // Simple durations\n    else if cell.is_duration_iso() {\n        Ok(DType::Duration)\n    }\n    // Empty cell\n    else if cell.is_empty() {\n        Ok(DType::Null)\n    } else if cell.is_error() {\n        match cell.get_error() {\n            // considering cells with #N/A! or #REF! 
as null\n            Some(\n                CellErrorType::NA\n                | CellErrorType::Value\n                | CellErrorType::Null\n                | CellErrorType::Ref\n                | CellErrorType::Num\n                | CellErrorType::Div0,\n            ) => Ok(DType::Null),\n            Some(err) => Err(FastExcelErrorKind::CalamineCellError(err.to_owned()).into()),\n            None => Err(FastExcelErrorKind::Internal(format!(\n                \"cell is an error but get_error returned None: {cell:?}\"\n            ))\n            .into()),\n        }\n    } else {\n        Err(FastExcelErrorKind::Internal(format!(\"unsupported cell type: {cell:?}\")).into())\n    }\n}\n\nstatic FLOAT_TYPES_CELL: OnceLock<HashSet<DType>> = OnceLock::new();\nstatic INT_TYPES_CELL: OnceLock<HashSet<DType>> = OnceLock::new();\nstatic STRING_TYPES_CELL: OnceLock<HashSet<DType>> = OnceLock::new();\n\nfn float_types() -> &'static HashSet<DType> {\n    FLOAT_TYPES_CELL.get_or_init(|| HashSet::from([DType::Int, DType::Float, DType::Bool]))\n}\n\nfn int_types() -> &'static HashSet<DType> {\n    INT_TYPES_CELL.get_or_init(|| HashSet::from([DType::Int, DType::Bool]))\n}\n\nfn string_types() -> &'static HashSet<DType> {\n    STRING_TYPES_CELL.get_or_init(|| {\n        HashSet::from([\n            DType::Bool,\n            DType::Int,\n            DType::Float,\n            DType::String,\n            DType::DateTime,\n            DType::Date,\n        ])\n    })\n}\n\npub(crate) fn get_dtype_for_column<DT: CellType + Debug + DataType>(\n    data: &Range<DT>,\n    start_row: usize,\n    end_row: usize,\n    col: usize,\n    dtype_coercion: &DTypeCoercion,\n    whitespace_as_null: bool,\n) -> FastExcelResult<DType> {\n    let mut column_types = (start_row..end_row)\n        .map(|row| get_cell_dtype(data, row, col, whitespace_as_null))\n        .collect::<FastExcelResult<HashSet<_>>>()?;\n\n    // All columns are nullable anyway so we're not taking Null into account here\n    
column_types.remove(&DType::Null);\n\n    if column_types.is_empty() {\n        // If no type apart from NULL was found, fallback to string except if the column is empty\n        if start_row == end_row {\n            Ok(DType::Null)\n        } else {\n            warn!(\"Could not determine dtype for column {col}, falling back to string\");\n            Ok(DType::String)\n        }\n    } else if matches!(dtype_coercion, &DTypeCoercion::Strict) && column_types.len() != 1 {\n        // If dtype coercion is strict and we do not have a single dtype, it's an error\n        Err(\n            FastExcelErrorKind::UnsupportedColumnTypeCombination(format!(\n                \"type coercion is strict and column contains {column_types:?}\"\n            ))\n            .into(),\n        )\n    } else if column_types.len() == 1 {\n        // If a single non-null type was found, return it\n        Ok(column_types.into_iter().next().unwrap())\n    } else if column_types.is_subset(int_types()) {\n        // If every cell in the column can be converted to an int, return int64\n        Ok(DType::Int)\n    } else if column_types.is_subset(float_types()) {\n        // If every cell in the column can be converted to a float, return Float64\n        Ok(DType::Float)\n    } else if column_types.is_subset(string_types()) {\n        // If every cell in the column can be converted to a string, return Utf8\n        Ok(DType::String)\n    } else {\n        // NOTE: Not being too smart about multi-types columns for now\n        Err(\n            FastExcelErrorKind::UnsupportedColumnTypeCombination(format!(\"{column_types:?}\"))\n                .into(),\n        )\n    }\n}\n\n/// Convert a float to a nice string to mimic Excel behaviour.\n///\n/// Excel can store a float like 29.02 set by the user as \"29.020000000000003\" in the XML.\n/// But in fact, the user will see \"29.02\" in the cell.\n/// Excel indeed displays decimal numbers with 8 digits in a standard cell width\n/// and 10 digits 
in a wide cell. Like this:\n///\n/// Format = 0.000000000 |  Unformatted, wide cell  | Unformatted, standard width\n/// ---------------------|--------------------------|----------------------------\n///     1.123456789      |        1.123456789       |           1.123457\n///    12.123456789      |        12.12345679       |           12.12346\n///         ...          |            ...           |              ...\n///   123456.123456789   |        123456.1235       |           123456.1\n///\n/// Excel also trims trailing zeros and the decimal point if there is no fractional part.\n///\n/// We do not distinguish between wide cells and standard cells here, so we retain at most\n/// nine digits after the decimal point and trim any trailing zeros.\npub(crate) fn excel_float_to_string(x: f64) -> String {\n    format!(\"{x:.9}\")\n        .trim_end_matches('0')\n        .trim_end_matches('.')\n        .to_string()\n}\n\n#[cfg(feature = \"__pyo3-tests\")]\n#[cfg(test)]\nmod tests {\n    use calamine::{Cell, Data as CalData};\n    use pretty_assertions::assert_eq;\n    use rstest::{fixture, rstest};\n\n    use super::*;\n\n    #[fixture]\n    fn range() -> Range<CalData> {\n        Range::from_sparse(vec![\n            // First column\n            Cell::new((0, 0), CalData::Bool(true)),\n            Cell::new((1, 0), CalData::Bool(false)),\n            Cell::new((2, 0), CalData::String(\"NULL\".to_string())),\n            Cell::new((3, 0), CalData::Int(42)),\n            Cell::new((4, 0), CalData::Float(13.37)),\n            Cell::new((5, 0), CalData::String(\"hello\".to_string())),\n            Cell::new((6, 0), CalData::Empty),\n            Cell::new((7, 0), CalData::String(\"#N/A\".to_string())),\n            Cell::new((8, 0), CalData::Int(12)),\n            Cell::new((9, 0), CalData::Float(12.21)),\n            Cell::new((10, 0), CalData::Bool(true)),\n            Cell::new((11, 0), CalData::Int(1337)),\n        ])\n    }\n\n    #[rstest]\n    // pure bool\n    
#[case(0, 2, DType::Bool)]\n    // pure int\n    #[case(3, 4, DType::Int)]\n    // pure float\n    #[case(4, 5, DType::Float)]\n    // pure string\n    #[case(5, 6, DType::String)]\n    // pure int + float\n    #[case(3, 5, DType::Float)]\n    // null + int + float\n    #[case(2, 5, DType::Float)]\n    // float + string\n    #[case(4, 6, DType::String)]\n    // int + float + string\n    #[case(3, 6, DType::String)]\n    // null + int + float + string + empty + null\n    #[case(2, 8, DType::String)]\n    // empty + null + int\n    #[case(6, 9, DType::Int)]\n    // int + float + null\n    #[case(7, 10, DType::Float)]\n    // int + float + bool + null\n    #[case(7, 11, DType::Float)]\n    // int + bool\n    #[case(10, 12, DType::Int)]\n    fn get_arrow_column_type_multi_dtype_ok_coerce(\n        range: Range<CalData>,\n        #[case] start_row: usize,\n        #[case] end_row: usize,\n        #[case] expected: DType,\n    ) {\n        assert_eq!(\n            get_dtype_for_column(&range, start_row, end_row, 0, &DTypeCoercion::Coerce, false)\n                .unwrap(),\n            expected\n        );\n    }\n\n    #[rstest]\n    // pure bool\n    #[case(0, 2, DType::Bool)]\n    // pure int\n    #[case(3, 4, DType::Int)]\n    // pure float\n    #[case(4, 5, DType::Float)]\n    // pure string\n    #[case(5, 6, DType::String)]\n    // empty + null + int\n    #[case(6, 9, DType::Int)]\n    fn get_arrow_column_type_multi_dtype_ok_strict(\n        range: Range<CalData>,\n        #[case] start_row: usize,\n        #[case] end_row: usize,\n        #[case] expected: DType,\n    ) {\n        assert_eq!(\n            get_dtype_for_column(&range, start_row, end_row, 0, &DTypeCoercion::Strict, false)\n                .unwrap(),\n            expected\n        );\n    }\n\n    #[rstest]\n    // pure int + float\n    #[case(3, 5)]\n    // float + string\n    #[case(4, 6)]\n    // int + float + string\n    #[case(3, 6)]\n    // null + int + float + string + empty + null\n    
#[case(2, 8)]\n    // int + float + null\n    #[case(7, 10)]\n    // int + float + bool + null\n    #[case(7, 11)]\n    // int + bool\n    #[case(10, 12)]\n    fn get_arrow_column_type_multi_dtype_ko_strict(\n        range: Range<CalData>,\n        #[case] start_row: usize,\n        #[case] end_row: usize,\n    ) {\n        let result =\n            get_dtype_for_column(&range, start_row, end_row, 0, &DTypeCoercion::Strict, false);\n        assert!(matches!(\n            result.unwrap_err().kind,\n            FastExcelErrorKind::UnsupportedColumnTypeCombination(_)\n        ));\n    }\n\n    #[rstest]\n    #[case(29.020000000000003, \"29.02\")]\n    #[case(10000_f64, \"10000\")]\n    #[case(23.0, \"23\")]\n    fn test_excel_float_to_string(#[case] x: f64, #[case] expected: &str) {\n        assert_eq!(excel_float_to_string(x), expected.to_string());\n    }\n}\n"
  },
  {
    "path": "src/types/dtype/python.rs",
    "content": "use arrow_schema::{DataType as ArrowDataType, TimeUnit};\nuse pyo3::{Borrowed, Bound, FromPyObject, IntoPyObject, PyAny, PyErr, Python, types::PyString};\n\nuse crate::{\n    error::{FastExcelErrorKind, py_errors::IntoPyResult},\n    types::dtype::{DType, DTypeCoercion, DTypeMap, DTypes},\n};\n\nimpl<'py> IntoPyObject<'py> for DType {\n    type Target = PyString;\n\n    type Output = Bound<'py, Self::Target>;\n\n    type Error = std::convert::Infallible;\n\n    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {\n        self.to_string().into_pyobject(py)\n    }\n}\n\nimpl<'py> IntoPyObject<'py> for &DType {\n    type Target = PyString;\n\n    type Output = Bound<'py, Self::Target>;\n\n    type Error = std::convert::Infallible;\n\n    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {\n        self.to_string().into_pyobject(py)\n    }\n}\n\nimpl<'a, 'py> FromPyObject<'a, 'py> for DType {\n    type Error = PyErr;\n    fn extract(py_dtype: Borrowed<'a, 'py, PyAny>) -> Result<Self, Self::Error> {\n        if let Ok(dtype_pystr) = py_dtype.extract::<String>() {\n            dtype_pystr.parse()\n        } else {\n            Err(FastExcelErrorKind::InvalidParameters(format!(\n                \"{py_dtype:?} cannot be converted to str\"\n            ))\n            .into())\n        }\n        .into_pyresult()\n    }\n}\n\nimpl<'a, 'py> FromPyObject<'a, 'py> for DTypes {\n    type Error = PyErr;\n    fn extract(py_dtypes: Borrowed<'a, 'py, PyAny>) -> Result<Self, Self::Error> {\n        if let Ok(py_dtypes_str) = py_dtypes.extract::<String>() {\n            py_dtypes_str.parse()\n        } else {\n            Ok(DTypes::Map(py_dtypes.extract::<DTypeMap>()?))\n        }\n        .into_pyresult()\n    }\n}\n\nimpl From<&DType> for ArrowDataType {\n    fn from(dtype: &DType) -> Self {\n        match dtype {\n            DType::Null => ArrowDataType::Null,\n            DType::Int => ArrowDataType::Int64,\n 
           DType::Float => ArrowDataType::Float64,\n            DType::String => ArrowDataType::Utf8,\n            DType::Bool => ArrowDataType::Boolean,\n            DType::DateTime => ArrowDataType::Timestamp(TimeUnit::Millisecond, None),\n            DType::Date => ArrowDataType::Date32,\n            DType::Duration => ArrowDataType::Duration(TimeUnit::Millisecond),\n        }\n    }\n}\n\nimpl<'a, 'py> FromPyObject<'a, 'py> for DTypeCoercion {\n    type Error = PyErr;\n    fn extract(py_dtype_coercion: Borrowed<'a, 'py, PyAny>) -> Result<Self, Self::Error> {\n        if let Ok(dtype_coercion_pystr) = py_dtype_coercion.extract::<String>() {\n            dtype_coercion_pystr.parse()\n        } else {\n            Err(FastExcelErrorKind::InvalidParameters(format!(\n                \"{py_dtype_coercion:?} cannot be converted to str\"\n            ))\n            .into())\n        }\n        .into_pyresult()\n    }\n}\n"
  },
  {
    "path": "src/types/excelreader/mod.rs",
    "content": "#[cfg(feature = \"python\")]\nmod python;\n\nuse std::{\n    fs::File,\n    io::{BufReader, Cursor},\n};\n\nuse calamine::{\n    Data, HeaderRow, Range, Reader, Sheet as CalamineSheet, Sheets, Table, open_workbook_auto,\n    open_workbook_auto_from_rs,\n};\n#[cfg(feature = \"python\")]\nuse calamine::{DataRef, ReaderRef};\n#[cfg(feature = \"python\")]\nuse pyo3::pyclass;\n\nuse crate::{\n    ExcelSheet, ExcelTable,\n    error::{ErrorContext, FastExcelError, FastExcelErrorKind, FastExcelResult},\n    types::{\n        dtype::{DTypeCoercion, DTypes},\n        excelsheet::{SelectedColumns, SkipRows},\n        idx_or_name::IdxOrName,\n    },\n};\n\nuse super::excelsheet::table::{extract_table_names, extract_table_range};\n\nenum ExcelSheets {\n    File(Sheets<BufReader<File>>),\n    Bytes(Sheets<Cursor<Vec<u8>>>),\n}\n\nimpl ExcelSheets {\n    fn worksheet_range(&mut self, name: &str) -> FastExcelResult<Range<Data>> {\n        match self {\n            Self::File(sheets) => sheets.worksheet_range(name),\n            Self::Bytes(sheets) => sheets.worksheet_range(name),\n        }\n        .map_err(|err| FastExcelErrorKind::CalamineError(err).into())\n        .with_context(|| format!(\"Error while loading sheet {name}\"))\n    }\n\n    #[allow(dead_code)]\n    fn sheet_metadata(&self) -> &[CalamineSheet] {\n        match self {\n            ExcelSheets::File(sheets) => sheets.sheets_metadata(),\n            ExcelSheets::Bytes(sheets) => sheets.sheets_metadata(),\n        }\n    }\n\n    fn table_names(&mut self, sheet_name: Option<&str>) -> FastExcelResult<Vec<&str>> {\n        let names = match self {\n            Self::File(sheets) => extract_table_names(sheets, sheet_name),\n            Self::Bytes(sheets) => extract_table_names(sheets, sheet_name),\n        }?;\n        Ok(names.into_iter().map(String::as_str).collect())\n    }\n\n    fn defined_names(&mut self) -> FastExcelResult<Vec<DefinedName>> {\n        let defined_names = match self {\n         
   Self::File(sheets) => sheets.defined_names(),\n            Self::Bytes(sheets) => sheets.defined_names(),\n        }\n        .to_vec()\n        .into_iter()\n        .map(|(name, formula)| DefinedName { name, formula })\n        .collect();\n        Ok(defined_names)\n    }\n\n    #[cfg(feature = \"python\")]\n    fn supports_by_ref(&self) -> bool {\n        matches!(\n            self,\n            Self::File(Sheets::Xlsx(_)) | Self::Bytes(Sheets::Xlsx(_))\n        )\n    }\n\n    fn with_header_row(&mut self, header_row: HeaderRow) -> &mut Self {\n        match self {\n            Self::File(sheets) => {\n                sheets.with_header_row(header_row);\n                self\n            }\n            Self::Bytes(sheets) => {\n                sheets.with_header_row(header_row);\n                self\n            }\n        }\n    }\n\n    #[cfg(feature = \"python\")]\n    fn worksheet_range_ref(&mut self, name: &str) -> FastExcelResult<Range<DataRef<'_>>> {\n        match self {\n            ExcelSheets::File(Sheets::Xlsx(sheets)) => Ok(sheets.worksheet_range_ref(name)?),\n            ExcelSheets::Bytes(Sheets::Xlsx(sheets)) => Ok(sheets.worksheet_range_ref(name)?),\n            _ => Err(FastExcelErrorKind::Internal(\n                \"sheets do not support worksheet_range_ref\".to_string(),\n            )\n            .into()),\n        }\n        .with_context(|| format!(\"Error while loading sheet {name}\"))\n    }\n\n    fn get_table(&mut self, name: &str) -> FastExcelResult<Table<Data>> {\n        match self {\n            Self::File(sheets) => extract_table_range(name, sheets),\n            Self::Bytes(sheets) => extract_table_range(name, sheets),\n        }\n    }\n}\n\n#[derive(Debug, Clone, PartialEq, Eq)]\n#[cfg_attr(feature = \"python\", pyclass(name = \"DefinedName\", skip_from_py_object))]\npub struct DefinedName {\n    pub name: String,\n    pub formula: String,\n}\n\n/// Options for loading a sheet or 
table.\n#[non_exhaustive]\n#[derive(Debug)]\npub struct LoadSheetOrTableOptions {\n    /// The index of the row containing the column labels. If `None`, the provided headers are used.\n    /// Any row before the header row is skipped.\n    pub header_row: Option<usize>,\n    /// The column names to use. If `None`, the column names are inferred from the header row.\n    pub column_names: Option<Vec<String>>,\n    /// How rows should be skipped.\n    pub skip_rows: SkipRows,\n    /// The number of rows to read. If `None`, all rows are read.\n    pub n_rows: Option<usize>,\n    /// The number of rows to sample for schema inference. If `None`, all rows are sampled.\n    pub schema_sample_rows: Option<usize>,\n    /// How data types should be coerced.\n    pub dtype_coercion: DTypeCoercion,\n    /// The columns to select.\n    pub selected_columns: SelectedColumns,\n    /// Override the inferred data types.\n    pub dtypes: Option<DTypes>,\n    /// Skip rows at the end of the sheet/table containing only whitespace and null values.\n    pub skip_whitespace_tail_rows: bool,\n    /// Consider cells containing only whitespace as null values.\n    pub whitespace_as_null: bool,\n}\n\nimpl LoadSheetOrTableOptions {\n    /// Returns a `calamine::HeaderRow`, indicating the first row of the range to be read. 
For us,\n    /// `header_row` can be `None` (meaning there is no header and we should start reading the data\n    /// at the beginning of the sheet)\n    fn calamine_header_row(&self) -> HeaderRow {\n        match (self.header_row, &self.skip_rows) {\n            (None | Some(0), SkipRows::SkipEmptyRowsAtBeginning) => HeaderRow::FirstNonEmptyRow,\n            (None, _) => HeaderRow::Row(0),\n            (Some(row), _) => HeaderRow::Row(row as u32),\n        }\n    }\n\n    /// Returns the row number of the first data row to read, if defined\n    pub(crate) fn data_header_row(&self) -> Option<usize> {\n        self.header_row.and(Some(0))\n    }\n\n    /// Returns a new `LoadSheetOrTableOptions` instance for loading a sheet. `header_row` is set to\n    /// `Some(0)`\n    pub fn new_for_sheet() -> Self {\n        Self {\n            header_row: Some(0),\n            column_names: Default::default(),\n            skip_rows: Default::default(),\n            n_rows: Default::default(),\n            schema_sample_rows: Default::default(),\n            dtype_coercion: Default::default(),\n            selected_columns: Default::default(),\n            dtypes: Default::default(),\n            skip_whitespace_tail_rows: Default::default(),\n            whitespace_as_null: Default::default(),\n        }\n    }\n\n    /// Returns a new `LoadSheetOrTableOptions` instance for loading a table. 
`header_row` is set to\n    /// `None`\n    pub fn new_for_table() -> Self {\n        Self {\n            header_row: None,\n            column_names: Default::default(),\n            skip_rows: Default::default(),\n            n_rows: Default::default(),\n            schema_sample_rows: Default::default(),\n            dtype_coercion: Default::default(),\n            selected_columns: Default::default(),\n            dtypes: Default::default(),\n            skip_whitespace_tail_rows: Default::default(),\n            whitespace_as_null: Default::default(),\n        }\n    }\n\n    pub fn header_row(mut self, header_row: usize) -> Self {\n        self.header_row = Some(header_row);\n        self\n    }\n\n    pub fn no_header_row(mut self) -> Self {\n        self.header_row = None;\n        self\n    }\n\n    pub fn column_names<I: IntoIterator<Item = impl Into<String>>>(\n        mut self,\n        column_names: I,\n    ) -> Self {\n        self.column_names = Some(column_names.into_iter().map(Into::into).collect());\n        self\n    }\n\n    pub fn skip_rows(mut self, skip_rows: SkipRows) -> Self {\n        self.skip_rows = skip_rows;\n        self\n    }\n\n    pub fn n_rows(mut self, n_rows: usize) -> Self {\n        self.n_rows = Some(n_rows);\n        self\n    }\n\n    pub fn schema_sample_rows(mut self, schema_sample_rows: usize) -> Self {\n        self.schema_sample_rows = Some(schema_sample_rows);\n        self\n    }\n\n    pub fn dtype_coercion(mut self, dtype_coercion: DTypeCoercion) -> Self {\n        self.dtype_coercion = dtype_coercion;\n        self\n    }\n\n    pub fn selected_columns(mut self, selected_columns: SelectedColumns) -> Self {\n        self.selected_columns = selected_columns;\n        self\n    }\n\n    pub fn with_dtypes(mut self, dtypes: DTypes) -> Self {\n        self.dtypes = Some(dtypes);\n        self\n    }\n\n    pub fn skip_whitespace_tail_rows(mut self, skip_whitespace_tail_rows: bool) -> Self {\n        
self.skip_whitespace_tail_rows = skip_whitespace_tail_rows;\n        self\n    }\n\n    pub fn whitespace_as_null(mut self, whitespace_as_null: bool) -> Self {\n        self.whitespace_as_null = whitespace_as_null;\n        self\n    }\n}\n\n/// Represents an open Excel file and allows to access its sheets and tables.\n#[cfg_attr(feature = \"python\", pyclass(name = \"_ExcelReader\"))]\npub struct ExcelReader {\n    sheets: ExcelSheets,\n    sheet_metadata: Vec<CalamineSheet>,\n    #[cfg(feature = \"python\")]\n    source: String,\n}\n\nimpl ExcelReader {\n    // NOTE: Not implementing TryFrom here, because we're aren't building the file from the passed\n    // string, but rather from the file pointed by it. Semantically, try_from_path is clearer\n    pub(crate) fn try_from_path(path: &str) -> FastExcelResult<Self> {\n        let sheets = open_workbook_auto(path)\n            .map_err(|err| FastExcelErrorKind::CalamineError(err).into())\n            .with_context(|| format!(\"Could not open workbook at {path}\"))?;\n        let sheet_metadata = sheets.sheets_metadata().to_owned();\n        Ok(Self {\n            sheets: ExcelSheets::File(sheets),\n            sheet_metadata,\n            #[cfg(feature = \"python\")]\n            source: path.to_owned(),\n        })\n    }\n\n    fn find_sheet_meta(&self, idx_or_name: IdxOrName) -> FastExcelResult<&CalamineSheet> {\n        match idx_or_name {\n            IdxOrName::Name(name) => {\n                if let Some(sheet) = self.sheet_metadata.iter().find(|s| s.name == name) {\n                    Ok(sheet)\n                } else {\n                    Err(FastExcelErrorKind::SheetNotFound(IdxOrName::Name(name.clone())).into()).with_context(||  {\n                        let available_sheets = self.sheet_metadata.iter().map(|s| format!(\"\\\"{}\\\"\", s.name)).collect::<Vec<_>>().join(\", \");\n                        format!(\n                            \"Sheet \\\"{name}\\\" not found in file. 
Available sheets: {available_sheets}.\"\n                        )\n                    })\n                }\n            }\n            IdxOrName::Idx(idx) => self\n                .sheet_metadata\n                .get(idx)\n                .ok_or_else(|| FastExcelErrorKind::SheetNotFound(IdxOrName::Idx(idx)).into())\n                .with_context(|| {\n                    format!(\n                        \"Sheet index {idx} is out of range. File has {} sheets.\",\n                        self.sheet_metadata.len()\n                    )\n                }),\n        }\n    }\n\n    /// Load a sheet from the Excel file.\n    pub fn load_sheet(\n        &mut self,\n        idx_or_name: IdxOrName,\n        opts: LoadSheetOrTableOptions,\n    ) -> FastExcelResult<ExcelSheet> {\n        let calamine_header_row = opts.calamine_header_row();\n\n        let sheet_meta = self.find_sheet_meta(idx_or_name)?.to_owned();\n\n        let range = self\n            .sheets\n            .with_header_row(calamine_header_row)\n            .worksheet_range(&sheet_meta.name)?;\n\n        ExcelSheet::try_new(sheet_meta, range.into(), opts)\n    }\n\n    /// Load a table from the Excel file.\n    pub fn load_table(\n        &mut self,\n        name: &str,\n        opts: LoadSheetOrTableOptions,\n    ) -> FastExcelResult<ExcelTable> {\n        let table = self.sheets.get_table(name)?;\n        ExcelTable::try_new(table, opts)\n    }\n\n    pub fn sheet_names(&self) -> Vec<&str> {\n        self.sheet_metadata\n            .iter()\n            .map(|s| s.name.as_str())\n            .collect()\n    }\n\n    pub fn table_names(&mut self, sheet_name: Option<&str>) -> FastExcelResult<Vec<&str>> {\n        self.sheets.table_names(sheet_name)\n    }\n\n    pub fn defined_names(&mut self) -> FastExcelResult<Vec<DefinedName>> {\n        self.sheets.defined_names()\n    }\n}\n\nimpl TryFrom<&[u8]> for ExcelReader {\n    type Error = FastExcelError;\n\n    fn try_from(bytes: &[u8]) -> Result<Self, 
Self::Error> {\n        let cursor = Cursor::new(bytes.to_vec());\n        let sheets = open_workbook_auto_from_rs(cursor)\n            .map_err(|err| FastExcelErrorKind::CalamineError(err).into())\n            .with_context(|| \"Could not open workbook from bytes\")?;\n        let sheet_metadata = sheets.sheets_metadata().to_owned();\n        Ok(Self {\n            sheets: ExcelSheets::Bytes(sheets),\n            sheet_metadata,\n            #[cfg(feature = \"python\")]\n            source: \"bytes\".to_owned(),\n        })\n    }\n}\n"
  },
  {
    "path": "src/types/excelreader/python.rs",
    "content": "use arrow_array::RecordBatch;\nuse pyo3::{Bound, IntoPyObjectExt, PyAny, PyResult, Python, pymethods, types::PyString};\n\nuse super::{DefinedName, ExcelReader};\n\nuse crate::{\n    ExcelSheet,\n    data::{ExcelSheetData, record_batch_from_data_and_columns},\n    error::{ErrorContext, FastExcelErrorKind, FastExcelResult, py_errors::IntoPyResult},\n    types::{\n        dtype::{DTypeCoercion, DTypes},\n        excelreader::LoadSheetOrTableOptions,\n        excelsheet::{\n            Header, Pagination, SelectedColumns, SkipRows,\n            column_info::{build_available_columns_info, finalize_column_info},\n        },\n        idx_or_name::IdxOrName,\n    },\n    utils::schema::get_schema_sample_rows,\n};\n\nimpl ExcelReader {\n    fn build_selected_columns(\n        use_columns: Option<&Bound<'_, PyAny>>,\n    ) -> FastExcelResult<SelectedColumns> {\n        use_columns.try_into().with_context(|| format!(\"expected selected columns to be list[str] | list[int] | str | Callable[[ColumnInfo], bool] | None, got {use_columns:?}\"))\n    }\n\n    fn load_sheet_eager(\n        data: &ExcelSheetData,\n        opts: LoadSheetOrTableOptions,\n    ) -> FastExcelResult<RecordBatch> {\n        let data_header_row = opts.data_header_row();\n        let pagination = match &data {\n            ExcelSheetData::Owned(range) => {\n                Pagination::try_new(opts.skip_rows, opts.n_rows, range)?\n            }\n            ExcelSheetData::Ref(range) => Pagination::try_new(opts.skip_rows, opts.n_rows, range)?,\n        };\n\n        let header = Header::new(data_header_row, opts.column_names);\n\n        let offset = header.offset() + pagination.offset();\n        let limit = {\n            let upper_bound = data.height();\n            if let Some(n_rows) = pagination.n_rows() {\n                // minimum value between (offset+n_rows) and the data's height\n                std::cmp::min(offset + n_rows, upper_bound)\n            } else {\n                
upper_bound\n            }\n        };\n\n        let sample_rows_limit = get_schema_sample_rows(opts.schema_sample_rows, offset, limit);\n        let available_columns_info =\n            build_available_columns_info(data, &opts.selected_columns, &header)?;\n        let final_columns_info = opts\n            .selected_columns\n            .select_columns(available_columns_info)?;\n\n        let available_columns = finalize_column_info(\n            final_columns_info,\n            data,\n            offset,\n            sample_rows_limit,\n            opts.dtypes.as_ref(),\n            &opts.dtype_coercion,\n            opts.whitespace_as_null,\n        )?;\n\n        match data {\n            ExcelSheetData::Owned(data) => record_batch_from_data_and_columns(\n                &available_columns,\n                data,\n                offset,\n                limit,\n                opts.whitespace_as_null,\n            ),\n            ExcelSheetData::Ref(data) => record_batch_from_data_and_columns(\n                &available_columns,\n                data,\n                offset,\n                limit,\n                opts.whitespace_as_null,\n            ),\n        }\n    }\n\n    fn build_sheet<'py>(\n        &mut self,\n        idx_or_name: IdxOrName,\n        opts: LoadSheetOrTableOptions,\n        eager: bool,\n        py: Python<'py>,\n    ) -> PyResult<Bound<'py, PyAny>> {\n        let calamine_header_row = opts.calamine_header_row();\n\n        let sheet_meta = self\n            .find_sheet_meta(idx_or_name)\n            .into_pyresult()?\n            .to_owned();\n\n        if eager && self.sheets.supports_by_ref() {\n            let range = py\n                .detach(|| {\n                    self.sheets\n                        .with_header_row(calamine_header_row)\n                        .worksheet_range_ref(&sheet_meta.name)\n                })\n                .into_pyresult()?;\n            let rb = py\n                .detach(|| 
Self::load_sheet_eager(&range.into(), opts))\n                .into_pyresult()?;\n\n            #[cfg(feature = \"pyarrow\")]\n            {\n                use arrow_pyarrow::ToPyArrow;\n                rb.to_pyarrow(py)\n            }\n            #[cfg(not(feature = \"pyarrow\"))]\n            {\n                Err(pyo3::exceptions::PyRuntimeError::new_err(\n                    \"Eager loading requires pyarrow feature. Use eager=False for PyCapsule interface.\",\n                ))\n            }\n        } else {\n            let range = py\n                .detach(|| {\n                    self.sheets\n                        .with_header_row(calamine_header_row)\n                        .worksheet_range(&sheet_meta.name)\n                })\n                .into_pyresult()?;\n            let sheet = ExcelSheet::try_new(sheet_meta, range.into(), opts).into_pyresult()?;\n\n            if eager {\n                #[cfg(feature = \"pyarrow\")]\n                {\n                    sheet.to_arrow(py)\n                }\n                #[cfg(not(feature = \"pyarrow\"))]\n                {\n                    Err(pyo3::exceptions::PyRuntimeError::new_err(\n                        \"Eager loading requires pyarrow feature. 
Use eager=False for PyCapsule interface.\",\n                    ))\n                }\n            } else {\n                sheet.into_bound_py_any(py)\n            }\n        }\n    }\n\n    #[allow(clippy::too_many_arguments)]\n    fn build_table<'py>(\n        &mut self,\n        name: &str,\n        opts: LoadSheetOrTableOptions,\n        eager: bool,\n        py: Python<'py>,\n    ) -> PyResult<Bound<'py, PyAny>> {\n        let excel_table = py.detach(|| self.load_table(name, opts)).into_pyresult()?;\n\n        if eager {\n            #[cfg(feature = \"pyarrow\")]\n            {\n                Ok(excel_table.to_arrow(py)?)\n            }\n            #[cfg(not(feature = \"pyarrow\"))]\n            {\n                Err(pyo3::exceptions::PyRuntimeError::new_err(\n                    \"Eager loading requires pyarrow feature. Use eager=False for PyCapsule interface.\",\n                ))\n            }\n        } else {\n            excel_table.into_bound_py_any(py)\n        }\n    }\n}\n\n#[pymethods]\nimpl ExcelReader {\n    pub fn __repr__(&self) -> String {\n        format!(\"ExcelReader<{}>\", &self.source)\n    }\n\n    #[pyo3(name = \"table_names\", signature = (sheet_name = None))]\n    pub(crate) fn py_table_names(&mut self, sheet_name: Option<&str>) -> PyResult<Vec<&str>> {\n        self.sheets.table_names(sheet_name).into_pyresult()\n    }\n\n    #[pyo3(name = \"defined_names\")]\n    pub(crate) fn py_defined_names(&mut self) -> PyResult<Vec<DefinedName>> {\n        self.defined_names().into_pyresult()\n    }\n\n    #[pyo3(name = \"load_sheet\", signature = (\n        idx_or_name,\n        *,\n        header_row = 0,\n        column_names = None,\n        skip_rows = SkipRows::SkipEmptyRowsAtBeginning,\n        n_rows = None,\n        schema_sample_rows = 1_000,\n        dtype_coercion = DTypeCoercion::Coerce,\n        use_columns = None,\n        dtypes = None,\n        eager = false,\n        skip_whitespace_tail_rows = false,\n        
whitespace_as_null = false,\n    ))]\n    #[allow(clippy::too_many_arguments)]\n    pub(crate) fn py_load_sheet<'py>(\n        &mut self,\n        idx_or_name: &Bound<'py, PyAny>,\n        header_row: Option<usize>,\n        column_names: Option<Vec<String>>,\n        skip_rows: SkipRows,\n        n_rows: Option<usize>,\n        schema_sample_rows: Option<usize>,\n        dtype_coercion: DTypeCoercion,\n        use_columns: Option<&Bound<'py, PyAny>>,\n        dtypes: Option<DTypes>,\n        eager: bool,\n        skip_whitespace_tail_rows: bool,\n        whitespace_as_null: bool,\n        py: Python<'py>,\n    ) -> PyResult<Bound<'py, PyAny>> {\n        // Cannot use NonZeroUsize in the parameters, as it is not supported by pyo3\n        if let Some(0) = schema_sample_rows {\n            return Err(FastExcelErrorKind::InvalidParameters(\n                \"schema_sample_rows cannot be 0, as it would prevent dtype inferring\".to_string(),\n            )\n            .into())\n            .into_pyresult();\n        }\n        let idx_or_name = idx_or_name.try_into().into_pyresult()?;\n        let selected_columns = Self::build_selected_columns(use_columns).into_pyresult()?;\n        let opts = LoadSheetOrTableOptions {\n            header_row,\n            column_names,\n            skip_rows,\n            n_rows,\n            schema_sample_rows,\n            dtype_coercion,\n            selected_columns,\n            dtypes,\n            skip_whitespace_tail_rows,\n            whitespace_as_null,\n        };\n\n        self.build_sheet(idx_or_name, opts, eager, py)\n    }\n\n    #[pyo3(name = \"load_table\", signature = (\n        name,\n        *,\n        header_row = 0,\n        column_names = None,\n        skip_rows = SkipRows::SkipEmptyRowsAtBeginning,\n        n_rows = None,\n        schema_sample_rows = 1_000,\n        dtype_coercion = DTypeCoercion::Coerce,\n        use_columns = None,\n        dtypes = None,\n        eager = false,\n        
skip_whitespace_tail_rows = false,\n        whitespace_as_null = false,\n    ))]\n    #[allow(clippy::too_many_arguments)]\n    pub(crate) fn py_load_table<'py>(\n        &mut self,\n        name: &Bound<'py, PyString>,\n        header_row: Option<usize>,\n        column_names: Option<Vec<String>>,\n        skip_rows: SkipRows,\n        n_rows: Option<usize>,\n        schema_sample_rows: Option<usize>,\n        dtype_coercion: DTypeCoercion,\n        use_columns: Option<&Bound<'py, PyAny>>,\n        dtypes: Option<DTypes>,\n        eager: bool,\n        skip_whitespace_tail_rows: bool,\n        whitespace_as_null: bool,\n        py: Python<'py>,\n    ) -> PyResult<Bound<'py, PyAny>> {\n        // Cannot use NonZeroUsize in the parameters, as it is not supported by pyo3\n        if let Some(0) = schema_sample_rows {\n            return Err(FastExcelErrorKind::InvalidParameters(\n                \"schema_sample_rows cannot be 0, as it would prevent dtype inferring\".to_string(),\n            )\n            .into())\n            .into_pyresult();\n        }\n\n        let selected_columns = Self::build_selected_columns(use_columns).into_pyresult()?;\n        let opts = LoadSheetOrTableOptions {\n            header_row,\n            column_names,\n            skip_rows,\n            n_rows,\n            schema_sample_rows,\n            dtype_coercion,\n            selected_columns,\n            dtypes,\n            skip_whitespace_tail_rows,\n            whitespace_as_null,\n        };\n\n        self.build_table(&name.to_string(), opts, eager, py)\n    }\n\n    #[getter(\"sheet_names\")]\n    pub(crate) fn py_sheet_names(&self) -> Vec<&str> {\n        self.sheet_names()\n    }\n}\n\n#[pymethods]\nimpl DefinedName {\n    /// Creates a new `DefinedName` object.\n    #[new]\n    pub fn py_new(name: String, formula: String) -> Self {\n        DefinedName { name, formula }\n    }\n\n    #[getter(\"name\")]\n    pub fn py_name(&self) -> &str {\n        &self.name\n    }\n\n 
   #[getter(\"formula\")]\n    pub fn py_formula(&self) -> &str {\n        &self.formula\n    }\n\n    pub fn __repr__(&self) -> String {\n        format!(\n            \"DefinedName<{name} ({formula})>\",\n            name = &self.name,\n            formula = self\n                .formula\n                .get(..10)\n                .map(|s| format!(\"{}...\", s))\n                .as_deref()\n                .unwrap_or(self.formula.as_str())\n        )\n    }\n\n    pub fn __eq__(&self, other: &Self) -> bool {\n        self == other\n    }\n}\n"
  },
  {
    "path": "src/types/excelsheet/column_info/mod.rs",
    "content": "#[cfg(feature = \"python\")]\nmod python;\n\nuse std::{fmt::Display, str::FromStr};\n\nuse calamine::DataType;\n#[cfg(feature = \"python\")]\nuse pyo3::pyclass;\n\nuse crate::{\n    data::ExcelSheetData,\n    error::{ErrorContext, FastExcelError, FastExcelErrorKind, FastExcelResult},\n    types::{\n        dtype::{DType, DTypeCoercion, DTypes, get_dtype_for_column},\n        idx_or_name::IdxOrName,\n    },\n};\n\nuse super::{Header, SelectedColumns};\n\n/// How the column name was determined\n#[derive(Debug, Clone, PartialEq)]\npub enum ColumnNameFrom {\n    /// The column name was provided by the user.\n    Provided,\n    /// The column name was looked up in the sheet or table.\n    LookedUp,\n    /// The column name was generated based on the column index.\n    Generated,\n}\n\nimpl FromStr for ColumnNameFrom {\n    type Err = FastExcelError;\n\n    fn from_str(s: &str) -> FastExcelResult<Self> {\n        match s {\n            \"provided\" => Ok(Self::Provided),\n            \"looked_up\" => Ok(Self::LookedUp),\n            \"generated\" => Ok(Self::Generated),\n            _ => Err(\n                FastExcelErrorKind::InvalidParameters(format!(\"invalid ColumnNameFrom: {s}\"))\n                    .into(),\n            ),\n        }\n    }\n}\n\nimpl Display for ColumnNameFrom {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        f.write_str(match self {\n            ColumnNameFrom::Provided => \"provided\",\n            ColumnNameFrom::LookedUp => \"looked_up\",\n            ColumnNameFrom::Generated => \"generated\",\n        })\n    }\n}\n\n/// How the data type was determined.\n#[derive(Debug, Clone, PartialEq)]\npub enum DTypeFrom {\n    /// The data type was provided for all columns.\n    ProvidedForAll,\n    /// The data type was provided via the column's index.\n    ProvidedByIndex,\n    /// The data type was provided via the column's name.\n    ProvidedByName,\n    /// The data type was guessed based on 
the column's data.\n    Guessed,\n}\n\nimpl Display for DTypeFrom {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        f.write_str(match self {\n            DTypeFrom::ProvidedForAll => \"provided_for_all\",\n            DTypeFrom::ProvidedByIndex => \"provided_by_index\",\n            DTypeFrom::ProvidedByName => \"provided_by_name\",\n            DTypeFrom::Guessed => \"guessed\",\n        })\n    }\n}\n\nimpl FromStr for DTypeFrom {\n    type Err = FastExcelError;\n\n    fn from_str(s: &str) -> FastExcelResult<Self> {\n        match s {\n            \"provided_for_all\" => Ok(Self::ProvidedForAll),\n            \"provided_by_index\" => Ok(Self::ProvidedByIndex),\n            \"provided_by_name\" => Ok(Self::ProvidedByName),\n            \"guessed\" => Ok(Self::Guessed),\n            _ => Err(\n                FastExcelErrorKind::InvalidParameters(format!(\"invalid DTypesFrom: {s}\")).into(),\n            ),\n        }\n    }\n}\n\n// NOTE: The types for properties unfortunately do not appear in the docs for this class, so we had\n// to specify them via docstrings\n/// Metadata about a single column in a sheet.\n#[derive(Debug, Clone, PartialEq)]\n#[cfg_attr(feature = \"python\", pyclass(name = \"ColumnInfo\", skip_from_py_object))]\npub struct ColumnInfo {\n    /// The column's name\n    pub name: String,\n    /// The column's index\n    pub index: usize,\n    /// The column's absolute index\n    pub absolute_index: usize,\n    /// The column's data type\n    pub dtype: DType,\n    /// How the column name was determined\n    pub column_name_from: ColumnNameFrom,\n    /// How the column data type was determined\n    pub dtype_from: DTypeFrom,\n}\n\nimpl ColumnInfo {\n    pub(crate) fn new(\n        name: String,\n        index: usize,\n        absolute_index: usize,\n        column_name_from: ColumnNameFrom,\n        dtype: DType,\n        dtype_from: DTypeFrom,\n    ) -> Self {\n        Self {\n            name,\n            
index,\n            absolute_index,\n            dtype,\n            column_name_from,\n            dtype_from,\n        }\n    }\n}\n\n/// This class provides information about a single column in a sheet, without associated type\n/// information\n#[derive(Debug, Clone, PartialEq)]\n#[cfg_attr(\n    feature = \"python\",\n    pyclass(name = \"ColumnInfoNoDtype\", skip_from_py_object)\n)]\npub(crate) struct ColumnInfoNoDtype {\n    name: String,\n    index: usize,\n    absolute_index: usize,\n    column_name_from: ColumnNameFrom,\n}\n\n// Allows us to easily compare ourselves to a column index or name\nimpl PartialEq<IdxOrName> for ColumnInfoNoDtype {\n    fn eq(&self, other: &IdxOrName) -> bool {\n        match other {\n            IdxOrName::Idx(index) => index == &self.index,\n            IdxOrName::Name(name) => name == &self.name,\n        }\n    }\n}\n\nimpl ColumnInfoNoDtype {\n    pub(super) fn new(\n        name: String,\n        index: usize,\n        absolute_index: usize,\n        column_name_from: ColumnNameFrom,\n    ) -> Self {\n        Self {\n            name,\n            index,\n            absolute_index,\n            column_name_from,\n        }\n    }\n\n    pub(super) fn with_name(mut self, name: String) -> Self {\n        self.name = name;\n        self\n    }\n\n    pub(super) fn name(&self) -> &str {\n        &self.name\n    }\n\n    pub(super) fn absolute_index(&self) -> usize {\n        self.absolute_index\n    }\n\n    fn dtype_info<D: CalamineDataProvider>(\n        &self,\n        data: &D,\n        start_row: usize,\n        end_row: usize,\n        specified_dtypes: Option<&DTypes>,\n        dtype_coercion: &DTypeCoercion,\n        whitespace_as_null: bool,\n    ) -> FastExcelResult<(DType, DTypeFrom)> {\n        specified_dtypes\n            .and_then(|dtypes| {\n                match dtypes {\n                    DTypes::All(dtype) => Some((*dtype, DTypeFrom::ProvidedForAll)),\n                    DTypes::Map(dtypes) => {\n         
               // if we have dtypes, look the dtype up by index, and fall back on a lookup by name\n                        // (done in this order because copying an usize is cheaper than cloning a string)\n                        if let Some(dtype) = dtypes.get(&self.absolute_index().into()) {\n                            Some((*dtype, DTypeFrom::ProvidedByIndex))\n                        } else {\n                            dtypes\n                                .get(&self.name.clone().into())\n                                .map(|dtype| (*dtype, DTypeFrom::ProvidedByName))\n                        }\n                    }\n                }\n            })\n            .map(FastExcelResult::Ok)\n            // If we could not look up a dtype, guess it from the data\n            .unwrap_or_else(|| {\n                data.dtype_for_column(\n                    start_row,\n                    end_row,\n                    self.index,\n                    dtype_coercion,\n                    whitespace_as_null,\n                )\n                .map(|dtype| (dtype, DTypeFrom::Guessed))\n            })\n    }\n\n    pub(super) fn finish<D: CalamineDataProvider>(\n        self,\n        data: &D,\n        start_row: usize,\n        end_row: usize,\n        specified_dtypes: Option<&DTypes>,\n        dtype_coercion: &DTypeCoercion,\n        whitespace_as_null: bool,\n    ) -> FastExcelResult<ColumnInfo> {\n        let (dtype, dtype_from) = self\n            .dtype_info(\n                data,\n                start_row,\n                end_row,\n                specified_dtypes,\n                dtype_coercion,\n                whitespace_as_null,\n            )\n            .with_context(|| format!(\"could not determine dtype for column {}\", self.name))?;\n        Ok(ColumnInfo::new(\n            self.name,\n            self.index,\n            self.absolute_index,\n            self.column_name_from,\n            dtype,\n            dtype_from,\n        ))\n    
}\n}\n\npub(crate) trait CalamineDataProvider {\n    fn width(&self) -> usize;\n    fn get_as_string(&self, pos: (usize, usize)) -> Option<String>;\n    fn dtype_for_column(\n        &self,\n        start_row: usize,\n        end_row: usize,\n        col: usize,\n        dtype_coercion: &DTypeCoercion,\n        whitespace_as_null: bool,\n    ) -> FastExcelResult<DType>;\n    fn start(&self) -> Option<(usize, usize)>;\n}\n\nimpl CalamineDataProvider for ExcelSheetData<'_> {\n    fn width(&self) -> usize {\n        self.width()\n    }\n\n    fn get_as_string(&self, pos: (usize, usize)) -> Option<String> {\n        self.get_as_string(pos)\n    }\n\n    fn dtype_for_column(\n        &self,\n        start_row: usize,\n        end_row: usize,\n        col: usize,\n        dtype_coercion: &DTypeCoercion,\n        whitespace_as_null: bool,\n    ) -> FastExcelResult<DType> {\n        self.dtype_for_column(start_row, end_row, col, dtype_coercion, whitespace_as_null)\n    }\n\n    fn start(&self) -> Option<(usize, usize)> {\n        self.start()\n    }\n}\n\nimpl CalamineDataProvider for calamine::Range<calamine::Data> {\n    fn width(&self) -> usize {\n        self.width()\n    }\n\n    fn get_as_string(&self, pos: (usize, usize)) -> Option<String> {\n        self.get(pos).and_then(|data| data.as_string())\n    }\n\n    fn dtype_for_column(\n        &self,\n        start_row: usize,\n        end_row: usize,\n        col: usize,\n        dtype_coercion: &DTypeCoercion,\n        whitespace_as_null: bool,\n    ) -> FastExcelResult<DType> {\n        get_dtype_for_column(\n            self,\n            start_row,\n            end_row,\n            col,\n            dtype_coercion,\n            whitespace_as_null,\n        )\n    }\n    fn start(&self) -> Option<(usize, usize)> {\n        self.start().map(|(r, c)| (r as usize, c as usize))\n    }\n}\n\nfn column_info_from_header<D: CalamineDataProvider>(\n    data: &D,\n    selected_columns: &SelectedColumns,\n    header: 
&Header,\n) -> FastExcelResult<Vec<ColumnInfoNoDtype>> {\n    let width = data.width();\n    let (_, col_off) = data.start().unwrap_or((0, 0));\n    match header {\n        Header::None => Ok((0..width)\n            .map(|col_idx| {\n                ColumnInfoNoDtype::new(\n                    format!(\"__UNNAMED__{col_idx}\"),\n                    col_idx,\n                    col_off + col_idx,\n                    ColumnNameFrom::Generated,\n                )\n            })\n            .collect()),\n        Header::At(row_idx) => Ok((0..width)\n            .map(|col_idx| {\n                data.get_as_string((*row_idx, col_idx))\n                    .map(|col_name| {\n                        // Remove null bytes from column names to avoid CString panics in Arrow FFI.\n                        //\n                        // Excel strings (especially UTF-16 in .xls) may contain embedded nulls (`\\0`) after\n                        // conversion to Rust `String`. Arrow’s C FFI uses `CString::new()`, which fails on\n                        // null bytes, causing panics.\n                        //\n                        // This strips nulls while keeping the readable content.\n                        let sanitized_col_name = col_name.replace('\\0', \"\");\n                        ColumnInfoNoDtype::new(\n                            sanitized_col_name,\n                            col_idx,\n                            col_off + col_idx,\n                            ColumnNameFrom::LookedUp,\n                        )\n                    })\n                    .unwrap_or_else(|| {\n                        ColumnInfoNoDtype::new(\n                            format!(\"__UNNAMED__{col_idx}\"),\n                            col_idx,\n                            col_off + col_idx,\n                            ColumnNameFrom::Generated,\n                        )\n                    })\n            })\n            .collect()),\n        Header::With(names) => {\n       
     if let SelectedColumns::Selection(column_selection) = selected_columns {\n                if column_selection.len() != names.len() {\n                    return Err(FastExcelErrorKind::InvalidParameters(\n                        \"column_names and use_columns must have the same length when a header is provided\".to_string(),\n                    )\n                    .into());\n                }\n                let selected_indices = column_selection\n                        .iter()\n                        .map(|idx_or_name| {\n                            match idx_or_name {\n                        IdxOrName::Idx(idx) => Ok(*idx),\n                        IdxOrName::Name(name) => Err(FastExcelErrorKind::InvalidParameters(\n                            format!(\"use_columns can only contain integers when used with columns_names, got \\\"{name}\\\"\")\n                        )\n                        .into()),\n                    }\n                        })\n                        .collect::<FastExcelResult<Vec<_>>>()?;\n\n                Ok((0..width)\n                    .map(|col_idx| {\n                        let absolute_col_idx = col_idx + col_off;\n                        let provided_name_opt = if let Some(pos_in_names) = selected_indices\n                            .iter()\n                            .position(|idx| *idx == absolute_col_idx)\n                        {\n                            names.get(pos_in_names).cloned()\n                        } else {\n                            None\n                        };\n\n                        match provided_name_opt {\n                            Some(provided_name) => ColumnInfoNoDtype::new(\n                                provided_name,\n                                col_idx,\n                                col_off + col_idx,\n                                ColumnNameFrom::Provided,\n                            ),\n                            None => ColumnInfoNoDtype::new(\n   
                             format!(\"__UNNAMED__{col_idx}\"),\n                                col_idx,\n                                col_off + col_idx,\n                                ColumnNameFrom::Generated,\n                            ),\n                        }\n                    })\n                    .collect())\n            } else {\n                let nameless_start_idx = names.len();\n                Ok(names\n                    .iter()\n                    .enumerate()\n                    .map(|(col_idx, name)| {\n                        ColumnInfoNoDtype::new(\n                            name.to_owned(),\n                            col_idx,\n                            col_off + col_idx,\n                            ColumnNameFrom::Provided,\n                        )\n                    })\n                    .chain((nameless_start_idx..width).map(|col_idx| {\n                        ColumnInfoNoDtype::new(\n                            format!(\"__UNNAMED__{col_idx}\"),\n                            col_idx,\n                            col_off + col_idx,\n                            ColumnNameFrom::Generated,\n                        )\n                    }))\n                    .collect())\n            }\n        }\n    }\n}\n\n/// Loads available columns and sets aliases in case of name conflicts\npub(crate) fn build_available_columns_info<D: CalamineDataProvider>(\n    data: &D,\n    selected_columns: &SelectedColumns,\n    header: &Header,\n) -> FastExcelResult<Vec<ColumnInfoNoDtype>> {\n    column_info_from_header(data, selected_columns, header).map(set_aliases_for_columns_info)\n}\n\nfn set_aliases_for_columns_info(columns_info: Vec<ColumnInfoNoDtype>) -> Vec<ColumnInfoNoDtype> {\n    let mut aliased_column_names = Vec::with_capacity(columns_info.len());\n    columns_info\n        .into_iter()\n        .map(|mut column_info_builder| {\n            // Setting the right alias for every column\n            let alias = 
alias_for_name(column_info_builder.name(), &aliased_column_names);\n            if alias != column_info_builder.name() {\n                column_info_builder = column_info_builder.with_name(alias.clone());\n            }\n            aliased_column_names.push(alias);\n            column_info_builder\n        })\n        .collect()\n}\n\nfn alias_for_name(name: &str, existing_names: &[String]) -> String {\n    #[inline]\n    fn rec(name: &str, existing_names: &[String], depth: usize) -> String {\n        let alias = if depth == 0 {\n            name.to_owned()\n        } else {\n            format!(\"{name}_{depth}\")\n        };\n        match existing_names\n            .iter()\n            .any(|existing_name| existing_name == &alias)\n        {\n            true => rec(name, existing_names, depth + 1),\n            false => alias,\n        }\n    }\n\n    rec(name, existing_names, 0)\n}\n\n/// Turns `ColumnInfoNoDtype` into `ColumnInfo`. This will determine the right dtype when needed\npub(crate) fn finalize_column_info<D: CalamineDataProvider>(\n    available_columns_info: Vec<ColumnInfoNoDtype>,\n    data: &D,\n    start_row: usize,\n    end_row: usize,\n    specified_dtypes: Option<&DTypes>,\n    dtype_coercion: &DTypeCoercion,\n    whitespace_as_null: bool,\n) -> FastExcelResult<Vec<ColumnInfo>> {\n    available_columns_info\n        .into_iter()\n        .map(|column_info_builder| {\n            column_info_builder.finish(\n                data,\n                start_row,\n                end_row,\n                specified_dtypes,\n                dtype_coercion,\n                whitespace_as_null,\n            )\n        })\n        .collect()\n}\n\n#[derive(Debug)]\npub(crate) enum AvailableColumns {\n    Pending,\n    Loaded(Vec<ColumnInfo>),\n}\n\nimpl AvailableColumns {\n    pub(crate) fn as_loaded(&self) -> FastExcelResult<&[ColumnInfo]> {\n        match self {\n            AvailableColumns::Loaded(column_infos) => Ok(column_infos),\n            
AvailableColumns::Pending => Err(FastExcelErrorKind::Internal(format!(\n                \"Expected available columns to be loaded, got {self:?}. \\\n                    This is a bug, please report it to the fastexcel repository\"\n            ))\n            .into()),\n        }\n    }\n}\n"
  },
  {
    "path": "src/types/excelsheet/column_info/python.rs",
    "content": "use arrow_schema::Field;\nuse pyo3::{PyResult, pymethods};\n\nuse crate::{\n    error::py_errors::IntoPyResult,\n    types::excelsheet::column_info::{ColumnInfo, ColumnInfoNoDtype},\n};\n\nimpl From<&ColumnInfo> for Field {\n    fn from(col_info: &ColumnInfo) -> Self {\n        Field::new(&col_info.name, (&col_info.dtype).into(), true)\n    }\n}\n\n#[pymethods]\nimpl ColumnInfo {\n    /// Creates a new ColumnInfo object.\n    ///\n    /// - `name`: `str`. The name of the column\n    /// - `index`: `int`. The index of the column. Must be >=0\n    /// - `absolute_index`: `int`. The absolute index of the column. Must be >=0\n    /// - `column_name_from`: `fastexcel.ColumnNameFrom`. The origin of the column name\n    /// - `dtype`: `fastexcel.DType`. The dtype of the column\n    /// - `dtype_from`: `fastexcel.DTypeFrom`. The origin of the dtype for the column\n    #[new]\n    pub(crate) fn py_new(\n        name: String,\n        index: usize,\n        absolute_index: usize,\n        column_name_from: &str,\n        dtype: &str,\n        dtype_from: &str,\n    ) -> PyResult<Self> {\n        Ok(Self::new(\n            name,\n            index,\n            absolute_index,\n            column_name_from.parse().into_pyresult()?,\n            dtype.parse().into_pyresult()?,\n            dtype_from.parse().into_pyresult()?,\n        ))\n    }\n\n    /// `fastexcel.DType`. The dtype of the column\n    #[getter(dtype)]\n    fn get_dtype(&self) -> String {\n        self.dtype.to_string()\n    }\n\n    #[getter(\"name\")]\n    /// `str`. The name of the column\n    pub fn py_name(&self) -> &str {\n        &self.name\n    }\n\n    #[getter(\"index\")]\n    /// `int`. The index of the column\n    pub fn py_index(&self) -> usize {\n        self.index\n    }\n\n    #[getter(\"absolute_index\")]\n    /// `int`. The absolute index of the column\n    pub fn py_absolute_index(&self) -> usize {\n        self.absolute_index\n    }\n\n    /// `fastexcel.ColumnNameFrom`. 
How the name of the column was determined.\n    ///\n    /// One of three possible values:\n    /// - `\"provided\"`: The column name was provided via the `column_names` parameter\n    /// - `\"looked_up\"`: The column name was looked up from the data found in the sheet\n    /// - `\"generated\"`: The column name was generated from the column index, either because\n    ///                  `header_row` was `None`, or because it could not be looked up\n    #[getter(column_name_from)]\n    fn get_colum_name_from(&self) -> String {\n        self.column_name_from.to_string()\n    }\n\n    /// `fastexcel.DTypeFrom`. How the dtype of the column was determined.\n    ///\n    /// One of four possible values:\n    /// - `\"provided_for_all\"`: The dtype was specified for all columns\n    /// - `\"provided_by_index\"`: The dtype was specified via the column index\n    /// - `\"provided_by_name\"`: The dtype was specified via the column name\n    /// - `\"guessed\"`: The dtype was determined from the content of the column\n    #[getter(dtype_from)]\n    fn get_dtype_from(&self) -> String {\n        self.dtype_from.to_string()\n    }\n\n    pub fn __repr__(&self) -> String {\n        format!(\n            \"ColumnInfo(name=\\\"{name}\\\", index={index}, absolute_index={absolute_index}, dtype=\\\"{dtype}\\\", dtype_from=\\\"{dtype_from}\\\", column_name_from=\\\"{column_name_from}\\\" )\",\n            name = self.name,\n            index = self.index,\n            absolute_index = self.absolute_index,\n            dtype = self.dtype,\n            dtype_from = self.dtype_from,\n            column_name_from = self.column_name_from\n        )\n    }\n\n    pub fn __eq__(&self, other: &Self) -> bool {\n        self == other\n    }\n}\n\n#[pymethods]\nimpl ColumnInfoNoDtype {\n    #[getter(\"name\")]\n    /// `str`. The name of the column\n    pub fn py_name(&self) -> &str {\n        &self.name\n    }\n\n    #[getter(\"index\")]\n    /// `int`. 
The index of the column\n    pub fn py_index(&self) -> usize {\n        self.index\n    }\n\n    #[getter(\"absolute_index\")]\n    /// `int`. The absolute index of the column\n    pub fn py_absolute_index(&self) -> usize {\n        self.absolute_index\n    }\n}\n"
  },
  {
    "path": "src/types/excelsheet/mod.rs",
    "content": "pub(crate) mod column_info;\n#[cfg(feature = \"polars\")]\nmod polars;\n#[cfg(feature = \"python\")]\nmod python;\npub(crate) mod table;\n\n#[cfg(feature = \"python\")]\nuse std::sync::Arc;\nuse std::{cmp, collections::HashSet, fmt::Debug, str::FromStr};\n\nuse calamine::{CellType, Range, Sheet as CalamineSheet, SheetVisible as CalamineSheetVisible};\nuse column_info::{AvailableColumns, ColumnInfoNoDtype};\n#[cfg(feature = \"polars\")]\nuse polars_core::frame::DataFrame;\n#[cfg(feature = \"python\")]\nuse pyo3::{Py, PyAny, Python, pyclass};\n\nuse self::column_info::{ColumnInfo, build_available_columns_info, finalize_column_info};\nuse crate::utils::schema::get_schema_sample_rows;\nuse crate::{\n    LoadSheetOrTableOptions,\n    data::{ExcelSheetData, FastExcelColumn},\n    error::{ErrorContext, FastExcelError, FastExcelErrorKind, FastExcelResult},\n    types::{dtype::DTypes, idx_or_name::IdxOrName},\n};\n#[cfg(feature = \"python\")]\npub(crate) use python::{CellError, CellErrors};\n\n#[derive(Debug)]\npub(crate) enum Header {\n    None,\n    At(usize),\n    With(Vec<String>),\n}\n\nimpl Header {\n    pub(crate) fn new(header_row: Option<usize>, column_names: Option<Vec<String>>) -> Self {\n        match column_names {\n            Some(headers) => Header::With(headers),\n            None => match header_row {\n                Some(row) => Header::At(row),\n                None => Header::None,\n            },\n        }\n    }\n\n    pub(crate) fn offset(&self) -> usize {\n        match self {\n            Header::At(index) => index + 1,\n            Header::None => 0,\n            Header::With(_) => 0,\n        }\n    }\n}\n\n#[derive(Debug, Clone)]\n#[cfg_attr(not(feature = \"python\"), derive(PartialEq, Eq))]\npub(crate) struct Pagination {\n    skip_rows: SkipRows,\n    n_rows: Option<usize>,\n}\n\n/// How rows should be skipped.\n#[derive(Debug, Default, Clone)]\n#[cfg_attr(not(feature = \"python\"), derive(PartialEq, Eq))]\npub enum SkipRows 
{\n    /// Skip a fixed number of rows.\n    Simple(usize),\n    /// Skip rows based on a list of row indices.\n    List(HashSet<usize>),\n    #[cfg(feature = \"python\")]\n    Callable(Arc<Py<PyAny>>),\n    /// Skip empty rows at the beginning of the file (default).\n    #[default]\n    SkipEmptyRowsAtBeginning,\n}\n\nimpl SkipRows {\n    pub(crate) fn simple_offset(&self) -> Option<usize> {\n        match self {\n            SkipRows::Simple(offset) => Some(*offset),\n            SkipRows::SkipEmptyRowsAtBeginning => Some(0), // Let calamine's FirstNonEmptyRow handle it\n            _ => None,\n        }\n    }\n}\n\nimpl Pagination {\n    pub(crate) fn try_new<CT: CellType>(\n        skip_rows: SkipRows,\n        n_rows: Option<usize>,\n        range: &Range<CT>,\n    ) -> FastExcelResult<Self> {\n        let max_height = range.height();\n        // Only validate for simple skip_rows case\n        if let SkipRows::Simple(skip_count) = &skip_rows {\n            if max_height < *skip_count {\n                return Err(FastExcelErrorKind::InvalidParameters(format!(\n                    \"Too many rows skipped. 
Max height is {max_height}\"\n                ))\n                .into());\n            }\n        }\n        Ok(Self { skip_rows, n_rows })\n    }\n\n    pub(crate) fn offset(&self) -> usize {\n        self.skip_rows.simple_offset().unwrap_or(0)\n    }\n\n    pub(crate) fn n_rows(&self) -> Option<usize> {\n        self.n_rows\n    }\n\n    pub(crate) fn skip_rows(&self) -> &SkipRows {\n        &self.skip_rows\n    }\n}\n\n#[derive(Default)]\npub enum SelectedColumns {\n    #[default]\n    All,\n    Selection(Vec<IdxOrName>),\n    #[cfg(feature = \"python\")]\n    DynamicSelection(Py<PyAny>),\n    DeferredSelection(Vec<DeferredColumnSelection>),\n}\n\n#[derive(Debug, Clone, PartialEq)]\npub enum DeferredColumnSelection {\n    Fixed(IdxOrName),\n    /// start column index, end is determined by sheet width\n    OpenEndedRange(usize),\n    /// end column index, start is 0\n    FromBeginningRange(usize),\n}\n\nimpl std::fmt::Debug for SelectedColumns {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        match self {\n            Self::All => write!(f, \"All\"),\n            Self::Selection(selection) => write!(f, \"Selection({selection:?})\"),\n            #[cfg(feature = \"python\")]\n            Self::DynamicSelection(func) => {\n                let addr = func as *const _ as usize;\n                write!(f, \"DynamicSelection({addr})\")\n            }\n            Self::DeferredSelection(deferred) => write!(f, \"DeferredSelection({deferred:?})\"),\n        }\n    }\n}\n\nimpl PartialEq for SelectedColumns {\n    fn eq(&self, other: &Self) -> bool {\n        match (self, other) {\n            (Self::All, Self::All) => true,\n            (Self::Selection(selection), Self::Selection(other_selection)) => {\n                selection == other_selection\n            }\n            #[cfg(feature = \"python\")]\n            (Self::DynamicSelection(f1), Self::DynamicSelection(f2)) => std::ptr::eq(f1, f2),\n            
(Self::DeferredSelection(deferred1), Self::DeferredSelection(deferred2)) => {\n                deferred1 == deferred2\n            }\n            _ => false,\n        }\n    }\n}\n\npub(crate) fn deferred_selection_to_concrete(\n    deferred_selection: &[DeferredColumnSelection],\n    max_col_index: usize,\n) -> Vec<IdxOrName> {\n    // First, resolve all deferred selections into concrete column indices\n    let mut resolved_indices = Vec::new();\n\n    for deferred in deferred_selection {\n        match deferred {\n            DeferredColumnSelection::Fixed(idx_or_name) => {\n                resolved_indices.push(idx_or_name.clone());\n            }\n            DeferredColumnSelection::OpenEndedRange(start_idx) => {\n                // Add all columns from start_idx to the end\n                resolved_indices.extend((*start_idx..=max_col_index).map(IdxOrName::Idx));\n            }\n            DeferredColumnSelection::FromBeginningRange(end_idx) => {\n                // Add all columns from 0 to end_idx (inclusive)\n                let actual_end = (*end_idx).min(max_col_index);\n                resolved_indices.extend((0..=actual_end).map(IdxOrName::Idx));\n            }\n        }\n    }\n\n    resolved_indices\n}\n\nimpl SelectedColumns {\n    pub(super) fn select_columns(\n        &self,\n        available_columns: Vec<ColumnInfoNoDtype>,\n    ) -> FastExcelResult<Vec<ColumnInfoNoDtype>> {\n        match self {\n            SelectedColumns::All => Ok(available_columns),\n            SelectedColumns::Selection(selection) => {\n                let selected_indices: Vec<usize> = selection\n                    .iter()\n                    .map(|selected_column| {\n                        match selected_column {\n                            IdxOrName::Idx(index) => available_columns\n                                .iter()\n                                // Sheets have absolute column names (A, B, C, ...)\n                                .position(|col_info| 
&col_info.absolute_index() == index),\n                            IdxOrName::Name(name) => available_columns\n                                .iter()\n                                .position(|col_info| col_info.name() == name.as_str()),\n                        }\n                        .ok_or_else(|| {\n                            FastExcelErrorKind::ColumnNotFound(selected_column.clone()).into()\n                        })\n                        .with_context(|| format!(\"available columns are: {available_columns:?}\"))\n                    })\n                    .collect::<FastExcelResult<_>>()?;\n\n                // We need to sort `available_columns` based on the order of the provided selection.\n                // First, we associated every element in the Vec with its position in the selection,\n                // and we filter out unselected columns\n                let mut cols: Vec<(usize, ColumnInfoNoDtype)> = available_columns\n                    .into_iter()\n                    .enumerate()\n                    .filter_map(|(idx, elem)| {\n                        selected_indices\n                            .iter()\n                            .position(|selected_idx| *selected_idx == idx)\n                            .map(|position| (position, elem))\n                    })\n                    .collect();\n                // Then, we sort the columns based on their position in the selection\n                cols.sort_by_key(|(pos, _elem)| *pos);\n\n                // And finally, we drop the positions\n                Ok(cols.into_iter().map(|(_pos, elem)| elem).collect())\n            }\n            #[cfg(feature = \"python\")]\n            SelectedColumns::DynamicSelection(use_col_func) => Python::attach(|py| {\n                available_columns\n                    .into_iter()\n                    .filter_map(\n                        |col_info| match use_col_func.call1(py, (col_info.clone(),)) {\n                            Err(err) => 
Some(Err(FastExcelErrorKind::InvalidParameters(format!(\n                                \"`use_columns` callable could not be called ({err})\"\n                            ))\n                            .into())),\n                            Ok(should_use_col) => match should_use_col.extract::<bool>(py) {\n                                Err(_) => Some(Err(FastExcelErrorKind::InvalidParameters(\n                                    \"`use_columns` callable should return a boolean\".to_string(),\n                                )\n                                .into())),\n                                Ok(true) => Some(Ok(col_info)),\n                                Ok(false) => None,\n                            },\n                        },\n                    )\n                    .collect()\n            }),\n            SelectedColumns::DeferredSelection(deferred_selection) => {\n                let max_col_index = available_columns\n                    .last()\n                    .map_or(0, |col| col.absolute_index());\n                let concrete_selection = SelectedColumns::Selection(\n                    deferred_selection_to_concrete(deferred_selection, max_col_index),\n                );\n\n                concrete_selection.select_columns(available_columns)\n            }\n        }\n    }\n\n    const ALPHABET: [char; 26] = [\n        'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R',\n        'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',\n    ];\n\n    fn col_idx_for_col_as_letter(col: &str) -> FastExcelResult<usize> {\n        use FastExcelErrorKind::InvalidParameters;\n\n        if col.is_empty() {\n            return Err(InvalidParameters(\n                \"a column should have at least one character, got none\".to_string(),\n            )\n            .into());\n        }\n\n        col.chars()\n            //  iterating over all chars reversed, to have a power based on their rank\n            .rev()\n     
       .enumerate()\n            //  Parses every char, checks its position and returns its numeric equivalent based on\n            //  its rank. For example, AB becomes 27 (26 + 1)\n            .map(|(idx, col_chr)| {\n                let pos_in_alphabet = Self::ALPHABET\n                    .iter()\n                    .position(|chr| chr == &col_chr)\n                    .ok_or_else(|| {\n                        FastExcelError::from(InvalidParameters(format!(\n                            \"Char is not a valid column name: {col_chr}\"\n                        )))\n                    })?;\n\n                Ok(match idx {\n                    // in case it's the last char, just return its position\n                    0 => pos_in_alphabet,\n                    // otherwise, 26^idx * (position + 1)\n                    // For example, CBA is 2080:\n                    // A -> 0\n                    // B -> 52 (26^1 * (1 + 1))\n                    // C -> 2028 (26^2 * (2 + 1))\n                    _ => 26usize.pow(idx as u32) * (pos_in_alphabet + 1),\n                })\n            })\n            // Sums all previously obtained ranks\n            .try_fold(0usize, |acc, elem_result| {\n                elem_result.map(|elem| acc + elem)\n            })\n    }\n\n    fn col_indices_for_letter_range(col_range: &str) -> FastExcelResult<Vec<usize>> {\n        use FastExcelErrorKind::InvalidParameters;\n\n        let col_elements = col_range.split(':').collect::<Vec<_>>();\n        if col_elements.len() == 2 {\n            let start = Self::col_idx_for_col_as_letter(col_elements[0])\n                .with_context(|| format!(\"invalid start element for range \\\"{col_range}\\\"\"))?;\n\n            // Check if this is an open-ended range (empty end element)\n            if col_elements[1].is_empty() {\n                // For open-ended ranges, we can't return concrete indices yet\n                // This will be handled differently in the parsing logic\n                
return Err(InvalidParameters(format!(\n                    \"open-ended range detected: \\\"{col_range}\\\". This should be handled by col_selection_for_letter_range\"\n                ))\n                .into());\n            }\n\n            let end = Self::col_idx_for_col_as_letter(col_elements[1])\n                .with_context(|| format!(\"invalid end element for range \\\"{col_range}\\\"\"))?;\n\n            match start.cmp(&end) {\n                cmp::Ordering::Less => Ok((start..=end).collect()),\n                cmp::Ordering::Greater => Err(InvalidParameters(format!(\n                    \"end of range is before start: \\\"{col_range}\\\"\"\n                ))\n                .into()),\n                cmp::Ordering::Equal => {\n                    Err(InvalidParameters(format!(\"empty range: \\\"{col_range}\\\"\")).into())\n                }\n            }\n        } else {\n            Err(InvalidParameters(format!(\n                \"expected range to contain exactly 2 elements, got {n_elements}: \\\"{col_range}\\\"\",\n                n_elements = col_elements.len()\n            ))\n            .into())\n        }\n    }\n\n    fn col_selection_for_letter_range(\n        col_range: &str,\n    ) -> FastExcelResult<Vec<DeferredColumnSelection>> {\n        use FastExcelErrorKind::InvalidParameters;\n\n        let col_elements = col_range.split(':').collect::<Vec<_>>();\n        if col_elements.len() == 2 {\n            // Check if this is a from-beginning range (empty start element)\n            if col_elements[0].is_empty() {\n                if col_elements[1].is_empty() {\n                    return Err(InvalidParameters(format!(\n                        \"cannot have both start and end empty in range: \\\"{col_range}\\\"\"\n                    ))\n                    .into());\n                }\n                let end = Self::col_idx_for_col_as_letter(col_elements[1])\n                    .with_context(|| format!(\"invalid end element for range 
\\\"{col_range}\\\"\"))?;\n                return Ok(vec![DeferredColumnSelection::FromBeginningRange(end)]);\n            }\n\n            let start = Self::col_idx_for_col_as_letter(col_elements[0])\n                .with_context(|| format!(\"invalid start element for range \\\"{col_range}\\\"\"))?;\n\n            // Check if this is an open-ended range (empty end element)\n            if col_elements[1].is_empty() {\n                return Ok(vec![DeferredColumnSelection::OpenEndedRange(start)]);\n            }\n\n            let end = Self::col_idx_for_col_as_letter(col_elements[1])\n                .with_context(|| format!(\"invalid end element for range \\\"{col_range}\\\"\"))?;\n\n            match start.cmp(&end) {\n                cmp::Ordering::Less => Ok((start..=end)\n                    .map(|idx| DeferredColumnSelection::Fixed(IdxOrName::Idx(idx)))\n                    .collect()),\n                cmp::Ordering::Greater => Err(InvalidParameters(format!(\n                    \"end of range is before start: \\\"{col_range}\\\"\"\n                ))\n                .into()),\n                cmp::Ordering::Equal => {\n                    Err(InvalidParameters(format!(\"empty range: \\\"{col_range}\\\"\")).into())\n                }\n            }\n        } else {\n            Err(InvalidParameters(format!(\n                \"expected range to contain exactly 2 elements, got {n_elements}: \\\"{col_range}\\\"\",\n                n_elements = col_elements.len()\n            ))\n            .into())\n        }\n    }\n}\n\nimpl FromStr for SelectedColumns {\n    type Err = FastExcelError;\n\n    fn from_str(s: &str) -> FastExcelResult<Self> {\n        let uppercase_s = s.to_uppercase();\n        let parts: Vec<&str> = uppercase_s.split(',').collect();\n        let has_open_ended = parts\n            .iter()\n            .any(|p| p.contains(':') && (p.ends_with(':') || p.starts_with(':')));\n\n        if has_open_ended {\n            // Use deferred 
selection logic\n            let deferred_selections = parts\n                .iter()\n                .map(|part| {\n                    if part.contains(':') {\n                        Self::col_selection_for_letter_range(part).map(|mut selections| {\n                            std::mem::take(&mut selections)\n                                .into_iter()\n                                .collect::<Vec<_>>()\n                        })\n                    } else {\n                        Self::col_idx_for_col_as_letter(part)\n                            .map(|idx| vec![DeferredColumnSelection::Fixed(IdxOrName::Idx(idx))])\n                    }\n                })\n                .collect::<Result<Vec<Vec<_>>, _>>()?\n                .into_iter()\n                .flatten()\n                .collect();\n            Ok(Self::DeferredSelection(deferred_selections))\n        } else {\n            // Use the original immediate resolution logic for backwards compatibility\n            let unique_col_indices: HashSet<usize> = parts\n                .iter()\n                .map(|col_or_range| {\n                    if col_or_range.contains(':') {\n                        Self::col_indices_for_letter_range(col_or_range)\n                    } else {\n                        Self::col_idx_for_col_as_letter(col_or_range).map(|idx| vec![idx])\n                    }\n                })\n                .collect::<FastExcelResult<Vec<_>>>()?\n                .into_iter()\n                .flatten()\n                .collect();\n            let mut sorted_col_indices: Vec<usize> = unique_col_indices.into_iter().collect();\n            sorted_col_indices.sort();\n            Ok(Self::Selection(\n                sorted_col_indices.into_iter().map(IdxOrName::Idx).collect(),\n            ))\n        }\n    }\n}\n\n/// Visibility of a sheet.\n#[derive(Clone, Copy, Debug, PartialEq, Eq)]\npub enum SheetVisible {\n    Visible,\n    Hidden,\n    VeryHidden,\n}\n\nimpl 
From<CalamineSheetVisible> for SheetVisible {\n    fn from(value: CalamineSheetVisible) -> Self {\n        match value {\n            CalamineSheetVisible::Visible => SheetVisible::Visible,\n            CalamineSheetVisible::Hidden => SheetVisible::Hidden,\n            CalamineSheetVisible::VeryHidden => SheetVisible::VeryHidden,\n        }\n    }\n}\n\n/// A single sheet in an Excel file.\n#[derive(Debug)]\n#[cfg_attr(feature = \"python\", pyclass(name = \"_ExcelSheet\"))]\npub struct ExcelSheet {\n    sheet_meta: CalamineSheet,\n    header: Header,\n    pagination: Pagination,\n    data: ExcelSheetData<'static>,\n    height: Option<usize>,\n    total_height: Option<usize>,\n    width: Option<usize>,\n    limit: usize,\n    opts: LoadSheetOrTableOptions,\n    selected_columns: Vec<ColumnInfo>,\n    available_columns: AvailableColumns,\n}\n\nimpl ExcelSheet {\n    pub(crate) fn data(&self) -> &ExcelSheetData<'_> {\n        &self.data\n    }\n\n    pub(crate) fn try_new(\n        sheet_meta: CalamineSheet,\n        data: ExcelSheetData<'static>,\n        opts: LoadSheetOrTableOptions,\n    ) -> FastExcelResult<Self> {\n        let header = Header::new(opts.data_header_row(), opts.column_names.clone());\n        let available_columns_info =\n            build_available_columns_info(&data, &opts.selected_columns, &header)?;\n        let selected_columns_info = opts\n            .selected_columns\n            .select_columns(available_columns_info)?;\n\n        let pagination = match &data {\n            ExcelSheetData::Owned(range) => {\n                Pagination::try_new(opts.skip_rows.clone(), opts.n_rows, range)?\n            }\n            ExcelSheetData::Ref(range) => {\n                Pagination::try_new(opts.skip_rows.clone(), opts.n_rows, range)?\n            }\n        };\n\n        let mut sheet = ExcelSheet {\n            sheet_meta,\n            header,\n            pagination,\n            data,\n            opts,\n            height: None,\n            
total_height: None,\n            width: None,\n            // Will be replaced\n            limit: 0,\n            available_columns: AvailableColumns::Pending,\n            // Empty vec as It'll be replaced\n            selected_columns: Vec::with_capacity(0),\n        };\n        sheet.limit = sheet.compute_limit();\n\n        // Finalizing column info (figure out dtypes for every column)\n        let row_limit = sheet.schema_sample_rows();\n        let selected_columns = finalize_column_info(\n            selected_columns_info,\n            &sheet.data,\n            sheet.offset(),\n            row_limit,\n            sheet.opts.dtypes.as_ref(),\n            &sheet.opts.dtype_coercion,\n            sheet.opts.whitespace_as_null,\n        )?;\n\n        sheet.selected_columns = selected_columns;\n\n        Ok(sheet)\n    }\n\n    fn ensure_available_columns_loaded(&mut self) -> FastExcelResult<()> {\n        let available_columns = match &self.available_columns {\n            AvailableColumns::Pending => {\n                let available_columns_info = build_available_columns_info(\n                    &self.data,\n                    &self.opts.selected_columns,\n                    &self.header,\n                )?;\n                let final_info = finalize_column_info(\n                    available_columns_info,\n                    self.data(),\n                    self.offset(),\n                    self.limit(),\n                    self.opts.dtypes.as_ref(),\n                    &self.opts.dtype_coercion,\n                    self.opts.whitespace_as_null,\n                )?;\n                AvailableColumns::Loaded(final_info)\n            }\n            AvailableColumns::Loaded(_) => return Ok(()),\n        };\n\n        self.available_columns = available_columns;\n        Ok(())\n    }\n\n    fn load_available_columns(&mut self) -> FastExcelResult<&[ColumnInfo]> {\n        self.ensure_available_columns_loaded()?;\n        
self.available_columns.as_loaded()\n    }\n\n    fn compute_limit(&self) -> usize {\n        let upper_bound = if self.opts.skip_whitespace_tail_rows {\n            self.data.height_without_tail_whitespace()\n        } else {\n            self.data.height()\n        };\n        if let Some(n_rows) = self.pagination.n_rows {\n            let limit = self.offset() + n_rows;\n            if limit < upper_bound {\n                return limit;\n            }\n        }\n        upper_bound\n    }\n\n    pub(crate) fn limit(&self) -> usize {\n        self.limit\n    }\n\n    pub(crate) fn schema_sample_rows(&self) -> usize {\n        get_schema_sample_rows(self.opts.schema_sample_rows, self.offset(), self.limit())\n    }\n\n    pub fn width(&mut self) -> usize {\n        self.width.unwrap_or_else(|| {\n            let width = self.data.width();\n            self.width = Some(width);\n            width\n        })\n    }\n\n    pub fn height(&mut self) -> usize {\n        self.height.unwrap_or_else(|| {\n            use crate::data::generate_row_selector;\n            let height =\n                generate_row_selector(self.pagination.skip_rows(), self.offset(), self.limit())\n                    .map(|selector| selector.len())\n                    .unwrap_or_else(|_| self.limit() - self.offset());\n            self.height = Some(height);\n            height\n        })\n    }\n\n    pub fn total_height(&mut self) -> usize {\n        self.total_height.unwrap_or_else(|| {\n            let total_height = self.data.height() - self.header.offset();\n            self.total_height = Some(total_height);\n            total_height\n        })\n    }\n\n    pub fn offset(&self) -> usize {\n        self.header.offset() + self.pagination.offset()\n    }\n\n    pub fn selected_columns(&self) -> &Vec<ColumnInfo> {\n        &self.selected_columns\n    }\n\n    pub fn available_columns(&mut self) -> FastExcelResult<Vec<ColumnInfo>> {\n        self.load_available_columns().map(|cols| 
cols.to_vec())\n    }\n\n    pub fn specified_dtypes(&self) -> Option<&DTypes> {\n        self.opts.dtypes.as_ref()\n    }\n\n    pub fn name(&self) -> &str {\n        &self.sheet_meta.name\n    }\n\n    pub fn visible(&self) -> SheetVisible {\n        self.sheet_meta.visible.into()\n    }\n\n    pub fn to_columns(&self) -> FastExcelResult<Vec<FastExcelColumn>> {\n        self.selected_columns\n            .iter()\n            .map(|column_info| {\n                let offset = self.offset();\n                let limit = self.limit();\n                let whitespace_as_null = self.opts.whitespace_as_null;\n\n                match self.data() {\n                    ExcelSheetData::Owned(range) => FastExcelColumn::try_from_column_info(\n                        column_info,\n                        range,\n                        offset,\n                        limit,\n                        whitespace_as_null,\n                    ),\n                    ExcelSheetData::Ref(range) => FastExcelColumn::try_from_column_info(\n                        column_info,\n                        range,\n                        offset,\n                        limit,\n                        whitespace_as_null,\n                    ),\n                }\n            })\n            .collect()\n    }\n\n    #[cfg(feature = \"polars\")]\n    pub fn to_polars(&self) -> FastExcelResult<DataFrame> {\n        let pl_columns = self.to_columns()?.into_iter().map(Into::into).collect();\n        DataFrame::new_infer_height(pl_columns).map_err(|err| {\n            FastExcelErrorKind::Internal(format!(\"could not create DataFrame: {err:?}\")).into()\n        })\n    }\n}\n\n#[cfg(feature = \"__pyo3-tests\")]\n#[cfg(test)]\nmod tests {\n    use super::*;\n    use pretty_assertions::assert_eq;\n    use pyo3::{\n        prelude::PyListMethods,\n        types::{PyList, PyString},\n    };\n    use rstest::rstest;\n\n    #[test]\n    fn selected_columns_from_none() {\n        assert_eq!(\n        
    TryInto::<SelectedColumns>::try_into(None).unwrap(),\n            SelectedColumns::All\n        )\n    }\n\n    #[test]\n    fn selected_columns_from_list_of_valid_ints() {\n        Python::attach(|py| {\n            let py_list = PyList::new(py, vec![0, 1, 2]).expect(\"could not create PyList\");\n            assert_eq!(\n                TryInto::<SelectedColumns>::try_into(Some(py_list.as_ref())).unwrap(),\n                SelectedColumns::Selection([0, 1, 2].into_iter().map(IdxOrName::Idx).collect())\n            )\n        });\n    }\n\n    #[test]\n    fn selected_columns_from_list_of_valid_strings() {\n        Python::attach(|py| {\n            let py_list = PyList::new(py, vec![\"foo\", \"bar\"]).expect(\"could not create PyList\");\n            assert_eq!(\n                TryInto::<SelectedColumns>::try_into(Some(py_list.as_ref())).unwrap(),\n                SelectedColumns::Selection(\n                    [\"foo\", \"bar\"]\n                        .iter()\n                        .map(ToString::to_string)\n                        .map(IdxOrName::Name)\n                        .collect()\n                )\n            )\n        });\n    }\n\n    #[test]\n    fn selected_columns_from_list_of_valid_strings_and_ints() {\n        Python::attach(|py| {\n            let py_list = PyList::new(py, vec![\"foo\", \"bar\"]).expect(\"could not create PyList\");\n            py_list.append(42).unwrap();\n            py_list.append(5).unwrap();\n            assert_eq!(\n                TryInto::<SelectedColumns>::try_into(Some(py_list.as_ref())).unwrap(),\n                SelectedColumns::Selection(vec![\n                    IdxOrName::Name(\"foo\".to_string()),\n                    IdxOrName::Name(\"bar\".to_string()),\n                    IdxOrName::Idx(42),\n                    IdxOrName::Idx(5)\n                ])\n            )\n        });\n    }\n\n    #[test]\n    fn selected_columns_from_invalid_ints() {\n        Python::attach(|py| {\n            let 
py_list = PyList::new(py, vec![0, 2, -1]).expect(\"could not create PyList\");\n            let err = TryInto::<SelectedColumns>::try_into(Some(py_list.as_ref())).unwrap_err();\n\n            assert!(matches!(err.kind, FastExcelErrorKind::InvalidParameters(_)));\n        });\n    }\n\n    #[test]\n    fn selected_columns_from_empty_int_list() {\n        Python::attach(|py| {\n            let py_list = PyList::new(py, Vec::<usize>::new()).expect(\"could not create PyList\");\n            let err = TryInto::<SelectedColumns>::try_into(Some(py_list.as_ref())).unwrap_err();\n\n            assert!(matches!(err.kind, FastExcelErrorKind::InvalidParameters(_)));\n        });\n    }\n\n    #[test]\n    fn selected_columns_from_empty_string_list() {\n        Python::attach(|py| {\n            let py_list = PyList::new(py, Vec::<String>::new()).expect(\"could not create PyList\");\n            let err = TryInto::<SelectedColumns>::try_into(Some(py_list.as_ref())).unwrap_err();\n\n            assert!(matches!(err.kind, FastExcelErrorKind::InvalidParameters(_)));\n        });\n    }\n\n    #[rstest]\n    // Standard unique columns\n    #[case(\"A,B,D\", vec![0, 1, 3])]\n    // Standard unique columns + range\n    #[case(\"A,B:E,Y\", vec![0, 1, 2, 3, 4, 24])]\n    // Standard unique column + ranges with mixed case\n    #[case(\"A:c,b:E,w,Y:z\", vec![0, 1, 2, 3, 4, 22, 24, 25])]\n    // Ranges beyond Z\n    #[case(\"A,y:AB\", vec![0, 24, 25, 26, 27])]\n    #[case(\"BB:BE,DDC:DDF\", vec![53, 54, 55, 56, 2810, 2811, 2812, 2813])]\n    fn selected_columns_from_valid_ranges(#[case] raw: &str, #[case] expected_indices: Vec<usize>) {\n        Python::attach(|py| {\n            let expected_range = SelectedColumns::Selection(\n                expected_indices.into_iter().map(IdxOrName::Idx).collect(),\n            );\n            let input = PyString::new(py, raw);\n\n            let range = TryInto::<SelectedColumns>::try_into(Some(input.as_ref()))\n                .expect(\"expected a 
valid column selection\");\n\n            assert_eq!(range, expected_range)\n        })\n    }\n\n    #[rstest]\n    #[case(\"B:\")]\n    #[case(\"A,C:\")]\n    #[case(\"A:\")]\n    #[case(\":E\")]\n    #[case(\":C\")]\n    #[case(\":A\")]\n    #[case(\":C,E:\")]\n    fn selected_columns_from_valid_open_ended_ranges(#[case] raw: &str) {\n        Python::attach(|py| {\n            let input = PyString::new(py, raw);\n\n            let range = TryInto::<SelectedColumns>::try_into(Some(input.as_ref()))\n                .expect(\"expected a valid column selection\");\n\n            assert!(matches!(range, SelectedColumns::DeferredSelection(_)));\n        })\n    }\n\n    #[rstest]\n    // empty selection\n    #[case(\"\", \"at least one character\")]\n    // empty range\n    #[case(\"a:a,b:d,e\", \"empty range\")]\n    // end before start\n    #[case(\"b:a\", \"end of range is before start\")]\n    // both start and end empty\n    #[case(\":\", \"cannot have both start and end empty\")]\n    // too many elements\n    #[case(\"a:b:e\", \"exactly 2 elements, got 3\")]\n    fn selected_columns_from_invalid_ranges(#[case] raw: &str, #[case] message: &str) {\n        Python::attach(|py| {\n            let input = PyString::new(py, raw);\n\n            let err = TryInto::<SelectedColumns>::try_into(Some(input.as_ref()))\n                .expect_err(\"expected an error\");\n\n            match err.kind {\n                FastExcelErrorKind::InvalidParameters(detail) => {\n                    if !detail.contains(message) {\n                        panic!(\"expected \\\"{detail}\\\" to contain \\\"{message}\\\"\")\n                    }\n                }\n                _ => panic!(\"Expected error to be InvalidParameters, got {err:?}\"),\n            }\n        })\n    }\n}\n"
  },
  {
    "path": "src/types/excelsheet/polars.rs",
    "content": "use crate::{FastExcelColumn, FastExcelSeries};\nuse polars_core::{\n    frame::column::{Column as PolarsColumn, ScalarColumn},\n    prelude::DataType,\n    scalar::Scalar,\n};\n\nimpl From<FastExcelColumn> for PolarsColumn {\n    fn from(column: FastExcelColumn) -> Self {\n        let name = column.name().into();\n        match column.data {\n            FastExcelSeries::Null => PolarsColumn::Scalar(ScalarColumn::new(\n                name,\n                Scalar::null(DataType::Null),\n                column.len(),\n            )),\n            FastExcelSeries::Bool(values) => PolarsColumn::new(name, values),\n            FastExcelSeries::String(values) => PolarsColumn::new(name, values),\n            FastExcelSeries::Int(values) => PolarsColumn::new(name, values),\n            FastExcelSeries::Float(values) => PolarsColumn::new(name, values),\n            FastExcelSeries::Datetime(values) => PolarsColumn::new(name, values),\n            FastExcelSeries::Date(values) => PolarsColumn::new(name, values),\n            FastExcelSeries::Duration(values) => PolarsColumn::new(name, values),\n        }\n    }\n}\n"
  },
  {
    "path": "src/types/excelsheet/python.rs",
    "content": "use std::{collections::HashSet, sync::Arc};\n\nuse arrow_array::{RecordBatch, StructArray};\nuse arrow_schema::Field;\nuse pyo3::{\n    Borrowed, Bound, FromPyObject, IntoPyObject, Py, PyAny, PyErr, PyResult, Python, pyclass,\n    pymethods,\n    types::{PyAnyMethods, PyCapsule, PyList, PyListMethods, PyString, PyTuple},\n};\nuse pyo3_arrow::ffi::{to_array_pycapsules, to_schema_pycapsule};\n\nuse crate::{\n    ExcelSheet,\n    data::{\n        ExcelSheetData, record_batch_from_data_and_columns_with_skip_rows,\n        selected_columns_to_schema,\n    },\n    error::{\n        ErrorContext, FastExcelError, FastExcelErrorKind, FastExcelResult, py_errors::IntoPyResult,\n    },\n    types::{\n        dtype::DTypes,\n        excelsheet::{SelectedColumns, SheetVisible, SkipRows, column_info::ColumnInfo},\n        idx_or_name::IdxOrName,\n    },\n};\n\nimpl TryFrom<&Bound<'_, PyList>> for SelectedColumns {\n    type Error = FastExcelError;\n\n    fn try_from(py_list: &Bound<'_, PyList>) -> FastExcelResult<Self> {\n        use FastExcelErrorKind::InvalidParameters;\n\n        if py_list.is_empty() {\n            Err(InvalidParameters(\"list of selected columns is empty\".to_string()).into())\n        } else if let Ok(selection) = py_list.extract::<Vec<IdxOrName>>() {\n            Ok(Self::Selection(selection))\n        } else {\n            Err(\n                InvalidParameters(format!(\"expected list[int] | list[str], got {py_list:?}\"))\n                    .into(),\n            )\n        }\n    }\n}\n\nimpl TryFrom<Option<&Bound<'_, PyAny>>> for SelectedColumns {\n    type Error = FastExcelError;\n\n    fn try_from(py_any_opt: Option<&Bound<'_, PyAny>>) -> FastExcelResult<Self> {\n        match py_any_opt {\n            None => Ok(Self::All),\n            Some(py_any) => {\n                // Not trying to downcast to PyNone here as we assume that this would result in\n                // py_any_opt being None\n                if let Ok(py_str) = 
py_any.extract::<String>() {\n                    py_str.parse()\n                } else if let Ok(py_list) = py_any.cast::<PyList>() {\n                    py_list.try_into()\n                } else if let Ok(py_function) = py_any.extract::<Py<PyAny>>() {\n                    Ok(Self::DynamicSelection(py_function))\n                } else {\n                    Err(FastExcelErrorKind::InvalidParameters(format!(\n                        \"unsupported object type {object_type}\",\n                        object_type = py_any.get_type()\n                    ))\n                    .into())\n                }\n            }\n            .with_context(|| {\n                format!(\"could not determine selected columns from provided object: {py_any}\")\n            }),\n        }\n    }\n}\n\nimpl<'py> IntoPyObject<'py> for &SheetVisible {\n    type Target = PyString;\n\n    type Output = Bound<'py, Self::Target>;\n\n    type Error = FastExcelError;\n\n    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {\n        Ok(PyString::new(\n            py,\n            match self {\n                SheetVisible::Visible => \"visible\",\n                SheetVisible::Hidden => \"hidden\",\n                SheetVisible::VeryHidden => \"veryhidden\",\n            },\n        ))\n    }\n}\n\nimpl SkipRows {\n    pub(crate) fn should_skip_row(&self, row_idx: usize, py: Python) -> FastExcelResult<bool> {\n        match self {\n            SkipRows::Simple(offset) => Ok(row_idx < *offset),\n            SkipRows::List(skip_set) => Ok(skip_set.contains(&row_idx)),\n            SkipRows::Callable(func) => {\n                let result = func.call1(py, (row_idx,)).map_err(|e| {\n                    FastExcelErrorKind::InvalidParameters(format!(\n                        \"Error calling skip_rows function for row {row_idx}: {e}\"\n                    ))\n                })?;\n                result.extract::<bool>(py).map_err(|e| {\n                    
FastExcelErrorKind::InvalidParameters(format!(\n                        \"skip_rows callable must return bool, got error: {e}\"\n                    ))\n                    .into()\n                })\n            }\n            SkipRows::SkipEmptyRowsAtBeginning => {\n                // This is handled by calamine's FirstNonEmptyRow in the header logic\n                // For array creation, we don't need additional filtering\n                Ok(false)\n            }\n        }\n    }\n}\n\n#[derive(Debug, Clone)]\n#[pyclass(skip_from_py_object)]\npub(crate) struct CellError {\n    /// `(int, int)`. The original row and column of the error\n    #[pyo3(get)]\n    pub position: (usize, usize),\n    /// `int`. The row offset\n    #[pyo3(get)]\n    pub row_offset: usize,\n    /// `str`. The error message\n    #[pyo3(get)]\n    pub detail: String,\n}\n\n#[pymethods]\nimpl CellError {\n    #[getter]\n    pub fn offset_position(&self) -> (usize, usize) {\n        let (row, col) = self.position;\n        (row - self.row_offset, col)\n    }\n\n    pub fn __repr__(&self) -> String {\n        let (row, col) = self.position;\n        let (offset_row, offset_col) = self.offset_position();\n        format!(\n            \"CellError(position=({row}, {col}), offset_position=({offset_row}, {offset_col}), row_offset={row_offset}, detail={detail:?})\",\n            row_offset = self.row_offset,\n            detail = &self.detail,\n        )\n    }\n}\n\n#[pyclass]\npub(crate) struct CellErrors {\n    pub errors: Vec<CellError>,\n}\n\n#[pymethods]\nimpl CellErrors {\n    #[getter]\n    pub fn errors<'p>(&'p self, _py: Python<'p>) -> Vec<CellError> {\n        self.errors.clone()\n    }\n\n    pub fn __repr__(&self) -> String {\n        let errors_repr: Vec<String> = self.errors.iter().map(|e| e.__repr__()).collect();\n        format!(\"CellErrors(errors=[{}])\", errors_repr.join(\", \"))\n    }\n}\n\nimpl<'a, 'py> FromPyObject<'a, 'py> for SkipRows {\n    type Error = PyErr;\n    fn 
extract(obj: Borrowed<'a, 'py, PyAny>) -> Result<Self, Self::Error> {\n        // Handle None case\n        if obj.is_none() {\n            return Ok(SkipRows::SkipEmptyRowsAtBeginning);\n        }\n\n        // Try to extract as int first\n        if let Ok(skip_count) = obj.extract::<usize>() {\n            return Ok(SkipRows::Simple(skip_count));\n        }\n\n        // Try to extract as list of integers\n        if let Ok(skip_list) = obj.extract::<Vec<usize>>() {\n            let skip_set: HashSet<usize> = skip_list.into_iter().collect();\n            return Ok(SkipRows::List(skip_set));\n        }\n\n        // Check if it's callable\n        if obj.hasattr(\"__call__\").unwrap_or(false) {\n            return Ok(SkipRows::Callable(Arc::new(obj.to_owned().into())));\n        }\n\n        Err(FastExcelErrorKind::InvalidParameters(\n            \"skip_rows must be int, list of int, callable, or None\".to_string(),\n        )\n        .into())\n        .into_pyresult()\n    }\n}\n\nimpl TryFrom<&ExcelSheet> for RecordBatch {\n    type Error = FastExcelError;\n\n    fn try_from(sheet: &ExcelSheet) -> FastExcelResult<Self> {\n        let offset = sheet.offset();\n        let limit = sheet.limit();\n\n        match &sheet.data {\n            ExcelSheetData::Owned(range) => record_batch_from_data_and_columns_with_skip_rows(\n                &sheet.selected_columns,\n                range,\n                sheet.pagination.skip_rows(),\n                offset,\n                limit,\n                sheet.opts.whitespace_as_null,\n            ),\n            ExcelSheetData::Ref(range) => record_batch_from_data_and_columns_with_skip_rows(\n                &sheet.selected_columns,\n                range,\n                sheet.pagination.skip_rows(),\n                offset,\n                limit,\n                sheet.opts.whitespace_as_null,\n            ),\n        }\n        .with_context(|| format!(\"could not convert sheet {} to RecordBatch\", sheet.name()))\n 
   }\n}\n\n// NOTE: These proxy python implems are required because `#[getter]` does not play well with `cfg_attr`:\n// * https://github.com/PyO3/pyo3/issues/1003\n// * https://github.com/PyO3/pyo3/issues/780\n#[pymethods]\nimpl ExcelSheet {\n    #[getter(\"width\")]\n    pub fn py_width(&mut self) -> usize {\n        self.width()\n    }\n\n    #[getter(\"height\")]\n    pub fn py_height(&mut self) -> usize {\n        self.height()\n    }\n\n    #[getter(\"total_height\")]\n    pub fn py_total_height(&mut self) -> usize {\n        self.total_height()\n    }\n\n    #[getter(\"offset\")]\n    pub fn py_offset(&self) -> usize {\n        self.offset()\n    }\n\n    #[getter(\"selected_columns\")]\n    pub fn py_selected_columns(&self) -> Vec<ColumnInfo> {\n        self.selected_columns().to_owned()\n    }\n\n    #[pyo3(name = \"available_columns\")]\n    pub fn py_available_columns(&mut self) -> FastExcelResult<Vec<ColumnInfo>> {\n        self.available_columns()\n    }\n\n    #[getter(\"specified_dtypes\")]\n    pub fn py_specified_dtypes(&self) -> Option<&DTypes> {\n        self.specified_dtypes()\n    }\n\n    #[getter(\"name\")]\n    pub fn py_name(&self) -> &str {\n        self.name()\n    }\n\n    #[getter(\"visible\")]\n    pub fn py_visible<'py>(&'py self, py: Python<'py>) -> FastExcelResult<Bound<'py, PyString>> {\n        let visible: SheetVisible = self.visible();\n        (&visible).into_pyobject(py)\n    }\n\n    #[cfg(feature = \"pyarrow\")]\n    pub fn to_arrow<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {\n        use pyo3::IntoPyObjectExt;\n\n        use crate::error::py_errors::IntoPyResult;\n\n        py.detach(|| RecordBatch::try_from(self))\n            .with_context(|| {\n                format!(\n                    \"could not create RecordBatch from sheet \\\"{}\\\"\",\n                    self.name()\n                )\n            })\n            .and_then(|rb| {\n                use arrow_pyarrow::ToPyArrow;\n\n               
 rb.to_pyarrow(py)\n                    .map_err(|err| FastExcelErrorKind::ArrowError(err.to_string()).into())\n            })\n            .with_context(|| {\n                format!(\n                    \"could not convert RecordBatch to pyarrow for sheet \\\"{}\\\"\",\n                    self.name()\n                )\n            })\n            .into_pyresult()\n            .and_then(|obj| obj.into_bound_py_any(py))\n    }\n\n    #[cfg(feature = \"pyarrow\")]\n    pub fn to_arrow_with_errors<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {\n        use arrow_pyarrow::IntoPyArrow;\n        use pyo3::IntoPyObjectExt;\n\n        use crate::data::record_batch_from_data_and_columns_with_errors;\n\n        let offset = self.offset();\n        let limit = self.limit();\n\n        let (rb, errors) = py\n            .detach(|| {\n                record_batch_from_data_and_columns_with_errors(\n                    &self.selected_columns,\n                    self.data(),\n                    offset,\n                    limit,\n                    self.opts.whitespace_as_null,\n                )\n            })\n            .with_context(|| {\n                format!(\n                    \"could not create RecordBatch from sheet \\\"{}\\\"\",\n                    self.name()\n                )\n            })?;\n\n        let rb = rb\n            .into_pyarrow(py)\n            .map_err(|err| FastExcelErrorKind::ArrowError(err.to_string()).into())\n            .with_context(|| {\n                format!(\n                    \"could not convert RecordBatch to pyarrow for sheet \\\"{}\\\"\",\n                    self.name()\n                )\n            })?;\n        (rb, errors).into_bound_py_any(py)\n    }\n\n    /// Export the schema as an [`ArrowSchema`] [`PyCapsule`].\n    ///\n    /// <https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html#arrowschema-export>\n    ///\n    /// [`ArrowSchema`]: 
arrow_array::ffi::FFI_ArrowSchema\n    /// [`PyCapsule`]: pyo3::types::PyCapsule\n    pub fn __arrow_c_schema__<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyCapsule>> {\n        let schema = selected_columns_to_schema(&self.selected_columns);\n        Ok(to_schema_pycapsule(py, &schema)?)\n    }\n\n    /// Export the schema and data as a pair of [`ArrowSchema`] and [`ArrowArray`] [`PyCapsules`]\n    ///\n    /// The optional `requested_schema` parameter allows for potential schema conversion.\n    ///\n    /// <https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html#arrowarray-export>\n    ///\n    /// [`ArrowSchema`]: arrow_array::ffi::FFI_ArrowSchema\n    /// [`ArrowArray`]: arrow_array::ffi::FFI_ArrowArray\n    /// [`PyCapsules`]: pyo3::types::PyCapsule\n    pub fn __arrow_c_array__<'py>(\n        &self,\n        py: Python<'py>,\n        requested_schema: Option<Bound<'py, PyCapsule>>,\n    ) -> PyResult<Bound<'py, PyTuple>> {\n        let record_batch = RecordBatch::try_from(self)\n            .with_context(|| {\n                format!(\n                    \"could not create RecordBatch from sheet \\\"{}\\\"\",\n                    self.name()\n                )\n            })\n            .into_pyresult()?;\n\n        let field = Field::new_struct(\"\", record_batch.schema_ref().fields().clone(), false);\n        let array = Arc::new(StructArray::from(record_batch));\n        Ok(to_array_pycapsules(\n            py,\n            field.into(),\n            array.as_ref(),\n            requested_schema,\n        )?)\n    }\n\n    pub fn __repr__(&self) -> String {\n        format!(\"ExcelSheet<{}>\", self.name())\n    }\n}\n"
  },
  {
    "path": "src/types/excelsheet/table.rs",
    "content": "use crate::error::{FastExcelErrorKind, FastExcelResult};\nuse calamine::{Data, Sheets, Table};\nuse std::io::{Read, Seek};\n\npub(crate) fn extract_table_names<'a, RS: Read + Seek>(\n    sheets: &'a mut Sheets<RS>,\n    sheet_name: Option<&str>,\n) -> FastExcelResult<Vec<&'a String>> {\n    match sheets {\n        Sheets::Xlsx(xlsx) => {\n            // Internally checks if tables already loaded; is fast\n            xlsx.load_tables()?;\n\n            match sheet_name {\n                None => Ok(xlsx.table_names()),\n                Some(sn) => Ok(xlsx.table_names_in_sheet(sn)),\n            }\n        }\n        _ => Err(FastExcelErrorKind::Internal(\n            \"Currently only XLSX files are supported for tables\".to_string(),\n        )\n        .into()),\n    }\n}\n\npub(crate) fn extract_table_range<RS: Read + Seek>(\n    name: &str,\n    sheets: &mut Sheets<RS>,\n) -> FastExcelResult<Table<Data>> {\n    match sheets {\n        Sheets::Xlsx(xlsx) => {\n            // Internally checks if tables already loaded; is fast\n            xlsx.load_tables()?;\n\n            let table_result = xlsx.table_by_name(name);\n            let table = table_result?;\n\n            Ok(table)\n        }\n        _ => Err(FastExcelErrorKind::Internal(\n            \"Currently only XLSX files are supported for tables\".to_string(),\n        )\n        .into()),\n    }\n}\n"
  },
  {
    "path": "src/types/exceltable/mod.rs",
    "content": "#[cfg(feature = \"python\")]\nmod python;\n\nuse calamine::{Data, Range, Table};\n#[cfg(feature = \"polars\")]\nuse polars_core::frame::DataFrame;\n#[cfg(feature = \"python\")]\nuse pyo3::pyclass;\n\nuse crate::{\n    FastExcelColumn, FastExcelErrorKind, IdxOrName, LoadSheetOrTableOptions, SelectedColumns,\n    data::height_without_tail_whitespace,\n    error::{ErrorContext, FastExcelResult},\n    types::{\n        dtype::DTypes,\n        excelsheet::{\n            Header, Pagination,\n            column_info::{\n                AvailableColumns, ColumnInfo, build_available_columns_info, finalize_column_info,\n            },\n            deferred_selection_to_concrete,\n        },\n    },\n    utils::schema::get_schema_sample_rows,\n};\n\n/// A single table in an Excel file.\n#[derive(Debug)]\n#[cfg_attr(feature = \"python\", pyclass(name = \"_ExcelTable\"))]\npub struct ExcelTable {\n    name: String,\n    sheet_name: String,\n    selected_columns: Vec<ColumnInfo>,\n    available_columns: AvailableColumns,\n    table: Table<Data>,\n    header: Header,\n    pagination: Pagination,\n    opts: LoadSheetOrTableOptions,\n    height: Option<usize>,\n    total_height: Option<usize>,\n    width: Option<usize>,\n    limit: usize,\n}\n\nimpl ExcelTable {\n    fn extract_selected_columns_and_table_columns(\n        table: &Table<Data>,\n        selected_columns: &[IdxOrName],\n    ) -> FastExcelResult<(Vec<String>, Vec<IdxOrName>)> {\n        let table_columns: Vec<String> = table.columns().into();\n        let column_offset = table.data().start().map_or(0, |(_row, col)| col as usize);\n        let selected_column_indices = selected_columns\n            .iter()\n            .map(|idx_or_name| match idx_or_name {\n                IdxOrName::Idx(idx) => Ok(*idx),\n                IdxOrName::Name(name) => table_columns\n                    .iter()\n                    .enumerate()\n                    .find_map(|(idx, col_name)| {\n                        
(col_name.as_str() == name.as_str()).then_some(idx + column_offset)\n                    })\n                    .ok_or_else(|| FastExcelErrorKind::ColumnNotFound(name.clone().into()).into())\n                    .with_context(|| format!(\"available columns are: {table_columns:?}\")),\n            })\n            .collect::<FastExcelResult<Vec<usize>>>()?;\n\n        let table_columns = table_columns\n            .into_iter()\n            .enumerate()\n            .filter_map(|(idx, col_name)| {\n                selected_column_indices\n                    .contains(&(idx + column_offset))\n                    .then_some(col_name)\n            })\n            .collect();\n\n        let selected_columns = selected_column_indices\n            .into_iter()\n            .map(Into::into)\n            .collect();\n\n        Ok((table_columns, selected_columns))\n    }\n\n    /// Builds a `Header` for a table. This might update the column selection, if provided\n    fn build_header_and_update_selection(\n        table: &Table<Data>,\n        opts: LoadSheetOrTableOptions,\n    ) -> FastExcelResult<(Header, LoadSheetOrTableOptions)> {\n        Ok(match (&opts.column_names, opts.header_row) {\n            (None, None) => {\n                // If there is a column selection, we need to convert all elements to column\n                // indices. 
This is required because we will be providing the header, and it\n                // is required to use an index-based selection when custom column names are provided\n                match &opts.selected_columns {\n                    SelectedColumns::Selection(selected_columns) => {\n                        let (table_columns, selected_columns) =\n                            Self::extract_selected_columns_and_table_columns(\n                                table,\n                                selected_columns,\n                            )?;\n                        let opts =\n                            opts.selected_columns(SelectedColumns::Selection(selected_columns));\n                        (Header::With(table_columns), opts)\n                    }\n                    SelectedColumns::DeferredSelection(deferred_selection) => {\n                        let concrete_columns = deferred_selection_to_concrete(\n                            deferred_selection,\n                            table.data().end().map_or(0, |(_row, col)| col as usize),\n                        );\n                        let (table_columns, selected_columns) =\n                            Self::extract_selected_columns_and_table_columns(\n                                table,\n                                &concrete_columns,\n                            )?;\n                        let opts =\n                            opts.selected_columns(SelectedColumns::Selection(selected_columns));\n                        (Header::With(table_columns), opts)\n                    }\n                    _ => (Header::With(table.columns().into()), opts),\n                }\n            }\n            (None, Some(row)) => (Header::At(row), opts),\n            (Some(column_names), _) => (Header::With(column_names.clone()), opts),\n        })\n    }\n\n    pub(crate) fn try_new(\n        table: Table<Data>,\n        opts: LoadSheetOrTableOptions,\n    ) -> FastExcelResult<Self> {\n        
let pagination = Pagination::try_new(opts.skip_rows.clone(), opts.n_rows, table.data())?;\n\n        let (header, opts) = Self::build_header_and_update_selection(&table, opts)?;\n\n        let available_columns_info =\n            build_available_columns_info(table.data(), &opts.selected_columns, &header)?;\n        let selected_columns_info = opts\n            .selected_columns\n            .select_columns(available_columns_info)?;\n\n        let mut excel_table = ExcelTable {\n            name: table.name().to_owned(),\n            sheet_name: table.sheet_name().to_owned(),\n            available_columns: AvailableColumns::Pending,\n            // Empty vec as it'll be replaced\n            selected_columns: Vec::with_capacity(0),\n            table,\n            header,\n            pagination,\n            opts,\n            height: None,\n            total_height: None,\n            width: None,\n            // Will be replaced\n            limit: 0,\n        };\n        excel_table.limit = excel_table.compute_limit();\n\n        let row_limit = get_schema_sample_rows(\n            excel_table.opts.schema_sample_rows,\n            excel_table.offset(),\n            excel_table.limit(),\n        );\n\n        // Finalizing column info\n        let selected_columns = finalize_column_info(\n            selected_columns_info,\n            excel_table.data(),\n            excel_table.offset(),\n            row_limit,\n            excel_table.opts.dtypes.as_ref(),\n            &excel_table.opts.dtype_coercion,\n            excel_table.opts.whitespace_as_null,\n        )?;\n\n        // Figure out dtype for every column\n        excel_table.selected_columns = selected_columns;\n\n        Ok(excel_table)\n    }\n\n    pub(crate) fn data(&self) -> &Range<Data> {\n        self.table.data()\n    }\n\n    fn ensure_available_columns_loaded(&mut self) -> FastExcelResult<()> {\n        let available_columns = match &self.available_columns {\n            
AvailableColumns::Pending => {\n                let available_columns_info = build_available_columns_info(\n                    self.table.data(),\n                    &self.opts.selected_columns,\n                    &self.header,\n                )?;\n                let final_info = finalize_column_info(\n                    available_columns_info,\n                    self.data(),\n                    self.offset(),\n                    self.limit(),\n                    self.opts.dtypes.as_ref(),\n                    &self.opts.dtype_coercion,\n                    self.opts.whitespace_as_null,\n                )?;\n                AvailableColumns::Loaded(final_info)\n            }\n            AvailableColumns::Loaded(_) => return Ok(()),\n        };\n\n        self.available_columns = available_columns;\n        Ok(())\n    }\n\n    fn load_available_columns(&mut self) -> FastExcelResult<&[ColumnInfo]> {\n        self.ensure_available_columns_loaded()?;\n        self.available_columns.as_loaded()\n    }\n\n    pub fn offset(&self) -> usize {\n        self.header.offset() + self.pagination.offset()\n    }\n\n    fn compute_limit(&self) -> usize {\n        let upper_bound = if self.opts.skip_whitespace_tail_rows {\n            height_without_tail_whitespace(self.data()).unwrap_or_else(|| self.data().height())\n        } else {\n            self.data().height()\n        };\n        if let Some(n_rows) = self.pagination.n_rows() {\n            let limit = self.offset() + n_rows;\n            if limit < upper_bound {\n                return limit;\n            }\n        }\n        upper_bound\n    }\n\n    pub fn limit(&self) -> usize {\n        self.limit\n    }\n\n    pub fn selected_columns(&self) -> Vec<ColumnInfo> {\n        self.selected_columns.clone()\n    }\n\n    pub fn available_columns(&mut self) -> FastExcelResult<Vec<ColumnInfo>> {\n        self.load_available_columns().map(|cols| cols.to_vec())\n    }\n\n    pub fn specified_dtypes(&self) -> 
Option<&DTypes> {\n        self.opts.dtypes.as_ref()\n    }\n\n    pub fn width(&mut self) -> usize {\n        self.width.unwrap_or_else(|| {\n            let width = self.data().width();\n            self.width = Some(width);\n            width\n        })\n    }\n\n    pub fn height(&mut self) -> usize {\n        self.height.unwrap_or_else(|| {\n            let height = self.limit() - self.offset();\n            self.height = Some(height);\n            height\n        })\n    }\n\n    pub fn total_height(&mut self) -> usize {\n        self.total_height.unwrap_or_else(|| {\n            let total_height = self.data().height() - self.header.offset();\n            self.total_height = Some(total_height);\n            total_height\n        })\n    }\n\n    pub fn name(&self) -> &str {\n        &self.name\n    }\n\n    pub fn sheet_name(&self) -> &str {\n        &self.sheet_name\n    }\n\n    pub fn to_columns(&self) -> FastExcelResult<Vec<FastExcelColumn>> {\n        self.selected_columns\n            .iter()\n            .map(|column_info| {\n                FastExcelColumn::try_from_column_info(\n                    column_info,\n                    self.table.data(),\n                    self.offset(),\n                    self.limit(),\n                    self.opts.whitespace_as_null,\n                )\n            })\n            .collect()\n    }\n\n    #[cfg(feature = \"polars\")]\n    pub fn to_polars(&self) -> FastExcelResult<DataFrame> {\n        use crate::error::FastExcelErrorKind;\n\n        let pl_columns = self.to_columns()?.into_iter().map(Into::into).collect();\n        DataFrame::new_infer_height(pl_columns).map_err(|err| {\n            FastExcelErrorKind::Internal(format!(\"could not create DataFrame: {err:?}\")).into()\n        })\n    }\n}\n"
  },
  {
    "path": "src/types/exceltable/python.rs",
    "content": "use std::sync::Arc;\n\nuse arrow_array::{RecordBatch, StructArray};\nuse arrow_schema::Field;\n#[cfg(feature = \"pyarrow\")]\nuse pyo3::PyAny;\nuse pyo3::{\n    Bound, PyResult, Python, pymethods,\n    types::{PyCapsule, PyTuple},\n};\nuse pyo3_arrow::ffi::{to_array_pycapsules, to_schema_pycapsule};\n\nuse crate::{\n    ExcelTable,\n    data::{record_batch_from_data_and_columns_with_skip_rows, selected_columns_to_schema},\n    error::{ErrorContext, FastExcelError, FastExcelResult, py_errors::IntoPyResult},\n    types::{dtype::DTypes, excelsheet::column_info::ColumnInfo},\n};\n\nimpl TryFrom<&ExcelTable> for RecordBatch {\n    type Error = FastExcelError;\n\n    fn try_from(table: &ExcelTable) -> FastExcelResult<Self> {\n        record_batch_from_data_and_columns_with_skip_rows(\n            &table.selected_columns,\n            table.data(),\n            table.pagination.skip_rows(),\n            table.offset(),\n            table.limit(),\n            table.opts.whitespace_as_null,\n        )\n        .with_context(|| {\n            format!(\n                \"could not convert table {table} in sheet {sheet} to RecordBatch\",\n                table = &table.name,\n                sheet = &table.sheet_name\n            )\n        })\n    }\n}\n\n// NOTE: These proxy python implems are required because `#[getter]` does not play well with `cfg_attr`:\n// * https://github.com/PyO3/pyo3/issues/1003\n// * https://github.com/PyO3/pyo3/issues/780\n#[pymethods]\nimpl ExcelTable {\n    #[getter(\"name\")]\n    pub fn py_name(&self) -> &str {\n        &self.name\n    }\n\n    #[getter(\"sheet_name\")]\n    pub fn py_sheet_name(&self) -> &str {\n        &self.sheet_name\n    }\n\n    #[getter(\"offset\")]\n    pub fn py_offset(&self) -> usize {\n        self.offset()\n    }\n\n    #[getter(\"limit\")]\n    pub fn py_limit(&self) -> usize {\n        self.limit()\n    }\n\n    #[getter(\"selected_columns\")]\n    pub fn py_selected_columns(&self) -> 
Vec<ColumnInfo> {\n        self.selected_columns()\n    }\n\n    #[pyo3(name = \"available_columns\")]\n    pub fn py_available_columns(&mut self) -> FastExcelResult<Vec<ColumnInfo>> {\n        self.available_columns()\n    }\n\n    #[getter(\"specified_dtypes\")]\n    pub fn py_specified_dtypes(&self) -> Option<&DTypes> {\n        self.specified_dtypes()\n    }\n\n    #[getter(\"width\")]\n    pub fn py_width(&mut self) -> usize {\n        self.width()\n    }\n\n    #[getter(\"height\")]\n    pub fn py_height(&mut self) -> usize {\n        self.height()\n    }\n\n    #[getter(\"total_height\")]\n    pub fn py_total_height(&mut self) -> usize {\n        self.total_height()\n    }\n\n    #[cfg(feature = \"pyarrow\")]\n    pub fn to_arrow<'py>(&self, py: Python<'py>) -> FastExcelResult<Bound<'py, PyAny>> {\n        RecordBatch::try_from(self)\n            .with_context(|| {\n                format!(\n                    \"could not create RecordBatch from sheet \\\"{}\\\"\",\n                    self.name\n                )\n            })\n            .and_then(|rb| {\n                use arrow_pyarrow::ToPyArrow;\n\n                use crate::error::FastExcelErrorKind;\n\n                rb.to_pyarrow(py)\n                    .map_err(|err| FastExcelErrorKind::ArrowError(err.to_string()).into())\n            })\n            .with_context(|| {\n                format!(\n                    \"could not convert RecordBatch to pyarrow for table \\\"{table}\\\" in sheet \\\"{sheet}\\\"\",\n                    table = self.name, sheet = self.sheet_name\n                )\n            })\n    }\n\n    /// Export the schema as an [`ArrowSchema`] [`PyCapsule`].\n    ///\n    /// <https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html#arrowschema-export>\n    ///\n    /// [`ArrowSchema`]: arrow_array::ffi::FFI_ArrowSchema\n    /// [`PyCapsule`]: pyo3::types::PyCapsule\n    pub fn __arrow_c_schema__<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, 
PyCapsule>> {\n        let schema = selected_columns_to_schema(&self.selected_columns);\n        Ok(to_schema_pycapsule(py, &schema)?)\n    }\n\n    /// Export the schema and data as a pair of [`ArrowSchema`] and [`ArrowArray`] [`PyCapsules`]\n    ///\n    /// The optional `requested_schema` parameter allows for potential schema conversion.\n    ///\n    /// <https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html#arrowarray-export>\n    ///\n    /// [`ArrowSchema`]: arrow_array::ffi::FFI_ArrowSchema\n    /// [`ArrowArray`]: arrow_array::ffi::FFI_ArrowArray\n    /// [`PyCapsules`]: pyo3::types::PyCapsule\n    pub fn __arrow_c_array__<'py>(\n        &self,\n        py: Python<'py>,\n        requested_schema: Option<Bound<'py, PyCapsule>>,\n    ) -> PyResult<Bound<'py, PyTuple>> {\n        let record_batch = RecordBatch::try_from(self)\n            .with_context(|| format!(\"could not create RecordBatch from table \\\"{}\\\"\", self.name))\n            .into_pyresult()?;\n\n        let field = Field::new_struct(\"\", record_batch.schema_ref().fields().clone(), false);\n        let array = Arc::new(StructArray::from(record_batch));\n        Ok(to_array_pycapsules(\n            py,\n            field.into(),\n            array.as_ref(),\n            requested_schema,\n        )?)\n    }\n\n    pub fn __repr__(&self) -> String {\n        format!(\n            \"ExcelTable<{sheet}/{name}>\",\n            sheet = self.sheet_name,\n            name = self.name\n        )\n    }\n}\n"
  },
  {
    "path": "src/types/idx_or_name/mod.rs",
    "content": "#[cfg(feature = \"python\")]\nmod python;\n\n/// A column index or name.\n#[derive(Debug, PartialEq, Eq, Hash, Clone)]\npub enum IdxOrName {\n    Idx(usize),\n    Name(String),\n}\n\nimpl IdxOrName {\n    pub(crate) fn format_message(&self) -> String {\n        match self {\n            Self::Idx(idx) => format!(\"at index {idx}\"),\n            Self::Name(name) => format!(\"with name \\\"{name}\\\"\"),\n        }\n    }\n}\n\nimpl From<usize> for IdxOrName {\n    fn from(index: usize) -> Self {\n        Self::Idx(index)\n    }\n}\n\nimpl From<String> for IdxOrName {\n    fn from(name: String) -> Self {\n        Self::Name(name)\n    }\n}\n\nimpl From<&str> for IdxOrName {\n    fn from(name: &str) -> Self {\n        Self::Name(name.to_owned())\n    }\n}\n"
  },
  {
    "path": "src/types/idx_or_name/python.rs",
    "content": "use pyo3::{\n    Borrowed, Bound, FromPyObject, IntoPyObject, IntoPyObjectExt, PyAny, PyErr, Python,\n    types::PyAnyMethods,\n};\n\nuse crate::{\n    error::{FastExcelError, FastExcelErrorKind, FastExcelResult, py_errors::IntoPyResult},\n    types::idx_or_name::IdxOrName,\n};\n\nimpl TryFrom<&Bound<'_, PyAny>> for IdxOrName {\n    type Error = FastExcelError;\n\n    fn try_from(value: &Bound<'_, PyAny>) -> FastExcelResult<Self> {\n        if let Ok(index) = value.extract() {\n            Ok(Self::Idx(index))\n        } else if let Ok(name) = value.extract() {\n            Ok(Self::Name(name))\n        } else {\n            Err(FastExcelErrorKind::InvalidParameters(format!(\n                \"cannot create IdxOrName from {value:?}\"\n            ))\n            .into())\n        }\n    }\n}\n\nimpl<'a, 'py> FromPyObject<'a, 'py> for IdxOrName {\n    type Error = PyErr;\n    fn extract(ob: Borrowed<'a, 'py, PyAny>) -> Result<Self, Self::Error> {\n        (&*ob).try_into().into_pyresult()\n    }\n}\n\nimpl<'py> IntoPyObject<'py> for IdxOrName {\n    type Target = PyAny;\n\n    type Output = Bound<'py, Self::Target>;\n\n    type Error = pyo3::PyErr;\n\n    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {\n        match self {\n            IdxOrName::Idx(idx) => idx.into_bound_py_any(py),\n            IdxOrName::Name(name) => name.into_bound_py_any(py),\n        }\n    }\n}\n\nimpl<'py> IntoPyObject<'py> for &IdxOrName {\n    type Target = PyAny;\n\n    type Output = Bound<'py, Self::Target>;\n\n    type Error = pyo3::PyErr;\n\n    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {\n        match self {\n            IdxOrName::Idx(idx) => idx.into_bound_py_any(py),\n            IdxOrName::Name(name) => name.into_bound_py_any(py),\n        }\n    }\n}\n"
  },
  {
    "path": "src/types/mod.rs",
    "content": "pub(crate) mod dtype;\npub(crate) mod excelreader;\npub(crate) mod excelsheet;\npub(crate) mod exceltable;\npub(crate) mod idx_or_name;\n\npub use dtype::{DType, DTypeCoercion, DTypes};\npub use excelreader::{DefinedName, ExcelReader, LoadSheetOrTableOptions};\npub use excelsheet::{\n    ExcelSheet, SelectedColumns, SheetVisible, SkipRows,\n    column_info::{ColumnInfo, ColumnNameFrom, DTypeFrom},\n};\npub use exceltable::ExcelTable;\npub use idx_or_name::IdxOrName;\n"
  },
  {
    "path": "src/utils/mod.rs",
    "content": "pub(crate) mod schema;\n"
  },
  {
    "path": "src/utils/schema.rs",
    "content": "use std::cmp::min;\n\n/// Determines how many rows should be used for schema sampling, based on the provided parameter,\n/// and the sheet's offset and limit.\n///\n/// Note that here, the limit should be retrieved from the sheet's `limit()` method, and must not\n/// be out of the sheet's bounds\npub(crate) fn get_schema_sample_rows(\n    sample_rows: Option<usize>,\n    offset: usize,\n    limit: usize,\n) -> usize {\n    // Checking how many rows we want to use to determine the dtype for a column. If sample_rows is\n    // not provided, we sample limit rows, i.e on the entire column\n    let sample_rows = offset + sample_rows.unwrap_or(limit);\n    // If sample_rows is higher than the sheet's limit, use the limit instead\n    min(sample_rows, limit)\n}\n\n#[cfg(feature = \"__pyo3-tests\")]\n#[cfg(test)]\nmod tests {\n    use super::get_schema_sample_rows;\n    use pretty_assertions::assert_eq;\n    use rstest::rstest;\n\n    #[rstest]\n    // default value, 50 rows sheet, row limit should be 50\n    #[case(Some(1000), 0, 50, 50)]\n    // default value, 5000 rows sheet, row limit should be 1000\n    #[case(Some(1000), 0, 5000, 1000)]\n    // default value, 1500 rows sheet, offset of 1000, row limit should be 1500\n    #[case(Some(1000), 1000, 1500, 1500)]\n    // 100 sampling size, 1500 rows sheet, offset of 1000, row limit should be 1100\n    #[case(Some(100), 1000, 1500, 1100)]\n    // No value, 50 rows sheet, row limit should be 50\n    #[case(None, 0, 50, 50)]\n    // No value, 5000 rows sheet, row limit should be 5000\n    #[case(None, 0, 5000, 5000)]\n    // no value, 1500 rows sheet, offset of 1000, row limit should be 1500\n    #[case(None, 1000, 1500, 1500)]\n    fn test_get_schema_sample_rows_return_values(\n        #[case] sample_rows: Option<usize>,\n        #[case] offset: usize,\n        #[case] limit: usize,\n        #[case] expected: usize,\n    ) {\n        assert_eq!(get_schema_sample_rows(sample_rows, offset, limit), expected);\n 
   }\n}\n"
  },
  {
    "path": "test.py",
    "content": "#!/usr/bin/env python3\nimport argparse\n\nimport fastexcel\n\n\ndef get_args() -> argparse.Namespace:\n    parser = argparse.ArgumentParser()\n    parser.add_argument(\"file\")\n    parser.add_argument(\"-c\", \"--column\", type=str, nargs=\"+\", help=\"the columns to use\")\n    parser.add_argument(\n        \"--eager\", action=\"store_true\", help=\"wether the sheet should be loaded eagerly\"\n    )\n    parser.add_argument(\n        \"-i\", \"--iterations\", type=int, help=\"the number of iterations to do\", default=1\n    )\n    parser.add_argument(\"-t\", \"--table\", type=str, help=\"the name of the table to load\")\n    parser.add_argument(\n        \"--print-tables\", action=\"store_true\", help=\"whether to print the tables in the file\"\n    )\n\n    return parser.parse_args()\n\n\ndef main():\n    args = get_args()\n    excel_file = fastexcel.read_excel(args.file)\n    use_columns = args.column or None\n\n    if args.print_tables:\n        table_names = excel_file.table_names()\n        if len(table_names) > 0:\n            print(f\"Available tables are {', '.join(table_names)}\")\n        else:\n            print(\"No tables found\")\n\n    for _ in range(args.iterations):\n        if args.table:\n            tbl = excel_file.load_table(args.table)\n            print(f\"Found table {args.table}:\")\n            print(tbl.to_polars())\n        else:\n            for sheet_name in excel_file.sheet_names:\n                if args.eager:\n                    excel_file.load_sheet_eager(sheet_name, use_columns=use_columns)\n                else:\n                    excel_file.load_sheet(sheet_name, use_columns=use_columns).to_arrow()\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tests/column_selection.rs",
    "content": "use anyhow::{Context, Result};\nuse fastexcel::{DType, DTypes, IdxOrName, LoadSheetOrTableOptions, SelectedColumns};\nuse pretty_assertions::assert_eq;\nuse rstest::{fixture, rstest};\nuse std::collections::HashMap;\n\nuse crate::utils::path_for_fixture;\n\n#[macro_use]\nmod utils;\n\n#[fixture]\nfn reader() -> fastexcel::ExcelReader {\n    fastexcel::read_excel(path_for_fixture(\"sheet-with-tables.xlsx\"))\n        .expect(\"could not read excel file\")\n}\n\n#[rstest]\nfn test_use_columns_with_table(mut reader: fastexcel::ExcelReader) -> Result<()> {\n    let selected_columns = SelectedColumns::Selection(vec![\n        IdxOrName::Name(\"User Id\".to_string()),\n        IdxOrName::Name(\"FirstName\".to_string()),\n    ]);\n\n    let opts = LoadSheetOrTableOptions::new_for_table().selected_columns(selected_columns);\n\n    let mut table = reader\n        .load_table(\"users\", opts)\n        .context(\"Failed to load table\")?;\n\n    assert_eq!(table.name(), \"users\");\n    assert_eq!(table.width(), 4);\n    assert_eq!(table.height(), 3);\n\n    let available_columns = table\n        .available_columns()\n        .context(\"could not obtain available columns for table\")?;\n    let expected_available_columns = vec![\n        fastexcel::ColumnInfo {\n            name: \"User Id\".into(),\n            index: 0,\n            absolute_index: 0,\n            dtype: fastexcel::DType::Float,\n            column_name_from: fastexcel::ColumnNameFrom::Provided,\n            dtype_from: fastexcel::DTypeFrom::Guessed,\n        },\n        fastexcel::ColumnInfo {\n            name: \"FirstName\".into(),\n            index: 1,\n            absolute_index: 1,\n            dtype: fastexcel::DType::String,\n            column_name_from: fastexcel::ColumnNameFrom::Provided,\n            dtype_from: fastexcel::DTypeFrom::Guessed,\n        },\n        fastexcel::ColumnInfo {\n            name: \"__UNNAMED__2\".into(),\n            index: 2,\n            
absolute_index: 2,\n            dtype: fastexcel::DType::String,\n            column_name_from: fastexcel::ColumnNameFrom::Generated,\n            dtype_from: fastexcel::DTypeFrom::Guessed,\n        },\n        fastexcel::ColumnInfo {\n            name: \"__UNNAMED__3\".into(),\n            index: 3,\n            absolute_index: 3,\n            dtype: fastexcel::DType::DateTime,\n            column_name_from: fastexcel::ColumnNameFrom::Generated,\n            dtype_from: fastexcel::DTypeFrom::Guessed,\n        },\n    ];\n    assert_eq!(available_columns, expected_available_columns);\n\n    let selected_columns_info = table.selected_columns();\n    let expected_selected_columns = vec![\n        fastexcel::ColumnInfo {\n            name: \"User Id\".into(),\n            index: 0,\n            absolute_index: 0,\n            dtype: fastexcel::DType::Float,\n            column_name_from: fastexcel::ColumnNameFrom::Provided,\n            dtype_from: fastexcel::DTypeFrom::Guessed,\n        },\n        fastexcel::ColumnInfo {\n            name: \"FirstName\".into(),\n            index: 1,\n            absolute_index: 1,\n            dtype: fastexcel::DType::String,\n            column_name_from: fastexcel::ColumnNameFrom::Provided,\n            dtype_from: fastexcel::DTypeFrom::Guessed,\n        },\n    ];\n    assert_eq!(selected_columns_info, expected_selected_columns);\n\n    let expected_columns = fe_columns!(\n        \"User Id\" => [1.0, 2.0, 5.0],\n        \"FirstName\" => [\"Peter\", \"John\", \"Hans\"],\n    );\n\n    let table_columns = table\n        .to_columns()\n        .context(\"could not convert table to columns\")?;\n    assert_eq!(table_columns, expected_columns);\n\n    #[cfg(feature = \"polars\")]\n    {\n        use polars_core::df;\n\n        let expected_df = df!(\n            \"User Id\" => [1.0, 2.0, 5.0],\n            \"FirstName\" => [\"Peter\", \"John\", \"Hans\"],\n        )?;\n\n        let df = table\n            .to_polars()\n            
.context(\"could not convert table to polars dataframe\")?;\n        assert!(df.equals_missing(&expected_df))\n    }\n\n    Ok(())\n}\n\n#[rstest]\nfn test_use_columns_with_table_and_provided_columns(\n    mut reader: fastexcel::ExcelReader,\n) -> Result<()> {\n    let selected_columns = SelectedColumns::Selection(vec![0.into(), 2.into()]);\n\n    let opts = LoadSheetOrTableOptions::new_for_table()\n        .column_names(vec![\"user_id\", \"last_name\"])\n        .selected_columns(selected_columns);\n\n    let mut table = reader\n        .load_table(\"users\", opts)\n        .context(\"Failed to load table\")?;\n\n    assert_eq!(table.name(), \"users\");\n    assert_eq!(table.width(), 4);\n    assert_eq!(table.height(), 3);\n\n    let available_columns = table\n        .available_columns()\n        .context(\"could not obtain available columns for table\")?;\n    let expected_available_columns = vec![\n        fastexcel::ColumnInfo {\n            name: \"user_id\".into(),\n            index: 0,\n            absolute_index: 0,\n            dtype: fastexcel::DType::Float,\n            column_name_from: fastexcel::ColumnNameFrom::Provided,\n            dtype_from: fastexcel::DTypeFrom::Guessed,\n        },\n        fastexcel::ColumnInfo {\n            name: \"__UNNAMED__1\".into(),\n            index: 1,\n            absolute_index: 1,\n            dtype: fastexcel::DType::String,\n            column_name_from: fastexcel::ColumnNameFrom::Generated,\n            dtype_from: fastexcel::DTypeFrom::Guessed,\n        },\n        fastexcel::ColumnInfo {\n            name: \"last_name\".into(),\n            index: 2,\n            absolute_index: 2,\n            dtype: fastexcel::DType::String,\n            column_name_from: fastexcel::ColumnNameFrom::Provided,\n            dtype_from: fastexcel::DTypeFrom::Guessed,\n        },\n        fastexcel::ColumnInfo {\n            name: \"__UNNAMED__3\".into(),\n            index: 3,\n            absolute_index: 3,\n            
dtype: fastexcel::DType::DateTime,\n            column_name_from: fastexcel::ColumnNameFrom::Generated,\n            dtype_from: fastexcel::DTypeFrom::Guessed,\n        },\n    ];\n    assert_eq!(available_columns, expected_available_columns);\n\n    let selected_columns_info = table.selected_columns();\n    let expected_selected_columns = vec![\n        fastexcel::ColumnInfo {\n            name: \"user_id\".into(),\n            index: 0,\n            absolute_index: 0,\n            dtype: fastexcel::DType::Float,\n            column_name_from: fastexcel::ColumnNameFrom::Provided,\n            dtype_from: fastexcel::DTypeFrom::Guessed,\n        },\n        fastexcel::ColumnInfo {\n            name: \"last_name\".into(),\n            index: 2,\n            absolute_index: 2,\n            dtype: fastexcel::DType::String,\n            column_name_from: fastexcel::ColumnNameFrom::Provided,\n            dtype_from: fastexcel::DTypeFrom::Guessed,\n        },\n    ];\n    assert_eq!(selected_columns_info, expected_selected_columns);\n\n    let expected_columns = fe_columns!(\n        \"user_id\" => [1.0, 2.0, 5.0],\n        \"last_name\" => [\"Müller\", \"Meier\", \"Fricker\"],\n    );\n\n    let table_columns = table\n        .to_columns()\n        .context(\"could not convert table to columns\")?;\n    assert_eq!(table_columns, expected_columns);\n\n    #[cfg(feature = \"polars\")]\n    {\n        use polars_core::df;\n\n        let expected_df = df!(\n            \"user_id\" => [1.0, 2.0, 5.0],\n            \"last_name\" => [\"Müller\", \"Meier\", \"Fricker\"],\n        )?;\n\n        let df = table\n            .to_polars()\n            .context(\"could not convert table to polars dataframe\")?;\n        assert!(df.equals_missing(&expected_df))\n    }\n\n    Ok(())\n}\n\n#[fixture]\nfn reader_with_offset() -> fastexcel::ExcelReader {\n    fastexcel::read_excel(path_for_fixture(\"sheet-and-table-with-offset.xlsx\"))\n        .expect(\"could not read excel 
file\")\n}\n\n#[rstest]\nfn test_use_column_range_with_offset_with_table_and_specified_dtypes(\n    mut reader_with_offset: fastexcel::ExcelReader,\n) -> Result<()> {\n    let dtypes_map: HashMap<IdxOrName, DType> = [\n        (IdxOrName::Idx(3), DType::Int),\n        (IdxOrName::Name(\"Column at E5\".to_owned()), DType::String),\n    ]\n    .into_iter()\n    .collect();\n\n    let selected_columns_closed = \"D:E\"\n        .parse::<SelectedColumns>()\n        .context(\"could not parse column selection\")?;\n\n    let opts_closed_range = LoadSheetOrTableOptions::new_for_table()\n        .selected_columns(selected_columns_closed)\n        .with_dtypes(DTypes::Map(dtypes_map.clone()));\n\n    let table_closed = reader_with_offset\n        .load_table(\"TableAtD5\", opts_closed_range)\n        .context(\"Failed to load table with closed range\")?;\n\n    let selected_columns_open_ended = \"D:\"\n        .parse::<SelectedColumns>()\n        .context(\"could not parse column selection\")?;\n\n    let opts_open_ended_range = LoadSheetOrTableOptions::new_for_table()\n        .selected_columns(selected_columns_open_ended)\n        .with_dtypes(DTypes::Map(dtypes_map.clone()));\n\n    let table_open_ended = reader_with_offset\n        .load_table(\"TableAtD5\", opts_open_ended_range)\n        .context(\"Failed to load table with open-ended range\")?;\n\n    assert_eq!(table_closed.name(), \"TableAtD5\");\n    assert_eq!(table_open_ended.name(), \"TableAtD5\");\n\n    let expected_selected_columns = vec![\n        fastexcel::ColumnInfo {\n            name: \"Column at D5\".to_owned(),\n            index: 0,\n            absolute_index: 3,\n            dtype: fastexcel::DType::Int,\n            column_name_from: fastexcel::ColumnNameFrom::Provided,\n            dtype_from: fastexcel::DTypeFrom::ProvidedByIndex,\n        },\n        fastexcel::ColumnInfo {\n            name: \"Column at E5\".to_owned(),\n            index: 1,\n            absolute_index: 4,\n            
dtype: fastexcel::DType::String,\n            column_name_from: fastexcel::ColumnNameFrom::Provided,\n            dtype_from: fastexcel::DTypeFrom::ProvidedByName,\n        },\n    ];\n    assert_eq!(table_closed.selected_columns(), expected_selected_columns);\n    assert_eq!(\n        table_open_ended.selected_columns(),\n        expected_selected_columns\n    );\n\n    let expected_columns = fe_columns!(\n        \"Column at D5\" => [1_i64, 2, 3, 4],\n        \"Column at E5\" => [\"4\", \"5\", \"6\", \"8\"],\n    );\n\n    assert_eq!(\n        table_closed\n            .to_columns()\n            .context(\"could not convert table to columns\")?,\n        expected_columns\n    );\n\n    assert_eq!(\n        table_open_ended\n            .to_columns()\n            .context(\"could not convert table to columns\")?,\n        expected_columns\n    );\n\n    #[cfg(feature = \"polars\")]\n    {\n        use polars_core::df;\n\n        let expected_df = df!(\n            \"Column at D5\" => [1_i64, 2, 3, 4],\n            \"Column at E5\" => [\"4\", \"5\", \"6\", \"8\"],\n        )?;\n\n        let df_closed = table_closed\n            .to_polars()\n            .context(\"could not convert table to polars dataframe\")?;\n        let df_open_ended = table_open_ended\n            .to_polars()\n            .context(\"could not convert table to polars dataframe\")?;\n\n        assert!(df_closed.equals_missing(&expected_df));\n        assert!(df_open_ended.equals_missing(&expected_df));\n    }\n\n    Ok(())\n}\n\n/// This test ensures that index-based selection is correctly resolved when used with an offset\n/// table: the selected indices should be absolute, and it should be able to handle both index-based\n/// and name-based selection.\n#[rstest]\nfn test_use_column_names_with_offset_table_by_index_and_name(\n    mut reader_with_offset: fastexcel::ExcelReader,\n) -> Result<()> {\n    let selected_columns = SelectedColumns::Selection(vec![\n        IdxOrName::Name(\"Column at 
D5\".to_string()),\n        IdxOrName::Idx(4),\n    ]);\n\n    let opts = LoadSheetOrTableOptions::new_for_table().selected_columns(selected_columns);\n\n    let table = reader_with_offset\n        .load_table(\"TableAtD5\", opts)\n        .context(\"Failed to load table\")?;\n\n    assert_eq!(table.name(), \"TableAtD5\");\n\n    let expected_selected_columns = vec![\n        fastexcel::ColumnInfo {\n            name: \"Column at D5\".to_owned(),\n            index: 0,\n            absolute_index: 3,\n            dtype: fastexcel::DType::Float,\n            column_name_from: fastexcel::ColumnNameFrom::Provided,\n            dtype_from: fastexcel::DTypeFrom::Guessed,\n        },\n        fastexcel::ColumnInfo {\n            name: \"Column at E5\".to_owned(),\n            index: 1,\n            absolute_index: 4,\n            dtype: fastexcel::DType::Float,\n            column_name_from: fastexcel::ColumnNameFrom::Provided,\n            dtype_from: fastexcel::DTypeFrom::Guessed,\n        },\n    ];\n\n    let selected_columns_info = table.selected_columns();\n    assert_eq!(selected_columns_info, expected_selected_columns);\n\n    let expected_columns = fe_columns!(\n        \"Column at D5\" => [1.0, 2.0, 3.0, 4.0],\n        \"Column at E5\" => [4.0, 5.0, 6.0, 8.0],\n    );\n\n    let table_columns = table\n        .to_columns()\n        .context(\"could not convert table to columns\")?;\n    assert_eq!(table_columns, expected_columns);\n\n    #[cfg(feature = \"polars\")]\n    {\n        use polars_core::df;\n\n        let expected_df = df!(\n            \"Column at D5\" => [1.0, 2.0, 3.0, 4.0],\n            \"Column at E5\" => [4.0, 5.0, 6.0, 8.0],\n        )?;\n\n        let df = table\n            .to_polars()\n            .context(\"could not convert table to polars dataframe\")?;\n        assert!(df.equals_missing(&expected_df))\n    }\n\n    Ok(())\n}\n\n#[rstest]\nfn test_use_column_range_with_offset_with_sheet_and_specified_dtypes(\n    mut 
reader_with_offset: fastexcel::ExcelReader,\n) -> Result<()> {\n    // Create dtypes map: {7: \"int\", \"Column at I10\": \"string\"}\n    // Note: Column H is at index 7, Column I is at index 8, Column K is at index 10\n    let dtypes_map: HashMap<IdxOrName, DType> = [\n        (IdxOrName::Idx(7), DType::Int),\n        (IdxOrName::Name(\"Column at I10\".to_owned()), DType::String),\n    ]\n    .into_iter()\n    .collect();\n\n    let selected_columns_closed = \"H:K\"\n        .parse::<SelectedColumns>()\n        .context(\"could not parse column selection\")?;\n\n    let opts_closed_range = LoadSheetOrTableOptions::new_for_sheet()\n        .header_row(9)\n        .selected_columns(selected_columns_closed)\n        .with_dtypes(DTypes::Map(dtypes_map.clone()));\n\n    let sheet_closed = reader_with_offset\n        .load_sheet(\"without-table\".into(), opts_closed_range)\n        .context(\"Failed to load sheet with closed range\")?;\n\n    let selected_columns_open_ended = \"H:\"\n        .parse::<SelectedColumns>()\n        .context(\"could not parse column selection\")?;\n\n    let opts_open_ended_range = LoadSheetOrTableOptions::new_for_sheet()\n        .header_row(9)\n        .selected_columns(selected_columns_open_ended)\n        .with_dtypes(DTypes::Map(dtypes_map.clone()));\n\n    let sheet_open_ended = reader_with_offset\n        .load_sheet(\"without-table\".into(), opts_open_ended_range)\n        .context(\"Failed to load sheet with open-ended range\")?;\n\n    assert_eq!(sheet_closed.name(), \"without-table\");\n    assert_eq!(sheet_open_ended.name(), \"without-table\");\n\n    let expected_selected_columns = vec![\n        fastexcel::ColumnInfo {\n            name: \"Column at H10\".to_owned(),\n            index: 0,\n            absolute_index: 7,\n            dtype: fastexcel::DType::Int,\n            column_name_from: fastexcel::ColumnNameFrom::LookedUp,\n            dtype_from: fastexcel::DTypeFrom::ProvidedByIndex,\n        },\n        
fastexcel::ColumnInfo {\n            name: \"Column at I10\".to_owned(),\n            index: 1,\n            absolute_index: 8,\n            dtype: fastexcel::DType::String,\n            column_name_from: fastexcel::ColumnNameFrom::LookedUp,\n            dtype_from: fastexcel::DTypeFrom::ProvidedByName,\n        },\n        fastexcel::ColumnInfo {\n            name: \"__UNNAMED__2\".to_owned(),\n            index: 2,\n            absolute_index: 9,\n            dtype: fastexcel::DType::String,\n            column_name_from: fastexcel::ColumnNameFrom::Generated,\n            dtype_from: fastexcel::DTypeFrom::Guessed,\n        },\n        fastexcel::ColumnInfo {\n            name: \"Column at K10\".to_owned(),\n            index: 3,\n            absolute_index: 10,\n            dtype: fastexcel::DType::Float,\n            column_name_from: fastexcel::ColumnNameFrom::LookedUp,\n            dtype_from: fastexcel::DTypeFrom::Guessed,\n        },\n    ];\n    assert_eq!(sheet_closed.selected_columns(), &expected_selected_columns);\n    assert_eq!(\n        sheet_open_ended.selected_columns(),\n        &expected_selected_columns\n    );\n\n    let expected_columns = fe_columns!(\n        \"Column at H10\" => [1_i64, 2, 3],\n        \"Column at I10\" => [\"4\", \"5\", \"6\"],\n        \"__UNNAMED__2\" => [Option::<&str>::None, None, None],\n        \"Column at K10\" => [7.0, 8.0, 9.0],\n    );\n\n    assert_eq!(\n        sheet_closed\n            .to_columns()\n            .context(\"could not convert sheet to columns\")?,\n        expected_columns\n    );\n\n    assert_eq!(\n        sheet_open_ended\n            .to_columns()\n            .context(\"could not convert sheet to columns\")?,\n        expected_columns\n    );\n\n    #[cfg(feature = \"polars\")]\n    {\n        use polars_core::df;\n\n        let expected_df = df!(\n            \"Column at H10\" => [1_i64, 2, 3],\n            \"Column at I10\" => [\"4\", \"5\", \"6\"],\n            \"__UNNAMED__2\" => 
[Option::<&str>::None, None, None],\n            \"Column at K10\" => [7.0, 8.0, 9.0],\n        )?;\n\n        let df_closed = sheet_closed\n            .to_polars()\n            .context(\"could not convert sheet to polars dataframe\")?;\n        let df_open_ended = sheet_open_ended\n            .to_polars()\n            .context(\"could not convert sheet to polars dataframe\")?;\n\n        assert!(df_closed.equals_missing(&expected_df));\n        assert!(df_open_ended.equals_missing(&expected_df));\n    }\n\n    Ok(())\n}\n"
  },
  {
    "path": "tests/fastexcel.rs",
    "content": "#[macro_use]\nmod utils;\n\nuse anyhow::{Context, Result};\nuse chrono::NaiveDate;\nuse fastexcel::{FastExcelColumn, LoadSheetOrTableOptions, SkipRows};\n#[cfg(feature = \"polars\")]\nuse polars_core::{df, frame::DataFrame};\nuse pretty_assertions::assert_eq;\nuse rstest::rstest;\nuse utils::path_for_fixture;\n\n#[test]\nfn test_single_sheet() -> Result<()> {\n    let mut reader = fastexcel::read_excel(path_for_fixture(\"fixture-single-sheet.xlsx\"))\n        .context(\"could not read excel file\")?;\n\n    assert_eq!(reader.sheet_names(), vec![\"January\"]);\n    let mut sheet_by_name = reader\n        .load_sheet(\"January\".into(), LoadSheetOrTableOptions::new_for_sheet())\n        .context(\"could not load sheet by name\")?;\n    let mut sheet_by_idx = reader\n        .load_sheet(0.into(), LoadSheetOrTableOptions::new_for_sheet())\n        .context(\"could not load sheet by index\")?;\n\n    assert_eq!(sheet_by_name.name(), sheet_by_idx.name());\n    assert_eq!(sheet_by_name.name(), \"January\");\n\n    assert_eq!(sheet_by_name.height(), sheet_by_idx.height());\n    assert_eq!(sheet_by_name.height(), 2);\n\n    assert_eq!(sheet_by_name.width(), sheet_by_idx.width());\n    assert_eq!(sheet_by_name.width(), 2);\n\n    let columns_by_name = sheet_by_name\n        .to_columns()\n        .context(\"could not convert sheet by name to columns\")?;\n    let columns_by_idx = sheet_by_idx\n        .to_columns()\n        .context(\"could not convert sheet by index to columns\")?;\n\n    assert_eq!(&columns_by_name, &columns_by_idx);\n    let expected_columns = fe_columns!(\n        \"Month\" => [1.0, 2.0],\n        \"Year\" => [2019.0, 2020.0],\n    );\n    assert_eq!(&columns_by_name, &expected_columns);\n\n    #[cfg(feature = \"polars\")]\n    {\n        let df_by_name = sheet_by_name\n            .to_polars()\n            .context(\"could not convert sheet by name to DataFrame\")?;\n        let df_by_idx = sheet_by_idx\n            .to_polars()\n        
    .context(\"could not convert sheet by index to DataFrame\")?;\n        let expected_df = df!(\n            \"Month\" => [1.0, 2.0],\n            \"Year\" => [2019.0, 2020.0]\n        )\n        .context(\"could not create expected DataFrame\")?;\n        assert_eq!(&df_by_name, &df_by_idx);\n        assert!(df_by_name.equals_missing(&expected_df));\n    }\n\n    Ok(())\n}\n\n#[test]\nfn test_single_sheet_bytes() -> Result<()> {\n    let bytes = std::fs::read(path_for_fixture(\"fixture-single-sheet.xlsx\"))?;\n\n    let mut reader = fastexcel::ExcelReader::try_from(bytes.as_slice())\n        .context(\"could not create reader from bytes\")?;\n\n    assert_eq!(reader.sheet_names(), vec![\"January\"]);\n    let mut sheet_by_name = reader\n        .load_sheet(\"January\".into(), LoadSheetOrTableOptions::new_for_sheet())\n        .context(\"could not load sheet by name\")?;\n    let mut sheet_by_idx = reader\n        .load_sheet(0.into(), LoadSheetOrTableOptions::new_for_sheet())\n        .context(\"could not load sheet by index\")?;\n\n    assert_eq!(sheet_by_name.name(), sheet_by_idx.name());\n    assert_eq!(sheet_by_name.name(), \"January\");\n\n    assert_eq!(sheet_by_name.height(), sheet_by_idx.height());\n    assert_eq!(sheet_by_name.height(), 2);\n\n    assert_eq!(sheet_by_name.width(), sheet_by_idx.width());\n    assert_eq!(sheet_by_name.width(), 2);\n\n    let columns_by_name = sheet_by_name\n        .to_columns()\n        .context(\"could not convert sheet by name to columns\")?;\n    let columns_by_idx = sheet_by_idx\n        .to_columns()\n        .context(\"could not convert sheet by index to columns\")?;\n\n    assert_eq!(&columns_by_name, &columns_by_idx);\n    let expected_columns = fe_columns!(\n        \"Month\" => [1.0, 2.0],\n        \"Year\" => [2019.0, 2020.0]\n    );\n    assert_eq!(&columns_by_name, &expected_columns);\n\n    #[cfg(feature = \"polars\")]\n    {\n        let df_by_name = sheet_by_name\n            .to_polars()\n            
.context(\"could not convert sheet by name to DataFrame\")?;\n        let df_by_idx = sheet_by_idx\n            .to_polars()\n            .context(\"could not convert sheet by index to DataFrame\")?;\n        let expected_df = df!(\n            \"Month\" => [1.0, 2.0],\n            \"Year\" => [2019.0, 2020.0]\n        )\n        .context(\"could not create expected DataFrame\")?;\n        assert_eq!(&df_by_name, &df_by_idx);\n        assert!(df_by_name.equals_missing(&expected_df));\n    }\n\n    Ok(())\n}\n\n#[test]\nfn test_single_sheet_with_types() -> Result<()> {\n    let mut excel_reader =\n        fastexcel::read_excel(path_for_fixture(\"fixture-single-sheet-with-types.xlsx\"))\n            .context(\"could not read excel file\")?;\n\n    let mut sheet = excel_reader\n        .load_sheet(0.into(), LoadSheetOrTableOptions::new_for_sheet())\n        .context(\"could not load sheet\")?;\n\n    assert_eq!(sheet.name(), \"Sheet1\");\n    assert_eq!(sheet.height(), sheet.total_height());\n    assert_eq!(sheet.height(), 3);\n    assert_eq!(sheet.width(), 4);\n\n    let columns = sheet\n        .to_columns()\n        .context(\"could not convert sheet by name to columns\")?;\n\n    let naive_date = NaiveDate::from_ymd_opt(2022, 3, 2)\n        .unwrap()\n        .and_hms_opt(5, 43, 4)\n        .unwrap();\n\n    let expected_columns = fe_columns!(\n        \"__UNNAMED__0\" => [0.0, 1.0, 2.0],\n        \"bools\" => [true, false, true],\n        \"dates\" => [naive_date; 3],\n        \"floats\" => [12.35, 42.69, 1234567.0],\n    );\n    assert_eq!(&columns, &expected_columns);\n\n    #[cfg(feature = \"polars\")]\n    {\n        let df = sheet\n            .to_polars()\n            .context(\"could not convert sheet to DataFrame\")?;\n        let expected_df = df!(\n            \"__UNNAMED__0\" => [0.0, 1.0, 2.0],\n            \"bools\" => [true, false, true],\n            \"dates\" => [naive_date; 3],\n            \"floats\" => [12.35, 42.69, 1234567.0],\n        )\n    
    .context(\"could not create expected DataFrame\")?;\n\n        assert!(df.equals_missing(&expected_df));\n    }\n\n    Ok(())\n}\n\n#[test]\nfn test_multiple_sheets() -> Result<()> {\n    let mut excel_reader = fastexcel::read_excel(path_for_fixture(\"fixture-multi-sheet.xlsx\"))\n        .context(\"could not read excel file\")?;\n\n    let sheet_0 = excel_reader\n        .load_sheet(0.into(), LoadSheetOrTableOptions::new_for_sheet())\n        .context(\"could not load sheet 0 by idx\")?;\n    let expected_columns_sheet_0 = fe_columns!(\"Month\" => [1.0], \"Year\" => [2019.0]);\n    let sheet_0_columns = sheet_0\n        .to_columns()\n        .context(\"could not convert sheet 0 to columns\")?;\n    assert_eq!(sheet_0_columns, expected_columns_sheet_0);\n\n    let sheet_1 = excel_reader\n        .load_sheet(1.into(), LoadSheetOrTableOptions::new_for_sheet())\n        .context(\"could not load sheet 1 by idx\")?;\n    let expected_columns_sheet_1 =\n        fe_columns!(\"Month\" => [2.0, 3.0, 4.0], \"Year\" => [2019.0, 2021.0, 2022.0]);\n    let sheet_1_columns = sheet_1\n        .to_columns()\n        .context(\"could not convert sheet 1 to columns\")?;\n    assert_eq!(sheet_1_columns, expected_columns_sheet_1);\n\n    let sheet_unnamed_columns = excel_reader\n        .load_sheet(\n            \"With unnamed columns\".into(),\n            LoadSheetOrTableOptions::new_for_sheet(),\n        )\n        .context(\"could not load sheet \\\"With unnamed columns\\\" by idx\")?;\n    let expected_columns_sheet_unnamed_columns = fe_columns!(\n        \"col1\" => [2.0, 3.0],\n        \"__UNNAMED__1\" => [1.5, 2.5],\n        \"col3\" => [\"hello\", \"world\"],\n        \"__UNNAMED__3\" => [-5.0, -6.0],\n        \"col5\" => [\"a\", \"b\"],\n    );\n    let sheet_unnamed_columns_columns = sheet_unnamed_columns\n        .to_columns()\n        .context(\"could not convert sheet \\\"With unnamed columns\\\" to columns\")?;\n\n    assert_eq!(\n        
sheet_unnamed_columns_columns,\n        expected_columns_sheet_unnamed_columns\n    );\n\n    #[cfg(feature = \"polars\")]\n    {\n        let expected_df_sheet_0 = df!(\"Month\" => [1.0], \"Year\" => [2019.0])?;\n        let df_sheet_0 = sheet_0\n            .to_polars()\n            .context(\"could not convert sheet 0 to DataFrame\")?;\n        assert!(expected_df_sheet_0.equals_missing(&df_sheet_0));\n\n        let expected_df_sheet_1 =\n            df!(\"Month\" => [2.0, 3.0, 4.0], \"Year\" => [2019.0, 2021.0, 2022.0])?;\n        let df_sheet_1 = sheet_1\n            .to_polars()\n            .context(\"could not convert sheet 1 to DataFrame\")?;\n        assert!(expected_df_sheet_1.equals_missing(&df_sheet_1));\n\n        let expected_df_sheet_unnamed_columns = df!(\n            \"col1\" => [2.0, 3.0],\n            \"__UNNAMED__1\" => [1.5, 2.5],\n            \"col3\" => [\"hello\", \"world\"],\n            \"__UNNAMED__3\" => [-5.0, -6.0],\n            \"col5\" => [\"a\", \"b\"],\n        )?;\n        let df_sheet_unnamed_columns = sheet_unnamed_columns\n            .to_polars()\n            .context(\"could not convert sheet \\\"With unnamed columns\\\" to DataFrame\")?;\n        assert!(expected_df_sheet_unnamed_columns.equals_missing(&df_sheet_unnamed_columns));\n    }\n\n    Ok(())\n}\n\n#[test]\nfn test_sheet_with_header_row_diff_from_zero() -> Result<()> {\n    let mut excel_reader =\n        fastexcel::read_excel(path_for_fixture(\"fixture-changing-header-location.xlsx\"))\n            .context(\"could not read excel file\")?;\n\n    assert_eq!(\n        excel_reader.sheet_names(),\n        vec![\"Sheet1\", \"Sheet2\", \"Sheet3\"]\n    );\n\n    let mut sheet_by_name = excel_reader\n        .load_sheet(\n            \"Sheet1\".into(),\n            LoadSheetOrTableOptions::new_for_sheet().header_row(1),\n        )\n        .context(\"could not load sheet \\\"Sheet1\\\" by name\")?;\n\n    let mut sheet_by_idx = excel_reader\n        .load_sheet(\n      
      0.into(),\n            LoadSheetOrTableOptions::new_for_sheet().header_row(1),\n        )\n        .context(\"could not load sheet 0 by index\")?;\n\n    assert_eq!(sheet_by_name.name(), sheet_by_idx.name());\n    assert_eq!(sheet_by_name.name(), \"Sheet1\");\n\n    assert_eq!(sheet_by_name.height(), sheet_by_idx.height());\n    assert_eq!(sheet_by_name.height(), 2);\n\n    assert_eq!(sheet_by_name.width(), sheet_by_idx.width());\n    assert_eq!(sheet_by_name.width(), 2);\n\n    let expected_columns = fe_columns!(\n        \"Month\" => [1.0, 2.0],\n        \"Year\" => [2019.0, 2020.0]\n    );\n\n    let columns_by_name = sheet_by_name\n        .to_columns()\n        .context(\"could not convert sheet \\\"Sheet1\\\" to columns\")?;\n    let columns_by_idx = sheet_by_idx\n        .to_columns()\n        .context(\"could not convert sheet 0 to columns\")?;\n    assert_eq!(&columns_by_name, &columns_by_idx);\n    assert_eq!(&columns_by_name, &expected_columns);\n\n    #[cfg(feature = \"polars\")]\n    {\n        let df_by_name = sheet_by_name\n            .to_polars()\n            .context(\"could not convert sheet \\\"Sheet1\\\" to DataFrame\")?;\n        let df_by_idx = sheet_by_idx\n            .to_polars()\n            .context(\"could not convert sheet 0 to DataFrame\")?;\n        let expected_df = df!(\n            \"Month\" => [1.0, 2.0],\n            \"Year\" => [2019.0, 2020.0]\n        )?;\n\n        assert!(df_by_name.equals_missing(&df_by_idx));\n        assert!(expected_df.equals_missing(&df_by_name));\n    }\n\n    Ok(())\n}\n\n#[test]\nfn test_sheet_with_pagination_and_without_headers() -> Result<()> {\n    let mut excel_reader =\n        fastexcel::read_excel(path_for_fixture(\"fixture-single-sheet-with-types.xlsx\"))\n            .context(\"could not read excel file\")?;\n\n    let opts = LoadSheetOrTableOptions::new_for_sheet()\n        .n_rows(1)\n        .skip_rows(SkipRows::Simple(1))\n        .no_header_row()\n        .column_names([\"This\", 
\"Is\", \"Amazing\", \"Stuff\"]);\n    let mut sheet = excel_reader\n        .load_sheet(0.into(), opts)\n        .context(\"could not load sheet 0\")?;\n\n    assert_eq!(sheet.name(), \"Sheet1\");\n    assert_eq!(sheet.height(), 1);\n    assert_eq!(sheet.width(), 4);\n\n    let naive_dt = NaiveDate::from_ymd_opt(2022, 3, 2)\n        .unwrap()\n        .and_hms_opt(5, 43, 4)\n        .unwrap();\n\n    let expected_columns = fe_columns!(\n        \"This\" => [0.0],\n        \"Is\" => [true],\n        \"Amazing\" => [naive_dt],\n        \"Stuff\" => [12.35],\n    );\n\n    let sheet_columns = sheet\n        .to_columns()\n        .context(\"could not convert sheet to columns\")?;\n    assert_eq!(&sheet_columns, &expected_columns);\n\n    #[cfg(feature = \"polars\")]\n    {\n        let df = sheet\n            .to_polars()\n            .context(\"could not convert sheet to DataFrame\")?;\n        let expected_df = df!(\n            \"This\" => [0.0],\n            \"Is\" => [true],\n            \"Amazing\" => [naive_dt],\n            \"Stuff\" => [12.35],\n        )?;\n\n        assert!(df.equals_missing(&expected_df));\n    }\n\n    Ok(())\n}\n\n#[rstest]\n#[case(Some(0), SkipRows::SkipEmptyRowsAtBeginning, fe_columns!(\"a\" => [\"b\", \"c\", \"d\", \"e\", \"f\"], \"0\" => [1.0, 2.0, 3.0, 4.0, 5.0]))]\n#[case(\n    None,\n    SkipRows::Simple(0),\n    fe_columns!(\n        \"__UNNAMED__0\" => [None, None, Some(\"a\"), Some(\"b\"), Some(\"c\"), Some(\"d\"), Some(\"e\"), Some(\"f\")],\n        \"__UNNAMED__1\" => [None, None, Some(0.0), Some(1.0), Some(2.0), Some(3.0), Some(4.0), Some(5.0)]\n    )\n)]\n#[case(\n    None,\n    SkipRows::SkipEmptyRowsAtBeginning,\n    fe_columns!(\n        \"__UNNAMED__0\" => [\"a\", \"b\", \"c\", \"d\", \"e\", \"f\"],\n        \"__UNNAMED__1\" => [0.0, 1.0, 2.0, 3.0, 4.0, 5.0]\n    )\n)]\n#[case(\n    Some(0),\n    SkipRows::Simple(0),\n    fe_columns!(\n        \"__UNNAMED__0\" => [None, Some(\"a\"), Some(\"b\"), Some(\"c\"), 
Some(\"d\"), Some(\"e\"), Some(\"f\")],\n        \"__UNNAMED__1\" => [None, Some(0.0), Some(1.0), Some(2.0), Some(3.0), Some(4.0), Some(5.0)]\n    )\n)]\n#[case(\n    Some(0),\n    SkipRows::Simple(1),\n    fe_columns!(\n        \"__UNNAMED__0\" => [Some(\"a\"), Some(\"b\"), Some(\"c\"), Some(\"d\"), Some(\"e\"), Some(\"f\")],\n        \"__UNNAMED__1\" => [Some(0.0), Some(1.0), Some(2.0), Some(3.0), Some(4.0), Some(5.0)]\n    )\n)]\n#[case(\n    None,\n    SkipRows::Simple(2),\n    fe_columns!(\n        \"__UNNAMED__0\" => [Some(\"a\"), Some(\"b\"), Some(\"c\"), Some(\"d\"), Some(\"e\"), Some(\"f\")],\n        \"__UNNAMED__1\" => [Some(0.0), Some(1.0), Some(2.0), Some(3.0), Some(4.0), Some(5.0)]\n    )\n)]\n#[case(\n    None,\n    SkipRows::Simple(3),\n    fe_columns!(\n        \"__UNNAMED__0\" => [Some(\"b\"), Some(\"c\"), Some(\"d\"), Some(\"e\"), Some(\"f\")],\n        \"__UNNAMED__1\" => [Some(1.0), Some(2.0), Some(3.0), Some(4.0), Some(5.0)]\n    )\n)]\n#[case(\n    Some(1),\n    SkipRows::Simple(0),\n    fe_columns!(\"__UNNAMED__0\" => [\"a\", \"b\", \"c\", \"d\", \"e\", \"f\"], \"__UNNAMED__1\" => [0.0, 1.0, 2.0, 3.0, 4.0, 5.0])\n)]\n#[case(Some(2), SkipRows::Simple(0), fe_columns!(\"a\" => [\"b\", \"c\", \"d\", \"e\", \"f\"], \"0\" => [1.0, 2.0, 3.0, 4.0, 5.0]))]\n#[case(\n    Some(2),\n    SkipRows::SkipEmptyRowsAtBeginning,\n    fe_columns!(\"a\" => [\"b\", \"c\", \"d\", \"e\", \"f\"], \"0\" => [1.0, 2.0, 3.0, 4.0, 5.0])\n)]\nfn test_header_row_and_skip_rows(\n    #[case] header_row: Option<usize>,\n    #[case] skip_rows: SkipRows,\n    #[case] expected: Vec<FastExcelColumn>,\n) -> Result<()> {\n    let mut excel_reader = fastexcel::read_excel(path_for_fixture(\"no-header.xlsx\"))\n        .context(\"could not read excel file\")?;\n\n    let mut opts = LoadSheetOrTableOptions::new_for_sheet();\n    opts.header_row = header_row;\n    opts.skip_rows = skip_rows;\n    let sheet = excel_reader\n        .load_sheet(0.into(), opts)\n        .context(\"could not 
load sheet 0\")?;\n\n    let sheet_columns = sheet\n        .to_columns()\n        .context(\"could not convert sheet to columns\")?;\n    assert_eq!(&sheet_columns, &expected);\n    Ok(())\n}\n\n#[cfg(feature = \"polars\")]\n#[rstest]\n#[case(Some(0), SkipRows::SkipEmptyRowsAtBeginning, df!(\"a\" => [\"b\", \"c\", \"d\", \"e\", \"f\"], \"0\" => [1.0, 2.0, 3.0, 4.0, 5.0])?)]\n#[case(\n    None,\n    SkipRows::Simple(0),\n    df!(\n        \"__UNNAMED__0\" => [None, None, Some(\"a\"), Some(\"b\"), Some(\"c\"), Some(\"d\"), Some(\"e\"), Some(\"f\")],\n        \"__UNNAMED__1\" => [None, None, Some(0.0), Some(1.0), Some(2.0), Some(3.0), Some(4.0), Some(5.0)]\n    )?\n)]\n#[case(\n    None,\n    SkipRows::SkipEmptyRowsAtBeginning,\n    df!(\n        \"__UNNAMED__0\" => [\"a\", \"b\", \"c\", \"d\", \"e\", \"f\"],\n        \"__UNNAMED__1\" => [0.0, 1.0, 2.0, 3.0, 4.0, 5.0]\n    )?\n)]\n#[case(\n    Some(0),\n    SkipRows::Simple(0),\n    df!(\n        \"__UNNAMED__0\" => [None, Some(\"a\"), Some(\"b\"), Some(\"c\"), Some(\"d\"), Some(\"e\"), Some(\"f\")],\n        \"__UNNAMED__1\" => [None, Some(0.0), Some(1.0), Some(2.0), Some(3.0), Some(4.0), Some(5.0)]\n    )?\n)]\n#[case(\n    Some(0),\n    SkipRows::Simple(1),\n    df!(\n        \"__UNNAMED__0\" => [Some(\"a\"), Some(\"b\"), Some(\"c\"), Some(\"d\"), Some(\"e\"), Some(\"f\")],\n        \"__UNNAMED__1\" => [Some(0.0), Some(1.0), Some(2.0), Some(3.0), Some(4.0), Some(5.0)]\n    )?\n)]\n#[case(\n    None,\n    SkipRows::Simple(2),\n    df!(\n        \"__UNNAMED__0\" => [Some(\"a\"), Some(\"b\"), Some(\"c\"), Some(\"d\"), Some(\"e\"), Some(\"f\")],\n        \"__UNNAMED__1\" => [Some(0.0), Some(1.0), Some(2.0), Some(3.0), Some(4.0), Some(5.0)]\n    )?\n)]\n#[case(\n    None,\n    SkipRows::Simple(3),\n    df!(\n        \"__UNNAMED__0\" => [Some(\"b\"), Some(\"c\"), Some(\"d\"), Some(\"e\"), Some(\"f\")],\n        \"__UNNAMED__1\" => [Some(1.0), Some(2.0), Some(3.0), Some(4.0), Some(5.0)]\n    )?\n)]\n#[case(\n    
Some(1),\n    SkipRows::Simple(0),\n    df!(\"__UNNAMED__0\" => [\"a\", \"b\", \"c\", \"d\", \"e\", \"f\"], \"__UNNAMED__1\" => [0.0, 1.0, 2.0, 3.0, 4.0, 5.0])?\n)]\n#[case(Some(2), SkipRows::Simple(0), df!(\"a\" => [\"b\", \"c\", \"d\", \"e\", \"f\"], \"0\" => [1.0, 2.0, 3.0, 4.0, 5.0])?)]\n#[case(\n    Some(2),\n    SkipRows::SkipEmptyRowsAtBeginning,\n    df!(\"a\" => [\"b\", \"c\", \"d\", \"e\", \"f\"], \"0\" => [1.0, 2.0, 3.0, 4.0, 5.0])?\n)]\nfn test_header_row_and_skip_rows_polars(\n    #[case] header_row: Option<usize>,\n    #[case] skip_rows: SkipRows,\n    #[case] expected: DataFrame,\n) -> Result<()> {\n    let mut excel_reader = fastexcel::read_excel(path_for_fixture(\"no-header.xlsx\"))\n        .context(\"could not read excel file\")?;\n\n    let mut opts = LoadSheetOrTableOptions::new_for_sheet();\n    opts.header_row = header_row;\n    opts.skip_rows = skip_rows;\n\n    let sheet = excel_reader\n        .load_sheet(0.into(), opts)\n        .context(\"could not load sheet 0\")?;\n\n    let df = sheet\n        .to_polars()\n        .context(\"could not convert sheet to DataFrame\")?;\n\n    assert!(df.equals_missing(&expected));\n\n    Ok(())\n}\n"
  },
  {
    "path": "tests/sheet_visibility.rs",
    "content": "#[allow(unused_macros)]\nmod utils;\n\nuse anyhow::{Context, Result};\nuse fastexcel::{LoadSheetOrTableOptions, SheetVisible};\nuse pretty_assertions::assert_matches;\n\nuse crate::utils::path_for_fixture;\n\n#[test]\nfn sheet_visibility() -> Result<()> {\n    let mut reader = fastexcel::read_excel(path_for_fixture(\n        \"fixture-sheets-different-visibilities.xlsx\",\n    ))\n    .context(\"could not read excel file\")?;\n\n    let sheet_0 = reader.load_sheet(0.into(), LoadSheetOrTableOptions::new_for_sheet())?;\n    let sheet_1 = reader.load_sheet(1.into(), LoadSheetOrTableOptions::new_for_sheet())?;\n    let sheet_2 = reader.load_sheet(2.into(), LoadSheetOrTableOptions::new_for_sheet())?;\n\n    assert_matches!(sheet_0.visible(), SheetVisible::Visible);\n    assert_matches!(sheet_1.visible(), SheetVisible::Hidden);\n    assert_matches!(sheet_2.visible(), SheetVisible::VeryHidden);\n\n    Ok(())\n}\n"
  },
  {
    "path": "tests/shifted_data.rs",
    "content": "#[allow(unused_macros)]\nmod utils;\n\nuse anyhow::{Context, Result};\nuse fastexcel::LoadSheetOrTableOptions;\nuse pretty_assertions::assert_eq;\nuse utils::path_for_fixture;\n\n#[test]\nfn test_sheet_with_offset() -> Result<()> {\n    let mut reader = fastexcel::read_excel(path_for_fixture(\"sheet-and-table-with-offset.xlsx\"))\n        .context(\"could not read the excel file\")?;\n    let mut sheet = reader\n        .load_sheet(\n            \"without-table\".into(),\n            LoadSheetOrTableOptions::new_for_sheet(),\n        )\n        .context(\"could not load sheet \\\"without-table\\\"\")?;\n\n    let available_columns = sheet\n        .available_columns()\n        .context(\"could not obtain available columns for sheet\")?;\n    let expected_column_info = vec![\n        fastexcel::ColumnInfo {\n            name: \"Column at H10\".into(),\n            index: 0,\n            absolute_index: 7,\n            dtype: fastexcel::DType::Float,\n            dtype_from: fastexcel::DTypeFrom::Guessed,\n            column_name_from: fastexcel::ColumnNameFrom::LookedUp,\n        },\n        fastexcel::ColumnInfo {\n            name: \"Column at I10\".into(),\n            index: 1,\n            absolute_index: 8,\n            dtype: fastexcel::DType::Float,\n            dtype_from: fastexcel::DTypeFrom::Guessed,\n            column_name_from: fastexcel::ColumnNameFrom::LookedUp,\n        },\n        fastexcel::ColumnInfo {\n            name: \"__UNNAMED__2\".into(),\n            index: 2,\n            absolute_index: 9,\n            dtype: fastexcel::DType::String,\n            dtype_from: fastexcel::DTypeFrom::Guessed,\n            column_name_from: fastexcel::ColumnNameFrom::Generated,\n        },\n        fastexcel::ColumnInfo {\n            name: \"Column at K10\".into(),\n            index: 3,\n            absolute_index: 10,\n            dtype: fastexcel::DType::Float,\n            dtype_from: fastexcel::DTypeFrom::Guessed,\n            
column_name_from: fastexcel::ColumnNameFrom::LookedUp,\n        },\n    ];\n    assert_eq!(available_columns, expected_column_info);\n\n    Ok(())\n}\n\n#[test]\nfn test_table_with_offset() -> Result<()> {\n    let mut reader = fastexcel::read_excel(path_for_fixture(\"sheet-and-table-with-offset.xlsx\"))\n        .context(\"could not read the excel file\")?;\n    let mut table = reader\n        .load_table(\"TableAtD5\", LoadSheetOrTableOptions::new_for_table())\n        .context(\"could not load table \\\"TableAtD5\\\"\")?;\n\n    let available_columns = table\n        .available_columns()\n        .context(\"could not obtain available columns for table\")?;\n    let expected_column_info = vec![\n        fastexcel::ColumnInfo {\n            name: \"Column at D5\".into(),\n            index: 0,\n            absolute_index: 3,\n            dtype: fastexcel::DType::Float,\n            dtype_from: fastexcel::DTypeFrom::Guessed,\n            column_name_from: fastexcel::ColumnNameFrom::Provided,\n        },\n        fastexcel::ColumnInfo {\n            name: \"Column at E5\".into(),\n            index: 1,\n            absolute_index: 4,\n            dtype: fastexcel::DType::Float,\n            dtype_from: fastexcel::DTypeFrom::Guessed,\n            column_name_from: fastexcel::ColumnNameFrom::Provided,\n        },\n    ];\n    assert_eq!(available_columns, expected_column_info);\n\n    Ok(())\n}\n"
  },
  {
    "path": "tests/tables.rs",
    "content": "use anyhow::{Context, Result};\nuse chrono::NaiveDate;\nuse fastexcel::LoadSheetOrTableOptions;\nuse pretty_assertions::assert_eq;\nuse rstest::{fixture, rstest};\n\nuse crate::utils::path_for_fixture;\n\n#[macro_use]\nmod utils;\n\n#[fixture]\nfn reader() -> fastexcel::ExcelReader {\n    fastexcel::read_excel(path_for_fixture(\"sheet-with-tables.xlsx\"))\n        .expect(\"could not read excel file\")\n}\n\n#[rstest]\n#[case::all_sheets(None, vec![\"users\"])]\n#[case::sheet_with_tables(Some(\"sheet1\"), vec![\"users\"])]\n#[case::sheet_without_tables(Some(\"sheet2\"), vec![])]\nfn test_table_names(\n    mut reader: fastexcel::ExcelReader,\n    #[case] sheet_name: Option<&str>,\n    #[case] expected: Vec<&str>,\n) -> Result<()> {\n    let table_names = reader\n        .table_names(sheet_name)\n        .context(\"Failed to get table names\")?;\n    assert_eq!(table_names, expected);\n    Ok(())\n}\n\n#[rstest]\nfn test_load_table(mut reader: fastexcel::ExcelReader) -> Result<()> {\n    let mut table = reader\n        .load_table(\"users\", LoadSheetOrTableOptions::new_for_table())\n        .context(\"Failed to load table\")?;\n\n    assert_eq!(table.name(), \"users\");\n    assert_eq!(table.sheet_name(), \"sheet1\");\n    assert!(table.specified_dtypes().is_none());\n    assert_eq!(table.total_height(), 3);\n    assert_eq!(table.offset(), 0);\n    assert_eq!(table.height(), 3);\n    assert_eq!(table.width(), 4);\n    let available_columns = table\n        .available_columns()\n        .context(\"could not obtain available columns for table\")?;\n    let expected_column_info = vec![\n        fastexcel::ColumnInfo {\n            name: \"User Id\".into(),\n            index: 0,\n            absolute_index: 0,\n            dtype: fastexcel::DType::Float,\n            column_name_from: fastexcel::ColumnNameFrom::Provided,\n            dtype_from: fastexcel::DTypeFrom::Guessed,\n        },\n        fastexcel::ColumnInfo {\n            name: 
\"FirstName\".into(),\n            index: 1,\n            absolute_index: 1,\n            dtype: fastexcel::DType::String,\n            column_name_from: fastexcel::ColumnNameFrom::Provided,\n            dtype_from: fastexcel::DTypeFrom::Guessed,\n        },\n        fastexcel::ColumnInfo {\n            name: \"LastName\".into(),\n            index: 2,\n            absolute_index: 2,\n            dtype: fastexcel::DType::String,\n            column_name_from: fastexcel::ColumnNameFrom::Provided,\n            dtype_from: fastexcel::DTypeFrom::Guessed,\n        },\n        fastexcel::ColumnInfo {\n            name: \"Date\".into(),\n            index: 3,\n            absolute_index: 3,\n            dtype: fastexcel::DType::DateTime,\n            column_name_from: fastexcel::ColumnNameFrom::Provided,\n            dtype_from: fastexcel::DTypeFrom::Guessed,\n        },\n    ];\n    assert_eq!(available_columns, expected_column_info);\n\n    let dates = [\n        NaiveDate::from_ymd_opt(2020, 1, 1)\n            .unwrap()\n            .and_hms_opt(0, 0, 0)\n            .unwrap(),\n        NaiveDate::from_ymd_opt(2024, 5, 4)\n            .unwrap()\n            .and_hms_opt(0, 0, 0)\n            .unwrap(),\n        NaiveDate::from_ymd_opt(2025, 2, 1)\n            .unwrap()\n            .and_hms_opt(0, 0, 0)\n            .unwrap(),\n    ];\n\n    let expected_columns = fe_columns!(\n        \"User Id\" => [1.0, 2.0, 5.0],\n        \"FirstName\" => [\"Peter\", \"John\", \"Hans\"],\n        \"LastName\" => [\"Müller\", \"Meier\", \"Fricker\"],\n        \"Date\" => dates.as_slice(),\n    );\n\n    let table_columns = table\n        .to_columns()\n        .context(\"could not convert table to columns\")?;\n    assert_eq!(table_columns, expected_columns);\n\n    #[cfg(feature = \"polars\")]\n    {\n        use polars_core::df;\n\n        let expected_df = df!(\n            \"User Id\" => [1.0, 2.0, 5.0],\n            \"FirstName\" => [\"Peter\", \"John\", \"Hans\"],\n            
\"LastName\" => [\"Müller\", \"Meier\", \"Fricker\"],\n            \"Date\" => dates.as_slice(),\n        )?;\n\n        let df = table\n            .to_polars()\n            .context(\"could not convert table to polars dataframe\")?;\n        assert!(df.equals_missing(&expected_df))\n    }\n\n    Ok(())\n}\n"
  },
  {
    "path": "tests/utils/mod.rs",
    "content": "pub fn path_for_fixture(fixture_file: &str) -> String {\n    format!(\n        \"{}/tests/fixtures/{}\",\n        env!(\"CARGO_MANIFEST_DIR\"),\n        fixture_file\n    )\n}\n\nmacro_rules! fe_column {\n    ($name:expr, $vec_or_arr:expr) => {\n        fastexcel::FastExcelColumn::try_new($name.into(), $vec_or_arr.into(), None)\n            .context(\"Failed to create column\")\n    };\n}\n\nmacro_rules! fe_columns {\n    // (name => []) Any number of times but at least once, optionally followed by a comma\n    ($($name:expr => $vec_or_arr:expr),+ $(,)?) => {\n        vec![\n            $(fe_column!($name, $vec_or_arr)?),+\n        ]\n    };\n}\n"
  },
  {
    "path": "tests/whitespace.rs",
    "content": "#[macro_use]\nmod utils;\n\nuse anyhow::{Context, Result};\nuse chrono::{NaiveDate, NaiveDateTime};\nuse fastexcel::{ExcelReader, LoadSheetOrTableOptions};\nuse pretty_assertions::assert_eq;\nuse rstest::{fixture, rstest};\n\nuse crate::utils::path_for_fixture;\n\n#[fixture]\nfn reader() -> ExcelReader {\n    fastexcel::read_excel(path_for_fixture(\"sheet-and-table-with-whitespace.xlsx\"))\n        .expect(\"could not read fixture\")\n}\n\nconst DATES: &[Option<NaiveDateTime>] = &[\n    Some(\n        NaiveDate::from_ymd_opt(2025, 11, 19)\n            .unwrap()\n            .and_hms_opt(14, 34, 2)\n            .unwrap(),\n    ),\n    Some(\n        NaiveDate::from_ymd_opt(2025, 11, 20)\n            .unwrap()\n            .and_hms_opt(14, 56, 34)\n            .unwrap(),\n    ),\n    Some(\n        NaiveDate::from_ymd_opt(2025, 11, 21)\n            .unwrap()\n            .and_hms_opt(15, 19, 6)\n            .unwrap(),\n    ),\n    None,\n    Some(\n        NaiveDate::from_ymd_opt(2025, 11, 22)\n            .unwrap()\n            .and_hms_opt(15, 41, 38)\n            .unwrap(),\n    ),\n    Some(\n        NaiveDate::from_ymd_opt(2025, 11, 23)\n            .unwrap()\n            .and_hms_opt(16, 4, 10)\n            .unwrap(),\n    ),\n    None,\n    None,\n    None,\n    None,\n];\n\n#[rstest]\nfn test_skip_tail_rows_behavior(mut reader: ExcelReader) -> Result<()> {\n    let expected_columns_with_whitespace = fe_columns!(\n        // String because the last row contains a space\n        \"Column One\" => [Some(\"1\"), Some(\"2\"), Some(\"3\"), None, Some(\"5\"), None, None, None, None, Some(\" \")],\n        \"Column Two\" => [Some(\"one\"), Some(\"two\"), None, Some(\"four\"), Some(\"five\"), None, None, Some(\"\"), None, None],\n        \"Column Three\" => DATES,\n    );\n    let expected_columns_without_whitespace = fe_columns!(\n        // Not string rows -> float\n        \"Column One\" => [Some(1.0), Some(2.0), Some(3.0), None, Some(5.0), None],\n 
       \"Column Two\" => [Some(\"one\"), Some(\"two\"), None, Some(\"four\"), Some(\"five\"), None],\n        \"Column Three\" => &DATES[0..6],\n    );\n\n    let sheet = reader\n        .load_sheet(\n            \"Without Table\".into(),\n            LoadSheetOrTableOptions::new_for_sheet(),\n        )\n        .context(r#\"could not load sheet \"Without Table\"\"#)?;\n    let sheet_columns = sheet\n        .to_columns()\n        .context(\"could not convert sheet to columns\")?;\n    assert_eq!(sheet_columns, expected_columns_with_whitespace);\n\n    let table = reader\n        .load_table(\n            \"Table_with_whitespace\",\n            LoadSheetOrTableOptions::new_for_table(),\n        )\n        .context(r#\"could not load table \"Table_with_whitespace\"\"#)?;\n    let table_columns = table\n        .to_columns()\n        .context(\"could not convert table to columns\")?;\n    assert_eq!(table_columns, expected_columns_with_whitespace);\n\n    let sheet_without_tail_whitespace = reader\n        .load_sheet(\n            \"Without Table\".into(),\n            LoadSheetOrTableOptions::new_for_sheet().skip_whitespace_tail_rows(true),\n        )\n        .context(r#\"could not load sheet \"Without Table\"\"#)?;\n    let sheet_without_tail_whitespace_columns = sheet_without_tail_whitespace\n        .to_columns()\n        .context(\"could not convert sheet to columns\")?;\n    assert_eq!(\n        sheet_without_tail_whitespace_columns,\n        expected_columns_without_whitespace\n    );\n\n    let table_without_tail_whitespace = reader\n        .load_table(\n            \"Table_with_whitespace\",\n            LoadSheetOrTableOptions::new_for_table().skip_whitespace_tail_rows(true),\n        )\n        .context(r#\"could not load table \"Table_with_whitespace\"\"#)?;\n    let table_columns_without_tail_whitespace = table_without_tail_whitespace\n        .to_columns()\n        .context(\"could not convert table to columns\")?;\n    assert_eq!(\n        
table_columns_without_tail_whitespace,\n        expected_columns_without_whitespace\n    );\n\n    Ok(())\n}\n\n#[rstest]\nfn test_skip_tail_rows_and_whitespace_as_null_behavior(mut reader: ExcelReader) -> Result<()> {\n    let expected_columns_with_whitespace_as_null = fe_columns!(\n        // All rows should be taken into account but the space in the last row should be considered null\n        \"Column One\" => [Some(1.0), Some(2.0), Some(3.0), None, Some(5.0), None, None, None, None, None],\n        // All rows should be taken into account but the empty string in 8th row should be considered null\n        \"Column Two\" => [Some(\"one\"), Some(\"two\"), None, Some(\"four\"), Some(\"five\"), None, None, None, None, None],\n        \"Column Three\" => DATES,\n    );\n    let expected_columns_without_whitespace = fe_columns!(\n        \"Column One\" => [Some(1.0), Some(2.0), Some(3.0), None, Some(5.0), None],\n        \"Column Two\" => [Some(\"one\"), Some(\"two\"), None, Some(\"four\"), Some(\"five\"), None],\n        \"Column Three\" => &DATES[0..6],\n    );\n\n    let sheet = reader\n        .load_sheet(\n            \"Without Table\".into(),\n            LoadSheetOrTableOptions::new_for_sheet().whitespace_as_null(true),\n        )\n        .context(r#\"could not load sheet \"Without Table\"\"#)?;\n    let sheet_columns = sheet\n        .to_columns()\n        .context(\"could not convert sheet to columns\")?;\n    assert_eq!(sheet_columns, expected_columns_with_whitespace_as_null);\n\n    let table = reader\n        .load_table(\n            \"Table_with_whitespace\",\n            LoadSheetOrTableOptions::new_for_table().whitespace_as_null(true),\n        )\n        .context(r#\"could not load table \"Table_with_whitespace\"\"#)?;\n    let table_columns = table\n        .to_columns()\n        .context(\"could not convert table to columns\")?;\n    assert_eq!(table_columns, expected_columns_with_whitespace_as_null);\n\n    let sheet_without_tail_whitespace = 
reader\n        .load_sheet(\n            \"Without Table\".into(),\n            LoadSheetOrTableOptions::new_for_sheet()\n                .whitespace_as_null(true)\n                .skip_whitespace_tail_rows(true),\n        )\n        .context(r#\"could not load sheet \"Without Table\"\"#)?;\n    let sheet_without_tail_whitespace_columns = sheet_without_tail_whitespace\n        .to_columns()\n        .context(\"could not convert sheet to columns\")?;\n    assert_eq!(\n        sheet_without_tail_whitespace_columns,\n        expected_columns_without_whitespace\n    );\n\n    let table_without_tail_whitespace = reader\n        .load_table(\n            \"Table_with_whitespace\",\n            LoadSheetOrTableOptions::new_for_table()\n                .whitespace_as_null(true)\n                .skip_whitespace_tail_rows(true),\n        )\n        .context(r#\"could not load table \"Table_with_whitespace\"\"#)?;\n    let table_columns_without_tail_whitespace = table_without_tail_whitespace\n        .to_columns()\n        .context(\"could not convert table to columns\")?;\n    assert_eq!(\n        table_columns_without_tail_whitespace,\n        expected_columns_without_whitespace\n    );\n\n    Ok(())\n}\n"
  }
]