Showing preview only (445K chars total). Download the full file or copy to clipboard to get everything.
Repository: ToucanToco/fastexcel
Branch: main
Commit: 98bf33293c85
Files: 99
Total size: 46.9 MB
Directory structure:
gitextract_ze2pys5u/
├── .clippy.toml
├── .github/
│ ├── dependabot.yml
│ └── workflows/
│ ├── CI.yml
│ ├── docs.yml
│ └── release.yml
├── .gitignore
├── .pre-commit-config.yaml
├── Cargo.toml
├── LICENSE
├── Makefile
├── README.md
├── doc-templates/
│ └── module.html.jinja2
├── pyproject.toml
├── python/
│ ├── fastexcel/
│ │ ├── __init__.py
│ │ ├── _fastexcel.pyi
│ │ └── py.typed
│ └── tests/
│ ├── __init__.py
│ ├── benchmarks/
│ │ ├── README.md
│ │ ├── fixtures/
│ │ │ ├── formulas.xlsx
│ │ │ ├── plain_data.xls
│ │ │ └── plain_data.xlsx
│ │ ├── memory.py
│ │ ├── readers.py
│ │ └── speed.py
│ ├── conftest.py
│ ├── test_alias_generation.py
│ ├── test_column_selection.py
│ ├── test_defined_names.py
│ ├── test_dtypes.py
│ ├── test_durations.py
│ ├── test_eagerness.py
│ ├── test_empty.py
│ ├── test_errors.py
│ ├── test_fastexcel.py
│ ├── test_pycapsule.py
│ ├── test_sheet_visibility.py
│ ├── test_shifted_data.py
│ ├── test_tables.py
│ ├── test_whitespace.py
│ └── utils.py
├── scripts/
│ └── update_versions.py
├── src/
│ ├── data/
│ │ ├── cell_extractors.rs
│ │ ├── mod.rs
│ │ ├── python.rs
│ │ └── rust.rs
│ ├── error.rs
│ ├── lib.rs
│ ├── types/
│ │ ├── dtype/
│ │ │ ├── mod.rs
│ │ │ └── python.rs
│ │ ├── excelreader/
│ │ │ ├── mod.rs
│ │ │ └── python.rs
│ │ ├── excelsheet/
│ │ │ ├── column_info/
│ │ │ │ ├── mod.rs
│ │ │ │ └── python.rs
│ │ │ ├── mod.rs
│ │ │ ├── polars.rs
│ │ │ ├── python.rs
│ │ │ └── table.rs
│ │ ├── exceltable/
│ │ │ ├── mod.rs
│ │ │ └── python.rs
│ │ ├── idx_or_name/
│ │ │ ├── mod.rs
│ │ │ └── python.rs
│ │ └── mod.rs
│ └── utils/
│ ├── mod.rs
│ └── schema.rs
├── test.py
└── tests/
├── column_selection.rs
├── fastexcel.rs
├── fixtures/
│ ├── dates.ods
│ ├── decimal-numbers.xlsx
│ ├── div0.xlsx
│ ├── empty.ods
│ ├── empty.xlsx
│ ├── fixture-changing-header-location.xlsx
│ ├── fixture-invalid-cell-value-num.xlsx
│ ├── fixture-invalid-cell-value.xlsx
│ ├── fixture-multi-dtypes-columns.xlsx
│ ├── fixture-multi-sheet.xlsx
│ ├── fixture-sheets-different-visibilities.xlsx
│ ├── fixture-single-sheet-duplicated-columns.xlsx
│ ├── fixture-single-sheet-with-types.xlsx
│ ├── fixture-single-sheet.xlsx
│ ├── fixture-type-errors.xlsx
│ ├── infer-dtypes-fallback.xlsx
│ ├── no-header.xlsx
│ ├── null-bytes-in-columns-names.xls
│ ├── null-column.xlsx
│ ├── sheet-and-table-with-offset.xlsx
│ ├── sheet-and-table-with-whitespace.xlsx
│ ├── sheet-null-strings-empty.xlsx
│ ├── sheet-null-strings.xlsx
│ ├── sheet-with-defined-names.xlsx
│ ├── sheet-with-na.xlsx
│ ├── sheet-with-tables.xlsx
│ └── single-sheet-skip-rows-durations.xlsx
├── sheet_visibility.rs
├── shifted_data.rs
├── tables.rs
├── utils/
│ └── mod.rs
└── whitespace.rs
================================================
FILE CONTENTS
================================================
================================================
FILE: .clippy.toml
================================================
# Clippy `disallowed-macros` configuration: each entry bans one std assertion
# macro and names the `pretty_assertions` replacement to use in its `reason`.
disallowed-macros = [
{ path = "std::assert_ne", reason = "use `pretty_assertions::assert_ne` instead" },
{ path = "std::assert_eq", reason = "use `pretty_assertions::assert_eq` instead" },
{ path = "std::assert_matches", reason = "use `pretty_assertions::assert_matches` instead" },
]
================================================
FILE: .github/dependabot.yml
================================================
# Dependabot configuration: daily update checks for the Python, Rust and
# GitHub Actions dependencies declared at the repository root.
version: 2
updates:
  # python
  - package-ecosystem: 'pip'
    directory: '/'
    schedule:
      interval: 'daily'
    labels:
      - 'dependencies'
      - ':snake: python :snake:'

  # rust
  - package-ecosystem: 'cargo'
    directory: '/'
    schedule:
      interval: 'daily'
    # Crate updates are grouped by dependency type (production / development).
    groups:
      prod-deps:
        dependency-type: 'production'
      dev-deps:
        dependency-type: 'development'
    labels:
      - 'dependencies'
      - ':crab: rust :crab:'

  # actions
  - package-ecosystem: 'github-actions'
    directory: '/'
    schedule:
      interval: 'daily'
================================================
FILE: .github/workflows/CI.yml
================================================
# Continuous integration: lint, documentation build, the test matrix and
# wheel / sdist build checks. Runs on pushes to main and on pull requests.
name: CI

on:
  push:
    branches:
      - main
  pull_request:
    types: [opened, synchronize, reopened]

env:
  MIN_PYTHON_VERSION: "3.10"

defaults:
  run:
    # Prevents windows runners from running on powershell
    shell: bash

jobs:
  lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v6
      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: "${{ env.MIN_PYTHON_VERSION }}"
      # rustfmt and clippy are both installed through `components`, so no
      # separate `rustup component add rustfmt` step is required.
      - name: Set up rust toolchain
        uses: dtolnay/rust-toolchain@stable
        with:
          components: rustfmt, clippy
      - name: install uv
        uses: astral-sh/setup-uv@v7
      - name: Install dependencies and lint
        run: |
          make install
          make lint

  check-docs:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v6
      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: "3.11"
      - name: Set up rust toolchain
        uses: dtolnay/rust-toolchain@stable
      - name: install uv
        uses: astral-sh/setup-uv@v7
      - name: Check documentation
        run: |
          make install
          make doc

  test:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        python-version: ["3.10", "3.11", "3.12", "3.13", "3.14", "3.14t"]
        os:
          - "ubuntu-latest"
          - "ubuntu-24.04-arm"
          - "macos-14"
          - "windows-latest"
          # windows-11-arm excluded: pyarrow is not available for Windows ARM64
          # https://github.com/apache/arrow/issues/47195
    steps:
      - uses: actions/checkout@v6
      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: ${{ matrix.python-version }}
      - name: Set up rust toolchain
        uses: dtolnay/rust-toolchain@stable
      - name: install uv
        uses: astral-sh/setup-uv@v7
      - name: Install dependencies and test
        run: |
          make install
          make test
      # Second pass of the Python tests against the pandas 2.x series.
      - name: Test with pandas<3
        run: |
          uv pip install "pandas<3"
          make test-python

  check-wheel-build:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        # Only building for the smallest supported Python version (abi3
        # wheels) plus the 3.14t build, rather than for every version.
        python-version: ["3.10", "3.14t"]
        os: ["ubuntu-latest", "macos-14", "windows-latest"]
        architecture: [x86-64, aarch64]
        exclude:
          # Cross-compiling x86_64 → aarch64 on Windows doesn't work; use windows-11-arm instead
          - os: windows-latest
            architecture: aarch64
        include:
          # Windows ARM64: build natively on windows-11-arm (3.11 is the minimum available)
          - os: windows-11-arm
            python-version: "3.11"
            architecture: aarch64
          # TODO: re-enable once setup-python supports windows-11-arm + python 3.14t
          # (setup-python is currently broken with that combination)
          # - os: windows-11-arm
          #   python-version: "3.14t"
          #   architecture: aarch64
    steps:
      - uses: actions/checkout@v6
      - uses: dtolnay/rust-toolchain@stable
      # Maps the (os, architecture) matrix pair to a Rust target triple and
      # exposes it as the `target` step output.
      - name: Set Rust target
        id: target
        run: |
          TARGET=${{
            (matrix.os == 'macos-14' && (matrix.architecture == 'aarch64' && 'aarch64-apple-darwin' || 'x86_64-apple-darwin'))
            || (matrix.os == 'ubuntu-latest' && (matrix.architecture == 'aarch64' && 'aarch64-unknown-linux-gnu' || 'x86_64-unknown-linux-gnu'))
            || (matrix.os == 'windows-latest' && 'x86_64-pc-windows-msvc')
            || (matrix.os == 'windows-11-arm' && 'aarch64-pc-windows-msvc')
          }}
          echo "target=$TARGET" >> "$GITHUB_OUTPUT"
      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: ${{ matrix.python-version }}
      - name: build (fast)
        uses: PyO3/maturin-action@v1
        with:
          manylinux: auto
          command: build
          args: "-o dist -i python${{ matrix.python-version }}"
          target: ${{ steps.target.outputs.target }}
      - name: Upload wheels
        uses: actions/upload-artifact@v7
        with:
          name: "wheels-${{ matrix.os }}-python-${{ matrix.python-version }}-${{ matrix.architecture }}"
          path: dist

  check-wheel-build-musllinux:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.10", "3.14t"]
        architecture: [x86-64, aarch64]
    steps:
      - uses: actions/checkout@v6
      - uses: dtolnay/rust-toolchain@stable
      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: ${{ matrix.python-version }}
      - name: build (fast)
        uses: PyO3/maturin-action@v1
        with:
          manylinux: musllinux_1_2
          command: build
          args: "-o dist -i python${{ matrix.python-version }}"
          target: ${{ matrix.architecture == 'aarch64' && 'aarch64-unknown-linux-musl' || 'x86_64-unknown-linux-musl' }}
      - name: Upload wheels
        uses: actions/upload-artifact@v7
        with:
          name: "wheels-linux-musl-python-${{ matrix.python-version }}-${{ matrix.architecture }}"
          path: dist

  check-sdist-build:
    runs-on: "ubuntu-latest"
    steps:
      - uses: actions/checkout@v6
      - uses: dtolnay/rust-toolchain@stable
      - name: build sdist
        uses: PyO3/maturin-action@v1
        with:
          manylinux: auto
          command: sdist
          args: "-o dist"
      - name: upload sdist
        uses: actions/upload-artifact@v7
        with:
          name: sdist
          path: dist
================================================
FILE: .github/workflows/docs.yml
================================================
# Builds the versioned Python documentation and deploys it to the gh-pages
# branch. Triggered by pushes to main ("latest"), by `v*` tags (stable
# release docs) and manually for an existing tag.
name: Docs

on:
  push:
    branches:
      - main
    tags:
      - 'v*'
  workflow_dispatch:
    inputs:
      version_tag:
        description: 'Tag to build docs for (e.g. v0.18.0). Checks out the tag before building.'
        required: true
      mark_as_stable:
        description: 'Mark this version as the stable default (updates root redirect)'
        type: boolean
        default: false

# Every run commits to and pushes the same gh-pages branch. A release triggers
# two runs almost simultaneously (the push to main and the tag push), so runs
# are serialized to keep the second push from being rejected as a
# non-fast-forward. In-progress deployments are never cancelled.
concurrency:
  group: docs-gh-pages
  cancel-in-progress: false

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v6
        with:
          # Full history is fetched: the deploy step checks out gh-pages and
          # merges origin/main into it.
          fetch-depth: 0
      - name: Checkout tag (workflow_dispatch)
        if: github.event_name == 'workflow_dispatch'
        env:
          VERSION_TAG: ${{ github.event.inputs.version_tag }}
        run: git checkout "refs/tags/$VERSION_TAG"
      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: "3.11"
      - name: Set up rust toolchain
        uses: dtolnay/rust-toolchain@stable
      - name: install uv
        uses: astral-sh/setup-uv@v7
      # Outputs:
      #   version   - the manual input tag, the pushed tag name, or "latest"
      #   is_stable - the manual input flag, "true" for tag pushes, else "false"
      - name: Determine version
        id: version
        env:
          INPUT_VERSION_TAG: ${{ github.event.inputs.version_tag }}
          INPUT_MARK_AS_STABLE: ${{ github.event.inputs.mark_as_stable }}
        run: |
          if [[ "$GITHUB_EVENT_NAME" == "workflow_dispatch" ]]; then
            echo "version=$INPUT_VERSION_TAG" >> "$GITHUB_OUTPUT"
            echo "is_stable=$INPUT_MARK_AS_STABLE" >> "$GITHUB_OUTPUT"
          elif [[ "${GITHUB_REF}" == refs/tags/v* ]]; then
            echo "version=${GITHUB_REF#refs/tags/}" >> "$GITHUB_OUTPUT"
            echo "is_stable=true" >> "$GITHUB_OUTPUT"
          else
            echo "version=latest" >> "$GITHUB_OUTPUT"
            echo "is_stable=false" >> "$GITHUB_OUTPUT"
          fi
      - name: Build docs
        env:
          VERSION: ${{ steps.version.outputs.version }}
        run: |
          make install
          make doc-versioned
      - name: Deploy to gh-pages
        env:
          VERSION: ${{ steps.version.outputs.version }}
          IS_STABLE: ${{ steps.version.outputs.is_stable }}
        run: |
          git config user.name github-actions
          git config user.email github-actions@github.com
          # Stash built docs
          cp -r "docs/$VERSION" /tmp/docs-build
          # Switch to gh-pages (the branch is assumed to already exist)
          git checkout gh-pages
          git merge -m 'Merge main' origin/main
          # Place versioned docs
          rm -rf "docs/$VERSION"
          mv /tmp/docs-build "docs/$VERSION"
          # Update versions.json and root redirect
          STABLE_FLAG=""
          if [[ "$IS_STABLE" == "true" ]]; then
            STABLE_FLAG="--stable"
          fi
          # $STABLE_FLAG is left unquoted on purpose: when empty it must
          # expand to no argument at all.
          ./scripts/update_versions.py \
            --version "$VERSION" \
            --docs-dir docs \
            $STABLE_FLAG
          # -f because `docs` is listed in .gitignore
          git add -f docs
          git commit -m "Update docs ($VERSION)" --allow-empty
          git push origin gh-pages
================================================
FILE: .github/workflows/release.yml
================================================
# Release pipeline, triggered by pushing a `v*` tag. Builds release wheels for
# Linux (glibc and musl), macOS and Windows plus an sdist, then uploads them
# to PyPI, publishes the crate to crates.io and creates a GitHub release.
name: Release

on:
  push:
    # Sequence of patterns matched against refs/tags
    tags:
      - 'v*'  # Push events to matching v*, i.e. v1.0, v20.15.10

jobs:
  linux:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version:
          - "3.10"
          - "3.14t"
        architecture:
          - x86-64
          - aarch64
    steps:
      - uses: actions/checkout@v6
      - uses: dtolnay/rust-toolchain@stable
      - uses: actions/setup-python@v6
        with:
          python-version: ${{ matrix.python-version }}
      - name: build (release)
        uses: PyO3/maturin-action@v1
        with:
          manylinux: auto
          command: build
          args: "--release -o dist -i python${{ matrix.python-version }}"
          # Only aarch64 sets an explicit target; x86-64 passes null.
          target: ${{ matrix.architecture == 'aarch64' && 'aarch64-unknown-linux-gnu' || null }}
      - name: Upload wheels
        uses: actions/upload-artifact@v7
        with:
          name: "wheels-linux-python-${{ matrix.python-version }}-${{ matrix.architecture }}"
          path: dist

  linux-musl:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version:
          - "3.10"
          - "3.14t"
        architecture:
          - x86-64
          - aarch64
    steps:
      - uses: actions/checkout@v6
      - uses: dtolnay/rust-toolchain@stable
      - uses: actions/setup-python@v6
        with:
          python-version: ${{ matrix.python-version }}
      - name: build (release)
        uses: PyO3/maturin-action@v1
        with:
          manylinux: musllinux_1_2
          command: build
          args: "--release -o dist -i python${{ matrix.python-version }}"
          target: ${{ matrix.architecture == 'aarch64' && 'aarch64-unknown-linux-musl' || 'x86_64-unknown-linux-musl' }}
      - name: Upload wheels
        uses: actions/upload-artifact@v7
        with:
          name: "wheels-linux-musl-python-${{ matrix.python-version }}-${{ matrix.architecture }}"
          path: dist

  macos:
    runs-on: macos-14
    strategy:
      matrix:
        python-version:
          - "3.10"
          - "3.14t"
        architecture:
          - x86-64
          - aarch64
    steps:
      - uses: actions/checkout@v6
      - uses: dtolnay/rust-toolchain@stable
      - uses: actions/setup-python@v6
        with:
          python-version: ${{ matrix.python-version }}
      - name: build (release)
        uses: PyO3/maturin-action@v1
        with:
          command: build
          args: "--release -o dist -i python${{ matrix.python-version }}"
          target: ${{ matrix.architecture == 'aarch64' && 'aarch64-apple-darwin' || 'x86_64-apple-darwin' }}
      - name: Upload wheels
        uses: actions/upload-artifact@v7
        with:
          name: "wheels-macos-python-${{ matrix.python-version }}-${{ matrix.architecture }}"
          path: dist

  windows:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        python-version:
          - "3.10"
          - "3.14t"
        os:
          - windows-latest
        architecture:
          - x86-64
        include:
          # Windows ARM64: build natively on windows-11-arm (3.11 is the minimum available)
          - os: windows-11-arm
            python-version: "3.11"
            architecture: aarch64
          # TODO: re-enable once setup-python supports windows-11-arm + python 3.14t
          # (setup-python is currently broken with that combination)
          # - os: windows-11-arm
          #   python-version: "3.14t"
          #   architecture: aarch64
    steps:
      - uses: actions/checkout@v6
      - uses: dtolnay/rust-toolchain@stable
      - uses: actions/setup-python@v6
        with:
          python-version: ${{ matrix.python-version }}
      - name: build (release)
        uses: PyO3/maturin-action@v1
        with:
          command: build
          args: "--release -o dist -i python${{ matrix.python-version }}"
          target: ${{ matrix.architecture == 'aarch64' && 'aarch64-pc-windows-msvc' || 'x86_64-pc-windows-msvc' }}
      - name: Upload wheels
        uses: actions/upload-artifact@v7
        with:
          name: "wheels-windows-python-${{ matrix.python-version }}-${{ matrix.architecture }}"
          path: dist

  sdist:
    runs-on: "ubuntu-latest"
    steps:
      - uses: actions/checkout@v6
      - uses: dtolnay/rust-toolchain@stable
      - uses: actions/setup-python@v6
        with:
          python-version: "3.10"
      - name: build (sdist)
        uses: PyO3/maturin-action@v1
        with:
          manylinux: auto
          command: sdist
          args: "-o dist"
      - name: Upload sdist
        uses: actions/upload-artifact@v7
        with:
          name: sdist
          path: dist

  # NOTE: Cannot use a matrix here, as we only want a single release
  release:
    name: Release
    runs-on: ubuntu-latest
    needs:
      - linux
      - linux-musl
      - macos
      - windows
      - sdist
    permissions:
      id-token: write  # Required for OIDC token exchange with crates.io
      contents: write  # Required to be able to create a GitHub release
    steps:
      - uses: actions/checkout@v6
      - uses: dtolnay/rust-toolchain@stable
      - uses: rust-lang/crates-io-auth-action@v1
        id: auth
      # The `wheels-linux-*` pattern also matches the `wheels-linux-musl-*`
      # artifacts, so glibc and musl wheels land in the same directory.
      - name: Download Linux wheels
        uses: actions/download-artifact@v8
        with:
          pattern: "wheels-linux-*"
          merge-multiple: true
          path: wheels-linux
      - name: Download MacOS wheels
        uses: actions/download-artifact@v8
        with:
          pattern: "wheels-macos-*"
          merge-multiple: true
          path: wheels-macos
      - name: Download Windows wheels
        uses: actions/download-artifact@v8
        with:
          pattern: "wheels-windows-*"
          merge-multiple: true
          path: wheels-windows
      - name: Download sdist
        uses: actions/download-artifact@v8
        with:
          name: "sdist"
          path: sdist
      - name: Publish to PyPI
        uses: PyO3/maturin-action@v1
        env:
          MATURIN_PYPI_TOKEN: ${{ secrets.PYPI_API_TOKEN }}
        with:
          command: upload
          args: "--skip-existing wheels-linux/*.whl wheels-macos/*.whl wheels-windows/*.whl sdist/*.tar.gz"
      # Uses the token produced by the crates-io-auth-action step above.
      - name: Publish to crates.io
        run: cargo publish
        env:
          CARGO_REGISTRY_TOKEN: ${{ steps.auth.outputs.token }}
      - name: Release
        uses: softprops/action-gh-release@v3
        with:
          generate_release_notes: true
          files: |
            wheels-linux/*.whl
            wheels-macos/*.whl
            wheels-windows/*.whl
            sdist/*.tar.gz
================================================
FILE: .gitignore
================================================
/target
bigfile.*
__pycache__
*.pyc
*.so
*.dat
.DS_Store
.python-version
pyrightconfig.json
.venv
docs
.vscode
.idea
.benchmarks
notebooks
# Excel lock files (`~$<workbook>.xlsx`). The pattern is unanchored so that it
# applies to every fixtures directory (tests/fixtures,
# python/tests/benchmarks/fixtures), not only to /python/tests/fixtures.
~$*.xlsx
.zed
dist
================================================
FILE: .pre-commit-config.yaml
================================================
# See https://pre-commit.com for more information
# See https://pre-commit.com/hooks.html for more hooks
repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v3.2.0
    hooks:
      - id: trailing-whitespace
      - id: end-of-file-fixer

  # Hook repositories are cloned and executed locally, so this one is fetched
  # over HTTPS rather than plaintext HTTP.
  - repo: https://github.com/doublify/pre-commit-rust
    rev: v1.0
    hooks:
      - id: cargo-check

  # Project-local hooks delegating to the Makefile. `pass_filenames: false`
  # because the make targets work on the whole project, not on a file list.
  - repo: local
    hooks:
      - id: lint
        name: Lint
        entry: make lint
        types_or: [python, rust]
        language: system
        pass_filenames: false
      - id: format
        name: Format
        entry: make format
        types_or: [python, rust]
        language: system
        pass_filenames: false
================================================
FILE: Cargo.toml
================================================
# Crate manifest for fastexcel. The crate is built both as a Rust library and
# as a Python extension (see `crate-type` and the `python`-related features).
[package]
name = "fastexcel"
version = "0.20.2"
description = "A fast excel reader for Rust and Python"
rust-version = "1.85.0"
edition = "2024"
license = "MIT"
homepage = "https://github.com/ToucanToco/fastexcel"
repository = "https://github.com/ToucanToco/fastexcel.git"
readme = "README.md"
# Files packaged with the crate; the `!` entries exclude Python bytecode and
# compiled shared objects from the included directories.
include = [
"/pyproject.toml",
"/README.md",
"/LICENSE",
"/Makefile",
"/src",
"/python/fastexcel",
"!__pycache__",
"!*.pyc",
"!*.so",
]
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[lib]
name = "fastexcel"
# Presumably `cdylib` is for the Python extension module and `rlib` for use as
# a regular Rust dependency — confirm before changing.
crate-type = ["cdylib", "rlib"]
# Every dependency marked `optional = true` is only pulled in through one of
# the entries in the [features] table below.
[dependencies]
arrow-array = { version = "^58", features = ["ffi"], optional = true }
arrow-pyarrow = { version = "^58", optional = true }
arrow-schema = { version = "^58", optional = true }
calamine = { version = "^0.35.0", features = ["chrono"] }
chrono = { version = "^0.4.40", default-features = false }
log = "^0.4"
# NOTE(review): `>=0.53` has no upper bound, unlike the caret requirements
# used for the other dependencies — confirm this is intentional.
polars-core = { version = ">=0.53", features = [
"dtype-date",
"dtype-datetime",
"dtype-duration",
], optional = true }
pyo3 = { version = "^0.28", features = ["abi3-py310"], optional = true }
pyo3-arrow = { version = "^0.17", default-features = false, optional = true }
pyo3-log = { version = "^0.13.3", optional = true }
[dev-dependencies]
anyhow = "1.0.102"
pretty_assertions = { version = "^1.4.1", features = ["unstable"] }
rstest = { version = "^0.26.1", default-features = false }
# NOTE: This is a hack to bypass pyo3 limitations when testing:
# https://pyo3.rs/v0.22.3/faq.html#i-cant-run-cargo-test-or-i-cant-build-in-a-cargo-workspace-im-having-linker-issues-like-symbol-not-found-or-undefined-reference-to-_pyexc_systemerror
# (The hack appears to be keeping `extension-module` behind its own feature
# instead of enabling it unconditionally — confirm.)
# NOTE(review): the pyo3 links in this section target the v0.22.3 docs while
# the dependency above is ^0.28; the referenced pages/APIs may have changed.
[features]
default = []
__arrow = ["dep:arrow-schema", "dep:arrow-array"]
python = ["__arrow", "dep:pyo3", "dep:pyo3-log", "dep:pyo3-arrow"]
extension-module = ["pyo3/extension-module"]
polars = ["dep:polars-core"]
pyarrow = ["dep:arrow-pyarrow", "python"]
# Private features for internal usage, should not be used directly as they may
# change without notice
__pyo3-tests = [
# feature for tests only. This makes Python::with_gil auto-initialize Python
# interpreters, which allows us to instantiate Python objects in tests
# (see https://pyo3.rs/v0.22.3/features#auto-initialize)
"pyo3/auto-initialize",
"pyarrow",
]
# The two features below are selected by the `test-rust-standalone` and
# `test-rust-polars` Makefile targets respectively.
__rust-tests-standalone = []
__rust-tests-polars = ["polars"]
# Private feature for maturin usage, should not be used directly
__maturin = ["extension-module", "pyarrow"]
================================================
FILE: LICENSE
================================================
MIT License
Copyright (c) 2024 ToucanToco
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
================================================
FILE: Makefile
================================================
# Development entry points for fastexcel. Running `make` without a target
# runs `all` (format, build, lint, test).
#
# The `## <description>` text trailing each `.PHONY:` line is what the `help`
# target extracts and prints, so keep it on the same line as `.PHONY:`.
.DEFAULT_GOAL := all
sources = python/fastexcel python/tests

# Coloured cargo output only when stdin is a terminal.
export CARGO_TERM_COLOR=$(shell (test -t 0 && echo always) || echo auto)

.PHONY: .uv  ## Check that uv is installed
.uv:
	@uv -V || (echo 'Please install uv: https://docs.astral.sh/uv/getting-started/installation/' && exit 1)

.PHONY: install  ## Install the package & dependencies with debug build
install: .uv
	uv sync --frozen --group all
	uv run maturin develop --uv -E pyarrow,pandas,polars

.PHONY: install-prod  ## Install the package & dependencies with release build
install-prod: .uv
	uv sync --frozen --group all
	uv run maturin develop --uv --release -E pyarrow,pandas,polars

.PHONY: setup-dev  ## First-time setup: install + pre-commit hooks
setup-dev: install
	uv run pre-commit install --install-hooks

.PHONY: rebuild-lockfiles  ## Rebuild lockfiles from scratch, updating all dependencies
rebuild-lockfiles: .uv
	uv lock --upgrade
	cargo update

.PHONY: build-dev  ## Build the development version of the package
build-dev:
	uv run maturin build

.PHONY: build-wheel  ## Build production wheel and install it
build-wheel:
	@rm -rf target/wheels/
	uv run maturin build --release
	@wheel=$$(ls target/wheels/*.whl); uv pip install --force-reinstall "$$wheel[pandas,polars]"

.PHONY: lint-python  ## Lint python source files
lint-python:
	uv run ruff check $(sources)
	uv run ruff format --check $(sources)
	uv run mypy $(sources)

# clippy is run once per feature combination so that feature-gated code is
# linted too.
.PHONY: lint-rust  ## Lint rust source files
lint-rust:
	cargo fmt --all -- --check
	# Rust
	cargo clippy --tests -- -D warnings
	# Python-related code
	cargo clippy --features __maturin,__pyo3-tests --tests -- -D warnings
	# Rust+polars
	cargo clippy --features polars --tests -- -D warnings

.PHONY: lint  ## Lint rust and python source files
lint: lint-python lint-rust

.PHONY: format-python  ## Auto-format python source files
format-python:
	uv run ruff check --fix $(sources)
	uv run ruff format $(sources)

.PHONY: format-rust  ## Auto-format rust source files
format-rust:
	cargo fmt --all
	cargo clippy --all-features --tests --fix --lib -p fastexcel --allow-dirty --allow-staged

.PHONY: format  ## Auto-format python and rust source files
format: format-rust format-python

.PHONY: test-python  ## Run python tests
test-python: install
	uv run pytest

.PHONY: test-rust-pyo3  ## Run PyO3 rust tests
test-rust-pyo3:
	# --lib to skip integration tests
	cargo test --no-default-features --features __pyo3-tests --lib

.PHONY: test-rust-standalone  ## Run standalone rust tests
test-rust-standalone:
	cargo test --no-default-features --features __rust-tests-standalone

.PHONY: test-rust-polars  ## Run polars rust tests
test-rust-polars:
	cargo test --no-default-features --features __rust-tests-polars

.PHONY: test-rust  ## Run rust tests
test-rust: test-rust-pyo3 test-rust-standalone test-rust-polars

.PHONY: test  ## Run all tests
test: test-rust test-python

.PHONY: doc-serve  ## Serve documentation with live reload
doc-serve: build-dev
	uv run pdoc --template-directory doc-templates python/fastexcel

.PHONY: doc  ## Build documentation
doc: build-dev
	uv run pdoc --template-directory doc-templates -o docs/latest python/fastexcel
	uv run scripts/update_versions.py --version latest --docs-dir docs
	cargo doc --no-deps --lib -p fastexcel --features polars

.PHONY: doc-versioned  ## Build versioned documentation (CI usage: VERSION=v0.19.0 [STABLE=1] make doc-versioned)
doc-versioned: build-dev
	@test -n "$(VERSION)" || (echo "ERROR: VERSION is not set. Usage: VERSION=v0.19.0 [STABLE=1] make doc-versioned" && exit 1)
	uv run pdoc --template-directory doc-templates -o docs/$(VERSION) python/fastexcel
	uv run scripts/update_versions.py --version $(VERSION) --docs-dir docs $(if $(filter 1,$(STABLE)),--stable,)

.PHONY: all  ## Run the standard set of checks performed in CI
all: format build-dev lint test

.PHONY: benchmarks  ## Run benchmarks
benchmarks: build-wheel
	uv run pytest ./python/tests/benchmarks/speed.py

# The `find` based removals use `-exec` rather than backtick expansion so that
# paths containing whitespace are handled correctly.
.PHONY: clean  ## Clear local caches and build artifacts
clean:
	find . -name __pycache__ -prune -exec rm -rf {} +
	find . -type f -name '*.py[co]' -exec rm -f {} +
	find . -type f -name '*~' -exec rm -f {} +
	find . -type f -name '.*~' -exec rm -f {} +
	rm -rf .cache
	rm -rf htmlcov
	rm -rf .pytest_cache
	rm -rf *.egg-info
	rm -f .coverage
	rm -f .coverage.*
	rm -rf build
	rm -rf perf.data*
	rm -rf python/fastexcel/*.so

# Lists every `.PHONY: <target> ## <description>` line of this file, sorted,
# as a two-column table.
.PHONY: help  ## Display this message
help:
	@grep -E \
		'^.PHONY: .*?## .*$$' $(MAKEFILE_LIST) | \
		sort | \
		awk 'BEGIN {FS = ".PHONY: |## "}; {printf "\033[36m%-19s\033[0m %s\n", $$2, $$3}'
================================================
FILE: README.md
================================================
# `fastexcel`
A fast excel file reader for Python and Rust.
Docs:
* [Python](https://fastexcel.toucantoco.dev/).
* [Rust](https://docs.rs/fastexcel).
## Stability
The Python library is considered production-ready. The API is mostly stable, and we avoid breaking changes as much as
possible. v1.0.0 will be released once the [milestone](https://github.com/ToucanToco/fastexcel/milestone/2) is reached.
> ⚠️ The free-threaded build is still considered experimental
The Rust crate is still experimental, and breaking changes are to be expected.
## Installation
```bash
# Lightweight installation (no PyArrow dependency)
pip install fastexcel
# With Polars support only (no PyArrow needed)
pip install fastexcel[polars]
# With Pandas support (includes PyArrow)
pip install fastexcel[pandas]
# With PyArrow support
pip install fastexcel[pyarrow]
# With all integrations
pip install fastexcel[pandas,polars]
```
## Quick Start
### Modern usage (recommended)
FastExcel supports the [Arrow PyCapsule Interface](https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html) for zero-copy data exchange with libraries like Polars, without requiring pyarrow as a dependency.
Use fastexcel with any Arrow-compatible library without requiring pyarrow.
```python
import fastexcel
# Load an Excel file
reader = fastexcel.read_excel("data.xlsx")
sheet = reader.load_sheet(0) # Load first sheet
# Use with Polars (zero-copy, no pyarrow needed)
import polars as pl
df = pl.DataFrame(sheet) # Direct PyCapsule interface
print(df)
# Or use the to_polars() method (also via PyCapsule)
df = sheet.to_polars()
print(df)
# Or access the raw Arrow data via PyCapsule interface
schema = sheet.__arrow_c_schema__()
array_data = sheet.__arrow_c_array__()
```
### Traditional usage (with pandas/pyarrow)
```python
import fastexcel
reader = fastexcel.read_excel("data.xlsx")
sheet = reader.load_sheet(0)
# Convert to pandas (requires `pandas` extra)
df = sheet.to_pandas()
# Or get pyarrow RecordBatch directly
record_batch = sheet.to_arrow()
```
### Working with tables
```python
reader = fastexcel.read_excel("data.xlsx")
# List available tables
tables = reader.table_names()
print(f"Available tables: {tables}")
# Load a specific table
table = reader.load_table("MyTable")
df = pl.DataFrame(table) # Zero-copy via PyCapsule, no pyarrow needed
```
## Key Features
- **Zero-copy data exchange** via [Arrow PyCapsule Interface](https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html)
- **Flexible dependencies** - use with Polars (no PyArrow needed) or Pandas (includes PyArrow)
- **Seamless Polars integration** - `pl.DataFrame(sheet)` and `sheet.to_polars()` work without PyArrow via PyCapsule interface
- **High performance** - written in Rust with [calamine](https://github.com/tafia/calamine) and [Apache Arrow](https://arrow.apache.org/)
- **Memory efficient** - lazy loading and optional eager evaluation
- **Type safety** - automatic type inference with manual override options
## Contributing & Development
### Prerequisites
You'll need:
1. **[Rust](https://rustup.rs/)** - Rust stable or nightly (1.85.0 or newer, as declared by `rust-version` in `Cargo.toml`)
2. **[uv](https://docs.astral.sh/uv/getting-started/installation/)** - Fast Python package manager (will install Python 3.10+ automatically)
3. **[git](https://git-scm.com/)** - For version control
4. **[make](https://www.gnu.org/software/make/)** - For running development commands
**Python Version Management:**
uv handles Python installation automatically. To use a specific Python version:
```bash
uv python install 3.13 # Install Python 3.13
uv python pin 3.13 # Pin project to Python 3.13
```
### Quick Start
```bash
# Clone the repository (or from your fork)
git clone https://github.com/ToucanToco/fastexcel.git
cd fastexcel
# First-time setup: install dependencies, build debug version, and setup pre-commit hooks
make setup-dev
```
Verify your installation by running:
```bash
make
```
This runs a full development cycle: formatting, building, linting, and testing.
### Development Commands
Run `make help` to see all available commands, or use these common ones:
```bash
make all # full dev cycle: format, build, lint, test
make install # install with debug build (daily development)
make install-prod # install with release build (benchmarking)
make test # to run the tests
make lint # to run the linter
make format # to format python and rust code
make doc-serve # to serve the documentation locally
```
### Useful Resources
* [`python/fastexcel/_fastexcel.pyi`](./python/fastexcel/_fastexcel.pyi) - Python API types
* [`python/tests/`](./python/tests) - Comprehensive usage examples
## Benchmarking
For benchmarking, use `make benchmarks` which automatically builds an optimised wheel.
This is required for profiling, as dev mode builds are much slower.
### Speed benchmarks
```bash
make benchmarks
```
### Memory profiling
```bash
mprof run -T 0.01 python python/tests/benchmarks/memory.py python/tests/benchmarks/fixtures/plain_data.xls
```
## Creating a release
1. Create a PR containing a commit that only updates the version in `Cargo.toml`.
2. Once it is approved, squash and merge it into main.
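3. Tag the squashed commit with a `v`-prefixed tag (e.g. `v0.20.2`) and push the tag; the release workflow only runs for tags matching `v*`.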
4. The `release` GitHub action will take care of the rest.
## Dev tips
* Use `cargo check` to verify that your rust code compiles, no need to go through `maturin` every time
* `cargo clippy` = 💖
* Careful with arrow constructors, they tend to allocate a lot
* [`mprof`](https://github.com/pythonprofilers/memory_profiler) and `time` go a long way for perf checks,
no need to go fancy right from the start
================================================
FILE: doc-templates/module.html.jinja2
================================================
{% extends "default/module.html.jinja2" %}
{% block nav_title %}
{{ super() }}
<div id="version-switcher" style="padding: 0.5rem 1.5rem 0.5rem 0;">
<label for="version-select" style="font-size: 0.85rem; font-weight: bold;">Version</label>
<select id="version-select"
style="display: block; width: 100%; margin-top: 0.25rem; padding: 0.25rem 0.4rem;
font-size: 0.85rem; border-radius: 4px; border: 1px solid var(--accent2);
background: var(--bg); color: var(--text);">
<option>loading...</option>
</select>
</div>
<script>
(function() {
  // Populates the #version-select dropdown from `versions.json` and navigates
  // to the same page of another documentation version when the selection
  // changes. The switcher is hidden when the URL contains no version segment.
  var pathname = window.location.pathname;
  var parts = pathname.replace(/\/+$/, '').split('/');
  // Find the version segment: first path part that looks like a version or "latest"
  var currentVersion = '';
  var versionIndex = -1;
  for (var i = 1; i < parts.length; i++) {
    if (parts[i] === 'latest' || /^v\d/.test(parts[i])) {
      currentVersion = parts[i];
      versionIndex = i;
      break;
    }
  }
  if (!currentVersion) {
    var switcher = document.getElementById('version-switcher');
    if (switcher) switcher.style.display = 'none';
    return;
  }
  // Offset of the version segment inside the path. It is derived from the
  // segments preceding it rather than from a substring search, which could
  // match the version text inside an earlier segment
  // (e.g. "/docs-latest/latest/index.html").
  var idx = parts.slice(0, versionIndex).join('/').length + 1;
  // Build base URL (everything before the version segment)
  var baseUrl = pathname.substring(0, idx);
  // Get the page path after the version segment
  var pagePath = pathname.substring(idx + currentVersion.length);
  fetch(baseUrl + 'versions.json')
    .then(function(r) {
      // Treat HTTP errors like any other failure: handled by the catch below.
      if (!r.ok) throw new Error('versions.json: HTTP ' + r.status);
      return r.json();
    })
    .then(function(versions) {
      var select = document.getElementById('version-select');
      select.innerHTML = '';
      versions.forEach(function(v) {
        var opt = document.createElement('option');
        // Same page, query string and fragment, under the other version.
        opt.value = baseUrl + v.path + pagePath + window.location.search + window.location.hash;
        opt.textContent = v.label;
        if (v.path === currentVersion) opt.selected = true;
        select.appendChild(opt);
      });
      select.onchange = function() {
        if (this.value) window.location.href = this.value;
      };
    })
    .catch(function() {
      // versions.json could not be loaded or parsed: show only the current
      // version so the dropdown does not stay on "loading...".
      var select = document.getElementById('version-select');
      var opt = document.createElement('option');
      select.innerHTML = '';
      opt.textContent = currentVersion || 'unknown';
      select.appendChild(opt);
    });
})();
</script>
{% endblock %}
================================================
FILE: pyproject.toml
================================================
[build-system]
requires = ["maturin>=1.7.0,<2.0"]
build-backend = "maturin"
[project]
name = "fastexcel"
description = "A fast excel file reader for Python, written in Rust"
readme = "README.md"
license = { file = "LICENSE" }
requires-python = ">=3.10"
classifiers = [
"Development Status :: 5 - Production/Stable",
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
"Programming Language :: Rust",
"Programming Language :: Python",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3 :: Only",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
"Programming Language :: Python :: 3.14",
"Programming Language :: Python :: Implementation :: CPython",
"Programming Language :: Python :: Free Threading :: 1 - Unstable"
]
dependencies = ["typing-extensions>=4.0.0; python_version<'3.10'"]
dynamic = ["version"]
[project.optional-dependencies]
pyarrow = ["pyarrow>=8.0.0"]
pandas = ["pandas>=1.4.4", "pyarrow>=8.0.0"]
polars = ["polars>=1"]
[dependency-groups]
dev = ["maturin>=1.7.0,<2.0"]
testing = [
{ include-group = "dev" },
"pytest>=7.1.3",
"pytest-benchmark>=4.0.0,<6",
"pytest-mock>=3.1",
"pyarrow>=8.0.0",
"pandas>=1.4.4",
"polars>=0.16.14",
"openpyxl>=3.1.2,<4",
"xlrd>=2.0.1,<3",
]
linting = [
{ include-group = "dev" },
"mypy>=2,<3",
"pre-commit>=2.20.0,<5",
"ruff>=0.15",
]
docs = [{ include-group = "dev" }, "pdoc"]
all = [
{ include-group = "testing" },
{ include-group = "linting" },
{ include-group = "docs" },
]
[project.urls]
"Source Code" = "https://github.com/ToucanToco/fastexcel"
# Point at the issue tracker rather than at the repository root
Issues = "https://github.com/ToucanToco/fastexcel/issues"
[tool.maturin]
python-source = "python"
module-name = "fastexcel._fastexcel"
features = ["__maturin"]
[tool.mypy]
python_version = "3.10"
follow_imports = "silent"
ignore_missing_imports = true
# A few custom options
show_error_codes = true
warn_no_return = true
warn_unused_configs = true
warn_unused_ignores = true
[tool.pytest.ini_options]
testpaths = "python/tests"
log_cli = true
log_cli_level = "INFO"
[tool.ruff]
line-length = 100
target-version = "py310"
[tool.ruff.lint]
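# Enable pycodestyle (`E`) and Pyflakes (`F`) rules, plus isort (`I`), flake8-quotes (`Q`),
# the `FA102` future-annotations check and pyupgrade (`UP`).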
select = ["E", "F", "I", "Q", "FA102", "UP"]
[tool.uv]
# this ensures that `uv run` doesn't actually build the package; a `make`
# command is needed to build
package = false
required-version = '>=0.8.4'
================================================
FILE: python/fastexcel/__init__.py
================================================
from __future__ import annotations
import typing
from collections.abc import Callable
from typing import TYPE_CHECKING, Literal, TypeAlias
if TYPE_CHECKING:
import pandas as pd
import polars as pl
import pyarrow as pa
from os.path import expanduser
from pathlib import Path
try:
    import importlib.util

    # `find_spec` returns `None` (it does not raise) when a top-level module is
    # not installed, so its result has to be checked explicitly.
    _PYARROW_AVAILABLE = importlib.util.find_spec("pyarrow") is not None
except ImportError:
    _PYARROW_AVAILABLE = False
from ._fastexcel import (
ArrowError,
CalamineCellError,
CalamineError,
CannotRetrieveCellDataError,
CellError,
CellErrors,
ColumnInfo,
ColumnInfoNoDtype,
ColumnNotFoundError,
DefinedName,
FastExcelError,
InvalidParametersError,
SheetNotFoundError,
UnsupportedColumnTypeCombinationError,
__version__,
_ExcelReader,
_ExcelSheet,
_ExcelTable,
)
from ._fastexcel import read_excel as _read_excel
# Names of the dtypes that can be requested through the `dtypes` parameter
DType = Literal["null", "int", "float", "string", "boolean", "datetime", "date", "duration"]
# Per-column dtypes, keyed by column name or column index
DTypeMap: TypeAlias = "dict[str | int, DType]"
# Possible values of `ColumnInfo.column_name_from`
ColumnNameFrom: TypeAlias = Literal["provided", "looked_up", "generated"]
# Possible values of `ColumnInfo.dtype_from`
DTypeFrom: TypeAlias = Literal[
"provided_for_all", "provided_by_index", "provided_by_name", "guessed"
]
# Possible values of `ExcelSheet.visible`
SheetVisible: TypeAlias = Literal["visible", "hidden", "veryhidden"]
class ExcelSheet:
    """Python-side wrapper around a single sheet of an Excel file"""

    def __init__(self, sheet: _ExcelSheet) -> None:
        # Every accessor below delegates to this underlying sheet object
        self._sheet = sheet

    @property
    def name(self) -> str:
        """Name of the sheet"""
        inner = self._sheet
        return inner.name

    @property
    def width(self) -> int:
        """Width of the sheet"""
        inner = self._sheet
        return inner.width

    @property
    def height(self) -> int:
        """Height of the sheet, once `skip_rows` and `nrows` have been applied"""
        inner = self._sheet
        return inner.height

    @property
    def total_height(self) -> int:
        """Total height of the sheet"""
        inner = self._sheet
        return inner.total_height

    @property
    def selected_columns(self) -> list[ColumnInfo]:
        """Columns selected for the sheet"""
        inner = self._sheet
        return inner.selected_columns

    def available_columns(self) -> list[ColumnInfo]:
        """Columns available in the sheet"""
        inner = self._sheet
        return inner.available_columns()

    @property
    def specified_dtypes(self) -> DTypeMap | None:
        """Dtypes that were specified for the sheet"""
        inner = self._sheet
        return inner.specified_dtypes

    @property
    def visible(self) -> SheetVisible:
        """Visibility of the sheet"""
        inner = self._sheet
        return inner.visible

    def to_arrow(self) -> pa.RecordBatch:
        """Returns the sheet as a pyarrow `RecordBatch`.

        Requires the `pyarrow` extra to be installed.

        :raises ImportError: if `pyarrow` was not detected at import time
        """
        if _PYARROW_AVAILABLE:
            return self._sheet.to_arrow()
        raise ImportError(
            "pyarrow is required for to_arrow(). Install with: pip install 'fastexcel[pyarrow]'"
        )

    def to_arrow_with_errors(self) -> tuple[pa.RecordBatch, CellErrors | None]:
        """Returns the sheet as a pyarrow `RecordBatch` along with error information.

        The second element holds the positions of the values that could not be parsed as
        the specified type and were therefore converted to None. It is `None` when no such
        error was recorded.

        Requires the `pyarrow` extra to be installed.

        :raises ImportError: if `pyarrow` was not detected at import time
        """
        if _PYARROW_AVAILABLE:
            batch, errors = self._sheet.to_arrow_with_errors()
            # An empty error collection is reported as `None`
            return (batch, errors if errors.errors else None)
        raise ImportError(
            "pyarrow is required for to_arrow_with_errors(). "
            "Install with: pip install 'fastexcel[pyarrow]'"
        )

    def to_pandas(self) -> pd.DataFrame:
        """Returns the sheet as a Pandas `DataFrame`.

        Requires the `pandas` extra to be installed.
        """
        # Note: pandas PyCapsule interface requires __dataframe__ or __arrow_c_stream__
        # which we don't implement. Using pyarrow conversion for now.
        # (see https://pandas.pydata.org/docs/reference/api/pandas.api.interchange.from_dataframe.html)
        record_batch = self.to_arrow()
        return record_batch.to_pandas()

    def to_polars(self) -> pl.DataFrame:
        """Returns the sheet as a Polars `DataFrame`.

        The data is handed over through the Arrow PyCapsule Interface (zero-copy).

        Requires the `polars` extra to be installed.
        """
        import polars

        return polars.DataFrame(self)

    def __arrow_c_schema__(self) -> object:
        """Exports the schema as an `ArrowSchema` `PyCapsule`.

        https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html#arrowschema-export

        The Arrow PyCapsule Interface enables zero-copy data exchange with
        Arrow-compatible libraries without requiring PyArrow as a dependency.
        """
        inner = self._sheet
        return inner.__arrow_c_schema__()

    def __arrow_c_array__(self, requested_schema: object | None = None) -> tuple[object, object]:
        """Exports the schema and data as a pair of `ArrowSchema` and `ArrowArray` `PyCapsules`.

        The optional `requested_schema` parameter allows for potential schema conversion.

        https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html#arrowarray-export

        The Arrow PyCapsule Interface enables zero-copy data exchange with
        Arrow-compatible libraries without requiring PyArrow as a dependency.
        """
        inner = self._sheet
        return inner.__arrow_c_array__(requested_schema)

    def __repr__(self) -> str:
        # Same representation as the wrapped sheet
        return self._sheet.__repr__()
class ExcelTable:
    """Python-side wrapper around a single table of an Excel file"""

    def __init__(self, table: _ExcelTable) -> None:
        # Every accessor below delegates to this underlying table object
        self._table = table

    @property
    def name(self) -> str:
        """Name of the table"""
        inner = self._table
        return inner.name

    @property
    def sheet_name(self) -> str:
        """Name of the sheet the table belongs to"""
        inner = self._table
        return inner.sheet_name

    @property
    def width(self) -> int:
        """Width of the table"""
        inner = self._table
        return inner.width

    @property
    def height(self) -> int:
        """Height of the table"""
        inner = self._table
        return inner.height

    @property
    def total_height(self) -> int:
        """Total height of the table"""
        inner = self._table
        return inner.total_height

    @property
    def offset(self) -> int:
        """Offset of the table before its data starts"""
        inner = self._table
        return inner.offset

    @property
    def selected_columns(self) -> list[ColumnInfo]:
        """Columns selected for the table"""
        inner = self._table
        return inner.selected_columns

    def available_columns(self) -> list[ColumnInfo]:
        """Columns available in the table"""
        inner = self._table
        return inner.available_columns()

    @property
    def specified_dtypes(self) -> DTypeMap | None:
        """Dtypes that were specified for the table"""
        inner = self._table
        return inner.specified_dtypes

    def to_arrow(self) -> pa.RecordBatch:
        """Returns the table as a pyarrow `RecordBatch`.

        Requires the `pyarrow` extra to be installed.

        :raises ImportError: if `pyarrow` was not detected at import time
        """
        if _PYARROW_AVAILABLE:
            return self._table.to_arrow()
        raise ImportError(
            "pyarrow is required for to_arrow(). Install with: pip install 'fastexcel[pyarrow]'"
        )

    def to_pandas(self) -> pd.DataFrame:
        """Returns the table as a Pandas `DataFrame`.

        Requires the `pandas` extra to be installed.
        """
        # Note: pandas PyCapsule interface requires __dataframe__ or __arrow_c_stream__
        # which we don't implement. Using pyarrow conversion for now.
        # (see https://pandas.pydata.org/docs/reference/api/pandas.api.interchange.from_dataframe.html)
        record_batch = self.to_arrow()
        return record_batch.to_pandas()

    def to_polars(self) -> pl.DataFrame:
        """Returns the table as a Polars `DataFrame`.

        The data is handed over through the Arrow PyCapsule Interface (zero-copy).

        Requires the `polars` extra to be installed.
        """
        import polars

        return polars.DataFrame(self)

    def __arrow_c_schema__(self) -> object:
        """Exports the schema as an `ArrowSchema` `PyCapsule`.

        https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html#arrowschema-export

        The Arrow PyCapsule Interface enables zero-copy data exchange with
        Arrow-compatible libraries without requiring PyArrow as a dependency.
        """
        inner = self._table
        return inner.__arrow_c_schema__()

    def __arrow_c_array__(self, requested_schema: object | None = None) -> tuple[object, object]:
        """Exports the schema and data as a pair of `ArrowSchema` and `ArrowArray` `PyCapsules`.

        The optional `requested_schema` parameter allows for potential schema conversion.

        https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html#arrowarray-export

        The Arrow PyCapsule Interface enables zero-copy data exchange with
        Arrow-compatible libraries without requiring PyArrow as a dependency.
        """
        inner = self._table
        return inner.__arrow_c_array__(requested_schema)
class ExcelReader:
    """A class representing an open Excel file and allowing to read its sheets"""

    def __init__(self, reader: _ExcelReader) -> None:
        # Every method below delegates to this underlying reader object
        self._reader = reader

    @property
    def sheet_names(self) -> list[str]:
        """The list of sheet names"""
        return self._reader.sheet_names

    @typing.overload
    def load_sheet(
        self,
        idx_or_name: int | str,
        *,
        header_row: int | None = 0,
        column_names: list[str] | None = None,
        skip_rows: int | list[int] | Callable[[int], bool] | None = None,
        n_rows: int | None = None,
        schema_sample_rows: int | None = 1_000,
        dtype_coercion: Literal["coerce", "strict"] = "coerce",
        use_columns: list[str]
        | list[int]
        | str
        | Callable[[ColumnInfoNoDtype], bool]
        | None = None,
        dtypes: DType | DTypeMap | None = None,
        eager: Literal[False] = ...,
        skip_whitespace_tail_rows: bool = False,
        whitespace_as_null: bool = False,
    ) -> ExcelSheet: ...

    @typing.overload
    def load_sheet(
        self,
        idx_or_name: int | str,
        *,
        header_row: int | None = 0,
        column_names: list[str] | None = None,
        skip_rows: int | list[int] | Callable[[int], bool] | None = None,
        n_rows: int | None = None,
        schema_sample_rows: int | None = 1_000,
        dtype_coercion: Literal["coerce", "strict"] = "coerce",
        use_columns: list[str]
        | list[int]
        | str
        | Callable[[ColumnInfoNoDtype], bool]
        | None = None,
        dtypes: DType | DTypeMap | None = None,
        eager: Literal[True] = ...,
        skip_whitespace_tail_rows: bool = False,
        whitespace_as_null: bool = False,
    ) -> pa.RecordBatch: ...

    def load_sheet(
        self,
        idx_or_name: int | str,
        *,
        header_row: int | None = 0,
        column_names: list[str] | None = None,
        skip_rows: int | list[int] | Callable[[int], bool] | None = None,
        n_rows: int | None = None,
        schema_sample_rows: int | None = 1_000,
        dtype_coercion: Literal["coerce", "strict"] = "coerce",
        use_columns: list[str]
        | list[int]
        | str
        | Callable[[ColumnInfoNoDtype], bool]
        | None = None,
        dtypes: DType | DTypeMap | None = None,
        eager: bool = False,
        skip_whitespace_tail_rows: bool = False,
        whitespace_as_null: bool = False,
    ) -> ExcelSheet | pa.RecordBatch:
        """Loads a sheet by index or name.

        :param idx_or_name: The index (starting at 0) or the name of the sheet to load.
        :param header_row: The index of the row containing the column labels, default index is 0.
                           If `None`, the sheet does not have any column labels.
                           Any rows before the `header_row` will be automatically skipped.
        :param column_names: Overrides headers found in the document.
                             If `column_names` is used, `header_row` will be ignored.
        :param n_rows: Specifies how many rows should be loaded.
                       If `None`, all rows are loaded
        :param skip_rows: Specifies which rows should be skipped after the `header_row`.
                          Any rows before the `header_row` are automatically skipped.
                          It means row indices are relative to data rows, not the sheet!
                          Can be one of:
                          - `int`: Skip this many rows after the header row
                          - `list[int]`: Skip specific row indices (0-based relative to data rows)
                          - `Callable[[int], bool]`: Function that receives row index (0-based
                            relative to data rows) and returns True to skip the row
                          - `None`: If `header_row` is None, skips empty rows at beginning
        :param schema_sample_rows: Specifies how many rows should be used to determine
                                   the dtype of a column. Cannot be 0. A specific dtype can be
                                   enforced for some or all columns through the `dtypes` parameter.
                                   If `None`, all rows will be used.
        :param dtype_coercion: Specifies how type coercion should behave. `coerce` (the default)
                               will try to coerce different dtypes in a column to the same one,
                               whereas `strict` will raise an error in case a column contains
                               several dtypes. Note that this only applies to columns whose dtype
                               is guessed, i.e. not specified via `dtypes`.
        :param use_columns: Specifies the columns to use. Can either be:
                            - `None` to select all columns
                            - A list of strings and ints, the column names and/or indices
                              (starting at 0)
                            - A string, a comma separated list of Excel column letters and column
                              ranges (e.g. `"A:E"` or `"A,C,E:F"`, which would result in
                              `A,B,C,D,E` and `A,C,E,F`). Also supports open-ended ranges
                              (e.g. `"B:"` to select all columns from B onwards) and from-beginning
                              ranges (e.g. `":C"` to select columns from A to C). These can be
                              combined for "except" patterns (e.g. `":C,E:"` to select everything
                              except column D)
                            - A callable, a function that takes a column and returns a boolean
                              indicating whether the column should be used
        :param dtypes: An optional dtype (for all columns)
                       or dict of dtypes with keys as column indices or names.
        :param eager: Specifies whether the sheet should be loaded eagerly.
                      `False` (default) will load the sheet lazily using the `PyCapsule` interface,
                      whereas `True` will load it eagerly via `pyarrow`.
                      Eager loading requires the `pyarrow` extra to be installed.
        :param skip_whitespace_tail_rows: Skip rows at the end of the sheet
                                          containing only whitespace and null values.
        :param whitespace_as_null: Consider cells containing only whitespace as null values.
        """
        sheet_or_rb = self._reader.load_sheet(
            idx_or_name=idx_or_name,
            header_row=header_row,
            column_names=column_names,
            skip_rows=skip_rows,
            n_rows=n_rows,
            schema_sample_rows=schema_sample_rows,
            dtype_coercion=dtype_coercion,
            use_columns=use_columns,
            dtypes=dtypes,
            eager=eager,
            skip_whitespace_tail_rows=skip_whitespace_tail_rows,
            whitespace_as_null=whitespace_as_null,
        )
        # Eager results are returned untouched; lazy sheets get wrapped in `ExcelSheet`
        return sheet_or_rb if eager else ExcelSheet(sheet_or_rb)

    def table_names(self, sheet_name: str | None = None) -> list[str]:
        """The list of table names.

        Will return an empty list if no tables are found.

        :param sheet_name: If given, will limit the list to the given sheet, will be faster
                           too.
        """
        return self._reader.table_names(sheet_name)

    def defined_names(self) -> list[DefinedName]:
        """The list of defined names (named ranges) in the workbook.

        Returns a list of DefinedName objects with 'name' and 'formula' attributes.
        The formula is a string representation of the range or expression.

        Will return an empty list if no defined names are found.
        """
        return self._reader.defined_names()

    @typing.overload
    def load_table(
        self,
        name: str,
        *,
        header_row: int | None = None,
        column_names: list[str] | None = None,
        skip_rows: int | None = None,
        n_rows: int | None = None,
        schema_sample_rows: int | None = 1_000,
        dtype_coercion: Literal["coerce", "strict"] = "coerce",
        use_columns: list[str]
        | list[int]
        | str
        | Callable[[ColumnInfoNoDtype], bool]
        | None = None,
        dtypes: DType | DTypeMap | None = None,
        eager: Literal[False] = ...,
        skip_whitespace_tail_rows: bool = False,
        whitespace_as_null: bool = False,
    ) -> ExcelTable: ...

    @typing.overload
    def load_table(
        self,
        name: str,
        *,
        header_row: int | None = None,
        column_names: list[str] | None = None,
        skip_rows: int | None = None,
        n_rows: int | None = None,
        schema_sample_rows: int | None = 1_000,
        dtype_coercion: Literal["coerce", "strict"] = "coerce",
        use_columns: list[str]
        | list[int]
        | str
        | Callable[[ColumnInfoNoDtype], bool]
        | None = None,
        dtypes: DType | DTypeMap | None = None,
        eager: Literal[True] = ...,
        skip_whitespace_tail_rows: bool = False,
        whitespace_as_null: bool = False,
    ) -> pa.RecordBatch: ...

    def load_table(
        self,
        name: str,
        *,
        header_row: int | None = None,
        column_names: list[str] | None = None,
        skip_rows: int | None = None,
        n_rows: int | None = None,
        schema_sample_rows: int | None = 1_000,
        dtype_coercion: Literal["coerce", "strict"] = "coerce",
        use_columns: list[str]
        | list[int]
        | str
        | Callable[[ColumnInfoNoDtype], bool]
        | None = None,
        dtypes: DType | DTypeMap | None = None,
        eager: bool = False,
        skip_whitespace_tail_rows: bool = False,
        whitespace_as_null: bool = False,
    ) -> ExcelTable | pa.RecordBatch:
        """Loads a table by name.

        :param name: The name of the table to load.
        :param header_row: The index of the row containing the column labels.
                           If `None`, the table's column names will be used.
                           Any rows before the `header_row` will be automatically skipped.
        :param column_names: Overrides headers found in the document.
                             If `column_names` is used, `header_row` will be ignored.
        :param n_rows: Specifies how many rows should be loaded.
                       If `None`, all rows are loaded
        :param skip_rows: Specifies how many rows should be skipped after the `header_row`.
                          Any rows before the `header_row` are automatically skipped.
                          If `header_row` is `None`, it skips the number of rows from the
                          start of the sheet.
        :param schema_sample_rows: Specifies how many rows should be used to determine
                                   the dtype of a column. Cannot be 0. A specific dtype can be
                                   enforced for some or all columns through the `dtypes` parameter.
                                   If `None`, all rows will be used.
        :param dtype_coercion: Specifies how type coercion should behave. `coerce` (the default)
                               will try to coerce different dtypes in a column to the same one,
                               whereas `strict` will raise an error in case a column contains
                               several dtypes. Note that this only applies to columns whose dtype
                               is guessed, i.e. not specified via `dtypes`.
        :param use_columns: Specifies the columns to use. Can either be:
                            - `None` to select all columns
                            - A list of strings and ints, the column names and/or indices
                              (starting at 0)
                            - A string, a comma separated list of Excel column letters and column
                              ranges (e.g. `"A:E"` or `"A,C,E:F"`, which would result in
                              `A,B,C,D,E` and `A,C,E,F`). Also supports open-ended ranges
                              (e.g. `"B:"` to select all columns from B onwards) and from-beginning
                              ranges (e.g. `":C"` to select columns from A to C). These can be
                              combined for "except" patterns (e.g. `":C,E:"` to select everything
                              except column D)
                            - A callable, a function that takes a column and returns a boolean
                              indicating whether the column should be used
        :param dtypes: An optional dtype (for all columns)
                       or dict of dtypes with keys as column indices or names.
        :param eager: Specifies whether the table should be loaded eagerly.
                      `False` (default) will load the table lazily using the `PyCapsule` interface,
                      whereas `True` will load it eagerly via `pyarrow`.
                      Eager loading requires the `pyarrow` extra to be installed.
        :param skip_whitespace_tail_rows: Skip rows at the end of the table
                                          containing only whitespace and null values.
        :param whitespace_as_null: Consider cells containing only whitespace as null values.
        """
        # NOTE(review): both branches pass a literal `eager` value, presumably so that
        # type checkers can select the matching `_ExcelReader.load_table` overload.
        if eager:
            return self._reader.load_table(
                name=name,
                header_row=header_row,
                column_names=column_names,
                skip_rows=skip_rows,
                n_rows=n_rows,
                schema_sample_rows=schema_sample_rows,
                dtype_coercion=dtype_coercion,
                use_columns=use_columns,
                dtypes=dtypes,
                eager=True,
                skip_whitespace_tail_rows=skip_whitespace_tail_rows,
                whitespace_as_null=whitespace_as_null,
            )
        else:
            return ExcelTable(
                self._reader.load_table(
                    name=name,
                    header_row=header_row,
                    column_names=column_names,
                    skip_rows=skip_rows,
                    n_rows=n_rows,
                    schema_sample_rows=schema_sample_rows,
                    dtype_coercion=dtype_coercion,
                    use_columns=use_columns,
                    dtypes=dtypes,
                    eager=False,
                    skip_whitespace_tail_rows=skip_whitespace_tail_rows,
                    whitespace_as_null=whitespace_as_null,
                )
            )

    def load_sheet_eager(
        self,
        idx_or_name: int | str,
        *,
        header_row: int | None = 0,
        column_names: list[str] | None = None,
        skip_rows: int | list[int] | Callable[[int], bool] | None = None,
        n_rows: int | None = None,
        schema_sample_rows: int | None = 1_000,
        dtype_coercion: Literal["coerce", "strict"] = "coerce",
        use_columns: list[str]
        | list[int]
        | str
        | Callable[[ColumnInfoNoDtype], bool]
        | None = None,
        dtypes: DType | DTypeMap | None = None,
        skip_whitespace_tail_rows: bool = False,
        whitespace_as_null: bool = False,
    ) -> pa.RecordBatch:
        """Loads a sheet eagerly by index or name.

        For xlsx files, this will be faster and more memory-efficient, as it will use
        `worksheet_range_ref` under the hood, which returns borrowed types.

        Refer to `load_sheet` for parameter documentation

        Requires the `pyarrow` extra to be installed.
        """
        return self._reader.load_sheet(
            idx_or_name=idx_or_name,
            header_row=header_row,
            column_names=column_names,
            skip_rows=skip_rows,
            n_rows=n_rows,
            schema_sample_rows=schema_sample_rows,
            dtype_coercion=dtype_coercion,
            use_columns=use_columns,
            dtypes=dtypes,
            eager=True,
            skip_whitespace_tail_rows=skip_whitespace_tail_rows,
            whitespace_as_null=whitespace_as_null,
        )

    def load_sheet_by_name(
        self,
        name: str,
        *,
        header_row: int | None = 0,
        column_names: list[str] | None = None,
        skip_rows: int | list[int] | Callable[[int], bool] | None = None,
        n_rows: int | None = None,
        schema_sample_rows: int | None = 1_000,
        dtype_coercion: Literal["coerce", "strict"] = "coerce",
        use_columns: list[str]
        | list[int]
        | str
        | Callable[[ColumnInfoNoDtype], bool]
        | None = None,
        dtypes: DType | DTypeMap | None = None,
        skip_whitespace_tail_rows: bool = False,
        whitespace_as_null: bool = False,
    ) -> ExcelSheet:
        """Loads a sheet by name.

        Refer to `load_sheet` for parameter documentation
        """
        return self.load_sheet(
            name,
            header_row=header_row,
            column_names=column_names,
            skip_rows=skip_rows,
            n_rows=n_rows,
            schema_sample_rows=schema_sample_rows,
            dtype_coercion=dtype_coercion,
            use_columns=use_columns,
            dtypes=dtypes,
            skip_whitespace_tail_rows=skip_whitespace_tail_rows,
            whitespace_as_null=whitespace_as_null,
        )

    def load_sheet_by_idx(
        self,
        idx: int,
        *,
        header_row: int | None = 0,
        column_names: list[str] | None = None,
        skip_rows: int | list[int] | Callable[[int], bool] | None = None,
        n_rows: int | None = None,
        schema_sample_rows: int | None = 1_000,
        dtype_coercion: Literal["coerce", "strict"] = "coerce",
        use_columns: list[str]
        | list[int]
        | str
        | Callable[[ColumnInfoNoDtype], bool]
        | None = None,
        dtypes: DType | DTypeMap | None = None,
        skip_whitespace_tail_rows: bool = False,
        whitespace_as_null: bool = False,
    ) -> ExcelSheet:
        """Loads a sheet by index.

        Refer to `load_sheet` for parameter documentation
        """
        return self.load_sheet(
            idx,
            header_row=header_row,
            column_names=column_names,
            skip_rows=skip_rows,
            n_rows=n_rows,
            schema_sample_rows=schema_sample_rows,
            dtype_coercion=dtype_coercion,
            use_columns=use_columns,
            dtypes=dtypes,
            skip_whitespace_tail_rows=skip_whitespace_tail_rows,
            whitespace_as_null=whitespace_as_null,
        )

    def __repr__(self) -> str:
        # Same representation as the wrapped reader
        return self._reader.__repr__()
def read_excel(source: Path | str | bytes) -> ExcelReader:
    """Opens an Excel file and returns a reader for it.

    :param source: Either the path to a file (a leading `~` is expanded)
                   or the content of a file as bytes
    """
    is_path = isinstance(source, (str, Path))
    # Only paths go through user-directory expansion; bytes are passed as-is
    resolved = expanduser(source) if is_path else source
    return ExcelReader(_read_excel(resolved))
# Public API of the package
__all__ = (
    # version
    "__version__",
    # main entrypoint
    "read_excel",
    # Python types
    "DType",
    "DTypeMap",
    # Excel reader
    "ExcelReader",
    # Excel sheet
    "ExcelSheet",
    "SheetVisible",
    # Excel table
    "ExcelTable",
    # Column metadata
    "DTypeFrom",
    "ColumnNameFrom",
    "ColumnInfo",
    # Argument type of the callables accepted by `use_columns`
    "ColumnInfoNoDtype",
    # Defined names
    "DefinedName",
    # Parse error information
    "CellError",
    "CellErrors",
    # Exceptions
    "FastExcelError",
    "CannotRetrieveCellDataError",
    "CalamineCellError",
    "CalamineError",
    "SheetNotFoundError",
    "ColumnNotFoundError",
    "ArrowError",
    "InvalidParametersError",
    "UnsupportedColumnTypeCombinationError",
)
================================================
FILE: python/fastexcel/_fastexcel.pyi
================================================
from __future__ import annotations
import typing
from collections.abc import Callable
from typing import TYPE_CHECKING, Literal
if TYPE_CHECKING:
import pyarrow as pa
# Names of the dtypes accepted by the `dtypes` parameter of the loaders
DType = Literal["null", "int", "float", "string", "boolean", "datetime", "date", "duration"]
# Per-column dtypes, keyed by column name or column index
DTypeMap = dict[str | int, DType]
# Possible values of the `column_name_from` property of column info objects
ColumnNameFrom = Literal["provided", "looked_up", "generated"]
# Possible values of `ColumnInfo.dtype_from`
DTypeFrom = Literal["provided_for_all", "provided_by_index", "provided_by_name", "guessed"]
# Possible values of `_ExcelSheet.visible`
SheetVisible = Literal["visible", "hidden", "veryhidden"]
class ColumnInfoNoDtype:
    """Metadata about a column, without any dtype information.

    This is the argument type of the callables accepted by `use_columns`.
    """

    def __init__(
        self,
        *,
        name: str,
        index: int,
        absolute_index: int,
        column_name_from: ColumnNameFrom,
    ) -> None:
        """Builds a column description; every field is keyword-only."""

    @property
    def name(self) -> str:
        """The column's name"""

    @property
    def index(self) -> int:
        """The column's index"""

    @property
    def absolute_index(self) -> int:
        """The column's absolute index"""

    @property
    def column_name_from(self) -> ColumnNameFrom:
        """Origin of the column's name: `provided`, `looked_up` or `generated`"""
class ColumnInfo:
    """Metadata about a column, including its dtype."""

    def __init__(
        self,
        *,
        name: str,
        index: int,
        absolute_index: int,
        column_name_from: ColumnNameFrom,
        dtype: DType,
        dtype_from: DTypeFrom,
    ) -> None:
        """Builds a column description; every field is keyword-only."""

    @property
    def name(self) -> str:
        """The column's name"""

    @property
    def index(self) -> int:
        """The column's index"""

    @property
    def absolute_index(self) -> int:
        """The column's absolute index"""

    @property
    def dtype(self) -> DType:
        """The column's dtype"""

    @property
    def column_name_from(self) -> ColumnNameFrom:
        """Origin of the column's name: `provided`, `looked_up` or `generated`"""

    @property
    def dtype_from(self) -> DTypeFrom:
        """Origin of the column's dtype: `provided_for_all`, `provided_by_index`,
        `provided_by_name` or `guessed`
        """
class DefinedName:
    """A name defined in a workbook, together with its formula."""

    def __init__(
        self,
        *,
        name: str,
        formula: str,
    ) -> None:
        """Builds a defined name; both fields are keyword-only."""

    @property
    def name(self) -> str:
        """The defined name"""

    @property
    def formula(self) -> str:
        """The formula associated with the name, as a string"""
class CellError:
    """Information about a single cell error."""

    @property
    def position(self) -> tuple[int, int]:
        """The position of the cell, as a pair of ints"""

    @property
    def row_offset(self) -> int:
        """The row offset associated with the error"""

    @property
    def offset_position(self) -> tuple[int, int]:
        """The offset position of the cell, as a pair of ints"""

    @property
    def detail(self) -> str:
        """A textual description of the error"""

    def __repr__(self) -> str: ...
class CellErrors:
    """A collection of `CellError` objects."""

    @property
    def errors(self) -> list[CellError]:
        """The list of cell errors"""

    def __repr__(self) -> str: ...
class _ExcelSheet:
    """Type stub for the native sheet object wrapped by `fastexcel.ExcelSheet`."""

    @property
    def name(self) -> str:
        """Name of the sheet"""

    @property
    def width(self) -> int:
        """Width of the sheet"""

    @property
    def height(self) -> int:
        """Height of the sheet"""

    @property
    def total_height(self) -> int:
        """Total height of the sheet"""

    @property
    def offset(self) -> int:
        """Offset of the sheet before its data starts"""

    @property
    def selected_columns(self) -> list[ColumnInfo]:
        """Columns selected for the sheet"""

    def available_columns(self) -> list[ColumnInfo]:
        """Columns available in the sheet"""

    @property
    def specified_dtypes(self) -> DTypeMap | None:
        """Dtypes that were specified for the sheet"""

    @property
    def visible(self) -> SheetVisible:
        """Visibility of the sheet"""

    def to_arrow(self) -> pa.RecordBatch:
        """Returns the sheet as a pyarrow `RecordBatch`.

        Requires the `pyarrow` extra to be installed.
        """

    def to_arrow_with_errors(self) -> tuple[pa.RecordBatch, CellErrors]:
        """Returns the sheet as a pyarrow `RecordBatch` along with error information.

        The errors hold the positions of the values that could not be parsed as the
        specified type and were therefore converted to None.

        Requires the `pyarrow` extra to be installed.
        """

    def __arrow_c_schema__(self) -> object:
        """Exports the schema as an `ArrowSchema` `PyCapsule`.

        https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html#arrowschema-export

        The Arrow PyCapsule Interface enables zero-copy data exchange with
        Arrow-compatible libraries without requiring PyArrow as a dependency.
        """

    def __arrow_c_array__(self, requested_schema: object = None) -> tuple[object, object]:
        """Exports the schema and data as a pair of `ArrowSchema` and `ArrowArray` `PyCapsules`.

        The optional `requested_schema` parameter allows for potential schema conversion.

        https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html#arrowarray-export

        The Arrow PyCapsule Interface enables zero-copy data exchange with
        Arrow-compatible libraries without requiring PyArrow as a dependency.
        """
class _ExcelTable:
    """Type stub for the native table object wrapped by `fastexcel.ExcelTable`."""

    @property
    def name(self) -> str:
        """Name of the table"""

    @property
    def sheet_name(self) -> str:
        """Name of the sheet the table belongs to"""

    @property
    def width(self) -> int:
        """Width of the table"""

    @property
    def height(self) -> int:
        """Height of the table"""

    @property
    def total_height(self) -> int:
        """Total height of the table"""

    @property
    def offset(self) -> int:
        """Offset of the table before its data starts"""

    @property
    def selected_columns(self) -> list[ColumnInfo]:
        """Columns selected for the table"""

    def available_columns(self) -> list[ColumnInfo]:
        """Columns available in the table"""

    @property
    def specified_dtypes(self) -> DTypeMap | None:
        """Dtypes that were specified for the table"""

    def to_arrow(self) -> pa.RecordBatch:
        """Returns the table as a pyarrow `RecordBatch`.

        Requires the `pyarrow` extra to be installed.
        """

    def __arrow_c_schema__(self) -> object:
        """Exports the schema as an `ArrowSchema` `PyCapsule`.

        https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html#arrowschema-export

        The Arrow PyCapsule Interface enables zero-copy data exchange with
        Arrow-compatible libraries without requiring PyArrow as a dependency.
        """

    def __arrow_c_array__(self, requested_schema: object = None) -> tuple[object, object]:
        """Exports the schema and data as a pair of `ArrowSchema` and `ArrowArray` `PyCapsules`.

        The optional `requested_schema` parameter allows for potential schema conversion.

        https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html#arrowarray-export

        The Arrow PyCapsule Interface enables zero-copy data exchange with
        Arrow-compatible libraries without requiring PyArrow as a dependency.
        """
class _ExcelReader:
    """A class representing an open Excel file and allowing to read its sheets"""

    # `load_sheet` overloads: the declared return type depends on `eager`.
    #   * `eager=False` (or omitted) -> `_ExcelSheet`
    #   * `eager=True`               -> `pa.RecordBatch`
    #   * a non-literal `bool`       -> either of the two
    @typing.overload
    def load_sheet(
        self,
        idx_or_name: str | int,
        *,
        header_row: int | None = 0,
        column_names: list[str] | None = None,
        skip_rows: int | list[int] | Callable[[int], bool] | None = None,
        n_rows: int | None = None,
        schema_sample_rows: int | None = 1_000,
        dtype_coercion: Literal["coerce", "strict"] = "coerce",
        use_columns: list[str]
        | list[int]
        | str
        | Callable[[ColumnInfoNoDtype], bool]
        | None = None,
        dtypes: DType | DTypeMap | None = None,
        eager: Literal[False] = ...,
        skip_whitespace_tail_rows: bool = False,
        whitespace_as_null: bool = False,
    ) -> _ExcelSheet: ...
    @typing.overload
    def load_sheet(
        self,
        idx_or_name: str | int,
        *,
        header_row: int | None = 0,
        column_names: list[str] | None = None,
        skip_rows: int | list[int] | Callable[[int], bool] | None = None,
        n_rows: int | None = None,
        schema_sample_rows: int | None = 1_000,
        dtype_coercion: Literal["coerce", "strict"] = "coerce",
        use_columns: list[str]
        | list[int]
        | str
        | Callable[[ColumnInfoNoDtype], bool]
        | None = None,
        dtypes: DType | DTypeMap | None = None,
        eager: Literal[True] = ...,
        skip_whitespace_tail_rows: bool = False,
        whitespace_as_null: bool = False,
    ) -> pa.RecordBatch: ...
    # Fallback for callers passing a plain `bool`: the value is only known at
    # runtime, so the result may be the lazy sheet or the eager record batch.
    @typing.overload
    def load_sheet(
        self,
        idx_or_name: str | int,
        *,
        header_row: int | None = 0,
        column_names: list[str] | None = None,
        skip_rows: int | list[int] | Callable[[int], bool] | None = None,
        n_rows: int | None = None,
        schema_sample_rows: int | None = 1_000,
        dtype_coercion: Literal["coerce", "strict"] = "coerce",
        use_columns: list[str]
        | list[int]
        | str
        | Callable[[ColumnInfoNoDtype], bool]
        | None = None,
        dtypes: DType | DTypeMap | None = None,
        eager: bool = False,
        skip_whitespace_tail_rows: bool = False,
        whitespace_as_null: bool = False,
    ) -> _ExcelSheet | pa.RecordBatch: ...

    # `load_table` overloads: same `eager` convention as `load_sheet`, but the
    # table is always looked up by name and `header_row` defaults to `None`.
    @typing.overload
    def load_table(
        self,
        name: str,
        *,
        header_row: int | None = None,
        column_names: list[str] | None = None,
        skip_rows: int | list[int] | Callable[[int], bool] | None = None,
        n_rows: int | None = None,
        schema_sample_rows: int | None = 1_000,
        dtype_coercion: Literal["coerce", "strict"] = "coerce",
        use_columns: list[str]
        | list[int]
        | str
        | Callable[[ColumnInfoNoDtype], bool]
        | None = None,
        dtypes: DType | DTypeMap | None = None,
        eager: Literal[False] = ...,
        skip_whitespace_tail_rows: bool = False,
        whitespace_as_null: bool = False,
    ) -> _ExcelTable: ...
    @typing.overload
    def load_table(
        self,
        name: str,
        *,
        header_row: int | None = None,
        column_names: list[str] | None = None,
        skip_rows: int | list[int] | Callable[[int], bool] | None = None,
        n_rows: int | None = None,
        schema_sample_rows: int | None = 1_000,
        dtype_coercion: Literal["coerce", "strict"] = "coerce",
        use_columns: list[str]
        | list[int]
        | str
        | Callable[[ColumnInfoNoDtype], bool]
        | None = None,
        dtypes: DType | DTypeMap | None = None,
        eager: Literal[True] = ...,
        skip_whitespace_tail_rows: bool = False,
        whitespace_as_null: bool = False,
    ) -> pa.RecordBatch: ...

    # Read-only property: sheet names as a list of strings.
    @property
    def sheet_names(self) -> list[str]: ...

    # Table names as a list of strings; `sheet_name` is optional.
    # NOTE(review): presumably restricts the result to one sheet when given -- confirm.
    def table_names(self, sheet_name: str | None = None) -> list[str]: ...

    # Defined names of the file, as `DefinedName` objects.
    def defined_names(self) -> list[DefinedName]: ...
def read_excel(source: str | bytes) -> _ExcelReader:
    """Reads an excel file and returns an ExcelReader

    Args:
        source: Either a `str` or a `bytes` object.
            NOTE(review): presumably a file path or the raw file content,
            respectively -- confirm against the implementation.

    Returns:
        An `_ExcelReader` for the given source.
    """
__version__: str
# Exceptions
#
# `FastExcelError` is the common base class: every other exception declared
# below derives from it, so `except FastExcelError` catches all of them.
class FastExcelError(Exception): ...
class UnsupportedColumnTypeCombinationError(FastExcelError): ...
class CannotRetrieveCellDataError(FastExcelError): ...
class CalamineCellError(FastExcelError): ...
class CalamineError(FastExcelError): ...
class SheetNotFoundError(FastExcelError): ...
class ColumnNotFoundError(FastExcelError): ...
class ArrowError(FastExcelError): ...
class InvalidParametersError(FastExcelError): ...
================================================
FILE: python/fastexcel/py.typed
================================================
================================================
FILE: python/tests/__init__.py
================================================
================================================
FILE: python/tests/benchmarks/README.md
================================================
# Benchmarks
These benchmarks were generated using `pytest-benchmark`.
> **_NOTE:_** formulas.xlsx was found [here](https://foss.heptapod.net/openpyxl/openpyxl/-/issues/494). plain_data.xls and plain_data.xlsx can be found [here](https://public.opendatasoft.com/explore/dataset/covid-19-pandemic-worldwide-data/export/?disjunctive.zone&disjunctive.category).
Using the following command:
```bash
make benchmarks
```
The results below were measured on my local machine, so they are indicative rather than exact.
## Speed
### 'xls': 2 tests
|Name (time in ms)|Min|Max|Mean|StdDev|Median|IQR|Outliers|OPS|Rounds|Iterations|
|-----------------|---|---|----|------|------|---|-------|---|-------|----------|
|test_fastexcel_xls|27.0991 (1.0)|33.7495 (1.0)|29.5819 (1.0)|1.6429 (1.0)|29.3559 (1.0)|2.7158 (1.0)|10;0|33.8044 (1.0)|29|1|
|test_xlrd|596.5040 (22.01)|628.7964 (18.63)|612.5730 (20.71)|12.9967 (7.91)|615.1620 (20.96)|20.7911 (7.66)|2;0|1.6325 (0.05)|5|1|
### 'xlsx': 4 tests
|Name (time in ms)|Min|Max|Mean|StdDev|Median|IQR|Outliers|OPS|Rounds|Iterations|
|-----------------|---|---|----|------|------|---|--------|---|------|----------|
|test_fastexcel_xlsx|437.5810 (1.0)|470.7615 (1.0)|457.9611 (1.0)|13.7401 (1.0)|457.7006 (1.0)|21.0743 (1.25)|1;0|2.1836 (1.0)|5|1|
|test_fastexcel_with_formulas|3,106.7454 (7.10)|3,150.2050 (6.69)|3,122.5234 (6.82)|16.6031 (1.21)|3,120.9000 (6.82)|16.8614 (1.0)|1;0|0.3203 (0.15)|5|1|
|test_pyxl|4,780.2341 (10.92)|4,998.7753 (10.62)|4,899.6885 (10.70)|110.4665 (8.04)|4,948.7550 (10.81)|211.6149 (12.55)|2;0|0.2041 (0.09)|5|1|
|test_pyxl_with_formulas|25,312.8494 (57.85)|26,621.4687 (56.55)|25,808.5418 (56.36)|545.0540 (39.67)|25,748.0901 (56.26)|852.3171 (50.55)|1;0|0.0387 (0.02)|5|1|
## Memory usage
| fastexcel memory usage | other memory usage |
|-|-|
| ||
| ||
| ||
================================================
FILE: python/tests/benchmarks/fixtures/formulas.xlsx
================================================
[File too large to display: 46.5 MB]
================================================
FILE: python/tests/benchmarks/memory.py
================================================
import argparse
from enum import Enum
from .readers import fastexcel_read, pyxl_read, xlrd_read
class Engine(str, Enum):
    """Reader backends that the memory benchmark can exercise.

    Members also subclass `str`, so each one compares equal to its plain
    string value (e.g. `Engine.XLRD == "xlrd"`).
    """

    # fastexcel itself
    FASTEXCEL = "fastexcel"
    # xlrd
    XLRD = "xlrd"
    # openpyxl; note that the string value is the short form "pyxl"
    OPENPYXL = "pyxl"
def get_args() -> argparse.Namespace:
    """Parse the command-line arguments of the memory benchmark.

    Recognised arguments:
        -e / --engine: which reader to run; must be one of the `Engine` values
            and defaults to `Engine.FASTEXCEL`.
        file: positional argument, the file handed over to the selected reader.

    Returns:
        The parsed namespace, exposing `engine` and `file`.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-e",
        "--engine",
        default=Engine.FASTEXCEL,
        # Reject unknown engines at parse time: `main` would otherwise match no
        # branch and exit successfully without having read anything.
        choices=[engine.value for engine in Engine],
        help="reader to benchmark",
    )
    parser.add_argument("file")
    return parser.parse_args()
def main():
    """Read the requested file once with the reader selected on the command line.

    Nothing is read when the engine matches none of the `Engine` members.
    """
    cli = get_args()
    selected, target = cli.engine, cli.file

    # Guard-clause dispatch: at most one reader is ever invoked.
    if selected == Engine.FASTEXCEL:
        fastexcel_read(target)
        return
    if selected == Engine.XLRD:
        xlrd_read(target)
        return
    if selected == Engine.OPENPYXL:
        pyxl_read(target)
# Only run the benchmark when this module is executed as a script.
if __name__ == "__main__":
    main()
================================================
FILE: python/tests/benchmarks/readers.py
================================================
from fastexcel import read_excel
from openpyxl import load_workbook
from xlrd import open_workbook
def pyxl_read(test_file_path: str):
    """Read every cell value of every worksheet of a workbook with openpyxl.

    The workbook is opened in read-only mode with `keep_links=False` and
    `data_only=True`. Values are only iterated over, never stored: the function
    exists to measure reading cost.

    Args:
        test_file_path: Path of the workbook to read.
    """
    wb = load_workbook(test_file_path, read_only=True, keep_links=False, data_only=True)
    try:
        for ws in wb:
            # `ws.values` yields one tuple of cell values per row
            for row in ws.values:
                for _ in row:
                    pass
    finally:
        # A read-only workbook has to be closed explicitly to release the file
        wb.close()
def xlrd_read(test_file_path: str):
    """Read every cell value of every sheet of a workbook with xlrd.

    Values are only iterated over, never stored.

    Args:
        test_file_path: Path of the workbook to read.
    """
    workbook = open_workbook(test_file_path)
    for sheet in workbook.sheets():
        # `map` is lazy: rows are still fetched one at a time, in index order
        for row_values in map(sheet.row_values, range(sheet.nrows)):
            for _ in row_values:
                pass
def fastexcel_read(test_file_path: str):
    """Load every sheet of a workbook with fastexcel and convert it to arrow.

    Args:
        test_file_path: Path of the workbook to read.
    """
    reader = read_excel(test_file_path)
    for sheet_name in reader.sheet_names:
        # `load_sheet` accepts a sheet name as well as an index
        sheet = reader.load_sheet(sheet_name)
        sheet.to_arrow()
================================================
FILE: python/tests/benchmarks/speed.py
================================================
"""
Compare read performance with fastexcel, xlrd and different openpyxl options
"""
import pytest
from .readers import fastexcel_read, pyxl_read, xlrd_read
@pytest.fixture
def plain_data_xls():
    """Path of the `.xls` benchmark fixture.

    The path starts with `./`, so it is resolved against the current working directory.
    """
    return "./python/tests/benchmarks/fixtures/plain_data.xls"
@pytest.fixture
def plain_data_xlsx():
    """Path of the `.xlsx` benchmark fixture.

    The path starts with `./`, so it is resolved against the current working directory.
    """
    return "./python/tests/benchmarks/fixtures/plain_data.xlsx"
@pytest.fixture
def formula_xlsx():
    """Path of the `formulas.xlsx` benchmark fixture.

    The path starts with `./`, so it is resolved against the current working directory.
    """
    return "./python/tests/benchmarks/fixtures/formulas.xlsx"
@pytest.mark.benchmark(group="xlsx")
def test_pyxl(benchmark, plain_data_xlsx):
    """Benchmark `pyxl_read` on the plain `.xlsx` fixture (group "xlsx")."""
    benchmark(pyxl_read, plain_data_xlsx)
@pytest.mark.benchmark(group="xls")
def test_xlrd(benchmark, plain_data_xls):
    """Benchmark `xlrd_read` on the plain `.xls` fixture (group "xls")."""
    benchmark(xlrd_read, plain_data_xls)
@pytest.mark.benchmark(group="xls")
def test_fastexcel_xls(benchmark, plain_data_xls):
    """Benchmark `fastexcel_read` on the plain `.xls` fixture (group "xls")."""
    benchmark(fastexcel_read, plain_data_xls)
@pytest.mark.benchmark(group="xlsx")
def test_fastexcel_xlsx(benchmark, plain_data_xlsx):
    """Benchmark `fastexcel_read` on the plain `.xlsx` fixture (group "xlsx")."""
    benchmark(fastexcel_read, plain_data_xlsx)
@pytest.mark.benchmark(group="xlsx")
def test_pyxl_with_formulas(benchmark, formula_xlsx):
    """Benchmark `pyxl_read` on the formulas fixture (group "xlsx")."""
    benchmark(pyxl_read, formula_xlsx)
@pytest.mark.benchmark(group="xlsx")
def test_fastexcel_with_formulas(benchmark, formula_xlsx):
    """Benchmark `fastexcel_read` on the formulas fixture (group "xlsx")."""
    benchmark(fastexcel_read, formula_xlsx)
================================================
FILE: python/tests/conftest.py
================================================
from __future__ import annotations
from datetime import datetime
from typing import Any
import pytest
@pytest.fixture
def expected_data_sheet_null_strings() -> dict[str, list[Any]]:
    """Expected content of the "null strings" sheets: one list of 10 values per column.

    `None` marks the cells that are expected to be read as nulls.
    """
    first_label = [float(number) for number in range(1, 11)]
    second_label = [letter * 2 for letter in "ABCDEFGHIJ"]
    dates = [
        None,
        None,
        None,
        datetime(2022, 12, 19, 0, 0),
        datetime(2022, 8, 26, 0, 0),
        datetime(2023, 5, 6, 0, 0),
        datetime(2023, 3, 20, 0, 0),
        datetime(2022, 8, 29, 0, 0),
        None,
        None,
    ]
    timestamps = [
        None,
        None,
        datetime(2023, 2, 18, 6, 13, 56, 730000),
        datetime(2022, 9, 20, 20, 0, 7, 50000),
        datetime(2022, 9, 24, 17, 4, 31, 236000),
        None,
        None,
        None,
        datetime(2022, 9, 14, 1, 50, 58, 390000),
        datetime(2022, 10, 21, 17, 20, 12, 223000),
    ]
    ints = [2076.0, 2285.0, 39323.0, None, None, None, 11953.0, None, 30192.0, None]
    floats = [
        141.02023312814603,
        778.0655928608671,
        None,
        497.60307287584106,
        627.446112513911,
        None,
        None,
        None,
        488.3509486743364,
        None,
    ]
    # Insertion order is the expected column order
    return {
        "FIRST_LABEL": first_label,
        "SECOND_LABEL": second_label,
        "DATES_AND_NULLS": dates,
        "TIMESTAMPS_AND_NULLS": timestamps,
        "INTS_AND_NULLS": ints,
        "FLOATS_AND_NULLS": floats,
    }
================================================
FILE: python/tests/test_alias_generation.py
================================================
from __future__ import annotations
import fastexcel
import pandas as pd
import polars as pl
import pytest
from pandas.testing import assert_frame_equal as pd_assert_frame_equal
from polars.testing import assert_frame_equal as pl_assert_frame_equal
from .utils import path_for_fixture
@pytest.mark.parametrize(
    "use_columns", [None, [0, 1, 2], ["col", "col_1", "col_2"], [0, "col_1", 2]]
)
def test_alias_generation_with_use_columns(use_columns: list[str] | list[int] | None) -> None:
    """Duplicated header names get `_1`, `_2` suffixes, whatever the column selection."""
    reader = fastexcel.read_excel(path_for_fixture("fixture-single-sheet-duplicated-columns.xlsx"))
    sheet = reader.load_sheet(0, use_columns=use_columns)

    available_names = [column.name for column in sheet.available_columns()]
    assert available_names == ["col", "col_1", "col_2"]

    numeric_columns = {"col": [1.0, 2.0], "col_1": [2019.0, 2020.0]}
    raw_timestamps = ["2019-02-01 00:01:02", "2014-01-02 06:01:02"]

    expected_pd_df = pd.DataFrame(
        {
            **numeric_columns,
            "col_2": pd.Series([pd.Timestamp(raw) for raw in raw_timestamps]).astype(
                "datetime64[ms]"
            ),
        }
    )
    pd_assert_frame_equal(sheet.to_pandas(), expected_pd_df)

    expected_pl_df = pl.DataFrame({**numeric_columns, "col_2": raw_timestamps}).with_columns(
        pl.col("col_2").str.strptime(pl.Datetime, "%F %T").dt.cast_time_unit("ms")
    )
    pl_assert_frame_equal(sheet.to_polars(), expected_pl_df)
================================================
FILE: python/tests/test_column_selection.py
================================================
# ruff: noqa: E501
from __future__ import annotations
import re
from typing import Any
import fastexcel
import numpy as np
import pandas as pd
import polars as pl
import pytest
from pandas.testing import assert_frame_equal as pd_assert_frame_equal
from polars.testing import assert_frame_equal as pl_assert_frame_equal
from .utils import path_for_fixture
@pytest.fixture
def excel_reader_single_sheet() -> fastexcel.ExcelReader:
    """Reader opened on the `fixture-single-sheet.xlsx` fixture."""
    return fastexcel.read_excel(path_for_fixture("fixture-single-sheet.xlsx"))
@pytest.fixture
def expected_column_info() -> list[fastexcel.ColumnInfo]:
    """Column metadata expected for the single-sheet fixture: `Month`, then `Year`."""
    # Both columns only differ by their name and position
    return [
        fastexcel.ColumnInfo(
            name=column_name,
            index=position,
            absolute_index=position,
            column_name_from="looked_up",
            dtype="float",
            dtype_from="guessed",
        )
        for position, column_name in enumerate(("Month", "Year"))
    ]
def test_single_sheet_all_columns(
    excel_reader_single_sheet: fastexcel.ExcelReader,
    expected_column_info: list[fastexcel.ColumnInfo],
) -> None:
    """Omitting `use_columns` and passing `use_columns=None` both load every column."""
    default_sheet = excel_reader_single_sheet.load_sheet(0)
    explicit_none_sheet = excel_reader_single_sheet.load_sheet(0, use_columns=None)

    assert default_sheet.selected_columns == expected_column_info
    assert default_sheet.available_columns() == expected_column_info

    data = {"Month": [1.0, 2.0], "Year": [2019.0, 2020.0]}
    expected_pd_df, expected_pl_df = pd.DataFrame(data), pl.DataFrame(data)

    pd_assert_frame_equal(default_sheet.to_pandas(), expected_pd_df)
    pd_assert_frame_equal(explicit_none_sheet.to_pandas(), expected_pd_df)

    pl_assert_frame_equal(default_sheet.to_polars(), expected_pl_df)
    pl_assert_frame_equal(explicit_none_sheet.to_polars(), expected_pl_df)
def test_single_sheet_subset_by_str(
    excel_reader_single_sheet: fastexcel.ExcelReader,
    expected_column_info: list[fastexcel.ColumnInfo],
) -> None:
    """A single column can be selected by name, on a sheet given by index or by name."""
    data = {"Month": [1.0, 2.0], "Year": [2019.0, 2020.0]}
    # Explicit annotation: the list mixes an index and a name
    sheet_refs: list[str | int] = [0, "January"]

    for sheet_ref in sheet_refs:
        for position, (column_name, values) in enumerate(data.items()):
            sheet = excel_reader_single_sheet.load_sheet(sheet_ref, use_columns=[column_name])

            assert sheet.selected_columns == [expected_column_info[position]]
            assert sheet.available_columns() == expected_column_info

            single_column = {column_name: values}
            pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(single_column))
            pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(single_column))
def test_single_sheet_subset_by_index(
    excel_reader_single_sheet: fastexcel.ExcelReader,
    expected_column_info: list[fastexcel.ColumnInfo],
) -> None:
    """A single column can be selected by index, on a sheet given by index or by name."""
    data = {"Month": [1.0, 2.0], "Year": [2019.0, 2020.0]}
    sheet_refs: list[str | int] = [0, "January"]

    for sheet_ref in sheet_refs:
        for position, (column_name, values) in enumerate(data.items()):
            sheet = excel_reader_single_sheet.load_sheet(sheet_ref, use_columns=[position])

            assert sheet.selected_columns == [expected_column_info[position]]
            assert sheet.available_columns() == expected_column_info

            single_column = {column_name: values}
            pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(single_column))
            pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(single_column))
@pytest.fixture
def excel_reader_single_sheet_with_unnamed_columns() -> fastexcel.ExcelReader:
    """Reader opened on the `fixture-multi-sheet.xlsx` fixture.

    The tests relying on it load its "With unnamed columns" sheet.
    """
    return fastexcel.read_excel(path_for_fixture("fixture-multi-sheet.xlsx"))
@pytest.fixture
def single_sheet_with_unnamed_columns_expected() -> dict[str, list[Any]]:
    """Expected data of the sheet with unnamed columns, keyed by column name.

    Columns without a header are named `__UNNAMED__<index>`.
    """
    expected_by_column: dict[str, list[Any]] = {}
    expected_by_column["col1"] = [2.0, 3.0]
    expected_by_column["__UNNAMED__1"] = [1.5, 2.5]
    expected_by_column["col3"] = ["hello", "world"]
    expected_by_column["__UNNAMED__3"] = [-5.0, -6.0]
    expected_by_column["col5"] = ["a", "b"]
    return expected_by_column
@pytest.fixture
def sheet_with_unnamed_columns_expected_column_info() -> list[fastexcel.ColumnInfo]:
    """Column metadata expected for the sheet with unnamed columns, in sheet order."""
    # (name, where the name comes from, dtype); the position gives both indices
    specs = [
        ("col1", "looked_up", "float"),
        ("__UNNAMED__1", "generated", "float"),
        ("col3", "looked_up", "string"),
        ("__UNNAMED__3", "generated", "float"),
        ("col5", "looked_up", "string"),
    ]
    return [
        fastexcel.ColumnInfo(
            name=column_name,
            index=position,
            absolute_index=position,
            column_name_from=name_origin,
            dtype=dtype,
            dtype_from="guessed",
        )
        for position, (column_name, name_origin, dtype) in enumerate(specs)
    ]
def test_single_sheet_with_unnamed_columns(
    excel_reader_single_sheet_with_unnamed_columns: fastexcel.ExcelReader,
    single_sheet_with_unnamed_columns_expected: dict[str, list[Any]],
    sheet_with_unnamed_columns_expected_column_info: list[fastexcel.ColumnInfo],
) -> None:
    """Selecting columns by name or by index yields the same sheet."""
    selection_by_name = ["col1", "col3", "__UNNAMED__3"]
    selection_by_index = [0, 2, 3]

    expected = {
        column_name: values
        for column_name, values in single_sheet_with_unnamed_columns_expected.items()
        if column_name in selection_by_name
    }
    expected_selected = [
        sheet_with_unnamed_columns_expected_column_info[position]
        for position in selection_by_index
    ]

    # By name first, then by index
    for selection in (selection_by_name, selection_by_index):
        sheet = excel_reader_single_sheet_with_unnamed_columns.load_sheet(
            "With unnamed columns", use_columns=selection
        )
        assert sheet.selected_columns == expected_selected
        assert sheet.available_columns() == sheet_with_unnamed_columns_expected_column_info
        pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(expected))
        pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected))
def test_single_sheet_with_unnamed_columns_and_pagination(
    excel_reader_single_sheet_with_unnamed_columns: fastexcel.ExcelReader,
    single_sheet_with_unnamed_columns_expected: dict[str, list[Any]],
    sheet_with_unnamed_columns_expected_column_info: list[fastexcel.ColumnInfo],
) -> None:
    """`n_rows` and `skip_rows` combine with a column selection by name or by index."""
    selection_by_name = ["col1", "col3", "__UNNAMED__3"]
    selection_by_index = [0, 2, 3]

    # (pagination keyword arguments, slice of the full data expected back)
    scenarios = [
        ({"n_rows": 1}, slice(None, 1)),  # first row only
        ({"skip_rows": 1}, slice(1, None)),  # second row
    ]

    for pagination, rows in scenarios:
        expected = {
            column_name: values[rows]
            for column_name, values in single_sheet_with_unnamed_columns_expected.items()
            if column_name in selection_by_name
        }
        for selection in (selection_by_name, selection_by_index):
            sheet = excel_reader_single_sheet_with_unnamed_columns.load_sheet(
                "With unnamed columns", use_columns=selection, **pagination
            )
            assert sheet.available_columns() == sheet_with_unnamed_columns_expected_column_info
            pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(expected))
            pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected))
def test_single_sheet_with_unnamed_columns_and_pagination_and_column_names(
    excel_reader_single_sheet_with_unnamed_columns: fastexcel.ExcelReader,
) -> None:
    """With `column_names`, columns must be selected by index; `skip_rows` still applies."""
    reader = excel_reader_single_sheet_with_unnamed_columns
    provided_names = [f"col{i}" for i in range(3)]
    selection_by_index = [0, 2, 3]

    all_rows: dict[str, list[Any]] = {
        "col0": [2.0, 3.0],
        "col1": ["hello", "world"],
        "col2": [-5.0, -6.0],
    }
    expected_available_names = ["col0", "__UNNAMED__1", "col1", "col2", "__UNNAMED__4"]

    # Selecting by name is rejected as soon as column names are provided
    with pytest.raises(
        fastexcel.InvalidParametersError,
        match='use_columns can only contain integers when used with columns_names, got "col0"',
    ):
        reader.load_sheet(
            "With unnamed columns",
            use_columns=["col0", "col2", "col3"],
            skip_rows=1,
            column_names=provided_names,
        )

    scenarios = [
        # skipping the header row only
        (1, all_rows),
        # skipping the header row + first data row
        (2, {column_name: values[1:] for column_name, values in all_rows.items()}),
    ]
    for rows_to_skip, expected in scenarios:
        sheet = reader.load_sheet(
            "With unnamed columns",
            use_columns=selection_by_index,
            skip_rows=rows_to_skip,
            column_names=provided_names,
        )
        assert [column.name for column in sheet.available_columns()] == expected_available_names
        pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(expected))
        pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected))
def test_single_sheet_with_unnamed_columns_and_str_range(
    excel_reader_single_sheet_with_unnamed_columns: fastexcel.ExcelReader,
    single_sheet_with_unnamed_columns_expected: dict[str, list[Any]],
    sheet_with_unnamed_columns_expected_column_info: list[fastexcel.ColumnInfo],
) -> None:
    """`"A,C:E"` selects column A plus columns C to E."""
    all_columns = sheet_with_unnamed_columns_expected_column_info
    kept_names = ("col1", "col3", "__UNNAMED__3", "col5")
    expected = {
        column_name: values
        for column_name, values in single_sheet_with_unnamed_columns_expected.items()
        if column_name in kept_names
    }

    sheet = excel_reader_single_sheet_with_unnamed_columns.load_sheet(
        "With unnamed columns", use_columns="A,C:E"
    )

    # Every column but B (index 1)
    assert sheet.selected_columns == all_columns[:1] + all_columns[2:]
    assert sheet.available_columns() == all_columns
    pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(expected))
    pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected))
def test_single_sheet_with_unnamed_columns_and_open_ended_range(
    excel_reader_single_sheet_with_unnamed_columns: fastexcel.ExcelReader,
    single_sheet_with_unnamed_columns_expected: dict[str, list[Any]],
    sheet_with_unnamed_columns_expected_column_info: list[fastexcel.ColumnInfo],
) -> None:
    """`"B:"` selects every column from B onwards (B, C, D, E - indices 1, 2, 3, 4)."""
    all_columns = sheet_with_unnamed_columns_expected_column_info
    kept_names = ("__UNNAMED__1", "col3", "__UNNAMED__3", "col5")
    expected = {
        column_name: values
        for column_name, values in single_sheet_with_unnamed_columns_expected.items()
        if column_name in kept_names
    }

    sheet = excel_reader_single_sheet_with_unnamed_columns.load_sheet(
        "With unnamed columns", use_columns="B:"
    )

    assert sheet.selected_columns == all_columns[1:]
    assert sheet.available_columns() == all_columns
    pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(expected))
    pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected))
def test_single_sheet_with_unnamed_columns_and_open_ended_range_from_start(
    excel_reader_single_sheet_with_unnamed_columns: fastexcel.ExcelReader,
    single_sheet_with_unnamed_columns_expected: dict[str, list[Any]],
    sheet_with_unnamed_columns_expected_column_info: list[fastexcel.ColumnInfo],
) -> None:
    """`"A:"` selects every column of the sheet."""
    all_columns = sheet_with_unnamed_columns_expected_column_info

    sheet = excel_reader_single_sheet_with_unnamed_columns.load_sheet(
        "With unnamed columns", use_columns="A:"
    )

    assert sheet.selected_columns == all_columns
    assert sheet.available_columns() == all_columns
    # Nothing is filtered out: the whole expected data comes back
    pd_assert_frame_equal(
        sheet.to_pandas(), pd.DataFrame(single_sheet_with_unnamed_columns_expected)
    )
    pl_assert_frame_equal(
        sheet.to_polars(), pl.DataFrame(single_sheet_with_unnamed_columns_expected)
    )
def test_single_sheet_with_unnamed_columns_and_mixed_open_ended_range(
    excel_reader_single_sheet_with_unnamed_columns: fastexcel.ExcelReader,
    single_sheet_with_unnamed_columns_expected: dict[str, list[Any]],
    sheet_with_unnamed_columns_expected_column_info: list[fastexcel.ColumnInfo],
) -> None:
    """`"A,C:"` selects column A and every column from C onwards (indices 0, 2, 3, 4)."""
    all_columns = sheet_with_unnamed_columns_expected_column_info
    kept_names = ("col1", "col3", "__UNNAMED__3", "col5")
    expected = {
        column_name: values
        for column_name, values in single_sheet_with_unnamed_columns_expected.items()
        if column_name in kept_names
    }

    sheet = excel_reader_single_sheet_with_unnamed_columns.load_sheet(
        "With unnamed columns", use_columns="A,C:"
    )

    assert sheet.selected_columns == [all_columns[0], *all_columns[2:]]
    assert sheet.available_columns() == all_columns
    pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(expected))
    pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected))
def test_single_sheet_with_unnamed_columns_and_from_beginning_range(
    excel_reader_single_sheet_with_unnamed_columns: fastexcel.ExcelReader,
    single_sheet_with_unnamed_columns_expected: dict[str, list[Any]],
    sheet_with_unnamed_columns_expected_column_info: list[fastexcel.ColumnInfo],
) -> None:
    """`":C"` selects every column up to and including C (A, B, C - indices 0, 1, 2)."""
    all_columns = sheet_with_unnamed_columns_expected_column_info
    kept_names = ("col1", "__UNNAMED__1", "col3")
    expected = {
        column_name: values
        for column_name, values in single_sheet_with_unnamed_columns_expected.items()
        if column_name in kept_names
    }

    sheet = excel_reader_single_sheet_with_unnamed_columns.load_sheet(
        "With unnamed columns", use_columns=":C"
    )

    assert sheet.selected_columns == all_columns[:3]
    assert sheet.available_columns() == all_columns
    pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(expected))
    pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected))
def test_single_sheet_with_unnamed_columns_and_from_beginning_range_single_column(
    excel_reader_single_sheet_with_unnamed_columns: fastexcel.ExcelReader,
    single_sheet_with_unnamed_columns_expected: dict[str, list[Any]],
    sheet_with_unnamed_columns_expected_column_info: list[fastexcel.ColumnInfo],
) -> None:
    """`":A"` selects column A only (index 0)."""
    all_columns = sheet_with_unnamed_columns_expected_column_info
    kept_names = ("col1",)
    expected = {
        column_name: values
        for column_name, values in single_sheet_with_unnamed_columns_expected.items()
        if column_name in kept_names
    }

    sheet = excel_reader_single_sheet_with_unnamed_columns.load_sheet(
        "With unnamed columns", use_columns=":A"
    )

    assert sheet.selected_columns == all_columns[:1]
    assert sheet.available_columns() == all_columns
    pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(expected))
    pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected))
def test_single_sheet_with_unnamed_columns_and_complex_mixed_pattern(
    excel_reader_single_sheet_with_unnamed_columns: fastexcel.ExcelReader,
    single_sheet_with_unnamed_columns_expected: dict[str, list[Any]],
    sheet_with_unnamed_columns_expected_column_info: list[fastexcel.ColumnInfo],
) -> None:
    """`"A,:B,D,E:"` mixes single columns and open-ended ranges.

    The pattern expands to A, then A and B (from `:B`), D, and E (from `E:`).
    Column A is only expected once, leaving columns 0, 1, 3 and 4.
    """
    all_columns = sheet_with_unnamed_columns_expected_column_info
    kept_names = ("col1", "__UNNAMED__1", "__UNNAMED__3", "col5")
    expected = {
        column_name: values
        for column_name, values in single_sheet_with_unnamed_columns_expected.items()
        if column_name in kept_names
    }

    sheet = excel_reader_single_sheet_with_unnamed_columns.load_sheet(
        "With unnamed columns", use_columns="A,:B,D,E:"
    )

    # A, B, D, E
    expected_selected = [all_columns[position] for position in (0, 1, 3, 4)]
    assert sheet.selected_columns == expected_selected
    assert sheet.available_columns() == all_columns
    pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(expected))
    pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected))
def test_single_sheet_invalid_column_indices_negative_integer(
    excel_reader_single_sheet_with_unnamed_columns: fastexcel.ExcelReader,
) -> None:
    """A negative column index is rejected with `InvalidParametersError`."""
    # The message is matched literally (it is passed through `re.escape` below),
    # context lines included.
    expected_message = """invalid parameters: expected list[int] | list[str], got [-2]
Context:
0: could not determine selected columns from provided object: [-2]
1: expected selected columns to be list[str] | list[int] | str | Callable[[ColumnInfo], bool] | None, got Some([-2])
"""
    with pytest.raises(fastexcel.InvalidParametersError, match=re.escape(expected_message)):
        excel_reader_single_sheet_with_unnamed_columns.load_sheet(0, use_columns=[-2])
def test_single_sheet_invalid_column_indices_empty_list(
    excel_reader_single_sheet_with_unnamed_columns: fastexcel.ExcelReader,
) -> None:
    """An empty column selection is rejected with `InvalidParametersError`."""
    # The message is matched literally (it is passed through `re.escape` below),
    # context lines included.
    expected_message = """invalid parameters: list of selected columns is empty
Context:
0: could not determine selected columns from provided object: []
1: expected selected columns to be list[str] | list[int] | str | Callable[[ColumnInfo], bool] | None, got Some([])
"""
    with pytest.raises(fastexcel.InvalidParametersError, match=re.escape(expected_message)):
        excel_reader_single_sheet_with_unnamed_columns.load_sheet(0, use_columns=[])
def test_single_sheet_invalid_column_indices_column_does_not_exist_str(
    excel_reader_single_sheet_with_unnamed_columns: fastexcel.ExcelReader,
) -> None:
    """Selecting an unknown column name raises `ColumnNotFoundError`."""
    # Used as a regular expression (no `re.escape` here): `.*` stands for the
    # list of available columns.
    expected_message = """column with name \"nope\" not found
Context:
0: available columns are: .*
"""
    with pytest.raises(fastexcel.ColumnNotFoundError, match=expected_message):
        excel_reader_single_sheet_with_unnamed_columns.load_sheet(0, use_columns=["nope"])
def test_single_sheet_invalid_column_indices_column_does_not_exist_int(
    excel_reader_single_sheet_with_unnamed_columns: fastexcel.ExcelReader,
) -> None:
    """Selecting an out-of-range column index raises `ColumnNotFoundError`."""
    # Used as a regular expression (no `re.escape` here): `.*` stands for the
    # list of available columns.
    expected_message = """column at index 42 not found
Context:
0: available columns are: .*
"""
    with pytest.raises(fastexcel.ColumnNotFoundError, match=expected_message):
        excel_reader_single_sheet_with_unnamed_columns.load_sheet(0, use_columns=[42])
def test_use_columns_with_column_names() -> None:
    """Provided `column_names` are applied to the columns picked by `use_columns`.

    The columns left unselected are expected to get generated `__UNNAMED__<index>` names.
    """

    def column(name: str, position: int, dtype: str, name_origin: str) -> fastexcel.ColumnInfo:
        # Every expected column has a guessed dtype and identical relative/absolute indices
        return fastexcel.ColumnInfo(
            name=name,
            index=position,
            absolute_index=position,
            column_name_from=name_origin,
            dtype=dtype,
            dtype_from="guessed",
        )

    reader = fastexcel.read_excel(path_for_fixture("fixture-single-sheet-with-types.xlsx"))
    sheet = reader.load_sheet(
        0,
        use_columns=[1, 2],
        header_row=None,
        skip_rows=1,
        column_names=["bools_renamed", "dates_renamed"],
    )

    assert sheet.available_columns() == [
        column("__UNNAMED__0", 0, "float", "generated"),
        column("bools_renamed", 1, "boolean", "provided"),
        column("dates_renamed", 2, "datetime", "provided"),
        column("__UNNAMED__3", 3, "float", "generated"),
    ]

    expected_bools = [True, False, True]

    expected_pd_df = pd.DataFrame(
        {
            "bools_renamed": expected_bools,
            "dates_renamed": pd.Series([pd.Timestamp("2022-03-02 05:43:04")] * 3).astype(
                "datetime64[ms]"
            ),
        }
    )
    pd_assert_frame_equal(sheet.to_pandas(), expected_pd_df)

    expected_pl_df = pl.DataFrame(
        {
            "bools_renamed": expected_bools,
            "dates_renamed": ["2022-03-02 05:43:04"] * 3,
        }
    ).with_columns(
        pl.col("dates_renamed").str.strptime(pl.Datetime, "%F %T").dt.cast_time_unit("ms")
    )
    pl_assert_frame_equal(sheet.to_polars(), expected_pl_df)
def test_use_columns_with_callable() -> None:
    """`use_columns` accepts a predicate that receives each column's info."""

    def names_and_dtypes(columns) -> list:
        return [(column.name, column.dtype) for column in columns]

    reader = fastexcel.read_excel(path_for_fixture("fixture-multi-sheet.xlsx"))

    # Without a selection, every available column is selected
    sheet = reader.load_sheet(2)
    every_column = [
        ("col1", "float"),
        ("__UNNAMED__1", "float"),
        ("col3", "string"),
        ("__UNNAMED__3", "float"),
        ("col5", "string"),
    ]
    assert names_and_dtypes(sheet.available_columns()) == every_column
    assert names_and_dtypes(sheet.selected_columns) == every_column

    # Filtering on the column name
    sheet = reader.load_sheet(2, use_columns=lambda col: col.name.startswith("col"))
    assert names_and_dtypes(sheet.selected_columns) == [
        ("col1", "float"),
        ("col3", "string"),
        ("col5", "string"),
    ]

    # Filtering on the column index
    sheet = reader.load_sheet(2, use_columns=lambda col: col.index % 2 == 1)
    assert names_and_dtypes(sheet.selected_columns) == [
        ("__UNNAMED__1", "float"),
        ("__UNNAMED__3", "float"),
    ]
def test_use_columns_with_bad_callable() -> None:
    """An invalid `use_columns` callable is reported as `InvalidParametersError`."""
    reader = fastexcel.read_excel(path_for_fixture("fixture-multi-sheet.xlsx"))

    # Wrong arity: the callable takes no argument
    cannot_be_called = re.escape("`use_columns` callable could not be called (TypeError: ")
    with pytest.raises(fastexcel.InvalidParametersError, match=cannot_be_called):
        reader.load_sheet(
            2,
            use_columns=lambda: True,  # type: ignore
        )

    # Wrong return type: the callable returns an int
    with pytest.raises(
        fastexcel.InvalidParametersError, match="`use_columns` callable should return a boolean"
    ):
        reader.load_sheet(
            2,
            use_columns=lambda _: 42,  # type: ignore
        )
def test_use_columns_with_eager_loading() -> None:
    """``use_columns`` drives both the order and the subset of eagerly loaded columns."""
    reader = fastexcel.read_excel(path_for_fixture("fixture-single-sheet.xlsx"))
    months = [1.0, 2.0]
    years = [2019.0, 2020.0]

    # No selection: columns come back in sheet order
    batch = reader.load_sheet_eager(0)
    assert batch.schema.names == ["Month", "Year"]
    assert batch["Year"].tolist() == years
    assert batch["Month"].tolist() == months

    # Selection listing the columns in the opposite order
    batch = reader.load_sheet_eager(0, use_columns=["Year", "Month"])
    assert batch.schema.names == ["Year", "Month"]
    assert batch["Year"].tolist() == years
    assert batch["Month"].tolist() == months

    # Selection of a single column
    batch = reader.load_sheet_eager(0, use_columns=["Year"])
    assert batch.schema.names == ["Year"]
    assert batch["Year"].tolist() == years
    assert "Month" not in (field.name for field in batch.schema)
@pytest.mark.parametrize("excel_file", ["sheet-null-strings.xlsx", "sheet-null-strings-empty.xlsx"])
def test_use_columns_dtypes_eager_loading(
    excel_file: str, expected_data_sheet_null_strings: dict[str, list[Any]]
) -> None:
    """Eager and lazy loading agree for several column selections and orderings."""
    reference_pl = pl.DataFrame(expected_data_sheet_null_strings).with_columns(
        pl.col("DATES_AND_NULLS").dt.cast_time_unit("ms"),
        pl.col("TIMESTAMPS_AND_NULLS").dt.cast_time_unit("ms"),
    )
    reference_pd = pd.DataFrame(expected_data_sheet_null_strings)
    for temporal_column in ("DATES_AND_NULLS", "TIMESTAMPS_AND_NULLS"):
        reference_pd[temporal_column] = reference_pd[temporal_column].dt.as_unit("ms")

    forward = list(expected_data_sheet_null_strings.keys())
    backward = list(reversed(expected_data_sheet_null_strings.keys()))
    # All columns, odd positions, then even positions; first in sheet order, then reversed.
    selections = (
        forward,
        forward[1::2],
        forward[0::2],
        backward,
        backward[1::2],
        backward[0::2],
    )

    for use_columns in selections:
        reader = fastexcel.read_excel(path_for_fixture(excel_file))

        eager_batch = reader.load_sheet_eager(0, use_columns=use_columns)
        eager_pd = eager_batch.to_pandas()
        eager_pl = pl.from_arrow(data=eager_batch)
        assert isinstance(eager_pl, pl.DataFrame)

        lazy_sheet = reader.load_sheet(0, use_columns=use_columns)
        lazy_pl = lazy_sheet.to_polars()
        lazy_pd = lazy_sheet.to_pandas()

        # Lazy and eager results must match each other ...
        pl_assert_frame_equal(lazy_pl, eager_pl)
        pd_assert_frame_equal(lazy_pd, eager_pd)

        # ... and both must match the expected data restricted to the selection.
        pl_assert_frame_equal(reference_pl.select(use_columns), eager_pl)
        pd_assert_frame_equal(reference_pd[use_columns], eager_pd)

        # Column order follows the order of the selection.
        assert eager_pd.columns.to_list() == use_columns
        assert eager_pl.columns == use_columns
def test_use_columns_with_table() -> None:
    """Selecting table columns by name keeps only those columns in the output."""
    reader = fastexcel.read_excel(path_for_fixture("sheet-with-tables.xlsx"))
    table = reader.load_table("users", use_columns=["User Id", "FirstName"])

    def guessed_column(name, position, dtype, name_origin):
        # In this table the relative and absolute indices are the same.
        return fastexcel.ColumnInfo(
            name=name,
            index=position,
            absolute_index=position,
            dtype=dtype,
            column_name_from=name_origin,
            dtype_from="guessed",
        )

    available = [
        guessed_column("User Id", 0, "float", "provided"),
        guessed_column("FirstName", 1, "string", "provided"),
        guessed_column("__UNNAMED__2", 2, "string", "generated"),
        guessed_column("__UNNAMED__3", 3, "datetime", "generated"),
    ]
    # The selection consists of the first two available columns.
    selected = [
        guessed_column("User Id", 0, "float", "provided"),
        guessed_column("FirstName", 1, "string", "provided"),
    ]
    assert table.available_columns() == available
    assert table.selected_columns == selected

    reference_pl = pl.DataFrame(
        {"User Id": [1.0, 2.0, 5.0], "FirstName": ["Peter", "John", "Hans"]}
    )
    reference_pd = pd.DataFrame(
        {"User Id": [1.0, 2.0, 5.0], "FirstName": ["Peter", "John", "Hans"]}
    )

    pl_assert_frame_equal(table.to_polars(), reference_pl)
    pd_assert_frame_equal(table.to_pandas(), reference_pd)
def test_use_columns_with_table_and_provided_columns() -> None:
    """Index-based selection combined with ``column_names`` on a table."""
    reader = fastexcel.read_excel(path_for_fixture("sheet-with-tables.xlsx"))
    table = reader.load_table(
        "users", use_columns=[0, 2], column_names=["user_id", "last_name"]
    )

    def guessed_column(name, position, dtype, name_origin):
        # In this table the relative and absolute indices are the same.
        return fastexcel.ColumnInfo(
            name=name,
            index=position,
            absolute_index=position,
            dtype=dtype,
            column_name_from=name_origin,
            dtype_from="guessed",
        )

    # The provided names land on the selected indices; the others are generated.
    available = [
        guessed_column("user_id", 0, "float", "provided"),
        guessed_column("__UNNAMED__1", 1, "string", "generated"),
        guessed_column("last_name", 2, "string", "provided"),
        guessed_column("__UNNAMED__3", 3, "datetime", "generated"),
    ]
    selected = [
        guessed_column("user_id", 0, "float", "provided"),
        guessed_column("last_name", 2, "string", "provided"),
    ]
    assert table.available_columns() == available
    assert table.selected_columns == selected

    reference_pl = pl.DataFrame(
        {"user_id": [1.0, 2.0, 5.0], "last_name": ["Müller", "Meier", "Fricker"]}
    )
    reference_pd = pd.DataFrame(
        {"user_id": [1.0, 2.0, 5.0], "last_name": ["Müller", "Meier", "Fricker"]}
    )

    pl_assert_frame_equal(table.to_polars(), reference_pl)
    pd_assert_frame_equal(table.to_pandas(), reference_pd)
def test_use_column_range_with_offset_without_table() -> None:
    """A column range ("H:I") on a sheet whose header sits at row index 9."""
    reader = fastexcel.read_excel(path_for_fixture("sheet-and-table-with-offset.xlsx"))
    sheet = reader.load_sheet("without-table", use_columns="H:I", header_row=9)

    reference_pl = pl.DataFrame(
        {
            "Column at H10": [1.0, 2.0, 3.0],
            "Column at I10": [4.0, 5.0, 6.0],
        }
    )
    reference_pd = pd.DataFrame(
        {
            "Column at H10": [1.0, 2.0, 3.0],
            "Column at I10": [4.0, 5.0, 6.0],
        }
    )

    pl_assert_frame_equal(sheet.to_polars(), reference_pl)
    pd_assert_frame_equal(sheet.to_pandas(), reference_pd)
def test_use_column_range_with_offset_with_table() -> None:
    """A column range ("D:E") on the "with-table" sheet, header at row index 4."""
    reader = fastexcel.read_excel(path_for_fixture("sheet-and-table-with-offset.xlsx"))
    sheet = reader.load_sheet("with-table", use_columns="D:E", header_row=4)

    reference_pl = pl.DataFrame(
        {
            "Column at D5": [1.0, 2.0, 3.0, 4.0],
            "Column at E5": [4.0, 5.0, 6.0, 8.0],
        }
    )
    reference_pd = pd.DataFrame(
        {
            "Column at D5": [1.0, 2.0, 3.0, 4.0],
            "Column at E5": [4.0, 5.0, 6.0, 8.0],
        }
    )

    pl_assert_frame_equal(sheet.to_polars(), reference_pl)
    pd_assert_frame_equal(sheet.to_pandas(), reference_pd)
def test_use_column_names_with_offset_table_by_index_and_name() -> None:
    """Names and indices can be mixed when selecting columns of an offset table.

    Integer selectors are expected to be absolute sheet indices, while string
    selectors are matched against the column names.
    """
    reader = fastexcel.read_excel(path_for_fixture("sheet-and-table-with-offset.xlsx"))

    # "Column at D5": table index 0, absolute index 3 (selected by name).
    # 4: absolute index of column E (selected by index).
    table = reader.load_table("TableAtD5", use_columns=["Column at D5", 4])  # type:ignore[arg-type]

    selected = [
        fastexcel.ColumnInfo(
            name=f"Column at {cell}",
            index=relative,
            absolute_index=absolute,
            dtype="float",
            column_name_from="provided",
            dtype_from="guessed",
        )
        for cell, relative, absolute in (("D5", 0, 3), ("E5", 1, 4))
    ]
    assert table.selected_columns == selected

    reference_pl = pl.DataFrame(
        {
            "Column at D5": [1.0, 2.0, 3.0, 4.0],
            "Column at E5": [4.0, 5.0, 6.0, 8.0],
        }
    )
    reference_pd = pd.DataFrame(
        {
            "Column at D5": [1.0, 2.0, 3.0, 4.0],
            "Column at E5": [4.0, 5.0, 6.0, 8.0],
        }
    )

    pl_assert_frame_equal(table.to_polars(), reference_pl)
    pd_assert_frame_equal(table.to_pandas(), reference_pd)
def test_use_column_range_with_offset_with_table_and_specified_dtypes() -> None:
    """Closed ("D:E") and open-ended ("D:") ranges give the same result on a table.

    Dtypes are specified once by absolute index and once by column name.
    """
    reader = fastexcel.read_excel(path_for_fixture("sheet-and-table-with-offset.xlsx"))
    # First the closed range, then the open-ended one.
    tables = [
        reader.load_table(
            "TableAtD5", use_columns=column_range, dtypes={3: "int", "Column at E5": "string"}
        )
        for column_range in ("D:E", "D:")
    ]

    reference_data = {
        # int, because dtype 3 -> "int" was looked up by index
        "Column at D5": [1, 2, 3, 4],
        # string, because the dtype was looked up by name
        "Column at E5": ["4", "5", "6", "8"],
    }
    reference_columns = [
        fastexcel.ColumnInfo(
            name="Column at D5",
            index=0,
            absolute_index=3,
            dtype="int",
            dtype_from="provided_by_index",
            column_name_from="provided",
        ),
        fastexcel.ColumnInfo(
            name="Column at E5",
            index=1,
            absolute_index=4,
            dtype="string",
            dtype_from="provided_by_name",
            column_name_from="provided",
        ),
    ]
    for table in tables:
        assert table.selected_columns == reference_columns

    reference_pl = pl.DataFrame(reference_data)
    reference_pd = pd.DataFrame(reference_data)

    for table in tables:
        pl_assert_frame_equal(table.to_polars(), reference_pl)
    for table in tables:
        pd_assert_frame_equal(table.to_pandas(), reference_pd)
def test_use_column_range_with_offset_with_sheet_and_specified_dtypes() -> None:
    """Closed ("H:K") and open-ended ("H:") ranges give the same result on a sheet.

    Dtypes are specified once by absolute index and once by column name.
    """
    reader = fastexcel.read_excel(path_for_fixture("sheet-and-table-with-offset.xlsx"))
    # First the closed range, then the open-ended one.
    sheets = [
        reader.load_sheet(
            "without-table",
            use_columns=column_range,
            header_row=9,
            dtypes={7: "int", "Column at I10": "string"},
        )
        for column_range in ("H:K", "H:")
    ]

    def reference_data(unnamed_values):
        # Only the representation of the empty unnamed column differs per library.
        return {
            # int, because the dtype was looked up by index
            "Column at H10": [1, 2, 3],
            # string, because the dtype was looked up by name
            "Column at I10": ["4", "5", "6"],
            "__UNNAMED__2": unnamed_values,
            "Column at K10": [7.0, 8.0, 9.0],
        }

    reference_data_polars = reference_data(pl.Series([None, None, None], dtype=pl.String))

    # In pandas 3, string columns use nan instead of None for missing values
    major_minor = tuple(int(part) for part in pd.__version__.split(".")[:2])
    if major_minor >= (3, 0):
        missing = np.nan
    else:
        missing = None
    reference_data_pandas = reference_data([missing, missing, missing])

    reference_columns = [
        fastexcel.ColumnInfo(
            name="Column at H10",
            index=0,
            absolute_index=7,
            dtype="int",
            dtype_from="provided_by_index",
            column_name_from="looked_up",
        ),
        fastexcel.ColumnInfo(
            name="Column at I10",
            index=1,
            absolute_index=8,
            dtype="string",
            dtype_from="provided_by_name",
            column_name_from="looked_up",
        ),
        fastexcel.ColumnInfo(
            name="__UNNAMED__2",
            index=2,
            absolute_index=9,
            dtype="string",
            dtype_from="guessed",
            column_name_from="generated",
        ),
        fastexcel.ColumnInfo(
            name="Column at K10",
            index=3,
            absolute_index=10,
            dtype="float",
            dtype_from="guessed",
            column_name_from="looked_up",
        ),
    ]
    for sheet in sheets:
        assert sheet.selected_columns == reference_columns

    reference_pl = pl.DataFrame(reference_data_polars)
    reference_pd = pd.DataFrame(reference_data_pandas)

    for sheet in sheets:
        pl_assert_frame_equal(sheet.to_polars(), reference_pl)
    for sheet in sheets:
        pd_assert_frame_equal(sheet.to_pandas(), reference_pd, check_dtype=False)
================================================
FILE: python/tests/test_defined_names.py
================================================
import fastexcel
import pytest
from .utils import path_for_fixture
@pytest.mark.parametrize("path", ("sheet-with-defined-names.xlsx",))
def test_defined_names(path: str) -> None:
    """``defined_names()`` returns the workbook's defined names with their formulas."""
    reader = fastexcel.read_excel(path_for_fixture(path))

    names_and_formulas = (
        ("AddingValues", "SUM(sheet1!$K$5:$K$6)"),
        ("DefinedRange", "sheet1!$A$5:$D$7"),
        ("NamedConstant", "3.4"),
    )
    expected = [
        fastexcel.DefinedName(name=name, formula=formula) for name, formula in names_and_formulas
    ]

    assert reader.defined_names() == expected
================================================
FILE: python/tests/test_dtypes.py
================================================
from __future__ import annotations
import logging
from datetime import date, datetime
from typing import Any, Literal
import fastexcel
import numpy as np
import pandas as pd
import polars as pl
import pytest
from pandas.testing import assert_frame_equal as pd_assert_frame_equal
from polars.testing import assert_frame_equal as pl_assert_frame_equal
from .utils import get_expected_pandas_dtype, path_for_fixture
@pytest.fixture
def expected_data() -> dict[str, list[Any]]:
    """Expected column contents used by the tests on ``fixture-multi-dtypes-columns.xlsx``."""
    employee_ids = [
        "123456",
        "44333",
        "44333",
        "87878",
        "87878",
        "US00011",
        "135967",
        "IN86868",
        "IN86868",
    ]
    employee_names = [
        "Test1",
        "Test2",
        "Test2",
        "Test3",
        "Test3",
        "Test4",
        "Test5",
        "Test6",
        "Test6",
    ]
    # Key order matters: it is the column order of the expected frames.
    columns: dict[str, list[Any]] = {}
    columns["Employee ID"] = employee_ids
    columns["Employee Name"] = employee_names
    columns["Date"] = [datetime(2023, 7, 21)] * 9
    columns["Details"] = ["Healthcare"] * 7 + ["Something"] * 2
    columns["Asset ID"] = ["84444"] * 7 + ["ABC123"] * 2
    columns["Mixed dates"] = ["2023-07-21 00:00:00"] * 6 + ["July 23rd"] * 3
    columns["Mixed bools"] = ["true"] * 5 + ["false"] * 3 + ["other"]
    return columns
def test_sheet_with_mixed_dtypes(expected_data: dict[str, list[Any]]) -> None:
    """The whole sheet matches ``expected_data``, with "Date" as a millisecond datetime."""
    reader = fastexcel.read_excel(path_for_fixture("fixture-multi-dtypes-columns.xlsx"))
    sheet = reader.load_sheet(0)

    reference_pd = pd.DataFrame(expected_data).astype({"Date": "datetime64[ms]"})
    pd_assert_frame_equal(sheet.to_pandas(), reference_pd)

    reference_pl = pl.DataFrame(
        expected_data, schema_overrides={"Date": pl.Datetime(time_unit="ms")}
    )
    pl_assert_frame_equal(sheet.to_polars(), reference_pl)
def test_sheet_with_mixed_dtypes_and_sample_rows(expected_data: dict[str, list[Any]]) -> None:
    """``schema_sample_rows`` limits how many rows are inspected when guessing dtypes."""
    reader = fastexcel.read_excel(path_for_fixture("fixture-multi-dtypes-columns.xlsx"))

    # Since we skip rows here, the dtypes should be correctly guessed, even if we only check 5 rows
    skipped_sheet = reader.load_sheet(0, schema_sample_rows=5, skip_rows=5)
    tail_data = {}
    for column_name, column_values in expected_data.items():
        tail_data[column_name] = column_values[5:]

    pd_assert_frame_equal(
        skipped_sheet.to_pandas(),
        pd.DataFrame(tail_data).astype({"Date": "datetime64[ms]"}),
    )
    pl_assert_frame_equal(
        skipped_sheet.to_polars(),
        pl.DataFrame(tail_data, schema_overrides={"Date": pl.Datetime(time_unit="ms")}),
    )

    # Guess the sheet's dtypes on 5 rows only
    sampled_sheet = reader.load_sheet(0, schema_sample_rows=5)

    # String fields should not have been loaded
    expected_data.update(
        {
            "Employee ID": [
                123456.0,
                44333.0,
                44333.0,
                87878.0,
                87878.0,
                None,
                135967.0,
                None,
                None,
            ],
            "Asset ID": [84444.0] * 7 + [None] * 2,
            "Mixed dates": [datetime(2023, 7, 21)] * 6 + [None] * 3,
            "Mixed bools": [True] * 5 + [False] * 3 + [None],
        }
    )

    datetime_columns = ("Date", "Mixed dates")
    pd_assert_frame_equal(
        sampled_sheet.to_pandas(),
        pd.DataFrame(expected_data).astype({name: "datetime64[ms]" for name in datetime_columns}),
    )
    pl_assert_frame_equal(
        sampled_sheet.to_polars(),
        pl.DataFrame(
            expected_data,
            schema_overrides={name: pl.Datetime(time_unit="ms") for name in datetime_columns},
        ),
    )
@pytest.mark.parametrize("dtype_by_index", (True, False))
@pytest.mark.parametrize(
    "dtype,expected_data,expected_pl_dtype",
    [
        ("int", [123456, 44333, 44333, 87878, 87878], pl.Int64),
        ("float", [123456.0, 44333.0, 44333.0, 87878.0, 87878.0], pl.Float64),
        ("string", ["123456", "44333", "44333", "87878", "87878"], pl.Utf8),
        ("boolean", [True] * 5, pl.Boolean),
        (
            "datetime",
            [datetime(2238, 1, 3)] + [datetime(2021, 5, 17)] * 2 + [datetime(2140, 8, 6)] * 2,
            pl.Datetime,
        ),
        (
            "date",
            [date(2238, 1, 3)] + [date(2021, 5, 17)] * 2 + [date(2140, 8, 6)] * 2,
            pl.Date,
        ),
        # conversion to duration not supported yet
        ("duration", [pd.NaT] * 5, pl.Duration),
    ],
)
def test_sheet_with_mixed_dtypes_specify_dtypes(
    dtype_by_index: bool,
    dtype: fastexcel.DType,
    expected_data: list[Any],
    expected_pl_dtype: pl.DataType,
) -> None:
    """A dtype given for "Employee ID", by index or by name, is applied to the first 5 rows."""
    dtypes: fastexcel.DTypeMap
    if dtype_by_index:
        dtypes = {0: dtype}
    else:
        dtypes = {"Employee ID": dtype}

    reader = fastexcel.read_excel(path_for_fixture("fixture-multi-dtypes-columns.xlsx"))
    sheet = reader.load_sheet(0, dtypes=dtypes, n_rows=5)
    assert sheet.specified_dtypes == dtypes

    pd_column = sheet.to_pandas()["Employee ID"]
    assert pd_column.dtype == get_expected_pandas_dtype(dtype)
    assert pd_column.to_list() == expected_data

    pl_column = sheet.to_polars()["Employee ID"]
    assert pl_column.dtype == expected_pl_dtype
    # For durations the polars column is expected to hold None rather than NaT.
    if dtype != "duration":
        expected_pl_values = expected_data
    else:
        expected_pl_values = [None] * 5
    assert pl_column.to_list() == expected_pl_values
@pytest.mark.parametrize(
    "dtypes,expected,fastexcel_dtype,expected_pl_dtype",
    [
        (None, datetime(2023, 7, 21), "datetime", pl.Datetime),
        ({"Date": "datetime"}, datetime(2023, 7, 21), "datetime", pl.Datetime),
        ({"Date": "date"}, date(2023, 7, 21), "date", pl.Date),
        ({"Date": "string"}, "2023-07-21 00:00:00", "string", pl.Utf8),
        ({2: "datetime"}, datetime(2023, 7, 21), "datetime", pl.Datetime),
        ({2: "date"}, date(2023, 7, 21), "date", pl.Date),
        ({2: "string"}, "2023-07-21 00:00:00", "string", pl.Utf8),
    ],
)
def test_sheet_datetime_conversion(
    dtypes: fastexcel.DTypeMap | None,
    expected: Any,
    fastexcel_dtype: str,
    expected_pl_dtype: pl.DataType,
) -> None:
    """The "Date" column is converted according to the requested dtype, if any."""
    reader = fastexcel.read_excel(path_for_fixture("fixture-multi-dtypes-columns.xlsx"))
    sheet = reader.load_sheet(0, dtypes=dtypes)
    assert sheet.specified_dtypes == dtypes

    # All 9 rows hold the same value.
    expected_values = [expected] * 9

    pd_dates = sheet.to_pandas()["Date"]
    assert pd_dates.dtype == get_expected_pandas_dtype(fastexcel_dtype)
    assert pd_dates.to_list() == expected_values

    pl_dates = sheet.to_polars()["Date"]
    assert pl_dates.dtype == expected_pl_dtype
    assert pl_dates.to_list() == expected_values
@pytest.mark.parametrize("eager", [True, False])
@pytest.mark.parametrize("dtype_coercion", ["coerce", None])
def test_dtype_coercion_behavior__coerce(
    dtype_coercion: Literal["coerce"] | None, eager: bool
) -> None:
    """With "coerce" (explicit or not passed), "Mixed dates" is loaded as strings."""
    reader = fastexcel.read_excel(path_for_fixture("fixture-multi-dtypes-columns.xlsx"))

    # Only forward the argument when a value was parametrized.
    options = {}
    if dtype_coercion:
        options = {"dtype_coercion": dtype_coercion}
    loaded = reader.load_sheet(0, eager=eager, **options)  # type:ignore[call-overload]
    batch = loaded if eager else loaded.to_arrow()

    mixed_dates_as_text = ["2023-07-21 00:00:00"] * 6 + ["July 23rd"] * 3

    pd_column = batch.to_pandas()["Mixed dates"]
    assert pd_column.dtype == get_expected_pandas_dtype("string")
    assert pd_column.to_list() == mixed_dates_as_text

    pl_frame = pl.from_arrow(data=batch)
    assert isinstance(pl_frame, pl.DataFrame)
    assert pl_frame["Mixed dates"].dtype == pl.Utf8
    assert pl_frame["Mixed dates"].to_list() == mixed_dates_as_text
@pytest.mark.parametrize("eager", [True, False])
def test_dtype_coercion_behavior__strict_sampling_eveything(eager: bool) -> None:
    """Strict coercion raises on this fixture when no sampling limit is given."""
    reader = fastexcel.read_excel(path_for_fixture("fixture-multi-dtypes-columns.xlsx"))
    expected_error = fastexcel.UnsupportedColumnTypeCombinationError

    with pytest.raises(expected_error, match="type coercion is strict"):
        if not eager:
            reader.load_sheet(0, dtype_coercion="strict").to_arrow()
        else:
            reader.load_sheet_eager(0, dtype_coercion="strict")
@pytest.mark.parametrize("eager", [True, False])
def test_dtype_coercion_behavior__strict_sampling_limit(eager: bool) -> None:
    """Strict coercion with ``schema_sample_rows=5`` loads, leaving some values null."""
    reader = fastexcel.read_excel(path_for_fixture("fixture-multi-dtypes-columns.xlsx"))
    if eager:
        batch = reader.load_sheet_eager(0, dtype_coercion="strict", schema_sample_rows=5)
    else:
        batch = reader.load_sheet(0, dtype_coercion="strict", schema_sample_rows=5).to_arrow()

    # pandas side
    pd_frame = batch.to_pandas()
    pd_dates = pd_frame["Mixed dates"]
    assert pd_dates.dtype == "datetime64[ms]"
    assert pd_dates.to_list() == [pd.Timestamp("2023-07-21 00:00:00")] * 6 + [pd.NaT] * 3
    pd_assets = pd_frame["Asset ID"]
    assert pd_assets.dtype == "float64"
    assert pd_assets.replace(np.nan, None).to_list() == [84444.0] * 7 + [None] * 2

    # polars side
    pl_frame = pl.from_arrow(data=batch)
    assert isinstance(pl_frame, pl.DataFrame)
    pl_dates = pl_frame["Mixed dates"]
    assert pl_dates.dtype == pl.Datetime
    assert pl_dates.to_list() == [datetime(2023, 7, 21)] * 6 + [None] * 3
    pl_assets = pl_frame["Asset ID"]
    assert pl_assets.dtype == pl.Float64
    assert pl_assets.to_list() == [84444.0] * 7 + [None] * 2
def test_one_dtype_for_all() -> None:
    """A single dtype passed as ``dtypes`` is applied to every column of the sheet."""
    reader = fastexcel.read_excel(path_for_fixture("fixture-multi-dtypes-columns.xlsx"))
    sheet = reader.load_sheet(0, dtypes="string")

    column_names = (
        "Employee ID",
        "Employee Name",
        "Date",
        "Details",
        "Asset ID",
        "Mixed dates",
        "Mixed bools",
    )
    # Each column is expected to be a string whose dtype origin is "provided_for_all".
    expected_columns = [
        fastexcel.ColumnInfo(
            name=column_name,
            index=position,
            absolute_index=position,
            dtype="string",
            dtype_from="provided_for_all",
            column_name_from="looked_up",
        )
        for position, column_name in enumerate(column_names)
    ]

    assert sheet.available_columns() == expected_columns
    assert sheet.to_polars().dtypes == [pl.String] * 7
def test_fallback_infer_dtypes(caplog: pytest.LogCaptureFixture) -> None:
    """A column whose dtype cannot be inferred falls back to string, with a warning."""
    reader = fastexcel.read_excel(path_for_fixture("infer-dtypes-fallback.xlsx"))
    sheet = reader.load_sheet(0)

    # Exactly one warning explaining the fallback must have been logged
    fallback_warning = (
        "fastexcel.types.dtype",
        logging.WARNING,
        "Could not determine dtype for column 1, falling back to string",
    )
    assert caplog.record_tuples == [fallback_warning]

    id_column = fastexcel.ColumnInfo(
        name="id",
        index=0,
        absolute_index=0,
        dtype="float",
        dtype_from="guessed",
        column_name_from="looked_up",
    )
    label_column = fastexcel.ColumnInfo(
        name="label",
        index=1,
        absolute_index=1,
        dtype="string",
        dtype_from="guessed",
        column_name_from="looked_up",
    )
    assert sheet.available_columns() == [id_column, label_column]
    assert sheet.to_polars().dtypes == [pl.Float64, pl.String]
@pytest.mark.parametrize(
    ("dtype", "expected_data"),
    [
        (
            "int",
            [None] * 2
            + [-1.0, 0.0, 1.0, 0.0, 1.0, 1.0, -1.0, 0.0, 1.0, None, 1.0, 0.0]
            + [None] * 7
            + [0.0],
        ),
        (
            "float",
            [None] * 2
            + [-1.0, 0.0, 1.0, 0.0, 1.0, 1.1, -1.0, 0.0, 1.0, 1.1, 1.0, 0.0]
            + [None] * 7
            + [0.1],
        ),
        (
            "string",
            [
                None,
                "foo",
                "-1",
                "0",
                "1",
                "0",
                "1",
                "1.1",
                "-1",
                "0",
                "1",
                "1.1",
                "true",
                "false",
                "2023-07-21 00:00:00",
                "2023-07-21 12:20:00",
                # calamine reads a time as datetimes here, which seems wrong
                "1899-12-31 12:20:00",
                "07/21/2023",
                "7/21/2023 12:20:00 PM",
                "July 23rd",
                "12:20:00",
                "0.1",
            ],
        ),
        (
            "boolean",
            [None] * 2
            + [True, False, True, False, True, True]
            + [None] * 4
            + [True, False]
            + [None] * 7
            + [True],
        ),
        (
            "datetime",
            [pd.NaT] * 2
            + [
                pd.Timestamp("1899-12-30 00:00:00"),
                pd.Timestamp("1899-12-31 00:00:00"),
                pd.Timestamp("1900-01-01 00:00:00"),
                pd.Timestamp("1899-12-31 00:00:00"),
                pd.Timestamp("1900-01-01 00:00:00"),
                pd.Timestamp("1900-01-01 02:24:00"),
            ]
            + [pd.NaT] * 6
            + [
                pd.Timestamp("2023-7-21 00:00:00"),
                pd.Timestamp("2023-7-21 12:20:00"),
                # calamine currently adds a date to a time, which is
                # questionable
                pd.Timestamp("1899-12-31 12:20:00"),
            ]
            + [pd.NaT] * 4
            + [
                # calamine converts percentages to datetimes (since it does not
                # distinguish from floats), which seems questionable
                pd.Timestamp("1899-12-31 02:24:00")
            ],
        ),
        (
            "date",
            [None] * 2
            + [
                pd.Timestamp("1899-12-30").date(),
                pd.Timestamp("1899-12-31").date(),
                pd.Timestamp("1900-01-01").date(),
                pd.Timestamp("1899-12-31").date(),
                pd.Timestamp("1900-01-01").date(),
                pd.Timestamp("1900-01-01").date(),
            ]
            + [None] * 6
            + [
                pd.Timestamp("2023-7-21").date(),
                pd.Timestamp("2023-7-21").date(),
                # calamine converts any time to 1899-12-31, which is
                # questionable
                pd.Timestamp("1899-12-31").date(),
            ]
            + [None] * 4
            + [
                # calamine converts percentages to dates (since it does not
                # distinguish from floats), which seems questionable
                pd.Timestamp("1899-12-31").date()
            ],
        ),
        (
            "duration",
            [pd.NaT] * 14
            + [
                # dates/datetimes are converted to durations, which seems
                # questionable
                pd.Timedelta(datetime(2023, 7, 21 + 1) - datetime(1899, 12, 31)),
                pd.Timedelta(datetime(2023, 7, 21 + 1, 12, 20, 0) - datetime(1899, 12, 31)),
                pd.Timedelta(hours=12, minutes=20),
            ]
            + [pd.NaT] * 5,
        ),
    ],
)
def test_to_arrow_with_errors(
    dtype: fastexcel.DType,
    expected_data: list[Any],
):
    """``to_arrow_with_errors`` returns the data plus the positions of unconvertible cells."""
    reader = fastexcel.read_excel(path_for_fixture("fixture-type-errors.xlsx"))
    sheet = reader.load_sheet(0, dtypes={"Column": dtype})
    batch, cell_errors = sheet.to_arrow_with_errors()

    # pandas: normalise missing values to None before comparing.
    # For string columns in pandas 3, pd.NA must be replaced as well.
    missing_markers = [np.nan, pd.NA] if dtype == "string" else np.nan
    pd_values = batch.to_pandas()["Column"].replace(missing_markers, None).to_list()
    assert pd_values == expected_data

    def item_to_polars(item: Any):
        # Timestamps are checked first, then anything pandas considers missing.
        if isinstance(item, pd.Timestamp):
            return item.to_pydatetime()
        return None if pd.isna(item) else item

    pl_frame = pl.from_arrow(batch)
    assert isinstance(pl_frame, pl.DataFrame)
    assert pl_frame["Column"].to_list() == [item_to_polars(item) for item in expected_data]

    # the only empty cell is (0, 0), so all other cells that were read as None
    # should be errors
    null_markers = {None, pd.NaT}
    expected_error_positions = [
        (row, 0)
        for row, value in enumerate(expected_data)
        if row >= 1 and value in null_markers
    ]
    if not expected_error_positions:
        return
    assert cell_errors is not None
    assert [err.offset_position for err in cell_errors.errors] == expected_error_positions
def test_guess_dtypes_with_div0_error() -> None:
    """All three columns of ``div0.xlsx`` are guessed as float; the first quotient is null."""
    reader = fastexcel.read_excel(path_for_fixture("div0.xlsx"))
    sheet = reader.load_sheet(0)

    expected_columns = [
        fastexcel.ColumnInfo(
            name=column_name,
            index=position,
            absolute_index=position,
            dtype="float",
            dtype_from="guessed",
            column_name_from="looked_up",
        )
        for position, column_name in enumerate(("dividend", "divisor", "quotient"))
    ]
    assert sheet.available_columns() == expected_columns

    reference_data = {
        "dividend": [42.0, 43.0, 44.0, 45.0],
        "divisor": [0.0, 1.0, 2.0, 3.0],
        "quotient": [None, 43.0, 22.0, 15.0],
    }

    pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(reference_data))
    pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(reference_data))
================================================
FILE: python/tests/test_durations.py
================================================
from __future__ import annotations
from datetime import date, datetime, timedelta
import fastexcel
import numpy as np
import pandas as pd
import polars as pl
from pandas.testing import assert_frame_equal as pd_assert_frame_equal
from polars.datatypes import DataType as PolarsDataType
from polars.datatypes import Date as PlDate
from polars.datatypes import Datetime as PlDateTime
from polars.datatypes import Duration as PlDuration
from polars.datatypes import Utf8 as PlUtf8
from polars.testing import assert_frame_equal as pl_assert_frame_equal
from .utils import get_expected_pandas_dtype, path_for_fixture
def test_sheet_with_different_time_types() -> None:
    """Dates, date strings, times and datetimes from ``dates.ods`` get distinct dtypes."""
    reader = fastexcel.read_excel(path_for_fixture("dates.ods"))
    sheet = reader.load_sheet_by_idx(0)

    pd_frame = sheet.to_pandas()
    pl_frame = sheet.to_polars()

    ## dtypes
    assert pd_frame["date"].dtype == np.dtype("object")
    assert pd_frame["datestr"].dtype == get_expected_pandas_dtype("string")
    assert pd_frame["time"].dtype == np.dtype("timedelta64[ms]")
    assert pd_frame["datetime"].dtype == np.dtype("datetime64[ms]")

    pl_schema: dict[str, PolarsDataType] = {
        "date": PlDate(),
        "datestr": PlUtf8(),
        "time": PlDuration(time_unit="ms"),
        "datetime": PlDateTime(time_unit="ms", time_zone=None),
    }
    actual_schema = {name: dtype for name, dtype in zip(pl_frame.columns, pl_frame.dtypes)}
    assert actual_schema == pl_schema

    ## Contents
    time_series = pd.Series([pd.to_timedelta("01:02:03")]).astype("timedelta64[ms]")
    datetime_series = pd.Series([pd.to_datetime("2023-06-01 02:03:04")]).astype("datetime64[ms]")
    reference_pd = pd.DataFrame(
        {
            "date": [date(2023, 6, 1)],
            "datestr": ["2023-06-01T02:03:04+02:00"],
            "time": time_series,
            "datetime": datetime_series,
        }
    )
    reference_pl = pl.DataFrame(
        {
            "date": [date(2023, 6, 1)],
            "datestr": ["2023-06-01T02:03:04+02:00"],
            "time": [timedelta(hours=1, minutes=2, seconds=3)],
            "datetime": [datetime(2023, 6, 1, 2, 3, 4)],
        },
        schema=pl_schema,
    )

    pd_assert_frame_equal(pd_frame, reference_pd)
    pl_assert_frame_equal(pl_frame, reference_pl)
def test_sheet_with_offset_header_row_and_durations() -> None:
    """A duration column is read as millisecond durations when the header is at row 10."""
    reader = fastexcel.read_excel(path_for_fixture("single-sheet-skip-rows-durations.xlsx"))
    sheet = reader.load_sheet(0, header_row=10)
    column_name = "Tot. Time Away From System"

    pd_column = sheet.to_pandas()[column_name]
    pl_column = sheet.to_polars()[column_name]

    assert pd_column.dtype == np.dtype("timedelta64[ms]")
    assert pd_column.tolist() == [
        pd.Timedelta("01:18:43"),
        pd.Timedelta("07:16:51"),
    ]

    assert pl_column.dtype == pl.Duration(time_unit="ms")
    assert pl_column.to_list() == [
        timedelta(hours=1, minutes=18, seconds=43),
        timedelta(hours=7, minutes=16, seconds=51),
    ]
================================================
FILE: python/tests/test_eagerness.py
================================================
from datetime import date, datetime, timedelta
import fastexcel
import polars as pl
from pandas.testing import assert_frame_equal as pd_assert_frame_equal
from polars.testing import assert_frame_equal as pl_assert_frame_equal
from pyarrow import RecordBatch
from .utils import path_for_fixture
def test_load_sheet_eager_single_sheet() -> None:
    """Eager and lazy loading of the first sheet give equal pandas and polars frames."""
    reader = fastexcel.read_excel(path_for_fixture("fixture-single-sheet.xlsx"))

    # pandas comparison
    pd_from_eager = reader.load_sheet_eager(0).to_pandas()
    pd_from_lazy = reader.load_sheet(0).to_pandas()
    pd_assert_frame_equal(pd_from_eager, pd_from_lazy)

    # polars comparison
    pl_from_eager = pl.from_arrow(data=reader.load_sheet_eager(0))
    assert isinstance(pl_from_eager, pl.DataFrame)
    pl_from_lazy = reader.load_sheet(0).to_polars()
    pl_assert_frame_equal(pl_from_eager, pl_from_lazy)
def test_multiple_sheets_with_unnamed_columns():
    """Eager and lazy loading agree for the "With unnamed columns" sheet."""
    reader = fastexcel.read_excel(path_for_fixture("fixture-multi-sheet.xlsx"))
    sheet_name = "With unnamed columns"

    # pandas comparison
    pd_from_eager = reader.load_sheet_eager(sheet_name).to_pandas()
    pd_from_lazy = reader.load_sheet(sheet_name).to_pandas()
    pd_assert_frame_equal(pd_from_eager, pd_from_lazy)

    # polars comparison
    pl_from_eager = pl.from_arrow(data=reader.load_sheet_eager(sheet_name))
    assert isinstance(pl_from_eager, pl.DataFrame)
    pl_from_lazy = reader.load_sheet(sheet_name).to_polars()
    pl_assert_frame_equal(pl_from_eager, pl_from_lazy)
def test_eager_with_an_ods_file_should_return_a_recordbatch() -> None:
    """Eagerly loading sheet 0 of ``dates.ods`` returns a pyarrow ``RecordBatch``."""
    batch = fastexcel.read_excel(path_for_fixture("dates.ods")).load_sheet_eager(0)
    assert isinstance(batch, RecordBatch)

    actual = pl.from_arrow(batch)
    assert isinstance(actual, pl.DataFrame)

    expected = pl.DataFrame(
        {
            "date": [date(2023, 6, 1)],
            "datestr": ["2023-06-01T02:03:04+02:00"],
            "time": [timedelta(hours=1, minutes=2, seconds=3)],
            "datetime": [datetime(2023, 6, 1, 2, 3, 4)],
        }
    ).with_columns(
        # Both temporal columns are compared at millisecond resolution
        pl.col("datetime").dt.cast_time_unit("ms"),
        pl.col("time").dt.cast_time_unit("ms"),
    )
    pl_assert_frame_equal(actual, expected)
================================================
FILE: python/tests/test_empty.py
================================================
import fastexcel
import pytest
from .utils import path_for_fixture
@pytest.mark.parametrize("path", ("empty.ods", "empty.xlsx"))
def test_empty(path: str) -> None:
    """The first sheet of an empty workbook converts to empty pandas and polars frames."""
    first_sheet = fastexcel.read_excel(path_for_fixture(path)).load_sheet_by_idx(0)

    assert first_sheet.to_pandas().empty
    assert first_sheet.to_polars().is_empty()
================================================
FILE: python/tests/test_errors.py
================================================
from __future__ import annotations
import fastexcel
import pytest
from .utils import path_for_fixture
def test_cell_error_repr() -> None:
    """The first cell error reported for a forced ``int`` dtype has the expected ``repr``."""
    reader = fastexcel.read_excel(path_for_fixture("fixture-type-errors.xlsx"))
    sheet = reader.load_sheet(0, dtypes={"Column": "int"})

    # Only the error collection matters here; the arrow data is discarded
    _, cell_errors = sheet.to_arrow_with_errors()
    assert cell_errors is not None

    expected_repr = """CellError(position=(2, 0), offset_position=(1, 0), row_offset=1, detail="Expected int but got 'String(\\"foo\\")'")"""  # noqa: E501
    assert repr(cell_errors.errors[0]) == expected_repr
def test_read_excel_bad_type() -> None:
    """Passing an ``int`` as the source raises ``InvalidParametersError``."""
    with pytest.raises(
        fastexcel.InvalidParametersError, match="source must be a string or bytes"
    ):
        fastexcel.read_excel(42)  # type: ignore[arg-type]
def test_does_not_exist() -> None:
    """Reading a missing path raises ``CalamineError`` with the expected message."""
    missing_path = "path_does_not_exist.nope"
    # ``pytest.raises`` treats this as a regex; continuation lines carry no indentation
    expected_message = """calamine error: Cannot detect file format
Context:
0: Could not open workbook at path_does_not_exist.nope
1: could not load excel file at path_does_not_exist.nope"""

    with pytest.raises(fastexcel.CalamineError, match=expected_message) as exc_info:
        fastexcel.read_excel(missing_path)
    assert exc_info.value.__doc__ == "Generic calamine error"

    # Should also work with the base error type
    with pytest.raises(fastexcel.FastExcelError, match=expected_message):
        fastexcel.read_excel(missing_path)
def test_sheet_idx_not_found_error() -> None:
    """Loading sheet index 42 from a one-sheet file raises ``SheetNotFoundError``."""
    reader = fastexcel.read_excel(path_for_fixture("fixture-single-sheet.xlsx"))
    out_of_range_idx = 42
    # ``pytest.raises`` treats this as a regex; continuation lines carry no indentation
    expected_message = """sheet at index 42 not found
Context:
0: Sheet index 42 is out of range. File has 1 sheets."""

    with pytest.raises(fastexcel.SheetNotFoundError, match=expected_message) as exc_info:
        reader.load_sheet(out_of_range_idx)
    assert exc_info.value.__doc__ == "Sheet was not found"

    # Should also work with the base error type
    with pytest.raises(fastexcel.FastExcelError, match=expected_message):
        reader.load_sheet(out_of_range_idx)
def test_sheet_name_not_found_error() -> None:
    """Loading an unknown sheet name raises ``SheetNotFoundError`` listing available sheets."""
    reader = fastexcel.read_excel(path_for_fixture("fixture-single-sheet.xlsx"))
    # ``pytest.raises`` treats this as a regex; continuation lines carry no indentation
    expected_message = """sheet with name "idontexist" not found
Context:
0: Sheet "idontexist" not found in file. Available sheets: "January"."""

    with pytest.raises(fastexcel.SheetNotFoundError, match=expected_message) as exc_info:
        reader.load_sheet("idontexist")

    assert exc_info.value.__doc__ == "Sheet was not found"
@pytest.mark.parametrize(
    "exc_class, expected_docstring",
    [
        # Base class first, then each specific error type
        (fastexcel.FastExcelError, "The base class for all fastexcel errors"),
        (
            fastexcel.UnsupportedColumnTypeCombinationError,
            "Column contains an unsupported type combination",
        ),
        (
            fastexcel.CannotRetrieveCellDataError,
            "Data for a given cell cannot be retrieved",
        ),
        (
            fastexcel.CalamineCellError,
            "calamine returned an error regarding the content of the cell",
        ),
        (fastexcel.CalamineError, "Generic calamine error"),
        (fastexcel.ColumnNotFoundError, "Column was not found"),
        (fastexcel.SheetNotFoundError, "Sheet was not found"),
        (fastexcel.ArrowError, "Generic arrow error"),
        (fastexcel.InvalidParametersError, "Provided parameters are invalid"),
    ],
)
def test_docstrings(exc_class: type[Exception], expected_docstring: str) -> None:
    """Each exported exception class exposes the expected ``__doc__`` string."""
    actual_docstring = exc_class.__doc__
    assert actual_docstring == expected_docstring
def test_schema_sample_rows_must_be_nonzero() -> None:
    """``schema_sample_rows=0`` is rejected by both ``load_sheet`` and ``load_table``."""
    reader = fastexcel.read_excel(path_for_fixture("fixture-single-sheet.xlsx"))
    expected_message = "schema_sample_rows cannot be 0, as it would prevent dtype inferring"

    with pytest.raises(fastexcel.InvalidParametersError, match=expected_message):
        reader.load_sheet(0, schema_sample_rows=0)

    with pytest.raises(fastexcel.InvalidParametersError, match=expected_message):
        reader.load_table("my-table", schema_sample_rows=0)
================================================
FILE: python/tests/test_fastexcel.py
================================================
from __future__ import annotations

from collections.abc import Callable
from datetime import datetime
from typing import Any

import fastexcel
import pandas as pd
import polars as pl
import pytest
from pandas.testing import assert_frame_equal as pd_assert_frame_equal
from polars.testing import assert_frame_equal as pl_assert_frame_equal

from .utils import path_for_fixture
def test_single_sheet():
    """The single "January" sheet loads identically by name and by index."""
    reader = fastexcel.read_excel(path_for_fixture("fixture-single-sheet.xlsx"))
    assert reader.sheet_names == ["January"]

    loaded = (reader.load_sheet("January"), reader.load_sheet(0))

    # Metadata
    for sheet in loaded:
        assert sheet.name == "January"
        assert sheet.height == 2
        assert sheet.width == 2

    columns = {"Month": [1.0, 2.0], "Year": [2019.0, 2020.0]}

    expected_pd = pd.DataFrame(columns)
    for sheet in loaded:
        pd_assert_frame_equal(sheet.to_pandas(), expected_pd)

    expected_pl = pl.DataFrame(columns)
    for sheet in loaded:
        pl_assert_frame_equal(sheet.to_polars(), expected_pl)
def test_single_sheet_bytes():
    """A workbook passed as raw bytes loads the same way as one passed by path."""
    with open(path_for_fixture("fixture-single-sheet.xlsx"), "rb") as fixture_file:
        reader = fastexcel.read_excel(fixture_file.read())
    assert reader.sheet_names == ["January"]

    loaded = (reader.load_sheet("January"), reader.load_sheet(0))

    # Metadata
    for sheet in loaded:
        assert sheet.name == "January"
        assert sheet.height == 2
        assert sheet.width == 2

    columns = {"Month": [1.0, 2.0], "Year": [2019.0, 2020.0]}

    expected_pd = pd.DataFrame(columns)
    for sheet in loaded:
        pd_assert_frame_equal(sheet.to_pandas(), expected_pd)

    expected_pl = pl.DataFrame(columns)
    for sheet in loaded:
        pl_assert_frame_equal(sheet.to_polars(), expected_pl)
def test_single_sheet_with_types():
    """A sheet mixing floats, booleans and datetimes keeps those dtypes in both frames."""
    reader = fastexcel.read_excel(path_for_fixture("fixture-single-sheet-with-types.xlsx"))
    assert reader.sheet_names == ["Sheet1"]

    sheet = reader.load_sheet(0)
    assert sheet.name == "Sheet1"
    assert sheet.height == 3
    assert sheet.total_height == 3
    assert sheet.width == 4

    timestamp = "2022-03-02 05:43:04"

    expected_pd = pd.DataFrame(
        {
            "__UNNAMED__0": [0.0, 1.0, 2.0],
            "bools": [True, False, True],
            "dates": pd.Series([pd.Timestamp(timestamp)] * 3).astype("datetime64[ms]"),
            "floats": [12.35, 42.69, 1234567],
        }
    )
    pd_assert_frame_equal(sheet.to_pandas(), expected_pd)

    # The polars frame is built from strings, then parsed and cast to milliseconds
    parse_dates = pl.col("dates").str.strptime(pl.Datetime, "%F %T").dt.cast_time_unit("ms")
    expected_pl = pl.DataFrame(
        {
            "__UNNAMED__0": [0.0, 1.0, 2.0],
            "bools": [True, False, True],
            "dates": [timestamp] * 3,
            "floats": [12.35, 42.69, 1234567],
        }
    ).with_columns(parse_dates)
    pl_assert_frame_equal(sheet.to_polars(), expected_pl)
def test_multiple_sheets():
    """Each sheet of the multi-sheet fixture loads with its own content."""
    reader = fastexcel.read_excel(path_for_fixture("fixture-multi-sheet.xlsx"))
    assert reader.sheet_names == ["January", "February", "With unnamed columns"]

    january = {"Month": [1.0], "Year": [2019.0]}
    february = {"Month": [2.0, 3.0, 4.0], "Year": [2019.0, 2021.0, 2022.0]}
    unnamed = {
        "col1": [2.0, 3.0],
        "__UNNAMED__1": [1.5, 2.5],
        "col3": ["hello", "world"],
        "__UNNAMED__3": [-5.0, -6.0],
        "col5": ["a", "b"],
    }

    # pandas
    pd_assert_frame_equal(reader.load_sheet_by_idx(0).to_pandas(), pd.DataFrame(january))
    pd_assert_frame_equal(reader.load_sheet_by_idx(1).to_pandas(), pd.DataFrame(february))
    pd_assert_frame_equal(
        reader.load_sheet_by_name("With unnamed columns").to_pandas(),
        pd.DataFrame(unnamed),
    )

    # polars
    pl_assert_frame_equal(reader.load_sheet_by_idx(0).to_polars(), pl.DataFrame(january))
    pl_assert_frame_equal(reader.load_sheet_by_idx(1).to_polars(), pl.DataFrame(february))
    pl_assert_frame_equal(
        reader.load_sheet_by_name("With unnamed columns").to_polars(),
        pl.DataFrame(unnamed),
    )
def test_sheets_with_header_line_diff_from_zero():
    """"Sheet1" loads correctly when the header is read from row 1 instead of row 0."""
    reader = fastexcel.read_excel(path_for_fixture("fixture-changing-header-location.xlsx"))
    assert reader.sheet_names == ["Sheet1", "Sheet2", "Sheet3"]

    loaded = (
        reader.load_sheet("Sheet1", header_row=1),
        reader.load_sheet(0, header_row=1),
    )

    # Metadata
    for sheet in loaded:
        assert sheet.name == "Sheet1"
        assert sheet.height == 2
        assert sheet.width == 2

    columns = {"Month": [1.0, 2.0], "Year": [2019.0, 2020.0]}

    expected_pd = pd.DataFrame(columns)
    for sheet in loaded:
        pd_assert_frame_equal(sheet.to_pandas(), expected_pd)

    expected_pl = pl.DataFrame(columns)
    for sheet in loaded:
        pl_assert_frame_equal(sheet.to_polars(), expected_pl)
def test_sheets_with_no_header():
    """With ``header_row=None`` the columns of "Sheet2" get ``__UNNAMED__<n>`` names."""
    reader = fastexcel.read_excel(path_for_fixture("fixture-changing-header-location.xlsx"))
    assert reader.sheet_names == ["Sheet1", "Sheet2", "Sheet3"]

    loaded = (
        reader.load_sheet("Sheet2", header_row=None),
        reader.load_sheet(1, header_row=None),
    )

    # Metadata
    for sheet in loaded:
        assert sheet.name == "Sheet2"
        assert sheet.height == 2
        assert sheet.width == 3

    columns = {
        "__UNNAMED__0": [1.0, 2.0],
        "__UNNAMED__1": [3.0, 4.0],
        "__UNNAMED__2": [5.0, 6.0],
    }

    expected_pd = pd.DataFrame(columns)
    for sheet in loaded:
        pd_assert_frame_equal(sheet.to_pandas(), expected_pd)

    expected_pl = pl.DataFrame(columns)
    for sheet in loaded:
        pl_assert_frame_equal(sheet.to_polars(), expected_pl)
def test_sheets_with_empty_rows_before_header():
    """"Sheet3" loads with default options and yields the Month/Year data."""
    reader = fastexcel.read_excel(path_for_fixture("fixture-changing-header-location.xlsx"))
    assert reader.sheet_names == ["Sheet1", "Sheet2", "Sheet3"]

    loaded = (reader.load_sheet("Sheet3"), reader.load_sheet(2))

    # Metadata
    for sheet in loaded:
        assert sheet.name == "Sheet3"
        assert sheet.height == 2
        assert sheet.width == 2

    columns = {"Month": [1.0, 2.0], "Year": [2019.0, 2020.0]}

    expected_pd = pd.DataFrame(columns)
    for sheet in loaded:
        pd_assert_frame_equal(sheet.to_pandas(), expected_pd)

    expected_pl = pl.DataFrame(columns)
    for sheet in loaded:
        pl_assert_frame_equal(sheet.to_polars(), expected_pl)
def test_sheets_with_custom_headers():
    """Caller-provided ``column_names`` are used when ``header_row`` is ``None``."""
    reader = fastexcel.read_excel(path_for_fixture("fixture-changing-header-location.xlsx"))
    assert reader.sheet_names == ["Sheet1", "Sheet2", "Sheet3"]

    loaded = (
        reader.load_sheet("Sheet2", header_row=None, column_names=["foo", "bar", "baz"]),
        reader.load_sheet(1, header_row=None, column_names=["foo", "bar", "baz"]),
    )

    # Metadata
    for sheet in loaded:
        assert sheet.name == "Sheet2"
        assert sheet.height == 2
        assert sheet.width == 3

    columns = {"foo": [1.0, 2.0], "bar": [3.0, 4.0], "baz": [5.0, 6.0]}

    expected_pd = pd.DataFrame(columns)
    for sheet in loaded:
        pd_assert_frame_equal(sheet.to_pandas(), expected_pd)

    expected_pl = pl.DataFrame(columns)
    for sheet in loaded:
        pl_assert_frame_equal(sheet.to_polars(), expected_pl)
def test_sheets_with_skipping_headers():
    """A partial ``column_names`` list names the first column; the rest are generated."""
    reader = fastexcel.read_excel(path_for_fixture("fixture-changing-header-location.xlsx"))
    assert reader.sheet_names == ["Sheet1", "Sheet2", "Sheet3"]

    loaded = (
        reader.load_sheet("Sheet2", header_row=None, column_names=["Bugs"]),
        reader.load_sheet(1, header_row=None, column_names=["Bugs"]),
    )

    # Metadata
    for sheet in loaded:
        assert sheet.name == "Sheet2"
        assert sheet.height == 2
        assert sheet.width == 3

    columns = {
        "Bugs": [1.0, 2.0],
        "__UNNAMED__1": [3.0, 4.0],
        "__UNNAMED__2": [5.0, 6.0],
    }

    expected_pd = pd.DataFrame(columns)
    for sheet in loaded:
        pd_assert_frame_equal(sheet.to_pandas(), expected_pd)

    expected_pl = pl.DataFrame(columns)
    for sheet in loaded:
        pl_assert_frame_equal(sheet.to_polars(), expected_pl)
def test_sheet_with_pagination():
    """``skip_rows=1, n_rows=1`` returns only the second data row of the sheet."""
    reader = fastexcel.read_excel(path_for_fixture("fixture-single-sheet-with-types.xlsx"))
    assert reader.sheet_names == ["Sheet1"]

    page = reader.load_sheet(0, skip_rows=1, n_rows=1)
    assert page.name == "Sheet1"
    assert page.height == 1
    assert page.total_height == 3
    assert page.width == 4

    timestamp = "2022-03-02 05:43:04"

    expected_pd = pd.DataFrame(
        {
            "__UNNAMED__0": [1.0],
            "bools": [False],
            "dates": pd.Series([pd.Timestamp(timestamp)]).astype("datetime64[ms]"),
            "floats": [42.69],
        }
    )
    pd_assert_frame_equal(page.to_pandas(), expected_pd)

    parse_dates = pl.col("dates").str.strptime(pl.Datetime, "%F %T").dt.cast_time_unit("ms")
    expected_pl = pl.DataFrame(
        {
            "__UNNAMED__0": [1.0],
            "bools": [False],
            "dates": [timestamp],
            "floats": [42.69],
        }
    ).with_columns(parse_dates)
    pl_assert_frame_equal(page.to_polars(), expected_pl)
def test_sheet_with_skip_rows():
    """``skip_rows=1`` drops the first data row and keeps the remaining two."""
    reader = fastexcel.read_excel(path_for_fixture("fixture-single-sheet-with-types.xlsx"))
    assert reader.sheet_names == ["Sheet1"]

    sheet = reader.load_sheet(0, skip_rows=1)
    assert sheet.name == "Sheet1"
    assert sheet.height == 2
    assert sheet.width == 4

    timestamp = "2022-03-02 05:43:04"

    expected_pd = pd.DataFrame(
        {
            "__UNNAMED__0": [1.0, 2.0],
            "bools": [False, True],
            "dates": pd.Series([pd.Timestamp(timestamp)] * 2).astype("datetime64[ms]"),
            "floats": [42.69, 1234567],
        }
    )
    pd_assert_frame_equal(sheet.to_pandas(), expected_pd)

    parse_dates = pl.col("dates").str.strptime(pl.Datetime, "%F %T").dt.cast_time_unit("ms")
    expected_pl = pl.DataFrame(
        {
            "__UNNAMED__0": [1.0, 2.0],
            "bools": [False, True],
            "dates": [timestamp] * 2,
            "floats": [42.69, 1234567],
        }
    ).with_columns(parse_dates)
    pl_assert_frame_equal(sheet.to_polars(), expected_pl)
def test_sheet_with_n_rows():
    """``n_rows=1`` keeps only the first data row of the sheet."""
    reader = fastexcel.read_excel(path_for_fixture("fixture-single-sheet-with-types.xlsx"))
    assert reader.sheet_names == ["Sheet1"]

    sheet = reader.load_sheet(0, n_rows=1)
    assert sheet.name == "Sheet1"
    assert sheet.height == 1
    assert sheet.width == 4

    timestamp = "2022-03-02 05:43:04"

    expected_pd = pd.DataFrame(
        {
            "__UNNAMED__0": [0.0],
            "bools": [True],
            "dates": pd.Series([pd.Timestamp(timestamp)]).astype("datetime64[ms]"),
            "floats": [12.35],
        }
    )
    pd_assert_frame_equal(sheet.to_pandas(), expected_pd)

    parse_dates = pl.col("dates").str.strptime(pl.Datetime, "%F %T").dt.cast_time_unit("ms")
    expected_pl = pl.DataFrame(
        {
            "__UNNAMED__0": [0.0],
            "bools": [True],
            "dates": [timestamp],
            "floats": [12.35],
        }
    ).with_columns(parse_dates)
    pl_assert_frame_equal(sheet.to_polars(), expected_pl)
def test_sheet_with_pagination_and_without_headers():
    """Pagination combined with ``header_row=None`` and custom column names."""
    reader = fastexcel.read_excel(path_for_fixture("fixture-single-sheet-with-types.xlsx"))
    assert reader.sheet_names == ["Sheet1"]

    page = reader.load_sheet(
        0,
        n_rows=1,
        skip_rows=1,
        header_row=None,
        column_names=["This", "Is", "Amazing", "Stuff"],
    )
    assert page.name == "Sheet1"
    assert page.height == 1
    assert page.width == 4

    timestamp = "2022-03-02 05:43:04"

    expected_pd = pd.DataFrame(
        {
            "This": [0.0],
            "Is": [True],
            "Amazing": pd.Series([pd.Timestamp(timestamp)]).astype("datetime64[ms]"),
            "Stuff": [12.35],
        }
    )
    pd_assert_frame_equal(page.to_pandas(), expected_pd)

    parse_dates = pl.col("Amazing").str.strptime(pl.Datetime, "%F %T").dt.cast_time_unit("ms")
    expected_pl = pl.DataFrame(
        {
            "This": [0.0],
            "Is": [True],
            "Amazing": [timestamp],
            "Stuff": [12.35],
        }
    ).with_columns(parse_dates)
    pl_assert_frame_equal(page.to_polars(), expected_pl)
def test_sheet_with_pagination_out_of_bound():
    """Excessive ``skip_rows`` raises, while an excessive ``n_rows`` returns all rows."""
    reader = fastexcel.read_excel(path_for_fixture("fixture-single-sheet-with-types.xlsx"))
    assert reader.sheet_names == ["Sheet1"]

    # Skipping more rows than the sheet holds is rejected
    with pytest.raises(
        fastexcel.InvalidParametersError, match="Too many rows skipped. Max height is 4"
    ):
        reader.load_sheet(
            0,
            skip_rows=1000000,
            header_row=None,
            column_names=["This", "Is", "Amazing", "Stuff"],
        )

    # Asking for more rows than available does not raise
    sheet = reader.load_sheet(
        0,
        n_rows=1000000,
        skip_rows=1,
        header_row=None,
        column_names=["This", "Is", "Amazing", "Stuff"],
    )
    assert sheet.name == "Sheet1"
    assert sheet.height == 3
    assert sheet.width == 4

    timestamp = "2022-03-02 05:43:04"

    expected_pd = pd.DataFrame(
        {
            "This": [0.0, 1.0, 2.0],
            "Is": [True, False, True],
            "Amazing": pd.Series([pd.Timestamp(timestamp)] * 3).astype("datetime64[ms]"),
            "Stuff": [12.35, 42.69, 1234567],
        }
    )
    pd_assert_frame_equal(sheet.to_pandas(), expected_pd)

    parse_dates = pl.col("Amazing").str.strptime(pl.Datetime, "%F %T").dt.cast_time_unit("ms")
    expected_pl = pl.DataFrame(
        {
            "This": [0.0, 1.0, 2.0],
            "Is": [True, False, True],
            "Amazing": [timestamp] * 3,
            "Stuff": [12.35, 42.69, 1234567],
        }
    ).with_columns(parse_dates)
    pl_assert_frame_equal(sheet.to_polars(), expected_pl)
def test_sheet_with_na():
    """Test reading a sheet with #N/A cells. For now, we consider them as null"""
    sheet = fastexcel.read_excel(path_for_fixture("sheet-with-na.xlsx")).load_sheet(0)

    assert sheet.name == "Sheet1"
    assert sheet.height == 2
    assert sheet.total_height == 2
    assert sheet.width == 2

    columns = {"Title": ["A", "B"], "Amount": [None, 100.0]}
    pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(columns))
    pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(columns))
def test_sheet_with_ref():
    """Test reading a sheet with #REF! cells. For now, we consider them as null"""
    workbook = fastexcel.read_excel(path_for_fixture("sheet-with-na.xlsx"))
    sheet = workbook.load_sheet("Broken refs")

    assert sheet.name == "Broken refs"
    assert sheet.height == 2
    assert sheet.total_height == 2
    assert sheet.width == 1

    columns = {"numbers": [1.0, None]}
    pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(columns))
    pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(columns))
@pytest.mark.parametrize("excel_file", ["sheet-null-strings.xlsx", "sheet-null-strings-empty.xlsx"])
def test_null_strings(excel_file: str, expected_data_sheet_null_strings: dict[str, list[Any]]):
    """Both null-string fixtures load to the data supplied by the shared fixture."""
    sheet = fastexcel.read_excel(path_for_fixture(excel_file)).load_sheet(0)
    assert sheet.height == 10
    assert sheet.total_height == 10
    assert sheet.width == 6

    # Temporal columns are compared at millisecond resolution
    temporal_columns = ("DATES_AND_NULLS", "TIMESTAMPS_AND_NULLS")

    expected_pd = pd.DataFrame(expected_data_sheet_null_strings)
    for column in temporal_columns:
        expected_pd[column] = expected_pd[column].dt.as_unit("ms")
    pd_assert_frame_equal(sheet.to_pandas(), expected_pd)

    expected_pl = pl.DataFrame(expected_data_sheet_null_strings).with_columns(
        *[pl.col(column).dt.cast_time_unit("ms") for column in temporal_columns]
    )
    pl_assert_frame_equal(sheet.to_polars(), expected_pl)
def test_null_values_in_cells() -> None:
    """The "Date" column of the invalid-cell fixture holds nulls followed by datetimes."""
    workbook = fastexcel.read_excel(path_for_fixture("fixture-invalid-cell-value.xlsx"))
    sheet = workbook.load_sheet(0)

    columns = {
        "Title": ["A", "B", "C", "D"],
        "Date": [None, None, datetime(2021, 1, 1), datetime(2021, 5, 5)],
    }

    expected_pl = pl.DataFrame(columns).with_columns(pl.col("Date").dt.cast_time_unit("ms"))
    pl_assert_frame_equal(sheet.to_polars(), expected_pl)

    expected_pd = pd.DataFrame(columns)
    expected_pd["Date"] = expected_pd["Date"].dt.as_unit("ms")
    pd_assert_frame_equal(sheet.to_pandas(), expected_pd)
def test_invalid_value_num() -> None:
    """The second cell of "Column" in the invalid-num fixture is read as null."""
    workbook = fastexcel.read_excel(path_for_fixture("fixture-invalid-cell-value-num.xlsx"))
    sheet = workbook.load_sheet(0)

    columns = {"Column": [8.0, None]}
    pd_assert_frame_equal(sheet.to_pandas(), pd.DataFrame(columns))
    pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(columns))
def test_null_column_is_nullable() -> None:
    """The arrow schema marks the "nullonly" field as nullable."""
    workbook = fastexcel.read_excel(path_for_fixture("null-column.xlsx"))
    nullonly_field = workbook.load_sheet(0).to_arrow().schema.field("nullonly")
    assert nullonly_field.nullable is True
def test_sheet_with_decimal_numbers() -> None:
    """Decimal cells load as floats by default and as strings when a dtype forces it."""
    fixture = path_for_fixture("decimal-numbers.xlsx")

    # Default dtype inference
    as_floats = fastexcel.read_excel(fixture).load_sheet(0)
    pl_assert_frame_equal(as_floats.to_polars(), pl.DataFrame({"Decimals": [28.14, 29.02]}))

    # Column 0 explicitly requested as string
    as_strings = fastexcel.read_excel(fixture).load_sheet(0, dtypes={0: "string"})
    pl_assert_frame_equal(
        as_strings.to_polars(), pl.DataFrame({"Decimals": ["28.14", "29.02"]})
    )
@pytest.mark.parametrize(
    "header_row, skip_rows, expected",
    [
        (0, None, {"a": ["b", "c", "d", "e", "f"], "0": [1.0, 2.0, 3.0, 4.0, 5.0]}),  # default
        (
            None,
            0,
            {
                "__UNNAMED__0": [None, None, "a", "b", "c", "d", "e", "f"],
                "__UNNAMED__1": [None, None, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0],
            },
        ),
        (
            None,
            None,
            {
                "__UNNAMED__0": ["a", "b", "c", "d", "e", "f"],
                "__UNNAMED__1": [0.0, 1.0, 2.0, 3.0, 4.0, 5.0],
            },
        ),
        (
            0,
            0,
            {
                "__UNNAMED__0": [None, "a", "b", "c", "d", "e", "f"],
                "__UNNAMED__1": [None, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0],
            },
        ),
        (
            0,
            1,
            {
                "__UNNAMED__0": ["a", "b", "c", "d", "e", "f"],
                "__UNNAMED__1": [0.0, 1.0, 2.0, 3.0, 4.0, 5.0],
            },
        ),
        (
            None,
            2,
            {
                "__UNNAMED__0": ["a", "b", "c", "d", "e", "f"],
                "__UNNAMED__1": [0.0, 1.0, 2.0, 3.0, 4.0, 5.0],
            },
        ),
        (
            None,
            3,
            {"__UNNAMED__0": ["b", "c", "d", "e", "f"], "__UNNAMED__1": [1.0, 2.0, 3.0, 4.0, 5.0]},
        ),
        (
            1,
            0,
            {
                "__UNNAMED__0": ["a", "b", "c", "d", "e", "f"],
                "__UNNAMED__1": [0.0, 1.0, 2.0, 3.0, 4.0, 5.0],
            },
        ),
        (2, 0, {"a": ["b", "c", "d", "e", "f"], "0": [1.0, 2.0, 3.0, 4.0, 5.0]}),
        (2, None, {"a": ["b", "c", "d", "e", "f"], "0": [1.0, 2.0, 3.0, 4.0, 5.0]}),
        (2, 1, {"a": ["c", "d", "e", "f"], "0": [2.0, 3.0, 4.0, 5.0]}),
        # skip_rows given as a list of row indices
        (2, [1, 3], {"a": ["b", "d", "f"], "0": [1.0, 3.0, 5.0]}),
        (2, [0], {"a": ["c", "d", "e", "f"], "0": [2.0, 3.0, 4.0, 5.0]}),
        (
            None,
            [2, 4],
            {
                "__UNNAMED__0": [None, None, "b", "d", "e", "f"],
                "__UNNAMED__1": [None, None, 1.0, 3.0, 4.0, 5.0],
            },
        ),
        (2, [], {"a": ["b", "c", "d", "e", "f"], "0": [1.0, 2.0, 3.0, 4.0, 5.0]}),
        (2, [0, 1, 2, 3], {"a": ["f"], "0": [5.0]}),
        # skip_rows given as a predicate on the row index
        (2, lambda x: x % 2 == 0, {"a": ["c", "e"], "0": [2.0, 4.0]}),
        (2, lambda x: x in [0, 4], {"a": ["c", "d", "e"], "0": [2.0, 3.0, 4.0]}),
        (2, lambda x: False, {"a": ["b", "c", "d", "e", "f"], "0": [1.0, 2.0, 3.0, 4.0, 5.0]}),
        (2, lambda x: x != 2, {"a": ["d"], "0": [3.0]}),
    ],
)
def test_header_row_and_skip_rows(
    header_row: int | None,
    skip_rows: int | list[int] | Callable[[int], bool] | None,
    expected: dict[str, Any],
) -> None:
    """Check how ``header_row`` and ``skip_rows`` combine on the ``no-header.xlsx`` fixture.

    Args:
        header_row: Header row index forwarded to ``load_sheet``, or ``None``.
        skip_rows: Forwarded to ``load_sheet``. The cases above pass ``None``, an int,
            a list of ints, and a callable taking an int and returning a bool.
        expected: Column name to values mapping the resulting polars frame must equal.
    """
    sheet = fastexcel.read_excel(path_for_fixture("no-header.xlsx")).load_sheet(
        0, header_row=header_row, skip_rows=skip_rows
    )
    pl_assert_frame_equal(sheet.to_polars(), pl.DataFrame(expected))
def test_null_bytes_in_column_names() -> None:
    """https://github.com/ToucanToco/fastexcel/issues/343"""
    workbook = fastexcel.read_excel(path_for_fixture("null-bytes-in-columns-names.xls"))
    frame = workbook.load_sheet(0).to_polars()

    n_rows, n_columns = frame.shape
    assert (n_rows, n_columns) == (8_763, 11)
================================================
FILE: python/tests/test_pycapsule.py
================================================
"""Tests for the Arrow PyCapsule Interface implementation."""
import fastexcel
import pandas as pd
import polars as pl
from .utils import path_for_fixture
def test_sheet_arrow_c_schema():
    """Test that __arrow_c_schema__ returns a valid PyCapsule."""
    excel_reader = fastexcel.read_excel(path_for_fixture("fixture-single-sheet.xlsx"))
    sheet = excel_reader.load_sheet("January")

    schema_capsule = sheet.__arrow_c_schema__()

    # Check the returned object's type is a PyCapsule. The capsule's own name is not
    # inspected here; the former ``hasattr(..., "__class__")`` check was always true
    # and has been dropped.
    assert "PyCapsule" in str(type(schema_capsule))
def test_sheet_arrow_c_array():
    """Test that __arrow_c_array__ returns a tuple of PyCapsules."""
    reader = fastexcel.read_excel(path_for_fixture("fixture-single-sheet.xlsx"))
    january = reader.load_sheet("January")

    capsules = january.__arrow_c_array__()
    schema_capsule, array_capsule = capsules

    # Both members of the pair must be PyCapsules (schema first, then array)
    for capsule in (schema_capsule, array_capsule):
        assert "PyCapsule" in str(type(capsule))
def test_table_arrow_c_schema():
    """Test that table __arrow_c_schema__ returns a valid PyCapsule."""
    reader = fastexcel.read_excel(path_for_fixture("sheet-with-tables.xlsx"))
    first_table_name = reader.table_names()[0]  # Should be 'users'
    table = reader.load_table(first_table_name)

    capsule = table.__arrow_c_schema__()

    # Check it's a PyCapsule
    assert "PyCapsule" in str(type(capsule))
def test_table_arrow_c_array():
    """Test that table __arrow_c_array__ returns a tuple of PyCapsules."""
    reader = fastexcel.read_excel(path_for_fixture("sheet-with-tables.xlsx"))
    first_table_name = reader.table_names()[0]  # Should be 'users'
    table = reader.load_table(first_table_name)

    schema_capsule, array_capsule = table.__arrow_c_array__()

    # Check both are PyCapsules
    for capsule in (schema_capsule, array_capsule):
        assert "PyCapsule" in str(type(capsule))
def test_pycapsule_interface_with_requested_schema():
    """Test PyCapsule interface methods with requested_schema parameter."""
    reader = fastexcel.read_excel(path_for_fixture("fixture-single-sheet.xlsx"))
    january = reader.load_sheet("January")

    # Test with None (current implementation ignores this)
    requested_schema = None
    schema_capsule, array_capsule = january.__arrow_c_array__(requested_schema)

    for capsule in (schema_capsule, array_capsule):
        assert "PyCapsule" in str(type(capsule))
def test_integration_with_polars():
    """Test that polars can consume our PyCapsule interface."""
    reader = fastexcel.read_excel(path_for_fixture("fixture-single-sheet.xlsx"))
    january = reader.load_sheet("January")

    # Hand the sheet object straight to the DataFrame constructor: this exercises
    # the actual interoperability rather than an explicit conversion method
    frame = pl.DataFrame(january)

    assert len(frame) == 2
    assert frame.columns == ["Month", "Year"]
def test_to_polars_without_pyarrow():
    """Test that to_polars() works via PyCapsule interface without pyarrow."""
    # Sheet: this should work via PyCapsule interface, not requiring pyarrow
    sheet_reader = fastexcel.read_excel(path_for_fixture("fixture-single-sheet.xlsx"))
    sheet_frame = sheet_reader.load_sheet("January").to_polars()
    assert isinstance(sheet_frame, pl.DataFrame)
    assert len(sheet_frame) == 2
    assert sheet_frame.columns == ["Month", "Year"]

    # Test with table as well
    table_reader = fastexcel.read_excel(path_for_fixture("sheet-with-tables.xlsx"))
    first_table_name = table_reader.table_names()[0]
    table_frame = table_reader.load_table(first_table_name).to_polars()
    assert isinstance(table_frame, pl.DataFrame)
def test_to_pandas_still_requires_pyarrow():
    """Test that to_pandas() currently still requires pyarrow.

    Note: pandas PyCapsule interface would require implementing __dataframe__
    or __arrow_c_stream__, which we don't currently do.
    """
    # Sheet: this still requires pyarrow for now
    sheet_reader = fastexcel.read_excel(path_for_fixture("fixture-single-sheet.xlsx"))
    sheet_frame = sheet_reader.load_sheet("January").to_pandas()
    assert isinstance(sheet_frame, pd.DataFrame)
    assert len(sheet_frame) == 2
    assert list(sheet_frame.columns) == ["Month", "Year"]

    # Test with table as well
    table_reader = fastexcel.read_excel(path_for_fixture("sheet-with-tables.xlsx"))
    first_table_name = table_reader.table_names()[0]
    table_frame = table_reader.load_table(first_table_name).to_pandas()
    assert isinstance(table_frame, pd.DataFrame)
================================================
FILE: python/tests/test_sheet_visibility.py
================================================
import fastexcel
from .utils import path_for_fixture
def test_sheet_visibilities() -> None:
    """Sheets 0, 1 and 2 report "visible", "hidden" and "veryhidden" respectively."""
    reader = fastexcel.read_excel(
        path_for_fixture("fixture-sheets-different-visibilities.xlsx")
    )

    expected_by_index = ["visible", "hidden", "veryhidden"]
    for sheet_idx, expected_visibility in enumerate(expected_by_index):
        assert reader.load_sheet(sheet_idx).visible == expected_visibility
================================================
FILE: python/tests/test_shifted_data.py
================================================
import fastexcel
from .utils import path_for_fixture
def test_sheet_with_offset():
    """Columns of an offset sheet report both relative and absolute indices."""
    reader = fastexcel.read_excel(path_for_fixture("sheet-and-table-with-offset.xlsx"))
    sheet = reader.load_sheet("without-table")

    # (name, absolute_index, dtype, column_name_from); ``index`` is the list position
    column_specs = [
        ("Column at H10", 7, "float", "looked_up"),
        ("Column at I10", 8, "float", "looked_up"),
        ("__UNNAMED__2", 9, "string", "generated"),
        ("Column at K10", 10, "float", "looked_up"),
    ]
    expected_columns = [
        fastexcel.ColumnInfo(
            name=name,
            index=position,
            absolute_index=absolute_index,
            dtype=dtype,
            dtype_from="guessed",
            column_name_from=name_origin,
        )
        for position, (name, absolute_index, dtype, name_origin) in enumerate(column_specs)
    ]

    assert sheet.available_columns() == expected_columns
def test_table_with_offset():
    """Columns of the "TableAtD5" table report both relative and absolute indices."""
    reader = fastexcel.read_excel(path_for_fixture("sheet-and-table-with-offset.xlsx"))
    table = reader.load_table("TableAtD5")

    # (name, absolute_index); ``index`` is the list position
    column_specs = [("Column at D5", 3), ("Column at E5", 4)]
    expected_columns = [
        fastexcel.ColumnInfo(
            name=name,
            index=position,
            absolute_index=absolute_index,
            dtype="float",
            dtype_from="guessed",
            column_name_from="provided",
        )
        for position, (name, absolute_index) in enumerate(column_specs)
    ]

    assert table.available_columns() == expected_columns
================================================
FILE: python/tests/test_tables.py
================================================
from datetime import datetime
import fastexcel
import pandas as pd
import polars as pl
import pytest
from pandas.testing import assert_frame_equal as pd_assert_frame_equal
from polars.testing import assert_frame_equal as pl_assert_frame_equal
from .utils import path_for_fixture
@pytest.mark.parametrize("path", ("sheet-with-tables.xlsx",))
def test_table_names(path: str) -> None:
    """``table_names()`` without arguments returns the single "users" table."""
    reader = fastexcel.read_excel(path_for_fixture(path))
    assert reader.table_names() == ["users"]
@pytest.mark.parametrize("path", ("sheet-with-tables.xlsx",))
def test_table_names_with_sheet_name(path: str) -> None:
    """``table_names(sheet)`` returns only the tables of the given sheet."""
    reader = fastexcel.read_excel(path_for_fixture(path))

    expected_by_sheet = (("sheet1", ["users"]), ("sheet2", []))
    for sheet_name, expected_tables in expected_by_sheet:
        assert reader.table_names(sheet_name) == expected_tables
@pytest.mark.parametrize("path", ("sheet-with-tables.xlsx",))
def test_load_table(path: str) -> None:
    """The "users" table exposes the expected metadata, columns and data."""
    reader = fastexcel.read_excel(path_for_fixture(path))
    users = reader.load_table("users")

    assert users.name == "users"
    assert users.sheet_name == "sheet1"
    assert users.specified_dtypes is None

    # (name, dtype); here ``index`` and ``absolute_index`` both equal the list position
    column_specs = [
        ("User Id", "float"),
        ("FirstName", "string"),
        ("LastName", "string"),
        ("Date", "datetime"),
    ]
    expected_columns = [
        fastexcel.ColumnInfo(
            name=name,
            index=position,
            absolute_index=position,
            dtype=dtype,
            dtype_from="guessed",
            column_name_from="provided",
        )
        for position, (name, dtype) in enumerate(column_specs)
    ]
    assert users.available_columns() == expected_columns

    assert users.total_height == 3
    assert users.offset == 0
    assert users.height == 3
    assert users.width == 4

    plain_columns = {
        "User Id": [1.0, 2.0, 5.0],
        "FirstName": ["Peter", "John", "Hans"],
        "LastName": ["Müller", "Meier", "Fricker"],
    }
    dates = [datetime(2020, 1, 1), datetime(2024, 5, 4), datetime(2025, 2, 1)]

    expected_pl = pl.DataFrame({**plain_columns, "Date": dates}).with_columns(
        pl.col("Date").dt.cast_time_unit("ms")
    )
    pl_assert_frame_equal(users.to_polars(), expected_pl)

    expected_pd = pd.DataFrame(
        {**plain_columns, "Date": pd.Series(dates).astype("datetime64[ms]")}
    )
    pd_assert_frame_equal(users.to_pandas(), expected_pd)

    # Eager loading must give the same data through both conversion paths
    eager_result = reader.load_table("users", eager=True)
    eager_pl = pl.from_arrow(eager_result)
    assert isinstance(eager_pl, pl.DataFrame)
    pl_assert_frame_equal(eager_pl, expected_pl)
    pd_assert_frame_equal(eager_result.to_pandas(), expected_pd)
================================================
FILE: python/tests/test_whitespace.py
================================================
import datetime
import fastexcel
import polars as pl
from pandas.testing import assert_frame_equal as pd_assert_frame_equal
from polars.testing import assert_frame_equal as pl_assert_frame_equal
from .utils import path_for_fixture
def test_skip_tail_whitespace_rows() -> None:
    """Check the ``skip_whitespace_tail_rows`` option on a plain sheet and on a table."""
    reader = fastexcel.read_excel(path_for_fixture("sheet-and-table-with-whitespace.xlsx"))
    third_column_as_ms = pl.col("Column Three").dt.cast_time_unit("ms")

    # "Column Three" values for the six rows expected once tail rows are skipped.
    trimmed_timestamps = [
        datetime.datetime(2025, 11, 19, 14, 34, 2),
        datetime.datetime(2025, 11, 20, 14, 56, 34),
        datetime.datetime(2025, 11, 21, 15, 19, 6),
        None,
        datetime.datetime(2025, 11, 22, 15, 41, 38),
        datetime.datetime(2025, 11, 23, 16, 4, 10),
    ]
    # Without the option, four more rows are expected; none of them has a timestamp.
    untrimmed_timestamps = trimmed_timestamps + [None] * 4

    # Expected data when NOT skipping whitespace tail rows
    untrimmed_expected = pl.DataFrame(
        {
            "Column One": ["1", "2", "3", None, "5", None, None, None, None, " "],
            "Column Two": ["one", "two", None, "four", "five", None, None, "", None, None],
            "Column Three": untrimmed_timestamps,
        }
    ).with_columns(third_column_as_ms)

    # Expected data when skipping whitespace tail rows
    trimmed_expected = pl.DataFrame(
        {
            "Column One": [1.0, 2.0, 3.0, None, 5.0, None],
            "Column Two": ["one", "two", None, "four", "five", None],
            "Column Three": trimmed_timestamps,
        }
    ).with_columns(third_column_as_ms)

    # Default behaviour, sheet then table: the tail rows are kept.
    untrimmed_sheet = reader.load_sheet("Without Table")
    pl_assert_frame_equal(untrimmed_sheet.to_polars(), untrimmed_expected)

    untrimmed_table = reader.load_table("Table_with_whitespace")
    pl_assert_frame_equal(untrimmed_table.to_polars(), untrimmed_expected)

    # With the option enabled, sheet then table: the tail rows are dropped.
    trimmed_sheet = reader.load_sheet("Without Table", skip_whitespace_tail_rows=True)
    pl_assert_frame_equal(trimmed_sheet.to_polars(), trimmed_expected)

    trimmed_table = reader.load_table("Table_with_whitespace", skip_whitespace_tail_rows=True)
    pl_assert_frame_equal(trimmed_table.to_polars(), trimmed_expected)

    # Also verify pandas compatibility
    trimmed_expected_pandas = trimmed_expected.to_pandas()
    pd_assert_frame_equal(trimmed_sheet.to_pandas(), trimmed_expected_pandas)
    pd_assert_frame_equal(trimmed_table.to_pandas(), trimmed_expected_pandas)
def test_skip_tail_rows_and_whitespace_as_null_behavior() -> None:
    """Check ``whitespace_as_null`` alone and combined with ``skip_whitespace_tail_rows``.

    Both a plain sheet and a table are loaded with each option combination and
    compared against the expected polars and pandas frames.
    """
    reader = fastexcel.read_excel(path_for_fixture("sheet-and-table-with-whitespace.xlsx"))
    third_column_as_ms = pl.col("Column Three").dt.cast_time_unit("ms")

    # Values of the six rows expected when tail rows are skipped.
    trimmed_numbers = [1.0, 2.0, 3.0, None, 5.0, None]
    trimmed_labels = ["one", "two", None, "four", "five", None]
    trimmed_timestamps = [
        datetime.datetime(2025, 11, 19, 14, 34, 2),
        datetime.datetime(2025, 11, 20, 14, 56, 34),
        datetime.datetime(2025, 11, 21, 15, 19, 6),
        None,
        datetime.datetime(2025, 11, 22, 15, 41, 38),
        datetime.datetime(2025, 11, 23, 16, 4, 10),
    ]
    # The four tail rows are expected to be entirely null once whitespace is null.
    null_tail = [None] * 4

    # Expected data when converting whitespace to null but not skipping tail rows
    nulled_expected = pl.DataFrame(
        {
            # All rows should be taken into account but the space in the last row should be
            # considered null
            "Column One": trimmed_numbers + null_tail,
            # All rows should be taken into account but the empty string in 8th row should be
            # considered null
            "Column Two": trimmed_labels + null_tail,
            "Column Three": trimmed_timestamps + null_tail,
        }
    ).with_columns(third_column_as_ms)

    # Expected data when converting whitespace to null and skipping tail rows
    trimmed_expected = pl.DataFrame(
        {
            "Column One": trimmed_numbers,
            "Column Two": trimmed_labels,
            "Column Three": trimmed_timestamps,
        }
    ).with_columns(third_column_as_ms)

    # whitespace_as_null only, sheet then table: every row is kept.
    nulled_sheet = reader.load_sheet("Without Table", whitespace_as_null=True)
    pl_assert_frame_equal(nulled_sheet.to_polars(), nulled_expected)

    nulled_table = reader.load_table("Table_with_whitespace", whitespace_as_null=True)
    pl_assert_frame_equal(nulled_table.to_polars(), nulled_expected)

    # Both options, sheet then table: the tail rows are dropped.
    trimmed_sheet = reader.load_sheet(
        "Without Table", whitespace_as_null=True, skip_whitespace_tail_rows=True
    )
    pl_assert_frame_equal(trimmed_sheet.to_polars(), trimmed_expected)

    trimmed_table = reader.load_table(
        "Table_with_whitespace", whitespace_as_null=True, skip_whitespace_tail_rows=True
    )
    pl_assert_frame_equal(trimmed_table.to_polars(), trimmed_expected)

    # Also verify pandas compatibility
    pd_assert_frame_equal(trimmed_sheet.to_pandas(), trimmed_expected.to_pandas())
    pd_assert_frame_equal(nulled_sheet.to_pandas(), nulled_expected.to_pandas())
    pd_assert_frame_equal(trimmed_table.to_pandas(), trimmed_expected.to_pandas())
    pd_assert_frame_equal(nulled_table.to_pandas(), nulled_expected.to_pandas())
================================================
FILE: python/tests/utils.py
================================================
from __future__ import annotations
from pathlib import Path
from typing import Any
import numpy as np
import pandas as pd
def path_for_fixture(fixture_file: str) -> str:
    """Return, as a string, the path of ``fixture_file`` under ``tests/fixtures``.

    The ``tests/fixtures`` directory is located three levels above this module's
    own file.
    """
    module_dir = Path(__file__).parent
    base_dir = module_dir.parent.parent
    fixture_path = base_dir.joinpath("tests", "fixtures", fixture_file)
    return str(fixture_path)
def get_expected_pandas_dtype(fastexcel_dtype: str) -> Any:
    """Map a fastexcel dtype name to the pandas dtype expected for it.

    The result for ``"string"`` depends on the installed pandas version:

    In pandas < 3.0, string columns use object dtype.
    In pandas >= 3.0, string columns use StringDtype (with na_value=nan when from Arrow).

    Raises:
        ValueError: if ``fastexcel_dtype`` is not a known fastexcel dtype name.
    """
    installed_version = tuple(map(int, pd.__version__.split(".")[:2]))
    object_dtype = np.dtype("object")

    if fastexcel_dtype == "string":
        if installed_version < (3, 0):
            return object_dtype
        # When converting from Arrow, pandas uses nan as na_value
        return pd.StringDtype(na_value=np.nan)

    version_independent = {
        "int": np.dtype("int64"),
        "float": np.dtype("float64"),
        "boolean": np.dtype("bool"),
        "datetime": np.dtype("datetime64[ms]"),
        "duration": np.dtype("timedelta64[ms]"),
        # Date columns are always object dtype
        "date": object_dtype,
    }
    resolved = version_independent.get(fastexcel_dtype)
    if resolved is None:
        raise ValueError(f"Unknown fastexcel dtype: {fastexcel_dtype}")
    return resolved
def assert_pandas_dtypes(df: pd.DataFrame, expected_dtypes: dict[str, str]) -> None:
    """Check the dtype of each listed column of ``df`` against its expected dtype.

    Columns are checked in the iteration order of ``expected_dtypes``; the first
    mismatch fails with an ``AssertionError`` naming the column and both dtypes.

    Args:
        df: The pandas DataFrame to check
        expected_dtypes: A dict mapping column names to fastexcel dtype strings
    """
    for column, dtype_name in expected_dtypes.items():
        # Translate the fastexcel dtype name into the pandas dtype to compare with.
        wanted = get_expected_pandas_dtype(dtype_name)
        found = df[column].dtype
        assert found == wanted, f"Column '{column}': expected dtype {wanted}, got {found}"
================================================
FILE: scripts/update_versions.py
================================================
#!/usr/bin/env -S uv run --script
# /// script
# requires-python = ">=3.9"
# dependencies = []
# ///
"""Manage docs/versions.json and generate the root docs/index.html redirect."""
from __future__ import annotations
import argparse
import json
import re
from pathlib import Path
def parse_semver(version: str) -> tuple[int, ...]:
    """Return every run of digits found in ``version`` as a tuple of ints.

    For example ``'v0.19.0'`` gives ``(0, 19, 0)``; a string containing no digit
    gives an empty tuple.
    """
    digit_runs = re.findall(r"\d+", version)
    return tuple(map(int, digit_runs))
def sort_versions(versions: list[dict]) -> list[dict]:
"""Sort: stable first, then tags descending by semver, 'latest' last."""
def sort_key(v: dict) -> tuple[int, tuple[int, ...], str]:
gitextract_ze2pys5u/
├── .clippy.toml
├── .github/
│ ├── dependabot.yml
│ └── workflows/
│ ├── CI.yml
│ ├── docs.yml
│ └── release.yml
├── .gitignore
├── .pre-commit-config.yaml
├── Cargo.toml
├── LICENSE
├── Makefile
├── README.md
├── doc-templates/
│ └── module.html.jinja2
├── pyproject.toml
├── python/
│ ├── fastexcel/
│ │ ├── __init__.py
│ │ ├── _fastexcel.pyi
│ │ └── py.typed
│ └── tests/
│ ├── __init__.py
│ ├── benchmarks/
│ │ ├── README.md
│ │ ├── fixtures/
│ │ │ ├── formulas.xlsx
│ │ │ ├── plain_data.xls
│ │ │ └── plain_data.xlsx
│ │ ├── memory.py
│ │ ├── readers.py
│ │ └── speed.py
│ ├── conftest.py
│ ├── test_alias_generation.py
│ ├── test_column_selection.py
│ ├── test_defined_names.py
│ ├── test_dtypes.py
│ ├── test_durations.py
│ ├── test_eagerness.py
│ ├── test_empty.py
│ ├── test_errors.py
│ ├── test_fastexcel.py
│ ├── test_pycapsule.py
│ ├── test_sheet_visibility.py
│ ├── test_shifted_data.py
│ ├── test_tables.py
│ ├── test_whitespace.py
│ └── utils.py
├── scripts/
│ └── update_versions.py
├── src/
│ ├── data/
│ │ ├── cell_extractors.rs
│ │ ├── mod.rs
│ │ ├── python.rs
│ │ └── rust.rs
│ ├── error.rs
│ ├── lib.rs
│ ├── types/
│ │ ├── dtype/
│ │ │ ├── mod.rs
│ │ │ └── python.rs
│ │ ├── excelreader/
│ │ │ ├── mod.rs
│ │ │ └── python.rs
│ │ ├── excelsheet/
│ │ │ ├── column_info/
│ │ │ │ ├── mod.rs
│ │ │ │ └── python.rs
│ │ │ ├── mod.rs
│ │ │ ├── polars.rs
│ │ │ ├── python.rs
│ │ │ └── table.rs
│ │ ├── exceltable/
│ │ │ ├── mod.rs
│ │ │ └── python.rs
│ │ ├── idx_or_name/
│ │ │ ├── mod.rs
│ │ │ └── python.rs
│ │ └── mod.rs
│ └── utils/
│ ├── mod.rs
│ └── schema.rs
├── test.py
└── tests/
├── column_selection.rs
├── fastexcel.rs
├── fixtures/
│ ├── dates.ods
│ ├── decimal-numbers.xlsx
│ ├── div0.xlsx
│ ├── empty.ods
│ ├── empty.xlsx
│ ├── fixture-changing-header-location.xlsx
│ ├── fixture-invalid-cell-value-num.xlsx
│ ├── fixture-invalid-cell-value.xlsx
│ ├── fixture-multi-dtypes-columns.xlsx
│ ├── fixture-multi-sheet.xlsx
│ ├── fixture-sheets-different-visibilities.xlsx
│ ├── fixture-single-sheet-duplicated-columns.xlsx
│ ├── fixture-single-sheet-with-types.xlsx
│ ├── fixture-single-sheet.xlsx
│ ├── fixture-type-errors.xlsx
│ ├── infer-dtypes-fallback.xlsx
│ ├── no-header.xlsx
│ ├── null-bytes-in-columns-names.xls
│ ├── null-column.xlsx
│ ├── sheet-and-table-with-offset.xlsx
│ ├── sheet-and-table-with-whitespace.xlsx
│ ├── sheet-null-strings-empty.xlsx
│ ├── sheet-null-strings.xlsx
│ ├── sheet-with-defined-names.xlsx
│ ├── sheet-with-na.xlsx
│ ├── sheet-with-tables.xlsx
│ └── single-sheet-skip-rows-durations.xlsx
├── sheet_visibility.rs
├── shifted_data.rs
├── tables.rs
├── utils/
│ └── mod.rs
└── whitespace.rs
SYMBOL INDEX (648 symbols across 51 files)
FILE: python/fastexcel/__init__.py
class ExcelSheet (line 54) | class ExcelSheet:
method __init__ (line 57) | def __init__(self, sheet: _ExcelSheet) -> None:
method name (line 61) | def name(self) -> str:
method width (line 66) | def width(self) -> int:
method height (line 71) | def height(self) -> int:
method total_height (line 76) | def total_height(self) -> int:
method selected_columns (line 81) | def selected_columns(self) -> list[ColumnInfo]:
method available_columns (line 85) | def available_columns(self) -> list[ColumnInfo]:
method specified_dtypes (line 90) | def specified_dtypes(self) -> DTypeMap | None:
method visible (line 95) | def visible(self) -> SheetVisible:
method to_arrow (line 99) | def to_arrow(self) -> pa.RecordBatch:
method to_arrow_with_errors (line 110) | def to_arrow_with_errors(self) -> tuple[pa.RecordBatch, CellErrors | N...
method to_pandas (line 127) | def to_pandas(self) -> pd.DataFrame:
method to_polars (line 137) | def to_polars(self) -> pl.DataFrame:
method __arrow_c_schema__ (line 147) | def __arrow_c_schema__(self) -> object:
method __arrow_c_array__ (line 157) | def __arrow_c_array__(self, requested_schema: object | None = None) ->...
method __repr__ (line 169) | def __repr__(self) -> str:
class ExcelTable (line 173) | class ExcelTable:
method __init__ (line 176) | def __init__(self, table: _ExcelTable) -> None:
method name (line 180) | def name(self) -> str:
method sheet_name (line 185) | def sheet_name(self) -> str:
method width (line 190) | def width(self) -> int:
method height (line 195) | def height(self) -> int:
method total_height (line 200) | def total_height(self) -> int:
method offset (line 205) | def offset(self) -> int:
method selected_columns (line 210) | def selected_columns(self) -> list[ColumnInfo]:
method available_columns (line 214) | def available_columns(self) -> list[ColumnInfo]:
method specified_dtypes (line 219) | def specified_dtypes(self) -> DTypeMap | None:
method to_arrow (line 223) | def to_arrow(self) -> pa.RecordBatch:
method to_pandas (line 234) | def to_pandas(self) -> pd.DataFrame:
method to_polars (line 244) | def to_polars(self) -> pl.DataFrame:
method __arrow_c_schema__ (line 254) | def __arrow_c_schema__(self) -> object:
method __arrow_c_array__ (line 264) | def __arrow_c_array__(self, requested_schema: object | None = None) ->...
class ExcelReader (line 277) | class ExcelReader:
method __init__ (line 280) | def __init__(self, reader: _ExcelReader) -> None:
method sheet_names (line 284) | def sheet_names(self) -> list[str]:
method load_sheet (line 289) | def load_sheet(
method load_sheet (line 311) | def load_sheet(
method load_sheet (line 332) | def load_sheet(
method table_names (line 420) | def table_names(self, sheet_name: str | None = None) -> list[str]:
method defined_names (line 430) | def defined_names(self) -> list[DefinedName]:
method load_table (line 441) | def load_table(
method load_table (line 463) | def load_table(
method load_table (line 484) | def load_table(
method load_sheet_eager (line 584) | def load_sheet_eager(
method load_sheet_by_name (line 619) | def load_sheet_by_name(
method load_sheet_by_idx (line 652) | def load_sheet_by_idx(
method __repr__ (line 685) | def __repr__(self) -> str:
function read_excel (line 689) | def read_excel(source: Path | str | bytes) -> ExcelReader:
FILE: python/fastexcel/_fastexcel.pyi
class ColumnInfoNoDtype (line 16) | class ColumnInfoNoDtype:
method __init__ (line 17) | def __init__(
method name (line 26) | def name(self) -> str: ...
method index (line 28) | def index(self) -> int: ...
method absolute_index (line 30) | def absolute_index(self) -> int: ...
method column_name_from (line 32) | def column_name_from(self) -> ColumnNameFrom: ...
class ColumnInfo (line 34) | class ColumnInfo:
method __init__ (line 35) | def __init__(
method name (line 46) | def name(self) -> str: ...
method index (line 48) | def index(self) -> int: ...
method absolute_index (line 50) | def absolute_index(self) -> int: ...
method dtype (line 52) | def dtype(self) -> DType: ...
method column_name_from (line 54) | def column_name_from(self) -> ColumnNameFrom: ...
method dtype_from (line 56) | def dtype_from(self) -> DTypeFrom: ...
class DefinedName (line 58) | class DefinedName:
method __init__ (line 59) | def __init__(
method name (line 66) | def name(self) -> str: ...
method formula (line 68) | def formula(self) -> str: ...
class CellError (line 70) | class CellError:
method position (line 72) | def position(self) -> tuple[int, int]: ...
method row_offset (line 74) | def row_offset(self) -> int: ...
method offset_position (line 76) | def offset_position(self) -> tuple[int, int]: ...
method detail (line 78) | def detail(self) -> str: ...
method __repr__ (line 79) | def __repr__(self) -> str: ...
class CellErrors (line 81) | class CellErrors:
method errors (line 83) | def errors(self) -> list[CellError]: ...
method __repr__ (line 84) | def __repr__(self) -> str: ...
class _ExcelSheet (line 86) | class _ExcelSheet:
method name (line 88) | def name(self) -> str:
method width (line 91) | def width(self) -> int:
method height (line 94) | def height(self) -> int:
method total_height (line 97) | def total_height(self) -> int:
method offset (line 100) | def offset(self) -> int:
method selected_columns (line 103) | def selected_columns(self) -> list[ColumnInfo]:
method available_columns (line 105) | def available_columns(self) -> list[ColumnInfo]:
method specified_dtypes (line 108) | def specified_dtypes(self) -> DTypeMap | None:
method visible (line 111) | def visible(self) -> SheetVisible:
method to_arrow (line 113) | def to_arrow(self) -> pa.RecordBatch:
method to_arrow_with_errors (line 118) | def to_arrow_with_errors(self) -> tuple[pa.RecordBatch, CellErrors]:
method __arrow_c_schema__ (line 126) | def __arrow_c_schema__(self) -> object:
method __arrow_c_array__ (line 134) | def __arrow_c_array__(self, requested_schema: object = None) -> tuple[...
class _ExcelTable (line 145) | class _ExcelTable:
method name (line 147) | def name(self) -> str:
method sheet_name (line 150) | def sheet_name(self) -> str:
method width (line 153) | def width(self) -> int:
method height (line 156) | def height(self) -> int:
method total_height (line 159) | def total_height(self) -> int:
method offset (line 162) | def offset(self) -> int:
method selected_columns (line 165) | def selected_columns(self) -> list[ColumnInfo]:
method available_columns (line 167) | def available_columns(self) -> list[ColumnInfo]:
method specified_dtypes (line 170) | def specified_dtypes(self) -> DTypeMap | None:
method to_arrow (line 172) | def to_arrow(self) -> pa.RecordBatch:
method __arrow_c_schema__ (line 177) | def __arrow_c_schema__(self) -> object:
method __arrow_c_array__ (line 186) | def __arrow_c_array__(self, requested_schema: object = None) -> tuple[...
class _ExcelReader (line 197) | class _ExcelReader:
method load_sheet (line 201) | def load_sheet(
method load_sheet (line 222) | def load_sheet(
method load_sheet (line 243) | def load_sheet(
method load_table (line 264) | def load_table(
method load_table (line 285) | def load_table(
method sheet_names (line 306) | def sheet_names(self) -> list[str]: ...
method table_names (line 307) | def table_names(self, sheet_name: str | None = None) -> list[str]: ...
method defined_names (line 308) | def defined_names(self) -> list[DefinedName]: ...
function read_excel (line 310) | def read_excel(source: str | bytes) -> _ExcelReader:
class FastExcelError (line 316) | class FastExcelError(Exception): ...
class UnsupportedColumnTypeCombinationError (line 317) | class UnsupportedColumnTypeCombinationError(FastExcelError): ...
class CannotRetrieveCellDataError (line 318) | class CannotRetrieveCellDataError(FastExcelError): ...
class CalamineCellError (line 319) | class CalamineCellError(FastExcelError): ...
class CalamineError (line 320) | class CalamineError(FastExcelError): ...
class SheetNotFoundError (line 321) | class SheetNotFoundError(FastExcelError): ...
class ColumnNotFoundError (line 322) | class ColumnNotFoundError(FastExcelError): ...
class ArrowError (line 323) | class ArrowError(FastExcelError): ...
class InvalidParametersError (line 324) | class InvalidParametersError(FastExcelError): ...
FILE: python/tests/benchmarks/memory.py
class Engine (line 7) | class Engine(str, Enum):
function get_args (line 13) | def get_args() -> argparse.Namespace:
function main (line 20) | def main():
FILE: python/tests/benchmarks/readers.py
function pyxl_read (line 6) | def pyxl_read(test_file_path: str):
function xlrd_read (line 16) | def xlrd_read(test_file_path: str):
function fastexcel_read (line 24) | def fastexcel_read(test_file_path: str):
FILE: python/tests/benchmarks/speed.py
function plain_data_xls (line 11) | def plain_data_xls():
function plain_data_xlsx (line 16) | def plain_data_xlsx():
function formula_xlsx (line 21) | def formula_xlsx():
function test_pyxl (line 26) | def test_pyxl(benchmark, plain_data_xlsx):
function test_xlrd (line 31) | def test_xlrd(benchmark, plain_data_xls):
function test_fastexcel_xls (line 36) | def test_fastexcel_xls(benchmark, plain_data_xls):
function test_fastexcel_xlsx (line 41) | def test_fastexcel_xlsx(benchmark, plain_data_xlsx):
function test_pyxl_with_formulas (line 46) | def test_pyxl_with_formulas(benchmark, formula_xlsx):
function test_fastexcel_with_formulas (line 51) | def test_fastexcel_with_formulas(benchmark, formula_xlsx):
FILE: python/tests/conftest.py
function expected_data_sheet_null_strings (line 10) | def expected_data_sheet_null_strings() -> dict[str, list[Any]]:
FILE: python/tests/test_alias_generation.py
function test_alias_generation_with_use_columns (line 16) | def test_alias_generation_with_use_columns(use_columns: list[str] | list...
FILE: python/tests/test_column_selection.py
function excel_reader_single_sheet (line 19) | def excel_reader_single_sheet() -> fastexcel.ExcelReader:
function expected_column_info (line 24) | def expected_column_info() -> list[fastexcel.ColumnInfo]:
function test_single_sheet_all_columns (line 45) | def test_single_sheet_all_columns(
function test_single_sheet_subset_by_str (line 70) | def test_single_sheet_subset_by_str(
function test_single_sheet_subset_by_index (line 91) | def test_single_sheet_subset_by_index(
function excel_reader_single_sheet_with_unnamed_columns (line 112) | def excel_reader_single_sheet_with_unnamed_columns() -> fastexcel.ExcelR...
function single_sheet_with_unnamed_columns_expected (line 117) | def single_sheet_with_unnamed_columns_expected() -> dict[str, list[Any]]:
function sheet_with_unnamed_columns_expected_column_info (line 128) | def sheet_with_unnamed_columns_expected_column_info() -> list[fastexcel....
function test_single_sheet_with_unnamed_columns (line 173) | def test_single_sheet_with_unnamed_columns(
function test_single_sheet_with_unnamed_columns_and_pagination (line 211) | def test_single_sheet_with_unnamed_columns_and_pagination(
function test_single_sheet_with_unnamed_columns_and_pagination_and_column_names (line 266) | def test_single_sheet_with_unnamed_columns_and_pagination_and_column_names(
function test_single_sheet_with_unnamed_columns_and_str_range (line 311) | def test_single_sheet_with_unnamed_columns_and_str_range(
function test_single_sheet_with_unnamed_columns_and_open_ended_range (line 334) | def test_single_sheet_with_unnamed_columns_and_open_ended_range(
function test_single_sheet_with_unnamed_columns_and_open_ended_range_from_start (line 355) | def test_single_sheet_with_unnamed_columns_and_open_ended_range_from_start(
function test_single_sheet_with_unnamed_columns_and_mixed_open_ended_range (line 372) | def test_single_sheet_with_unnamed_columns_and_mixed_open_ended_range(
function test_single_sheet_with_unnamed_columns_and_from_beginning_range (line 396) | def test_single_sheet_with_unnamed_columns_and_from_beginning_range(
function test_single_sheet_with_unnamed_columns_and_from_beginning_range_single_column (line 417) | def test_single_sheet_with_unnamed_columns_and_from_beginning_range_sing...
function test_single_sheet_with_unnamed_columns_and_complex_mixed_pattern (line 436) | def test_single_sheet_with_unnamed_columns_and_complex_mixed_pattern(
function test_single_sheet_invalid_column_indices_negative_integer (line 466) | def test_single_sheet_invalid_column_indices_negative_integer(
function test_single_sheet_invalid_column_indices_empty_list (line 478) | def test_single_sheet_invalid_column_indices_empty_list(
function test_single_sheet_invalid_column_indices_column_does_not_exist_str (line 490) | def test_single_sheet_invalid_column_indices_column_does_not_exist_str(
function test_single_sheet_invalid_column_indices_column_does_not_exist_int (line 501) | def test_single_sheet_invalid_column_indices_column_does_not_exist_int(
function test_use_columns_with_column_names (line 512) | def test_use_columns_with_column_names() -> None:
function test_use_columns_with_callable (line 582) | def test_use_columns_with_callable() -> None:
function test_use_columns_with_bad_callable (line 618) | def test_use_columns_with_bad_callable() -> None:
function test_use_columns_with_eager_loading (line 638) | def test_use_columns_with_eager_loading() -> None:
function test_use_columns_dtypes_eager_loading (line 663) | def test_use_columns_dtypes_eager_loading(
function test_use_columns_with_table (line 709) | def test_use_columns_with_table() -> None:
function test_use_columns_with_table_and_provided_columns (line 785) | def test_use_columns_with_table_and_provided_columns() -> None:
function test_use_column_range_with_offset_without_table (line 863) | def test_use_column_range_with_offset_without_table() -> None:
function test_use_column_range_with_offset_with_table (line 889) | def test_use_column_range_with_offset_with_table() -> None:
function test_use_column_names_with_offset_table_by_index_and_name (line 915) | def test_use_column_names_with_offset_table_by_index_and_name() -> None:
function test_use_column_range_with_offset_with_table_and_specified_dtypes (line 969) | def test_use_column_range_with_offset_with_table_and_specified_dtypes() ...
function test_use_column_range_with_offset_with_sheet_and_specified_dtypes (line 1024) | def test_use_column_range_with_offset_with_sheet_and_specified_dtypes() ...
FILE: python/tests/test_defined_names.py
function test_defined_names (line 8) | def test_defined_names(path: str) -> None:
FILE: python/tests/test_dtypes.py
function expected_data (line 19) | def expected_data() -> dict[str, list[Any]]:
function test_sheet_with_mixed_dtypes (line 51) | def test_sheet_with_mixed_dtypes(expected_data: dict[str, list[Any]]) ->...
function test_sheet_with_mixed_dtypes_and_sample_rows (line 64) | def test_sheet_with_mixed_dtypes_and_sample_rows(expected_data: dict[str...
function test_sheet_with_mixed_dtypes_specify_dtypes (line 146) | def test_sheet_with_mixed_dtypes_specify_dtypes(
function test_sheet_datetime_conversion (line 179) | def test_sheet_datetime_conversion(
function test_dtype_coercion_behavior__coerce (line 201) | def test_dtype_coercion_behavior__coerce(
function test_dtype_coercion_behavior__strict_sampling_eveything (line 224) | def test_dtype_coercion_behavior__strict_sampling_eveything(eager: bool)...
function test_dtype_coercion_behavior__strict_sampling_limit (line 237) | def test_dtype_coercion_behavior__strict_sampling_limit(eager: bool) -> ...
function test_one_dtype_for_all (line 262) | def test_one_dtype_for_all() -> None:
function test_fallback_infer_dtypes (line 326) | def test_fallback_infer_dtypes(caplog: pytest.LogCaptureFixture) -> None:
function test_to_arrow_with_errors (line 482) | def test_to_arrow_with_errors(
function test_guess_dtypes_with_div0_error (line 520) | def test_guess_dtypes_with_div0_error() -> None:
FILE: python/tests/test_durations.py
function test_sheet_with_different_time_types (line 20) | def test_sheet_with_different_time_types() -> None:
function test_sheet_with_offset_header_row_and_durations (line 63) | def test_sheet_with_offset_header_row_and_durations() -> None:
FILE: python/tests/test_eagerness.py
function test_load_sheet_eager_single_sheet (line 12) | def test_load_sheet_eager_single_sheet() -> None:
function test_multiple_sheets_with_unnamed_columns (line 25) | def test_multiple_sheets_with_unnamed_columns():
function test_eager_with_an_ods_file_should_return_a_recordbatch (line 38) | def test_eager_with_an_ods_file_should_return_a_recordbatch() -> None:
FILE: python/tests/test_empty.py
function test_empty (line 8) | def test_empty(path: str) -> None:
FILE: python/tests/test_errors.py
function test_cell_error_repr (line 9) | def test_cell_error_repr() -> None:
function test_read_excel_bad_type (line 19) | def test_read_excel_bad_type() -> None:
function test_does_not_exist (line 25) | def test_does_not_exist() -> None:
function test_sheet_idx_not_found_error (line 41) | def test_sheet_idx_not_found_error() -> None:
function test_sheet_name_not_found_error (line 57) | def test_sheet_name_not_found_error() -> None:
function test_docstrings (line 89) | def test_docstrings(exc_class: type[Exception], expected_docstring: str)...
function test_schema_sample_rows_must_be_nonzero (line 93) | def test_schema_sample_rows_must_be_nonzero() -> None:
FILE: python/tests/test_fastexcel.py
function test_single_sheet (line 16) | def test_single_sheet():
function test_single_sheet_bytes (line 38) | def test_single_sheet_bytes():
function test_single_sheet_with_types (line 61) | def test_single_sheet_with_types():
function test_multiple_sheets (line 97) | def test_multiple_sheets():
function test_sheets_with_header_line_diff_from_zero (line 144) | def test_sheets_with_header_line_diff_from_zero():
function test_sheets_with_no_header (line 166) | def test_sheets_with_no_header():
function test_sheets_with_empty_rows_before_header (line 192) | def test_sheets_with_empty_rows_before_header():
function test_sheets_with_custom_headers (line 214) | def test_sheets_with_custom_headers():
function test_sheets_with_skipping_headers (line 238) | def test_sheets_with_skipping_headers():
function test_sheet_with_pagination (line 264) | def test_sheet_with_pagination():
function test_sheet_with_skip_rows (line 299) | def test_sheet_with_skip_rows():
function test_sheet_with_n_rows (line 335) | def test_sheet_with_n_rows():
function test_sheet_with_pagination_and_without_headers (line 369) | def test_sheet_with_pagination_and_without_headers():
function test_sheet_with_pagination_out_of_bound (line 413) | def test_sheet_with_pagination_out_of_bound():
function test_sheet_with_na (line 467) | def test_sheet_with_na():
function test_sheet_with_ref (line 484) | def test_sheet_with_ref():
function test_null_strings (line 499) | def test_null_strings(excel_file: str, expected_data_sheet_null_strings:...
function test_null_values_in_cells (line 518) | def test_null_values_in_cells() -> None:
function test_invalid_value_num (line 536) | def test_invalid_value_num() -> None:
function test_null_column_is_nullable (line 545) | def test_null_column_is_nullable() -> None:
function test_sheet_with_decimal_numbers (line 550) | def test_sheet_with_decimal_numbers() -> None:
function test_header_row_and_skip_rows (line 644) | def test_header_row_and_skip_rows(
function test_null_bytes_in_column_names (line 655) | def test_null_bytes_in_column_names() -> None:
FILE: python/tests/test_pycapsule.py
function test_sheet_arrow_c_schema (line 10) | def test_sheet_arrow_c_schema():
function test_sheet_arrow_c_array (line 22) | def test_sheet_arrow_c_array():
function test_table_arrow_c_schema (line 34) | def test_table_arrow_c_schema():
function test_table_arrow_c_array (line 46) | def test_table_arrow_c_array():
function test_pycapsule_interface_with_requested_schema (line 59) | def test_pycapsule_interface_with_requested_schema():
function test_integration_with_polars (line 71) | def test_integration_with_polars():
function test_to_polars_without_pyarrow (line 84) | def test_to_polars_without_pyarrow():
function test_to_pandas_still_requires_pyarrow (line 104) | def test_to_pandas_still_requires_pyarrow():
FILE: python/tests/test_sheet_visibility.py
function test_sheet_visibilities (line 6) | def test_sheet_visibilities() -> None:
FILE: python/tests/test_shifted_data.py
function test_sheet_with_offset (line 6) | def test_sheet_with_offset():
function test_table_with_offset (line 46) | def test_table_with_offset():
FILE: python/tests/test_tables.py
function test_table_names (line 14) | def test_table_names(path: str) -> None:
function test_table_names_with_sheet_name (line 22) | def test_table_names_with_sheet_name(path: str) -> None:
function test_load_table (line 34) | def test_load_table(path: str) -> None:
FILE: python/tests/test_whitespace.py
function test_skip_tail_whitespace_rows (line 11) | def test_skip_tail_whitespace_rows() -> None:
function test_skip_tail_rows_and_whitespace_as_null_behavior (line 80) | def test_skip_tail_rows_and_whitespace_as_null_behavior() -> None:
FILE: python/tests/utils.py
function path_for_fixture (line 10) | def path_for_fixture(fixture_file: str) -> str:
function get_expected_pandas_dtype (line 14) | def get_expected_pandas_dtype(fastexcel_dtype: str) -> Any:
function assert_pandas_dtypes (line 47) | def assert_pandas_dtypes(df: pd.DataFrame, expected_dtypes: dict[str, st...
FILE: scripts/update_versions.py
function parse_semver (line 16) | def parse_semver(version: str) -> tuple[int, ...]:
function sort_versions (line 21) | def sort_versions(versions: list[dict]) -> list[dict]:
function update_versions (line 34) | def update_versions(docs_dir: Path, version: str, *, stable: bool) -> None:
function main (line 88) | def main() -> None:
FILE: src/data/cell_extractors.rs
function extract_boolean (line 6) | pub(super) fn extract_boolean<DT: CellType + DataType>(cell: &DT) -> Opt...
function extract_int (line 18) | pub(super) fn extract_int<DT: CellType + DataType>(cell: &DT) -> Option<...
function extract_float (line 22) | pub(super) fn extract_float<DT: CellType + DataType>(cell: &DT) -> Optio...
function extract_string (line 26) | pub(super) fn extract_string<DT: CellType + DataType>(cell: &DT) -> Opti...
function extract_date (line 44) | pub(super) fn extract_date<DT: CellType + DataType>(cell: &DT) -> Option...
constant EPOCH (line 49) | const EPOCH: NaiveDate = NaiveDate::from_ymd_opt(1970, 1, 1).expect("Fai...
function extract_date_as_num_days (line 52) | pub(super) fn extract_date_as_num_days<DT: CellType + DataType>(cell: &D...
function extract_datetime (line 57) | pub(super) fn extract_datetime<DT: CellType + DataType>(cell: &DT) -> Op...
function extract_datetime_as_timestamp_ms (line 62) | pub(super) fn extract_datetime_as_timestamp_ms<DT: CellType + DataType>(...
function extract_duration (line 66) | pub(super) fn extract_duration<DT: CellType + DataType>(cell: &DT) -> Op...
function extract_duration_as_ms (line 71) | pub(super) fn extract_duration_as_ms<DT: CellType + DataType>(cell: &DT)...
FILE: src/data/mod.rs
type ExcelSheetData (line 24) | pub(crate) enum ExcelSheetData<'r> {
function width (line 30) | pub(crate) fn width(&self) -> usize {
function height (line 37) | pub(crate) fn height(&self) -> usize {
function get_as_string (line 44) | pub(super) fn get_as_string(&self, pos: (usize, usize)) -> Option<String> {
function dtype_for_column (line 51) | pub(crate) fn dtype_for_column(
function height_without_tail_whitespace (line 79) | pub(crate) fn height_without_tail_whitespace(&self) -> usize {
function start (line 90) | pub(crate) fn start(&self) -> Option<(usize, usize)> {
function from (line 100) | fn from(range: Range<CalData>) -> Self {
function from (line 106) | fn from(range: Range<CalDataRef<'a>>) -> Self {
type CellIsWhiteSpace (line 111) | trait CellIsWhiteSpace {
method is_whitespace (line 112) | fn is_whitespace(&self) -> bool;
method is_whitespace (line 119) | fn is_whitespace(&self) -> bool {
function height_without_tail_whitespace (line 132) | pub(crate) fn height_without_tail_whitespace<CT: CellType + DataType + s...
type FastExcelSeries (line 164) | pub enum FastExcelSeries {
method dtype (line 176) | pub fn dtype(&self) -> DType {
method is_null (line 189) | pub fn is_null(&self) -> bool {
method from (line 253) | fn from(arr: [Option<&str>; N]) -> Self {
method from (line 259) | fn from(arr: [&str; N]) -> Self {
method from (line 385) | fn from(column: FastExcelColumn) -> Self {
type FastExcelColumn (line 266) | pub struct FastExcelColumn {
method try_new (line 273) | pub fn try_new(
method new_null (line 306) | pub fn new_null<S: Into<String>>(name: S, len: usize) -> Self {
method try_from_column_info (line 314) | pub(crate) fn try_from_column_info<CT: CellType + DataType>(
method len (line 367) | pub fn len(&self) -> usize {
method is_empty (line 371) | pub fn is_empty(&self) -> bool {
method name (line 375) | pub fn name(&self) -> &str {
method data (line 379) | pub fn data(&self) -> &FastExcelSeries {
type RowSelector (line 392) | pub(crate) enum RowSelector {
method len (line 400) | pub(crate) fn len(&self) -> usize {
function generate_row_selector (line 409) | pub(crate) fn generate_row_selector(
FILE: src/data/python.rs
function create_boolean_array_with_errors (line 24) | pub(crate) fn create_boolean_array_with_errors<CT: CellType + DataType +...
function create_int_array_with_errors (line 52) | pub(crate) fn create_int_array_with_errors<CT: CellType + DataType + Deb...
function create_float_array_with_errors (line 82) | pub(crate) fn create_float_array_with_errors<CT: CellType + DataType + D...
function create_string_array_with_errors (line 112) | pub(crate) fn create_string_array_with_errors<CT: CellType + DataType + ...
function create_date_array_with_errors (line 150) | pub(crate) fn create_date_array_with_errors<CT: CellType + DataType + De...
function create_datetime_array_with_errors (line 181) | pub(crate) fn create_datetime_array_with_errors<CT: CellType + DataType ...
function create_duration_array_with_errors (line 212) | pub(crate) fn create_duration_array_with_errors<CT: CellType + DataType ...
function create_boolean_array (line 244) | pub(crate) fn create_boolean_array<CT: CellType + DataType>(
function create_int_array (line 255) | pub(crate) fn create_int_array<CT: CellType + DataType>(
function create_float_array (line 265) | pub(crate) fn create_float_array<CT: CellType + DataType>(
function create_string_array (line 276) | pub(crate) fn create_string_array<CT: CellType + DataType>(
function create_date_array (line 297) | pub(crate) fn create_date_array<CT: CellType + DataType>(
function create_datetime_array (line 308) | pub(crate) fn create_datetime_array<CT: CellType + DataType>(
function create_duration_array (line 319) | pub(crate) fn create_duration_array<CT: CellType + DataType>(
function create_string_array_with_errors (line 357) | pub(crate) fn create_string_array_with_errors(
function selected_columns_to_schema (line 383) | pub(crate) fn selected_columns_to_schema(columns: &[ColumnInfo]) -> Sche...
function record_batch_from_name_array_iterator (line 389) | pub(crate) fn record_batch_from_name_array_iterator<
function record_batch_from_data_and_columns (line 417) | pub(crate) fn record_batch_from_data_and_columns<CT: CellType + DataType>(
function record_batch_from_data_and_columns_with_skip_rows (line 434) | pub(crate) fn record_batch_from_data_and_columns_with_skip_rows<CT: Cell...
function record_batch_from_data_and_columns_with_row_selector (line 452) | fn record_batch_from_data_and_columns_with_row_selector<CT: CellType + D...
function record_batch_from_data_and_columns_with_errors (line 483) | pub(crate) fn record_batch_from_data_and_columns_with_errors(
method iter (line 527) | pub(crate) fn iter(&self) -> Box<dyn Iterator<Item = usize> + '_> {
FILE: src/data/rust.rs
function create_boolean_vec (line 8) | pub(crate) fn create_boolean_vec<CT: CellType + DataType>(
function create_int_vec (line 22) | pub(crate) fn create_int_vec<CT: CellType + DataType>(
function create_float_vec (line 33) | pub(crate) fn create_float_vec<CT: CellType + DataType>(
function create_string_vec (line 47) | pub(crate) fn create_string_vec<CT: CellType + DataType>(
function create_date_vec (line 73) | pub(crate) fn create_date_vec<CT: CellType + DataType>(
function create_datetime_vec (line 84) | pub(crate) fn create_datetime_vec<CT: CellType + DataType>(
function create_duration_vec (line 98) | pub(crate) fn create_duration_vec<CT: CellType + DataType>(
FILE: src/error.rs
type FastExcelErrorKind (line 7) | pub enum FastExcelErrorKind {
method fmt (line 23) | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
type FastExcelError (line 58) | pub struct FastExcelError {
method new (line 70) | pub(crate) fn new(kind: FastExcelErrorKind) -> Self {
method from (line 106) | fn from(kind: FastExcelErrorKind) -> Self {
method from (line 112) | fn from(err: XlsxError) -> Self {
type ErrorContext (line 63) | pub(crate) trait ErrorContext {
method with_context (line 64) | fn with_context<S: ToString, F>(self, ctx_fn: F) -> Self
method with_context (line 96) | fn with_context<S: ToString, F>(mut self, ctx_fn: F) -> Self
method with_context (line 120) | fn with_context<S: ToString, F>(self, ctx_fn: F) -> Self
method fmt (line 79) | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
type FastExcelResult (line 117) | pub type FastExcelResult<T> = Result<T, FastExcelError>;
method from (line 217) | fn from(err: error::FastExcelError) -> Self {
type IntoPyResult (line 240) | pub(crate) trait IntoPyResult {
method into_pyresult (line 243) | fn into_pyresult(self) -> PyResult<Self::Inner>;
type Inner (line 247) | type Inner = T;
method into_pyresult (line 249) | fn into_pyresult(self) -> PyResult<Self::Inner> {
FILE: src/lib.rs
function read_excel (line 26) | pub fn read_excel<S: AsRef<str> + Display>(path: S) -> FastExcelResult<E...
function py_read_excel (line 34) | fn py_read_excel<'py>(source: &Bound<'_, PyAny>, py: Python<'py>) -> PyR...
function get_python_version (line 55) | fn get_python_version() -> String {
function _fastexcel (line 67) | fn _fastexcel(m: &Bound<'_, PyModule>) -> PyResult<()> {
FILE: src/types/dtype/mod.rs
type DType (line 22) | pub enum DType {
type Err (line 34) | type Err = FastExcelError;
method from_str (line 36) | fn from_str(raw_dtype: &str) -> FastExcelResult<Self> {
method fmt (line 55) | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
type DTypeMap (line 69) | pub type DTypeMap = HashMap<IdxOrName, DType>;
type DTypes (line 74) | pub enum DTypes {
type Err (line 82) | type Err = FastExcelError;
method from_str (line 84) | fn from_str(dtypes: &str) -> FastExcelResult<Self> {
type DTypeCoercion (line 91) | pub enum DTypeCoercion {
type Err (line 100) | type Err = FastExcelError;
method from_str (line 102) | fn from_str(raw_dtype_coercion: &str) -> FastExcelResult<Self> {
constant NULL_STRING_VALUES (line 115) | const NULL_STRING_VALUES: [&str; 19] = [
function get_cell_dtype (line 120) | fn get_cell_dtype<DT: CellType + Debug + DataType>(
function float_types (line 204) | fn float_types() -> &'static HashSet<DType> {
function int_types (line 208) | fn int_types() -> &'static HashSet<DType> {
function string_types (line 212) | fn string_types() -> &'static HashSet<DType> {
function get_dtype_for_column (line 225) | pub(crate) fn get_dtype_for_column<DT: CellType + Debug + DataType>(
function excel_float_to_string (line 295) | pub(crate) fn excel_float_to_string(x: f64) -> String {
function range (line 312) | fn range() -> Range<CalData> {
function get_arrow_column_type_multi_dtype_ok_coerce (line 357) | fn get_arrow_column_type_multi_dtype_ok_coerce(
function get_arrow_column_type_multi_dtype_ok_strict (line 381) | fn get_arrow_column_type_multi_dtype_ok_strict(
function get_arrow_column_type_multi_dtype_ko_strict (line 409) | fn get_arrow_column_type_multi_dtype_ko_strict(
function test_excel_float_to_string (line 426) | fn test_excel_float_to_string(#[case] x: f64, #[case] expected: &str) {
FILE: src/types/dtype/python.rs
type Target (line 10) | type Target = PyString;
type Output (line 12) | type Output = Bound<'py, Self::Target>;
type Error (line 14) | type Error = std::convert::Infallible;
method into_pyobject (line 16) | fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Er...
type Target (line 22) | type Target = PyString;
type Output (line 24) | type Output = Bound<'py, Self::Target>;
type Error (line 26) | type Error = std::convert::Infallible;
function into_pyobject (line 28) | fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Er...
type Error (line 34) | type Error = PyErr;
method extract (line 35) | fn extract(py_dtype: Borrowed<'a, 'py, PyAny>) -> Result<Self, Self::Err...
type Error (line 49) | type Error = PyErr;
method extract (line 50) | fn extract(py_dtypes: Borrowed<'a, 'py, PyAny>) -> Result<Self, Self::Er...
method from (line 61) | fn from(dtype: &DType) -> Self {
type Error (line 76) | type Error = PyErr;
method extract (line 77) | fn extract(py_dtype_coercion: Borrowed<'a, 'py, PyAny>) -> Result<Self, ...
FILE: src/types/excelreader/mod.rs
type ExcelSheets (line 30) | enum ExcelSheets {
method worksheet_range (line 36) | fn worksheet_range(&mut self, name: &str) -> FastExcelResult<Range<Dat...
method sheet_metadata (line 46) | fn sheet_metadata(&self) -> &[CalamineSheet] {
method table_names (line 53) | fn table_names(&mut self, sheet_name: Option<&str>) -> FastExcelResult...
method defined_names (line 61) | fn defined_names(&mut self) -> FastExcelResult<Vec<DefinedName>> {
method supports_by_ref (line 74) | fn supports_by_ref(&self) -> bool {
method with_header_row (line 81) | fn with_header_row(&mut self, header_row: HeaderRow) -> &mut Self {
method worksheet_range_ref (line 95) | fn worksheet_range_ref(&mut self, name: &str) -> FastExcelResult<Range...
method get_table (line 107) | fn get_table(&mut self, name: &str) -> FastExcelResult<Table<Data>> {
type DefinedName (line 117) | pub struct DefinedName {
type LoadSheetOrTableOptions (line 125) | pub struct LoadSheetOrTableOptions {
method calamine_header_row (line 153) | fn calamine_header_row(&self) -> HeaderRow {
method data_header_row (line 162) | pub(crate) fn data_header_row(&self) -> Option<usize> {
method new_for_sheet (line 168) | pub fn new_for_sheet() -> Self {
method new_for_table (line 185) | pub fn new_for_table() -> Self {
method header_row (line 200) | pub fn header_row(mut self, header_row: usize) -> Self {
method no_header_row (line 205) | pub fn no_header_row(mut self) -> Self {
method column_names (line 210) | pub fn column_names<I: IntoIterator<Item = impl Into<String>>>(
method skip_rows (line 218) | pub fn skip_rows(mut self, skip_rows: SkipRows) -> Self {
method n_rows (line 223) | pub fn n_rows(mut self, n_rows: usize) -> Self {
method schema_sample_rows (line 228) | pub fn schema_sample_rows(mut self, schema_sample_rows: usize) -> Self {
method dtype_coercion (line 233) | pub fn dtype_coercion(mut self, dtype_coercion: DTypeCoercion) -> Self {
method selected_columns (line 238) | pub fn selected_columns(mut self, selected_columns: SelectedColumns) -...
method with_dtypes (line 243) | pub fn with_dtypes(mut self, dtypes: DTypes) -> Self {
method skip_whitespace_tail_rows (line 248) | pub fn skip_whitespace_tail_rows(mut self, skip_whitespace_tail_rows: ...
method whitespace_as_null (line 253) | pub fn whitespace_as_null(mut self, whitespace_as_null: bool) -> Self {
type ExcelReader (line 261) | pub struct ExcelReader {
method try_from_path (line 271) | pub(crate) fn try_from_path(path: &str) -> FastExcelResult<Self> {
method find_sheet_meta (line 284) | fn find_sheet_meta(&self, idx_or_name: IdxOrName) -> FastExcelResult<&...
method load_sheet (line 312) | pub fn load_sheet(
method load_table (line 330) | pub fn load_table(
method sheet_names (line 339) | pub fn sheet_names(&self) -> Vec<&str> {
method table_names (line 346) | pub fn table_names(&mut self, sheet_name: Option<&str>) -> FastExcelRe...
method defined_names (line 350) | pub fn defined_names(&mut self) -> FastExcelResult<Vec<DefinedName>> {
type Error (line 356) | type Error = FastExcelError;
method try_from (line 358) | fn try_from(bytes: &[u8]) -> Result<Self, Self::Error> {
FILE: src/types/excelreader/python.rs
method build_selected_columns (line 23) | fn build_selected_columns(
method load_sheet_eager (line 29) | fn load_sheet_eager(
method build_sheet (line 89) | fn build_sheet<'py>(
method build_table (line 154) | fn build_table<'py>(
method __repr__ (line 182) | pub fn __repr__(&self) -> String {
method py_table_names (line 187) | pub(crate) fn py_table_names(&mut self, sheet_name: Option<&str>) -> PyR...
method py_defined_names (line 192) | pub(crate) fn py_defined_names(&mut self) -> PyResult<Vec<DefinedName>> {
method py_load_sheet (line 212) | pub(crate) fn py_load_sheet<'py>(
method py_load_table (line 270) | pub(crate) fn py_load_table<'py>(
method py_sheet_names (line 313) | pub(crate) fn py_sheet_names(&self) -> Vec<&str> {
method py_new (line 322) | pub fn py_new(name: String, formula: String) -> Self {
method py_name (line 327) | pub fn py_name(&self) -> &str {
method py_formula (line 332) | pub fn py_formula(&self) -> &str {
method __repr__ (line 336) | pub fn __repr__(&self) -> String {
method __eq__ (line 349) | pub fn __eq__(&self, other: &Self) -> bool {
FILE: src/types/excelsheet/column_info/mod.rs
type ColumnNameFrom (line 23) | pub enum ColumnNameFrom {
type Err (line 33) | type Err = FastExcelError;
method from_str (line 35) | fn from_str(s: &str) -> FastExcelResult<Self> {
method fmt (line 49) | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
type DTypeFrom (line 60) | pub enum DTypeFrom {
method fmt (line 72) | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
type Err (line 83) | type Err = FastExcelError;
method from_str (line 85) | fn from_str(s: &str) -> FastExcelResult<Self> {
type ColumnInfo (line 103) | pub struct ColumnInfo {
method new (line 119) | pub(crate) fn new(
type ColumnInfoNoDtype (line 145) | pub(crate) struct ColumnInfoNoDtype {
method eq (line 154) | fn eq(&self, other: &IdxOrName) -> bool {
method new (line 163) | pub(super) fn new(
method with_name (line 177) | pub(super) fn with_name(mut self, name: String) -> Self {
method name (line 182) | pub(super) fn name(&self) -> &str {
method absolute_index (line 186) | pub(super) fn absolute_index(&self) -> usize {
method dtype_info (line 190) | fn dtype_info<D: CalamineDataProvider>(
method finish (line 230) | pub(super) fn finish<D: CalamineDataProvider>(
type CalamineDataProvider (line 260) | pub(crate) trait CalamineDataProvider {
method width (line 261) | fn width(&self) -> usize;
method get_as_string (line 262) | fn get_as_string(&self, pos: (usize, usize)) -> Option<String>;
method dtype_for_column (line 263) | fn dtype_for_column(
method start (line 271) | fn start(&self) -> Option<(usize, usize)>;
method width (line 275) | fn width(&self) -> usize {
method get_as_string (line 279) | fn get_as_string(&self, pos: (usize, usize)) -> Option<String> {
method dtype_for_column (line 283) | fn dtype_for_column(
method start (line 294) | fn start(&self) -> Option<(usize, usize)> {
method width (line 300) | fn width(&self) -> usize {
method get_as_string (line 304) | fn get_as_string(&self, pos: (usize, usize)) -> Option<String> {
method dtype_for_column (line 308) | fn dtype_for_column(
method start (line 325) | fn start(&self) -> Option<(usize, usize)> {
function column_info_from_header (line 330) | fn column_info_from_header<D: CalamineDataProvider>(
function build_available_columns_info (line 454) | pub(crate) fn build_available_columns_info<D: CalamineDataProvider>(
function set_aliases_for_columns_info (line 462) | fn set_aliases_for_columns_info(columns_info: Vec<ColumnInfoNoDtype>) ->...
function alias_for_name (line 478) | fn alias_for_name(name: &str, existing_names: &[String]) -> String {
function finalize_column_info (line 499) | pub(crate) fn finalize_column_info<D: CalamineDataProvider>(
type AvailableColumns (line 524) | pub(crate) enum AvailableColumns {
method as_loaded (line 530) | pub(crate) fn as_loaded(&self) -> FastExcelResult<&[ColumnInfo]> {
FILE: src/types/excelsheet/column_info/python.rs
method from (line 10) | fn from(col_info: &ColumnInfo) -> Self {
method py_new (line 26) | pub(crate) fn py_new(
method get_dtype (line 46) | fn get_dtype(&self) -> String {
method py_name (line 52) | pub fn py_name(&self) -> &str {
method py_index (line 58) | pub fn py_index(&self) -> usize {
method py_absolute_index (line 64) | pub fn py_absolute_index(&self) -> usize {
method get_colum_name_from (line 76) | fn get_colum_name_from(&self) -> String {
method get_dtype_from (line 87) | fn get_dtype_from(&self) -> String {
method __repr__ (line 91) | pub fn __repr__(&self) -> String {
method __eq__ (line 103) | pub fn __eq__(&self, other: &Self) -> bool {
method py_name (line 112) | pub fn py_name(&self) -> &str {
method py_index (line 118) | pub fn py_index(&self) -> usize {
method py_absolute_index (line 124) | pub fn py_absolute_index(&self) -> usize {
FILE: src/types/excelsheet/mod.rs
type Header (line 31) | pub(crate) enum Header {
method new (line 38) | pub(crate) fn new(header_row: Option<usize>, column_names: Option<Vec<...
method offset (line 48) | pub(crate) fn offset(&self) -> usize {
type Pagination (line 59) | pub(crate) struct Pagination {
method try_new (line 90) | pub(crate) fn try_new<CT: CellType>(
method offset (line 108) | pub(crate) fn offset(&self) -> usize {
method n_rows (line 112) | pub(crate) fn n_rows(&self) -> Option<usize> {
method skip_rows (line 116) | pub(crate) fn skip_rows(&self) -> &SkipRows {
type SkipRows (line 67) | pub enum SkipRows {
method simple_offset (line 80) | pub(crate) fn simple_offset(&self) -> Option<usize> {
type SelectedColumns (line 122) | pub enum SelectedColumns {
method fmt (line 141) | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
method select_columns (line 200) | pub(super) fn select_columns(
constant ALPHABET (line 280) | const ALPHABET: [char; 26] = [
method col_idx_for_col_as_letter (line 285) | fn col_idx_for_col_as_letter(col: &str) -> FastExcelResult<usize> {
method col_indices_for_letter_range (line 328) | fn col_indices_for_letter_range(col_range: &str) -> FastExcelResult<Ve...
method col_selection_for_letter_range (line 368) | fn col_selection_for_letter_range(
type DeferredColumnSelection (line 132) | pub enum DeferredColumnSelection {
method eq (line 156) | fn eq(&self, other: &Self) -> bool {
function deferred_selection_to_concrete (line 172) | pub(crate) fn deferred_selection_to_concrete(
type Err (line 422) | type Err = FastExcelError;
method from_str (line 424) | fn from_str(s: &str) -> FastExcelResult<Self> {
type SheetVisible (line 478) | pub enum SheetVisible {
method from (line 485) | fn from(value: CalamineSheetVisible) -> Self {
type ExcelSheet (line 497) | pub struct ExcelSheet {
method data (line 512) | pub(crate) fn data(&self) -> &ExcelSheetData<'_> {
method try_new (line 516) | pub(crate) fn try_new(
method ensure_available_columns_loaded (line 571) | fn ensure_available_columns_loaded(&mut self) -> FastExcelResult<()> {
method load_available_columns (line 597) | fn load_available_columns(&mut self) -> FastExcelResult<&[ColumnInfo]> {
method compute_limit (line 602) | fn compute_limit(&self) -> usize {
method limit (line 617) | pub(crate) fn limit(&self) -> usize {
method schema_sample_rows (line 621) | pub(crate) fn schema_sample_rows(&self) -> usize {
method width (line 625) | pub fn width(&mut self) -> usize {
method height (line 633) | pub fn height(&mut self) -> usize {
method total_height (line 645) | pub fn total_height(&mut self) -> usize {
method offset (line 653) | pub fn offset(&self) -> usize {
method selected_columns (line 657) | pub fn selected_columns(&self) -> &Vec<ColumnInfo> {
method available_columns (line 661) | pub fn available_columns(&mut self) -> FastExcelResult<Vec<ColumnInfo>> {
method specified_dtypes (line 665) | pub fn specified_dtypes(&self) -> Option<&DTypes> {
method name (line 669) | pub fn name(&self) -> &str {
method visible (line 673) | pub fn visible(&self) -> SheetVisible {
method to_columns (line 677) | pub fn to_columns(&self) -> FastExcelResult<Vec<FastExcelColumn>> {
method to_polars (line 706) | pub fn to_polars(&self) -> FastExcelResult<DataFrame> {
function selected_columns_from_none (line 726) | fn selected_columns_from_none() {
function selected_columns_from_list_of_valid_ints (line 734) | fn selected_columns_from_list_of_valid_ints() {
function selected_columns_from_list_of_valid_strings (line 745) | fn selected_columns_from_list_of_valid_strings() {
function selected_columns_from_list_of_valid_strings_and_ints (line 762) | fn selected_columns_from_list_of_valid_strings_and_ints() {
function selected_columns_from_invalid_ints (line 780) | fn selected_columns_from_invalid_ints() {
function selected_columns_from_empty_int_list (line 790) | fn selected_columns_from_empty_int_list() {
function selected_columns_from_empty_string_list (line 800) | fn selected_columns_from_empty_string_list() {
function selected_columns_from_valid_ranges (line 819) | fn selected_columns_from_valid_ranges(#[case] raw: &str, #[case] expecte...
function selected_columns_from_valid_open_ended_ranges (line 841) | fn selected_columns_from_valid_open_ended_ranges(#[case] raw: &str) {
function selected_columns_from_invalid_ranges (line 863) | fn selected_columns_from_invalid_ranges(#[case] raw: &str, #[case] messa...
FILE: src/types/excelsheet/polars.rs
method from (line 9) | fn from(column: FastExcelColumn) -> Self {
FILE: src/types/excelsheet/python.rs
type Error (line 29) | type Error = FastExcelError;
method try_from (line 31) | fn try_from(py_list: &Bound<'_, PyList>) -> FastExcelResult<Self> {
type Error (line 48) | type Error = FastExcelError;
method try_from (line 50) | fn try_from(py_any_opt: Option<&Bound<'_, PyAny>>) -> FastExcelResult<Se...
type Target (line 78) | type Target = PyString;
type Output (line 80) | type Output = Bound<'py, Self::Target>;
type Error (line 82) | type Error = FastExcelError;
function into_pyobject (line 84) | fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Er...
method should_skip_row (line 97) | pub(crate) fn should_skip_row(&self, row_idx: usize, py: Python) -> Fast...
type CellError (line 125) | pub(crate) struct CellError {
method offset_position (line 140) | pub fn offset_position(&self) -> (usize, usize) {
method __repr__ (line 145) | pub fn __repr__(&self) -> String {
type CellErrors (line 157) | pub(crate) struct CellErrors {
method errors (line 164) | pub fn errors<'p>(&'p self, _py: Python<'p>) -> Vec<CellError> {
method __repr__ (line 168) | pub fn __repr__(&self) -> String {
type Error (line 175) | type Error = PyErr;
method extract (line 176) | fn extract(obj: Borrowed<'a, 'py, PyAny>) -> Result<Self, Self::Error> {
type Error (line 207) | type Error = FastExcelError;
method try_from (line 209) | fn try_from(sheet: &ExcelSheet) -> FastExcelResult<Self> {
method py_width (line 241) | pub fn py_width(&mut self) -> usize {
method py_height (line 246) | pub fn py_height(&mut self) -> usize {
method py_total_height (line 251) | pub fn py_total_height(&mut self) -> usize {
method py_offset (line 256) | pub fn py_offset(&self) -> usize {
method py_selected_columns (line 261) | pub fn py_selected_columns(&self) -> Vec<ColumnInfo> {
method py_available_columns (line 266) | pub fn py_available_columns(&mut self) -> FastExcelResult<Vec<ColumnInfo...
method py_specified_dtypes (line 271) | pub fn py_specified_dtypes(&self) -> Option<&DTypes> {
method py_name (line 276) | pub fn py_name(&self) -> &str {
method py_visible (line 281) | pub fn py_visible<'py>(&'py self, py: Python<'py>) -> FastExcelResult<Bo...
method to_arrow (line 287) | pub fn to_arrow<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyAn...
method to_arrow_with_errors (line 316) | pub fn to_arrow_with_errors<'py>(&self, py: Python<'py>) -> PyResult<Bou...
method __arrow_c_schema__ (line 360) | pub fn __arrow_c_schema__<'py>(&self, py: Python<'py>) -> PyResult<Bound...
method __arrow_c_array__ (line 374) | pub fn __arrow_c_array__<'py>(
method __repr__ (line 398) | pub fn __repr__(&self) -> String {
FILE: src/types/excelsheet/table.rs
function extract_table_names (line 5) | pub(crate) fn extract_table_names<'a, RS: Read + Seek>(
function extract_table_range (line 26) | pub(crate) fn extract_table_range<RS: Read + Seek>(
FILE: src/types/exceltable/mod.rs
type ExcelTable (line 30) | pub struct ExcelTable {
method extract_selected_columns_and_table_columns (line 46) | fn extract_selected_columns_and_table_columns(
method build_header_and_update_selection (line 86) | fn build_header_and_update_selection(
method try_new (line 128) | pub(crate) fn try_new(
method data (line 183) | pub(crate) fn data(&self) -> &Range<Data> {
method ensure_available_columns_loaded (line 187) | fn ensure_available_columns_loaded(&mut self) -> FastExcelResult<()> {
method load_available_columns (line 213) | fn load_available_columns(&mut self) -> FastExcelResult<&[ColumnInfo]> {
method offset (line 218) | pub fn offset(&self) -> usize {
method compute_limit (line 222) | fn compute_limit(&self) -> usize {
method limit (line 237) | pub fn limit(&self) -> usize {
method selected_columns (line 241) | pub fn selected_columns(&self) -> Vec<ColumnInfo> {
method available_columns (line 245) | pub fn available_columns(&mut self) -> FastExcelResult<Vec<ColumnInfo>> {
method specified_dtypes (line 249) | pub fn specified_dtypes(&self) -> Option<&DTypes> {
method width (line 253) | pub fn width(&mut self) -> usize {
method height (line 261) | pub fn height(&mut self) -> usize {
method total_height (line 269) | pub fn total_height(&mut self) -> usize {
method name (line 277) | pub fn name(&self) -> &str {
method sheet_name (line 281) | pub fn sheet_name(&self) -> &str {
method to_columns (line 285) | pub fn to_columns(&self) -> FastExcelResult<Vec<FastExcelColumn>> {
method to_polars (line 301) | pub fn to_polars(&self) -> FastExcelResult<DataFrame> {
FILE: src/types/exceltable/python.rs
type Error (line 21) | type Error = FastExcelError;
method try_from (line 23) | fn try_from(table: &ExcelTable) -> FastExcelResult<Self> {
method py_name (line 48) | pub fn py_name(&self) -> &str {
method py_sheet_name (line 53) | pub fn py_sheet_name(&self) -> &str {
method py_offset (line 58) | pub fn py_offset(&self) -> usize {
method py_limit (line 63) | pub fn py_limit(&self) -> usize {
method py_selected_columns (line 68) | pub fn py_selected_columns(&self) -> Vec<ColumnInfo> {
method py_available_columns (line 73) | pub fn py_available_columns(&mut self) -> FastExcelResult<Vec<ColumnInfo...
method py_specified_dtypes (line 78) | pub fn py_specified_dtypes(&self) -> Option<&DTypes> {
method py_width (line 83) | pub fn py_width(&mut self) -> usize {
method py_height (line 88) | pub fn py_height(&mut self) -> usize {
method py_total_height (line 93) | pub fn py_total_height(&mut self) -> usize {
method to_arrow (line 98) | pub fn to_arrow<'py>(&self, py: Python<'py>) -> FastExcelResult<Bound<'p...
method __arrow_c_schema__ (line 128) | pub fn __arrow_c_schema__<'py>(&self, py: Python<'py>) -> PyResult<Bound...
method __arrow_c_array__ (line 142) | pub fn __arrow_c_array__<'py>(
method __repr__ (line 161) | pub fn __repr__(&self) -> String {
FILE: src/types/idx_or_name/mod.rs
type IdxOrName (line 6) | pub enum IdxOrName {
method format_message (line 12) | pub(crate) fn format_message(&self) -> String {
method from (line 21) | fn from(index: usize) -> Self {
method from (line 27) | fn from(name: String) -> Self {
method from (line 33) | fn from(name: &str) -> Self {
FILE: src/types/idx_or_name/python.rs
type Error (line 12) | type Error = FastExcelError;
method try_from (line 14) | fn try_from(value: &Bound<'_, PyAny>) -> FastExcelResult<Self> {
type Error (line 29) | type Error = PyErr;
method extract (line 30) | fn extract(ob: Borrowed<'a, 'py, PyAny>) -> Result<Self, Self::Error> {
type Target (line 36) | type Target = PyAny;
type Output (line 38) | type Output = Bound<'py, Self::Target>;
type Error (line 40) | type Error = pyo3::PyErr;
method into_pyobject (line 42) | fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Er...
type Target (line 51) | type Target = PyAny;
type Output (line 53) | type Output = Bound<'py, Self::Target>;
type Error (line 55) | type Error = pyo3::PyErr;
function into_pyobject (line 57) | fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Er...
FILE: src/utils/schema.rs
function get_schema_sample_rows (line 8) | pub(crate) fn get_schema_sample_rows(
function test_get_schema_sample_rows_return_values (line 42) | fn test_get_schema_sample_rows_return_values(
FILE: test.py
function get_args (line 7) | def get_args() -> argparse.Namespace:
function main (line 25) | def main():
FILE: tests/column_selection.rs
function reader (line 13) | fn reader() -> fastexcel::ExcelReader {
function test_use_columns_with_table (line 19) | fn test_use_columns_with_table(mut reader: fastexcel::ExcelReader) -> Re...
function test_use_columns_with_table_and_provided_columns (line 124) | fn test_use_columns_with_table_and_provided_columns(
function reader_with_offset (line 230) | fn reader_with_offset() -> fastexcel::ExcelReader {
function test_use_column_range_with_offset_with_table_and_specified_dtypes (line 236) | fn test_use_column_range_with_offset_with_table_and_specified_dtypes(
function test_use_column_names_with_offset_table_by_index_and_name (line 343) | fn test_use_column_names_with_offset_table_by_index_and_name(
function test_use_column_range_with_offset_with_sheet_and_specified_dtypes (line 410) | fn test_use_column_range_with_offset_with_sheet_and_specified_dtypes(
FILE: tests/fastexcel.rs
function test_single_sheet (line 14) | fn test_single_sheet() -> Result<()> {
function test_single_sheet_bytes (line 70) | fn test_single_sheet_bytes() -> Result<()> {
function test_single_sheet_with_types (line 128) | fn test_single_sheet_with_types() -> Result<()> {
function test_multiple_sheets (line 179) | fn test_multiple_sheets() -> Result<()> {
function test_sheet_with_header_row_diff_from_zero (line 256) | fn test_sheet_with_header_row_diff_from_zero() -> Result<()> {
function test_sheet_with_pagination_and_without_headers (line 324) | fn test_sheet_with_pagination_and_without_headers() -> Result<()> {
function test_header_row_and_skip_rows (line 438) | fn test_header_row_and_skip_rows(
function test_header_row_and_skip_rows_polars (line 522) | fn test_header_row_and_skip_rows_polars(
FILE: tests/sheet_visibility.rs
function sheet_visibility (line 11) | fn sheet_visibility() -> Result<()> {
FILE: tests/shifted_data.rs
function test_sheet_with_offset (line 10) | fn test_sheet_with_offset() -> Result<()> {
function test_table_with_offset (line 63) | fn test_table_with_offset() -> Result<()> {
FILE: tests/tables.rs
function reader (line 13) | fn reader() -> fastexcel::ExcelReader {
function test_table_names (line 22) | fn test_table_names(
function test_load_table (line 35) | fn test_load_table(mut reader: fastexcel::ExcelReader) -> Result<()> {
FILE: tests/utils/mod.rs
function path_for_fixture (line 1) | pub fn path_for_fixture(fixture_file: &str) -> String {
FILE: tests/whitespace.rs
function reader (line 13) | fn reader() -> ExcelReader {
constant DATES (line 18) | const DATES: &[Option<NaiveDateTime>] = &[
function test_skip_tail_rows_behavior (line 57) | fn test_skip_tail_rows_behavior(mut reader: ExcelReader) -> Result<()> {
function test_skip_tail_rows_and_whitespace_as_null_behavior (line 125) | fn test_skip_tail_rows_and_whitespace_as_null_behavior(mut reader: Excel...
Condensed preview — 99 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (454K chars).
[
{
"path": ".clippy.toml",
"chars": 292,
"preview": "disallowed-macros = [\n { path = \"std::assert_ne\", reason = \"use `pretty_assertions::assert_ne` instead\" },\n { path = \""
},
{
"path": ".github/dependabot.yml",
"chars": 577,
"preview": "version: 2\nupdates:\n # python\n - package-ecosystem: \"pip\"\n directory: \"/\"\n schedule:\n interval: \"daily\"\n "
},
{
"path": ".github/workflows/CI.yml",
"chars": 5748,
"preview": "name: CI\n\non:\n push:\n branches:\n - main\n pull_request:\n types: [opened, synchronize, reopened]\n\nenv:\n MIN_"
},
{
"path": ".github/workflows/docs.yml",
"chars": 2955,
"preview": "name: Docs\n\non:\n push:\n branches:\n - main\n tags:\n - 'v*'\n workflow_dispatch:\n inputs:\n version"
},
{
"path": ".github/workflows/release.yml",
"chars": 6219,
"preview": "name: Release\n\non:\n push:\n # Sequence of patterns matched against refs/tags\n tags:\n - 'v*' # Push events to ma"
},
{
"path": ".gitignore",
"chars": 183,
"preview": "/target\n\nbigfile.*\n__pycache__\n*.pyc\n*.so\n*.dat\n.DS_Store\n\n.python-version\npyrightconfig.json\n.venv\ndocs\n.vscode\n.idea\n."
},
{
"path": ".pre-commit-config.yaml",
"chars": 691,
"preview": "# See https://pre-commit.com for more information\n# See https://pre-commit.com/hooks.html for more hooks\nrepos:\n- repo"
},
{
"path": "Cargo.toml",
"chars": 2504,
"preview": "[package]\nname = \"fastexcel\"\nversion = \"0.20.2\"\ndescription = \"A fast excel reader for Rust and Python\"\nrust-version = \""
},
{
"path": "LICENSE",
"chars": 1067,
"preview": "MIT License\n\nCopyright (c) 2024 ToucanToco\n\nPermission is hereby granted, free of charge, to any person obtaining a copy"
},
{
"path": "Makefile",
"chars": 4527,
"preview": ".DEFAULT_GOAL := all\nsources = python/fastexcel python/tests\n\nexport CARGO_TERM_COLOR=$(shell (test -t 0 && echo always)"
},
{
"path": "README.md",
"chars": 5679,
"preview": "# `fastexcel`\n\nA fast excel file reader for Python and Rust.\n\nDocs:\n * [Python](https://fastexcel.toucantoco.dev/).\n * ["
},
{
"path": "doc-templates/module.html.jinja2",
"chars": 2758,
"preview": "{% extends \"default/module.html.jinja2\" %}\n{% block nav_title %}\n {{ super() }}\n <div id=\"version-switcher\" style="
},
{
"path": "pyproject.toml",
"chars": 2582,
"preview": "[build-system]\nrequires = [\"maturin>=1.7.0,<2.0\"]\nbuild-backend = \"maturin\"\n\n[project]\nname = \"fastexcel\"\ndescription = "
},
{
"path": "python/fastexcel/__init__.py",
"chars": 28162,
"preview": "from __future__ import annotations\n\nimport typing\nfrom collections.abc import Callable\nfrom typing import TYPE_CHECKING,"
},
{
"path": "python/fastexcel/_fastexcel.pyi",
"chars": 10883,
"preview": "from __future__ import annotations\n\nimport typing\nfrom collections.abc import Callable\nfrom typing import TYPE_CHECKING,"
},
{
"path": "python/fastexcel/py.typed",
"chars": 0,
"preview": ""
},
{
"path": "python/tests/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "python/tests/benchmarks/README.md",
"chars": 2267,
"preview": "# Benchmarks\n\nThese benchmarks were generated using `pytest-benchmark`.\n\n> **_NOTE:_** formulas.xlsx was found [here](h"
},
{
"path": "python/tests/benchmarks/memory.py",
"chars": 699,
"preview": "import argparse\nfrom enum import Enum\n\nfrom .readers import fastexcel_read, pyxl_read, xlrd_read\n\n\nclass Engine(str, Enu"
},
{
"path": "python/tests/benchmarks/readers.py",
"chars": 777,
"preview": "from fastexcel import read_excel\nfrom openpyxl import load_workbook\nfrom xlrd import open_workbook\n\n\ndef pyxl_read(test_"
},
{
"path": "python/tests/benchmarks/speed.py",
"chars": 1260,
"preview": "\"\"\"\nCompare read performance with fastexcel, xlrd and different openpyxl options\n\"\"\"\n\nimport pytest\n\nfrom .readers impor"
},
{
"path": "python/tests/conftest.py",
"chars": 1626,
"preview": "from __future__ import annotations\n\nfrom datetime import datetime\nfrom typing import Any\n\nimport pytest\n\n\n@pytest.fixtur"
},
{
"path": "python/tests/test_alias_generation.py",
"chars": 1501,
"preview": "from __future__ import annotations\n\nimport fastexcel\nimport pandas as pd\nimport polars as pl\nimport pytest\nfrom pandas.t"
},
{
"path": "python/tests/test_column_selection.py",
"chars": 39960,
"preview": "# ruff: noqa: E501\nfrom __future__ import annotations\n\nimport re\nfrom typing import Any\n\nimport fastexcel\nimport numpy a"
},
{
"path": "python/tests/test_defined_names.py",
"chars": 618,
"preview": "import fastexcel\nimport pytest\n\nfrom .utils import path_for_fixture\n\n\n@pytest.mark.parametrize(\"path\", (\"sheet-with-defi"
},
{
"path": "python/tests/test_dtypes.py",
"chars": 18905,
"preview": "from __future__ import annotations\n\nimport logging\nfrom datetime import date, datetime\nfrom typing import Any, Literal\n\n"
},
{
"path": "python/tests/test_durations.py",
"chars": 2925,
"preview": "from __future__ import annotations\n\nfrom datetime import date, datetime, timedelta\n\nimport fastexcel\nimport numpy as np\n"
},
{
"path": "python/tests/test_eagerness.py",
"chars": 2222,
"preview": "from datetime import date, datetime, timedelta\n\nimport fastexcel\nimport polars as pl\nfrom pandas.testing import assert_f"
},
{
"path": "python/tests/test_empty.py",
"chars": 353,
"preview": "import fastexcel\nimport pytest\n\nfrom .utils import path_for_fixture\n\n\n@pytest.mark.parametrize(\"path\", (\"empty.ods\", \"em"
},
{
"path": "python/tests/test_errors.py",
"chars": 4092,
"preview": "from __future__ import annotations\n\nimport fastexcel\nimport pytest\n\nfrom .utils import path_for_fixture\n\n\ndef test_cell_"
},
{
"path": "python/tests/test_fastexcel.py",
"chars": 22867,
"preview": "from __future__ import annotations\n\nfrom datetime import datetime\nfrom typing import Any\n\nimport fastexcel\nimport pandas"
},
{
"path": "python/tests/test_pycapsule.py",
"chars": 4581,
"preview": "\"\"\"Tests for the Arrow PyCapsule Interface implementation.\"\"\"\n\nimport fastexcel\nimport pandas as pd\nimport polars as pl\n"
},
{
"path": "python/tests/test_sheet_visibility.py",
"chars": 382,
"preview": "import fastexcel\n\nfrom .utils import path_for_fixture\n\n\ndef test_sheet_visibilities() -> None:\n file_path = path_for_"
},
{
"path": "python/tests/test_shifted_data.py",
"chars": 1853,
"preview": "import fastexcel\n\nfrom .utils import path_for_fixture\n\n\ndef test_sheet_with_offset():\n reader = fastexcel.read_excel("
},
{
"path": "python/tests/test_tables.py",
"chars": 3408,
"preview": "from datetime import datetime\n\nimport fastexcel\nimport pandas as pd\nimport polars as pl\nimport pytest\nfrom pandas.testin"
},
{
"path": "python/tests/test_whitespace.py",
"chars": 6975,
"preview": "import datetime\n\nimport fastexcel\nimport polars as pl\nfrom pandas.testing import assert_frame_equal as pd_assert_frame_e"
},
{
"path": "python/tests/utils.py",
"chars": 2011,
"preview": "from __future__ import annotations\n\nfrom pathlib import Path\nfrom typing import Any\n\nimport numpy as np\nimport pandas as"
},
{
"path": "scripts/update_versions.py",
"chars": 3116,
"preview": "#!/usr/bin/env -S uv run --script\n# /// script\n# requires-python = \">=3.9\"\n# dependencies = []\n# ///\n\"\"\"Manage docs/vers"
},
{
"path": "src/data/cell_extractors.rs",
"chars": 2388,
"preview": "use calamine::{CellType, DataType};\nuse chrono::{NaiveDate, NaiveDateTime, TimeDelta};\n\nuse crate::types::dtype::excel_f"
},
{
"path": "src/data/mod.rs",
"chars": 14448,
"preview": "mod cell_extractors;\n#[cfg(feature = \"python\")]\nmod python;\nmod rust;\nuse chrono::{Duration, NaiveDate, NaiveDateTime};\n"
},
{
"path": "src/data/python.rs",
"chars": 18812,
"preview": "use std::sync::Arc;\nuse std::{fmt::Debug, ops::Not};\n\nuse arrow_array::{\n Array, ArrayRef, BooleanArray, Date32Array,"
},
{
"path": "src/data/rust.rs",
"chars": 2780,
"preview": "use std::ops::Not;\n\nuse calamine::{CellType, DataType, Range};\nuse chrono::{NaiveDate, NaiveDateTime, TimeDelta};\n\nuse s"
},
{
"path": "src/error.rs",
"chars": 7815,
"preview": "use crate::types::idx_or_name::IdxOrName;\nuse calamine::XlsxError;\nuse std::{error::Error, fmt::Display};\n\n/// The kind "
},
{
"path": "src/lib.rs",
"chars": 4214,
"preview": "mod data;\nmod error;\nmod types;\nmod utils;\n\nuse std::fmt::Display;\n\n#[cfg(feature = \"python\")]\nuse error::py_errors;\n#[c"
},
{
"path": "src/types/dtype/mod.rs",
"chars": 13889,
"preview": "#[cfg(feature = \"python\")]\nmod python;\n\nuse std::{\n collections::{HashMap, HashSet},\n fmt::{Debug, Display},\n s"
},
{
"path": "src/types/dtype/python.rs",
"chars": 2824,
"preview": "use arrow_schema::{DataType as ArrowDataType, TimeUnit};\nuse pyo3::{Borrowed, Bound, FromPyObject, IntoPyObject, PyAny, "
},
{
"path": "src/types/excelreader/mod.rs",
"chars": 12737,
"preview": "#[cfg(feature = \"python\")]\nmod python;\n\nuse std::{\n fs::File,\n io::{BufReader, Cursor},\n};\n\nuse calamine::{\n Da"
},
{
"path": "src/types/excelreader/python.rs",
"chars": 11346,
"preview": "use arrow_array::RecordBatch;\nuse pyo3::{Bound, IntoPyObjectExt, PyAny, PyResult, Python, pymethods, types::PyString};\n\n"
},
{
"path": "src/types/excelsheet/column_info/mod.rs",
"chars": 17636,
"preview": "#[cfg(feature = \"python\")]\nmod python;\n\nuse std::{fmt::Display, str::FromStr};\n\nuse calamine::DataType;\n#[cfg(feature = "
},
{
"path": "src/types/excelsheet/column_info/python.rs",
"chars": 3962,
"preview": "use arrow_schema::Field;\nuse pyo3::{PyResult, pymethods};\n\nuse crate::{\n error::py_errors::IntoPyResult,\n types::e"
},
{
"path": "src/types/excelsheet/mod.rs",
"chars": 31725,
"preview": "pub(crate) mod column_info;\n#[cfg(feature = \"polars\")]\nmod polars;\n#[cfg(feature = \"python\")]\nmod python;\npub(crate) mod"
},
{
"path": "src/types/excelsheet/polars.rs",
"chars": 1099,
"preview": "use crate::{FastExcelColumn, FastExcelSeries};\nuse polars_core::{\n frame::column::{Column as PolarsColumn, ScalarColu"
},
{
"path": "src/types/excelsheet/python.rs",
"chars": 13133,
"preview": "use std::{collections::HashSet, sync::Arc};\n\nuse arrow_array::{RecordBatch, StructArray};\nuse arrow_schema::Field;\nuse p"
},
{
"path": "src/types/excelsheet/table.rs",
"chars": 1335,
"preview": "use crate::error::{FastExcelErrorKind, FastExcelResult};\nuse calamine::{Data, Sheets, Table};\nuse std::io::{Read, Seek};"
},
{
"path": "src/types/exceltable/mod.rs",
"chars": 10769,
"preview": "#[cfg(feature = \"python\")]\nmod python;\n\nuse calamine::{Data, Range, Table};\n#[cfg(feature = \"polars\")]\nuse polars_core::"
},
{
"path": "src/types/exceltable/python.rs",
"chars": 5265,
"preview": "use std::sync::Arc;\n\nuse arrow_array::{RecordBatch, StructArray};\nuse arrow_schema::Field;\n#[cfg(feature = \"pyarrow\")]\nu"
},
{
"path": "src/types/idx_or_name/mod.rs",
"chars": 719,
"preview": "#[cfg(feature = \"python\")]\nmod python;\n\n/// A column index or name.\n#[derive(Debug, PartialEq, Eq, Hash, Clone)]\npub enu"
},
{
"path": "src/types/idx_or_name/python.rs",
"chars": 1772,
"preview": "use pyo3::{\n Borrowed, Bound, FromPyObject, IntoPyObject, IntoPyObjectExt, PyAny, PyErr, Python,\n types::PyAnyMeth"
},
{
"path": "src/types/mod.rs",
"chars": 458,
"preview": "pub(crate) mod dtype;\npub(crate) mod excelreader;\npub(crate) mod excelsheet;\npub(crate) mod exceltable;\npub(crate) mod i"
},
{
"path": "src/utils/mod.rs",
"chars": 23,
"preview": "pub(crate) mod schema;\n"
},
{
"path": "src/utils/schema.rs",
"chars": 1941,
"preview": "use std::cmp::min;\n\n/// Determines how many rows should be used for schema sampling, based on the provided parameter,\n//"
},
{
"path": "test.py",
"chars": 1625,
"preview": "#!/usr/bin/env python3\nimport argparse\n\nimport fastexcel\n\n\ndef get_args() -> argparse.Namespace:\n parser = argparse.A"
},
{
"path": "tests/column_selection.rs",
"chars": 17630,
"preview": "use anyhow::{Context, Result};\nuse fastexcel::{DType, DTypes, IdxOrName, LoadSheetOrTableOptions, SelectedColumns};\nuse "
},
{
"path": "tests/fastexcel.rs",
"chars": 17968,
"preview": "#[macro_use]\nmod utils;\n\nuse anyhow::{Context, Result};\nuse chrono::NaiveDate;\nuse fastexcel::{FastExcelColumn, LoadShee"
},
{
"path": "tests/sheet_visibility.rs",
"chars": 889,
"preview": "#[allow(unused_macros)]\nmod utils;\n\nuse anyhow::{Context, Result};\nuse fastexcel::{LoadSheetOrTableOptions, SheetVisible"
},
{
"path": "tests/shifted_data.rs",
"chars": 3258,
"preview": "#[allow(unused_macros)]\nmod utils;\n\nuse anyhow::{Context, Result};\nuse fastexcel::LoadSheetOrTableOptions;\nuse pretty_as"
},
{
"path": "tests/tables.rs",
"chars": 4070,
"preview": "use anyhow::{Context, Result};\nuse chrono::NaiveDate;\nuse fastexcel::LoadSheetOrTableOptions;\nuse pretty_assertions::ass"
},
{
"path": "tests/utils/mod.rs",
"chars": 625,
"preview": "pub fn path_for_fixture(fixture_file: &str) -> String {\n format!(\n \"{}/tests/fixtures/{}\",\n env!(\"CARGO"
},
{
"path": "tests/whitespace.rs",
"chars": 6857,
"preview": "#[macro_use]\nmod utils;\n\nuse anyhow::{Context, Result};\nuse chrono::{NaiveDate, NaiveDateTime};\nuse fastexcel::{ExcelRea"
}
]
// ... and 30 more files (download for full content)
About this extraction
This page contains the full source code of the ToucanToco/fastexcel GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 99 files (46.9 MB), approximately 108.2k tokens, and a symbol index with 648 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.