Showing preview only (264K chars total). Download the full file or copy to clipboard to get everything.
Repository: kylebarron/parquet-wasm
Branch: main
Commit: d9e9e1f0762e
Files: 69
Total size: 248.2 KB
Directory structure:
gitextract_9fyd8eir/
├── .cargo/
│ └── config.toml
├── .github/
│ ├── dependabot.yml
│ └── workflows/
│ ├── docs-website.yml
│ ├── pr-manipulation.yml
│ └── test.yml
├── .gitignore
├── .vscode/
│ └── settings.json
├── .yarnrc.yml
├── CHANGELOG.md
├── Cargo.toml
├── DEVELOP.md
├── LICENSE_APACHE
├── LICENSE_MIT
├── README.md
├── bench/
│ ├── bench.ts
│ ├── make_data.py
│ └── pyproject.toml
├── package.json
├── scripts/
│ ├── build.sh
│ └── report_build.sh
├── src/
│ ├── common/
│ │ ├── fetch.rs
│ │ ├── mod.rs
│ │ ├── properties.rs
│ │ └── stream.rs
│ ├── error.rs
│ ├── lib.rs
│ ├── metadata.rs
│ ├── read_options.rs
│ ├── reader.rs
│ ├── reader_async.rs
│ ├── utils.rs
│ ├── wasm.rs
│ ├── writer.rs
│ ├── writer_async.rs
│ └── writer_properties.rs
├── templates/
│ └── package.json
├── tests/
│ ├── data/
│ │ ├── .python-version
│ │ ├── 1-partition-brotli.parquet
│ │ ├── 1-partition-gzip.parquet
│ │ ├── 1-partition-lz4.parquet
│ │ ├── 1-partition-none.parquet
│ │ ├── 1-partition-snappy.parquet
│ │ ├── 1-partition-zstd.parquet
│ │ ├── 2-partition-brotli.parquet
│ │ ├── 2-partition-gzip.parquet
│ │ ├── 2-partition-lz4.parquet
│ │ ├── 2-partition-none.parquet
│ │ ├── 2-partition-snappy.parquet
│ │ ├── 2-partition-zstd.parquet
│ │ ├── README.md
│ │ ├── data.arrow
│ │ ├── empty.parquet
│ │ ├── generate_data.py
│ │ ├── generate_geo_data.py
│ │ ├── naturalearth_cities_geoarrow.parquet
│ │ ├── naturalearth_cities_wkb.parquet
│ │ ├── pyproject.toml
│ │ ├── string_view.parquet
│ │ └── uv.lock
│ ├── js/
│ │ ├── ffi.test.ts
│ │ ├── geo-metadata.test.ts
│ │ ├── index.test.ts
│ │ ├── read-write.test.ts
│ │ ├── schema.test.ts
│ │ └── utils.ts
│ └── web.rs
├── tsconfig.docs.json
├── tsconfig.json
└── typedoc.json
================================================
FILE CONTENTS
================================================
================================================
FILE: .cargo/config.toml
================================================
# https://github.com/kylebarron/arrow-wasm/issues/8#issuecomment-2790469295
# Tell the getrandom 0.3 crate to use its JS (wasm_js) backend when compiling
# for the bare wasm32-unknown-unknown target, which has no OS entropy source.
# Pairs with the getrandom_v03 `wasm_js` feature enabled in Cargo.toml.
[target.wasm32-unknown-unknown]
rustflags = ['--cfg', 'getrandom_backend="wasm_js"']
================================================
FILE: .github/dependabot.yml
================================================
# Dependabot configuration (v2): weekly dependency-update PRs for Cargo, npm,
# and GitHub Actions, each capped at 10 open PRs.
version: 2
updates:
  - package-ecosystem: cargo
    directory: "/"
    schedule:
      interval: weekly
    open-pull-requests-limit: 10
    groups:
      # arrow and parquet bumps are grouped into a single PR.
      arrow-rs:
        patterns:
          - "arrow"
          - "parquet"
      # Every other cargo dependency lands in one catch-all group.
      other:
        patterns:
          - "*"
        exclude-patterns:
          - "arrow"
          - "parquet"
  - package-ecosystem: npm
    directory: "/"
    schedule:
      interval: weekly
    open-pull-requests-limit: 10
  - package-ecosystem: github-actions
    directory: "/"
    schedule:
      interval: weekly
    open-pull-requests-limit: 10
================================================
FILE: .github/workflows/docs-website.yml
================================================
# Builds the JS bundles and the API docs, then publishes the docs website.
# Triggered on every tag push.
name: Publish docs website

on:
  push:
    tags:
      - "*"

jobs:
  docs:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v5
      - name: Install Rust
        uses: dtolnay/rust-toolchain@stable
      # wasm-pack is installed via its upstream installer script.
      - name: Install
        run: curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh
      - uses: Swatinem/rust-cache@v2
      - uses: actions/setup-node@v5
        with:
          node-version: "20"
      - name: Install JS dependencies
        run: yarn
      - name: Build bundles
        run: yarn build
      - name: Build docs
        run: yarn docs:build
      - name: Publish docs
        run: |
          yarn docs:publish
================================================
FILE: .github/workflows/pr-manipulation.yml
================================================
# Posts (or updates) an asset-size summary comment on the PR, using the "pr"
# artifact produced by the "Build and Test" workflow. Runs via `workflow_run`
# so it executes with a write-capable GITHUB_TOKEN even for fork PRs.
name: PR Comment Generation

on:
  workflow_run:
    workflows: ["Build and Test"]
    types:
      - completed

jobs:
  comment_on_pr:
    runs-on: ubuntu-latest
    # Only act on successful runs that were triggered by a pull request.
    if: >
      github.event.workflow_run.event == 'pull_request' &&
      github.event.workflow_run.conclusion == 'success'
    steps:
      - name: 'Download artifact'
        uses: actions/github-script@v7
        with:
          script: |
            const fs = require('fs');
            const artifacts = await github.rest.actions.listWorkflowRunArtifacts({
              ...context.repo,
              run_id: ${{github.event.workflow_run.id }},
            });
            const matchArtifact = artifacts.data.artifacts.filter((artifact) => {
              return artifact.name == "pr"
            })[0];
            const download = await github.rest.actions.downloadArtifact({
              ...context.repo,
              artifact_id: matchArtifact.id,
              archive_format: 'zip',
            });
            fs.writeFileSync('${{github.workspace}}/pr.zip', Buffer.from(download.data));
      - run: unzip pr.zip
      - name: 'Comment on PR'
        uses: actions/github-script@v7
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |
            const fs = require('fs');
            // The build workflow stored the PR number in ./NR and the markdown
            // body in ./step_summary.md inside the artifact.
            const issueNumber = Number(fs.readFileSync('./NR'));
            const summaryContent = fs.readFileSync('./step_summary.md', 'utf-8');
            const existingCommentsOpts = github.rest.issues.listComments.endpoint.merge({
              ...context.repo, issue_number: issueNumber
            });
            const existingComments = await github.paginate(existingCommentsOpts);
            // A hidden HTML marker lets us find and update our previous comment
            // instead of posting a new one on every run.
            const TAG = 'execution';
            const tagPattern = `<!-- pr_asset_summary_comment "${TAG}" -->`;
            const body = `${summaryContent}\n${tagPattern}`;
            const preExistingComment = existingComments.find((comment) => comment.body?.includes(tagPattern));
            if(preExistingComment) {
              await github.rest.issues.updateComment({ ...context.repo, comment_id: preExistingComment.id, body });
            } else {
              await github.rest.issues.createComment({ ...context.repo, issue_number: issueNumber, body });
            }
================================================
FILE: .github/workflows/test.yml
================================================
# CI: wasm tests, feature-matrix check, Node tests, fmt, clippy, plus a
# build-size report that feeds the PR comment workflow (pr-manipulation.yml).
name: Build and Test

on:
  push:
    branches:
      - main
  pull_request:

jobs:
  # Rust-side wasm tests, run under Node via wasm-pack.
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v5
      - name: Install Rust
        uses: dtolnay/rust-toolchain@stable
      - name: Install
        run: curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh
      - uses: Swatinem/rust-cache@v2
      - run: wasm-pack build --dev --target nodejs
      - run: wasm-pack test --node

  # Verify every combination of cargo features compiles for wasm.
  check:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v5
      - name: Install Rust
        uses: dtolnay/rust-toolchain@stable
        with:
          targets: wasm32-unknown-unknown
      - uses: Swatinem/rust-cache@v2
      - run: cargo install cargo-all-features
      - name: Check all combinations of features can build
        run: cargo check-all-features -- --target wasm32-unknown-unknown

  # JS-side tests against a locally built bundle.
  node-test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v5
      - name: Install Rust
        uses: dtolnay/rust-toolchain@stable
      - name: Install
        run: curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh
      - uses: Swatinem/rust-cache@v2
      - uses: actions/setup-node@v5
        with:
          node-version: "20"
      - name: Build bundle
        run: yarn build:test
      - name: Install dev dependencies
        run: yarn
      - name: Run Node tests
        run: yarn test

  fmt:
    name: fmt
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v5
      - name: Install Rust
        uses: dtolnay/rust-toolchain@stable
        with:
          components: rustfmt
      - uses: Swatinem/rust-cache@v2
      - name: Run
        run: cargo fmt --all -- --check

  clippy:
    name: Clippy
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v5
      - name: Install Rust
        uses: dtolnay/rust-toolchain@stable
        with:
          components: clippy
      - uses: Swatinem/rust-cache@v2
      - name: "clippy --all"
        run: cargo clippy --all --features=full --tests -- -D warnings

  # Builds release bundles and emits per-asset size data (markdown table +
  # JSON manifest) as the "pr" artifact consumed by delta_generation and by
  # the pr-manipulation.yml workflow.
  node-build-report:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v5
      - name: Install Rust
        uses: dtolnay/rust-toolchain@stable
        with:
          targets: wasm32-unknown-unknown
      - name: Install
        run: curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh
      - uses: Swatinem/rust-cache@v2
      - uses: actions/setup-node@v5
        with:
          node-version: "20"
      - uses: awalsh128/cache-apt-pkgs-action@latest
        with:
          packages: brotli pv parallel jq
          version: 1.0
      - name: Build bundle
        run: ./scripts/report_build.sh
      - name: Size Reporting
        run: |
          ls report_pkg/*/*.wasm | parallel brotli -f -Z {}
          mkdir -p ./pr
          echo "| Asset | Size | Compressed Size |" >> ./pr/step_summary.md
          echo "| ------ | ---- | --------------- |" >> ./pr/step_summary.md
          for asset in $(ls report_pkg/*/*.wasm); do
            export SIZE=$(stat --format '%s' $asset)
            export COMPRESSED_SIZE=$(stat --format '%s' "${asset}.br")
            export asset
            echo "| ${asset} | $(echo $SIZE | numfmt --to=si --suffix="B") | $(echo $COMPRESSED_SIZE | numfmt --to=si --suffix="B") |" >> ./pr/step_summary.md
            echo $(jq -n '{"asset": $ENV.asset, "size": $ENV.SIZE | tonumber, "compressed_size": $ENV.COMPRESSED_SIZE | tonumber}')
          done | jq -s 'map({ (.asset|tostring): .}) | add' > ./pr/asset_manifest.json
          echo ${{ github.event.number }} > ./pr/NR
          # Fixed: was `github.event_type`, which is not a valid Actions context
          # key (it expands to the empty string, making this guard always true).
          if [[ "${{ github.event_name }}" != "pull_request" ]]; then
            cat ./pr/step_summary.md > $GITHUB_STEP_SUMMARY
          fi;
      - uses: actions/upload-artifact@v5
        with:
          name: pr
          path: pr/

  # Compares this PR's asset sizes against the latest successful run on the
  # base branch and writes a colored delta table to the step summary.
  delta_generation:
    runs-on: ubuntu-latest
    if: >
      github.event_name == 'pull_request'
    needs: node-build-report
    steps:
      - uses: actions/download-artifact@v6
        with:
          name: pr
          path: pr/
      - name: "Generate size deltas"
        uses: actions/github-script@v7
        with:
          script: |
            const fs = require('fs');
            const { execSync } = require('child_process');
            const baseContext = {
              repo: {
                repo: '${{ github.event.pull_request.base.repo.name }}',
                owner: '${{ github.event.pull_request.base.repo.owner.login }}'
              }
            };
            const baseWorkflows = await github.rest.actions.listWorkflowRuns({
              ...baseContext.repo,
              branch: '${{ github.event.pull_request.base.ref }}',
              status: 'success',
              workflow_id: 'test.yml',
            });
            const matchWorkflow = baseWorkflows.data?.workflow_runs?.[0];
            const artifacts = await github.rest.actions.listWorkflowRunArtifacts({
              ...baseContext.repo,
              run_id: matchWorkflow?.id,
            });
            const matchArtifact = artifacts.data.artifacts.filter((artifact) => {
              return artifact.name == "pr"
            })[0];
            if(matchArtifact) {
              const download = await github.rest.actions.downloadArtifact({
                ...baseContext.repo,
                artifact_id: matchArtifact.id,
                archive_format: 'zip',
              });
              fs.writeFileSync('${{github.workspace}}/base.zip', Buffer.from(download.data));
              execSync(`unzip -p base.zip asset_manifest.json >base_asset_manifest.json || true`);
            }
            // now, read in the asset manifests, for the head and base
            let baseAssets = {};
            try {
              baseAssets = JSON.parse(fs.readFileSync('./base_asset_manifest.json')) ?? {};
            } catch (error) {
              console.log('No base asset manifest found');
            }
            const assets = JSON.parse(fs.readFileSync('./pr/asset_manifest.json'));
            const unitOptions = {
              style: 'unit', unit: 'byte', unitDisplay: 'narrow', notation: 'compact',
              maximumSignificantDigits: 3
            };
            const formatter = new Intl.NumberFormat('en-US', unitOptions);
            const signedFormatter = new Intl.NumberFormat('en-US', { ...unitOptions, signDisplay: 'always' });
            const percentFormatter = Intl.NumberFormat('en-US', { style: 'percent', signDisplay: 'always' });
            const colorMap = {
              '-1': 'green',
              1: 'red',
              0: 'black',
              NaN: 'black'
            };
            // compute deltas and output markdown fragments
            const lineFragments = Object.entries(assets).map(([k, v]) => {
              const baseAsset = baseAssets[k] ?? {};
              const { asset, size, compressed_size, size_delta, compressed_size_delta } = {
                ...v,
                ...Object.fromEntries(['size', 'compressed_size'].map(subK => {
                  // compute the percentage change, NaN if the asset wasn't available
                  const proportionalDelta = v?.[subK] / baseAsset?.[subK] - 1;
                  const absoluteDelta = v?.[subK] - baseAsset?.[subK]
                  const sign = Math.sign(proportionalDelta);
                  // conditionally color the output via an inline latex block
                  let fragment = '';
                  if(Number.isFinite(proportionalDelta)) {
                    fragment = `${signedFormatter.format(absoluteDelta)} ${percentFormatter.format(proportionalDelta)}`;
                  } else {
                    fragment = 'N/A';
                  }
                  if(!Number.isFinite(proportionalDelta) || sign === 0) {
                    return [`${subK}_delta`, fragment]
                  } else {
                    const formattedFragment = `$\\color{${colorMap[sign]}}\\textbf{${fragment.replace('%', '\\%')}}$`;
                    return [`${subK}_delta`, formattedFragment]
                  }
                }))
              };
              // output a markdown fragment
              const sizeFragment = `${formatter.format(size)} ${size_delta}`
              const compressedFragment = `${formatter.format(compressed_size)} ${compressed_size_delta}`
              return [asset.replace('report_pkg/', ''), sizeFragment, compressedFragment]
            });
            await core.summary.addHeading('Asset Sizes').addTable([
              [{data: 'Asset', header: true}, {data: 'Uncompressed Size', header: true}, {data: 'Compressed Size', header: true}],
              ...lineFragments
            ]).write();
            fs.cpSync(process.env.GITHUB_STEP_SUMMARY, './pr/step_summary.md')
      # - uses: actions/upload-artifact@v5
      #   with:
      #     name: pr
      #     path: pr/
================================================
FILE: .gitignore
================================================
# Generated geodata and OS cruft
*.fgb
.DS_Store
# Parquet outputs; the committed fixtures under tests/data/ stay tracked
# (gitignore only affects untracked files).
*.parquet
node_modules
# Rust build output
/target
**/*.rs.bk
# wasm-pack output
pkg/
wasm-pack.log
.idea/
# NOTE(review): `www/data` already matches both the file and directory case,
# so the trailing-slash variant below appears redundant.
www/data
www/data/
data/
!tests/data/
.yarn
# Typedoc output
docs_build/
# Python virtualenv and local environment files
.venv/
.env
.envrc
================================================
FILE: .vscode/settings.json
================================================
{
// Optionally pin rust-analyzer to the wasm target; left disabled.
// "rust-analyzer.cargo.target": "wasm32-unknown-unknown",
// Analyze with all cargo features enabled so feature-gated modules are checked.
"rust-analyzer.cargo.features": "all"
}
================================================
FILE: .yarnrc.yml
================================================
# Use the classic node_modules install layout instead of Yarn Plug'n'Play.
nodeLinker: node-modules
================================================
FILE: CHANGELOG.md
================================================
# Changelog
## [0.7.1] - 2025-09-17
### What's Changed
- ci: Bump node version in docs publish CI by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/792
- Readme edit for tl;dr in hyparquet comparison by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/793
- Fix handling of `rowGroups` option in `ParquetFile.read` by @quidquid in https://github.com/kylebarron/parquet-wasm/pull/802
### New Contributors
- @quidquid made their first contribution in https://github.com/kylebarron/parquet-wasm/pull/802
**Full Changelog**: https://github.com/kylebarron/parquet-wasm/compare/v0.7.0...v0.7.1
## [0.7.0] - 2025-09-17
### What's Changed
- Streaming Writes implementation by @H-Plus-Time in https://github.com/kylebarron/parquet-wasm/pull/305
- Expose schema on ParquetFile by @H-Plus-Time in https://github.com/kylebarron/parquet-wasm/pull/599
- Small doc fixes by @severo in https://github.com/kylebarron/parquet-wasm/pull/762
- Bump to arrow/parquet 56 by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/783
- feat: Support reading Parquet data with embedded Arrow schema containing string view/binary view types by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/786
- docs: Add section to docs about hyparquet comparison by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/790
### New Contributors
- @severo made their first contribution in https://github.com/kylebarron/parquet-wasm/pull/762
**Full Changelog**: https://github.com/kylebarron/parquet-wasm/compare/v0.6.1...v0.7.0
## [0.6.1] - 2024-05-04
### What's Changed
- Use Blob instead of File for input in `ParquetFile.fromFile`.
- Export wasm paths from `package.json`.
## [0.6.0] - 2024-04-21
### New! :sparkles:
- Class-based API + concurrent streams + column selections + File reader by @H-Plus-Time in https://github.com/kylebarron/parquet-wasm/pull/407. This added a new `ParquetFile` API for working with files at remote URLs without downloading them first.
- Conditional exports in `package.json`. This should make it easier to use across Node and browser.
- Improved documentation for how to use different entry points.
### Breaking Changes:
- arrow2 and parquet2-based implementation has been removed.
- Layout of files has changed. Your import may need to change.
- Imports are now `parquet-wasm`, `parquet-wasm/esm`, `parquet-wasm/bundler`, and `parquet-wasm/node`.
### What's Changed
- Add conditional exports by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/382
- CI production build size summary by @H-Plus-Time in https://github.com/kylebarron/parquet-wasm/pull/401
- Remove arrow2 implementation by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/446
- feat: add lz4_raw support for `arrow1` by @fspoettel in https://github.com/kylebarron/parquet-wasm/pull/466
- Highlight that esm entry point needs await of default export by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/487
- Fixes for both report builds and PR comment workflow by @H-Plus-Time in https://github.com/kylebarron/parquet-wasm/pull/495
- fix package exports by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/414
- Object store wasm usage by @H-Plus-Time in https://github.com/kylebarron/parquet-wasm/pull/490
- Set Parquet key-value metadata by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/503
- Read parquet with options by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/506
- Documentation updates for 0.6 by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/507
- Avoid bigint for metadata queries by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/508
- Update async API by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/510
- Add test to read empty file by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/512
- bump arrow libraries to version 51 by @jdoig in https://github.com/kylebarron/parquet-wasm/pull/496
### New Contributors
- @fspoettel made their first contribution in https://github.com/kylebarron/parquet-wasm/pull/466
- @jdoig made their first contribution in https://github.com/kylebarron/parquet-wasm/pull/496
**Full Changelog**: https://github.com/kylebarron/parquet-wasm/compare/v0.5.0...v0.6.0
## [0.5.0] - 2023-10-21
### What's Changed
- Switch to an API based on table abstractions from [arrow-wasm](https://github.com/kylebarron/arrow-wasm).
- Update docs
- Initial implementation of reading to a stream of Arrow batches. By @H-Plus-Time in https://github.com/kylebarron/parquet-wasm/pull/296
### New Contributors
- @H-Plus-Time made their first contribution in https://github.com/kylebarron/parquet-wasm/pull/296
**Full Changelog**: https://github.com/kylebarron/parquet-wasm/compare/v0.4.0...v0.5.0
## [0.4.0] - 2023-08-15
### What's Changed
- Async reader support in the arrow2 bindings
- Improved memory usage via `readParquetFFI` in conjunction with `arrow-js-ffi`.
- Remove "2" from function names in arrow2 api by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/173
- Make arrow2 the default bundle by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/174
- Add bindings for arrow2 metadata (without serde support) by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/153
- Add lz4_raw and zstd compressions for parquet2 by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/114
**Full Changelog**: https://github.com/kylebarron/parquet-wasm/compare/v0.3.1...v0.4.0
## [0.4.0-beta.1] - 2022-08-08
### What's Changed
- Add lz4_raw and zstd compressions for parquet2 by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/114
- Simplify cargo features by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/117
- Add vscode rust-analyzer target setting by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/131
- add msrv by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/132
- pin clap to 3.1.\* by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/139
- Make writerProperties optional in JS api by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/152
- Add bindings for arrow2 metadata (without serde support) by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/153
- Async reader by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/96
- Cleaner error handling by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/157
- implement `From` instead of custom methods by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/168
- Remove "2" from function names in arrow2 api by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/173
- Make arrow2 the default bundle by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/174
- Improved documentation for async reading by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/175
**Full Changelog**: https://github.com/kylebarron/parquet-wasm/compare/v0.3.1...v0.4.0-beta.1
## [0.3.1] - 2022-04-26
### What's Changed
- Bump arrow from 11.0.0 to 11.1.0 by @dependabot in https://github.com/kylebarron/parquet-wasm/pull/77
- Update lockfile by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/76
- Add clippy by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/78
- Remove old debug script by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/79
- Bump clap from 3.1.8 to 3.1.9 by @dependabot in https://github.com/kylebarron/parquet-wasm/pull/87
- Check that input exists/is a uint8array by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/102
- Update test files to those written by pyarrow v7 by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/103
- Update to arrow and parquet 12.0 by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/105
- Bump clap from 3.1.9 to 3.1.12 by @dependabot in https://github.com/kylebarron/parquet-wasm/pull/98
- Create arrow1/arrow2 read benchmarks by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/82
- Publish docs on tag by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/106
- Update readme by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/107
- Add published examples section to readme by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/108
- Unify build script by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/109
- esm2 entrypoint with no import.meta.url by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/110
**Full Changelog**: https://github.com/kylebarron/parquet-wasm/compare/v0.3.0...v0.3.1
## [0.3.0] - 2022-04-04
### What's Changed
- Debug cli by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/64
- Bump to arrow 11.0 to support zstd compression by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/66
- Update bundling by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/67
- Add dependabot by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/70
- Bump clap from 3.1.6 to 3.1.8 by @dependabot in https://github.com/kylebarron/parquet-wasm/pull/71
- Bump getrandom from 0.2.5 to 0.2.6 by @dependabot in https://github.com/kylebarron/parquet-wasm/pull/72
### New Contributors
- @dependabot made their first contribution in https://github.com/kylebarron/parquet-wasm/pull/71
**Full Changelog**: https://github.com/kylebarron/parquet-wasm/compare/v0.2.0...v0.3.0
## [0.2.0] - 2022-03-17
- Restore arrow-rs support by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/21
- Write parquet with arrow1 by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/23
- Refactor code into lower-level functions, use `?` operator by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/25
- Make record batch size the nrows of the first row group by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/26
- Rename arrow-rs api as default by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/31
- Implement writerPropertiesBuilder for arrow1 by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/30
- Refactor into modules by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/32
- Update bundling to create arrow2 entrypoints by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/33
- Node testing setup by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/34
- Helper to copy vec<u8> to Uint8Array by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/38
- Faster builds on Node CI tests by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/39
- Rust CI caching by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/40
- ZSTD mac instructions in readme by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/42
- Keep opt-level = s and remove `console_error_panic_hook` by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/48
- WriterPropertiesBuilder for arrow2 by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/49
- Docstrings for public functions, structs, enums by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/50
- Compression-specific features by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/51
- Add more node tests by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/52
- Separate reader and writer features by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/47
- Docs update by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/53
- Working typedoc by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/55
- Update docstrings and readme by @kylebarron in https://github.com/kylebarron/parquet-wasm/pull/60
**Full Changelog**: https://github.com/kylebarron/parquet-wasm/compare/v0.1.1...v0.2.0
## [0.1.1] - 2022-03-06
- Attempt better bundling, with APIs for bundlers, Node, and the Web.
## [0.1.0] - 2022-03-06
- Initial release
- Barebones `read_parquet` and `write_parquet` functions.
================================================
FILE: Cargo.toml
================================================
[package]
name = "parquet-wasm"
version = "0.7.1"
authors = ["Kyle Barron <kylebarron2@gmail.com>"]
edition = "2024"
description = "WebAssembly Parquet reader and writer."
readme = "README.md"
repository = "https://github.com/kylebarron/parquet-wasm"
license = "MIT OR Apache-2.0"
keywords = ["parquet", "webassembly", "arrow"]
categories = ["wasm"]
rust-version = "1.85"
# cdylib is the wasm-bindgen artifact; rlib additionally allows the crate to
# be consumed as an ordinary Rust dependency.
[lib]
crate-type = ["cdylib", "rlib"]
# Feature flags: `reader`/`writer` gate the two halves of the API, `async`
# pulls in the streaming/fetch dependencies, and each compression codec has
# its own feature so consumers can trim code size.
[features]
default = ["all_compressions", "reader", "writer", "async"]
reader = []
writer = []
async = [
"dep:wasm-bindgen-futures",
"dep:futures",
"dep:range-reader",
"dep:reqwest",
"dep:wasm-streams",
"dep:async-compat",
"dep:async-stream",
"parquet/async",
]
debug = ["console_error_panic_hook"]
brotli = ["parquet/brotli"]
gzip = ["parquet/flate2", "parquet/flate2-zlib-rs"]
snappy = ["parquet/snap"]
zstd = ["parquet/zstd", "dep:zstd", "zstd-sys"]
lz4 = ["parquet/lz4"]
all_compressions = ["brotli", "gzip", "snappy", "zstd", "lz4"]
# Full list of available features
full = ["async", "debug", "all_compressions", "reader", "writer"]
[dependencies]
wasm-bindgen = { version = "0.2.95", features = ["serde-serialize"] }
serde = "1.0.225"
serde-wasm-bindgen = { version = "0.6.5" }
# The `console_error_panic_hook` crate provides better debugging of panics by
# logging them with `console.error`. This is great for development, but requires
# all the `std::fmt` and `std::panicking` infrastructure, so isn't great for
# code size when deploying.
console_error_panic_hook = { version = "0.1.6", optional = true }
# `wee_alloc` is a tiny allocator for wasm that is only ~1K in code size
# compared to the default allocator's ~10K. It is slower than the default
# allocator, however.
# if wee_alloc only saves 10KB, might not be worth the slower allocation speed?
# wee_alloc = "0.4.5"
js-sys = "0.3.72"
thiserror = "2.0"
# Pinned to a specific arrow-wasm revision (git dependency, not on crates.io).
arrow-wasm = { git = "https://github.com/kylebarron/arrow-wasm", rev = "6da94ef0a1522a244984a7d3d58a0339d0851d96", default-features = false, features = [
"table",
"record_batch",
"schema",
] }
arrow = { version = "56.1", default-features = false, features = [
"ipc",
"ffi",
] }
arrow-schema = "56.1"
parquet = { version = "56.1", default-features = false, features = [
"arrow",
"base64",
] }
bytes = "1"
# We only bring this in for coalesce_ranges
object_store = { version = "0.12", default-features = false }
wasm-bindgen-futures = { version = "0.4.45", optional = true }
futures = { version = "0.3", optional = true }
range-reader = { version = "0.2", optional = true }
reqwest = { version = "0.12.23", optional = true, default-features = false }
# Pass "wasm" and "thin" down to the transitive zstd dependency
zstd = { version = "*", features = [
"wasm",
"thin",
], default-features = false, optional = true }
zstd-sys = { version = "=2.0.9", optional = true, default-features = false }
# 0.2.3 crashes the Node tests. See
# https://github.com/kylebarron/parquet-wasm/pull/496#issuecomment-2057374608
async-compat = { version = "=0.2.2", optional = true }
async-stream = { version = "0.3.6", optional = true }
wasm-streams = { version = "0.4.2", optional = true }
async-trait = "0.1.89"
url = "2.5.7"
# Both getrandom major versions exist in the dependency tree; enable the JS
# entropy backend on each so wasm builds can obtain randomness.
# https://github.com/kylebarron/arrow-wasm/issues/8#issuecomment-2790469295
[dependencies.getrandom_v03]
package = "getrandom"
version = "0.3"
features = ["wasm_js"]
# https://github.com/kylebarron/arrow-wasm/issues/8#issuecomment-2790469295
[dependencies.getrandom_v02]
package = "getrandom"
version = "0.2"
features = ["js"]
[dependencies.web-sys]
version = "0.3.72"
features = [
'console',
'Headers',
'Request',
'RequestInit',
'RequestMode',
'Response',
'Window',
"Document",
"Element",
"File",
]
[dev-dependencies]
wasm-bindgen-test = "0.3.51"
# Configuration for the `cargo check-all-features` CI job (test.yml `check`).
[package.metadata.cargo-all-features]
# If your crate has a large number of optional dependencies, skip them for speed
skip_optional_dependencies = true
# Exclude certain features from the build matrix
denylist = [
"full",
"all_compressions",
"default",
"brotli",
"gzip",
"snappy",
"zstd",
"lz4",
]
[profile.release]
# Tell `rustc` to optimize for small code size.
# As of 3/15/22, opt-level = s was smallest
# https://github.com/kylebarron/parquet-wasm/pull/48
opt-level = "s"
lto = true
================================================
FILE: DEVELOP.md
================================================
# Development
- Install [wasm-pack](https://rustwasm.github.io/wasm-pack/)
- Compile: `wasm-pack build`, or change targets, e.g. `wasm-pack build --target nodejs`
- Publish: `wasm-pack publish`.
### MacOS
Some steps may need a specific configuration if run on MacOS. Specifically, the default `clang` shipped with Macs (as of March 2022) doesn't have WebAssembly compilation supported out of the box. To build ZSTD, you may need to install a later version via Homebrew and update your paths to find the correct executables.
```
brew install llvm
export PATH="/usr/local/opt/llvm/bin/:$PATH"
export CC=/usr/local/opt/llvm/bin/clang
export AR=/usr/local/opt/llvm/bin/llvm-ar
```
Note that homebrew paths are different on an Apple ARM-based Mac:
```
brew install llvm
export PATH="/opt/homebrew/opt/llvm/bin/:$PATH"
export CC=/opt/homebrew/opt/llvm/bin/clang
export AR=/opt/homebrew/opt/llvm/bin/llvm-ar
```
See [this description](https://github.com/kylebarron/parquet-wasm/pull/2#issue-1159174043) and its references for more info.
## Publishing
`wasm-pack` supports [three different targets](https://rustwasm.github.io/docs/wasm-pack/commands/build.html#target):
- `bundler` (used with bundlers like Webpack)
- `nodejs` (used with Node, supports `require`)
- `web` (used as an ES module directly from the web)
There are good reasons to distribute as any of these... so why not distribute as all three? `wasm-pack` doesn't support this directly but the build script in `scripts/build.sh` calls `wasm-pack` three times and merges the outputs. This means that bundler users can use the default, Node users can use `parquet-wasm/node` and ES Modules users can use `parquet-wasm/web` in their imports.
To publish:
```
yarn build
wasm-pack publish
```
================================================
FILE: LICENSE_APACHE
================================================
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
================================================
FILE: LICENSE_MIT
================================================
Copyright (c) 2022 Kyle Barron
Permission is hereby granted, free of charge, to any
person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the
Software without restriction, including without
limitation the rights to use, copy, modify, merge,
publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software
is furnished to do so, subject to the following
conditions:
The above copyright notice and this permission notice
shall be included in all copies or substantial portions
of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
================================================
FILE: README.md
================================================
# WASM Parquet [](https://www.npmjs.com/package/parquet-wasm)
WebAssembly bindings to read and write the [Apache Parquet](https://parquet.apache.org/) format to and from [Apache Arrow](https://arrow.apache.org/) using the Rust [`parquet`](https://crates.io/crates/parquet) and [`arrow`](https://crates.io/crates/arrow) crates.
This is designed to be used alongside a JavaScript Arrow implementation, such as the canonical [JS Arrow library](https://arrow.apache.org/docs/js/).
Including read and write support and all compression codecs, the brotli-compressed WASM bundle is 1.2 MB. Refer to [custom builds](#custom-builds) for how to build a smaller bundle. A minimal read-only bundle without compression support can be as small as 456 KB brotli-compressed.
## Install
`parquet-wasm` is published to NPM. Install with
```
yarn add parquet-wasm
```
or
```
npm install parquet-wasm
```
## API
Parquet-wasm has both a synchronous and asynchronous API. The sync API is simpler but requires fetching the entire Parquet buffer in advance, which is often prohibitive.
### Sync API
Refer to these functions:
- [`readParquet`](https://kylebarron.dev/parquet-wasm/functions/esm_parquet_wasm.readParquet.html): Read a Parquet file synchronously.
- [`readSchema`](https://kylebarron.dev/parquet-wasm/functions/esm_parquet_wasm.readSchema.html): Read an Arrow schema from a Parquet file synchronously.
- [`writeParquet`](https://kylebarron.dev/parquet-wasm/functions/esm_parquet_wasm.writeParquet.html): Write a Parquet file synchronously.
### Async API
- [`readParquetStream`](https://kylebarron.dev/parquet-wasm/functions/esm_parquet_wasm.readParquetStream.html): Create a [ReadableStream](https://developer.mozilla.org/en-US/docs/Web/API/ReadableStream) that emits Arrow RecordBatches from a Parquet file.
- [`ParquetFile`](https://kylebarron.dev/parquet-wasm/classes/esm_parquet_wasm.ParquetFile.html): A class for reading portions of a remote Parquet file. Use [`fromUrl`](https://kylebarron.dev/parquet-wasm/classes/esm_parquet_wasm.ParquetFile.html#fromUrl) to construct from a remote URL or [`fromFile`](https://kylebarron.dev/parquet-wasm/classes/esm_parquet_wasm.ParquetFile.html#fromFile) to construct from a [`File`](https://developer.mozilla.org/en-US/docs/Web/API/File) handle. Note that when you're done using this class, you'll need to call [`free`](https://kylebarron.dev/parquet-wasm/classes/esm_parquet_wasm.ParquetFile.html#free) to release any memory held by the ParquetFile instance itself.
Both sync and async functions return or accept a [`Table`](https://kylebarron.dev/parquet-wasm/classes/bundler_parquet_wasm.Table.html) class, an Arrow table in WebAssembly memory. Refer to its documentation for moving data into/out of WebAssembly.
## Entry Points
| Entry point | Description | Documentation |
| ------------------------------------------------------------------------- | ------------------------------------------------------- | -------------------- |
| `parquet-wasm`, `parquet-wasm/esm`, or `parquet-wasm/esm/parquet_wasm.js` | ESM, to be used directly from the Web as an ES Module | [Link][esm-docs] |
| `parquet-wasm/bundler` | "Bundler" build, to be used in bundlers such as Webpack | [Link][bundler-docs] |
| `parquet-wasm/node` | Node build, to be used with synchronous `require` in NodeJS | [Link][node-docs] |
[bundler-docs]: https://kylebarron.dev/parquet-wasm/modules/bundler_parquet_wasm.html
[node-docs]: https://kylebarron.dev/parquet-wasm/modules/node_parquet_wasm.html
[esm-docs]: https://kylebarron.dev/parquet-wasm/modules/esm_parquet_wasm.html
### ESM
The `esm` entry point is the primary entry point. It is the default export from `parquet-wasm`, and is also accessible at `parquet-wasm/esm` and `parquet-wasm/esm/parquet_wasm.js` (for symmetric imports [directly from a browser](#using-directly-from-a-browser)).
**Note that when using the `esm` bundles, you must manually initialize the WebAssembly module before using any APIs**. Otherwise, you'll get an error `TypeError: Cannot read properties of undefined`. There are multiple ways to initialize the WebAssembly code:
#### Asynchronous initialization
The primary way to initialize is by awaiting the default export.
```js
import wasmInit, {readParquet} from "parquet-wasm";
await wasmInit();
```
Without any parameter, this will try to fetch a file named `'parquet_wasm_bg.wasm'` at the same location as `parquet-wasm`. (E.g. this snippet `input = new URL('parquet_wasm_bg.wasm', import.meta.url);`).
Note that you can also pass in a custom URL if you want to host the `.wasm` file on your own servers.
```js
import wasmInit, {readParquet} from "parquet-wasm";
// Update this version to match the version you're using.
const wasmUrl = "https://cdn.jsdelivr.net/npm/parquet-wasm@0.6.1/esm/parquet_wasm_bg.wasm";
await wasmInit(wasmUrl);
```
#### Synchronous initialization
The `initSync` named export allows for synchronous initialization:
```js
import {initSync, readParquet} from "parquet-wasm";
// The contents of esm/parquet_wasm_bg.wasm in an ArrayBuffer
const wasmBuffer = new ArrayBuffer(...);
// Initialize the Wasm synchronously
initSync(wasmBuffer)
```
Async initialization should be preferred over downloading the Wasm buffer and then initializing it synchronously, as [`WebAssembly.instantiateStreaming`](https://developer.mozilla.org/en-US/docs/WebAssembly/JavaScript_interface/instantiateStreaming_static) is the most efficient way to both download and initialize Wasm code.
### Bundler
The `bundler` entry point doesn't require manual initialization of the WebAssembly blob, but needs setup with whatever bundler you're using. [Refer to the Rust Wasm documentation for more info](https://rustwasm.github.io/docs/wasm-bindgen/reference/deployment.html#bundlers).
### Node
The `node` entry point can be loaded synchronously from Node.
```js
const {readParquet} = require("parquet-wasm");
const wasmTable = readParquet(...);
```
### Using directly from a browser
You can load the `esm/parquet_wasm.js` file directly from a CDN
```js
const parquet = await import(
"https://cdn.jsdelivr.net/npm/parquet-wasm@0.6.1/esm/+esm"
)
await parquet.default();
const wasmTable = parquet.readParquet(...);
```
This specific endpoint will minify the ESM before you receive it.
### Debug functions
These functions are not present in normal builds to cut down on bundle size. To create a custom build, see [Custom Builds](#custom-builds) below.
#### `setPanicHook`
`setPanicHook(): void`
Sets [`console_error_panic_hook`](https://github.com/rustwasm/console_error_panic_hook) in Rust, which provides better debugging of panics by having more informative `console.error` messages. Initialize this first if you're getting errors such as `RuntimeError: Unreachable executed`.
The WASM bundle must be compiled with the `console_error_panic_hook` feature for this function to exist.
## Example
```js
import * as arrow from "apache-arrow";
import initWasm, {
Compression,
readParquet,
Table,
writeParquet,
WriterPropertiesBuilder,
} from "parquet-wasm";
// Instantiate the WebAssembly context
await initWasm();
// Create Arrow Table in JS
const LENGTH = 2000;
const rainAmounts = Float32Array.from({ length: LENGTH }, () =>
Number((Math.random() * 20).toFixed(1))
);
const rainDates = Array.from(
{ length: LENGTH },
(_, i) => new Date(Date.now() - 1000 * 60 * 60 * 24 * i)
);
const rainfall = arrow.tableFromArrays({
precipitation: rainAmounts,
date: rainDates,
});
// Write Arrow Table to Parquet
// wasmTable is an Arrow table in WebAssembly memory
const wasmTable = Table.fromIPCStream(arrow.tableToIPC(rainfall, "stream"));
const writerProperties = new WriterPropertiesBuilder()
.setCompression(Compression.ZSTD)
.build();
const parquetUint8Array = writeParquet(wasmTable, writerProperties);
// Read Parquet buffer back to Arrow Table
// arrowWasmTable is an Arrow table in WebAssembly memory
const arrowWasmTable = readParquet(parquetUint8Array);
// table is now an Arrow table in JS memory
const table = arrow.tableFromIPC(arrowWasmTable.intoIPCStream());
console.log(table.schema.toString());
// Schema<{ 0: precipitation: Float32, 1: date: Date64<MILLISECOND> }>
```
### Published examples
(These may use older versions of the library with a different API).
- [GeoParquet on the Web (Observable)](https://observablehq.com/@kylebarron/geoparquet-on-the-web)
- [Hello, Parquet-WASM (Observable)](https://observablehq.com/@bmschmidt/hello-parquet-wasm)
## Comparison to [`hyparquet`](https://github.com/hyparam/hyparquet)
`hyparquet` is another Parquet reader for JavaScript. That project is written in pure JavaScript and has subtly different goals and comparing it to `parquet-wasm` provides benefits and costs.
**tl;dr**: if you can use [Arrow](https://arrow.apache.org/) in your app, use `parquet-wasm` for better performance and memory usage. Otherwise, or if you are latency-focused, hyparquet could be better for you.
### Advantages of `hyparquet`:
- Smaller bundle size
- Pure JS, so easier to debug
- No WebAssembly, so no initialization step
- No WebAssembly, so no separate memory space.
### Advantages of `parquet-wasm`:
- Faster for large files, as it uses a very high-performance Rust Parquet library compiled to WebAssembly
- Faster and more memory efficient because it loads data into Apache Arrow, a high-performance binary memory format. In comparison, hyparquet loads data to JS objects, which are _much_ less memory efficient than Arrow buffers. This is especially true for large files.
- Even though `parquet-wasm` has a larger bundle size, the bandwidth savings of loading large amounts of Parquet can quickly make up for that overhead.
### Conclusion
- If you only need to load the _metadata_ of Parquet files, or if you have _very small_ Parquet files, using hyparquet could be a good choice as hyparquet is smaller, and thus the overhead before loading the file could be smaller.
- If you need the _absolute smallest_ bundle size, hyparquet may be better for your use case.
- Otherwise, since `parquet-wasm`:
1. Uses a really high performance Rust library
2. Is running in WebAssembly, and
3. Converts to a high-performance binary memory format
If you have large files and can use the resulting Arrow data directly without converting to JS objects, `parquet-wasm` should be significantly faster and more memory efficient.
Feel free to open an issue to discuss more!
## Performance considerations
Tl;dr: When you have a `Table` object (resulting from `readParquet`), try the new
[`Table.intoFFI`](https://kylebarron.dev/parquet-wasm/classes/esm_parquet_wasm.Table.html#intoFFI)
API to move it to JavaScript memory. This API is less well tested than the [`Table.intoIPCStream`](https://kylebarron.dev/parquet-wasm/classes/esm_parquet_wasm.Table.html#intoIPCStream) API, but should be
faster and have **much** less memory overhead (by a factor of 2). If you hit any bugs, please
[create a reproducible issue](https://github.com/kylebarron/parquet-wasm/issues/new).
Under the hood, `parquet-wasm` first decodes a Parquet file into Arrow _in WebAssembly memory_. But
then that WebAssembly memory needs to be copied into JavaScript for use by Arrow JS. The "normal"
conversion APIs (e.g. `Table.intoIPCStream`) use the [Arrow IPC
format](https://arrow.apache.org/docs/python/ipc.html) to get the data back to JavaScript. But this
requires another memory copy _inside WebAssembly_ to assemble the various arrays into a single
buffer to be copied back to JS.
Instead, the new `Table.intoFFI` API uses Arrow's [C Data
Interface](https://arrow.apache.org/docs/format/CDataInterface.html) to be able to copy or view
Arrow arrays from within WebAssembly memory without any serialization.
Note that this approach uses the [`arrow-js-ffi`](https://github.com/kylebarron/arrow-js-ffi)
library to parse the Arrow C Data Interface definitions. This library has not yet been tested in
production, so it may have bugs!
I wrote an [interactive blog
post](https://observablehq.com/@kylebarron/zero-copy-apache-arrow-with-webassembly) on this approach
and the Arrow C Data Interface if you want to read more!
### Example
```js
import * as arrow from "apache-arrow";
import { parseTable } from "arrow-js-ffi";
import initWasm, { wasmMemory, readParquet } from "parquet-wasm";
// Instantiate the WebAssembly context
await initWasm();
// A reference to the WebAssembly memory object.
const WASM_MEMORY = wasmMemory();
const resp = await fetch("https://example.com/file.parquet");
const parquetUint8Array = new Uint8Array(await resp.arrayBuffer());
const wasmArrowTable = readParquet(parquetUint8Array).intoFFI();
// Arrow JS table that was directly copied from Wasm memory
const table: arrow.Table = parseTable(
WASM_MEMORY.buffer,
wasmArrowTable.arrayAddrs(),
wasmArrowTable.schemaAddr()
);
// VERY IMPORTANT! You must call `drop` on the Wasm table object when you're done using it
// to release the Wasm memory.
// Note that any access to the pointers in this table is undefined behavior after this call.
// Calling any `wasmArrowTable` method will error.
wasmArrowTable.drop();
```
## Compression support
The Parquet specification permits several compression codecs. This library currently supports:
- [x] Uncompressed
- [x] Snappy
- [x] Gzip
- [x] Brotli
- [x] ZSTD
- [x] LZ4_RAW
- [ ] LZ4 (deprecated)
LZ4 support in Parquet is a bit messy. As described [here](https://github.com/apache/parquet-format/blob/54e53e5d7794d383529dd30746378f19a12afd58/Compression.md), there are _two_ LZ4 compression options in Parquet (as of version 2.9.0). The original version `LZ4` is now deprecated; it used an undocumented framing scheme which made interoperability difficult. The specification now reads:
> It is strongly suggested that implementors of Parquet writers deprecate this compression codec in their user-facing APIs, and advise users to switch to the newer, interoperable `LZ4_RAW` codec.
It's currently unknown how widespread the ecosystem support is for `LZ4_RAW`. As of `pyarrow` v7, it now writes `LZ4_RAW` by default and presumably has read support for it as well.
## Custom builds
In some cases, you may know ahead of time that your Parquet files will only include a single compression codec, say Snappy, or even no compression at all. In these cases, you may want to create a custom build of `parquet-wasm` to keep bundle size at a minimum. If you install the Rust toolchain and `wasm-pack` (see [Development](DEVELOP.md)), you can create a custom build with only the compression codecs you require.
The minimum supported Rust version in this project is 1.60. To upgrade your toolchain, use `rustup update stable`.
### Example custom builds
Reader-only bundle with Snappy compression:
```
wasm-pack build --no-default-features --features snappy --features reader
```
Writer-only bundle with no compression support, targeting Node:
```
wasm-pack build --target nodejs --no-default-features --features writer
```
Bundle with reader and writer support, targeting Node, using `arrow` and `parquet` crates with all their supported compressions, with `console_error_panic_hook` enabled:
```bash
wasm-pack build \
--target nodejs \
--no-default-features \
--features reader \
--features writer \
--features all_compressions \
--features debug
# Or, given the fact that the default feature includes several of these features, a shorter version:
wasm-pack build --target nodejs --features debug
```
Refer to the [`wasm-pack` documentation](https://rustwasm.github.io/docs/wasm-pack/commands/build.html) for more info on flags such as `--release`, `--dev`, `target`, and to the [Cargo documentation](https://doc.rust-lang.org/cargo/reference/features.html) for more info on how to use features.
### Available features
By default, `all_compressions`, `reader`, `writer`, and `async` features are enabled. Use `--no-default-features` to remove these defaults.
- `reader`: Activate read support.
- `writer`: Activate write support.
- `async`: Activate asynchronous read support.
- `all_compressions`: Activate all supported compressions.
- `brotli`: Activate Brotli compression.
- `gzip`: Activate Gzip compression.
- `snappy`: Activate Snappy compression.
- `zstd`: Activate ZSTD compression.
- `lz4`: Activate LZ4_RAW compression.
- `debug`: Expose the `setPanicHook` function for better error messages for Rust panics.
## Node <20
On Node versions before 20, you'll have to [polyfill the Web Cryptography API](https://docs.rs/getrandom/latest/getrandom/#nodejs-es-module-support).
## Future work
- [ ] Example of pushdown predicate filtering, to download only chunks that match a specific condition
- [ ] Column filtering, to download only certain columns
- [ ] More tests
## Acknowledgements
A starting point of my work came from @my-liminal-space's [`read-parquet-browser`](https://github.com/my-liminal-space/read-parquet-browser) (which is also dual licensed MIT and Apache 2).
@domoritz's [`arrow-wasm`](https://github.com/domoritz/arrow-wasm) was a very helpful reference for bootstrapping Rust-WASM bindings.
================================================
FILE: bench/bench.ts
================================================
import b from "benny";
import * as parquet from "../pkg/node";
import { readFileSync } from "fs";
const dataDir = `${__dirname}/data`;
// Cartesian product of any number of arrays; each result row is a flat tuple.
// https://stackoverflow.com/a/43053803
const cartesian = (...arrays) =>
  arrays.reduce((product, values) =>
    product.flatMap((row) => values.map((value) => [row, value].flat()))
  );
// Benchmark matrix: every (row-group partition count, compression codec) pair.
const partitions = [1, 5, 20];
const compressions = ["brotli", "gzip", "none", "snappy"];
// NOTE(review): entries are two-element [partitions, compression] tuples;
// no third element is produced here.
const testCases: [number, string][] = cartesian(partitions, compressions);
// Build one benny benchmark per (partition count, compression) pair.
// Fix: the previous version destructured a third tuple element `api` that
// `cartesian(partitions, compressions)` never produces, so every test name
// rendered as "undefined <file>". Use a fixed label instead.
const createReadTests = () =>
  testCases.map(([partitions, compression]) => {
    const file = `${partitions}-partition-${compression}`;
    const testName = `readParquet ${file}`;
    return b.add(testName, () => {
      // Load outside the returned closure so only the parse is timed.
      const arr = loadFile(file);
      return () => parquet.readParquet2(arr);
    });
  });
// Read a benchmark fixture from the data directory as raw bytes.
function loadFile(name: string): Uint8Array {
  const path = `${dataDir}/${name}.parquet`;
  const contents = readFileSync(path);
  return new Uint8Array(contents);
}
// Run all read benchmarks, print cycle results, and save an HTML chart
// report under bench/results/.
b.suite(
"Read Parquet",
...createReadTests(),
b.cycle(),
b.configure({ minDisplayPrecision: 2 }),
b.complete(),
b.save({
file: "bench",
folder: "bench/results/",
version: "0.3.0",
details: true,
format: "chart.html",
})
);
================================================
FILE: bench/make_data.py
================================================
from pathlib import Path
import numpy as np
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
compressions = ["SNAPPY", "GZIP", "BROTLI", "ZSTD", "NONE"]
def create_table(n_rows=1_000_000):
    """Build a pyarrow Table of random columns: uint8/uint16/uint32 plus bool."""
    columns = {
        dtype: pa.array(np.random.randint(0, np.iinfo(dtype).max, size=n_rows))
        for dtype in ("uint8", "uint16", "uint32")
    }
    columns["bool"] = pa.array(np.random.randint(0, 2, size=n_rows), type=pa.bool_())
    # Todo column with string data?
    # https://stackoverflow.com/a/2257449
    return pa.table(columns)
def write_table(table):
    """Write `table` under data/ once per (partition count, compression) combo.

    For each partition count, row_group_size is chosen so the table splits
    into that many row groups.
    """
    # Create data directory
    Path("data").mkdir(exist_ok=True)
    data_len = len(table)
    for n_partitions in [1, 5, 20]:
        for compression in compressions:
            # Fix: `/` is float division in Python 3; row_group_size must be
            # an integer row count, so use floor division.
            row_group_size = data_len // n_partitions
            compression_text = compression.lower()
            fname = f"data/{n_partitions}-partition-{compression_text}.parquet"
            pq.write_table(
                table, fname, row_group_size=row_group_size, compression=compression
            )
def main() -> None:
    """Generate the benchmark table and write every parquet variant."""
    table = create_table()
    write_table(table)


if __name__ == "__main__":
    main()
================================================
FILE: bench/pyproject.toml
================================================
# Poetry project used only to generate parquet-wasm benchmark data
# (see make_data.py in this directory).

[tool.poetry]
name = "parquet-wasm-bench"
version = "0.1.0"
description = "Create data for parquet-wasm benchmarks"
authors = ["Kyle Barron <kylebarron2@gmail.com>"]
license = "MIT"

[tool.poetry.dependencies]
python = "^3.8"
numpy = "^1.22.3"
pyarrow = "^7.0.0"
# NOTE(review): pandas is imported by make_data.py but never used there.
pandas = "^1.4.2"

[tool.poetry.dev-dependencies]

[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
================================================
FILE: package.json
================================================
{
"scripts": {
"build": "bash ./scripts/build.sh",
"build:test": "ENV='DEV' yarn build",
"docs:build": "typedoc",
"docs:publish": "gh-pages -d docs_build",
"docs:serve": "cd docs_build && python -m http.server 8081",
"docs:watch": "typedoc --watch",
"test": "vitest run ./tests/js/index.test.ts"
},
"devDependencies": {
"@fastify/static": "^7.0.4",
"@types/node": "^24",
"apache-arrow": "^20.0.0",
"arrow-js-ffi": "^0.4.3",
"benny": "^3.7.1",
"fastify": "^4.28.1",
"gh-pages": "^6.2.0",
"typedoc": "^0.28.13",
"typescript": "^5.6.3",
"vitest": "^3.2.4"
},
"volta": {
"node": "20.12.2",
"yarn": "1.22.19"
}
}
================================================
FILE: scripts/build.sh
================================================
#! /usr/bin/env bash
# Build parquet-wasm for the node, web (esm), and bundler targets, then
# assemble a single publishable package under pkg/.
# Set ENV=DEV for a debug build with the `debug` cargo feature enabled.

# Start from a clean slate.
rm -rf tmp_build pkg
mkdir -p tmp_build

if [ "$ENV" == "DEV" ]; then
  BUILD="--dev"
  FLAGS="--features debug"
else
  BUILD="--release"
  FLAGS=""
fi

# Build node version into tmp_build/node
# $BUILD and $FLAGS are intentionally unquoted so they word-split into
# separate arguments (FLAGS may be empty or contain two words).
echo "Building node"
wasm-pack build \
  $BUILD \
  --out-dir tmp_build/node \
  --target nodejs \
  $FLAGS &
# On CI, wait for each backgrounded build before starting the next —
# presumably to cap memory use on the runner; locally they run in parallel.
[ -n "$CI" ] && wait;

# Build web version into tmp_build/esm
echo "Building esm"
wasm-pack build \
  $BUILD \
  --out-dir tmp_build/esm \
  --target web \
  $FLAGS &
[ -n "$CI" ] && wait;

# Build bundler version into tmp_build/bundler
echo "Building bundler"
wasm-pack build \
  $BUILD \
  --out-dir tmp_build/bundler \
  --target bundler \
  $FLAGS &
# Wait for any still-running builds before assembling pkg/.
wait

# Copy files into pkg/
mkdir -p pkg/{node,esm,bundler}
cp tmp_build/bundler/parquet* pkg/bundler/
cp tmp_build/esm/parquet* pkg/esm
cp tmp_build/node/parquet* pkg/node
cp tmp_build/bundler/{LICENSE_APACHE,LICENSE_MIT,README.md} pkg/

# Copy in combined package.json from template
# https://stackoverflow.com/a/24904276
# Note that keys from the second file will overwrite keys from the first.
jq -s '.[0] * .[1]' templates/package.json tmp_build/bundler/package.json > pkg/package.json

# Create minimal package.json in esm/ folder with type: module
echo '{"type": "module"}' > pkg/esm/package.json

# Update files array in package.json using JQ
jq '.files = ["*"] | .module="esm/parquet_wasm.js" | .types="esm/parquet_wasm.d.ts"' pkg/package.json > pkg/package.json.tmp
# Overwrite existing package.json file (jq cannot edit in place).
mv pkg/package.json.tmp pkg/package.json

rm -rf tmp_build
================================================
FILE: scripts/report_build.sh
================================================
#!/usr/bin/env bash
# Build three feature-flag variants of parquet-wasm into report_pkg/ so
# their bundle sizes can be compared.

rm -rf report_pkg
mkdir -p report_pkg

# Variant 1: reader+writer only, no compression codecs.
# NOTE(review): unlike the two builds below this one is not backgrounded
# with `&`, so it runs to completion first — confirm whether that is
# deliberate serialization or a missing `&`.
echo "Building arrow-rs slim"
wasm-pack build \
  --release \
  --no-pack \
  --out-dir report_pkg/slim \
  --out-name parquet_wasm \
  --target web \
  --no-default-features \
  --features={reader,writer}

# Variant 2: reader+writer with all compression codecs.
echo "Building arrow-rs sync"
wasm-pack build \
  --release \
  --no-pack \
  --out-dir report_pkg/sync \
  --out-name parquet_wasm \
  --target web \
  --no-default-features \
  --features={reader,writer,all_compressions} &

# Variant 3: the full feature set (including async).
echo "Building arrow-rs async_full"
wasm-pack build \
  --release \
  --no-pack \
  --out-dir report_pkg/async_full \
  --out-name parquet_wasm \
  --target web \
  --features=full &

# Wait for the backgrounded builds to finish.
wait;
================================================
FILE: src/common/fetch.rs
================================================
use futures::channel::oneshot;
use futures::future::BoxFuture;
use range_reader::{RangeOutput, RangedAsyncReader};
use wasm_bindgen::prelude::*;
use wasm_bindgen_futures::spawn_local;
/// Get the content-length of a remote file via an HTTP HEAD request.
///
/// Panics if the response does not carry a `content-length` header.
pub async fn _get_content_length(url: String) -> Result<usize, reqwest::Error> {
    let response = reqwest::Client::new().head(url).send().await?;
    let length = response.content_length().unwrap();
    Ok(length.try_into().unwrap())
}
/// Like [`_get_content_length`], but runs the request on a locally spawned
/// task and forwards the result through a oneshot channel.
pub async fn get_content_length(url: String) -> Result<usize, reqwest::Error> {
    let (tx, rx) = oneshot::channel::<usize>();
    spawn_local(async move {
        let content_length = _get_content_length(url).await.unwrap();
        tx.send(content_length).unwrap();
    });
    Ok(rx.await.unwrap())
}
/// Construct an HTTP `Range` header value covering `length` bytes starting
/// at `start`.
///
/// HTTP byte ranges are zero-indexed and the end offset is inclusive, so the
/// end is `start + length - 1`.
/// https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Range
pub fn range_from_start_and_length(start: u64, length: u64) -> String {
    let end = start + length - 1;
    format!("bytes={start}-{end}")
}
/// Construct an open-ended HTTP `Range` header value from `start` to EOF.
pub fn range_from_start(start: u64) -> String {
    let mut header = String::from("bytes=");
    header.push_str(&start.to_string());
    header.push('-');
    header
}
/// Construct an HTTP suffix `Range` header value selecting the final
/// `length` bytes of the resource.
pub fn range_from_end(length: usize) -> String {
    format!("bytes=-{}", length)
}
/// Make a single HTTP GET range request against `url` and return the body.
///
/// Non-success statuses are surfaced as errors via `error_for_status`.
async fn _make_range_request(
    url: &str,
    start: u64,
    length: usize,
) -> Result<Vec<u8>, reqwest::Error> {
    let range_header = range_from_start_and_length(start, length as u64);
    let response = reqwest::Client::new()
        .get(url)
        .header("Range", range_header)
        .send()
        .await?
        .error_for_status()?;
    let body = response.bytes().await?;
    Ok(body.to_vec())
}
/// Run [`_make_range_request`] on a locally spawned task, forwarding the
/// resulting bytes through a oneshot channel.
pub async fn make_range_request(
    url: String,
    start: u64,
    length: usize,
) -> Result<Vec<u8>, JsValue> {
    let (tx, rx) = oneshot::channel::<Vec<u8>>();
    spawn_local(async move {
        let bytes = _make_range_request(&url, start, length).await.unwrap();
        tx.send(bytes).unwrap();
    });
    Ok(rx.await.unwrap())
}
/// Create a [`RangedAsyncReader`] that fetches `url` via HTTP range requests.
///
/// `min_request_size` is the minimum bytes fetched per request; defaults to
/// 4 KiB when `None`.
pub fn create_reader(
    url: String,
    content_length: usize,
    min_request_size: Option<usize>,
) -> RangedAsyncReader {
    // At least 4kb per request unless the caller asks otherwise.
    let request_size = min_request_size.unwrap_or(4 * 1024);
    // Each invocation of this closure performs one HTTP range request.
    let range_get = Box::new(move |start: u64, length: usize| {
        let request_url = url.clone();
        let fut = async move {
            let data = make_range_request(request_url, start, length)
                .await
                .unwrap();
            Ok(RangeOutput { start, data })
        };
        Box::pin(fut) as BoxFuture<'static, std::io::Result<RangeOutput>>
    });
    RangedAsyncReader::new(content_length, request_size, range_get)
}
================================================
FILE: src/common/mod.rs
================================================
// Shared writer/reader property types (compression, encoding, version).
pub mod properties;

// HTTP range-request helpers; only compiled for the async feature.
#[cfg(feature = "async")]
pub mod fetch;

// AsyncWrite adapter over a JS WritableStream; async-only.
#[cfg(feature = "async")]
pub mod stream;
================================================
FILE: src/common/properties.rs
================================================
use wasm_bindgen::prelude::*;
/// Supported compression algorithms.
///
/// Codecs added in format version X.Y can be read by readers based on X.Y and later.
/// Codec support may vary between readers based on the format version and
/// libraries available at runtime.
// NOTE(review): this enum is exported to JS via wasm_bindgen; reordering
// variants likely changes the numeric values seen by JS — confirm before
// reordering, and prefer appending new variants.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[allow(non_camel_case_types)]
#[wasm_bindgen]
pub enum Compression {
    UNCOMPRESSED,
    SNAPPY,
    GZIP,
    BROTLI,
    /// @deprecated as of Parquet 2.9.0.
    /// Switch to LZ4_RAW
    LZ4,
    ZSTD,
    LZ4_RAW,
    LZO,
}
/// Convert the wasm-facing enum into the upstream parquet crate's type.
impl From<Compression> for parquet::basic::Compression {
    fn from(x: Compression) -> parquet::basic::Compression {
        match x {
            Compression::UNCOMPRESSED => parquet::basic::Compression::UNCOMPRESSED,
            Compression::SNAPPY => parquet::basic::Compression::SNAPPY,
            // GZIP/BROTLI/ZSTD carry a compression level upstream; we always
            // use the upstream default level here.
            Compression::GZIP => parquet::basic::Compression::GZIP(Default::default()),
            Compression::BROTLI => parquet::basic::Compression::BROTLI(Default::default()),
            Compression::LZ4 => parquet::basic::Compression::LZ4,
            Compression::ZSTD => parquet::basic::Compression::ZSTD(Default::default()),
            Compression::LZ4_RAW => parquet::basic::Compression::LZ4_RAW,
            Compression::LZO => parquet::basic::Compression::LZO,
        }
    }
}
/// Convert the upstream parquet crate's type into the wasm-facing enum.
///
/// The compression level carried by GZIP/BROTLI/ZSTD is discarded — the
/// wasm-facing enum has no level field.
impl From<parquet::basic::Compression> for Compression {
    fn from(x: parquet::basic::Compression) -> Compression {
        match x {
            parquet::basic::Compression::UNCOMPRESSED => Compression::UNCOMPRESSED,
            parquet::basic::Compression::SNAPPY => Compression::SNAPPY,
            parquet::basic::Compression::GZIP(_) => Compression::GZIP,
            parquet::basic::Compression::BROTLI(_) => Compression::BROTLI,
            parquet::basic::Compression::LZ4 => Compression::LZ4,
            parquet::basic::Compression::ZSTD(_) => Compression::ZSTD,
            parquet::basic::Compression::LZ4_RAW => Compression::LZ4_RAW,
            parquet::basic::Compression::LZO => Compression::LZO,
        }
    }
}
/// Encodings supported by Parquet.
/// Not all encodings are valid for all types. These enums are also used to specify the
/// encoding of definition and repetition levels.
// NOTE(review): exported to JS via wasm_bindgen — reordering variants likely
// changes the numeric values seen by JS; append new variants only.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[allow(non_camel_case_types)]
#[wasm_bindgen]
pub enum Encoding {
    /// Default byte encoding.
    /// - BOOLEAN - 1 bit per value, 0 is false; 1 is true.
    /// - INT32 - 4 bytes per value, stored as little-endian.
    /// - INT64 - 8 bytes per value, stored as little-endian.
    /// - FLOAT - 4 bytes per value, stored as little-endian.
    /// - DOUBLE - 8 bytes per value, stored as little-endian.
    /// - BYTE_ARRAY - 4 byte length stored as little endian, followed by bytes.
    /// - FIXED_LEN_BYTE_ARRAY - just the bytes are stored.
    PLAIN,
    /// **Deprecated** dictionary encoding.
    ///
    /// The values in the dictionary are encoded using PLAIN encoding.
    /// Since it is deprecated, RLE_DICTIONARY encoding is used for a data page, and
    /// PLAIN encoding is used for dictionary page.
    PLAIN_DICTIONARY,
    /// Group packed run length encoding.
    ///
    /// Usable for definition/repetition levels encoding and boolean values.
    RLE,
    /// Bit packed encoding.
    ///
    /// This can only be used if the data has a known max width.
    /// Usable for definition/repetition levels encoding.
    BIT_PACKED,
    /// Delta encoding for integers, either INT32 or INT64.
    ///
    /// Works best on sorted data.
    DELTA_BINARY_PACKED,
    /// Encoding for byte arrays to separate the length values and the data.
    ///
    /// The lengths are encoded using DELTA_BINARY_PACKED encoding.
    DELTA_LENGTH_BYTE_ARRAY,
    /// Incremental encoding for byte arrays.
    ///
    /// Prefix lengths are encoded using DELTA_BINARY_PACKED encoding.
    /// Suffixes are stored using DELTA_LENGTH_BYTE_ARRAY encoding.
    DELTA_BYTE_ARRAY,
    /// Dictionary encoding.
    ///
    /// The ids are encoded using the RLE encoding.
    RLE_DICTIONARY,
    /// Encoding for floating-point data.
    ///
    /// K byte-streams are created where K is the size in bytes of the data type.
    /// The individual bytes of an FP value are scattered to the corresponding stream and
    /// the streams are concatenated.
    /// This itself does not reduce the size of the data but can lead to better compression
    /// afterwards.
    BYTE_STREAM_SPLIT,
}
/// Convert the wasm-facing enum into the upstream parquet crate's type.
impl From<Encoding> for parquet::basic::Encoding {
    fn from(x: Encoding) -> parquet::basic::Encoding {
        match x {
            Encoding::PLAIN => parquet::basic::Encoding::PLAIN,
            Encoding::PLAIN_DICTIONARY => parquet::basic::Encoding::PLAIN_DICTIONARY,
            Encoding::RLE => parquet::basic::Encoding::RLE,
            // BIT_PACKED is deprecated upstream but still mapped for
            // compatibility with existing files.
            #[allow(deprecated)]
            Encoding::BIT_PACKED => parquet::basic::Encoding::BIT_PACKED,
            Encoding::DELTA_BINARY_PACKED => parquet::basic::Encoding::DELTA_BINARY_PACKED,
            Encoding::DELTA_LENGTH_BYTE_ARRAY => parquet::basic::Encoding::DELTA_LENGTH_BYTE_ARRAY,
            Encoding::DELTA_BYTE_ARRAY => parquet::basic::Encoding::DELTA_BYTE_ARRAY,
            Encoding::RLE_DICTIONARY => parquet::basic::Encoding::RLE_DICTIONARY,
            Encoding::BYTE_STREAM_SPLIT => parquet::basic::Encoding::BYTE_STREAM_SPLIT,
        }
    }
}
/// Convert the upstream parquet crate's type into the wasm-facing enum.
impl From<parquet::basic::Encoding> for Encoding {
    fn from(x: parquet::basic::Encoding) -> Encoding {
        match x {
            parquet::basic::Encoding::PLAIN => Encoding::PLAIN,
            parquet::basic::Encoding::PLAIN_DICTIONARY => Encoding::PLAIN_DICTIONARY,
            parquet::basic::Encoding::RLE => Encoding::RLE,
            // BIT_PACKED is deprecated upstream but may appear in old files.
            #[allow(deprecated)]
            parquet::basic::Encoding::BIT_PACKED => Encoding::BIT_PACKED,
            parquet::basic::Encoding::DELTA_BINARY_PACKED => Encoding::DELTA_BINARY_PACKED,
            parquet::basic::Encoding::DELTA_LENGTH_BYTE_ARRAY => Encoding::DELTA_LENGTH_BYTE_ARRAY,
            parquet::basic::Encoding::DELTA_BYTE_ARRAY => Encoding::DELTA_BYTE_ARRAY,
            parquet::basic::Encoding::RLE_DICTIONARY => Encoding::RLE_DICTIONARY,
            parquet::basic::Encoding::BYTE_STREAM_SPLIT => Encoding::BYTE_STREAM_SPLIT,
        }
    }
}
/// The Parquet version to use when writing
///
/// Consistency fix: derive the same traits as the sibling `Compression` and
/// `Encoding` enums in this module (they derive Debug/Clone/Copy/PartialEq/Eq;
/// this enum previously derived nothing). Purely additive and
/// backward-compatible for existing callers.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[allow(non_camel_case_types)]
#[wasm_bindgen]
pub enum WriterVersion {
    /// Parquet format version 1.0
    V1,
    /// Parquet format version 2.0
    V2,
}
/// Map the wasm-facing writer version onto the parquet crate's enum.
impl From<WriterVersion> for parquet::file::properties::WriterVersion {
    fn from(x: WriterVersion) -> parquet::file::properties::WriterVersion {
        match x {
            WriterVersion::V1 => parquet::file::properties::WriterVersion::PARQUET_1_0,
            WriterVersion::V2 => parquet::file::properties::WriterVersion::PARQUET_2_0,
        }
    }
}
================================================
FILE: src/common/stream.rs
================================================
use futures::AsyncWrite;
/// Newtype over a [`wasm_streams`] writable stream so that [`AsyncWrite`]
/// can be implemented for it and it can be handed to the async writer.
pub struct WrappedWritableStream<'writer> {
    pub stream: wasm_streams::writable::IntoAsyncWrite<'writer>,
}

impl AsyncWrite for WrappedWritableStream<'_> {
    // All three methods delegate straight to the inner stream. `Pin::new`
    // requires the inner type to be `Unpin`, so re-pinning after `get_mut`
    // is sound here.
    fn poll_write(
        self: std::pin::Pin<&mut Self>,
        cx: &mut std::task::Context<'_>,
        buf: &[u8],
    ) -> std::task::Poll<std::io::Result<usize>> {
        AsyncWrite::poll_write(std::pin::Pin::new(&mut self.get_mut().stream), cx, buf)
    }

    fn poll_flush(
        self: std::pin::Pin<&mut Self>,
        cx: &mut std::task::Context<'_>,
    ) -> std::task::Poll<std::io::Result<()>> {
        AsyncWrite::poll_flush(std::pin::Pin::new(&mut self.get_mut().stream), cx)
    }

    fn poll_close(
        self: std::pin::Pin<&mut Self>,
        cx: &mut std::task::Context<'_>,
    ) -> std::task::Poll<std::io::Result<()>> {
        AsyncWrite::poll_close(std::pin::Pin::new(&mut self.get_mut().stream), cx)
    }
}

// SAFETY(review): the wrapped stream holds JS objects that are not
// thread-safe; this impl is presumably sound only because wasm32 targets
// run single-threaded. Confirm before using in a multi-threaded build.
unsafe impl Send for WrappedWritableStream<'_> {}
================================================
FILE: src/error.rs
================================================
use arrow::error::ArrowError;
use parquet::errors::ParquetError;
use thiserror::Error;
use wasm_bindgen::{JsError, JsValue};
/// Crate-wide error type bridging Arrow, Parquet, and HTTP failures.
#[derive(Error, Debug)]
pub enum ParquetWasmError {
    /// An error surfaced by the arrow crate (boxed to keep the enum small).
    #[error(transparent)]
    ArrowError(Box<ArrowError>),
    /// An error surfaced by the parquet crate (boxed to keep the enum small).
    #[error(transparent)]
    ParquetError(Box<ParquetError>),
    /// A requested column name matched nothing in the Parquet schema.
    #[error("Column {0} not found in table")]
    UnknownColumn(String),
    /// An HTTP failure from the async reader (async feature only).
    #[cfg(feature = "async")]
    #[error("HTTP error: `{0}`")]
    HTTPError(Box<reqwest::Error>),
    #[error("Platform error: `{0}`")]
    PlatformSupportError(String),
    /// A JS value failed a dynamic cast to the expected type.
    #[error("Dyn casting error")]
    DynCastingError(JsValue),
}

/// Result alias for internal fallible operations.
pub type Result<T> = std::result::Result<T, ParquetWasmError>;
/// Result alias for functions exposed to JS (errors become `JsError`).
pub type WasmResult<T> = std::result::Result<T, JsError>;
// Boxing conversions so `?` works on arrow/parquet/reqwest errors.
impl From<ArrowError> for ParquetWasmError {
    fn from(err: ArrowError) -> Self {
        Self::ArrowError(Box::new(err))
    }
}

impl From<ParquetError> for ParquetWasmError {
    fn from(err: ParquetError) -> Self {
        Self::ParquetError(Box::new(err))
    }
}

#[cfg(feature = "async")]
impl From<reqwest::Error> for ParquetWasmError {
    fn from(err: reqwest::Error) -> Self {
        Self::HTTPError(Box::new(err))
    }
}
================================================
FILE: src/lib.rs
================================================
extern crate web_sys;

// Helpers shared between reader and writer (properties, fetch, streams).
pub mod common;
pub mod utils;

pub mod error;
pub mod metadata;

// Synchronous reader and its option types (feature: `reader`).
#[cfg(feature = "reader")]
pub mod read_options;
#[cfg(feature = "reader")]
pub mod reader;
// The async reader requires both the `reader` and `async` features.
#[cfg(all(feature = "reader", feature = "async"))]
pub mod reader_async;
pub mod wasm;
// Writer and its property types (feature: `writer`).
#[cfg(feature = "writer")]
pub mod writer;
#[cfg(feature = "writer")]
pub mod writer_properties;
#[cfg(all(feature = "writer", feature = "async"))]
pub mod writer_async;

// When the `wee_alloc` feature is enabled, use `wee_alloc` as the global
// allocator.
/*#[cfg(feature = "wee_alloc")]
#[global_allocator]
static ALLOC: wee_alloc::WeeAlloc = wee_alloc::WeeAlloc::INIT;*/
================================================
FILE: src/metadata.rs
================================================
use wasm_bindgen::prelude::*;
use crate::common::properties::{Compression, Encoding};
/// Global Parquet metadata.
///
/// Thin wasm_bindgen wrapper around
/// [`parquet::file::metadata::ParquetMetaData`]; accessors clone the
/// underlying data into further wrapper types.
#[derive(Debug, Clone)]
#[wasm_bindgen]
pub struct ParquetMetaData(parquet::file::metadata::ParquetMetaData);

#[wasm_bindgen]
impl ParquetMetaData {
    /// Returns a clone of the file-level metadata.
    #[wasm_bindgen(js_name = fileMetadata)]
    pub fn file_metadata(&self) -> FileMetaData {
        self.0.file_metadata().clone().into()
    }

    /// Returns number of row groups in this file.
    #[wasm_bindgen(js_name = numRowGroups)]
    pub fn num_row_groups(&self) -> usize {
        self.0.num_row_groups()
    }

    /// Returns row group metadata for `i`th position.
    /// Position should be less than number of row groups `num_row_groups`.
    #[wasm_bindgen(js_name = rowGroup)]
    pub fn row_group(&self, i: usize) -> RowGroupMetaData {
        self.0.row_group(i).clone().into()
    }

    /// Returns row group metadata for all row groups
    #[wasm_bindgen(js_name = rowGroups)]
    pub fn row_groups(&self) -> Vec<RowGroupMetaData> {
        self.0
            .row_groups()
            .iter()
            .map(|rg| rg.clone().into())
            .collect()
    }

    // /// Returns the column index for this file if loaded
    // pub fn column_index(&self) -> Option<ParquetColumnIndex> {
    //     self.0.column_index()
    // }
}

impl From<parquet::file::metadata::ParquetMetaData> for ParquetMetaData {
    fn from(value: parquet::file::metadata::ParquetMetaData) -> Self {
        Self(value)
    }
}

impl From<ParquetMetaData> for parquet::file::metadata::ParquetMetaData {
    fn from(value: ParquetMetaData) -> Self {
        value.0
    }
}
/// Metadata for a Parquet file.
///
/// Thin wasm_bindgen wrapper around
/// [`parquet::file::metadata::FileMetaData`].
#[derive(Debug, Clone)]
#[wasm_bindgen]
pub struct FileMetaData(parquet::file::metadata::FileMetaData);

#[wasm_bindgen]
impl FileMetaData {
    /// Returns version of this file.
    #[wasm_bindgen]
    pub fn version(&self) -> i32 {
        self.0.version()
    }

    /// Returns number of rows in the file.
    ///
    /// The underlying i64 is cast to f64 so it crosses the wasm boundary
    /// as a plain JS number.
    #[wasm_bindgen(js_name = numRows)]
    pub fn num_rows(&self) -> f64 {
        self.0.num_rows() as f64
    }

    /// String message for application that wrote this file.
    ///
    /// This should have the following format:
    /// `<application> version <application version> (build <application build hash>)`.
    ///
    /// ```shell
    /// parquet-mr version 1.8.0 (build 0fda28af84b9746396014ad6a415b90592a98b3b)
    /// ```
    #[wasm_bindgen(js_name = createdBy)]
    pub fn created_by(&self) -> Option<String> {
        let s = self.0.created_by()?;
        Some(s.to_string())
    }

    /// Returns key_value_metadata of this file as a JS Map.
    ///
    /// Entries whose value is `None` are skipped.
    #[wasm_bindgen(js_name = keyValueMetadata)]
    pub fn key_value_metadata(&self) -> Result<js_sys::Map, JsValue> {
        let map = js_sys::Map::new();
        if let Some(metadata) = self.0.key_value_metadata() {
            for meta in metadata {
                if let Some(value) = &meta.value {
                    map.set(&JsValue::from_str(&meta.key), &JsValue::from_str(value));
                }
            }
        }
        Ok(map)
    }
}

impl From<parquet::file::metadata::FileMetaData> for FileMetaData {
    fn from(value: parquet::file::metadata::FileMetaData) -> Self {
        Self(value)
    }
}

impl From<FileMetaData> for parquet::file::metadata::FileMetaData {
    fn from(value: FileMetaData) -> Self {
        value.0
    }
}
/// Metadata for a Parquet row group.
///
/// Thin wasm_bindgen wrapper around
/// [`parquet::file::metadata::RowGroupMetaData`]. Size/count accessors cast
/// i64 to f64 so values cross the wasm boundary as plain JS numbers.
#[derive(Debug, Clone)]
#[wasm_bindgen]
pub struct RowGroupMetaData(parquet::file::metadata::RowGroupMetaData);

#[wasm_bindgen]
impl RowGroupMetaData {
    /// Number of columns in this row group.
    #[wasm_bindgen(js_name = numColumns)]
    pub fn num_columns(&self) -> usize {
        self.0.num_columns()
    }

    /// Returns column chunk metadata for `i`th column.
    #[wasm_bindgen]
    pub fn column(&self, i: usize) -> ColumnChunkMetaData {
        self.0.column(i).clone().into()
    }

    /// Returns column chunk metadata for all columns
    #[wasm_bindgen]
    pub fn columns(&self) -> Vec<ColumnChunkMetaData> {
        self.0
            .columns()
            .iter()
            .map(|col| col.clone().into())
            .collect()
    }

    /// Number of rows in this row group.
    #[wasm_bindgen(js_name = numRows)]
    pub fn num_rows(&self) -> f64 {
        self.0.num_rows() as f64
    }

    /// Total byte size of all uncompressed column data in this row group.
    #[wasm_bindgen(js_name = totalByteSize)]
    pub fn total_byte_size(&self) -> f64 {
        self.0.total_byte_size() as f64
    }

    /// Total size of all compressed column data in this row group.
    #[wasm_bindgen(js_name = compressedSize)]
    pub fn compressed_size(&self) -> f64 {
        self.0.compressed_size() as f64
    }
}

impl From<parquet::file::metadata::RowGroupMetaData> for RowGroupMetaData {
    fn from(value: parquet::file::metadata::RowGroupMetaData) -> Self {
        Self(value)
    }
}

impl From<RowGroupMetaData> for parquet::file::metadata::RowGroupMetaData {
    fn from(value: RowGroupMetaData) -> Self {
        value.0
    }
}
/// Metadata for a Parquet column chunk.
///
/// Thin wasm_bindgen wrapper around
/// [`parquet::file::metadata::ColumnChunkMetaData`]. Size/count accessors
/// cast i64 to f64 so values cross the wasm boundary as plain JS numbers.
#[derive(Debug, Clone)]
#[wasm_bindgen]
pub struct ColumnChunkMetaData(parquet::file::metadata::ColumnChunkMetaData);

#[wasm_bindgen]
impl ColumnChunkMetaData {
    /// File where the column chunk is stored.
    ///
    /// If not set, assumed to belong to the same file as the metadata.
    /// This path is relative to the current file.
    #[wasm_bindgen(js_name = filePath)]
    pub fn file_path(&self) -> Option<String> {
        self.0.file_path().map(|s| s.to_string())
    }

    /// Byte offset in `file_path()`.
    #[wasm_bindgen(js_name = fileOffset)]
    pub fn file_offset(&self) -> i64 {
        self.0.file_offset()
    }

    // /// Type of this column. Must be primitive.
    // pub fn column_type(&self) -> Type {
    //     self.column_descr.physical_type()
    // }

    /// Path (or identifier) of this column, as its dotted-path components.
    #[wasm_bindgen(js_name = columnPath)]
    pub fn column_path(&self) -> Vec<String> {
        let path = self.0.column_path();
        path.parts().to_vec()
    }

    /// All encodings used for this column.
    #[wasm_bindgen]
    pub fn encodings(&self) -> Vec<Encoding> {
        self.0
            .encodings()
            .iter()
            .map(|encoding| (*encoding).into())
            .collect()
    }

    /// Total number of values in this column chunk.
    #[wasm_bindgen(js_name = numValues)]
    pub fn num_values(&self) -> f64 {
        self.0.num_values() as f64
    }

    /// Compression for this column.
    pub fn compression(&self) -> Compression {
        self.0.compression().into()
    }

    /// Returns the total compressed data size of this column chunk.
    #[wasm_bindgen(js_name = compressedSize)]
    pub fn compressed_size(&self) -> f64 {
        self.0.compressed_size() as f64
    }

    /// Returns the total uncompressed data size of this column chunk.
    #[wasm_bindgen(js_name = uncompressedSize)]
    pub fn uncompressed_size(&self) -> f64 {
        self.0.uncompressed_size() as f64
    }
}

impl From<parquet::file::metadata::ColumnChunkMetaData> for ColumnChunkMetaData {
    fn from(value: parquet::file::metadata::ColumnChunkMetaData) -> Self {
        Self(value)
    }
}

impl From<ColumnChunkMetaData> for parquet::file::metadata::ColumnChunkMetaData {
    fn from(value: ColumnChunkMetaData) -> Self {
        value.0
    }
}
================================================
FILE: src/read_options.rs
================================================
use parquet::arrow::ProjectionMask;
use parquet::arrow::arrow_reader::ArrowReaderBuilder;
use parquet::schema::types::SchemaDescriptor;
use serde::{Deserialize, Serialize};
use wasm_bindgen::prelude::*;
use crate::error::{ParquetWasmError, Result};
// TypeScript definition injected verbatim into the generated .d.ts file.
// Keep this string in sync with the `JsReaderOptions` struct below.
#[wasm_bindgen(typescript_custom_section)]
const TS_ReaderOptions: &'static str = r#"
export type ReaderOptions = {
/* The number of rows in each batch. If not provided, the upstream parquet default is 1024. */
batchSize?: number;
/* Only read data from the provided row group indexes. */
rowGroups?: number[];
/* Provide a limit to the number of rows to be read. */
limit?: number;
/* Provide an offset to skip over the given number of rows. */
offset?: number;
/* The column names from the file to read. */
columns?: string[];
/* The number of concurrent requests to make in the async reader. */
concurrency?: number;
};
"#;

#[wasm_bindgen]
extern "C" {
    /// Reader options, typed on the JS side as `ReaderOptions` above.
    #[wasm_bindgen(typescript_type = "ReaderOptions")]
    pub type ReaderOptions;
}
/// Rust-side mirror of the JS `ReaderOptions` object, deserialized via
/// serde (camelCase on the JS side).
#[derive(Clone, Serialize, Deserialize, Default)]
#[serde(rename_all = "camelCase")]
pub struct JsReaderOptions {
    /// The number of rows in each batch. If not provided, the upstream parquet default is 1024.
    pub batch_size: Option<usize>,
    /// Only read data from the provided row group indexes
    pub row_groups: Option<Vec<usize>>,
    /// Provide a limit to the number of rows to be read
    pub limit: Option<usize>,
    /// Provide an offset to skip over the given number of rows
    pub offset: Option<usize>,
    /// The column names from the file to read.
    pub columns: Option<Vec<String>>,
    /// The number of concurrent requests to make in the async reader.
    pub concurrency: Option<usize>,
}
impl JsReaderOptions {
    /// Apply these options to an Arrow reader builder (sync or async).
    ///
    /// Applies `batch_size`, `limit`, `offset`, `columns` (as a leaf
    /// projection mask) and `row_groups`. `concurrency` is not consumed
    /// here — presumably it is read directly by the async reader; confirm
    /// at the call sites.
    ///
    /// # Errors
    /// Returns [`ParquetWasmError::UnknownColumn`] when a requested column
    /// name matches nothing in the Parquet schema.
    pub fn apply_to_builder<T>(
        &self,
        mut builder: ArrowReaderBuilder<T>,
    ) -> Result<ArrowReaderBuilder<T>> {
        if let Some(batch_size) = self.batch_size {
            builder = builder.with_batch_size(batch_size);
        }

        if let Some(limit) = self.limit {
            builder = builder.with_limit(limit);
        }

        if let Some(offset) = self.offset {
            builder = builder.with_offset(offset);
        }

        if let Some(columns) = &self.columns {
            let parquet_schema = builder.parquet_schema();
            let projection_mask = generate_projection_mask(columns, parquet_schema)?;
            builder = builder.with_projection(projection_mask);
        }

        if let Some(row_groups) = &self.row_groups {
            builder = builder.with_row_groups(row_groups.clone());
        }

        Ok(builder)
    }
}
/// Deserialize the raw JS options object into the typed Rust struct.
impl TryFrom<ReaderOptions> for JsReaderOptions {
    type Error = serde_wasm_bindgen::Error;

    fn try_from(value: ReaderOptions) -> std::result::Result<Self, Self::Error> {
        serde_wasm_bindgen::from_value(value.obj)
    }
}
/// Map requested column names to leaf-column indices and build a
/// [`ProjectionMask`] over them.
///
/// A leaf column matches a requested name when its dotted path either equals
/// the name exactly, or starts with the name immediately followed by the `.`
/// path separator (i.e. the request selects a nested group).
///
/// Fix: the previous check used `path.find(col)` (a *byte* index) with
/// `path.chars().nth(...)` (a *char* index); for non-ASCII column names the
/// two disagree, which could mis-match or panic on `unwrap`. `strip_prefix`
/// keeps the comparison entirely in byte space.
///
/// # Errors
/// Returns [`ParquetWasmError::UnknownColumn`] if any requested name matches
/// no leaf column.
fn generate_projection_mask<S: AsRef<str>>(
    columns: &[S],
    pq_schema: &SchemaDescriptor,
) -> Result<ProjectionMask> {
    let col_paths = pq_schema
        .columns()
        .iter()
        .map(|col| col.path().string())
        .collect::<Vec<_>>();
    let indices: Vec<usize> = columns
        .iter()
        .map(|col| {
            let col = col.as_ref();
            let field_indices: Vec<usize> = col_paths
                .iter()
                .enumerate()
                .filter(|(_idx, path)| {
                    // Identical, OR the path starts with the column name and
                    // the remainder begins with the `.` path separator.
                    path.as_str() == col
                        || path
                            .strip_prefix(col)
                            .map(|rest| rest.starts_with('.'))
                            .unwrap_or(false)
                })
                .map(|(idx, _)| idx)
                .collect();

            if field_indices.is_empty() {
                Err(ParquetWasmError::UnknownColumn(col.to_string()))
            } else {
                Ok(field_indices)
            }
        })
        .collect::<Result<Vec<Vec<usize>>>>()?
        .into_iter()
        .flatten()
        .collect();

    let projection_mask = ProjectionMask::leaves(pq_schema, indices);
    Ok(projection_mask)
}
================================================
FILE: src/reader.rs
================================================
use std::sync::Arc;
use crate::error::Result;
use crate::read_options::JsReaderOptions;
use arrow_schema::{DataType, FieldRef};
use arrow_wasm::{Schema, Table};
use bytes::Bytes;
use parquet::arrow::arrow_reader::{
ArrowReaderMetadata, ArrowReaderOptions, ParquetRecordBatchReaderBuilder,
};
/// Internal function to read a buffer with Parquet data into a buffer with Arrow IPC Stream data
///
/// NOTE(review): only `batch_size`, `row_groups`, `limit` and `offset` are
/// applied on this sync path; the `columns` and `concurrency` fields of
/// [`JsReaderOptions`] are ignored here — confirm whether that is
/// intentional. (Applying a column projection here would also require
/// capturing the schema *after* building the reader, since `schema` is
/// cloned before the options are applied.)
pub fn read_parquet(parquet_file: Vec<u8>, options: JsReaderOptions) -> Result<Table> {
    // Create Parquet reader
    let cursor: Bytes = parquet_file.into();
    let metadata = ArrowReaderMetadata::load(&cursor, Default::default())?;
    // Rewrite StringView/BinaryView to plain types; Arrow JS cannot consume
    // view types (see cast_metadata_view_types below).
    let metadata = cast_metadata_view_types(&metadata)?;
    let mut builder = ParquetRecordBatchReaderBuilder::new_with_metadata(cursor, metadata);
    // Capture the (possibly rewritten) schema before the builder is consumed.
    let schema = builder.schema().clone();

    if let Some(batch_size) = options.batch_size {
        builder = builder.with_batch_size(batch_size);
    }

    if let Some(row_groups) = options.row_groups {
        builder = builder.with_row_groups(row_groups);
    }

    if let Some(limit) = options.limit {
        builder = builder.with_limit(limit);
    }

    if let Some(offset) = options.offset {
        builder = builder.with_offset(offset);
    }

    // Create Arrow reader and materialize every batch eagerly.
    let reader = builder.build()?;

    let mut batches = vec![];

    for maybe_chunk in reader {
        batches.push(maybe_chunk?)
    }

    Ok(Table::new(schema, batches))
}
/// Internal function to read a buffer with Parquet data into an Arrow schema
pub fn read_schema(parquet_file: Vec<u8>) -> Result<Schema> {
// Create Parquet reader
let cursor: Bytes = parquet_file.into();
let builder = ParquetRecordBatchReaderBuilder::try_new(cursor)?;
let schema = builder.schema().clone();
Ok(schema.into())
}
/// Cast any view types in the metadata's schema to non-view types
///
/// Returns the input metadata unchanged (cheap clone) when no view types
/// are present; otherwise rebuilds the reader metadata around a rewritten
/// schema.
pub(crate) fn cast_metadata_view_types(
    metadata: &ArrowReaderMetadata,
) -> Result<ArrowReaderMetadata> {
    let original_arrow_schema = metadata.schema();
    if has_view_types(original_arrow_schema.fields().iter()) {
        let new_schema = cast_view_types(original_arrow_schema);
        // Re-derive the reader metadata with the rewritten schema attached.
        let arrow_options = ArrowReaderOptions::default().with_schema(new_schema);
        Ok(ArrowReaderMetadata::try_new(
            metadata.metadata().clone(),
            arrow_options,
        )?)
    } else {
        Ok(metadata.clone())
    }
}
/// Cast any view types in the schema to non-view types
///
/// Casts:
///
/// - StringView to String
/// - BinaryView to Binary
///
/// Arrow JS does not currently support view types
/// https://github.com/apache/arrow-js/issues/44
fn cast_view_types(schema: &arrow_schema::Schema) -> arrow_schema::SchemaRef {
    let converted_fields = _cast_view_types_of_fields(schema.fields().iter());
    let preserved_metadata = schema.metadata().clone();
    let rebuilt = arrow_schema::Schema::new_with_metadata(converted_fields, preserved_metadata);
    Arc::new(rebuilt)
}
/// Recursively cast any view types in the fields to non-view types
///
/// This includes any view types that are the children of nested types like Structs and Lists
fn _cast_view_types_of_fields<'a>(fields: impl Iterator<Item = &'a FieldRef>) -> Vec<FieldRef> {
    fields.map(_cast_view_types_of_field).collect()
}

/// Cast a single field's data type (recursing into nested children).
///
/// Refactor: the previous version wrapped each nested child in
/// `[inner].into_iter().next().unwrap()` three times; a single-field helper
/// expresses the same recursion directly.
fn _cast_view_types_of_field(field: &FieldRef) -> FieldRef {
    let new_data_type = match field.data_type() {
        DataType::Utf8View => DataType::Utf8,
        DataType::BinaryView => DataType::Binary,
        DataType::Struct(struct_fields) => {
            DataType::Struct(_cast_view_types_of_fields(struct_fields.iter()).into())
        }
        DataType::List(inner_field) => DataType::List(_cast_view_types_of_field(inner_field)),
        DataType::LargeList(inner_field) => {
            DataType::LargeList(_cast_view_types_of_field(inner_field))
        }
        DataType::FixedSizeList(inner_field, list_size) => {
            DataType::FixedSizeList(_cast_view_types_of_field(inner_field), *list_size)
        }
        // All other types (including any nested types not listed above) are
        // left untouched, matching the original behavior.
        other => other.clone(),
    };
    Arc::new(field.as_ref().clone().with_data_type(new_data_type))
}
/// Whether any field (recursing into Struct/List children) uses a view type.
fn has_view_types<'a>(fields: impl Iterator<Item = &'a FieldRef>) -> bool {
    for field in fields {
        let found = match field.data_type() {
            DataType::Utf8View | DataType::BinaryView => true,
            DataType::Struct(struct_fields) => has_view_types(struct_fields.iter()),
            DataType::List(inner_field) | DataType::LargeList(inner_field) => {
                has_view_types(std::iter::once(inner_field))
            }
            DataType::FixedSizeList(inner_field, _list_size) => {
                has_view_types(std::iter::once(inner_field))
            }
            _other => false,
        };
        if found {
            return true;
        }
    }
    false
}
================================================
FILE: src/reader_async.rs
================================================
//! An asynchronous Parquet reader that is able to read and inspect remote files without
//! downloading them in entirety.
use crate::common::fetch::{
create_reader, get_content_length, range_from_end, range_from_start_and_length,
};
use crate::error::{Result, WasmResult};
use crate::read_options::{JsReaderOptions, ReaderOptions};
use crate::reader::cast_metadata_view_types;
use futures::channel::oneshot;
use futures::future::BoxFuture;
use object_store::coalesce_ranges;
use std::ops::Range;
use std::sync::Arc;
use wasm_bindgen::prelude::*;
use wasm_bindgen_futures::spawn_local;
use arrow::ipc::writer::StreamWriter;
use arrow_wasm::{RecordBatch, Table};
use bytes::Bytes;
use futures::TryStreamExt;
use futures::{FutureExt, StreamExt, stream};
use parquet::arrow::arrow_reader::{ArrowReaderMetadata, ArrowReaderOptions};
use parquet::arrow::async_reader::{
AsyncFileReader, MetadataSuffixFetch, ParquetRecordBatchStream, ParquetRecordBatchStreamBuilder,
};
use async_compat::{Compat, CompatExt};
use parquet::file::metadata::{
FileMetaData, PageIndexPolicy, ParquetMetaData, ParquetMetaDataReader,
};
use range_reader::RangedAsyncReader;
use reqwest::Client;
/// Range requests with a gap less than or equal to this (1 MiB) will be
/// coalesced into a single request by [`coalesce_ranges`].
const OBJECT_STORE_COALESCE_DEFAULT: u64 = 1024 * 1024;
/// Build a [`ParquetRecordBatchStreamBuilder`] over `reader`, with view
/// types cast away and the user's reader options applied.
fn create_builder<T: AsyncFileReader + Unpin + 'static>(
    reader: T,
    meta: &ArrowReaderMetadata,
    options: &JsReaderOptions,
) -> Result<ParquetRecordBatchStreamBuilder<T>> {
    // Cast any view types to non-view types (Arrow JS cannot consume views).
    let metadata = cast_metadata_view_types(meta)?;
    let builder = ParquetRecordBatchStreamBuilder::new_with_metadata(reader, metadata);
    options.apply_to_builder(builder)
}
/// An abstraction over either a browser File handle or an ObjectStore instance
///
/// This allows exposing a single ParquetFile class to the user.
#[derive(Clone)]
enum InnerParquetFile {
    /// Backed by a browser `Blob`/`File` handle.
    File(JsFileReader),
    /// Backed by HTTP range requests against a URL.
    Http(HTTPFileReader),
}
// Pure delegation: forward every trait method to whichever backend this enum
// wraps. No additional logic lives here.
impl AsyncFileReader for InnerParquetFile {
    fn get_bytes(&mut self, range: Range<u64>) -> BoxFuture<'_, parquet::errors::Result<Bytes>> {
        match self {
            Self::File(reader) => reader.get_bytes(range),
            Self::Http(reader) => reader.get_bytes(range),
        }
    }
    fn get_byte_ranges(
        &mut self,
        ranges: Vec<Range<u64>>,
    ) -> BoxFuture<'_, parquet::errors::Result<Vec<Bytes>>> {
        match self {
            Self::File(reader) => reader.get_byte_ranges(ranges),
            Self::Http(reader) => reader.get_byte_ranges(ranges),
        }
    }
    fn get_metadata<'a>(
        &'a mut self,
        options: Option<&'a ArrowReaderOptions>,
    ) -> BoxFuture<'a, parquet::errors::Result<Arc<ParquetMetaData>>> {
        match self {
            Self::File(reader) => reader.get_metadata(options),
            Self::Http(reader) => reader.get_metadata(options),
        }
    }
}
#[wasm_bindgen]
pub struct ParquetFile {
    // Data source (browser file handle or HTTP reader).
    reader: InnerParquetFile,
    // Footer metadata + Arrow schema, loaded once at construction time so
    // `metadata()`/`schema()`/`read()` never re-fetch it.
    meta: ArrowReaderMetadata,
}
#[wasm_bindgen]
impl ParquetFile {
/// Construct a ParquetFile from a new URL.
#[wasm_bindgen(js_name = fromUrl)]
pub async fn from_url(url: String) -> WasmResult<ParquetFile> {
let client = Client::new();
let mut reader = HTTPFileReader::new(url, client, OBJECT_STORE_COALESCE_DEFAULT);
let meta = ArrowReaderMetadata::load_async(&mut reader, Default::default()).await?;
Ok(Self {
reader: InnerParquetFile::Http(reader),
meta,
})
}
/// Construct a ParquetFile from a new [Blob] or [File] handle.
///
/// [Blob]: https://developer.mozilla.org/en-US/docs/Web/API/Blob
/// [File]: https://developer.mozilla.org/en-US/docs/Web/API/File
///
/// Safety: Do not use this in a multi-threaded environment,
/// (transitively depends on `!Send` `web_sys::Blob`)
#[wasm_bindgen(js_name = fromFile)]
pub async fn from_file(handle: web_sys::Blob) -> WasmResult<ParquetFile> {
let mut reader = JsFileReader::new(handle, 1024);
let meta = ArrowReaderMetadata::load_async(&mut reader, Default::default()).await?;
Ok(Self {
reader: InnerParquetFile::File(reader),
meta,
})
}
#[wasm_bindgen]
pub fn metadata(&self) -> WasmResult<crate::metadata::ParquetMetaData> {
Ok(self.meta.metadata().as_ref().to_owned().into())
}
#[wasm_bindgen]
pub fn schema(&self) -> WasmResult<arrow_wasm::Schema> {
Ok(self.meta.schema().clone().into())
}
/// Read from the Parquet file in an async fashion.
///
/// @param options
///
/// Options for reading Parquet data. Optional keys include:
///
/// - `batchSize`: The number of rows in each batch. If not provided, the upstream parquet
/// default is 1024.
/// - `rowGroups`: Only read data from the provided row group indexes.
/// - `limit`: Provide a limit to the number of rows to be read.
/// - `offset`: Provide an offset to skip over the given number of rows.
/// - `columns`: The column names from the file to read.
#[wasm_bindgen]
pub async fn read(&self, options: Option<ReaderOptions>) -> WasmResult<Table> {
let options = options
.map(|x| x.try_into())
.transpose()?
.unwrap_or_default();
let builder = create_builder(self.reader.clone(), &self.meta, &options)?;
let schema = builder.schema().clone();
let stream = builder.build()?;
let batches = stream.try_collect::<Vec<_>>().await.unwrap();
Ok(Table::new(schema, batches))
}
/// Create a readable stream of record batches.
///
/// Each item in the stream will be a {@linkcode RecordBatch}.
///
/// @param options
///
/// Options for reading Parquet data. Optional keys include:
///
/// - `batchSize`: The number of rows in each batch. If not provided, the upstream parquet
/// default is 1024.
/// - `rowGroups`: Only read data from the provided row group indexes.
/// - `limit`: Provide a limit to the number of rows to be read.
/// - `offset`: Provide an offset to skip over the given number of rows.
/// - `columns`: The column names from the file to read.
/// - `concurrency`: The number of concurrent requests to make
#[wasm_bindgen]
pub async fn stream(
&self,
options: Option<ReaderOptions>,
) -> WasmResult<wasm_streams::readable::sys::ReadableStream> {
let options: JsReaderOptions = options
.map(|x| x.try_into())
.transpose()?
.unwrap_or_default();
let concurrency = options.concurrency.unwrap_or_default().max(1);
let row_groups = options
.row_groups
.clone()
.unwrap_or_else(|| (0..self.meta.metadata().num_row_groups()).collect());
let reader = self.reader.clone();
let meta = self.meta.clone();
let buffered_stream = stream::iter(row_groups.into_iter().map(move |i| {
let builder = create_builder(reader.clone(), &meta.clone(), &options.clone())
.unwrap()
.with_row_groups(vec![i]);
builder.build().unwrap().try_collect::<Vec<_>>()
}))
.buffered(concurrency);
let out_stream = buffered_stream.flat_map(|maybe_record_batches| {
stream::iter(maybe_record_batches.unwrap())
.map(|record_batch| Ok(RecordBatch::new(record_batch).into()))
});
Ok(wasm_streams::ReadableStream::from_stream(out_stream).into_raw())
}
}
/// A Parquet reader backed by HTTP range requests against a single URL.
#[derive(Debug, Clone)]
pub struct HTTPFileReader {
    // Absolute URL of the Parquet file.
    url: String,
    // Shared reqwest client (cheaply cloneable).
    client: Client,
    // Ranges closer together than this are merged into one request.
    coalesce_byte_size: u64,
}
impl HTTPFileReader {
    /// Create a reader for `url` using `client`, coalescing range requests
    /// whose gap is at most `coalesce_byte_size` bytes.
    pub fn new(url: String, client: Client, coalesce_byte_size: u64) -> Self {
        Self {
            url,
            client,
            coalesce_byte_size,
        }
    }
}
impl MetadataSuffixFetch for &mut HTTPFileReader {
    /// Fetch the final `suffix` bytes of the file via an HTTP `Range: bytes=-N`
    /// request. Used by the metadata loader to read the Parquet footer without
    /// knowing the content length.
    fn fetch_suffix(&mut self, suffix: usize) -> BoxFuture<'_, parquet::errors::Result<Bytes>> {
        async move {
            let range_str = range_from_end(suffix);
            // NOTE(review): a failed request panics here via `unwrap` rather
            // than returning a parquet error; the JsValue error type cannot be
            // converted directly (see the abandoned map_err sketch below).
            // let map_err = |err| parquet::errors::ParquetError::External(Box::new(err));
            let bytes = make_range_request_with_client(
                self.url.to_string(),
                self.client.clone(),
                range_str,
            )
            .await
            .unwrap();
            Ok(bytes)
        }
        .boxed()
    }
}
/// Fetch the half-open byte `range` of `url` with a single HTTP range request.
///
/// NOTE(review): a failed request panics via `unwrap` instead of returning a
/// parquet error (the JsValue error type is awkward to convert).
async fn get_bytes_http(
    url: String,
    client: Client,
    range: Range<u64>,
) -> parquet::errors::Result<Bytes> {
    // `Range` is half-open, so the request length is end - start.
    let range_str = range_from_start_and_length(range.start, range.end - range.start);
    // Map reqwest error to parquet error
    // let map_err = |err| parquet::errors::ParquetError::External(Box::new(err));
    let bytes = make_range_request_with_client(url, client, range_str)
        .await
        .unwrap();
    Ok(bytes)
}
impl AsyncFileReader for HTTPFileReader {
    /// Fetch a single byte range via HTTP.
    fn get_bytes(&mut self, range: Range<u64>) -> BoxFuture<'_, parquet::errors::Result<Bytes>> {
        get_bytes_http(self.url.clone(), self.client.clone(), range).boxed()
    }
    /// Fetch multiple byte ranges, merging requests whose gap is at most
    /// `coalesce_byte_size` (see [`coalesce_ranges`]).
    fn get_byte_ranges(
        &mut self,
        ranges: Vec<Range<u64>>,
    ) -> BoxFuture<'_, parquet::errors::Result<Vec<Bytes>>> {
        async move {
            coalesce_ranges(
                &ranges,
                |range| get_bytes_http(self.url.clone(), self.client.clone(), range),
                self.coalesce_byte_size,
            )
            .await
        }
        .boxed()
    }
    /// Load the Parquet footer metadata (and page indexes when present) using
    /// suffix range requests, so the content length need not be known.
    fn get_metadata<'a>(
        &'a mut self,
        _options: Option<&'a ArrowReaderOptions>,
    ) -> BoxFuture<'a, parquet::errors::Result<Arc<ParquetMetaData>>> {
        async move {
            let metadata = ParquetMetaDataReader::new()
                .with_page_index_policy(PageIndexPolicy::Optional)
                .load_via_suffix_and_finish(self)
                .await?;
            Ok(Arc::new(metadata))
        }
        .boxed()
    }
}
/// A `web_sys::Blob` handle together with its cached size in bytes.
#[derive(Debug, Clone)]
struct WrappedFile {
    // The underlying browser Blob/File handle.
    inner: web_sys::Blob,
    // Size in bytes, captured once at construction (Blob sizes are immutable).
    pub size: u64,
}
/// Safety: This is not in fact thread-safe. Do not attempt to use this in work-stealing
/// async runtimes / multi-threaded environments
///
/// web_sys::Blob objects, like all JSValues, are !Send (even in JS, there's
/// maybe ~5 Transferable types), and eventually boil down to PhantomData<*mut u8>.
/// Any struct that holds one is inherently !Send, which disqualifies it from being used
/// with the AsyncFileReader trait.
unsafe impl Send for WrappedFile {}
unsafe impl Sync for WrappedFile {}
impl WrappedFile {
    /// Wrap a Blob handle, caching its size (JS reports it as an f64; it is
    /// truncated to u64 here).
    pub fn new(inner: web_sys::Blob) -> Self {
        let size = inner.size() as u64;
        Self { inner, size }
    }
    /// Read the half-open byte `range` out of the Blob.
    ///
    /// The JS interaction runs on a locally spawned task and the result is
    /// passed back over a oneshot channel.
    pub async fn get_bytes(&mut self, range: Range<u64>) -> Vec<u8> {
        use js_sys::Uint8Array;
        use wasm_bindgen_futures::JsFuture;
        let (sender, receiver) = oneshot::channel();
        let file = self.inner.clone();
        spawn_local(async move {
            // Use the f64 slice overload: Blob offsets are f64 in JS, and the
            // previous `i32::try_from(..).unwrap()` panicked for offsets at or
            // above 2 GiB.
            let subset_blob = file
                .slice_with_f64_and_f64(range.start as f64, range.end as f64)
                .unwrap();
            // Materialize the slice into wasm memory.
            let buf = JsFuture::from(subset_blob.array_buffer()).await.unwrap();
            let out_vec = Uint8Array::new_with_byte_offset(&buf, 0).to_vec();
            sender.send(out_vec).unwrap();
        });
        receiver.await.unwrap()
    }
}
/// Read the byte `range` of a wrapped Blob, returning it as [`Bytes`].
///
/// NOTE(review): this spawns a local task and relays the result over a oneshot
/// channel even though `WrappedFile::get_bytes` already does the same
/// internally — presumably to keep the returned future `Send` for
/// [`coalesce_ranges`]; confirm before simplifying.
async fn get_bytes_file(
    mut file: WrappedFile,
    range: Range<u64>,
) -> parquet::errors::Result<Bytes> {
    let (sender, receiver) = oneshot::channel();
    spawn_local(async move {
        let result: Bytes = file.get_bytes(range).await.into();
        sender.send(result).unwrap()
    });
    let data = receiver.await.unwrap();
    Ok(data)
}
/// A Parquet reader backed by a browser `Blob`/`File` handle.
#[derive(Debug, Clone)]
pub struct JsFileReader {
    // The wrapped Blob plus its cached size.
    file: WrappedFile,
    // Ranges closer together than this are merged into one slice read.
    coalesce_byte_size: u64,
}
impl JsFileReader {
    /// Create a reader over `file`, coalescing byte ranges whose gap is at
    /// most `coalesce_byte_size` bytes.
    pub fn new(file: web_sys::Blob, coalesce_byte_size: u64) -> Self {
        Self {
            file: WrappedFile::new(file),
            coalesce_byte_size,
        }
    }
}
impl AsyncFileReader for JsFileReader {
    /// Read a single byte range from the Blob. The JS interaction happens on a
    /// locally spawned task; the result is relayed over a oneshot channel.
    fn get_bytes(&mut self, range: Range<u64>) -> BoxFuture<'_, parquet::errors::Result<Bytes>> {
        async move {
            let (sender, receiver) = oneshot::channel();
            let mut file = self.file.clone();
            spawn_local(async move {
                let result: Bytes = file.get_bytes(range).await.into();
                sender.send(result).unwrap()
            });
            let data = receiver.await.unwrap();
            Ok(data)
        }
        .boxed()
    }
    /// Read multiple byte ranges, merging reads whose gap is at most
    /// `coalesce_byte_size` (see [`coalesce_ranges`]).
    fn get_byte_ranges(
        &mut self,
        ranges: Vec<Range<u64>>,
    ) -> BoxFuture<'_, parquet::errors::Result<Vec<Bytes>>> {
        async move {
            coalesce_ranges(
                &ranges,
                |range| get_bytes_file(self.file.clone(), range),
                self.coalesce_byte_size,
            )
            .await
        }
        .boxed()
    }
    /// Load the Parquet footer metadata (and page indexes when present) using
    /// the cached Blob size to locate the footer.
    fn get_metadata<'a>(
        &'a mut self,
        _options: Option<&'a ArrowReaderOptions>,
    ) -> BoxFuture<'a, parquet::errors::Result<Arc<ParquetMetaData>>> {
        // Capture the size before moving `self` into the future.
        let file_size = self.file.size;
        async move {
            let metadata = ParquetMetaDataReader::new()
                .with_page_index_policy(PageIndexPolicy::Optional)
                .load_and_finish(self, file_size)
                .await?;
            Ok(Arc::new(metadata))
        }
        .boxed()
    }
}
pub async fn make_range_request_with_client(
url: String,
client: Client,
range_str: String,
) -> std::result::Result<Bytes, JsValue> {
let (sender, receiver) = oneshot::channel();
spawn_local(async move {
let resp = client
.get(url)
.header("Range", range_str)
.send()
.await
.unwrap()
.error_for_status()
.unwrap();
let bytes = resp.bytes().await.unwrap();
sender.send(bytes).unwrap();
});
let data = receiver.await.unwrap();
Ok(data)
}
/// Fetch only the Parquet file-level metadata (footer) for `url`.
///
/// If `content_length` is not supplied it is discovered with an extra request
/// via `get_content_length`.
pub async fn read_metadata_async(
    url: String,
    content_length: Option<usize>,
) -> Result<FileMetaData> {
    // The ranged reader needs the total file size up front.
    let length = if let Some(length) = content_length {
        length
    } else {
        get_content_length(url.clone()).await?
    };
    let reader = create_reader(url, length, None);
    let builder = ParquetRecordBatchStreamBuilder::new(reader.compat()).await?;
    Ok(builder.metadata().file_metadata().clone())
}
/// Open `url` as a ranged async reader restricted to a single `row_group`,
/// returning the record batch stream together with the file's Arrow schema.
///
/// If `content_length` is not supplied it is discovered with an extra request
/// via `get_content_length`.
pub async fn _read_row_group(
    url: String,
    content_length: Option<usize>,
    row_group: usize,
) -> Result<(
    ParquetRecordBatchStream<Compat<RangedAsyncReader>>,
    Arc<arrow::datatypes::Schema>,
)> {
    let content_length = match content_length {
        Some(content_length) => content_length,
        None => get_content_length(url.clone()).await?,
    };
    let reader = create_reader(url, content_length, None);
    // Adapt the futures-io reader to tokio-io for the parquet crate.
    let mut compat = reader.compat();
    let metadata = ArrowReaderMetadata::load_async(&mut compat, Default::default()).await?;
    let builder = create_builder(compat, &metadata, &Default::default())?;
    let arrow_schema = builder.schema().clone();
    let parquet_reader = builder.with_row_groups(vec![row_group]).build()?;
    Ok((parquet_reader, arrow_schema))
}
/// Read a single `row_group` from the Parquet file at `url`, apply `chunk_fn`
/// to each record batch, and serialize the result as an Arrow IPC stream.
///
/// Returns the raw IPC stream bytes.
pub async fn read_row_group(
    url: String,
    row_group: usize,
    chunk_fn: impl Fn(arrow::record_batch::RecordBatch) -> arrow::record_batch::RecordBatch,
) -> Result<Vec<u8>> {
    let (mut parquet_reader, arrow_schema) = _read_row_group(url, None, row_group).await?;
    let mut buffer: Vec<u8> = Vec::new();
    {
        // Scope the writer so its mutable borrow of `buffer` ends before we
        // return the buffer.
        let mut ipc_writer = StreamWriter::try_new(&mut buffer, &arrow_schema)?;
        while let Some(maybe_batch) = parquet_reader.next().await {
            ipc_writer.write(&chunk_fn(maybe_batch?))?;
        }
        ipc_writer.finish()?;
    }
    Ok(buffer)
}
/// Open `url` as an async Parquet source and return a stream over all of its
/// record batches (default reader options, all row groups).
///
/// If `content_length` is not supplied it is discovered with an extra request
/// via `get_content_length`.
pub async fn read_record_batch_stream(
    url: String,
    content_length: Option<usize>,
) -> Result<ParquetRecordBatchStream<Compat<RangedAsyncReader>>> {
    let length = if let Some(length) = content_length {
        length
    } else {
        get_content_length(url.clone()).await?
    };
    // Adapt the futures-io reader to tokio-io for the parquet crate.
    let mut reader = crate::common::fetch::create_reader(url, length, None).compat();
    let metadata = ArrowReaderMetadata::load_async(&mut reader, Default::default()).await?;
    Ok(create_builder(reader, &metadata, &Default::default())?.build()?)
}
================================================
FILE: src/utils.rs
================================================
use wasm_bindgen::prelude::*;
/// Call this function at least once during initialization to get better error
/// messages if the underlying Rust code ever panics (creates uncaught errors).
#[cfg(feature = "console_error_panic_hook")]
#[wasm_bindgen(js_name = setPanicHook)]
pub fn set_panic_hook() {
    // When the `console_error_panic_hook` feature is enabled, we can call the
    // `set_panic_hook` function at least once during initialization, and then
    // we will get better error messages if our code ever panics.
    //
    // `set_once` is idempotent, so calling this repeatedly is harmless.
    //
    // For more details see
    // https://github.com/rustwasm/console_error_panic_hook#readme
    console_error_panic_hook::set_once();
}
// A macro to provide `println!(..)`-style syntax for `console.log` logging.
// On wasm32 it forwards to the browser console; elsewhere (e.g. native test
// builds) it prints to stdout with a "LOG - " prefix.
#[cfg(target_arch = "wasm32")]
#[macro_export]
macro_rules! log {
    ( $( $t:tt )* ) => {
        web_sys::console::log_1(&format!( $( $t )* ).into());
    }
}
#[cfg(not(target_arch = "wasm32"))]
#[macro_export]
macro_rules! log {
    ( $( $t:tt )* ) => {
        println!("LOG - {}", format!( $( $t )* ));
    }
}
/// Raise an error if the input byte slice is empty.
///
/// Used by the wasm entry points to give a clear message when JS passes an
/// empty buffer (or something that coerced to one) instead of Parquet data.
pub fn assert_parquet_file_not_empty(parquet_file: &[u8]) -> Result<(), JsError> {
    match parquet_file {
        [] => Err(JsError::new("Empty input provided or not a Uint8Array.")),
        _ => Ok(()),
    }
}
================================================
FILE: src/wasm.rs
================================================
use crate::error::WasmResult;
#[cfg(feature = "reader")]
use crate::read_options::ReaderOptions;
use crate::utils::assert_parquet_file_not_empty;
use arrow_wasm::{RecordBatch, Schema, Table};
use wasm_bindgen::prelude::*;
/// Read a Parquet file into Arrow data.
///
/// This returns an Arrow table in WebAssembly memory. To transfer the Arrow table to JavaScript
/// memory you have two options:
///
/// - (Easier): Call {@linkcode Table.intoIPCStream} to construct a buffer that can be parsed with
/// Arrow JS's `tableFromIPC` function.
/// - (More performant but bleeding edge): Call {@linkcode Table.intoFFI} to construct a data
/// representation that can be parsed zero-copy from WebAssembly with
/// [arrow-js-ffi](https://github.com/kylebarron/arrow-js-ffi) using `parseTable`.
///
/// Example with IPC stream:
///
/// ```js
/// import { tableFromIPC } from "apache-arrow";
/// import initWasm, {readParquet} from "parquet-wasm";
///
/// // Instantiate the WebAssembly context
/// await initWasm();
///
/// const resp = await fetch("https://example.com/file.parquet");
/// const parquetUint8Array = new Uint8Array(await resp.arrayBuffer());
/// const arrowWasmTable = readParquet(parquetUint8Array);
/// const arrowTable = tableFromIPC(arrowWasmTable.intoIPCStream());
/// ```
///
/// Example with `arrow-js-ffi`:
///
/// ```js
/// import { parseTable } from "arrow-js-ffi";
/// import initWasm, {readParquet, wasmMemory} from "parquet-wasm";
///
/// // Instantiate the WebAssembly context
/// await initWasm();
/// const WASM_MEMORY = wasmMemory();
///
/// const resp = await fetch("https://example.com/file.parquet");
/// const parquetUint8Array = new Uint8Array(await resp.arrayBuffer());
/// const arrowWasmTable = readParquet(parquetUint8Array);
/// const ffiTable = arrowWasmTable.intoFFI();
/// const arrowTable = parseTable(
/// WASM_MEMORY.buffer,
/// ffiTable.arrayAddrs(),
/// ffiTable.schemaAddr()
/// );
/// ```
///
/// @param parquet_file Uint8Array containing Parquet data
/// @param options
///
/// Options for reading Parquet data. Optional keys include:
///
/// - `batchSize`: The number of rows in each batch. If not provided, the upstream parquet
/// default is 1024.
/// - `rowGroups`: Only read data from the provided row group indexes.
/// - `limit`: Provide a limit to the number of rows to be read.
/// - `offset`: Provide an offset to skip over the given number of rows.
/// - `columns`: The column names from the file to read.
#[wasm_bindgen(js_name = readParquet)]
#[cfg(feature = "reader")]
pub fn read_parquet(parquet_file: Vec<u8>, options: Option<ReaderOptions>) -> WasmResult<Table> {
    // Reject empty buffers up front with a descriptive error.
    assert_parquet_file_not_empty(parquet_file.as_slice())?;
    // Convert the user-supplied options, falling back to defaults when absent.
    let reader_options = match options {
        Some(opts) => opts.try_into()?,
        None => Default::default(),
    };
    Ok(crate::reader::read_parquet(parquet_file, reader_options)?)
}
/// Read an Arrow schema from a Parquet file in memory.
///
/// This returns an Arrow schema in WebAssembly memory. To transfer the Arrow schema to JavaScript
/// memory you have two options:
///
/// - (Easier): Call {@linkcode Schema.intoIPCStream} to construct a buffer that can be parsed with
/// Arrow JS's `tableFromIPC` function. This results in an Arrow JS Table with zero rows but a
/// valid schema.
/// - (More performant but bleeding edge): Call {@linkcode Schema.intoFFI} to construct a data
/// representation that can be parsed zero-copy from WebAssembly with
/// [arrow-js-ffi](https://github.com/kylebarron/arrow-js-ffi) using `parseSchema`.
///
/// Example with IPC Stream:
///
/// ```js
/// import { tableFromIPC } from "apache-arrow";
/// import initWasm, {readSchema} from "parquet-wasm";
///
/// // Instantiate the WebAssembly context
/// await initWasm();
///
/// const resp = await fetch("https://example.com/file.parquet");
/// const parquetUint8Array = new Uint8Array(await resp.arrayBuffer());
/// const arrowWasmSchema = readSchema(parquetUint8Array);
/// const arrowTable = tableFromIPC(arrowWasmSchema.intoIPCStream());
/// const arrowSchema = arrowTable.schema;
/// ```
///
/// Example with `arrow-js-ffi`:
///
/// ```js
/// import { parseSchema } from "arrow-js-ffi";
/// import initWasm, {readSchema, wasmMemory} from "parquet-wasm";
///
/// // Instantiate the WebAssembly context
/// await initWasm();
/// const WASM_MEMORY = wasmMemory();
///
/// const resp = await fetch("https://example.com/file.parquet");
/// const parquetUint8Array = new Uint8Array(await resp.arrayBuffer());
/// const arrowWasmSchema = readSchema(parquetUint8Array);
/// const ffiSchema = arrowWasmSchema.intoFFI();
/// const arrowTable = parseSchema(WASM_MEMORY.buffer, ffiSchema.addr());
/// const arrowSchema = arrowTable.schema;
/// ```
///
/// @param parquet_file Uint8Array containing Parquet data
#[wasm_bindgen(js_name = readSchema)]
#[cfg(feature = "reader")]
pub fn read_schema(parquet_file: Vec<u8>) -> WasmResult<Schema> {
    // Reject empty buffers up front with a descriptive error.
    assert_parquet_file_not_empty(parquet_file.as_slice())?;
    let schema = crate::reader::read_schema(parquet_file)?;
    Ok(schema)
}
/// Write Arrow data to a Parquet file.
///
/// For example, to create a Parquet file with Snappy compression:
///
/// ```js
/// import { tableToIPC } from "apache-arrow";
/// // Edit the `parquet-wasm` import as necessary
/// import initWasm, {
/// Table,
/// WriterPropertiesBuilder,
/// Compression,
/// writeParquet,
/// } from "parquet-wasm";
///
/// // Instantiate the WebAssembly context
/// await initWasm();
///
/// // Given an existing arrow JS table under `table`
/// const wasmTable = Table.fromIPCStream(tableToIPC(table, "stream"));
/// const writerProperties = new WriterPropertiesBuilder()
/// .setCompression(Compression.SNAPPY)
/// .build();
/// const parquetUint8Array = writeParquet(wasmTable, writerProperties);
/// ```
///
/// If `writerProperties` is not provided or is `null`, the default writer properties will be used.
/// This is equivalent to `new WriterPropertiesBuilder().build()`.
///
/// @param table A {@linkcode Table} representation in WebAssembly memory.
/// @param writer_properties (optional) Configuration for writing to Parquet. Use the {@linkcode
/// WriterPropertiesBuilder} to build a writing configuration, then call `.build()` to create an
/// immutable writer properties to pass in here.
/// @returns Uint8Array containing written Parquet data.
#[wasm_bindgen(js_name = writeParquet)]
#[cfg(feature = "writer")]
pub fn write_parquet(
    table: Table,
    writer_properties: Option<crate::writer_properties::WriterProperties>,
) -> WasmResult<Vec<u8>> {
    // Missing properties fall back to `WriterPropertiesBuilder` defaults.
    let properties = writer_properties.unwrap_or_default();
    let (schema, batches) = table.into_inner();
    let buffer = crate::writer::write_parquet(batches.into_iter(), schema, properties)?;
    Ok(buffer)
}
/// Read a Parquet file into a stream of Arrow `RecordBatch`es.
///
/// This returns a ReadableStream containing RecordBatches in WebAssembly memory. To transfer the
/// Arrow table to JavaScript memory you have two options:
///
/// - (Easier): Call {@linkcode RecordBatch.intoIPCStream} to construct a buffer that can be parsed
/// with Arrow JS's `tableFromIPC` function. (The table will have a single internal record
/// batch).
/// - (More performant but bleeding edge): Call {@linkcode RecordBatch.intoFFI} to construct a data
/// representation that can be parsed zero-copy from WebAssembly with
/// [arrow-js-ffi](https://github.com/kylebarron/arrow-js-ffi) using `parseRecordBatch`.
///
/// Example with IPC stream:
///
/// ```js
/// import { tableFromIPC, Table } from "apache-arrow";
/// import initWasm, {readParquetStream} from "parquet-wasm";
///
/// // Instantiate the WebAssembly context
/// await initWasm();
///
/// const stream = await readParquetStream(url);
///
/// const batches = [];
/// for await (const wasmRecordBatch of stream) {
/// const arrowTable = tableFromIPC(wasmRecordBatch.intoIPCStream());
/// batches.push(...arrowTable.batches);
/// }
/// const table = new Table(batches);
/// ```
///
/// Example with `arrow-js-ffi`:
///
/// ```js
/// import { Table } from "apache-arrow";
/// import { parseRecordBatch } from "arrow-js-ffi";
/// import initWasm, {readParquetStream, wasmMemory} from "parquet-wasm";
///
/// // Instantiate the WebAssembly context
/// await initWasm();
/// const WASM_MEMORY = wasmMemory();
///
/// const stream = await readParquetStream(url);
///
/// const batches = [];
/// for await (const wasmRecordBatch of stream) {
/// const ffiRecordBatch = wasmRecordBatch.intoFFI();
/// const recordBatch = parseRecordBatch(
/// WASM_MEMORY.buffer,
/// ffiRecordBatch.arrayAddr(),
/// ffiRecordBatch.schemaAddr(),
/// true
/// );
/// batches.push(recordBatch);
/// }
/// const table = new Table(batches);
/// ```
///
/// @param url URL to Parquet file
#[wasm_bindgen(js_name = readParquetStream)]
#[cfg(all(feature = "reader", feature = "async"))]
pub async fn read_parquet_stream(
    url: String,
    content_length: Option<usize>,
) -> WasmResult<wasm_streams::readable::sys::ReadableStream> {
    use futures::StreamExt;
    let parquet_stream = crate::reader_async::read_record_batch_stream(url, content_length).await?;
    // Surface per-batch decode errors as stream errors instead of panicking
    // inside the stream (the previous `.unwrap()` aborted the wasm module).
    let stream = parquet_stream.map(|maybe_record_batch| {
        maybe_record_batch
            .map(|record_batch| RecordBatch::new(record_batch).into())
            .map_err(|err| JsValue::from(JsError::new(&err.to_string())))
    });
    Ok(wasm_streams::ReadableStream::from_stream(stream).into_raw())
}
/// Transform a ReadableStream of RecordBatches to a ReadableStream of bytes
///
/// Browser example with piping to a file via the File System API:
///
/// ```js
/// import initWasm, {ParquetFile, transformParquetStream} from "parquet-wasm";
///
/// // Instantiate the WebAssembly context
/// await initWasm();
///
/// const fileInstance = await ParquetFile.fromUrl("https://example.com/file.parquet");
/// const recordBatchStream = await fileInstance.stream();
/// const serializedParquetStream = await transformParquetStream(recordBatchStream);
/// // NB: requires transient user activation - you would typically do this before ☝️
/// const handle = await window.showSaveFilePicker();
/// const writable = await handle.createWritable();
/// await serializedParquetStream.pipeTo(writable);
/// ```
///
/// NodeJS (ESM) example with piping to a file:
/// ```js
/// import { open } from "node:fs/promises";
/// import { Writable } from "node:stream";
/// import initWasm, {ParquetFile, transformParquetStream} from "parquet-wasm";
///
/// // Instantiate the WebAssembly context
/// await initWasm();
///
/// const fileInstance = await ParquetFile.fromUrl("https://example.com/file.parquet");
/// const recordBatchStream = await fileInstance.stream();
/// const serializedParquetStream = await transformParquetStream(recordBatchStream);
///
/// // grab a file handle via fsPromises
/// const handle = await open("file.parquet");
/// const destinationStream = Writable.toWeb(handle.createWriteStream());
/// await serializedParquetStream.pipeTo(destinationStream);
///
/// ```
/// NB: the above is a little contrived - `await writeFile("file.parquet", serializedParquetStream)`
/// is enough for most use cases.
///
/// Browser kitchen sink example - teeing to the Cache API, using as a streaming post body, transferring
/// to a Web Worker:
/// ```js
/// // prelude elided - see above
/// const serializedParquetStream = await transformParquetStream(recordBatchStream);
/// const [cacheStream, bodyStream] = serializedParquetStream.tee();
/// const postProm = fetch(targetUrl, {
/// method: "POST",
/// duplex: "half",
/// body: bodyStream
/// });
/// const targetCache = await caches.open("foobar");
/// await targetCache.put("https://example.com/file.parquet", new Response(cacheStream));
/// // this could have been done with another tee, but beware of buffering
/// const workerStream = await targetCache.get("https://example.com/file.parquet").body;
/// const worker = new Worker("worker.js");
/// worker.postMessage(workerStream, [workerStream]);
/// await postProm;
/// ```
///
/// @param stream A {@linkcode ReadableStream} of {@linkcode RecordBatch} instances
/// @param writer_properties (optional) Configuration for writing to Parquet. Use the {@linkcode
/// WriterPropertiesBuilder} to build a writing configuration, then call `.build()` to create an
/// immutable writer properties to pass in here.
/// @returns ReadableStream containing serialized Parquet data.
#[wasm_bindgen(js_name = "transformParquetStream")]
#[cfg(all(feature = "writer", feature = "async"))]
pub async fn transform_parquet_stream(
    stream: wasm_streams::readable::sys::ReadableStream,
    writer_properties: Option<crate::writer_properties::WriterProperties>,
) -> WasmResult<wasm_streams::readable::sys::ReadableStream> {
    use futures::{StreamExt, TryStreamExt};
    use wasm_bindgen::convert::TryFromJsValue;
    use crate::error::ParquetWasmError;
    // Adapt the JS ReadableStream of JsValues into a Rust stream of
    // RecordBatches; chunks that fail the cast surface as DynCastingError.
    let batches = wasm_streams::ReadableStream::from_raw(stream)
        .into_stream()
        .map(|maybe_chunk| {
            let chunk = maybe_chunk?;
            arrow_wasm::RecordBatch::try_from_js_value(chunk)
        })
        .map_err(ParquetWasmError::DynCastingError);
    // Hand the typed batch stream to the async writer; the returned stream
    // yields the serialized Parquet bytes.
    let output_stream = super::writer_async::transform_parquet_stream(
        batches,
        writer_properties.unwrap_or_default(),
    )
    .await;
    Ok(output_stream?)
}
================================================
FILE: src/writer.rs
================================================
use crate::error::Result;
use arrow::datatypes::SchemaRef;
use arrow::record_batch::RecordBatch;
use parquet::arrow::arrow_writer::ArrowWriter;
/// Internal helper: serialize a sequence of Arrow record batches into an
/// in-memory Parquet file using the `parquet` crate's `ArrowWriter`.
///
/// Returns the raw bytes of the finished Parquet file (including footer).
pub fn write_parquet(
    batches: impl Iterator<Item = RecordBatch>,
    schema: SchemaRef,
    writer_properties: crate::writer_properties::WriterProperties,
) -> Result<Vec<u8>> {
    let mut buffer: Vec<u8> = Vec::new();
    let mut writer =
        ArrowWriter::try_new(&mut buffer, schema, Some(writer_properties.into()))?;
    // Stream every batch through the writer...
    for batch in batches {
        writer.write(&batch)?;
    }
    // ...then flush the remaining row group and write the footer.
    writer.close()?;
    Ok(buffer)
}
================================================
FILE: src/writer_async.rs
================================================
use crate::common::stream::WrappedWritableStream;
use crate::error::{ParquetWasmError, Result};
use async_compat::CompatExt;
use futures::StreamExt;
use futures::channel::oneshot;
use parquet::arrow::async_writer::AsyncArrowWriter;
use wasm_bindgen_futures::spawn_local;
/// Encode a stream of record batches into a Parquet byte stream.
///
/// Creates a JS `TransformStream`: the writable side is fed by an
/// `AsyncArrowWriter` running on a locally spawned task, and the readable side
/// is returned to the caller. The Arrow schema is taken from the first batch.
///
/// Returns an error if the TransformStream cannot be created, if the first
/// batch is missing or invalid, or if the writer fails to initialize.
pub async fn transform_parquet_stream(
    batches: impl futures::Stream<Item = Result<arrow_wasm::RecordBatch>> + 'static,
    writer_properties: crate::writer_properties::WriterProperties,
) -> Result<wasm_streams::readable::sys::ReadableStream> {
    let options = Some(writer_properties.into());
    let raw_stream = wasm_streams::transform::sys::TransformStream::new();
    if let Ok(raw_stream) = raw_stream {
        let (writable_stream, output_stream) = {
            let raw_writable = raw_stream.writable();
            let inner_writer =
                wasm_streams::WritableStream::from_raw(raw_writable).into_async_write();
            let writable_stream = WrappedWritableStream {
                stream: inner_writer,
            };
            (writable_stream, raw_stream.readable())
        };
        // construct a channel for the purposes of signalling errors occurring
        // at the start of the stream. Errors that occur during writing will
        // have to fuse the stream.
        let (sender, receiver) = oneshot::channel::<Result<()>>();
        spawn_local(async move {
            // Peek the first batch without consuming it: its schema is needed
            // to construct the writer before any batch is written.
            let adapted_stream = batches.peekable();
            let mut pinned_stream = std::pin::pin!(adapted_stream);
            let first_batch = pinned_stream.as_mut().peek().await;
            if let Some(Ok(first_batch)) = first_batch {
                let schema = first_batch.schema().into_inner();
                let writer = AsyncArrowWriter::try_new(writable_stream.compat(), schema, options);
                match writer {
                    Ok(mut writer) => {
                        // unblock the calling thread's receiver (indicating that stream initialization was error-free)
                        let _ = sender.send(Ok(()));
                        // NOTE(review): write/close errors here are silently
                        // dropped — the caller has already resumed.
                        while let Some(batch) = pinned_stream.next().await {
                            if let Ok(batch) = batch {
                                let _ = writer.write(&batch.into()).await;
                            }
                        }
                        let _ = writer.close().await;
                    }
                    Err(err) => {
                        let _ = sender.send(Err(ParquetWasmError::ParquetError(Box::new(err))));
                    }
                }
            } else if let Some(Err(err)) = first_batch {
                // First item existed but failed conversion to a RecordBatch.
                let _ = sender.send(Err(ParquetWasmError::DynCastingError(
                    err.to_string().into(),
                )));
            } else {
                // Empty input stream: there is no schema to write with.
                let _ = sender.send(Err(ParquetWasmError::DynCastingError(
                    "null first batch".to_string().into(),
                )));
            }
        });
        // Block until the spawned task reports whether initialization worked.
        match receiver.await.unwrap() {
            Ok(()) => Ok(output_stream),
            Err(err) => Err(err),
        }
    } else {
        Err(ParquetWasmError::PlatformSupportError(
            "Failed to create TransformStream".to_string(),
        ))
    }
}
================================================
FILE: src/writer_properties.rs
================================================
use std::collections::HashMap;
use crate::common::properties::{Compression, Encoding, WriterVersion};
use crate::error::WasmResult;
use parquet::file::metadata::KeyValue;
use wasm_bindgen::prelude::*;
/// Controls the level of statistics to be computed by the writer
///
/// Mirrors `parquet::file::properties::EnabledStatistics` (see the `From`
/// impl below) so it can be exposed through wasm-bindgen.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
#[wasm_bindgen]
pub enum EnabledStatistics {
    /// Compute no statistics
    None,
    /// Compute chunk-level statistics but not page-level
    Chunk,
    /// Compute page-level and chunk-level statistics
    Page,
}
impl From<EnabledStatistics> for parquet::file::properties::EnabledStatistics {
    /// Map the wasm-exposed statistics level onto the upstream parquet enum
    /// (a one-to-one, name-preserving translation).
    fn from(statistics: EnabledStatistics) -> Self {
        use parquet::file::properties::EnabledStatistics as Upstream;
        match statistics {
            EnabledStatistics::None => Upstream::None,
            EnabledStatistics::Chunk => Upstream::Chunk,
            EnabledStatistics::Page => Upstream::Page,
        }
    }
}
/// Immutable struct to hold writing configuration for `writeParquet`.
///
/// Use {@linkcode WriterPropertiesBuilder} to create a configuration, then call {@linkcode
/// WriterPropertiesBuilder.build} to create an instance of `WriterProperties`.
#[wasm_bindgen]
pub struct WriterProperties(parquet::file::properties::WriterProperties);
impl From<WriterProperties> for parquet::file::properties::WriterProperties {
fn from(props: WriterProperties) -> Self {
props.0
}
}
impl Default for WriterProperties {
    /// Properties built from a fresh builder with every setting left at its default.
    fn default() -> Self {
        WriterPropertiesBuilder::new().build()
    }
}
// Inject a TypeScript alias into the generated .d.ts so that
// `setKeyValueMetadata` is typed as taking a `Map<string, string>` instead of
// an opaque `any`.
#[wasm_bindgen(typescript_custom_section)]
const TS_FieldMetadata: &'static str = r#"
export type KeyValueMetadata = Map<string, string>;
"#;

#[wasm_bindgen]
extern "C" {
    /// Key value metadata
    ///
    /// On the JS side this is a `Map<string, string>` (see the custom
    /// TypeScript section above); on the Rust side it is an untyped `JsValue`
    /// deserialized with `serde_wasm_bindgen`.
    #[wasm_bindgen(typescript_type = "KeyValueMetadata")]
    pub type KeyValueMetadata;
}
/// Builder to create a writing configuration for `writeParquet`
///
/// Call {@linkcode build} on the finished builder to create an immutable {@linkcode WriterProperties} to pass to `writeParquet`
// Newtype wrapper over the `parquet` crate's builder; each setter consumes and
// returns `Self` to keep the fluent-chaining style usable from JS.
#[wasm_bindgen]
pub struct WriterPropertiesBuilder(parquet::file::properties::WriterPropertiesBuilder);
#[wasm_bindgen]
impl WriterPropertiesBuilder {
    /// Returns default state of the builder.
    #[wasm_bindgen(constructor)]
    pub fn new() -> WriterPropertiesBuilder {
        WriterPropertiesBuilder(parquet::file::properties::WriterProperties::builder())
    }

    /// Finalizes the configuration and returns immutable writer properties struct.
    #[wasm_bindgen]
    pub fn build(self) -> WriterProperties {
        WriterProperties(self.0.build())
    }

    // ----------------------------------------------------------------------
    // Writer properties related to a file

    /// Sets writer version.
    #[wasm_bindgen(js_name = setWriterVersion)]
    pub fn set_writer_version(self, value: WriterVersion) -> Self {
        Self(self.0.set_writer_version(value.into()))
    }

    /// Sets data page size limit.
    #[wasm_bindgen(js_name = setDataPageSizeLimit)]
    pub fn set_data_page_size_limit(self, value: usize) -> Self {
        Self(self.0.set_data_page_size_limit(value))
    }

    /// Sets dictionary page size limit.
    #[wasm_bindgen(js_name = setDictionaryPageSizeLimit)]
    pub fn set_dictionary_page_size_limit(self, value: usize) -> Self {
        Self(self.0.set_dictionary_page_size_limit(value))
    }

    /// Sets write batch size.
    #[wasm_bindgen(js_name = setWriteBatchSize)]
    pub fn set_write_batch_size(self, value: usize) -> Self {
        Self(self.0.set_write_batch_size(value))
    }

    /// Sets maximum number of rows in a row group.
    #[wasm_bindgen(js_name = setMaxRowGroupSize)]
    pub fn set_max_row_group_size(self, value: usize) -> Self {
        Self(self.0.set_max_row_group_size(value))
    }

    /// Sets "created by" property.
    #[wasm_bindgen(js_name = setCreatedBy)]
    pub fn set_created_by(self, value: String) -> Self {
        Self(self.0.set_created_by(value))
    }

    /// Sets "key_value_metadata" property.
    ///
    /// Accepts a JS `Map<string, string>` (or `null`/`undefined` to clear the
    /// metadata). Returns an error if the value cannot be deserialized into a
    /// string-to-string map.
    #[wasm_bindgen(js_name = setKeyValueMetadata)]
    pub fn set_key_value_metadata(
        self,
        value: KeyValueMetadata,
    ) -> WasmResult<WriterPropertiesBuilder> {
        let options: Option<HashMap<String, String>> = serde_wasm_bindgen::from_value(value.obj)?;
        // Consume the deserialized map rather than cloning every key and value.
        let kv_options = options.map(|options| {
            options
                .into_iter()
                .map(|(k, v)| KeyValue::new(k, Some(v)))
                .collect()
        });
        Ok(Self(self.0.set_key_value_metadata(kv_options)))
    }

    // ----------------------------------------------------------------------
    // Setters for any column (global)

    /// Sets encoding for any column.
    ///
    /// If dictionary is not enabled, this is treated as a primary encoding for all
    /// columns. In case when dictionary is enabled for any column, this value is
    /// considered to be a fallback encoding for that column.
    ///
    /// Panics if user tries to set dictionary encoding here, regardless of dictionary
    /// encoding flag being set.
    #[wasm_bindgen(js_name = setEncoding)]
    pub fn set_encoding(self, value: Encoding) -> Self {
        Self(self.0.set_encoding(value.into()))
    }

    /// Sets compression codec for any column.
    #[wasm_bindgen(js_name = setCompression)]
    pub fn set_compression(self, value: Compression) -> Self {
        Self(self.0.set_compression(value.into()))
    }

    /// Sets flag to enable/disable dictionary encoding for any column.
    ///
    /// Use this method to set dictionary encoding, instead of explicitly specifying
    /// encoding in `set_encoding` method.
    #[wasm_bindgen(js_name = setDictionaryEnabled)]
    pub fn set_dictionary_enabled(self, value: bool) -> Self {
        Self(self.0.set_dictionary_enabled(value))
    }

    /// Sets flag to enable/disable statistics for any column.
    #[wasm_bindgen(js_name = setStatisticsEnabled)]
    pub fn set_statistics_enabled(self, value: EnabledStatistics) -> Self {
        Self(self.0.set_statistics_enabled(value.into()))
    }

    // ----------------------------------------------------------------------
    // Setters for a specific column

    /// Sets encoding for a column.
    /// Takes precedence over globally defined settings.
    ///
    /// If dictionary is not enabled, this is treated as a primary encoding for this
    /// column. In case when dictionary is enabled for this column, either through
    /// global defaults or explicitly, this value is considered to be a fallback
    /// encoding for this column.
    ///
    /// Panics if user tries to set dictionary encoding here, regardless of dictionary
    /// encoding flag being set.
    #[wasm_bindgen(js_name = setColumnEncoding)]
    pub fn set_column_encoding(self, col: String, value: Encoding) -> Self {
        let column_path = parquet::schema::types::ColumnPath::from(col);
        Self(self.0.set_column_encoding(column_path, value.into()))
    }

    /// Sets compression codec for a column.
    /// Takes precedence over globally defined settings.
    #[wasm_bindgen(js_name = setColumnCompression)]
    pub fn set_column_compression(self, col: String, value: Compression) -> Self {
        let column_path = parquet::schema::types::ColumnPath::from(col);
        Self(self.0.set_column_compression(column_path, value.into()))
    }

    /// Sets flag to enable/disable dictionary encoding for a column.
    /// Takes precedence over globally defined settings.
    #[wasm_bindgen(js_name = setColumnDictionaryEnabled)]
    pub fn set_column_dictionary_enabled(self, col: String, value: bool) -> Self {
        let column_path = parquet::schema::types::ColumnPath::from(col);
        Self(self.0.set_column_dictionary_enabled(column_path, value))
    }

    /// Sets flag to enable/disable statistics for a column.
    /// Takes precedence over globally defined settings.
    #[wasm_bindgen(js_name = setColumnStatisticsEnabled)]
    pub fn set_column_statistics_enabled(self, col: String, value: EnabledStatistics) -> Self {
        let column_path = parquet::schema::types::ColumnPath::from(col);
        Self(
            self.0
                .set_column_statistics_enabled(column_path, value.into()),
        )
    }
}
impl Default for WriterPropertiesBuilder {
fn default() -> Self {
WriterPropertiesBuilder::new()
}
}
================================================
FILE: templates/package.json
================================================
{
"name": "parquet-wasm",
"collaborators": [
"Kyle Barron <kylebarron2@gmail.com>"
],
"description": "WebAssembly Parquet reader and writer.",
"license": "MIT OR Apache-2.0",
"repository": {
"type": "git",
"url": "https://github.com/kylebarron/parquet-wasm"
},
"files": [
"*"
],
"module": "bundler/parquet_wasm.js",
"types": "bundler/parquet_wasm.d.ts",
"sideEffects": [],
"keywords": [
"parquet",
"webassembly",
"arrow"
],
"$comment": "We export ./esm/parquet_wasm.js so that code can work the same bundled and directly on the frontend",
"exports": {
"./bundler/parquet_wasm_bg.wasm": "./bundler/parquet_wasm_bg.wasm",
"./esm/parquet_wasm_bg.wasm": "./esm/parquet_wasm_bg.wasm",
"./node/parquet_wasm_bg.wasm": "./node/parquet_wasm_bg.wasm",
"./bundler": {
"types": "./bundler/parquet_wasm.d.ts",
"default": "./bundler/parquet_wasm.js"
},
"./esm": {
"types": "./esm/parquet_wasm.d.ts",
"default": "./esm/parquet_wasm.js"
},
"./node": {
"types": "./node/parquet_wasm.d.ts",
"default": "./node/parquet_wasm.js"
},
"./esm/parquet_wasm.js": {
"types": "./esm/parquet_wasm.d.ts",
"default": "./esm/parquet_wasm.js"
},
".": {
"node": {
"types": "./node/parquet_wasm.d.ts",
"default": "./node/parquet_wasm.js"
},
"types": "./esm/parquet_wasm.d.ts",
"default": "./esm/parquet_wasm.js"
}
}
}
================================================
FILE: tests/data/.python-version
================================================
3.12
================================================
FILE: tests/data/README.md
================================================
To create test data:
```
uv run python generate_data.py
```
================================================
FILE: tests/data/generate_data.py
================================================
import pandas as pd
import pyarrow as pa
import pyarrow.feather as feather
import pyarrow.parquet as pq
# Parquet compression codecs exercised by the generated test fixtures.
compressions = ["SNAPPY", "GZIP", "BROTLI", "LZ4", "ZSTD", "NONE"]
def create_data():
    """Build the four-row test table: one string, uint8, int32, and bool column."""
    columns = {
        "str": pa.array(["a", "b", "c", "d"], type=pa.string()),
        "uint8": pa.array([1, 2, 3, 4], type=pa.uint8()),
        "int32": pa.array([0, -2147483638, 2147483637, 1], type=pa.int32()),
        "bool": pa.array([True, True, False, False], type=pa.bool_()),
    }
    return pa.table(columns)
def write_data(table):
    """Write ``table`` as an uncompressed Feather file plus one Parquet file per
    (partition count, compression codec) combination.

    Output filenames follow ``{n_partitions}-partition-{codec}.parquet``.
    """
    feather.write_feather(table, "data.arrow", compression="uncompressed")

    data_len = len(table)
    for n_partitions in [1, 2]:
        # Integer division: row_group_size must be a whole number of rows.
        # The original used `/`, producing a float (e.g. 2.0) and relying on
        # pyarrow to coerce it. The row counts here (4) divide evenly.
        row_group_size = data_len // n_partitions
        for compression in compressions:
            fname = f"{n_partitions}-partition-{compression.lower()}.parquet"
            pq.write_table(
                table, fname, row_group_size=row_group_size, compression=compression
            )
def write_empty_table():
    """Write a Parquet file containing an entirely empty DataFrame."""
    empty = pd.DataFrame()
    empty.to_parquet("empty.parquet")
def create_string_view_table():
    """Build a table exercising Arrow's string_view and binary_view types."""
    string_col = pa.array(["a", "b", "c", "d"], type=pa.string_view())
    binary_col = pa.array([b"a", b"b", b"c", b"d"], type=pa.binary_view())
    return pa.table({"string_view": string_col, "binary_view": binary_col})
def write_string_view_table():
    """Write the string_view/binary_view table with snappy compression."""
    pq.write_table(
        create_string_view_table(), "string_view.parquet", compression="snappy"
    )
def main():
    # Generate every fixture: the standard table (Feather plus all Parquet
    # partition/compression variants), an empty Parquet file, and a
    # string_view/binary_view Parquet file.
    table = create_data()
    write_data(table)
    write_empty_table()
    write_string_view_table()

if __name__ == "__main__":
    main()
================================================
FILE: tests/data/generate_geo_data.py
================================================
import json
import geopandas as gpd
import pyarrow as pa
import pyarrow.parquet as pq
import pygeos
from geopandas.io.arrow import _create_metadata
# Load the Natural Earth cities sample dataset and write it with geopandas'
# default GeoParquet output (geometry encoded as WKB).
gdf = gpd.read_file(gpd.datasets.get_path("naturalearth_cities"))
gdf.to_parquet("naturalearth_cities_wkb.parquet", index=None)
def construct_geoarrow_table(gdf: gpd.GeoDataFrame) -> pa.Table:
    """Convert a GeoDataFrame to an Arrow table whose geometry column uses the
    "geoarrow" encoding (FixedSizeList<2> of interleaved x/y coordinates).

    Note in this quick example we omit metadata on the table header.
    """
    non_geo_cols = [col for col in gdf.columns if col != gdf.geometry.name]
    table = pa.Table.from_pandas(gdf[non_geo_cols])

    # Interleaved [x0, y0, x1, y1, ...] coordinates packed into FixedSizeList<2>.
    pygeos_array = pygeos.from_shapely(gdf.geometry.values)
    coords = pygeos.get_coordinates(pygeos_array)
    parr = pa.FixedSizeListArray.from_arrays(coords.flat, 2)

    # Reuse geopandas' GeoParquet metadata, but relabel the geometry column's
    # encoding as geoarrow. NOTE(review): _create_metadata is a private
    # geopandas API and may change between versions.
    geo_metadata = _create_metadata(gdf)
    geo_metadata["columns"][gdf._geometry_column_name]["encoding"] = "geoarrow"

    table_with_geom = table.append_column("geometry", parr)
    # schema.metadata can be None when the schema carries no metadata; guard so
    # the update below cannot raise AttributeError.
    metadata = dict(table_with_geom.schema.metadata or {})
    metadata.update({b"geo": json.dumps(geo_metadata).encode()})
    return table_with_geom.replace_schema_metadata(metadata)
# Write a second copy of the dataset with the geometry stored using the
# geoarrow encoding instead of WKB.
gdf_arrow_encoding = construct_geoarrow_table(gdf)
pq.write_table(
    gdf_arrow_encoding, "naturalearth_cities_geoarrow.parquet", compression="snappy"
)
================================================
FILE: tests/data/pyproject.toml
================================================
[project]
name = "generate-test-data"
version = "0.1.0"
description = "Generate Parquet and Arrow test fixtures for parquet-wasm tests"
readme = "README.md"
requires-python = ">=3.12"
dependencies = [
"pandas>=2.3.2",
"pyarrow>=21.0.0",
]
[dependency-groups]
dev = [
"ipykernel>=6.30.1",
]
================================================
FILE: tests/data/uv.lock
================================================
version = 1
revision = 3
requires-python = ">=3.12"
[[package]]
name = "appnope"
version = "0.1.4"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/35/5d/752690df9ef5b76e169e68d6a129fa6d08a7100ca7f754c89495db3c6019/appnope-0.1.4.tar.gz", hash = "sha256:1de3860566df9caf38f01f86f65e0e13e379af54f9e4bee1e66b48f2efffd1ee", size = 4170, upload-time = "2024-02-06T09:43:11.258Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/81/29/5ecc3a15d5a33e31b26c11426c45c501e439cb865d0bff96315d86443b78/appnope-0.1.4-py2.py3-none-any.whl", hash = "sha256:502575ee11cd7a28c0205f379b525beefebab9d161b7c964670864014ed7213c", size = 4321, upload-time = "2024-02-06T09:43:09.663Z" },
]
[[package]]
name = "asttokens"
version = "3.0.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/4a/e7/82da0a03e7ba5141f05cce0d302e6eed121ae055e0456ca228bf693984bc/asttokens-3.0.0.tar.gz", hash = "sha256:0dcd8baa8d62b0c1d118b399b2ddba3c4aff271d0d7a9e0d4c1681c79035bbc7", size = 61978, upload-time = "2024-11-30T04:30:14.439Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/25/8a/c46dcc25341b5bce5472c718902eb3d38600a903b14fa6aeecef3f21a46f/asttokens-3.0.0-py3-none-any.whl", hash = "sha256:e3078351a059199dd5138cb1c706e6430c05eff2ff136af5eb4790f9d28932e2", size = 26918, upload-time = "2024-11-30T04:30:10.946Z" },
]
[[package]]
name = "cffi"
version = "2.0.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "pycparser", marker = "implementation_name != 'PyPy'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/eb/56/b1ba7935a17738ae8453301356628e8147c79dbb825bcbc73dc7401f9846/cffi-2.0.0.tar.gz", hash = "sha256:44d1b5909021139fe36001ae048dbdde8214afa20200eda0f64c068cac5d5529", size = 523588, upload-time = "2025-09-08T23:24:04.541Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/ea/47/4f61023ea636104d4f16ab488e268b93008c3d0bb76893b1b31db1f96802/cffi-2.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6d02d6655b0e54f54c4ef0b94eb6be0607b70853c45ce98bd278dc7de718be5d", size = 185271, upload-time = "2025-09-08T23:22:44.795Z" },
{ url = "https://files.pythonhosted.org/packages/df/a2/781b623f57358e360d62cdd7a8c681f074a71d445418a776eef0aadb4ab4/cffi-2.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8eca2a813c1cb7ad4fb74d368c2ffbbb4789d377ee5bb8df98373c2cc0dee76c", size = 181048, upload-time = "2025-09-08T23:22:45.938Z" },
{ url = "https://files.pythonhosted.org/packages/ff/df/a4f0fbd47331ceeba3d37c2e51e9dfc9722498becbeec2bd8bc856c9538a/cffi-2.0.0-cp312-cp312-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:21d1152871b019407d8ac3985f6775c079416c282e431a4da6afe7aefd2bccbe", size = 212529, upload-time = "2025-09-08T23:22:47.349Z" },
{ url = "https://files.pythonhosted.org/packages/d5/72/12b5f8d3865bf0f87cf1404d8c374e7487dcf097a1c91c436e72e6badd83/cffi-2.0.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:b21e08af67b8a103c71a250401c78d5e0893beff75e28c53c98f4de42f774062", size = 220097, upload-time = "2025-09-08T23:22:48.677Z" },
{ url = "https://files.pythonhosted.org/packages/c2/95/7a135d52a50dfa7c882ab0ac17e8dc11cec9d55d2c18dda414c051c5e69e/cffi-2.0.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:1e3a615586f05fc4065a8b22b8152f0c1b00cdbc60596d187c2a74f9e3036e4e", size = 207983, upload-time = "2025-09-08T23:22:50.06Z" },
{ url = "https://files.pythonhosted.org/packages/3a/c8/15cb9ada8895957ea171c62dc78ff3e99159ee7adb13c0123c001a2546c1/cffi-2.0.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:81afed14892743bbe14dacb9e36d9e0e504cd204e0b165062c488942b9718037", size = 206519, upload-time = "2025-09-08T23:22:51.364Z" },
{ url = "https://files.pythonhosted.org/packages/78/2d/7fa73dfa841b5ac06c7b8855cfc18622132e365f5b81d02230333ff26e9e/cffi-2.0.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3e17ed538242334bf70832644a32a7aae3d83b57567f9fd60a26257e992b79ba", size = 219572, upload-time = "2025-09-08T23:22:52.902Z" },
{ url = "https://files.pythonhosted.org/packages/07/e0/267e57e387b4ca276b90f0434ff88b2c2241ad72b16d31836adddfd6031b/cffi-2.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3925dd22fa2b7699ed2617149842d2e6adde22b262fcbfada50e3d195e4b3a94", size = 222963, upload-time = "2025-09-08T23:22:54.518Z" },
{ url = "https://files.pythonhosted.org/packages/b6/75/1f2747525e06f53efbd878f4d03bac5b859cbc11c633d0fb81432d98a795/cffi-2.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:2c8f814d84194c9ea681642fd164267891702542f028a15fc97d4674b6206187", size = 221361, upload-time = "2025-09-08T23:22:55.867Z" },
{ url = "https://files.pythonhosted.org/packages/7b/2b/2b6435f76bfeb6bbf055596976da087377ede68df465419d192acf00c437/cffi-2.0.0-cp312-cp312-win32.whl", hash = "sha256:da902562c3e9c550df360bfa53c035b2f241fed6d9aef119048073680ace4a18", size = 172932, upload-time = "2025-09-08T23:22:57.188Z" },
{ url = "https://files.pythonhosted.org/packages/f8/ed/13bd4418627013bec4ed6e54283b1959cf6db888048c7cf4b4c3b5b36002/cffi-2.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:da68248800ad6320861f129cd9c1bf96ca849a2771a59e0344e88681905916f5", size = 183557, upload-time = "2025-09-08T23:22:58.351Z" },
{ url = "https://files.pythonhosted.org/packages/95/31/9f7f93ad2f8eff1dbc1c3656d7ca5bfd8fb52c9d786b4dcf19b2d02217fa/cffi-2.0.0-cp312-cp312-win_arm64.whl", hash = "sha256:4671d9dd5ec934cb9a73e7ee9676f9362aba54f7f34910956b84d727b0d73fb6", size = 177762, upload-time = "2025-09-08T23:22:59.668Z" },
{ url = "https://files.pythonhosted.org/packages/4b/8d/a0a47a0c9e413a658623d014e91e74a50cdd2c423f7ccfd44086ef767f90/cffi-2.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:00bdf7acc5f795150faa6957054fbbca2439db2f775ce831222b66f192f03beb", size = 185230, upload-time = "2025-09-08T23:23:00.879Z" },
{ url = "https://files.pythonhosted.org/packages/4a/d2/a6c0296814556c68ee32009d9c2ad4f85f2707cdecfd7727951ec228005d/cffi-2.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:45d5e886156860dc35862657e1494b9bae8dfa63bf56796f2fb56e1679fc0bca", size = 181043, upload-time = "2025-09-08T23:23:02.231Z" },
{ url = "https://files.pythonhosted.org/packages/b0/1e/d22cc63332bd59b06481ceaac49d6c507598642e2230f201649058a7e704/cffi-2.0.0-cp313-cp313-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:07b271772c100085dd28b74fa0cd81c8fb1a3ba18b21e03d7c27f3436a10606b", size = 212446, upload-time = "2025-09-08T23:23:03.472Z" },
{ url = "https://files.pythonhosted.org/packages/a9/f5/a2c23eb03b61a0b8747f211eb716446c826ad66818ddc7810cc2cc19b3f2/cffi-2.0.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d48a880098c96020b02d5a1f7d9251308510ce8858940e6fa99ece33f610838b", size = 220101, upload-time = "2025-09-08T23:23:04.792Z" },
{ url = "https://files.pythonhosted.org/packages/f2/7f/e6647792fc5850d634695bc0e6ab4111ae88e89981d35ac269956605feba/cffi-2.0.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:f93fd8e5c8c0a4aa1f424d6173f14a892044054871c771f8566e4008eaa359d2", size = 207948, upload-time = "2025-09-08T23:23:06.127Z" },
{ url = "https://files.pythonhosted.org/packages/cb/1e/a5a1bd6f1fb30f22573f76533de12a00bf274abcdc55c8edab639078abb6/cffi-2.0.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:dd4f05f54a52fb558f1ba9f528228066954fee3ebe629fc1660d874d040ae5a3", size = 206422, upload-time = "2025-09-08T23:23:07.753Z" },
{ url = "https://files.pythonhosted.org/packages/98/df/0a1755e750013a2081e863e7cd37e0cdd02664372c754e5560099eb7aa44/cffi-2.0.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c8d3b5532fc71b7a77c09192b4a5a200ea992702734a2e9279a37f2478236f26", size = 219499, upload-time = "2025-09-08T23:23:09.648Z" },
{ url = "https://files.pythonhosted.org/packages/50/e1/a969e687fcf9ea58e6e2a928ad5e2dd88cc12f6f0ab477e9971f2309b57c/cffi-2.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:d9b29c1f0ae438d5ee9acb31cadee00a58c46cc9c0b2f9038c6b0b3470877a8c", size = 222928, upload-time = "2025-09-08T23:23:10.928Z" },
{ url = "https://files.pythonhosted.org/packages/36/54/0362578dd2c9e557a28ac77698ed67323ed5b9775ca9d3fe73fe191bb5d8/cffi-2.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6d50360be4546678fc1b79ffe7a66265e28667840010348dd69a314145807a1b", size = 221302, upload-time = "2025-09-08T23:23:12.42Z" },
{ url = "https://files.pythonhosted.org/packages/eb/6d/bf9bda840d5f1dfdbf0feca87fbdb64a918a69bca42cfa0ba7b137c48cb8/cffi-2.0.0-cp313-cp313-win32.whl", hash = "sha256:74a03b9698e198d47562765773b4a8309919089150a0bb17d829ad7b44b60d27", size = 172909, upload-time = "2025-09-08T23:23:14.32Z" },
{ url = "https://files.pythonhosted.org/packages/37/18/6519e1ee6f5a1e579e04b9ddb6f1676c17368a7aba48299c3759bbc3c8b3/cffi-2.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:19f705ada2530c1167abacb171925dd886168931e0a7b78f5bffcae5c6b5be75", size = 183402, upload-time = "2025-09-08T23:23:15.535Z" },
{ url = "https://files.pythonhosted.org/packages/cb/0e/02ceeec9a7d6ee63bb596121c2c8e9b3a9e150936f4fbef6ca1943e6137c/cffi-2.0.0-cp313-cp313-win_arm64.whl", hash = "sha256:256f80b80ca3853f90c21b23ee78cd008713787b1b1e93eae9f3d6a7134abd91", size = 177780, upload-time = "2025-09-08T23:23:16.761Z" },
{ url = "https://files.pythonhosted.org/packages/92/c4/3ce07396253a83250ee98564f8d7e9789fab8e58858f35d07a9a2c78de9f/cffi-2.0.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:fc33c5141b55ed366cfaad382df24fe7dcbc686de5be719b207bb248e3053dc5", size = 185320, upload-time = "2025-09-08T23:23:18.087Z" },
{ url = "https://files.pythonhosted.org/packages/59/dd/27e9fa567a23931c838c6b02d0764611c62290062a6d4e8ff7863daf9730/cffi-2.0.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c654de545946e0db659b3400168c9ad31b5d29593291482c43e3564effbcee13", size = 181487, upload-time = "2025-09-08T23:23:19.622Z" },
{ url = "https://files.pythonhosted.org/packages/d6/43/0e822876f87ea8a4ef95442c3d766a06a51fc5298823f884ef87aaad168c/cffi-2.0.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:24b6f81f1983e6df8db3adc38562c83f7d4a0c36162885ec7f7b77c7dcbec97b", size = 220049, upload-time = "2025-09-08T23:23:20.853Z" },
{ url = "https://files.pythonhosted.org/packages/b4/89/76799151d9c2d2d1ead63c2429da9ea9d7aac304603de0c6e8764e6e8e70/cffi-2.0.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:12873ca6cb9b0f0d3a0da705d6086fe911591737a59f28b7936bdfed27c0d47c", size = 207793, upload-time = "2025-09-08T23:23:22.08Z" },
{ url = "https://files.pythonhosted.org/packages/bb/dd/3465b14bb9e24ee24cb88c9e3730f6de63111fffe513492bf8c808a3547e/cffi-2.0.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:d9b97165e8aed9272a6bb17c01e3cc5871a594a446ebedc996e2397a1c1ea8ef", size = 206300, upload-time = "2025-09-08T23:23:23.314Z" },
{ url = "https://files.pythonhosted.org/packages/47/d9/d83e293854571c877a92da46fdec39158f8d7e68da75bf73581225d28e90/cffi-2.0.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:afb8db5439b81cf9c9d0c80404b60c3cc9c3add93e114dcae767f1477cb53775", size = 219244, upload-time = "2025-09-08T23:23:24.541Z" },
{ url = "https://files.pythonhosted.org/packages/2b/0f/1f177e3683aead2bb00f7679a16451d302c436b5cbf2505f0ea8146ef59e/cffi-2.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:737fe7d37e1a1bffe70bd5754ea763a62a066dc5913ca57e957824b72a85e205", size = 222828, upload-time = "2025-09-08T23:23:26.143Z" },
{ url = "https://files.pythonhosted.org/packages/c6/0f/cafacebd4b040e3119dcb32fed8bdef8dfe94da653155f9d0b9dc660166e/cffi-2.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:38100abb9d1b1435bc4cc340bb4489635dc2f0da7456590877030c9b3d40b0c1", size = 220926, upload-time = "2025-09-08T23:23:27.873Z" },
{ url = "https://files.pythonhosted.org/packages/3e/aa/df335faa45b395396fcbc03de2dfcab242cd61a9900e914fe682a59170b1/cffi-2.0.0-cp314-cp314-win32.whl", hash = "sha256:087067fa8953339c723661eda6b54bc98c5625757ea62e95eb4898ad5e776e9f", size = 175328, upload-time = "2025-09-08T23:23:44.61Z" },
{ url = "https://files.pythonhosted.org/packages/bb/92/882c2d30831744296ce713f0feb4c1cd30f346ef747b530b5318715cc367/cffi-2.0.0-cp314-cp314-win_amd64.whl", hash = "sha256:203a48d1fb583fc7d78a4c6655692963b860a417c0528492a6bc21f1aaefab25", size = 185650, upload-time = "2025-09-08T23:23:45.848Z" },
{ url = "https://files.pythonhosted.org/packages/9f/2c/98ece204b9d35a7366b5b2c6539c350313ca13932143e79dc133ba757104/cffi-2.0.0-cp314-cp314-win_arm64.whl", hash = "sha256:dbd5c7a25a7cb98f5ca55d258b103a2054f859a46ae11aaf23134f9cc0d356ad", size = 180687, upload-time = "2025-09-08T23:23:47.105Z" },
{ url = "https://files.pythonhosted.org/packages/3e/61/c768e4d548bfa607abcda77423448df8c471f25dbe64fb2ef6d555eae006/cffi-2.0.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:9a67fc9e8eb39039280526379fb3a70023d77caec1852002b4da7e8b270c4dd9", size = 188773, upload-time = "2025-09-08T23:23:29.347Z" },
{ url = "https://files.pythonhosted.org/packages/2c/ea/5f76bce7cf6fcd0ab1a1058b5af899bfbef198bea4d5686da88471ea0336/cffi-2.0.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:7a66c7204d8869299919db4d5069a82f1561581af12b11b3c9f48c584eb8743d", size = 185013, upload-time = "2025-09-08T23:23:30.63Z" },
{ url = "https://files.pythonhosted.org/packages/be/b4/c56878d0d1755cf9caa54ba71e5d049479c52f9e4afc230f06822162ab2f/cffi-2.0.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7cc09976e8b56f8cebd752f7113ad07752461f48a58cbba644139015ac24954c", size = 221593, upload-time = "2025-09-08T23:23:31.91Z" },
{ url = "https://files.pythonhosted.org/packages/e0/0d/eb704606dfe8033e7128df5e90fee946bbcb64a04fcdaa97321309004000/cffi-2.0.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:92b68146a71df78564e4ef48af17551a5ddd142e5190cdf2c5624d0c3ff5b2e8", size = 209354, upload-time = "2025-09-08T23:23:33.214Z" },
{ url = "https://files.pythonhosted.org/packages/d8/19/3c435d727b368ca475fb8742ab97c9cb13a0de600ce86f62eab7fa3eea60/cffi-2.0.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:b1e74d11748e7e98e2f426ab176d4ed720a64412b6a15054378afdb71e0f37dc", size = 208480, upload-time = "2025-09-08T23:23:34.495Z" },
{ url = "https://files.pythonhosted.org/packages/d0/44/681604464ed9541673e486521497406fadcc15b5217c3e326b061696899a/cffi-2.0.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:28a3a209b96630bca57cce802da70c266eb08c6e97e5afd61a75611ee6c64592", size = 221584, upload-time = "2025-09-08T23:23:36.096Z" },
{ url = "https://files.pythonhosted.org/packages/25/8e/342a504ff018a2825d395d44d63a767dd8ebc927ebda557fecdaca3ac33a/cffi-2.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:7553fb2090d71822f02c629afe6042c299edf91ba1bf94951165613553984512", size = 224443, upload-time = "2025-09-08T23:23:37.328Z" },
{ url = "https://files.pythonhosted.org/packages/e1/5e/b666bacbbc60fbf415ba9988324a132c9a7a0448a9a8f125074671c0f2c3/cffi-2.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:6c6c373cfc5c83a975506110d17457138c8c63016b563cc9ed6e056a82f13ce4", size = 223437, upload-time = "2025-09-08T23:23:38.945Z" },
{ url = "https://files.pythonhosted.org/packages/a0/1d/ec1a60bd1a10daa292d3cd6bb0b359a81607154fb8165f3ec95fe003b85c/cffi-2.0.0-cp314-cp314t-win32.whl", hash = "sha256:1fc9ea04857caf665289b7a75923f2c6ed559b8298a1b8c49e59f7dd95c8481e", size = 180487, upload-time = "2025-09-08T23:23:40.423Z" },
{ url = "https://files.pythonhosted.org/packages/bf/41/4c1168c74fac325c0c8156f04b6749c8b6a8f405bbf91413ba088359f60d/cffi-2.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:d68b6cef7827e8641e8ef16f4494edda8b36104d79773a334beaa1e3521430f6", size = 191726, upload-time = "2025-09-08T23:23:41.742Z" },
{ url = "https://files.pythonhosted.org/packages/ae/3a/dbeec9d1ee0844c679f6bb5d6ad4e9f198b1224f4e7a32825f47f6192b0c/cffi-2.0.0-cp314-cp314t-win_arm64.whl", hash = "sha256:0a1527a803f0a659de1af2e1fd700213caba79377e27e4693648c2923da066f9", size = 184195, upload-time = "2025-09-08T23:23:43.004Z" },
]
[[package]]
name = "colorama"
version = "0.4.6"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697, upload-time = "2022-10-25T02:36:22.414Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" },
]
[[package]]
name = "comm"
version = "0.2.3"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/4c/13/7d740c5849255756bc17888787313b61fd38a0a8304fc4f073dfc46122aa/comm-0.2.3.tar.gz", hash = "sha256:2dc8048c10962d55d7ad693be1e7045d891b7ce8d999c97963a5e3e99c055971", size = 6319, upload-time = "2025-07-25T14:02:04.452Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/60/97/891a0971e1e4a8c5d2b20bbe0e524dc04548d2307fee33cdeba148fd4fc7/comm-0.2.3-py3-none-any.whl", hash = "sha256:c615d91d75f7f04f095b30d1c1711babd43bdc6419c1be9886a85f2f4e489417", size = 7294, upload-time = "2025-07-25T14:02:02.896Z" },
]
[[package]]
name = "debugpy"
version = "1.8.16"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/ca/d4/722d0bcc7986172ac2ef3c979ad56a1030e3afd44ced136d45f8142b1f4a/debugpy-1.8.16.tar.gz", hash = "sha256:31e69a1feb1cf6b51efbed3f6c9b0ef03bc46ff050679c4be7ea6d2e23540870", size = 1643809, upload-time = "2025-08-06T18:00:02.647Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/61/fb/0387c0e108d842c902801bc65ccc53e5b91d8c169702a9bbf4f7efcedf0c/debugpy-1.8.16-cp312-cp312-macosx_14_0_universal2.whl", hash = "sha256:b202e2843e32e80b3b584bcebfe0e65e0392920dc70df11b2bfe1afcb7a085e4", size = 2511822, upload-time = "2025-08-06T18:00:18.526Z" },
{ url = "https://files.pythonhosted.org/packages/37/44/19e02745cae22bf96440141f94e15a69a1afaa3a64ddfc38004668fcdebf/debugpy-1.8.16-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:64473c4a306ba11a99fe0bb14622ba4fbd943eb004847d9b69b107bde45aa9ea", size = 4230135, upload-time = "2025-08-06T18:00:19.997Z" },
{ url = "https://files.pythonhosted.org/packages/f3/0b/19b1ba5ee4412f303475a2c7ad5858efb99c90eae5ec627aa6275c439957/debugpy-1.8.16-cp312-cp312-win32.whl", hash = "sha256:833a61ed446426e38b0dd8be3e9d45ae285d424f5bf6cd5b2b559c8f12305508", size = 5281271, upload-time = "2025-08-06T18:00:21.281Z" },
{ url = "https://files.pythonhosted.org/packages/b1/e0/bc62e2dc141de53bd03e2c7cb9d7011de2e65e8bdcdaa26703e4d28656ba/debugpy-1.8.16-cp312-cp312-win_amd64.whl", hash = "sha256:75f204684581e9ef3dc2f67687c3c8c183fde2d6675ab131d94084baf8084121", size = 5323149, upload-time = "2025-08-06T18:00:23.033Z" },
{ url = "https://files.pythonhosted.org/packages/62/66/607ab45cc79e60624df386e233ab64a6d8d39ea02e7f80e19c1d451345bb/debugpy-1.8.16-cp313-cp313-macosx_14_0_universal2.whl", hash = "sha256:85df3adb1de5258dca910ae0bb185e48c98801ec15018a263a92bb06be1c8787", size = 2496157, upload-time = "2025-08-06T18:00:24.361Z" },
{ url = "https://files.pythonhosted.org/packages/4d/a0/c95baae08a75bceabb79868d663a0736655e427ab9c81fb848da29edaeac/debugpy-1.8.16-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bee89e948bc236a5c43c4214ac62d28b29388453f5fd328d739035e205365f0b", size = 4222491, upload-time = "2025-08-06T18:00:25.806Z" },
{ url = "https://files.pythonhosted.org/packages/5b/2f/1c8db6ddd8a257c3cd2c46413b267f1d5fa3df910401c899513ce30392d6/debugpy-1.8.16-cp313-cp313-win32.whl", hash = "sha256:cf358066650439847ec5ff3dae1da98b5461ea5da0173d93d5e10f477c94609a", size = 5281126, upload-time = "2025-08-06T18:00:27.207Z" },
{ url = "https://files.pythonhosted.org/packages/d3/ba/c3e154ab307366d6c5a9c1b68de04914e2ce7fa2f50d578311d8cc5074b2/debugpy-1.8.16-cp313-cp313-win_amd64.whl", hash = "sha256:b5aea1083f6f50023e8509399d7dc6535a351cc9f2e8827d1e093175e4d9fa4c", size = 5323094, upload-time = "2025-08-06T18:00:29.03Z" },
{ url = "https://files.pythonhosted.org/packages/52/57/ecc9ae29fa5b2d90107cd1d9bf8ed19aacb74b2264d986ae9d44fe9bdf87/debugpy-1.8.16-py2.py3-none-any.whl", hash = "sha256:19c9521962475b87da6f673514f7fd610328757ec993bf7ec0d8c96f9a325f9e", size = 5287700, upload-time = "2025-08-06T18:00:42.333Z" },
]
[[package]]
name = "decorator"
version = "5.2.1"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/43/fa/6d96a0978d19e17b68d634497769987b16c8f4cd0a7a05048bec693caa6b/decorator-5.2.1.tar.gz", hash = "sha256:65f266143752f734b0a7cc83c46f4618af75b8c5911b00ccb61d0ac9b6da0360", size = 56711, upload-time = "2025-02-24T04:41:34.073Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/4e/8c/f3147f5c4b73e7550fe5f9352eaa956ae838d5c51eb58e7a25b9f3e2643b/decorator-5.2.1-py3-none-any.whl", hash = "sha256:d316bb415a2d9e2d2b3abcc4084c6502fc09240e292cd76a76afc106a1c8e04a", size = 9190, upload-time = "2025-02-24T04:41:32.565Z" },
]
[[package]]
name = "executing"
version = "2.2.1"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/cc/28/c14e053b6762b1044f34a13aab6859bbf40456d37d23aa286ac24cfd9a5d/executing-2.2.1.tar.gz", hash = "sha256:3632cc370565f6648cc328b32435bd120a1e4ebb20c77e3fdde9a13cd1e533c4", size = 1129488, upload-time = "2025-09-01T09:48:10.866Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/c1/ea/53f2148663b321f21b5a606bd5f191517cf40b7072c0497d3c92c4a13b1e/executing-2.2.1-py2.py3-none-any.whl", hash = "sha256:760643d3452b4d777d295bb167ccc74c64a81df23fb5e08eff250c425a4b2017", size = 28317, upload-time = "2025-09-01T09:48:08.5Z" },
]
[[package]]
name = "generate-test-data"
version = "0.1.0"
source = { virtual = "." }
dependencies = [
{ name = "pandas" },
{ name = "pyarrow" },
]
[package.dev-dependencies]
dev = [
{ name = "ipykernel" },
]
[package.metadata]
requires-dist = [
{ name = "pandas", specifier = ">=2.3.2" },
{ name = "pyarrow", specifier = ">=21.0.0" },
]
[package.metadata.requires-dev]
dev = [{ name = "ipykernel", specifier = ">=6.30.1" }]
[[package]]
name = "ipykernel"
version = "6.30.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "appnope", marker = "sys_platform == 'darwin'" },
{ name = "comm" },
{ name = "debugpy" },
{ name = "ipython" },
{ name = "jupyter-client" },
{ name = "jupyter-core" },
{ name = "matplotlib-inline" },
{ name = "nest-asyncio" },
{ name = "packaging" },
{ name = "psutil" },
{ name = "pyzmq" },
{ name = "tornado" },
{ name = "traitlets" },
]
sdist = { url = "https://files.pythonhosted.org/packages/bb/76/11082e338e0daadc89c8ff866185de11daf67d181901038f9e139d109761/ipykernel-6.30.1.tar.gz", hash = "sha256:6abb270161896402e76b91394fcdce5d1be5d45f456671e5080572f8505be39b", size = 166260, upload-time = "2025-08-04T15:47:35.018Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/fc/c7/b445faca8deb954fe536abebff4ece5b097b923de482b26e78448c89d1dd/ipykernel-6.30.1-py3-none-any.whl", hash = "sha256:aa6b9fb93dca949069d8b85b6c79b2518e32ac583ae9c7d37c51d119e18b3fb4", size = 117484, upload-time = "2025-08-04T15:47:32.622Z" },
]
[[package]]
name = "ipython"
version = "9.5.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "colorama", marker = "sys_platform == 'win32'" },
{ name = "decorator" },
{ name = "ipython-pygments-lexers" },
{ name = "jedi" },
{ name = "matplotlib-inline" },
{ name = "pexpect", marker = "sys_platform != 'emscripten' and sys_platform != 'win32'" },
{ name = "prompt-toolkit" },
{ name = "pygments" },
{ name = "stack-data" },
{ name = "traitlets" },
]
sdist = { url = "https://files.pythonhosted.org/packages/6e/71/a86262bf5a68bf211bcc71fe302af7e05f18a2852fdc610a854d20d085e6/ipython-9.5.0.tar.gz", hash = "sha256:129c44b941fe6d9b82d36fc7a7c18127ddb1d6f02f78f867f402e2e3adde3113", size = 4389137, upload-time = "2025-08-29T12:15:21.519Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/08/2a/5628a99d04acb2d2f2e749cdf4ea571d2575e898df0528a090948018b726/ipython-9.5.0-py3-none-any.whl", hash = "sha256:88369ffa1d5817d609120daa523a6da06d02518e582347c29f8451732a9c5e72", size = 612426, upload-time = "2025-08-29T12:15:18.866Z" },
]
[[package]]
name = "ipython-pygments-lexers"
version = "1.1.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "pygments" },
]
sdist = { url = "https://files.pythonhosted.org/packages/ef/4c/5dd1d8af08107f88c7f741ead7a40854b8ac24ddf9ae850afbcf698aa552/ipython_pygments_lexers-1.1.1.tar.gz", hash = "sha256:09c0138009e56b6854f9535736f4171d855c8c08a563a0dcd8022f78355c7e81", size = 8393, upload-time = "2025-01-17T11:24:34.505Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/d9/33/1f075bf72b0b747cb3288d011319aaf64083cf2efef8354174e3ed4540e2/ipython_pygments_lexers-1.1.1-py3-none-any.whl", hash = "sha256:a9462224a505ade19a605f71f8fa63c2048833ce50abc86768a0d81d876dc81c", size = 8074, upload-time = "2025-01-17T11:24:33.271Z" },
]
[[package]]
name = "jedi"
version = "0.19.2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "parso" },
]
sdist = { url = "https://files.pythonhosted.org/packages/72/3a/79a912fbd4d8dd6fbb02bf69afd3bb72cf0c729bb3063c6f4498603db17a/jedi-0.19.2.tar.gz", hash = "sha256:4770dc3de41bde3966b02eb84fbcf557fb33cce26ad23da12c742fb50ecb11f0", size = 1231287, upload-time = "2024-11-11T01:41:42.873Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/c0/5a/9cac0c82afec3d09ccd97c8b6502d48f165f9124db81b4bcb90b4af974ee/jedi-0.19.2-py2.py3-none-any.whl", hash = "sha256:a8ef22bde8490f57fe5c7681a3c83cb58874daf72b4784de3cce5b6ef6edb5b9", size = 1572278, upload-time = "2024-11-11T01:41:40.175Z" },
]
[[package]]
name = "jupyter-client"
version = "8.6.3"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "jupyter-core" },
{ name = "python-dateutil" },
{ name = "pyzmq" },
{ name = "tornado" },
{ name = "traitlets" },
]
sdist = { url = "https://files.pythonhosted.org/packages/71/22/bf9f12fdaeae18019a468b68952a60fe6dbab5d67cd2a103cac7659b41ca/jupyter_client-8.6.3.tar.gz", hash = "sha256:35b3a0947c4a6e9d589eb97d7d4cd5e90f910ee73101611f01283732bd6d9419", size = 342019, upload-time = "2024-09-17T10:44:17.613Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/11/85/b0394e0b6fcccd2c1eeefc230978a6f8cb0c5df1e4cd3e7625735a0d7d1e/jupyter_client-8.6.3-py3-none-any.whl", hash = "sha256:e8a19cc986cc45905ac3362915f410f3af85424b4c0905e94fa5f2cb08e8f23f", size = 106105, upload-time = "2024-09-17T10:44:15.218Z" },
]
[[package]]
name = "jupyter-core"
version = "5.8.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "platformdirs" },
{ name = "pywin32", marker = "platform_python_implementation != 'PyPy' and sys_platform == 'win32'" },
{ name = "traitlets" },
]
sdist = { url = "https://files.pythonhosted.org/packages/99/1b/72906d554acfeb588332eaaa6f61577705e9ec752ddb486f302dafa292d9/jupyter_core-5.8.1.tar.gz", hash = "sha256:0a5f9706f70e64786b75acba995988915ebd4601c8a52e534a40b51c95f59941", size = 88923, upload-time = "2025-05-27T07:38:16.655Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/2f/57/6bffd4b20b88da3800c5d691e0337761576ee688eb01299eae865689d2df/jupyter_core-5.8.1-py3-none-any.whl", hash = "sha256:c28d268fc90fb53f1338ded2eb410704c5449a358406e8a948b75706e24863d0", size = 28880, upload-time = "2025-05-27T07:38:15.137Z" },
]
[[package]]
name = "matplotlib-inline"
version = "0.1.7"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "traitlets" },
]
sdist = { url = "https://files.pythonhosted.org/packages/99/5b/a36a337438a14116b16480db471ad061c36c3694df7c2084a0da7ba538b7/matplotlib_inline-0.1.7.tar.gz", hash = "sha256:8423b23ec666be3d16e16b60bdd8ac4e86e840ebd1dd11a30b9f117f2fa0ab90", size = 8159, upload-time = "2024-04-15T13:44:44.803Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/8f/8e/9ad090d3553c280a8060fbf6e24dc1c0c29704ee7d1c372f0c174aa59285/matplotlib_inline-0.1.7-py3-none-any.whl", hash = "sha256:df192d39a4ff8f21b1895d72e6a13f5fcc5099f00fa84384e0ea28c2cc0653ca", size = 9899, upload-time = "2024-04-15T13:44:43.265Z" },
]
[[package]]
name = "nest-asyncio"
version = "1.6.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/83/f8/51569ac65d696c8ecbee95938f89d4abf00f47d58d48f6fbabfe8f0baefe/nest_asyncio-1.6.0.tar.gz", hash = "sha256:6f172d5449aca15afd6c646851f4e31e02c598d553a667e38cafa997cfec55fe", size = 7418, upload-time = "2024-01-21T14:25:19.227Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/a0/c4/c2971a3ba4c6103a3d10c4b0f24f461ddc027f0f09763220cf35ca1401b3/nest_asyncio-1.6.0-py3-none-any.whl", hash = "sha256:87af6efd6b5e897c81050477ef65c62e2b2f35d51703cae01aff2905b1852e1c", size = 5195, upload-time = "2024-01-21T14:25:17.223Z" },
]
[[package]]
name = "numpy"
version = "2.3.3"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/d0/19/95b3d357407220ed24c139018d2518fab0a61a948e68286a25f1a4d049ff/numpy-2.3.3.tar.gz", hash = "sha256:ddc7c39727ba62b80dfdbedf400d1c10ddfa8eefbd7ec8dcb118be8b56d31029", size = 20576648, upload-time = "2025-09-09T16:54:12.543Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/51/5d/bb7fc075b762c96329147799e1bcc9176ab07ca6375ea976c475482ad5b3/numpy-2.3.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:cfdd09f9c84a1a934cde1eec2267f0a43a7cd44b2cca4ff95b7c0d14d144b0bf", size = 20957014, upload-time = "2025-09-09T15:56:29.966Z" },
{ url = "https://files.pythonhosted.org/packages/6b/0e/c6211bb92af26517acd52125a237a92afe9c3124c6a68d3b9f81b62a0568/numpy-2.3.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:cb32e3cf0f762aee47ad1ddc6672988f7f27045b0783c887190545baba73aa25", size = 14185220, upload-time = "2025-09-09T15:56:32.175Z" },
{ url = "https://files.pythonhosted.org/packages/22/f2/07bb754eb2ede9073f4054f7c0286b0d9d2e23982e090a80d478b26d35ca/numpy-2.3.3-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:396b254daeb0a57b1fe0ecb5e3cff6fa79a380fa97c8f7781a6d08cd429418fe", size = 5113918, upload-time = "2025-09-09T15:56:34.175Z" },
{ url = "https://files.pythonhosted.org/packages/81/0a/afa51697e9fb74642f231ea36aca80fa17c8fb89f7a82abd5174023c3960/numpy-2.3.3-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:067e3d7159a5d8f8a0b46ee11148fc35ca9b21f61e3c49fbd0a027450e65a33b", size = 6647922, upload-time = "2025-09-09T15:56:36.149Z" },
{ url = "https://files.pythonhosted.org/packages/5d/f5/122d9cdb3f51c520d150fef6e87df9279e33d19a9611a87c0d2cf78a89f4/numpy-2.3.3-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1c02d0629d25d426585fb2e45a66154081b9fa677bc92a881ff1d216bc9919a8", size = 14281991, upload-time = "2025-09-09T15:56:40.548Z" },
{ url = "https://files.pythonhosted.org/packages/51/64/7de3c91e821a2debf77c92962ea3fe6ac2bc45d0778c1cbe15d4fce2fd94/numpy-2.3.3-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d9192da52b9745f7f0766531dcfa978b7763916f158bb63bdb8a1eca0068ab20", size = 16641643, upload-time = "2025-09-09T15:56:43.343Z" },
{ url = "https://files.pythonhosted.org/packages/30/e4/961a5fa681502cd0d68907818b69f67542695b74e3ceaa513918103b7e80/numpy-2.3.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:cd7de500a5b66319db419dc3c345244404a164beae0d0937283b907d8152e6ea", size = 16056787, upload-time = "2025-09-09T15:56:46.141Z" },
{ url = "https://files.pythonhosted.org/packages/99/26/92c912b966e47fbbdf2ad556cb17e3a3088e2e1292b9833be1dfa5361a1a/numpy-2.3.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:93d4962d8f82af58f0b2eb85daaf1b3ca23fe0a85d0be8f1f2b7bb46034e56d7", size = 18579598, upload-time = "2025-09-09T15:56:49.844Z" },
{ url = "https://files.pythonhosted.org/packages/17/b6/fc8f82cb3520768718834f310c37d96380d9dc61bfdaf05fe5c0b7653e01/numpy-2.3.3-cp312-cp312-win32.whl", hash = "sha256:5534ed6b92f9b7dca6c0a19d6df12d41c68b991cef051d108f6dbff3babc4ebf", size = 6320800, upload-time = "2025-09-09T15:56:52.499Z" },
{ url = "https://files.pythonhosted.org/packages/32/ee/de999f2625b80d043d6d2d628c07d0d5555a677a3cf78fdf868d409b8766/numpy-2.3.3-cp312-cp312-win_amd64.whl", hash = "sha256:497d7cad08e7092dba36e3d296fe4c97708c93daf26643a1ae4b03f6294d30eb", size = 12786615, upload-time = "2025-09-09T15:56:54.422Z" },
{ url = "https://files.pythonhosted.org/packages/49/6e/b479032f8a43559c383acb20816644f5f91c88f633d9271ee84f3b3a996c/numpy-2.3.3-cp312-cp312-win_arm64.whl", hash = "sha256:ca0309a18d4dfea6fc6262a66d06c26cfe4640c3926ceec90e57791a82b6eee5", size = 10195936, upload-time = "2025-09-09T15:56:56.541Z" },
{ url = "https://files.pythonhosted.org/packages/7d/b9/984c2b1ee61a8b803bf63582b4ac4242cf76e2dbd663efeafcb620cc0ccb/numpy-2.3.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f5415fb78995644253370985342cd03572ef8620b934da27d77377a2285955bf", size = 20949588, upload-time = "2025-09-09T15:56:59.087Z" },
{ url = "https://files.pythonhosted.org/packages/a6/e4/07970e3bed0b1384d22af1e9912527ecbeb47d3b26e9b6a3bced068b3bea/numpy-2.3.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:d00de139a3324e26ed5b95870ce63be7ec7352171bc69a4cf1f157a48e3eb6b7", size = 14177802, upload-time = "2025-09-09T15:57:01.73Z" },
{ url = "https://files.pythonhosted.org/packages/35/c7/477a83887f9de61f1203bad89cf208b7c19cc9fef0cebef65d5a1a0619f2/numpy-2.3.3-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:9dc13c6a5829610cc07422bc74d3ac083bd8323f14e2827d992f9e52e22cd6a6", size = 5106537, upload-time = "2025-09-09T15:57:03.765Z" },
{ url = "https://files.pythonhosted.org/packages/52/47/93b953bd5866a6f6986344d045a207d3f1cfbad99db29f534ea9cee5108c/numpy-2.3.3-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:d79715d95f1894771eb4e60fb23f065663b2298f7d22945d66877aadf33d00c7", size = 6640743, upload-time = "2025-09-09T15:57:07.921Z" },
{ url = "https://files.pythonhosted.org/packages/23/83/377f84aaeb800b64c0ef4de58b08769e782edcefa4fea712910b6f0afd3c/numpy-2.3.3-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:952cfd0748514ea7c3afc729a0fc639e61655ce4c55ab9acfab14bda4f402b4c", size = 14278881, upload-time = "2025-09-09T15:57:11.349Z" },
{ url = "https://files.pythonhosted.org/packages/9a/a5/bf3db6e66c4b160d6ea10b534c381a1955dfab34cb1017ea93aa33c70ed3/numpy-2.3.3-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5b83648633d46f77039c29078751f80da65aa64d5622a3cd62aaef9d835b6c93", size = 16636301, upload-time = "2025-09-09T15:57:14.245Z" },
{ url = "https://files.pythonhosted.org/packages/a2/59/1287924242eb4fa3f9b3a2c30400f2e17eb2707020d1c5e3086fe7330717/numpy-2.3.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:b001bae8cea1c7dfdb2ae2b017ed0a6f2102d7a70059df1e338e307a4c78a8ae", size = 16053645, upload-time = "2025-09-09T15:57:16.534Z" },
{ url = "https://files.pythonhosted.org/packages/e6/93/b3d47ed882027c35e94ac2320c37e452a549f582a5e801f2d34b56973c97/numpy-2.3.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8e9aced64054739037d42fb84c54dd38b81ee238816c948c8f3ed134665dcd86", size = 18578179, upload-time = "2025-09-09T15:57:18.883Z" },
{ url = "https://files.pythonhosted.org/packages/20/d9/487a2bccbf7cc9d4bfc5f0f197761a5ef27ba870f1e3bbb9afc4bbe3fcc2/numpy-2.3.3-cp313-cp313-win32.whl", hash = "sha256:9591e1221db3f37751e6442850429b3aabf7026d3b05542d102944ca7f00c8a8", size = 6312250, upload-time = "2025-09-09T15:57:21.296Z" },
{ url = "https://files.pythonhosted.org/packages/1b/b5/263ebbbbcede85028f30047eab3d58028d7ebe389d6493fc95ae66c636ab/numpy-2.3.3-cp313-cp313-win_amd64.whl", hash = "sha256:f0dadeb302887f07431910f67a14d57209ed91130be0adea2f9793f1a4f817cf", size = 12783269, upload-time = "2025-09-09T15:57:23.034Z" },
{ url = "https://files.pythonhosted.org/packages/fa/75/67b8ca554bbeaaeb3fac2e8bce46967a5a06544c9108ec0cf5cece559b6c/numpy-2.3.3-cp313-cp313-win_arm64.whl", hash = "sha256:3c7cf302ac6e0b76a64c4aecf1a09e51abd9b01fc7feee80f6c43e3ab1b1dbc5", size = 10195314, upload-time = "2025-09-09T15:57:25.045Z" },
{ url = "https://files.pythonhosted.org/packages/11/d0/0d1ddec56b162042ddfafeeb293bac672de9b0cfd688383590090963720a/numpy-2.3.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:eda59e44957d272846bb407aad19f89dc6f58fecf3504bd144f4c5cf81a7eacc", size = 21048025, upload-time = "2025-09-09T15:57:27.257Z" },
{ url = "https://files.pythonhosted.org/packages/36/9e/1996ca6b6d00415b6acbdd3c42f7f03ea256e2c3f158f80bd7436a8a19f3/numpy-2.3.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:823d04112bc85ef5c4fda73ba24e6096c8f869931405a80aa8b0e604510a26bc", size = 14301053, upload-time = "2025-09-09T15:57:30.077Z" },
{ url = "https://files.pythonhosted.org/packages/05/24/43da09aa764c68694b76e84b3d3f0c44cb7c18cdc1ba80e48b0ac1d2cd39/numpy-2.3.3-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:40051003e03db4041aa325da2a0971ba41cf65714e65d296397cc0e32de6018b", size = 5229444, upload-time = "2025-09-09T15:57:32.733Z" },
{ url = "https://files.pythonhosted.org/packages/bc/14/50ffb0f22f7218ef8af28dd089f79f68289a7a05a208db9a2c5dcbe123c1/numpy-2.3.3-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:6ee9086235dd6ab7ae75aba5662f582a81ced49f0f1c6de4260a78d8f2d91a19", size = 6738039, upload-time = "2025-09-09T15:57:34.328Z" },
{ url = "https://files.pythonhosted.org/packages/55/52/af46ac0795e09657d45a7f4db961917314377edecf66db0e39fa7ab5c3d3/numpy-2.3.3-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:94fcaa68757c3e2e668ddadeaa86ab05499a70725811e582b6a9858dd472fb30", size = 14352314, upload-time = "2025-09-09T15:57:36.255Z" },
{ url = "https://files.pythonhosted.org/packages/a7/b1/dc226b4c90eb9f07a3fff95c2f0db3268e2e54e5cce97c4ac91518aee71b/numpy-2.3.3-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:da1a74b90e7483d6ce5244053399a614b1d6b7bc30a60d2f570e5071f8959d3e", size = 16701722, upload-time = "2025-09-09T15:57:38.622Z" },
{ url = "https://files.pythonhosted.org/packages/9d/9d/9d8d358f2eb5eced14dba99f110d83b5cd9a4460895230f3b396ad19a323/numpy-2.3.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:2990adf06d1ecee3b3dcbb4977dfab6e9f09807598d647f04d385d29e7a3c3d3", size = 16132755, upload-time = "2025-09-09T15:57:41.16Z" },
{ url = "https://files.pythonhosted.org/packages/b6/27/b3922660c45513f9377b3fb42240bec63f203c71416093476ec9aa0719dc/numpy-2.3.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:ed635ff692483b8e3f0fcaa8e7eb8a75ee71aa6d975388224f70821421800cea", size = 18651560, upload-time = "2025-09-09T15:57:43.459Z" },
{ url = "https://files.pythonhosted.org/packages/5b/8e/3ab61a730bdbbc201bb245a71102aa609f0008b9ed15255500a99cd7f780/numpy-2.3.3-cp313-cp313t-win32.whl", hash = "sha256:a333b4ed33d8dc2b373cc955ca57babc00cd6f9009991d9edc5ddbc1bac36bcd", size = 6442776, upload-time = "2025-09-09T15:57:45.793Z" },
{ url = "https://files.pythonhosted.org/packages/1c/3a/e22b766b11f6030dc2decdeff5c2fb1610768055603f9f3be88b6d192fb2/numpy-2.3.3-cp313-cp313t-win_amd64.whl", hash = "sha256:4384a169c4d8f97195980815d6fcad04933a7e1ab3b530921c3fef7a1c63426d", size = 12927281, upload-time = "2025-09-09T15:57:47.492Z" },
{ url = "https://files.pythonhosted.org/packages/7b/42/c2e2bc48c5e9b2a83423f99733950fbefd86f165b468a3d85d52b30bf782/numpy-2.3.3-cp313-cp313t-win_arm64.whl", hash = "sha256:75370986cc0bc66f4ce5110ad35aae6d182cc4ce6433c40ad151f53690130bf1", size = 10265275, upload-time = "2025-09-09T15:57:49.647Z" },
{ url = "https://files.pythonhosted.org/packages/6b/01/342ad585ad82419b99bcf7cebe99e61da6bedb89e213c5fd71acc467faee/numpy-2.3.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:cd052f1fa6a78dee696b58a914b7229ecfa41f0a6d96dc663c1220a55e137593", size = 20951527, upload-time = "2025-09-09T15:57:52.006Z" },
{ url = "https://files.pythonhosted.org/packages/ef/d8/204e0d73fc1b7a9ee80ab1fe1983dd33a4d64a4e30a05364b0208e9a241a/numpy-2.3.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:414a97499480067d305fcac9716c29cf4d0d76db6ebf0bf3cbce666677f12652", size = 14186159, upload-time = "2025-09-09T15:57:54.407Z" },
{ url = "https://files.pythonhosted.org/packages/22/af/f11c916d08f3a18fb8ba81ab72b5b74a6e42ead4c2846d270eb19845bf74/numpy-2.3.3-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:50a5fe69f135f88a2be9b6ca0481a68a136f6febe1916e4920e12f1a34e708a7", size = 5114624, upload-time = "2025-09-09T15:57:56.5Z" },
{ url = "https://files.pythonhosted.org/packages/fb/11/0ed919c8381ac9d2ffacd63fd1f0c34d27e99cab650f0eb6f110e6ae4858/numpy-2.3.3-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:b912f2ed2b67a129e6a601e9d93d4fa37bef67e54cac442a2f588a54afe5c67a", size = 6642627, upload-time = "2025-09-09T15:57:58.206Z" },
{ url = "https://files.pythonhosted.org/packages/ee/83/deb5f77cb0f7ba6cb52b91ed388b47f8f3c2e9930d4665c600408d9b90b9/numpy-2.3.3-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9e318ee0596d76d4cb3d78535dc005fa60e5ea348cd131a51e99d0bdbe0b54fe", size = 14296926, upload-time = "2025-09-09T15:58:00.035Z" },
{ url = "https://files.pythonhosted.org/packages/77/cc/70e59dcb84f2b005d4f306310ff0a892518cc0c8000a33d0e6faf7ca8d80/numpy-2.3.3-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ce020080e4a52426202bdb6f7691c65bb55e49f261f31a8f506c9f6bc7450421", size = 16638958, upload-time = "2025-09-09T15:58:02.738Z" },
{ url = "https://files.pythonhosted.org/packages/b6/5a/b2ab6c18b4257e099587d5b7f903317bd7115333ad8d4ec4874278eafa61/numpy-2.3.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:e6687dc183aa55dae4a705b35f9c0f8cb178bcaa2f029b241ac5356221d5c021", size = 16071920, upload-time = "2025-09-09T15:58:05.029Z" },
{ url = "https://files.pythonhosted.org/packages/b8/f1/8b3fdc44324a259298520dd82147ff648979bed085feeacc1250ef1656c0/numpy-2.3.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:d8f3b1080782469fdc1718c4ed1d22549b5fb12af0d57d35e992158a772a37cf", size = 18577076, upload-time = "2025-09-09T15:58:07.745Z" },
{ url = "https://files.pythonhosted.org/packages/f0/a1/b87a284fb15a42e9274e7fcea0dad259d12ddbf07c1595b26883151ca3b4/numpy-2.3.3-cp314-cp314-win32.whl", hash = "sha256:cb248499b0bc3be66ebd6578b83e5acacf1d6cb2a77f2248ce0e40fbec5a76d0", size = 6366952, upload-time = "2025-09-09T15:58:10.096Z" },
{ url = "https://files.pythonhosted.org/packages/70/5f/1816f4d08f3b8f66576d8433a66f8fa35a5acfb3bbd0bf6c31183b003f3d/numpy-2.3.3-cp314-cp314-win_amd64.whl", hash = "sha256:691808c2b26b0f002a032c73255d0bd89751425f379f7bcd22d140db593a96e8", size = 12919322, upload-time = "2025-09-09T15:58:12.138Z" },
{ url = "https://files.pythonhosted.org/packages/8c/de/072420342e46a8ea41c324a555fa90fcc11637583fb8df722936aed1736d/numpy-2.3.3-cp314-cp314-win_arm64.whl", hash = "sha256:9ad12e976ca7b10f1774b03615a2a4bab8addce37ecc77394d8e986927dc0dfe", size = 10478630, upload-time = "2025-09-09T15:58:14.64Z" },
{ url = "https://files.pythonhosted.org/packages/d5/df/ee2f1c0a9de7347f14da5dd3cd3c3b034d1b8607ccb6883d7dd5c035d631/numpy-2.3.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:9cc48e09feb11e1db00b320e9d30a4151f7369afb96bd0e48d942d09da3a0d00", size = 21047987, upload-time = "2025-09-09T15:58:16.889Z" },
{ url = "https://files.pythonhosted.org/packages/d6/92/9453bdc5a4e9e69cf4358463f25e8260e2ffc126d52e10038b9077815989/numpy-2.3.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:901bf6123879b7f251d3631967fd574690734236075082078e0571977c6a8e6a", size = 14301076, upload-time = "2025-09-09T15:58:20.343Z" },
{ url = "https://files.pythonhosted.org/packages/13/77/1447b9eb500f028bb44253105bd67534af60499588a5149a94f18f2ca917/numpy-2.3.3-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:7f025652034199c301049296b59fa7d52c7e625017cae4c75d8662e377bf487d", size = 5229491, upload-time = "2025-09-09T15:58:22.481Z" },
{ url = "https://files.pythonhosted.org/packages/3d/f9/d72221b6ca205f9736cb4b2ce3b002f6e45cd67cd6a6d1c8af11a2f0b649/numpy-2.3.3-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:533ca5f6d325c80b6007d4d7fb1984c303553534191024ec6a524a4c92a5935a", size = 6737913, upload-time = "2025-09-09T15:58:24.569Z" },
{ url = "https://files.pythonhosted.org/packages/3c/5f/d12834711962ad9c46af72f79bb31e73e416ee49d17f4c797f72c96b6ca5/numpy-2.3.3-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0edd58682a399824633b66885d699d7de982800053acf20be1eaa46d92009c54", size = 14352811, upload-time = "2025-09-09T15:58:26.416Z" },
{ url = "https://files.pythonhosted.org/packages/a1/0d/fdbec6629d97fd1bebed56cd742884e4eead593611bbe1abc3eb40d304b2/numpy-2.3.3-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:367ad5d8fbec5d9296d18478804a530f1191e24ab4d75ab408346ae88045d25e", size = 16702689, upload-time = "2025-09-09T15:58:28.831Z" },
{ url = "https://files.pythonhosted.org/packages/9b/09/0a35196dc5575adde1eb97ddfbc3e1687a814f905377621d18ca9bc2b7dd/numpy-2.3.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:8f6ac61a217437946a1fa48d24c47c91a0c4f725237871117dea264982128097", size = 16133855, upload-time = "2025-09-09T15:58:31.349Z" },
{ url = "https://files.pythonhosted.org/packages/7a/ca/c9de3ea397d576f1b6753eaa906d4cdef1bf97589a6d9825a349b4729cc2/numpy-2.3.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:179a42101b845a816d464b6fe9a845dfaf308fdfc7925387195570789bb2c970", size = 18652520, upload-time = "2025-09-09T15:58:33.762Z" },
{ url = "https://files.pythonhosted.org/packages/fd/c2/e5ed830e08cd0196351db55db82f65bc0ab05da6ef2b72a836dcf1936d2f/numpy-2.3.3-cp314-cp314t-win32.whl", hash = "sha256:1250c5d3d2562ec4174bce2e3a1523041595f9b651065e4a4473f5f48a6bc8a5", size = 6515371, upload-time = "2025-09-09T15:58:36.04Z" },
{ url = "https://files.pythonhosted.org/packages/47/c7/b0f6b5b67f6788a0725f744496badbb604d226bf233ba716683ebb47b570/numpy-2.3.3-cp314-cp314t-win_amd64.whl", hash = "sha256:b37a0b2e5935409daebe82c1e42274d30d9dd355852529eab91dab8dcca7419f", size = 13112576, upload-time = "2025-09-09T15:58:37.927Z" },
{ url = "https://files.pythonhosted.org/packages/06/b9/33bba5ff6fb679aa0b1f8a07e853f002a6b04b9394db3069a1270a7784ca/numpy-2.3.3-cp314-cp314t-win_arm64.whl", hash = "sha256:78c9f6560dc7e6b3990e32df7ea1a50bbd0e2a111e05209963f5ddcab7073b0b", size = 10545953, upload-time = "2025-09-09T15:58:40.576Z" },
]
[[package]]
name = "packaging"
version = "25.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/a1/d4/1fc4078c65507b51b96ca8f8c3ba19e6a61c8253c72794544580a7b6c24d/packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f", size = 165727, upload-time = "2025-04-19T11:48:59.673Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469, upload-time = "2025-04-19T11:48:57.875Z" },
]
[[package]]
name = "pandas"
version = "2.3.2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "numpy" },
{ name = "python-dateutil" },
{ name = "pytz" },
{ name = "tzdata" },
]
sdist = { url = "https://files.pythonhosted.org/packages/79/8e/0e90233ac205ad182bd6b422532695d2b9414944a280488105d598c70023/pandas-2.3.2.tar.gz", hash = "sha256:ab7b58f8f82706890924ccdfb5f48002b83d2b5a3845976a9fb705d36c34dcdb", size = 4488684, upload-time = "2025-08-21T10:28:29.257Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/ec/db/614c20fb7a85a14828edd23f1c02db58a30abf3ce76f38806155d160313c/pandas-2.3.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:3fbb977f802156e7a3f829e9d1d5398f6192375a3e2d1a9ee0803e35fe70a2b9", size = 11587652, upload-time = "2025-08-21T10:27:15.888Z" },
{ url = "https://files.pythonhosted.org/packages/99/b0/756e52f6582cade5e746f19bad0517ff27ba9c73404607c0306585c201b3/pandas-2.3.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1b9b52693123dd234b7c985c68b709b0b009f4521000d0525f2b95c22f15944b", size = 10717686, upload-time = "2025-08-21T10:27:18.486Z" },
{ url = "https://files.pythonhosted.org/packages/37/4c/dd5ccc1e357abfeee8353123282de17997f90ff67855f86154e5a13b81e5/pandas-2.3.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0bd281310d4f412733f319a5bc552f86d62cddc5f51d2e392c8787335c994175", size = 11278722, upload-time = "2025-08-21T10:27:21.149Z" },
{ url = "https://files.pythonhosted.org/packages/d3/a4/f7edcfa47e0a88cda0be8b068a5bae710bf264f867edfdf7b71584ace362/pandas-2.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:96d31a6b4354e3b9b8a2c848af75d31da390657e3ac6f30c05c82068b9ed79b9", size = 11987803, upload-time = "2025-08-21T10:27:23.767Z" },
{ url = "https://files.pythonhosted.org/packages/f6/61/1bce4129f93ab66f1c68b7ed1c12bac6a70b1b56c5dab359c6bbcd480b52/pandas-2.3.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:df4df0b9d02bb873a106971bb85d448378ef14b86ba96f035f50bbd3688456b4", size = 12766345, upload-time = "2025-08-21T10:27:26.6Z" },
{ url = "https://files.pythonhosted.org/packages/8e/46/80d53de70fee835531da3a1dae827a1e76e77a43ad22a8cd0f8142b61587/pandas-2.3.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:213a5adf93d020b74327cb2c1b842884dbdd37f895f42dcc2f09d451d949f811", size = 13439314, upload-time = "2025-08-21T10:27:29.213Z" },
{ url = "https://files.pythonhosted.org/packages/28/30/8114832daff7489f179971dbc1d854109b7f4
gitextract_9fyd8eir/ ├── .cargo/ │ └── config.toml ├── .github/ │ ├── dependabot.yml │ └── workflows/ │ ├── docs-website.yml │ ├── pr-manipulation.yml │ └── test.yml ├── .gitignore ├── .vscode/ │ └── settings.json ├── .yarnrc.yml ├── CHANGELOG.md ├── Cargo.toml ├── DEVELOP.md ├── LICENSE_APACHE ├── LICENSE_MIT ├── README.md ├── bench/ │ ├── bench.ts │ ├── make_data.py │ └── pyproject.toml ├── package.json ├── scripts/ │ ├── build.sh │ └── report_build.sh ├── src/ │ ├── common/ │ │ ├── fetch.rs │ │ ├── mod.rs │ │ ├── properties.rs │ │ └── stream.rs │ ├── error.rs │ ├── lib.rs │ ├── metadata.rs │ ├── read_options.rs │ ├── reader.rs │ ├── reader_async.rs │ ├── utils.rs │ ├── wasm.rs │ ├── writer.rs │ ├── writer_async.rs │ └── writer_properties.rs ├── templates/ │ └── package.json ├── tests/ │ ├── data/ │ │ ├── .python-version │ │ ├── 1-partition-brotli.parquet │ │ ├── 1-partition-gzip.parquet │ │ ├── 1-partition-lz4.parquet │ │ ├── 1-partition-none.parquet │ │ ├── 1-partition-snappy.parquet │ │ ├── 1-partition-zstd.parquet │ │ ├── 2-partition-brotli.parquet │ │ ├── 2-partition-gzip.parquet │ │ ├── 2-partition-lz4.parquet │ │ ├── 2-partition-none.parquet │ │ ├── 2-partition-snappy.parquet │ │ ├── 2-partition-zstd.parquet │ │ ├── README.md │ │ ├── data.arrow │ │ ├── empty.parquet │ │ ├── generate_data.py │ │ ├── generate_geo_data.py │ │ ├── naturalearth_cities_geoarrow.parquet │ │ ├── naturalearth_cities_wkb.parquet │ │ ├── pyproject.toml │ │ ├── string_view.parquet │ │ └── uv.lock │ ├── js/ │ │ ├── ffi.test.ts │ │ ├── geo-metadata.test.ts │ │ ├── index.test.ts │ │ ├── read-write.test.ts │ │ ├── schema.test.ts │ │ └── utils.ts │ └── web.rs ├── tsconfig.docs.json ├── tsconfig.json └── typedoc.json
SYMBOL INDEX (162 symbols across 22 files)
FILE: bench/bench.ts
function loadFile (line 26) | function loadFile(name: string): Uint8Array {
FILE: bench/make_data.py
function create_table (line 11) | def create_table(n_rows=1_000_000):
function write_table (line 25) | def write_table(table):
function main (line 40) | def main():
FILE: src/common/fetch.rs
function _get_content_length (line 8) | pub async fn _get_content_length(url: String) -> Result<usize, reqwest::...
function get_content_length (line 14) | pub async fn get_content_length(url: String) -> Result<usize, reqwest::E...
function range_from_start_and_length (line 25) | pub fn range_from_start_and_length(start: u64, length: u64) -> String {
function range_from_start (line 32) | pub fn range_from_start(start: u64) -> String {
function range_from_end (line 36) | pub fn range_from_end(length: usize) -> String {
function _make_range_request (line 41) | async fn _make_range_request(
function make_range_request (line 57) | pub async fn make_range_request(
function create_reader (line 72) | pub fn create_reader(
FILE: src/common/properties.rs
type Compression (line 11) | pub enum Compression {
method from (line 40) | fn from(x: parquet::basic::Compression) -> Compression {
function from (line 25) | fn from(x: Compression) -> parquet::basic::Compression {
type Encoding (line 60) | pub enum Encoding {
method from (line 138) | fn from(x: parquet::basic::Encoding) -> Encoding {
function from (line 121) | fn from(x: Encoding) -> parquet::basic::Encoding {
type WriterVersion (line 157) | pub enum WriterVersion {
function from (line 163) | fn from(x: WriterVersion) -> parquet::file::properties::WriterVersion {
FILE: src/common/stream.rs
type WrappedWritableStream (line 3) | pub struct WrappedWritableStream<'writer> {
method poll_write (line 8) | fn poll_write(
method poll_flush (line 16) | fn poll_flush(
method poll_close (line 23) | fn poll_close(
FILE: src/error.rs
type ParquetWasmError (line 7) | pub enum ParquetWasmError {
method from (line 28) | fn from(err: ArrowError) -> Self {
method from (line 34) | fn from(err: ParquetError) -> Self {
method from (line 41) | fn from(err: reqwest::Error) -> Self {
type Result (line 24) | pub type Result<T> = std::result::Result<T, ParquetWasmError>;
type WasmResult (line 25) | pub type WasmResult<T> = std::result::Result<T, JsError>;
FILE: src/metadata.rs
type ParquetMetaData (line 8) | pub struct ParquetMetaData(parquet::file::metadata::ParquetMetaData);
method file_metadata (line 14) | pub fn file_metadata(&self) -> FileMetaData {
method num_row_groups (line 20) | pub fn num_row_groups(&self) -> usize {
method row_group (line 27) | pub fn row_group(&self, i: usize) -> RowGroupMetaData {
method row_groups (line 33) | pub fn row_groups(&self) -> Vec<RowGroupMetaData> {
method from (line 48) | fn from(value: parquet::file::metadata::ParquetMetaData) -> Self {
function from (line 54) | fn from(value: ParquetMetaData) -> Self {
type FileMetaData (line 62) | pub struct FileMetaData(parquet::file::metadata::FileMetaData);
method version (line 68) | pub fn version(&self) -> i32 {
method num_rows (line 74) | pub fn num_rows(&self) -> f64 {
method created_by (line 87) | pub fn created_by(&self) -> Option<String> {
method key_value_metadata (line 94) | pub fn key_value_metadata(&self) -> Result<js_sys::Map, JsValue> {
method from (line 108) | fn from(value: parquet::file::metadata::FileMetaData) -> Self {
function from (line 114) | fn from(value: FileMetaData) -> Self {
type RowGroupMetaData (line 122) | pub struct RowGroupMetaData(parquet::file::metadata::RowGroupMetaData);
method num_columns (line 128) | pub fn num_columns(&self) -> usize {
method column (line 134) | pub fn column(&self, i: usize) -> ColumnChunkMetaData {
method columns (line 140) | pub fn columns(&self) -> Vec<ColumnChunkMetaData> {
method num_rows (line 150) | pub fn num_rows(&self) -> f64 {
method total_byte_size (line 156) | pub fn total_byte_size(&self) -> f64 {
method compressed_size (line 162) | pub fn compressed_size(&self) -> f64 {
method from (line 168) | fn from(value: parquet::file::metadata::RowGroupMetaData) -> Self {
function from (line 174) | fn from(value: RowGroupMetaData) -> Self {
type ColumnChunkMetaData (line 182) | pub struct ColumnChunkMetaData(parquet::file::metadata::ColumnChunkMetaD...
method file_path (line 191) | pub fn file_path(&self) -> Option<String> {
method file_offset (line 197) | pub fn file_offset(&self) -> i64 {
method column_path (line 208) | pub fn column_path(&self) -> Vec<String> {
method encodings (line 215) | pub fn encodings(&self) -> Vec<Encoding> {
method num_values (line 225) | pub fn num_values(&self) -> f64 {
method compression (line 230) | pub fn compression(&self) -> Compression {
method compressed_size (line 236) | pub fn compressed_size(&self) -> f64 {
method uncompressed_size (line 242) | pub fn uncompressed_size(&self) -> f64 {
method from (line 248) | fn from(value: parquet::file::metadata::ColumnChunkMetaData) -> Self {
function from (line 254) | fn from(value: ColumnChunkMetaData) -> Self {
FILE: src/read_options.rs
constant TS_ReaderOptions (line 10) | const TS_ReaderOptions: &'static str = r#"
type JsReaderOptions (line 36) | pub struct JsReaderOptions {
method apply_to_builder (line 57) | pub fn apply_to_builder<T>(
type Error (line 89) | type Error = serde_wasm_bindgen::Error;
method try_from (line 91) | fn try_from(value: ReaderOptions) -> std::result::Result<Self, Self::E...
function generate_projection_mask (line 96) | fn generate_projection_mask<S: AsRef<str>>(
FILE: src/reader.rs
function read_parquet (line 13) | pub fn read_parquet(parquet_file: Vec<u8>, options: JsReaderOptions) -> ...
function read_schema (line 53) | pub fn read_schema(parquet_file: Vec<u8>) -> Result<Schema> {
function cast_metadata_view_types (line 62) | pub(crate) fn cast_metadata_view_types(
function cast_view_types (line 87) | fn cast_view_types(schema: &arrow_schema::Schema) -> arrow_schema::Schem...
function _cast_view_types_of_fields (line 98) | fn _cast_view_types_of_fields<'a>(fields: impl Iterator<Item = &'a Field...
function has_view_types (line 133) | fn has_view_types<'a>(mut fields: impl Iterator<Item = &'a FieldRef>) ->...
FILE: src/reader_async.rs
constant OBJECT_STORE_COALESCE_DEFAULT (line 37) | const OBJECT_STORE_COALESCE_DEFAULT: u64 = 1024 * 1024;
function create_builder (line 39) | fn create_builder<T: AsyncFileReader + Unpin + 'static>(
type InnerParquetFile (line 55) | enum InnerParquetFile {
method get_bytes (line 61) | fn get_bytes(&mut self, range: Range<u64>) -> BoxFuture<'_, parquet::err...
method get_byte_ranges (line 68) | fn get_byte_ranges(
method get_metadata (line 78) | fn get_metadata<'a>(
type ParquetFile (line 90) | pub struct ParquetFile {
method from_url (line 99) | pub async fn from_url(url: String) -> WasmResult<ParquetFile> {
method from_file (line 117) | pub async fn from_file(handle: web_sys::Blob) -> WasmResult<ParquetFil...
method metadata (line 127) | pub fn metadata(&self) -> WasmResult<crate::metadata::ParquetMetaData> {
method schema (line 132) | pub fn schema(&self) -> WasmResult<arrow_wasm::Schema> {
method read (line 149) | pub async fn read(&self, options: Option<ReaderOptions>) -> WasmResult...
method stream (line 179) | pub async fn stream(
type HTTPFileReader (line 212) | pub struct HTTPFileReader {
method new (line 219) | pub fn new(url: String, client: Client, coalesce_byte_size: u64) -> Se...
method fetch_suffix (line 229) | fn fetch_suffix(&mut self, suffix: usize) -> BoxFuture<'_, parquet::erro...
function get_bytes_http (line 250) | async fn get_bytes_http(
method get_bytes (line 268) | fn get_bytes(&mut self, range: Range<u64>) -> BoxFuture<'_, parquet::err...
method get_byte_ranges (line 272) | fn get_byte_ranges(
method get_metadata (line 287) | fn get_metadata<'a>(
type WrappedFile (line 303) | struct WrappedFile {
method new (line 318) | pub fn new(inner: web_sys::Blob) -> Self {
method get_bytes (line 323) | pub async fn get_bytes(&mut self, range: Range<u64>) -> Vec<u8> {
function get_bytes_file (line 344) | async fn get_bytes_file(
type JsFileReader (line 358) | pub struct JsFileReader {
method new (line 364) | pub fn new(file: web_sys::Blob, coalesce_byte_size: u64) -> Self {
method get_bytes (line 373) | fn get_bytes(&mut self, range: Range<u64>) -> BoxFuture<'_, parquet::err...
method get_byte_ranges (line 387) | fn get_byte_ranges(
method get_metadata (line 402) | fn get_metadata<'a>(
function make_range_request_with_client (line 418) | pub async fn make_range_request_with_client(
function read_metadata_async (line 440) | pub async fn read_metadata_async(
function _read_row_group (line 454) | pub async fn _read_row_group(
function read_row_group (line 477) | pub async fn read_row_group(
function read_record_batch_stream (line 496) | pub async fn read_record_batch_stream(
FILE: src/utils.rs
function set_panic_hook (line 7) | pub fn set_panic_hook() {
function assert_parquet_file_not_empty (line 35) | pub fn assert_parquet_file_not_empty(parquet_file: &[u8]) -> Result<(), ...
FILE: src/wasm.rs
function read_parquet (line 68) | pub fn read_parquet(parquet_file: Vec<u8>, options: Option<ReaderOptions...
function read_schema (line 128) | pub fn read_schema(parquet_file: Vec<u8>) -> WasmResult<Schema> {
function write_parquet (line 168) | pub fn write_parquet(
function read_parquet_stream (line 241) | pub async fn read_parquet_stream(
function transform_parquet_stream (line 322) | pub async fn transform_parquet_stream(
FILE: src/writer.rs
function write_parquet (line 8) | pub fn write_parquet(
FILE: src/writer_async.rs
function transform_parquet_stream (line 9) | pub async fn transform_parquet_stream(
FILE: src/writer_properties.rs
type EnabledStatistics (line 11) | pub enum EnabledStatistics {
function from (line 21) | fn from(statistics: EnabledStatistics) -> Self {
type WriterProperties (line 35) | pub struct WriterProperties(parquet::file::properties::WriterProperties);
function from (line 38) | fn from(props: WriterProperties) -> Self {
method default (line 44) | fn default() -> Self {
constant TS_FieldMetadata (line 50) | const TS_FieldMetadata: &'static str = r#"
type WriterPropertiesBuilder (line 65) | pub struct WriterPropertiesBuilder(parquet::file::properties::WriterProp...
method new (line 71) | pub fn new() -> WriterPropertiesBuilder {
method build (line 77) | pub fn build(self) -> WriterProperties {
method set_writer_version (line 86) | pub fn set_writer_version(self, value: WriterVersion) -> Self {
method set_data_page_size_limit (line 92) | pub fn set_data_page_size_limit(self, value: usize) -> Self {
method set_dictionary_page_size_limit (line 98) | pub fn set_dictionary_page_size_limit(self, value: usize) -> Self {
method set_write_batch_size (line 104) | pub fn set_write_batch_size(self, value: usize) -> Self {
method set_max_row_group_size (line 110) | pub fn set_max_row_group_size(self, value: usize) -> Self {
method set_created_by (line 116) | pub fn set_created_by(self, value: String) -> Self {
method set_key_value_metadata (line 122) | pub fn set_key_value_metadata(
method set_encoding (line 148) | pub fn set_encoding(self, value: Encoding) -> Self {
method set_compression (line 154) | pub fn set_compression(self, value: Compression) -> Self {
method set_dictionary_enabled (line 163) | pub fn set_dictionary_enabled(self, value: bool) -> Self {
method set_statistics_enabled (line 169) | pub fn set_statistics_enabled(self, value: EnabledStatistics) -> Self {
method set_column_encoding (line 187) | pub fn set_column_encoding(self, col: String, value: Encoding) -> Self {
method set_column_compression (line 195) | pub fn set_column_compression(self, col: String, value: Compression) -...
method set_column_dictionary_enabled (line 203) | pub fn set_column_dictionary_enabled(self, col: String, value: bool) -...
method set_column_statistics_enabled (line 211) | pub fn set_column_statistics_enabled(self, col: String, value: Enabled...
method default (line 221) | fn default() -> Self {
FILE: tests/data/generate_data.py
function create_data (line 9) | def create_data():
function write_data (line 19) | def write_data(table):
function write_empty_table (line 34) | def write_empty_table():
function create_string_view_table (line 38) | def create_string_view_table():
function write_string_view_table (line 46) | def write_string_view_table():
function main (line 51) | def main():
FILE: tests/data/generate_geo_data.py
function construct_geoarrow_table (line 14) | def construct_geoarrow_table(gdf: gpd.GeoDataFrame) -> pa.Table:
FILE: tests/js/ffi.test.ts
constant WASM_MEMORY (line 15) | const WASM_MEMORY = wasm.wasmMemory();
FILE: tests/js/geo-metadata.test.ts
constant NATURALEARTH_CITIES_WKB (line 8) | const NATURALEARTH_CITIES_WKB = "naturalearth_cities_wkb.parquet";
constant NATURALEARTH_CITIES_GEOARROW (line 9) | const NATURALEARTH_CITIES_GEOARROW = "naturalearth_cities_geoarrow.parqu...
constant EXPECTED_META_WKB (line 11) | const EXPECTED_META_WKB = `\
constant EXPECTED_META_GEOARROW (line 14) | const EXPECTED_META_GEOARROW = `\
function isCloseEqual (line 50) | function isCloseEqual(a: number, b: number, eps: number = 0.0001): boole...
FILE: tests/js/schema.test.ts
constant WASM_MEMORY (line 11) | const WASM_MEMORY = wasm.wasmMemory();
FILE: tests/js/utils.ts
function testArrowTablesEqual (line 10) | function testArrowTablesEqual(table1: Table, table2: Table): void {
function readExpectedArrowData (line 62) | function readExpectedArrowData(): Table {
function temporaryServer (line 68) | async function temporaryServer() {
FILE: tests/web.rs
function pass (line 13) | fn pass() {
Condensed preview — 69 files, each showing its path, character count, and a content snippet. Download the .json file or copy it to your clipboard for the full structured content (265K chars).
[
{
"path": ".cargo/config.toml",
"chars": 161,
"preview": "# https://github.com/kylebarron/arrow-wasm/issues/8#issuecomment-2790469295\n[target.wasm32-unknown-unknown]\nrustflags = "
},
{
"path": ".github/dependabot.yml",
"chars": 584,
"preview": "version: 2\nupdates:\n - package-ecosystem: cargo\n directory: \"/\"\n schedule:\n interval: weekly\n open-pull-r"
},
{
"path": ".github/workflows/docs-website.yml",
"chars": 681,
"preview": "name: Publish docs website\n\non:\n push:\n tags:\n - \"*\"\n\njobs:\n docs:\n runs-on: ubuntu-latest\n steps:\n "
},
{
"path": ".github/workflows/pr-manipulation.yml",
"chars": 2283,
"preview": "name: PR Comment Generation\n\non:\n workflow_run:\n workflows: [\"Build and Test\"]\n types:\n - completed\n\njobs:\n "
},
{
"path": ".github/workflows/test.yml",
"chars": 9096,
"preview": "name: Build and Test\n\non:\n push:\n branches:\n - main\n pull_request:\n\njobs:\n test:\n runs-on: ubuntu-latest\n "
},
{
"path": ".gitignore",
"chars": 177,
"preview": "*.fgb\n.DS_Store\n*.parquet\nnode_modules\n/target\n**/*.rs.bk\npkg/\nwasm-pack.log\n.idea/\nwww/data\nwww/data/\ndata/\n!tests/data"
},
{
"path": ".vscode/settings.json",
"chars": 105,
"preview": "{\n // \"rust-analyzer.cargo.target\": \"wasm32-unknown-unknown\",\n \"rust-analyzer.cargo.features\": \"all\"\n}\n"
},
{
"path": ".yarnrc.yml",
"chars": 25,
"preview": "nodeLinker: node-modules\n"
},
{
"path": "CHANGELOG.md",
"chars": 12153,
"preview": "# Changelog\n\n## [0.7.1] - 2025-09-17\n\n### What's Changed\n\n- ci: Bump node version in docs publish CI by @kylebarron in h"
},
{
"path": "Cargo.toml",
"chars": 4332,
"preview": "[package]\nname = \"parquet-wasm\"\nversion = \"0.7.1\"\nauthors = [\"Kyle Barron <kylebarron2@gmail.com>\"]\nedition = \"2024\"\ndes"
},
{
"path": "DEVELOP.md",
"chars": 1757,
"preview": "# Development\n\n- Install [wasm-pack](https://rustwasm.github.io/wasm-pack/)\n- Compile: `wasm-pack build`, or change targ"
},
{
"path": "LICENSE_APACHE",
"chars": 9723,
"preview": " Apache License\n Version 2.0, January 2004\n http"
},
{
"path": "LICENSE_MIT",
"chars": 1055,
"preview": "Copyright (c) 2022 Kyle Barron\n\nPermission is hereby granted, free of charge, to any\nperson obtaining a copy of this sof"
},
{
"path": "README.md",
"chars": 17454,
"preview": "# WASM Parquet [](https://www.npmjs.com/package/parquet-was"
},
{
"path": "bench/bench.ts",
"chars": 1161,
"preview": "import b from \"benny\";\nimport * as parquet from \"../pkg/node\";\nimport { readFileSync } from \"fs\";\n\nconst dataDir = `${__"
},
{
"path": "bench/make_data.py",
"chars": 1170,
"preview": "from pathlib import Path\n\nimport numpy as np\nimport pandas as pd\nimport pyarrow as pa\nimport pyarrow.parquet as pq\n\ncomp"
},
{
"path": "bench/pyproject.toml",
"chars": 405,
"preview": "[tool.poetry]\nname = \"parquet-wasm-bench\"\nversion = \"0.1.0\"\ndescription = \"Create data for parquet-wasm benchmarks\"\nauth"
},
{
"path": "package.json",
"chars": 696,
"preview": "{\n \"scripts\": {\n \"build\": \"bash ./scripts/build.sh\",\n \"build:test\": \"ENV='DEV' yarn build\",\n \"docs:build\": \"ty"
},
{
"path": "scripts/build.sh",
"chars": 1558,
"preview": "#! /usr/bin/env bash\nrm -rf tmp_build pkg\nmkdir -p tmp_build\n\nif [ \"$ENV\" == \"DEV\" ]; then\n BUILD=\"--dev\"\n FLAGS=\"--fe"
},
{
"path": "scripts/report_build.sh",
"chars": 660,
"preview": "rm -rf report_pkg\nmkdir -p report_pkg\n\necho \"Building arrow-rs slim\"\nwasm-pack build \\\n --release \\\n --no-pack \\\n --o"
},
{
"path": "src/common/fetch.rs",
"chars": 2948,
"preview": "use futures::channel::oneshot;\nuse futures::future::BoxFuture;\nuse range_reader::{RangeOutput, RangedAsyncReader};\nuse w"
},
{
"path": "src/common/mod.rs",
"chars": 105,
"preview": "pub mod properties;\n\n#[cfg(feature = \"async\")]\npub mod fetch;\n\n#[cfg(feature = \"async\")]\npub mod stream;\n"
},
{
"path": "src/common/properties.rs",
"chars": 6796,
"preview": "use wasm_bindgen::prelude::*;\n\n/// Supported compression algorithms.\n///\n/// Codecs added in format version X.Y can be r"
},
{
"path": "src/common/stream.rs",
"chars": 980,
"preview": "use futures::AsyncWrite;\n\npub struct WrappedWritableStream<'writer> {\n pub stream: wasm_streams::writable::IntoAsyncW"
},
{
"path": "src/error.rs",
"chars": 1172,
"preview": "use arrow::error::ArrowError;\nuse parquet::errors::ParquetError;\nuse thiserror::Error;\nuse wasm_bindgen::{JsError, JsVal"
},
{
"path": "src/lib.rs",
"chars": 644,
"preview": "extern crate web_sys;\n\npub mod common;\npub mod utils;\n\npub mod error;\npub mod metadata;\n#[cfg(feature = \"reader\")]\npub m"
},
{
"path": "src/metadata.rs",
"chars": 7494,
"preview": "use wasm_bindgen::prelude::*;\n\nuse crate::common::properties::{Compression, Encoding};\n\n/// Global Parquet metadata.\n#[d"
},
{
"path": "src/read_options.rs",
"chars": 4373,
"preview": "use parquet::arrow::ProjectionMask;\nuse parquet::arrow::arrow_reader::ArrowReaderBuilder;\nuse parquet::schema::types::Sc"
},
{
"path": "src/reader.rs",
"chars": 5164,
"preview": "use std::sync::Arc;\n\nuse crate::error::Result;\nuse crate::read_options::JsReaderOptions;\nuse arrow_schema::{DataType, Fi"
},
{
"path": "src/reader_async.rs",
"chars": 17033,
"preview": "//! An asynchronous Parquet reader that is able to read and inspect remote files without\n//! downloading them in entiret"
},
{
"path": "src/utils.rs",
"chars": 1327,
"preview": "use wasm_bindgen::prelude::*;\n\n/// Call this function at least once during initialization to get better error\n// message"
},
{
"path": "src/wasm.rs",
"chars": 13341,
"preview": "use crate::error::WasmResult;\n#[cfg(feature = \"reader\")]\nuse crate::read_options::ReaderOptions;\nuse crate::utils::asser"
},
{
"path": "src/writer.rs",
"chars": 828,
"preview": "use crate::error::Result;\nuse arrow::datatypes::SchemaRef;\nuse arrow::record_batch::RecordBatch;\nuse parquet::arrow::arr"
},
{
"path": "src/writer_async.rs",
"chars": 3197,
"preview": "use crate::common::stream::WrappedWritableStream;\nuse crate::error::{ParquetWasmError, Result};\nuse async_compat::Compat"
},
{
"path": "src/writer_properties.rs",
"chars": 8465,
"preview": "use std::collections::HashMap;\n\nuse crate::common::properties::{Compression, Encoding, WriterVersion};\nuse crate::error:"
},
{
"path": "templates/package.json",
"chars": 1485,
"preview": "{\n \"name\": \"parquet-wasm\",\n \"collaborators\": [\n \"Kyle Barron <kylebarron2@gmail.com>\"\n ],\n \"description\": \"WebAss"
},
{
"path": "tests/data/.python-version",
"chars": 5,
"preview": "3.12\n"
},
{
"path": "tests/data/README.md",
"chars": 61,
"preview": "To create test data:\n\n```\nuv run python generate_data.py\n```\n"
},
{
"path": "tests/data/generate_data.py",
"chars": 1633,
"preview": "import pandas as pd\nimport pyarrow as pa\nimport pyarrow.feather as feather\nimport pyarrow.parquet as pq\n\ncompressions = "
},
{
"path": "tests/data/generate_geo_data.py",
"chars": 1225,
"preview": "import json\n\nimport geopandas as gpd\nimport pyarrow as pa\nimport pyarrow.parquet as pq\nimport pygeos\nfrom geopandas.io.a"
},
{
"path": "tests/data/pyproject.toml",
"chars": 265,
"preview": "[project]\nname = \"generate-test-data\"\nversion = \"0.1.0\"\ndescription = \"Add your description here\"\nreadme = \"README.md\"\nr"
},
{
"path": "tests/data/uv.lock",
"chars": 93149,
"preview": "version = 1\nrevision = 3\nrequires-python = \">=3.12\"\n\n[[package]]\nname = \"appnope\"\nversion = \"0.1.4\"\nsource = { registry "
},
{
"path": "tests/js/ffi.test.ts",
"chars": 1677,
"preview": "import * as wasm from \"../../pkg/node/parquet_wasm\";\nimport { readFileSync } from \"fs\";\nimport * as arrow from \"apache-a"
},
{
"path": "tests/js/geo-metadata.test.ts",
"chars": 4653,
"preview": "import * as wasm from \"../../pkg/node/parquet_wasm\";\nimport { readFileSync } from \"fs\";\nimport { tableFromIPC } from \"ap"
},
{
"path": "tests/js/index.test.ts",
"chars": 179,
"preview": "import * as wasm from \"../../pkg/node/parquet_wasm\";\n\nwasm.setPanicHook();\n\nimport \"./read-write.test\";\nimport \"./ffi.te"
},
{
"path": "tests/js/read-write.test.ts",
"chars": 5108,
"preview": "import { DataType, tableFromIPC, tableToIPC } from \"apache-arrow\";\nimport { readFileSync } from \"fs\";\nimport { describe,"
},
{
"path": "tests/js/schema.test.ts",
"chars": 1238,
"preview": "import * as wasm from \"../../pkg/node/parquet_wasm\";\nimport { readFileSync } from \"fs\";\nimport * as arrow from \"apache-a"
},
{
"path": "tests/js/utils.ts",
"chars": 3428,
"preview": "import { expect } from \"vitest\";\nimport { readFileSync } from \"fs\";\nimport { tableFromIPC, Table } from \"apache-arrow\";\n"
},
{
"path": "tests/web.rs",
"chars": 331,
"preview": "//! Test suite for the Web and headless browsers.\n\n#![cfg(target_arch = \"wasm32\")]\n// Necessary for the assert_eq! which"
},
{
"path": "tsconfig.docs.json",
"chars": 35,
"preview": "{\n \"include\": [\"pkg/**/*.d.ts\"]\n}\n"
},
{
"path": "tsconfig.json",
"chars": 164,
"preview": "{\n \"compilerOptions\": {\n \"module\": \"commonjs\",\n \"moduleResolution\": \"node\"\n },\n \"include\": [\n \"tests/**/*\"\n "
},
{
"path": "typedoc.json",
"chars": 429,
"preview": "{\n \"name\": \"parquet-wasm\",\n \"cleanOutputDir\": true,\n \"darkHighlightTheme\": \"github-dark\",\n \"entryPoints\": [\n \"pkg"
}
]
// ... and 17 more files (download for full content)
About this extraction
This page contains the full source code of the kylebarron/parquet-wasm GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 69 files (248.2 KB), approximately 85.2k tokens, and a symbol index with 162 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.