Repository: kaushiksrini/parqeye Branch: main Commit: 8072121de95a Files: 37 Total size: 159.7 KB Directory structure: gitextract_dxmn_73j/ ├── .github/ │ ├── setup-builder/ │ │ └── action.yml │ └── workflows/ │ ├── ci.yaml │ └── release.yml ├── .gitignore ├── .gitmodules ├── Cargo.toml ├── LICENSE ├── Makefile ├── README.md ├── dist-workspace.toml └── src/ ├── app.rs ├── components/ │ ├── data_table.rs │ ├── mod.rs │ ├── row_group/ │ │ ├── metadata.rs │ │ ├── mod.rs │ │ ├── progress_bar.rs │ │ └── schema_md.rs │ ├── schema/ │ │ ├── mod.rs │ │ ├── table.rs │ │ └── tree.rs │ └── scrollbar.rs ├── file/ │ ├── metadata.rs │ ├── mod.rs │ ├── parquet_ctx.rs │ ├── row_groups.rs │ ├── sample_data.rs │ ├── schema.rs │ └── utils.rs ├── lib.rs ├── main.rs ├── tabs/ │ ├── manager.rs │ ├── metadata.rs │ ├── mod.rs │ ├── row_groups.rs │ ├── schema.rs │ └── visualize.rs └── ui.rs ================================================ FILE CONTENTS ================================================ ================================================ FILE: .github/setup-builder/action.yml ================================================ # From iceberg-rust: https://github.com/apache/iceberg-rust/blob/main/.github/actions/setup-builder/action.yml name: Setup Rust Builder description: 'Setup a Rust builder for the CI' inputs: rust-version: description: 'The version of Rust to use' runs: using: "composite" steps: - name: Setup specified Rust toolchain shell: bash if: ${{ inputs.rust-version != '' }} env: RUST_VERSION: ${{ inputs.rust-version }} run: | echo "Installing ${RUST_VERSION}" rustup toolchain install ${RUST_VERSION} rustup override set ${RUST_VERSION} rustup component add rustfmt clippy - name: Setup Rust toolchain according to rust-toolchain.toml shell: bash if: ${{ inputs.rust-version == '' }} run: | echo "Installing toolchain according to rust-toolchain.toml" rustup show rustup component add rustfmt clippy - name: Fixup git permissions # https://github.com/actions/checkout/issues/766 shell: bash run: git config --global --add safe.directory "$GITHUB_WORKSPACE" ================================================ FILE: .github/workflows/ci.yaml ================================================ name: CI on: push: branches: - main pull_request: paths: - '**' - '!.github/workflows/**' - '!.gitignore' - '!LICENSE' - '!README.md' jobs: check: runs-on: ${{ matrix.os }} strategy: matrix: os: - ubuntu-latest - macos-latest steps: - uses: actions/checkout@v5 - name: Setup Rust toolchain uses: ./.github/setup-builder - name: Cargo format run: make check-fmt - name: Check diff run: git diff --exit-code - name: Cargo clippy run: make check-clippy build: runs-on: ${{ matrix.os }} strategy: matrix: os: - ubuntu-latest - macos-latest - windows-latest steps: - name: Maximize build space (Ubuntu) if: matrix.os == 'ubuntu-latest' uses: easimon/maximize-build-space@master with: remove-dotnet: "true" remove-android: "true" remove-haskell: "true" remove-codeql: "true" remove-docker-images: "true" root-reserve-mb: 10240 temp-reserve-mb: 10240 - uses: actions/checkout@v5 - name: Setup Rust toolchain uses: ./.github/setup-builder - name: Cache Rust artifacts uses: Swatinem/rust-cache@v2 - name: Build run: make build test: runs-on: ${{ matrix.os }} strategy: matrix: os: - ubuntu-latest - macos-latest - windows-latest steps: - uses: actions/checkout@v5 with: submodules: true - name: Setup Rust toolchain uses: ./.github/setup-builder - name: Cache Rust artifacts uses: Swatinem/rust-cache@v2 - name: Test run: make test ================================================ FILE: .github/workflows/release.yml ================================================ # This file was autogenerated by dist: https://axodotdev.github.io/cargo-dist # # Copyright 2022-2024, axodotdev # SPDX-License-Identifier: MIT or Apache-2.0 # # CI that: # # * checks for a Git Tag that looks like a release # * builds artifacts with dist (archives, installers, hashes) # * uploads those artifacts to temporary workflow zip # * on success, uploads the artifacts to a GitHub Release # # Note that the GitHub Release will be created with a generated # title/body based on your changelogs. name: Release permissions: "contents": "write" # This task will run whenever you push a git tag that looks like a version # like "1.0.0", "v0.1.0-prerelease.1", "my-app/0.1.0", "releases/v1.0.0", etc. # Various formats will be parsed into a VERSION and an optional PACKAGE_NAME, where # PACKAGE_NAME must be the name of a Cargo package in your workspace, and VERSION # must be a Cargo-style SemVer Version (must have at least major.minor.patch). # # If PACKAGE_NAME is specified, then the announcement will be for that # package (erroring out if it doesn't have the given version or isn't dist-able). # # If PACKAGE_NAME isn't specified, then the announcement will be for all # (dist-able) packages in the workspace with that version (this mode is # intended for workspaces with only one dist-able package, or with all dist-able # packages versioned/released in lockstep). # # If you push multiple tags at once, separate instances of this workflow will # spin up, creating an independent announcement for each one. However, GitHub # will hard limit this to 3 tags per commit, as it will assume more tags is a # mistake. # # If there's a prerelease-style suffix to the version, then the release(s) # will be marked as a prerelease. on: pull_request: push: tags: - '**[0-9]+.[0-9]+.[0-9]+*' jobs: # Run 'dist plan' (or host) to determine what tasks we need to do plan: runs-on: "ubuntu-22.04" outputs: val: ${{ steps.plan.outputs.manifest }} tag: ${{ !github.event.pull_request && github.ref_name || '' }} tag-flag: ${{ !github.event.pull_request && format('--tag={0}', github.ref_name) || '' }} publishing: ${{ !github.event.pull_request }} env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} steps: - uses: actions/checkout@v4 with: persist-credentials: false submodules: recursive - name: Install dist # we specify bash to get pipefail; it guards against the `curl` command # failing. otherwise `sh` won't catch that `curl` returned non-0 shell: bash run: "curl --proto '=https' --tlsv1.2 -LsSf https://github.com/axodotdev/cargo-dist/releases/download/v0.30.2/cargo-dist-installer.sh | sh" - name: Cache dist uses: actions/upload-artifact@v4 with: name: cargo-dist-cache path: ~/.cargo/bin/dist # sure would be cool if github gave us proper conditionals... # so here's a doubly-nested ternary-via-truthiness to try to provide the best possible # functionality based on whether this is a pull_request, and whether it's from a fork. # (PRs run on the *source* but secrets are usually on the *target* -- that's *good* # but also really annoying to build CI around when it needs secrets to work right.) - id: plan run: | dist ${{ (!github.event.pull_request && format('host --steps=create --tag={0}', github.ref_name)) || 'plan' }} --output-format=json > plan-dist-manifest.json echo "dist ran successfully" cat plan-dist-manifest.json echo "manifest=$(jq -c "." plan-dist-manifest.json)" >> "$GITHUB_OUTPUT" - name: "Upload dist-manifest.json" uses: actions/upload-artifact@v4 with: name: artifacts-plan-dist-manifest path: plan-dist-manifest.json # Build and packages all the platform-specific things build-local-artifacts: name: build-local-artifacts (${{ join(matrix.targets, ', ') }}) # Let the initial task tell us to not run (currently very blunt) needs: - plan if: ${{ fromJson(needs.plan.outputs.val).ci.github.artifacts_matrix.include != null && (needs.plan.outputs.publishing == 'true' || fromJson(needs.plan.outputs.val).ci.github.pr_run_mode == 'upload') }} strategy: fail-fast: false # Target platforms/runners are computed by dist in create-release. # Each member of the matrix has the following arguments: # # - runner: the github runner # - dist-args: cli flags to pass to dist # - install-dist: expression to run to install dist on the runner # # Typically there will be: # - 1 "global" task that builds universal installers # - N "local" tasks that build each platform's binaries and platform-specific installers matrix: ${{ fromJson(needs.plan.outputs.val).ci.github.artifacts_matrix }} runs-on: ${{ matrix.runner }} container: ${{ matrix.container && matrix.container.image || null }} env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} BUILD_MANIFEST_NAME: target/distrib/${{ join(matrix.targets, '-') }}-dist-manifest.json steps: - name: enable windows longpaths run: | git config --global core.longpaths true - uses: actions/checkout@v4 with: persist-credentials: false submodules: recursive - name: Install Rust non-interactively if not already installed if: ${{ matrix.container }} run: | if ! command -v cargo > /dev/null 2>&1; then curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y echo "$HOME/.cargo/bin" >> $GITHUB_PATH fi - name: Install dist run: ${{ matrix.install_dist.run }} # Get the dist-manifest - name: Fetch local artifacts uses: actions/download-artifact@v4 with: pattern: artifacts-* path: target/distrib/ merge-multiple: true - name: Install dependencies run: | ${{ matrix.packages_install }} - name: Build artifacts run: | # Actually do builds and make zips and whatnot dist build ${{ needs.plan.outputs.tag-flag }} --print=linkage --output-format=json ${{ matrix.dist_args }} > dist-manifest.json echo "dist ran successfully" - id: cargo-dist name: Post-build # We force bash here just because github makes it really hard to get values up # to "real" actions without writing to env-vars, and writing to env-vars has # inconsistent syntax between shell and powershell. shell: bash run: | # Parse out what we just built and upload it to scratch storage echo "paths<> "$GITHUB_OUTPUT" dist print-upload-files-from-manifest --manifest dist-manifest.json >> "$GITHUB_OUTPUT" echo "EOF" >> "$GITHUB_OUTPUT" cp dist-manifest.json "$BUILD_MANIFEST_NAME" - name: "Upload artifacts" uses: actions/upload-artifact@v4 with: name: artifacts-build-local-${{ join(matrix.targets, '_') }} path: | ${{ steps.cargo-dist.outputs.paths }} ${{ env.BUILD_MANIFEST_NAME }} # Build and package all the platform-agnostic(ish) things build-global-artifacts: needs: - plan - build-local-artifacts runs-on: "ubuntu-22.04" env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} BUILD_MANIFEST_NAME: target/distrib/global-dist-manifest.json steps: - uses: actions/checkout@v4 with: persist-credentials: false submodules: recursive - name: Install cached dist uses: actions/download-artifact@v4 with: name: cargo-dist-cache path: ~/.cargo/bin/ - run: chmod +x ~/.cargo/bin/dist # Get all the local artifacts for the global tasks to use (for e.g. checksums) - name: Fetch local artifacts uses: actions/download-artifact@v4 with: pattern: artifacts-* path: target/distrib/ merge-multiple: true - id: cargo-dist shell: bash run: | dist build ${{ needs.plan.outputs.tag-flag }} --output-format=json "--artifacts=global" > dist-manifest.json echo "dist ran successfully" # Parse out what we just built and upload it to scratch storage echo "paths<> "$GITHUB_OUTPUT" jq --raw-output ".upload_files[]" dist-manifest.json >> "$GITHUB_OUTPUT" echo "EOF" >> "$GITHUB_OUTPUT" cp dist-manifest.json "$BUILD_MANIFEST_NAME" - name: "Upload artifacts" uses: actions/upload-artifact@v4 with: name: artifacts-build-global path: | ${{ steps.cargo-dist.outputs.paths }} ${{ env.BUILD_MANIFEST_NAME }} # Determines if we should publish/announce host: needs: - plan - build-local-artifacts - build-global-artifacts # Only run if we're "publishing", and only if plan, local and global didn't fail (skipped is fine) if: ${{ always() && needs.plan.result == 'success' && needs.plan.outputs.publishing == 'true' && (needs.build-global-artifacts.result == 'skipped' || needs.build-global-artifacts.result == 'success') && (needs.build-local-artifacts.result == 'skipped' || needs.build-local-artifacts.result == 'success') }} env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} runs-on: "ubuntu-22.04" outputs: val: ${{ steps.host.outputs.manifest }} steps: - uses: actions/checkout@v4 with: persist-credentials: false submodules: recursive - name: Install cached dist uses: actions/download-artifact@v4 with: name: cargo-dist-cache path: ~/.cargo/bin/ - run: chmod +x ~/.cargo/bin/dist # Fetch artifacts from scratch-storage - name: Fetch artifacts uses: actions/download-artifact@v4 with: pattern: artifacts-* path: target/distrib/ merge-multiple: true - id: host shell: bash run: | dist host ${{ needs.plan.outputs.tag-flag }} --steps=upload --steps=release --output-format=json > dist-manifest.json echo "artifacts uploaded and released successfully" cat dist-manifest.json echo "manifest=$(jq -c "." dist-manifest.json)" >> "$GITHUB_OUTPUT" - name: "Upload dist-manifest.json" uses: actions/upload-artifact@v4 with: # Overwrite the previous copy name: artifacts-dist-manifest path: dist-manifest.json # Create a GitHub Release while uploading all files to it - name: "Download GitHub Artifacts" uses: actions/download-artifact@v4 with: pattern: artifacts-* path: artifacts merge-multiple: true - name: Cleanup run: | # Remove the granular manifests rm -f artifacts/*-dist-manifest.json - name: Create GitHub Release env: PRERELEASE_FLAG: "${{ fromJson(steps.host.outputs.manifest).announcement_is_prerelease && '--prerelease' || '' }}" ANNOUNCEMENT_TITLE: "${{ fromJson(steps.host.outputs.manifest).announcement_title }}" ANNOUNCEMENT_BODY: "${{ fromJson(steps.host.outputs.manifest).announcement_github_body }}" RELEASE_COMMIT: "${{ github.sha }}" run: | # Write and read notes from a file to avoid quoting breaking things echo "$ANNOUNCEMENT_BODY" > $RUNNER_TEMP/notes.txt gh release create "${{ needs.plan.outputs.tag }}" --target "$RELEASE_COMMIT" $PRERELEASE_FLAG --title "$ANNOUNCEMENT_TITLE" --notes-file "$RUNNER_TEMP/notes.txt" artifacts/* publish-homebrew-formula: needs: - plan - host runs-on: "ubuntu-22.04" env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} PLAN: ${{ needs.plan.outputs.val }} GITHUB_USER: "axo bot" GITHUB_EMAIL: "admin+bot@axo.dev" if: ${{ !fromJson(needs.plan.outputs.val).announcement_is_prerelease || fromJson(needs.plan.outputs.val).publish_prereleases }} steps: - uses: actions/checkout@v4 with: persist-credentials: true repository: "kaushiksrini/homebrew-parqeye" token: ${{ secrets.HOMEBREW_TAP_TOKEN }} # So we have access to the formula - name: Fetch homebrew formulae uses: actions/download-artifact@v4 with: pattern: artifacts-* path: Formula/ merge-multiple: true # This is extra complex because you can make your Formula name not match your app name # so we need to find releases with a *.rb file, and publish with that filename. - name: Commit formula files run: | git config --global user.name "${GITHUB_USER}" git config --global user.email "${GITHUB_EMAIL}" for release in $(echo "$PLAN" | jq --compact-output '.releases[] | select([.artifacts[] | endswith(".rb")] | any)'); do filename=$(echo "$release" | jq '.artifacts[] | select(endswith(".rb"))' --raw-output) name=$(echo "$filename" | sed "s/\.rb$//") version=$(echo "$release" | jq .app_version --raw-output) export PATH="/home/linuxbrew/.linuxbrew/bin:$PATH" brew update # We avoid reformatting user-provided data such as the app description and homepage. brew style --except-cops FormulaAudit/Homepage,FormulaAudit/Desc,FormulaAuditStrict --fix "Formula/${filename}" || true git add "Formula/${filename}" git commit -m "${name} ${version}" done git push announce: needs: - plan - host - publish-homebrew-formula # use "always() && ..." to allow us to wait for all publish jobs while # still allowing individual publish jobs to skip themselves (for prereleases). # "host" however must run to completion, no skipping allowed! if: ${{ always() && needs.host.result == 'success' && (needs.publish-homebrew-formula.result == 'skipped' || needs.publish-homebrew-formula.result == 'success') }} runs-on: "ubuntu-22.04" env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} steps: - uses: actions/checkout@v4 with: persist-credentials: false submodules: recursive ================================================ FILE: .gitignore ================================================ /target ================================================ FILE: .gitmodules ================================================ [submodule "parquet-testing"] path = parquet-testing url = https://github.com/apache/parquet-testing ================================================ FILE: Cargo.toml ================================================ [package] name = "parqeye" description = "Parquet viewer for the command line" version = "0.0.2" readme = "README.md" license = "MIT" repository = "https://github.com/kaushiksrini/parqeye" keywords = ["parquet", "parq", "visualizer", "schema", "tui"] edition = "2024" [dependencies] clap = { version = "4", features = ["derive"] } # for CLI parsing parquet = { version = "54.2.1", features = ["cli","async","json","arrow"] } # core Parquet APIs arrow = "54.2.1" # Arrow arrays and data types parquet-format = "4.0.0" # Thrift structs ratatui = "0.29.0" crossterm = "0.29.0" chrono = "0.4" # for timestamp handling itertools = "0.14.0" polars = { version = "0.51.0", features = ["lazy", "parquet", "dtype-full", "timezones"] } # The profile that 'dist' will build with [profile.dist] inherits = "release" lto = "thin" [package.metadata.dist] ci = "github" ================================================ FILE: LICENSE ================================================ Copyright (c) 2025 Kaushik Srinivasan Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: Makefile ================================================ help: ## Display this help message @awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m\033[0m\n"} /^[a-zA-Z_-]+:.*?##/ { printf " \033[36m%-25s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST) build: ## Build the project cargo build --all-targets --all-features --workspace check-fmt: ## Check the formatting of the code cargo fmt --all -- --check check-clippy: ## Check the clippy of the code cargo clippy --all-targets --all-features --workspace -- -D warnings check: check-fmt check-clippy doc-test: ## Test the documentation of the code cargo test --no-fail-fast --doc --all-features --workspace unit-test: doc-test ## Test the unit tests of the code cargo test --no-fail-fast --lib --all-features --workspace test: doc-test ## Test the code cargo test --no-fail-fast --all-targets --all-features --workspace clean: ## Clean the project cargo clean ================================================ FILE: README.md ================================================ # parqeye [![CI][actions-badge]][actions-url] [actions-badge]: https://github.com/kaushiksrini/parqeye/actions/workflows/ci.yaml/badge.svg [actions-url]: https://github.com/kaushiksrini/parqeye/actions/workflows/ci.yaml `parqeye` lets you _peek inside_ your Parquet files. Instantly inspect their contents, schema, and metadata — right from your terminal. ![Demo](.github/assets/demo.gif) **Features** - **Interactive Data Visualization** - Browse through your Parquet data in a table view with keyboard navigation. - **Schema Explorer** - Inspect column types, nested structures, and field definitions. - **File Metadata** - View Parquet file-level metadata including version, created by, encoding stats and more. - **Row Group Statistics** - Examine row group-level metadata, statistics, and data distribution across groups. - **Tab-based Interface** - Quickly switch between Visualize, Schema, Metadata, and Row Groups views. - **Terminal-native** - Works directly in your terminal. # Usage Run `parqeye` by providing the path to the `.parquet` file. ``` parqeye ``` # Installation ## Direct Download You can download the latest release from the [Releases](https://github.com/kaushiksrini/parqeye/releases) page. ## Build from Source You can build from source by downloading the repository and running the following command: ``` cargo build --release ``` ## Cargo If you use Rust, build directly from [crates.io](https://crates.io/crates/parqeye) ``` cargo install parqeye ``` ## Homebrew If you have Homebrew, you can install using: ```sh brew install kaushiksrini/parqeye/parqeye ``` # License This package is released under the [MIT License](./LICENSE). # Acknowledgements - [csvlens](https://github.com/YS-L/csvlens) for the inspiration # TODOs - [ ] Lazy/streaming loading of parquet files. - [ ] Filter columns by value in the visualize tab. - [ ] Read parquet files on the cloud (`s3://...`). ================================================ FILE: dist-workspace.toml ================================================ [workspace] members = ["cargo:."] # Config for 'dist' [dist] # The preferred dist version to use in CI (Cargo.toml SemVer syntax) cargo-dist-version = "0.30.2" # The installers to generate for each app installers = ["shell", "npm", "homebrew"] # Target platforms to build apps for (Rust target-triple syntax) targets = ["aarch64-apple-darwin", "aarch64-unknown-linux-gnu", "x86_64-apple-darwin", "x86_64-unknown-linux-gnu", "x86_64-unknown-linux-musl", "x86_64-pc-windows-msvc"] # CI backends to support ci = "github" # A GitHub repo to push Homebrew formulas to tap = "kaushiksrini/homebrew-parqeye" # Path that installers should place binaries in install-path = "CARGO_HOME" # Publish jobs to run in CI publish-jobs = ["homebrew"] # Whether to install an updater program install-updater = false ================================================ FILE: src/app.rs ================================================ use crossterm::event::{self, Event, KeyCode, KeyEvent, KeyEventKind}; use ratatui::DefaultTerminal; use std::io; use crate::file::parquet_ctx::ParquetCtx; use crate::tabs::TabManager; pub struct AppRenderView<'a> { pub title: &'a str, pub parquet_ctx: &'a ParquetCtx, file_name: &'a str, tabs: &'a TabManager, pub state: &'a AppState, } impl<'a> AppRenderView<'a> { fn from_app(app: &'a App) -> Self { Self { title: "parqeye", parquet_ctx: app.parquet_ctx, file_name: &app.file_name, tabs: &app.tabs, state: &app.state, } } pub fn tabs(&self) -> &TabManager { self.tabs } pub fn file_name(&self) -> &str { self.file_name } pub fn state(&self) -> &AppState { self.state } } pub struct App<'a> { pub parquet_ctx: &'a ParquetCtx, pub file_name: String, pub exit: bool, pub tabs: TabManager, pub state: AppState, } pub struct AppState { horizontal_offset: usize, vertical_offset: usize, tree_scroll_offset: usize, data_vertical_scroll: usize, visible_data_rows: usize, } impl Default for AppState { fn default() -> Self { Self::new() } } impl AppState { pub fn new() -> Self { Self { horizontal_offset: 0, vertical_offset: 0, tree_scroll_offset: 0, data_vertical_scroll: 0, visible_data_rows: 20, // Default fallback } } pub fn reset(&mut self) { self.horizontal_offset = 0; self.vertical_offset = 0; self.tree_scroll_offset = 0; self.data_vertical_scroll = 0; } pub fn horizontal_offset(&self) -> usize { self.horizontal_offset } pub fn vertical_offset(&self) -> usize { self.vertical_offset } pub fn down(&mut self) { self.vertical_offset += 1; } pub fn up(&mut self) { self.vertical_offset = self.vertical_offset.saturating_sub(1); } pub fn right(&mut self) { self.horizontal_offset += 1; } pub fn left(&mut self) { self.horizontal_offset = self.horizontal_offset.saturating_sub(1); } pub fn tree_scroll_offset(&self) -> usize { self.tree_scroll_offset } pub fn tree_scroll_up(&mut self) { self.tree_scroll_offset = self.tree_scroll_offset.saturating_sub(1); } pub fn tree_scroll_down(&mut self) { self.tree_scroll_offset += 1; } pub fn data_vertical_scroll(&self) -> usize { self.data_vertical_scroll } pub fn set_data_vertical_scroll(&mut self, scroll: usize) { self.data_vertical_scroll = scroll; } pub fn visible_data_rows(&self) -> usize { self.visible_data_rows } pub fn set_visible_data_rows(&mut self, rows: usize) { self.visible_data_rows = rows; } pub fn page_up(&mut self, visible_rows: usize, max_rows: usize) { // Move selection up by visible_rows self.vertical_offset = self.vertical_offset.saturating_sub(visible_rows); // Adjust scroll to keep selection visible self.adjust_scroll_to_selection(visible_rows, max_rows); } pub fn page_down(&mut self, visible_rows: usize, max_rows: usize) { // Move selection down by visible_rows, clamped to max_rows - 1 self.vertical_offset = (self.vertical_offset + visible_rows).min(max_rows.saturating_sub(1)); // Adjust scroll to keep selection visible self.adjust_scroll_to_selection(visible_rows, max_rows); } pub fn adjust_scroll_to_selection(&mut self, visible_rows: usize, max_rows: usize) { // Ensure selected row is visible in viewport if self.vertical_offset < self.data_vertical_scroll { // Selection is above viewport, scroll up self.data_vertical_scroll = self.vertical_offset; } else if self.vertical_offset >= self.data_vertical_scroll + visible_rows { // Selection is below viewport, scroll down self.data_vertical_scroll = self.vertical_offset.saturating_sub(visible_rows - 1); } // Clamp scroll to valid range let max_scroll = max_rows.saturating_sub(visible_rows); self.data_vertical_scroll = self.data_vertical_scroll.min(max_scroll); } } impl<'a> App<'a> { pub fn new(file_info: &'a ParquetCtx) -> Self { let sample_data_rows = file_info.sample_data.total_rows; let tab_manager = TabManager::new( file_info.schema.column_size(), file_info.row_groups.num_row_groups(), sample_data_rows, ); Self { parquet_ctx: file_info, file_name: file_info.file_path.clone(), exit: false, tabs: tab_manager, state: AppState::new(), } } pub fn run(&mut self, terminal: &mut DefaultTerminal) -> io::Result<()> { while !self.exit { // Calculate visible data rows based on terminal size let terminal_size = terminal.size()?; // Account for: header (3 lines), footer (1 line), table header (3 lines) = 7 lines total let visible_data_rows = (terminal_size.height.saturating_sub(7) as usize).max(1); self.state.set_visible_data_rows(visible_data_rows); let render_view = AppRenderView::from_app(self); terminal.draw(|frame| crate::ui::render_app(&render_view, frame))?; self.handle_events()?; } Ok(()) } fn handle_events(&mut self) -> io::Result<()> { match event::read()? { Event::Key(key_event) if key_event.kind == KeyEventKind::Press => { self.handle_key_event(key_event) } _ => {} }; Ok(()) } fn handle_key_event(&mut self, key_event: KeyEvent) { match key_event.code { KeyCode::Char('q') | KeyCode::Char('Q') => self.exit(), KeyCode::Esc => self.state.reset(), KeyCode::Tab => { self.tabs.next(); self.state.reset(); } KeyCode::BackTab => { self.tabs.prev(); self.state.reset(); } _ => { self.tabs .active_tab() .on_event(key_event, &mut self.state) .unwrap(); } } } fn exit(&mut self) { self.exit = true; } } ================================================ FILE: src/components/data_table.rs ================================================ use crate::file::sample_data::ParquetSampleData; use ratatui::{ buffer::Buffer, layout::Rect, prelude::{Color, Position}, style::Modifier, symbols::{border, line}, text::Span, widgets::Widget, }; use std::cmp::min; use crate::file::Renderable; const NUM_SPACES_BETWEEN_COLUMNS: u16 = 2; const NUM_SPACES_AFTER_LINE_NUMBER: u16 = 2; pub struct DataTable<'a> { pub data: &'a ParquetSampleData, pub title: String, pub title_color: Color, pub border_style: border::Set, pub horizontal_scroll: usize, pub vertical_scroll: usize, pub selected_row: Option, pub selected_color: Color, pub border_color: Color, } impl<'a> DataTable<'a> { pub fn new(data: &'a ParquetSampleData) -> Self { Self { data, title: "Data Preview (up to 100 rows)".to_string(), title_color: Color::Cyan, border_style: border::ROUNDED, horizontal_scroll: 0, vertical_scroll: 0, selected_row: None, selected_color: Color::Rgb(60, 60, 60), border_color: Color::DarkGray, } } pub fn with_title(mut self, title: String) -> Self { self.title = title; self } pub fn with_colors(mut self, title: Color, selected: Color) -> Self { self.title_color = title; self.selected_color = selected; self } pub fn with_border_style(mut self, border_style: border::Set) -> Self { self.border_style = border_style; self } pub fn with_horizontal_scroll(mut self, offset: usize) -> Self { self.horizontal_scroll = offset; self } pub fn with_vertical_scroll(mut self, offset: usize) -> Self { self.vertical_scroll = offset; self } pub fn with_selected_row(mut self, row: Option) -> Self { self.selected_row = row; self } pub fn scroll_left(&mut self) { if self.horizontal_scroll > 0 { self.horizontal_scroll -= 1; } } pub fn scroll_right(&mut self) { let max_scroll = self.get_max_scroll(); if self.horizontal_scroll < max_scroll { self.horizontal_scroll += 1; } } pub fn get_max_scroll(&self) -> usize { // Calculate how many columns we can show at reasonable width let available_width = 120; // Assume reasonable terminal width let min_column_width = 12; // Minimum width for readability let max_visible_columns = available_width / min_column_width; // Total columns minus visible columns self.data.total_columns.saturating_sub(max_visible_columns) } fn calculate_column_widths( &self, headers: &[String], visible_rows: &[Vec], ) -> Vec { let mut widths = Vec::new(); for (col_idx, header) in headers.iter().enumerate() { let mut max_width = header.len(); // Check content width for this column for row in visible_rows { if let Some(cell) = row.get(col_idx) { max_width = max_width.max(cell.len()); } } // Use minimum width of 8 and maximum of 25 for readability, add spacing widths.push((min(max_width.max(8), 25) as u16) + NUM_SPACES_BETWEEN_COLUMNS); } widths } fn render_header_separator(&self, buf: &mut Buffer, area: Rect, x_row_separator: u16, y: u16) { let border_style = ratatui::style::Style::default().fg(self.border_color); // Draw horizontal line for x in 0..area.width { if let Some(cell) = buf.cell_mut(Position::new(x, y - 1)) { cell.set_symbol(line::HORIZONTAL).set_style(border_style); } } // Intersection with row number separator if let Some(cell) = buf.cell_mut(Position::new(x_row_separator - 1, y - 1)) { cell.set_symbol(line::HORIZONTAL_DOWN) .set_style(border_style); } } fn render_row_numbers(&self, buf: &mut Buffer, area: Rect, rows: &[Vec]) { let mut y = area.y; for (row_idx, _) in rows.iter().enumerate() { let actual_row_num = row_idx + self.vertical_scroll + 1; let is_selected = self .selected_row .is_some_and(|selected| row_idx + self.vertical_scroll == selected); let row_num_formatted = format!("{}", actual_row_num); let mut style: ratatui::prelude::Style = ratatui::style::Style::default().fg(Color::DarkGray); if is_selected { style = style .add_modifier(Modifier::BOLD) .add_modifier(Modifier::UNDERLINED); } let span = Span::styled(row_num_formatted, style); buf.set_span(0, y, &span, area.width); y += 1; if y >= area.bottom() { break; } } } fn render_header( &self, buf: &mut Buffer, x_start: u16, y: u16, headers: &[String], column_widths: &[u16], max_width: u16, ) { let mut x_offset = x_start; for (header, &width) in headers.iter().zip(column_widths) { if x_offset >= max_width { break; } let effective_width = width.saturating_sub(NUM_SPACES_BETWEEN_COLUMNS); let truncated = if header.len() > effective_width as usize { format!( "{}...", &header[..effective_width.saturating_sub(3) as usize] ) } else { header.clone() }; let style = ratatui::style::Style::default() .fg(Color::Yellow) .add_modifier(Modifier::BOLD); let span = Span::styled(truncated, style); buf.set_span(x_offset, y, &span, width); x_offset += width; } } #[allow(clippy::too_many_arguments)] fn render_data_row( &self, buf: &mut Buffer, x_start: u16, y: u16, row_data: &[String], column_widths: &[u16], is_selected: bool, max_width: u16, ) { let mut x_offset = x_start; let style = if is_selected { ratatui::style::Style::default() .bg(self.selected_color) .fg(Color::White) .add_modifier(Modifier::BOLD) } else { ratatui::style::Style::default() }; for (cell_data, &width) in row_data.iter().zip(column_widths) { if x_offset >= max_width { break; } let effective_width = width.saturating_sub(NUM_SPACES_BETWEEN_COLUMNS); let truncated = if cell_data.chars().count() > effective_width as usize { let truncated_chars: String = cell_data .chars() .take(effective_width.saturating_sub(1) as usize) .collect(); format!("{}…", truncated_chars) } else { cell_data.clone() }; // Pad with spaces to fill the column width let padded = format!("{:width$}", truncated, width = width as usize); let span = Span::styled(padded, style); buf.set_span(x_offset, y, &span, width); x_offset += width; } } fn render_row_number_separator( &self, buf: &mut Buffer, x_row_separator: u16, y_start: u16, height: u16, ) { let border_style = ratatui::style::Style::default().fg(self.border_color); // Draw vertical line after row numbers for y in y_start..(y_start + height) { if let Some(cell) = buf.cell_mut(Position::new(x_row_separator - 1, y)) { cell.set_symbol(line::VERTICAL).set_style(border_style); } } } } impl<'a> Widget for DataTable<'a> { fn render(self, area: Rect, buf: &mut Buffer) { if area.area() == 0 { return; } // Calculate row number section width let max_row_num = self.data.rows.len().saturating_sub(self.vertical_scroll); let max_row_num_length = format!("{}", max_row_num).len().max(4) as u16; let row_num_section_width = max_row_num_length + 2 * NUM_SPACES_AFTER_LINE_NUMBER + 1; let x_row_separator = max_row_num_length + NUM_SPACES_AFTER_LINE_NUMBER + 1; // Calculate available width for data columns let available_width = area.width.saturating_sub(row_num_section_width); let min_column_width = 12; let max_visible_columns = (available_width / min_column_width).max(1) as usize; // Clamp scroll offset to valid range let max_scroll = self.data.total_columns.saturating_sub(max_visible_columns); let horizontal_scroll = self.horizontal_scroll.min(max_scroll); // Get visible columns let visible_headers: Vec = self .data .flattened_columns .iter() .skip(horizontal_scroll) .take(max_visible_columns) .cloned() .collect(); // Get visible data for each row (apply vertical scroll) let visible_rows: Vec> = self .data .rows .iter() .skip(self.vertical_scroll) .map(|row| { row.iter() .skip(horizontal_scroll) .take(max_visible_columns) .cloned() .collect() }) .collect(); // Calculate column widths let column_widths = self.calculate_column_widths(&visible_headers, &visible_rows); // Header area: 2 lines (header text + separator) let header_height = 2; let y_header = area.y; let y_first_record = area.y + header_height; // Row area: including row numbers and row content let rows_area = Rect::new( area.x, y_first_record, area.width, area.height.saturating_sub(header_height), ); // Render row numbers self.render_row_numbers(buf, rows_area, &visible_rows); // Render header self.render_header( buf, row_num_section_width, y_header, &visible_headers, &column_widths, area.width, ); // Render header separator (horizontal line below headers) self.render_header_separator(buf, area, x_row_separator, y_first_record); // Render data rows let mut y_offset = y_first_record; for (row_idx, row_data) in visible_rows.iter().enumerate() { if y_offset >= rows_area.bottom() { break; } let actual_row_num = row_idx + self.vertical_scroll; let is_selected = self .selected_row .is_some_and(|selected| actual_row_num == selected); self.render_data_row( buf, row_num_section_width, y_offset, row_data, &column_widths, is_selected, area.width, ); y_offset += 1; } // Render vertical separator after row numbers self.render_row_number_separator(buf, x_row_separator, y_first_record, rows_area.height); } } impl Renderable for ParquetSampleData { fn render_content(&self, area: Rect, buf: &mut Buffer) { let table_component = DataTable::new(self); table_component.render(area, buf); } } ================================================ FILE: src/components/mod.rs ================================================ pub mod data_table; pub mod row_group; pub mod schema; pub mod scrollbar; pub use data_table::DataTable; pub use row_group::RowGroupColumnMetadataComponent; pub use row_group::RowGroupMetadata; pub use row_group::RowGroupProgressBar; pub use schema::FileSchemaTable; pub use schema::SchemaTreeComponent; pub use scrollbar::ScrollbarComponent; ================================================ FILE: src/components/row_group/metadata.rs ================================================ use crate::file::row_groups::{RowGroupAvgMedianStats, RowGroupStats}; use ratatui::style::Style; use ratatui::{ buffer::Buffer, layout::{Constraint, Layout, Rect}, prelude::Color, style::Stylize, symbols::Marker, text::{Line, Span}, widgets::{Axis, Block, Borders, Chart, Dataset, Widget}, }; use crate::file::utils::{commas, human_readable_bytes}; /// Component to display row group level statistics pub struct RowGroupMetadata<'a> { row_group_stats: &'a [RowGroupStats], avg_median_stats: &'a RowGroupAvgMedianStats, selected_idx: usize, } impl<'a> RowGroupMetadata<'a> { pub fn new( row_group_stats: &'a [RowGroupStats], avg_median_stats: &'a RowGroupAvgMedianStats, selected_idx: usize, ) -> Self { Self { row_group_stats, avg_median_stats, selected_idx, } } } impl<'a> Widget for RowGroupMetadata<'a> { fn render(self, area: Rect, buf: &mut Buffer) { let selected_stats = &self.row_group_stats[self.selected_idx]; let vertical_areas = Layout::vertical([Constraint::Length(3), Constraint::Fill(1)]).split(area); // Create 1x4 horizontal grid for stats let horizontal_areas = Layout::horizontal([ Constraint::Percentage(25), Constraint::Percentage(25), Constraint::Percentage(25), Constraint::Percentage(25), ]) .split(vertical_areas[0]); // Render each stat block self.render_stat_block( "Rows", &commas(selected_stats.rows as u64), horizontal_areas[0], buf, commas(self.avg_median_stats.avg_rows_per_rg as u64), commas(self.avg_median_stats.median_rows_per_rg as u64), ); self.render_stat_block( "Compressed", &human_readable_bytes(selected_stats.compressed_size as u64), horizontal_areas[1], buf, human_readable_bytes(self.avg_median_stats.avg_compressed_size as u64), human_readable_bytes(self.avg_median_stats.median_compressed_size as u64), ); self.render_stat_block( "Uncompressed", &human_readable_bytes(selected_stats.uncompressed_size as u64), horizontal_areas[2], buf, human_readable_bytes(self.avg_median_stats.avg_uncompressed_size as u64), human_readable_bytes(self.avg_median_stats.median_uncompressed_size as u64), ); self.render_stat_block( "Ratio", &format!("{:.2}", selected_stats.compression_ratio), horizontal_areas[3], buf, format!("{:.2}", self.avg_median_stats.avg_compression_ratio), format!("{:.2}", self.avg_median_stats.median_compression_ratio), ); let central_area = Layout::horizontal([Constraint::Fill(1), Constraint::Fill(1)]).split(vertical_areas[1]); // Render charts in the remaining area self.render_charts(central_area[0], buf); } } impl<'a> RowGroupMetadata<'a> { fn render_stat_block( &self, title: &str, value: &str, area: Rect, buf: &mut Buffer, average: String, median: String, ) { let title_bottom: Vec = vec![ average.light_cyan().bold(), " / ".white().bold(), median.light_magenta().bold(), ]; let block = Block::bordered() .title(title.light_blue().bold()) .title_bottom(Line::from(title_bottom).centered()) .border_style(ratatui::style::Style::default().fg(Color::Blue)); let inner = block.inner(area); block.render(area, buf); // Center the value in the block if inner.width > 0 && inner.height > 0 { let lines: Vec<&str> = value.lines().collect(); let start_y = inner.y + (inner.height.saturating_sub(lines.len() as u16)) / 2; for (i, line) in lines.iter().enumerate() { let y = start_y + i as u16; if y < inner.y + inner.height { let x = inner.x + (inner.width.saturating_sub(line.len() as u16)) / 2; if x < inner.x + inner.width { line.bold() .yellow() .render(Rect::new(x, y, line.len() as u16, 1), buf); } } } } } fn render_charts(&self, area: Rect, buf: &mut Buffer) { // Split area into two charts horizontally let chart_areas = Layout::vertical([Constraint::Percentage(50), Constraint::Percentage(50)]).split(area); self.render_size_comparison_chart(chart_areas[0], buf); self.render_compression_ratio_chart(chart_areas[1], buf); } fn normalized_x_positions(&self) -> Vec { let num_points = self.row_group_stats.len(); if num_points == 0 { return vec![]; } (0..num_points) .map(|i| (i as f64 + 0.5) / num_points as f64) .collect() } fn make_x_labels(&self) -> Vec { let num_points = self.row_group_stats.len(); match num_points { 0 => vec![], 1 => vec!["1".to_string()], 2 => vec!["1".to_string(), "2".to_string()], 3 => vec!["1".to_string(), "2".to_string(), "3".to_string()], n => { let a: usize = 1usize; let d = n; let b = 1 + (n.saturating_sub(1)) / 3; let c = 1 + (n.saturating_sub(1)) * 2 / 3; let mut labels = vec![a, b, c, d]; labels.sort_unstable(); labels.dedup(); labels.into_iter().map(|v| v.to_string()).collect() } } } fn render_size_comparison_chart(&self, area: Rect, buf: &mut Buffer) { let n = self.row_group_stats.len(); if n == 0 { return; } let x_positions = self.normalized_x_positions(); let compressed_data: Vec<(f64, f64)> = self .row_group_stats .iter() .enumerate() .map(|(i, rg)| (x_positions[i], rg.compressed_size as f64)) .collect(); let uncompressed_data: Vec<(f64, f64)> = self .row_group_stats .iter() .enumerate() .map(|(i, rg)| (x_positions[i], rg.uncompressed_size as f64)) .collect(); let max_compressed = compressed_data .iter() .map(|(_, size)| *size) .fold(0.0, f64::max); let max_uncompressed = uncompressed_data .iter() .map(|(_, size)| *size) .fold(0.0, f64::max); let max_size = max_compressed.max(max_uncompressed); let datasets = vec![ Dataset::default() .name("Compressed") .marker(Marker::Dot) .style(Style::default().fg(Color::Blue)) .data(&compressed_data), Dataset::default() .name("Uncompressed") .marker(Marker::Dot) .style(Style::default().fg(Color::Red)) .data(&uncompressed_data), ]; let x_labels = self.make_x_labels(); let y_step = (max_size * 1.5) / 4.0; let y_labels: Vec = (0..4) .map(|i| { let value = i as f64 * y_step; if value >= 1_000_000.0 { format!("{:.1}M", value / (1_024.0 * 1_024.0)) } else if value >= 1_000.0 { format!("{:.1}K", value / 1_024.0) } else { format!("{value:.0}") } }) .collect(); let title = vec![ "Compressed".light_blue().bold(), " vs ".into(), "Uncompressed".light_red().bold(), " (B)".into(), ]; let chart = Chart::new(datasets) .block( Block::default() .title(Line::from(title).centered()) .title_bottom("Row Group".dark_gray()) .borders(Borders::NONE), ) .x_axis( Axis::default() .style(Style::default().fg(Color::White)) .bounds([0.0, 1.0]) .labels(x_labels), ) .y_axis( Axis::default() .style(Style::default().fg(Color::White)) .bounds([0.0, max_size * 1.5]) .labels(y_labels), ); chart.render(area, buf); } fn render_compression_ratio_chart(&self, area: Rect, buf: &mut Buffer) { let n = self.row_group_stats.len(); if n == 0 { return; } let x_positions = self.normalized_x_positions(); let ratio_data: Vec<(f64, f64)> = self .row_group_stats .iter() .enumerate() .map(|(i, rg)| { let ratio = if rg.compressed_size > 0 { rg.uncompressed_size as f64 / rg.compressed_size as f64 } else { 1.0 }; (x_positions[i], ratio) }) .collect(); let max_ratio = ratio_data .iter() .map(|(_, ratio)| *ratio) .fold(0.0, f64::max); let datasets = vec![ Dataset::default() .name("Compression Ratio") .marker(Marker::Dot) .style(Style::default().fg(Color::Yellow)) .data(&ratio_data), ]; let x_labels = self.make_x_labels(); let y_range = max_ratio * 1.1 - 1.0; let y_step = y_range / 4.0; let y_labels = vec![ "1.0x".to_string(), format!("{:.1}x", 1.0 + y_step), format!("{:.1}x", 1.0 + y_step * 2.0), format!("{:.1}x", 1.0 + y_step * 3.0), ]; let chart = Chart::new(datasets) .block( Block::default() .title("Compression Ratio".yellow()) .title_bottom("Row Group".dark_gray()) .borders(Borders::NONE), ) .x_axis( Axis::default() .style(Style::default().fg(Color::White)) .bounds([0.0, 1.0]) .labels(x_labels), ) .y_axis( Axis::default() .style(Style::default().fg(Color::White)) .bounds([1.0, max_ratio * 1.1]) .labels(y_labels), ); chart.render(area, buf); } } ================================================ FILE: src/components/row_group/mod.rs ================================================ pub mod metadata; pub mod progress_bar; pub mod schema_md; pub use metadata::RowGroupMetadata; pub use progress_bar::RowGroupProgressBar; pub use schema_md::RowGroupColumnMetadataComponent; ================================================ FILE: src/components/row_group/progress_bar.rs ================================================ use crate::file::row_groups::RowGroupStats; use ratatui::{ buffer::Buffer, layout::{Position, Rect}, prelude::{Color, Span}, style::Stylize, text::Line, widgets::{Block, Widget}, }; pub struct RowGroupProgressBar<'a> { pub row_group_stats: &'a [RowGroupStats], pub selected_idx: usize, } impl<'a> RowGroupProgressBar<'a> { pub fn new(row_group_stats: &'a [RowGroupStats], selected_idx: usize) -> Self { Self { row_group_stats, selected_idx, } } } impl<'a> Widget for RowGroupProgressBar<'a> { fn render(self, area: Rect, buf: &mut Buffer) { let total_row_groups = self.row_group_stats.len(); let title: Vec> = vec![ " Row Group: ".into(), format!("{}", self.selected_idx + 1).into(), " / ".into(), format!("{total_row_groups}").into(), " ".into(), ]; let block = Block::bordered() .title(Line::from(title)) .border_style(ratatui::style::Style::default().fg(Color::Gray)) .title_style(ratatui::style::Style::default().fg(Color::Gray).bold()); let inner = block.inner(area); block.render(area, buf); if inner.width > 0 && inner.height > 0 && total_row_groups > 0 { // Calculate the width of each segment let segment_width = inner.width as f64 / total_row_groups as f64; // Find the center line to draw the thin progress bar let center_y = inner.y + inner.height / 2; // First, draw the thin white line across the entire width for x in inner.x..inner.x + inner.width { if let Some(cell) = buf.cell_mut(Position::new(x, center_y)) { cell.set_symbol("─") .set_style(ratatui::style::Style::default().fg(Color::White)); } } // Then, draw the thick filled section for the selected row group let selected_start_x = inner.x + (self.selected_idx as f64 * segment_width) as u16; let selected_end_x = inner.x + ((self.selected_idx + 1) as f64 * segment_width) as u16; // Fill the selected section with solid blocks (single line, centered) for x in selected_start_x..selected_end_x.min(inner.x + inner.width) { if let Some(cell) = buf.cell_mut(Position::new(x, center_y)) { cell.set_symbol("█") .set_style(ratatui::style::Style::default().fg(Color::Blue)); } } } } } ================================================ FILE: src/components/row_group/schema_md.rs ================================================ use crate::file::utils::human_readable_bytes; use crate::file::{row_groups::RowGroupColumnMetadata, utils::commas}; use ratatui::{ buffer::Buffer, layout::{Constraint, Layout, Position, Rect}, prelude::Color, style::Stylize, text::Line, widgets::{Block, Borders, Cell, Row, Table, Widget}, }; /// Component to display column-level metadata for a selected row group pub struct RowGroupColumnMetadataComponent<'a> { column_metadata: &'a RowGroupColumnMetadata, } impl<'a> RowGroupColumnMetadataComponent<'a> { pub fn new(column_metadata: &'a RowGroupColumnMetadata) -> Self { Self { column_metadata } } } impl<'a> Widget for RowGroupColumnMetadataComponent<'a> { fn render(self, area: Rect, buf: &mut Buffer) { let title = vec![ " Column: ".into(), self.column_metadata.column_path.clone().yellow().bold(), " ".into(), ]; let block = Block::bordered() .title(Line::from(title).centered()) .borders(Borders::TOP) .border_style(ratatui::style::Style::default().fg(Color::Blue)); let inner_area = block.inner(area); block.render(area, buf); let [features_area, contents_area] = Layout::vertical([ Constraint::Length(3), // Feature indicators Constraint::Fill(1), // other area ]) .areas(inner_area); let [md_stats_area, page_area] = Layout::horizontal([ Constraint::Fill(3), // Metadata table Constraint::Fill(5), // Pages table ]) .areas(contents_area); // Render pages table self.render_pages_table(page_area, buf); // Split into three sections: feature indicators, stats table, and statistics let mut constraints = vec![ Constraint::Length(7), // Metadata table ]; // Add constraint for statistics table if statistics exist if self.column_metadata.statistics.is_some() { constraints.push(Constraint::Length(6)); // Statistics table } let vertical_areas = Layout::vertical(constraints).split(md_stats_area); self.render_feature_indicators(features_area, buf); self.render_metadata_table(vertical_areas[0], buf); // Render statistics table if available if self.column_metadata.statistics.is_some() { self.render_statistics_table(vertical_areas[1], buf); } } } impl<'a> RowGroupColumnMetadataComponent<'a> { fn render_metadata_table(&self, area: Rect, buf: &mut Buffer) { // Calculate compression ratio let compression_ratio = if self.column_metadata.total_compressed_size > 0 { format!( "{:.2}x", self.column_metadata.total_uncompressed_size as f64 / self.column_metadata.total_compressed_size as f64 ) } else { "N/A".to_string() }; let kv_pairs = vec![ ("File Offset (B)", commas(self.column_metadata.file_offset)), ( "Compressed Size", human_readable_bytes(self.column_metadata.total_compressed_size as u64), ), ( "Uncompressed Size", human_readable_bytes(self.column_metadata.total_uncompressed_size as u64), ), ("Compression Ratio", compression_ratio), ( "Compression Type", self.column_metadata.compression_type.clone(), ), ]; let rows: Vec = kv_pairs .into_iter() .map(|(k, v)| { Row::new(vec![ Cell::from(k).bold().fg(Color::Cyan), Cell::from(v).fg(Color::White), ]) }) .collect(); let table = Table::new(rows, vec![Constraint::Length(18), Constraint::Fill(1)]).block( Block::bordered() .title("Metadata") .border_style(ratatui::style::Style::default().fg(Color::Blue)), ); table.render(area, buf); } fn render_feature_indicators(&self, area: Rect, buf: &mut Buffer) { // Create 1x4 horizontal grid for feature indicators let horizontal_areas = Layout::horizontal([ Constraint::Percentage(25), Constraint::Percentage(25), Constraint::Percentage(25), Constraint::Percentage(25), ]) .split(area); self.render_indicator_box( "Statistics", self.column_metadata.has_stats.has_stats, horizontal_areas[0], buf, ); self.render_indicator_box( "Dict Page", self.column_metadata.has_stats.has_dictionary_page, horizontal_areas[1], buf, ); self.render_indicator_box( "Bloom Filter", self.column_metadata.has_stats.has_bloom_filter, horizontal_areas[2], buf, ); self.render_indicator_box( "Page Stats", self.column_metadata.has_stats.has_page_encoding_stats, horizontal_areas[3], buf, ); } fn render_indicator_box(&self, title: &str, has_feature: bool, area: Rect, buf: &mut Buffer) { let (symbol, color) = if has_feature { ("✓", Color::Green) } else { ("✗", Color::Red) }; let block = Block::bordered() .title(title) .border_style(ratatui::style::Style::default().fg(color)) .title_style(ratatui::style::Style::default().fg(color).bold()); let inner = block.inner(area); block.render(area, buf); // Center the symbol in the block if inner.width > 0 && inner.height > 0 { let symbol_x = inner.x + inner.width / 2; let symbol_y = inner.y + inner.height / 2; if symbol_x < inner.x + inner.width && symbol_y < inner.y + inner.height && let Some(cell) = buf.cell_mut(Position::new(symbol_x, symbol_y)) { cell.set_symbol(symbol) .set_style(ratatui::style::Style::default().fg(color).bold()); } } } fn render_statistics_table(&self, area: Rect, buf: &mut Buffer) { if let Some(ref stats) = self.column_metadata.statistics { let null_count_str = stats .null_count .map(|c| c.to_string()) .unwrap_or_else(|| "N/A".to_string()); let distinct_count_str = stats .distinct_count .map(|c| c.to_string()) .unwrap_or_else(|| "N/A".to_string()); let stat_pairs = vec![ ("Min", stats.min.as_deref().unwrap_or("N/A").to_string()), ("Max", stats.max.as_deref().unwrap_or("N/A").to_string()), ("Null Count", null_count_str), ("Distinct Count", distinct_count_str), ]; let rows: Vec = stat_pairs .into_iter() .map(|(k, v)| { Row::new(vec![ Cell::from(k).bold().fg(Color::Magenta), Cell::from(v).fg(Color::White), ]) }) .collect(); let table = Table::new(rows, vec![Constraint::Length(18), Constraint::Fill(1)]).block( Block::bordered() .title("Statistics") .border_style(ratatui::style::Style::default().fg(Color::Magenta)), ); table.render(area, buf); } } fn render_pages_table(&self, area: Rect, buf: &mut Buffer) { use crate::file::utils::human_readable_bytes; // Create header let header = Row::new(vec![ Cell::from("#").bold().fg(Color::Yellow), Cell::from("Page Type").bold().fg(Color::Yellow), Cell::from("Size").bold().fg(Color::Yellow), Cell::from("Rows").bold().fg(Color::Yellow), Cell::from("Encoding").bold().fg(Color::Yellow), ]); // Create rows from page info let rows: Vec = self .column_metadata .pages .page_infos .iter() .enumerate() .map(|(idx, page)| { Row::new(vec![ Cell::from((idx + 1).to_string()).fg(Color::White), Cell::from(page.page_type.clone()).fg(Color::Cyan), Cell::from(human_readable_bytes(page.size as u64)).fg(Color::White), Cell::from(commas(page.rows as u64)).fg(Color::White), Cell::from(page.encoding.clone()).fg(Color::Green), ]) }) .collect(); let table = Table::new( rows, vec![ Constraint::Max(3), // Page Number Constraint::Fill(3), // Page Type Constraint::Fill(3), // Size Constraint::Fill(2), // Rows Constraint::Fill(3), // Encoding ], ) .header(header) .block( Block::bordered() .title("Pages") .border_style(ratatui::style::Style::default().fg(Color::DarkGray)), ); table.render(area, buf); } } ================================================ FILE: src/components/schema/mod.rs ================================================ pub mod table; pub mod tree; pub use table::FileSchemaTable; pub use tree::SchemaTreeComponent; ================================================ FILE: src/components/schema/table.rs ================================================ use crate::file::schema::FileSchema; use ratatui::{ buffer::Buffer, layout::{Constraint, Rect}, prelude::Color, style::Stylize, symbols::border, text::Line, widgets::{Block, Cell, Row, Table, Widget}, }; use std::cmp::min; use crate::file::Renderable; pub struct FileSchemaTable<'a> { pub schema: &'a FileSchema, pub selected_index: usize, pub title: String, pub title_color: Color, pub selected_color: Color, pub border_style: border::Set, pub horizontal_scroll: usize, pub vertical_scroll: usize, } impl<'a> FileSchemaTable<'a> { pub fn new(schema: &'a FileSchema) -> Self { Self { schema, selected_index: 0, title: "Column Statistics".to_string(), title_color: Color::Green, selected_color: Color::Yellow, border_style: border::ROUNDED, horizontal_scroll: 0, vertical_scroll: 0, } } pub fn with_selected_index(mut self, index: usize) -> Self { self.selected_index = index; self } pub fn with_title(mut self, title: String) -> Self { self.title = title; self } pub fn with_colors(mut self, title: Color, selected: Color) -> Self { self.title_color = title; self.selected_color = selected; self } pub fn with_border_style(mut self, border_style: border::Set) -> Self { self.border_style = border_style; self } pub fn with_horizontal_scroll(mut self, offset: usize) -> Self { self.horizontal_scroll = offset; self } pub fn with_vertical_scroll(mut self, offset: usize) -> Self { self.vertical_scroll = offset; self } pub fn scroll_left(&mut self) { if self.horizontal_scroll > 0 { self.horizontal_scroll -= 1; } } pub fn scroll_right(&mut self) { self.horizontal_scroll += 1; } pub fn get_max_scroll(&self) -> usize { // Calculate how many columns we can show at full width let available_width = 80; // Assume 80 characters available let min_column_width = 12; // Minimum width for readability let max_visible_columns = available_width / min_column_width; // Total columns minus visible columns let total_columns = 10usize; // We have 10 columns total_columns.saturating_sub(max_visible_columns as usize) } } impl<'a> Widget for FileSchemaTable<'a> { fn render(self, area: Rect, buf: &mut Buffer) { let all_headers = [ "Repetition", "Physical", "Compressed", "Uncompressed", "Ratio", "Encodings", "Compression", "Min", "Max", "Nulls", ]; // Calculate how many columns we can show at full width let available_width = area.width.saturating_sub(4); // Account for borders and spacing let min_column_width = 12; let max_visible_columns = (available_width / min_column_width).max(1); // Clamp scroll offset to valid range let max_scroll = all_headers .len() .saturating_sub(max_visible_columns as usize); let horizontal_scroll = self.horizontal_scroll.min(max_scroll); // Calculate visible rows based on vertical scroll and available height let visible_rows_count = area.height.saturating_sub(1) as usize; // Generate table data with only visible columns and rows let (visible_rows, column_widths) = self.schema.generate_table_rows_with_scroll( self.selected_index, horizontal_scroll, max_visible_columns as usize, self.vertical_scroll, visible_rows_count, ); // Get visible columns let visible_headers: Vec<_> = all_headers .iter() .skip(horizontal_scroll) .take(max_visible_columns as usize) .collect(); // Include header widths in the calculation and create constraints let col_constraints: Vec<_> = visible_headers .iter() .enumerate() .map(|(i, header)| { let content_width = column_widths.get(i).cloned().unwrap_or(0); let header_width = header.len(); // Use maximum of 30 for readability Constraint::Length(min(content_width.max(header_width), 30) as u16 + 1) }) .collect(); let table_widget = Table::new(visible_rows, col_constraints) .header(Row::new( visible_headers .into_iter() .map(|h| Cell::from(*h).bold().fg(Color::Yellow)), )) .column_spacing(1) .block( Block::bordered() .title( Line::from(self.title.clone()) .centered() .bold() .fg(self.title_color), ) .border_set(self.border_style), ); table_widget.render(area, buf); } } impl Renderable for FileSchema { fn render_content(&self, area: Rect, buf: &mut Buffer) { // Default implementation without selection highlighting let table_component = FileSchemaTable::new(self); table_component.render(area, buf); } } ================================================ FILE: src/components/schema/tree.rs ================================================ use crate::file::schema::SchemaInfo; use ratatui::{ buffer::Buffer, layout::Rect, style::{Color, Stylize}, symbols::border, text::Line, widgets::{Block, List, ListItem, Widget}, }; pub struct SchemaTreeComponent<'a> { pub schema_columns: &'a Vec, pub selected_index: usize, pub scroll_offset: usize, pub title: String, pub title_color: Color, pub root_color: Color, pub primitive_color: Color, pub group_color: Color, pub selected_color: Color, pub border_style: border::Set, pub show_legend: bool, } impl<'a> SchemaTreeComponent<'a> { pub fn new(schema_columns: &'a Vec) -> Self { Self { schema_columns, selected_index: 0, scroll_offset: 0, title: "Schema Tree".to_string(), title_color: Color::Yellow, root_color: Color::LightYellow, primitive_color: Color::White, group_color: Color::Green, selected_color: Color::Yellow, border_style: border::ROUNDED, show_legend: true, } } pub fn with_selected_index(mut self, index: usize) -> Self { self.selected_index = index; self } pub fn with_scroll_offset(mut self, offset: usize) -> Self { self.scroll_offset = offset; self } pub fn with_title(mut self, title: String) -> Self { self.title = title; self } pub fn with_colors( mut self, root: Color, primitive: Color, group: Color, selected: Color, ) -> Self { self.root_color = root; self.primitive_color = primitive; self.group_color = group; self.selected_color = selected; self } pub fn with_border_style(mut self, border_style: border::Set) -> Self { self.border_style = border_style; self } pub fn with_legend(mut self, show: bool) -> Self { self.show_legend = show; self } } impl<'a> Widget for SchemaTreeComponent<'a> { fn render(self, area: Rect, buf: &mut Buffer) { // Create a mapping from primitive column index to schema tree index let primitive_to_schema_map: Vec = self .schema_columns .iter() .enumerate() .filter_map(|(idx, line)| matches!(line, SchemaInfo::Primitive { .. }).then_some(idx)) .collect(); // Calculate visible range based on scroll offset and available height let visible_height = area.height.saturating_sub(1) as usize; // Account for borders + legend let start_idx = self.scroll_offset; let end_idx = (start_idx + visible_height).min(self.schema_columns.len()); let items: Vec = self .schema_columns .iter() .enumerate() .skip(start_idx) .take(end_idx - start_idx) .map(|(idx, line)| { let is_selected = if self.selected_index > 0 { // Convert primitive index (1-based) to schema tree index primitive_to_schema_map .get(self.selected_index - 1) .is_some_and(|&schema_idx| idx == schema_idx) } else { false }; match line { SchemaInfo::Root { display: d, .. } => { ListItem::new(d.clone()).fg(self.root_color) } SchemaInfo::Primitive { display: d, .. } => { let mut item = ListItem::new(d.clone()).fg(self.primitive_color); if is_selected { item = item.bg(self.selected_color).fg(Color::Black); } item } SchemaInfo::Group { display: d, .. } => { ListItem::new(d.clone()).fg(self.group_color) } } }) .collect(); // highlight the color let mut block = Block::bordered() .title(Line::from(self.title.fg(self.title_color).bold()).centered()) .border_set(self.border_style); if self.show_legend { let mut legend_vec = vec![ "Leaf".fg(self.primitive_color), ", ".into(), "Group".fg(self.group_color), ]; if self.selected_index > 0 { legend_vec.extend(vec![", ".into(), "Selected".bold().fg(self.selected_color)]); } let legend = Line::from(legend_vec); block = block.title_bottom(legend.centered()); } let list = List::new(items).block(block); list.render(area, buf); } } ================================================ FILE: src/components/scrollbar.rs ================================================ use ratatui::{buffer::Buffer, layout::Rect, style::Color, widgets::Widget}; pub struct ScrollbarComponent { pub orientation: ScrollbarOrientation, pub total_items: usize, pub visible_items: usize, pub position: usize, pub track_color: Color, pub thumb_color: Color, pub track_symbol: &'static str, pub thumb_symbol: &'static str, } #[derive(Debug, Clone, Copy)] pub enum ScrollbarOrientation { Vertical, Horizontal, } impl ScrollbarComponent { pub fn vertical(total_items: usize, visible_items: usize, position: usize) -> Self { Self { orientation: ScrollbarOrientation::Vertical, total_items, visible_items, position, track_color: Color::Yellow, thumb_color: Color::Gray, track_symbol: "│", thumb_symbol: "█", } } pub fn horizontal(total_items: usize, visible_items: usize, position: usize) -> Self { Self { orientation: ScrollbarOrientation::Horizontal, total_items, visible_items, position, track_color: Color::DarkGray, thumb_color: Color::Gray, track_symbol: "─", thumb_symbol: "█", } } pub fn with_colors(mut self, track_color: Color, thumb_color: Color) -> Self { self.track_color = track_color; self.thumb_color = thumb_color; self } pub fn with_symbols(mut self, track_symbol: &'static str, thumb_symbol: &'static str) -> Self { self.track_symbol = track_symbol; self.thumb_symbol = thumb_symbol; self } fn calculate_thumb_info(&self, track_length: usize) -> (usize, usize) { if self.total_items <= self.visible_items { return (track_length, 0); } let thumb_size = ((self.visible_items * track_length) / self.total_items).max(1); let max_position = self.total_items.saturating_sub(self.visible_items); let thumb_position = if max_position == 0 { 0 } else { (self.position * (track_length - thumb_size)) / max_position }; (thumb_size, thumb_position) } } impl Widget for ScrollbarComponent { fn render(self, area: Rect, buf: &mut Buffer) { if area.width == 0 || area.height == 0 { return; } match self.orientation { ScrollbarOrientation::Vertical => { if area.width < 1 || area.height < 2 { return; } let track_length = area.height as usize; let (thumb_size, thumb_position) = self.calculate_thumb_info(track_length); // Render the track for y in 0..area.height { buf[(area.x, area.y + y)] .set_symbol(self.track_symbol) .set_fg(self.track_color); } // Render the thumb for i in 0..thumb_size { let y = area.y + thumb_position as u16 + i as u16; if y < area.y + area.height { buf[(area.x, y)] .set_symbol(self.thumb_symbol) .set_fg(self.thumb_color); } } } ScrollbarOrientation::Horizontal => { if area.width < 2 || area.height < 1 { return; } let track_length = area.width as usize; let (thumb_size, thumb_position) = self.calculate_thumb_info(track_length); // Render the track for x in 0..area.width { buf[(area.x + x, area.y)] .set_symbol(self.track_symbol) .set_fg(self.track_color); } // Render the thumb for i in 0..thumb_size { let x = area.x + thumb_position as u16 + i as u16; if x < area.x + area.width { buf[(x, area.y)] .set_symbol(self.thumb_symbol) .set_fg(self.thumb_color); } } } } } } ================================================ FILE: src/file/metadata.rs ================================================ use itertools::Itertools; use parquet::file::metadata::ParquetMetaData; use ratatui::widgets::Widget; use ratatui::{ buffer::Buffer, layout::{Constraint, Rect}, prelude::Color, style::Stylize, symbols::border, text::Line, widgets::{Block, Cell, Row, Table}, }; use std::collections::{HashMap, HashSet}; use crate::file::Renderable; use crate::file::utils::commas; use crate::file::utils::human_readable_bytes; #[derive(Debug)] pub struct FileMetadata { pub format_version: String, pub created_by: String, pub num_rows: usize, pub num_columns: usize, pub num_row_groups: usize, pub raw_size: u64, pub compressed_size: u64, pub compression_ratio: f64, pub codecs: String, pub encodings: String, pub avg_row_size: u64, } impl FileMetadata { pub fn from_metadata(md: &ParquetMetaData) -> Result> { let format_version = md.file_metadata().version(); let created_by = md.file_metadata().created_by().unwrap_or("—"); let num_row_groups = md.num_row_groups(); let num_rows = md.row_groups().iter().map(|rg| rg.num_rows()).sum::() as usize; let num_columns = md.file_metadata().schema_descr().num_columns(); // calulcate file metadata let (raw_size, compressed_size, encodings_seen, codec_counts) = md.row_groups().iter().flat_map(|rg| rg.columns()).fold( (0u64, 0u64, HashSet::new(), HashMap::new()), |(raw, comp, mut encodings, mut codecs), col| { let codec_name = format!("{:?}", col.compression()); *codecs.entry(codec_name).or_insert(0) += 1; for enc in col.encodings() { encodings.insert(format!("{enc:?}")); } ( raw + col.uncompressed_size() as u64, comp + col.compressed_size() as u64, encodings, codecs, ) }, ); let compression_ratio = if compressed_size > 0 { raw_size as f64 / compressed_size as f64 } else { 0.0 }; let avg_row_size = if num_rows > 0 { raw_size as f64 / num_rows as f64 } else { 0.0 }; let codecs: String = codec_counts .iter() .map(|(c, n)| format!("{c}({n})")) .sorted() .collect::>() .join(", "); let encodings: String = encodings_seen .into_iter() .sorted() .collect::>() .join(", "); Ok(FileMetadata { format_version: format_version.to_string(), created_by: created_by.to_string(), num_rows, num_columns, num_row_groups, raw_size, compressed_size, compression_ratio, codecs, encodings, avg_row_size: avg_row_size as u64, }) } } impl Renderable for FileMetadata { fn render_content(&self, area: Rect, buf: &mut Buffer) { let kv_pairs: Vec<(String, String)> = vec![ ("Format version".into(), self.format_version.clone()), ("Created by".into(), self.created_by.clone()), ("Rows".into(), commas(self.num_rows as u64)), ("Columns".into(), self.num_columns.to_string()), ("Row groups".into(), self.num_row_groups.to_string()), ("Size (raw)".into(), human_readable_bytes(self.raw_size)), ( "Size (compressed)".into(), human_readable_bytes(self.compressed_size), ), ( "Compression ratio".into(), format!("{:.2}x", self.compression_ratio), ), ("Codecs (cols)".into(), self.codecs.clone()), ("Encodings".into(), self.encodings.clone()), ("Avg row size".into(), format!("{} B", self.avg_row_size)), ]; let max_value_size = kv_pairs.iter().map(|(_, v)| v.len()).max().unwrap_or(0) as u16; let rows: Vec = kv_pairs .into_iter() .map(|(k, v)| { Row::new(vec![ Cell::from(format!("{k:>18}")).bold().fg(Color::Blue), Cell::from(format!("{v:<}")), ]) }) .collect(); // Calculate centered area for the table let key_width = 18; let value_width = max_value_size.max(20); // Ensure minimum width let table_width = key_width + value_width + 3; // +3 for spacing and borders let table_height = rows.len() as u16; let center_x = area.x + (area.width.saturating_sub(table_width)) / 2; let center_y = area.y + (area.height.saturating_sub(table_height)) / 2; let centered_area = Rect { x: center_x, y: center_y, width: table_width + 2, height: table_height + 2, }; let table = Table::new( rows, vec![ Constraint::Length(key_width), Constraint::Length(value_width), ], ) .block( Block::bordered() .title(Line::from("File Metadata".yellow().bold()).centered()) .border_set(border::ROUNDED), ); table.render(centered_area, buf); } } #[cfg(test)] mod tests { use super::*; use parquet::file::reader::{FileReader, SerializedFileReader}; use std::fs::File; fn load_alltypes_metadata() -> FileMetadata { let path = format!( "{}/alltypes_plain.parquet", crate::file::parquet_test_data(), ); let file = File::open(path).unwrap(); let reader = SerializedFileReader::try_from(file).unwrap(); let metadata = reader.metadata(); FileMetadata::from_metadata(metadata).unwrap() } #[test] fn test_file_metadata_basic() { let file_metadata = load_alltypes_metadata(); // alltypes_plain.parquet has 8 rows and 11 columns assert_eq!(8, file_metadata.num_rows); assert_eq!(11, file_metadata.num_columns); // Should have 1 row group assert_eq!(1, file_metadata.num_row_groups); } #[test] fn test_format_version() { let file_metadata = load_alltypes_metadata(); // Format version should be a non-empty string assert!(!file_metadata.format_version.is_empty()); // Should be a valid parquet version assert_eq!("1", file_metadata.format_version); } #[test] fn test_created_by() { let file_metadata = load_alltypes_metadata(); let expected_created_by = "impala version 1.3.0-INTERNAL (build 8a48ddb1eff84592b3fc06bc6f51ec120e1fffc9)"; // Created by should be present (not the default "—") assert_eq!(expected_created_by, file_metadata.created_by); } #[test] fn test_size_metrics() { let file_metadata = load_alltypes_metadata(); // Both sizes should be positive assert_eq!(671, file_metadata.raw_size); assert_eq!(671, file_metadata.compressed_size); // Raw size should be == compressed size for this file assert_eq!(file_metadata.raw_size, file_metadata.compressed_size); } #[test] fn test_compression_ratio() { let file_metadata = load_alltypes_metadata(); // Compression ratio should be == 1.0 for this file assert_eq!(1.0, file_metadata.compression_ratio); } #[test] fn test_codecs() { let file_metadata = load_alltypes_metadata(); // Codecs string should not be empty assert!(!file_metadata.codecs.is_empty()); // Should contain at least one codec name // Common codecs: UNCOMPRESSED, SNAPPY, GZIP, etc. assert_eq!("UNCOMPRESSED(11)", file_metadata.codecs); } #[test] fn test_encodings() { let file_metadata = load_alltypes_metadata(); // Encodings string should not be empty assert!(!file_metadata.encodings.is_empty()); // Should contain at least one encoding type // Common encodings: PLAIN, RLE, DELTA_BINARY_PACKED, etc. assert!( file_metadata.encodings.contains("PLAIN") && file_metadata.encodings.contains("RLE") && file_metadata.encodings.contains("PLAIN_DICTIONARY") ); } #[test] fn test_avg_row_size() { let file_metadata = load_alltypes_metadata(); // Average row size should be positive assert_eq!(83_u64, file_metadata.avg_row_size); } #[test] fn test_from_metadata_error_handling() { // Test that from_metadata returns Ok for valid files let path = format!( "{}/alltypes_plain.parquet", crate::file::parquet_test_data(), ); let file = File::open(path).unwrap(); let reader = SerializedFileReader::try_from(file).unwrap(); let metadata = reader.metadata(); let result = FileMetadata::from_metadata(metadata); assert!(result.is_ok()); } #[test] fn test_renderable_trait() { let file_metadata = load_alltypes_metadata(); // Test that the Renderable trait is implemented // We can't easily test the actual rendering without a full terminal setup, // but we can verify the method exists and doesn't panic let mut buf = Buffer::empty(Rect::new(0, 0, 100, 50)); let area = Rect::new(0, 0, 100, 50); // This should not panic file_metadata.render_content(area, &mut buf); } } ================================================ FILE: src/file/mod.rs ================================================ pub mod metadata; pub mod parquet_ctx; pub mod row_groups; pub mod sample_data; pub mod schema; pub mod utils; use std::{env, error::Error, path::PathBuf}; use ratatui::{buffer::Buffer, layout::Rect}; pub trait Renderable { fn render_content(&self, area: Rect, buf: &mut Buffer); } /// From arrow crate pub fn parquet_test_data() -> String { match get_data_dir("PARQUET_TEST_DATA", "parquet-testing/data") { Ok(pb) => pb.display().to_string(), Err(err) => panic!("failed to get parquet data dir: {err}"), } } /// From Arrow Crate /// Returns a directory path for finding test data. /// /// udf_env: name of an environment variable /// /// submodule_dir: fallback path (relative to CARGO_MANIFEST_DIR) /// /// Returns either: /// The path referred to in `udf_env` if that variable is set and refers to a directory /// The submodule_data directory relative to CARGO_MANIFEST_PATH fn get_data_dir(udf_env: &str, submodule_data: &str) -> Result> { // Try user defined env. if let Ok(dir) = env::var(udf_env) { let trimmed = dir.trim().to_string(); if !trimmed.is_empty() { let pb = PathBuf::from(trimmed); if pb.is_dir() { return Ok(pb); } else { return Err(format!( "the data dir `{}` defined by env {} not found", pb.display(), udf_env ) .into()); } } } // The env is undefined or its value is trimmed to empty, let's try default dir. // env "CARGO_MANIFEST_DIR" is "the directory containing the manifest of your package", // set by `cargo run` or `cargo test`, see: // https://doc.rust-lang.org/cargo/reference/environment-variables.html let dir = env!("CARGO_MANIFEST_DIR"); let pb = PathBuf::from(dir).join(submodule_data); if pb.is_dir() { Ok(pb) } else { Err(format!( "env `{}` is undefined or has empty value, and the pre-defined data dir `{}` not found\n\ HINT: try running `git submodule update --init`", udf_env, pb.display(), ).into()) } } ================================================ FILE: src/file/parquet_ctx.rs ================================================ use parquet::file::reader::{FileReader, SerializedFileReader}; use std::fs::File; use crate::file::metadata::FileMetadata; use crate::file::row_groups::RowGroups; use crate::file::sample_data::ParquetSampleData; use crate::file::schema::FileSchema; pub struct ParquetCtx { pub file_path: String, pub metadata: FileMetadata, pub row_groups: RowGroups, pub schema: FileSchema, pub sample_data: ParquetSampleData, } impl ParquetCtx { pub fn from_file(file_path: &str) -> Result> { let file = File::open(file_path)?; let reader: SerializedFileReader = SerializedFileReader::new(file)?; let md = reader.metadata(); let row_groups = RowGroups::from_file_reader(&reader)?; // TODO: async calls? let metadata = FileMetadata::from_metadata(md)?; let schema = FileSchema::from_metadata(md)?; // Read sample data let sample_data = ParquetSampleData::read_sample_data(file_path)?; Ok(ParquetCtx { file_path: file_path.to_string(), metadata, row_groups, schema, sample_data, }) } pub fn column_size(&self) -> usize { self.schema.column_size() } } ================================================ FILE: src/file/row_groups.rs ================================================ use parquet::basic::{Encoding, PageType}; use parquet::column::page::{Page, PageReader}; use parquet::file::metadata::{ColumnChunkMetaData, RowGroupMetaData}; use parquet::file::reader::FileReader; use parquet::file::reader::{ChunkReader, SerializedFileReader}; use parquet::file::statistics::Statistics; use itertools::Itertools; use std::iter::Iterator; pub struct RowGroupPageInfo { pub page_infos: Vec, } pub struct HasStats { pub has_stats: bool, pub has_dictionary_page: bool, pub has_bloom_filter: bool, pub has_page_encoding_stats: bool, } pub struct PageInfo { pub page_type: String, pub size: usize, pub rows: usize, pub encoding: String, } pub struct RowGroupColumnStats { pub min: Option, pub max: Option, pub null_count: Option, pub distinct_count: Option, } pub struct RowGroupColumnMetadata { pub file_offset: u64, pub column_path: String, pub has_stats: HasStats, pub statistics: Option, pub total_compressed_size: i64, pub total_uncompressed_size: i64, pub compression_type: String, pub pages: RowGroupPageInfo, } pub struct RowGroupAvgMedianStats { pub avg_compressed_size: f64, pub median_compressed_size: f64, pub avg_uncompressed_size: f64, pub median_uncompressed_size: f64, pub avg_rows_per_rg: f64, pub median_rows_per_rg: f64, pub avg_compression_ratio: f64, pub median_compression_ratio: f64, } impl RowGroupAvgMedianStats { pub fn new(row_groups_stats: &[RowGroupStats]) -> Self { // TODO: parallelize or iterate row group stats once Self { avg_compressed_size: row_groups_stats .iter() .map(|rg| rg.compressed_size) .sum::() as f64 / row_groups_stats.len() as f64, median_compressed_size: row_groups_stats .iter() .map(|rg| rg.compressed_size) .sorted() .nth(row_groups_stats.len() / 2) .unwrap_or(0) as f64, avg_uncompressed_size: row_groups_stats .iter() .map(|rg| rg.uncompressed_size) .sum::() as f64 / row_groups_stats.len() as f64, median_uncompressed_size: row_groups_stats .iter() .map(|rg| rg.uncompressed_size) .sorted() .nth(row_groups_stats.len() / 2) .unwrap_or(0) as f64, avg_rows_per_rg: row_groups_stats.iter().map(|rg| rg.rows).sum::() as f64 / row_groups_stats.len() as f64, median_rows_per_rg: row_groups_stats .iter() .map(|rg| rg.rows) .sorted() .nth(row_groups_stats.len() / 2) .unwrap_or(0) as f64, avg_compression_ratio: row_groups_stats .iter() .map(|rg| rg.compression_ratio) .sum::() / row_groups_stats.len() as f64, median_compression_ratio: row_groups_stats .iter() .map(|rg| rg.compression_ratio) .sorted_by(|a, b| a.partial_cmp(b).unwrap()) .nth(row_groups_stats.len() / 2) .unwrap_or(0.), } } } pub struct RowGroups { pub row_groups: Vec, pub avg_median_stats: RowGroupAvgMedianStats, } impl RowGroups { pub fn from_file_reader( reader: &SerializedFileReader, ) -> Result> { let row_groups = (0..reader.metadata().num_row_groups()) .map(|idx| RowGroupStats::from_file_reader(reader, idx)) .collect::, _>>()?; let avg_median_stats = RowGroupAvgMedianStats::new(&row_groups); Ok(Self { row_groups, avg_median_stats, }) } pub fn num_row_groups(&self) -> usize { self.row_groups.len() } } pub struct RowGroupStats { pub idx: usize, pub rows: i64, pub compressed_size: i64, pub uncompressed_size: i64, pub compression_ratio: f64, pub column_metadata: Vec, } impl RowGroupStats { pub fn from_file_reader( reader: &SerializedFileReader, idx: usize, ) -> Result> { let rg_md: &RowGroupMetaData = reader.metadata().row_group(idx); let compressed_size = rg_md.columns().iter().map(|c| c.compressed_size()).sum(); let uncompressed_size = rg_md.columns().iter().map(|c| c.uncompressed_size()).sum(); let compression_ratio = uncompressed_size as f64 / compressed_size as f64; let column_metadata = (0..rg_md.num_columns()) .map(|col_idx| RowGroupColumnMetadata::from_file_reader(reader, idx, col_idx)) .collect::, _>>()?; Ok(RowGroupStats { idx, rows: rg_md.num_rows(), compressed_size, uncompressed_size, compression_ratio, column_metadata, }) } } impl RowGroupColumnMetadata { pub fn from_file_reader( reader: &SerializedFileReader, rg_idx: usize, col_idx: usize, ) -> Result> { let rg_md = reader.metadata().row_group(rg_idx); let column_chunk: &ColumnChunkMetaData = rg_md.column(col_idx); let mut page_reader = reader .get_row_group(rg_idx)? .get_column_page_reader(col_idx)?; let pages = Self::make_page_info(&mut page_reader)?; let statistics = RowGroupColumnStats::new(column_chunk.statistics()); Ok(RowGroupColumnMetadata { file_offset: column_chunk.file_offset() as u64, column_path: column_chunk.column_descr().path().to_string(), has_stats: HasStats { has_stats: column_chunk.statistics().is_some(), has_dictionary_page: column_chunk.dictionary_page_offset().is_some(), has_bloom_filter: column_chunk.bloom_filter_offset().is_some(), has_page_encoding_stats: column_chunk.page_encoding_stats().is_some() && !column_chunk.page_encoding_stats().unwrap().is_empty(), }, statistics, total_compressed_size: column_chunk.compressed_size(), total_uncompressed_size: column_chunk.uncompressed_size(), compression_type: column_chunk.compression().to_string(), pages, }) } fn make_page_info( page_reader: &mut Box, ) -> Result> { let mut page_info = Vec::new(); while let Ok(page) = page_reader.get_next_page() { if let Some(page) = page { page_info.push(PageInfo::from(&page)); } else { break; } } Ok(RowGroupPageInfo { page_infos: page_info, }) } } impl From<&Page> for PageInfo { fn from(page: &Page) -> Self { // Get the page reader for this column let page_type = match page.page_type() { PageType::DATA_PAGE => "Data Page".to_string(), PageType::INDEX_PAGE => "Index Page".to_string(), PageType::DICTIONARY_PAGE => "Dictionary Page".to_string(), PageType::DATA_PAGE_V2 => "Data Page V2".to_string(), }; let encoding: String = match page.encoding() { Encoding::PLAIN => "Plain".to_string(), Encoding::PLAIN_DICTIONARY => "Plain Dictionary".to_string(), Encoding::RLE => "RLE".to_string(), Encoding::DELTA_BINARY_PACKED => "Delta Binary Packed".to_string(), Encoding::DELTA_LENGTH_BYTE_ARRAY => "Delta Length Byte Array".to_string(), Encoding::DELTA_BYTE_ARRAY => "Delta Byte Array".to_string(), Encoding::RLE_DICTIONARY => "RLE Dictionary".to_string(), Encoding::BYTE_STREAM_SPLIT => "Byte Stream Split".to_string(), _ => format!("{:?}", page.encoding()), // Handle any other encoding types }; PageInfo { page_type, size: page.buffer().len(), rows: page.num_values() as usize, encoding, } } } macro_rules! extract_stat_value { ($stats:expr, $method:ident) => { match $stats { Statistics::Boolean(s) => s.$method().map(|v| v.to_string()), Statistics::Int32(s) => s.$method().map(|v| v.to_string()), Statistics::Int64(s) => s.$method().map(|v| v.to_string()), Statistics::Int96(s) => s.$method().map(|v| format!("{:?}", v)), Statistics::Float(s) => s.$method().map(|v| v.to_string()), Statistics::Double(s) => s.$method().map(|v| v.to_string()), Statistics::ByteArray(s) => s.$method().and_then(|bytes| { std::str::from_utf8(bytes.data()) .ok() .map(|s| s.to_string()) }), Statistics::FixedLenByteArray(s) => s.$method().and_then(|bytes| { std::str::from_utf8(bytes.data()) .ok() .map(|s| s.to_string()) }), } }; } impl RowGroupColumnStats { fn new(stats: Option<&Statistics>) -> Option { stats.map(|stats| Self { min: extract_stat_value!(stats, min_opt), max: extract_stat_value!(stats, max_opt), null_count: stats.null_count_opt(), distinct_count: stats.distinct_count_opt(), }) } } ================================================ FILE: src/file/sample_data.rs ================================================ use polars::prelude::*; #[derive(Debug, Clone)] pub struct ParquetSampleData { pub flattened_columns: Vec, pub rows: Vec>, pub total_columns: usize, pub total_rows: usize, } // TODO: in future create a independent crate that does the parsing, // the polars crate is large and doesn't support complex nested types. impl ParquetSampleData { pub fn read_sample_data( file_path: &str, ) -> Result> { const MAX_ROWS: usize = 200; // Read parquet file using polars LazyFrame let df = LazyFrame::scan_parquet(PlPath::new(file_path), Default::default())? .limit(MAX_ROWS as u32) .collect()?; // Flatten struct columns let df = Self::flatten_struct_columns(df)?; // Get column names let flattened_columns: Vec = df .get_column_names() .iter() .map(|s| s.to_string()) .collect(); let total_columns = flattened_columns.len(); // Convert dataframe to rows of strings let mut rows = Vec::new(); for row_idx in 0..df.height() { let mut row = Vec::new(); for col in df.get_columns() { let series = col.as_materialized_series(); let value = Self::get_value_as_string(series, row_idx); row.push(value); } rows.push(row); } Ok(ParquetSampleData { total_columns, flattened_columns, rows, total_rows: df.height(), }) } fn flatten_struct_columns(df: DataFrame) -> Result> { // For now, we'll just return the dataframe as-is // Struct columns will be displayed with their string representation // TODO: Add proper struct flattening if needed Ok(df) } fn get_value_as_string(col: &Series, row_idx: usize) -> String { // Use get() which returns AnyValue and handle it match col.get(row_idx) { Ok(any_value) => { if any_value.is_null() { "NULL".to_string() } else { format!("{any_value}") } } Err(_) => "NULL".to_string(), } } } ================================================ FILE: src/file/schema.rs ================================================ use std::collections::HashSet; use parquet::basic::{LogicalType, TimeUnit, Type as PhysicalType}; use parquet::file::metadata::ParquetMetaData; use parquet::schema::types::Type as ParquetType; use ratatui::{ style::{Color, Stylize}, widgets::{Cell, Row}, }; use crate::file::utils::format_size; #[derive(Debug, Clone)] pub struct ColumnStats { pub min: Option, pub max: Option, pub nulls: u64, pub distinct: Option, pub total_compressed_size: u64, pub total_uncompressed_size: u64, } #[derive(Clone)] pub struct ColumnSchemaInfo { pub name: String, pub repetition: String, pub physical: String, pub logical: String, pub codec: String, pub converted_type: String, pub encoding: String, pub dictionary_values: Option>, } #[derive(Clone)] pub enum SchemaInfo { Root { name: String, display: String, }, Primitive { name: String, display: String, info: Box, stats: ColumnStats, }, Group { name: String, display: String, repetition: String, }, } // TODO: Add Dictionary Values pub struct FileSchema { pub columns: Vec, } impl FileSchema { pub fn from_metadata(md: &ParquetMetaData) -> Result> { let schema_descr: &parquet::schema::types::SchemaDescriptor = md.file_metadata().schema_descr(); let root = schema_descr.root_schema(); // Pre-compute codec + encoding summary for every leaf column let mut summaries: Vec<(String, String)> = Vec::new(); for (col_idx, _) in schema_descr.columns().iter().enumerate() { // use std::collections::BTreeSet; let mut codecs: HashSet = HashSet::new(); let mut encs: HashSet = HashSet::new(); md.row_groups().iter().for_each(|rg| { let col_chunk = rg.column(col_idx); codecs.insert(format!("{:?}", col_chunk.compression())); encs.extend(col_chunk.encodings().iter().map(|enc| format!("{enc:?}"))); }); let codec_summary = codecs.into_iter().collect::>().join(", "); let enc_summary = encs.into_iter().collect::>().join(", "); summaries.push((codec_summary, enc_summary)); } let mut lines: Vec = Vec::new(); lines.push(SchemaInfo::Root { name: "root".to_string(), display: "└─ root".to_string(), }); let children = root.get_fields(); let count = children.len(); let mut leaf_idx: usize = 0; for (idx, child) in children.iter().enumerate() { traverse( child.as_ref(), " ".to_string(), idx == count - 1, &mut lines, &mut leaf_idx, &summaries, md, ); } Ok(FileSchema { columns: lines }) } pub fn column_group_name(&self, index: usize) -> String { match self.columns.get(index).unwrap() { SchemaInfo::Primitive { name, .. } => name.clone(), SchemaInfo::Group { name, .. } => name.clone(), _ => unreachable!(), } } pub fn column_size(&self) -> usize { self.columns .iter() .filter(|c| matches!(c, SchemaInfo::Primitive { .. })) .count() } pub fn tree_width(&self) -> usize { self.columns .iter() .map(|c| match c { SchemaInfo::Root { display, .. } => display.len(), SchemaInfo::Primitive { display, .. } => display.len(), SchemaInfo::Group { display, .. } => display.len(), }) .max() .unwrap_or(0) .max(24) // max for the bottom of the chart } pub fn primitive_column_names(&self) -> Vec { self.columns .iter() .filter(|c| matches!(c, SchemaInfo::Primitive { .. })) .map(|c| match c { SchemaInfo::Primitive { name, .. } => name.clone(), _ => unreachable!(), }) .collect() } pub fn generate_table_rows(&self, selected_index: Option) -> Vec> { let mut primitive_index = 1; // Start counting primitives from 1 (like app does) self.columns .iter() .filter_map(|col| { if let SchemaInfo::Primitive { info, stats, .. } = col { let compression_ratio = if stats.total_uncompressed_size > 0 { format!( "{:.2}x", stats.total_uncompressed_size as f64 / stats.total_compressed_size as f64 ) } else { "N/A".to_string() }; let is_selected = selected_index == Some(primitive_index); let mut row = Row::new([ Cell::from(info.repetition.clone()), Cell::from(info.physical.clone()), Cell::from(format_size(stats.total_compressed_size)), Cell::from(format_size(stats.total_uncompressed_size)), Cell::from(compression_ratio), Cell::from(info.encoding.clone()), Cell::from(info.codec.clone()), Cell::from(stats.min.clone().unwrap_or_else(|| "NULL".to_string())), Cell::from(stats.max.clone().unwrap_or_else(|| "NULL".to_string())), Cell::from(stats.nulls.to_string()), ]); if is_selected { row = row.style( ratatui::style::Style::default() .bg(Color::Yellow) .fg(Color::Black), ); } primitive_index += 1; Some(row) } else if let SchemaInfo::Group { repetition, .. } = col { let row = Row::new(vec![ Cell::from(repetition.clone().green()), Cell::from("group".green()), ]); Some(row) } else { None } }) .collect() } pub fn generate_table_rows_with_columns( &self, selected_index: usize, start_col: usize, num_cols: usize, ) -> (Vec>, Vec) { self.generate_table_rows_with_scroll( selected_index, start_col, num_cols, 0, self.columns.len(), ) } pub fn generate_table_rows_with_scroll( &self, selected_index: usize, start_col: usize, num_cols: usize, start_row: usize, num_rows: usize, ) -> (Vec>, Vec) { let mut primitive_index = 1; // Start counting primitives from 1 (like app does) let mut column_widths = vec![0usize; num_cols]; let rows = self .columns .iter() .enumerate() .skip(start_row + 1) .take(num_rows) .filter_map(|(_col_idx, col)| { if let SchemaInfo::Primitive { info, stats, .. } = col { let compression_ratio = if stats.total_uncompressed_size > 0 { format!( "{:.2}x", stats.total_uncompressed_size as f64 / stats.total_compressed_size as f64 ) } else { "N/A".to_string() }; let is_selected = selected_index > 0 && (selected_index - start_row) == primitive_index; // Create all cells first let all_cells = vec![ info.repetition.clone(), info.physical.clone(), format_size(stats.total_compressed_size), format_size(stats.total_uncompressed_size), compression_ratio, info.encoding.clone(), info.codec.clone(), stats.min.clone().unwrap_or_else(|| "NULL".to_string()), stats.max.clone().unwrap_or_else(|| "NULL".to_string()), stats.nulls.to_string(), ]; // Select only the visible columns and track their content lengths let visible_cell_contents: Vec<_> = all_cells .into_iter() .skip(start_col) .take(num_cols) .collect(); // Update column widths with the maximum seen so far for (col_idx, content) in visible_cell_contents.iter().enumerate() { column_widths[col_idx] = column_widths[col_idx].max(content.len()); } // Create cells from the content let visible_cells: Vec<_> = visible_cell_contents.into_iter().map(Cell::from).collect(); let mut row = Row::new(visible_cells); if is_selected { row = row.style( ratatui::style::Style::default() .bg(Color::Yellow) .fg(Color::Black), ); } primitive_index += 1; Some(row) } else if let SchemaInfo::Group { repetition, .. } = col { let all_cells = vec![ repetition.clone(), "group".to_string(), "".to_string(), "".to_string(), "".to_string(), "".to_string(), "".to_string(), "".to_string(), "".to_string(), "".to_string(), ]; let visible_cell_contents: Vec<_> = all_cells .into_iter() .skip(start_col) .take(num_cols) .collect(); // Update column widths with the maximum seen so far for (col_idx, content) in visible_cell_contents.iter().enumerate() { column_widths[col_idx] = column_widths[col_idx].max(content.len()); } let visible_cells: Vec<_> = visible_cell_contents .into_iter() .enumerate() .map(|(idx, content)| { if idx == 0 || idx == 1 { Cell::from(content.green()) } else { Cell::from(content) } }) .collect(); let row = Row::new(visible_cells); Some(row) } else { None } }) .collect(); (rows, column_widths) } } fn traverse( node: &ParquetType, prefix: String, is_last: bool, lines: &mut Vec, leaf_idx: &mut usize, summaries: &Vec<(String, String)>, md: &ParquetMetaData, ) { let connector: &'static str = if is_last { "└─" } else { "├─" }; let line = format!("{}{} {}", prefix, connector, node.name()); if node.is_primitive() { let repetition = format!("{:?}", node.get_basic_info().repetition()); let physical = format!("{:?}", node.get_physical_type()); let logical = match node.get_basic_info().logical_type() { Some(logical_type) => logical_type_to_string(&logical_type), None => String::new(), }; let (codec_sum, enc_sum) = &summaries[*leaf_idx]; let stats = aggregate_column_stats(md, *leaf_idx, node.get_physical_type()); let info = ColumnSchemaInfo { name: node.name().to_string(), repetition: repetition.clone(), physical: physical.clone(), logical: logical.clone(), codec: codec_sum.clone(), encoding: enc_sum.clone(), converted_type: node.get_basic_info().converted_type().to_string(), dictionary_values: None, }; lines.push(SchemaInfo::Primitive { name: node.name().to_string(), display: line, info: Box::new(info), stats, }); *leaf_idx += 1; } else { lines.push(SchemaInfo::Group { name: node.name().to_string(), display: line, repetition: format!("{:?}", node.get_basic_info().repetition()), }); } if node.is_group() { let fields = node.get_fields(); let count = fields.len(); for (idx, child) in fields.iter().enumerate() { let next_prefix = format!("{}{}", prefix, if is_last { " " } else { "│ " }); traverse( child.as_ref(), next_prefix, idx == count - 1, lines, leaf_idx, summaries, md, ); } } } /// Efficiently aggregate column statistics across all row groups fn aggregate_column_stats( md: &ParquetMetaData, col_idx: usize, physical: PhysicalType, ) -> ColumnStats { let (min_bytes, max_bytes, nulls, distinct, total_compressed_size, total_uncompressed_size) = md.row_groups().iter().fold( ( None::>, None::>, 0u64, None::, 0u64, 0u64, ), |( mut min_bytes, mut max_bytes, mut nulls, mut distinct, mut compressed, mut uncompressed, ), rg| { let col_meta = rg.column(col_idx); if let Some(stats) = col_meta.statistics() { nulls += stats.null_count_opt().unwrap_or(0); distinct = Some(distinct.unwrap_or(0) + stats.distinct_count_opt().unwrap_or(0)); if let Some(min_b) = stats.min_bytes_opt() && min_bytes.as_ref().is_none_or(|mb| min_b < &mb[..]) { min_bytes = Some(min_b.to_vec()); } if let Some(max_b) = stats.max_bytes_opt() && max_bytes.as_ref().is_none_or(|mb| max_b > &mb[..]) { max_bytes = Some(max_b.to_vec()); } } compressed += col_meta.compressed_size() as u64; uncompressed += col_meta.uncompressed_size() as u64; ( min_bytes, max_bytes, nulls, distinct, compressed, uncompressed, ) }, ); ColumnStats { min: min_bytes.as_deref().map(|b| decode_value(b, physical)), max: max_bytes.as_deref().map(|b| decode_value(b, physical)), nulls, distinct, total_compressed_size, total_uncompressed_size, } } /// Decode raw statistics bytes into a readable value based on the physical type fn decode_value(bytes: &[u8], physical: PhysicalType) -> String { match physical { PhysicalType::INT32 if bytes.len() == 4 => { i32::from_le_bytes(bytes.try_into().unwrap()).to_string() } PhysicalType::INT64 if bytes.len() == 8 => { i64::from_le_bytes(bytes.try_into().unwrap()).to_string() } PhysicalType::FLOAT if bytes.len() == 4 => { format!("{:.4}", f32::from_le_bytes(bytes.try_into().unwrap())) } PhysicalType::DOUBLE if bytes.len() == 8 => { format!("{:.4}", f64::from_le_bytes(bytes.try_into().unwrap())) } PhysicalType::BYTE_ARRAY | PhysicalType::FIXED_LEN_BYTE_ARRAY => std::str::from_utf8(bytes) .map_or_else( |_| { bytes .iter() .map(|b| format!("{b:02X}")) .collect::>() .join("") }, |s| s.to_string(), ), _ => bytes .iter() .map(|b| format!("{b:02X}")) .collect::>() .join(""), } } fn logical_type_to_string(logical_type: &LogicalType) -> String { match logical_type { LogicalType::Decimal { scale, precision } => { format!("Decimal({scale},{precision})") } LogicalType::Integer { bit_width, is_signed, } => format!( "Integer({bit_width},{})", if *is_signed { "sign" } else { "unsign" } ), LogicalType::Time { is_adjusted_to_u_t_c, unit, } => match unit { TimeUnit::MILLIS(_) => format!( "Time({}, millis)", if *is_adjusted_to_u_t_c { "utc" } else { "local" } ), TimeUnit::MICROS(_) => format!( "Time({}, micros)", if *is_adjusted_to_u_t_c { "utc" } else { "local" } ), TimeUnit::NANOS(_) => format!( "Time({}, nanos)", if *is_adjusted_to_u_t_c { "utc" } else { "local" } ), }, LogicalType::Timestamp { is_adjusted_to_u_t_c, unit, } => match unit { TimeUnit::MILLIS(_) => format!( "Timestamp({}, millis)", if *is_adjusted_to_u_t_c { "utc" } else { "local" } ), TimeUnit::MICROS(_) => format!( "Timestamp({}, micros)", if *is_adjusted_to_u_t_c { "utc" } else { "local" } ), TimeUnit::NANOS(_) => format!( "Timestamp({}, nanos)", if *is_adjusted_to_u_t_c { "utc" } else { "local" } ), }, _ => format!("{logical_type:?}"), } } #[cfg(test)] mod tests { use super::*; use parquet::file::reader::{FileReader, SerializedFileReader}; use std::fs::File; fn load_alltypes_schema() -> FileSchema { let path = format!( "{}/alltypes_plain.parquet", crate::file::parquet_test_data(), ); let file = File::open(path).unwrap(); let reader = SerializedFileReader::try_from(file).unwrap(); let metadata = reader.metadata(); FileSchema::from_metadata(metadata).unwrap() } #[test] fn test_file_alltypes_plain_basic() { let file_schema = load_alltypes_schema(); // Test basic metrics assert_eq!(11, file_schema.column_size()); assert_eq!(25, file_schema.tree_width()); // Should have root + 11 primitive columns = 12 total assert!(file_schema.columns.len() >= 12); } #[test] fn test_primitive_column_names() { let file_schema = load_alltypes_schema(); let names = file_schema.primitive_column_names(); // alltypes_plain.parquet has these columns assert_eq!(11, names.len()); assert!(names.contains(&"id".to_string())); assert!(names.contains(&"bool_col".to_string())); assert!(names.contains(&"tinyint_col".to_string())); assert!(names.contains(&"smallint_col".to_string())); assert!(names.contains(&"int_col".to_string())); assert!(names.contains(&"bigint_col".to_string())); assert!(names.contains(&"float_col".to_string())); assert!(names.contains(&"double_col".to_string())); assert!(names.contains(&"date_string_col".to_string())); assert!(names.contains(&"string_col".to_string())); assert!(names.contains(&"timestamp_col".to_string())); } #[test] fn test_column_group_name() { let file_schema = load_alltypes_schema(); // First column after root should be 'id' assert_eq!("id", file_schema.column_group_name(1)); // Get a few more column names let names = file_schema.primitive_column_names(); assert!(!names.is_empty()); } #[test] fn test_column_stats() { let file_schema = load_alltypes_schema(); // Find the 'id' column and check its stats for col in &file_schema.columns { if let SchemaInfo::Primitive { name, stats, .. } = col && name == "id" { // alltypes_plain has 8 rows with id from 0 to 7 assert_eq!(stats.min, None); assert_eq!(stats.max, None); assert_eq!(stats.nulls, 0); // Should have compression stats assert!(stats.total_compressed_size > 0); assert!(stats.total_uncompressed_size > 0); } } } #[test] fn test_column_schema_info() { let file_schema = load_alltypes_schema(); // Check the schema info for specific columns for col in &file_schema.columns { if let SchemaInfo::Primitive { name, info, .. } = col { match name.as_str() { "id" => { assert_eq!(info.physical, "INT32"); assert!(!info.repetition.is_empty()); } "bool_col" => { assert_eq!(info.physical, "BOOLEAN"); } "float_col" => { assert_eq!(info.physical, "FLOAT"); } "double_col" => { assert_eq!(info.physical, "DOUBLE"); } "bigint_col" => { assert_eq!(info.physical, "INT64"); } "string_col" | "date_string_col" => { assert_eq!(info.physical, "BYTE_ARRAY"); } _ => {} } } } } #[test] fn test_generate_table_rows() { let file_schema = load_alltypes_schema(); // Generate rows with no selection let rows = file_schema.generate_table_rows(None); // Should have 11 primitive columns assert_eq!(11, rows.len()); // Generate rows with selection let rows_selected = file_schema.generate_table_rows(Some(1)); assert_eq!(11, rows_selected.len()); } #[test] fn test_generate_table_rows_with_columns() { let file_schema = load_alltypes_schema(); // Test with different column ranges let (rows, widths) = file_schema.generate_table_rows_with_columns(1, 0, 5); // Should have rows for primitive columns assert!(!rows.is_empty()); // Should have width info for 5 columns assert_eq!(5, widths.len()); // All widths should be non-zero (content should exist) for width in &widths { assert!(*width > 0); } } #[test] fn test_generate_table_rows_with_scroll() { let file_schema = load_alltypes_schema(); // Test scrolling with start_row and limited rows let (rows, widths) = file_schema.generate_table_rows_with_scroll( 1, // selected_index 0, // start_col 10, // num_cols 0, // start_row 5, // num_rows (limit to 5) ); // Should have at most 5 rows assert!(rows.len() <= 5); // Should have width info for 10 columns assert_eq!(10, widths.len()); } #[test] fn test_schema_info_types() { let file_schema = load_alltypes_schema(); // First item should be root assert!(matches!(&file_schema.columns[0], SchemaInfo::Root { .. })); // Count different types let mut root_count = 0; let mut primitive_count = 0; let mut group_count = 0; for col in &file_schema.columns { match col { SchemaInfo::Root { .. } => root_count += 1, SchemaInfo::Primitive { .. } => primitive_count += 1, SchemaInfo::Group { .. } => group_count += 1, } } assert_eq!(1, root_count); assert_eq!(11, primitive_count); // alltypes_plain doesn't have nested groups (flat schema) assert_eq!(0, group_count); } #[test] fn test_column_display_strings() { let file_schema = load_alltypes_schema(); // Check that display strings are properly formatted for col in &file_schema.columns { match col { SchemaInfo::Root { display, .. } => { assert!(display.contains("root")); } SchemaInfo::Primitive { name, display, .. } => { assert!(display.contains(name)); } SchemaInfo::Group { name, display, .. } => { assert!(display.contains(name)); } } } } #[test] fn test_compression_ratio_calculation() { let file_schema = load_alltypes_schema(); // Check that compression ratios are calculated correctly for col in &file_schema.columns { if let SchemaInfo::Primitive { stats, .. } = col && stats.total_uncompressed_size > 0 && stats.total_compressed_size > 0 { let ratio = stats.total_uncompressed_size as f64 / stats.total_compressed_size as f64; // Compression ratio should be reasonable (between 0.5x and 10x) assert!(ratio > 0.5 && ratio < 10.0); } } } #[test] fn test_decode_value_int32() { let value = decode_value(&[42, 0, 0, 0], PhysicalType::INT32); assert_eq!(value, "42"); let negative = decode_value(&[255, 255, 255, 255], PhysicalType::INT32); assert_eq!(negative, "-1"); } #[test] fn test_decode_value_int64() { let value = decode_value(&[42, 0, 0, 0, 0, 0, 0, 0], PhysicalType::INT64); assert_eq!(value, "42"); } #[test] fn test_decode_value_float() { let bytes = std::f32::consts::PI.to_le_bytes(); let value = decode_value(&bytes, PhysicalType::FLOAT); assert!(value.starts_with("3.14")); } #[test] fn test_decode_value_double() { let bytes = std::f64::consts::PI.to_le_bytes(); let value = decode_value(&bytes, PhysicalType::DOUBLE); assert!(value.starts_with("3.141")); } #[test] fn test_decode_value_byte_array() { let text = "hello"; let value = decode_value(text.as_bytes(), PhysicalType::BYTE_ARRAY); assert_eq!(value, "hello"); // Test non-UTF8 bytes (should return hex) let binary = [0xFF, 0xFE, 0xFD]; let value = decode_value(&binary, PhysicalType::BYTE_ARRAY); assert_eq!(value, "FFFEFD"); } #[test] fn test_logical_type_to_string() { // Test Decimal let decimal = LogicalType::Decimal { scale: 2, precision: 10, }; assert_eq!(logical_type_to_string(&decimal), "Decimal(2,10)"); // Test Integer let integer = LogicalType::Integer { bit_width: 32, is_signed: true, }; assert_eq!(logical_type_to_string(&integer), "Integer(32,sign)"); let unsigned = LogicalType::Integer { bit_width: 16, is_signed: false, }; assert_eq!(logical_type_to_string(&unsigned), "Integer(16,unsign)"); } } ================================================ FILE: src/file/utils.rs ================================================ /// Convert a byte count into a human-readable string (e.g. "2.3 MB"). pub fn human_readable_bytes(bytes: u64) -> String { const UNITS: [&str; 5] = ["B", "KiB", "MiB", "GiB", "TiB"]; let mut size = bytes as f64; let mut unit = 0; while size >= 1024.0 && unit < UNITS.len() - 1 { size /= 1024.0; unit += 1; } if unit == 0 { format!("{:.0} {}", size, UNITS[unit]) } else { format!("{:.2} {}", size, UNITS[unit]) } } /// Convert a plain count into a human-readable string with K / M / B suffixes. pub fn human_readable_count(n: u64) -> String { const UNITS: [&str; 4] = ["", "K", "M", "B"]; // up to billions let mut unit = 0; let mut value = n as f64; while value >= 1000.0 && unit < UNITS.len() - 1 { value /= 1000.0; unit += 1; } if unit == 0 { format!("{n}") } else { format!("{:.1} {}", value, UNITS[unit]) } } pub fn truncate_str(s: &str, width: usize) -> String { if s.chars().count() > width { let truncated: String = s.chars().take(width - 1).collect(); format!("{truncated}…") } else { s.to_string() } } pub fn commas(n: u64) -> String { let s = n.to_string(); let mut out = String::with_capacity(s.len() + s.len() / 3); for (i, ch) in s.chars().rev().enumerate() { if i > 0 && i % 3 == 0 { out.push(','); } out.push(ch); } out.chars().rev().collect() } /// Format byte size into human-readable format pub fn format_size(bytes: u64) -> String { const UNITS: &[&str] = &["B", "KB", "MB", "GB", "TB"]; let mut size = bytes as f64; let mut unit_index = 0; while size >= 1024.0 && unit_index < UNITS.len() - 1 { size /= 1024.0; unit_index += 1; } if unit_index == 0 { format!("{} {}", bytes, UNITS[unit_index]) } else { format!("{:.1} {}", size, UNITS[unit_index]) } } #[cfg(test)] mod tests { use super::*; #[test] fn test_human_readable_bytes() { assert_eq!(human_readable_bytes(0), "0 B"); assert_eq!(human_readable_bytes(500), "500 B"); assert_eq!(human_readable_bytes(1024), "1.00 KiB"); assert_eq!(human_readable_bytes(1536), "1.50 KiB"); assert_eq!(human_readable_bytes(1024 * 1024), "1.00 MiB"); assert_eq!(human_readable_bytes(1024 * 1024 * 1024), "1.00 GiB"); assert_eq!(human_readable_bytes(1024 * 1024 * 1024 * 1024), "1.00 TiB"); assert_eq!(human_readable_bytes(2500 * 1024 * 1024), "2.44 GiB"); } #[test] fn test_human_readable_count() { assert_eq!(human_readable_count(0), "0"); assert_eq!(human_readable_count(500), "500"); assert_eq!(human_readable_count(999), "999"); assert_eq!(human_readable_count(1000), "1.0 K"); assert_eq!(human_readable_count(1500), "1.5 K"); assert_eq!(human_readable_count(1_000_000), "1.0 M"); assert_eq!(human_readable_count(1_500_000), "1.5 M"); assert_eq!(human_readable_count(1_000_000_000), "1.0 B"); assert_eq!(human_readable_count(2_500_000_000), "2.5 B"); } #[test] fn test_truncate_str() { assert_eq!(truncate_str("hello", 10), "hello"); assert_eq!(truncate_str("hello", 5), "hello"); assert_eq!(truncate_str("hello world", 8), "hello w…"); assert_eq!(truncate_str("hello world", 6), "hello…"); assert_eq!(truncate_str("", 5), ""); assert_eq!(truncate_str("a", 1), "a"); assert_eq!(truncate_str("ab", 1), "…"); assert_eq!(truncate_str("hello", 3), "he…"); } #[test] fn test_commas() { assert_eq!(commas(0), "0"); assert_eq!(commas(100), "100"); assert_eq!(commas(999), "999"); assert_eq!(commas(1000), "1,000"); assert_eq!(commas(1234), "1,234"); assert_eq!(commas(1234567), "1,234,567"); assert_eq!(commas(1_000_000), "1,000,000"); assert_eq!(commas(1_234_567_890), "1,234,567,890"); } #[test] fn test_format_size() { assert_eq!(format_size(0), "0 B"); assert_eq!(format_size(500), "500 B"); assert_eq!(format_size(1024), "1.0 KB"); assert_eq!(format_size(1536), "1.5 KB"); assert_eq!(format_size(1024 * 1024), "1.0 MB"); assert_eq!(format_size(1024 * 1024 * 1024), "1.0 GB"); assert_eq!(format_size(1024 * 1024 * 1024 * 1024), "1.0 TB"); } #[test] fn test_edge_cases() { // Test maximum values assert!(human_readable_bytes(u64::MAX).contains("TiB")); assert!(human_readable_count(u64::MAX).contains("B")); // Test empty string truncation assert_eq!(truncate_str("", 0), ""); // Test single digit comma formatting assert_eq!(commas(1), "1"); } #[test] fn test_unicode_truncation() { // Test with unicode characters assert_eq!(truncate_str("hello 🌍 world", 10), "hello 🌍 w…"); assert_eq!(truncate_str("日本語", 2), "日…"); } } ================================================ FILE: src/lib.rs ================================================ pub mod app; pub mod components; pub mod file; pub mod tabs; pub mod ui; pub use app::App; ================================================ FILE: src/main.rs ================================================ use parqeye::app::App; use parqeye::file::parquet_ctx::ParquetCtx; use std::io; use clap::Parser; #[derive(Parser)] #[command( author, version, about = "Command line tool to visualize parquet files" )] pub struct Opts { /// Path to the parquet file pub path: String, } fn main() -> io::Result<()> { let opts = Opts::parse(); tui(&opts.path)?; Ok(()) } fn tui(path: &str) -> io::Result<()> { let mut terminal = ratatui::init(); let file_info = ParquetCtx::from_file(path).map_err(|e| io::Error::other(e.to_string()))?; let mut app = App::new(&file_info); app.run(&mut terminal)?; ratatui::restore(); Ok(()) } ================================================ FILE: src/tabs/manager.rs ================================================ use crate::file::Renderable; use ratatui::buffer::Buffer; use ratatui::layout::Rect; use ratatui::style::Stylize; use ratatui::text::Line; use ratatui::widgets::Tabs; use ratatui::widgets::Widget; use crate::tabs::Tab; use crate::tabs::metadata::MetadataTab; use crate::tabs::row_groups::RowGroupsTab; use crate::tabs::schema::SchemaTab; use crate::tabs::visualize::VisualizeTab; pub struct TabManager { pub tabs: Vec>, pub active_tab: usize, pub title: String, } impl TabManager { pub fn new(num_columns: usize, num_row_groups: usize, sample_data_rows: usize) -> Self { Self { tabs: vec![ Box::new( VisualizeTab::new() .with_max_horizontal_scroll(num_columns) .with_max_rows(sample_data_rows), ), Box::new( MetadataTab::new() .with_max_horizontal_scroll(num_columns) .with_max_vertical_scroll(num_row_groups), ), Box::new(SchemaTab::new().with_max_vertical_scroll(num_columns)), Box::new( RowGroupsTab::new() .with_max_horizontal_scroll(num_row_groups - 1) .with_max_vertical_scroll(num_columns), ), ], active_tab: 0, title: "Tabs".to_string(), } } pub fn next(&mut self) { self.active_tab = (self.active_tab + 1) % self.tabs.len(); } pub fn prev(&mut self) { if self.active_tab == 0 { self.active_tab = self.tabs.len() - 1; } else { self.active_tab = (self.active_tab.saturating_sub(1)) % self.tabs.len(); } } #[allow(clippy::borrowed_box)] pub fn active_tab(&self) -> &Box { &self.tabs[self.active_tab] } pub fn render_instructions(&self, area: Rect, buf: &mut Buffer) { let mut span = self.active_tab().instructions(); if !span.is_empty() { span.push(" - ".into()); } span.extend(vec![ "[Tab]".green(), " Next Tab".into(), ", ".into(), "[Q]".blue(), "uit".into(), ]); let line = Line::from(span); // Calculate the width of the instruction text let instruction_width = line.width() as u16; // Create a layout that positions the instructions on the right use ratatui::layout::{Constraint, Layout}; let [_, instruction_area] = Layout::horizontal([Constraint::Fill(1), Constraint::Length(instruction_width)]) .areas(area); line.render(instruction_area, buf); } } impl Renderable for TabManager { fn render_content(&self, area: Rect, buf: &mut Buffer) { let tab_titles: Vec = self .tabs .iter() .map(|t| Line::from(t.to_string())) .collect(); let tabs_widget: Tabs<'_> = Tabs::new(tab_titles) .select(self.active_tab) .padding(" ", " ") .divider(" "); tabs_widget.render(area, buf); } } ================================================ FILE: src/tabs/metadata.rs ================================================ use crossterm::event::KeyEvent; use std::io; use crate::{app::AppState, tabs::Tab}; use ratatui::text::Span; pub struct MetadataTab { pub max_horizontal_scroll: Option, pub max_vertical_scroll: Option, } impl Default for MetadataTab { fn default() -> Self { Self::new() } } impl MetadataTab { pub fn new() -> Self { Self { max_horizontal_scroll: None, max_vertical_scroll: None, } } pub fn with_max_horizontal_scroll(mut self, max_horizontal_scroll: usize) -> Self { self.max_horizontal_scroll = Some(max_horizontal_scroll); self } pub fn with_max_vertical_scroll(mut self, max_vertical_scroll: usize) -> Self { self.max_vertical_scroll = Some(max_vertical_scroll); self } } impl Tab for MetadataTab { #[allow(unused_variables)] fn on_event(&self, key_event: KeyEvent, state: &mut AppState) -> Result<(), io::Error> { Ok(()) } fn instructions(&self) -> Vec> { vec![] } fn to_string(&self) -> String { "Metadata".to_string() } } ================================================ FILE: src/tabs/mod.rs ================================================ pub mod manager; pub mod metadata; pub mod row_groups; pub mod schema; pub mod visualize; pub use manager::TabManager; pub use metadata::MetadataTab; pub use schema::SchemaTab; pub use visualize::VisualizeTab; use crate::app::AppState; use crossterm::event::KeyEvent; use ratatui::text::Span; use std::io; pub trait Tab { fn on_event(&self, key_event: KeyEvent, state: &mut AppState) -> Result<(), io::Error>; fn instructions(&self) -> Vec>; fn to_string(&self) -> String; } ================================================ FILE: src/tabs/row_groups.rs ================================================ use crate::{app::AppState, tabs::Tab}; use crossterm::event::{KeyCode, KeyEvent}; use ratatui::style::Stylize; use ratatui::text::Span; use std::io; pub struct RowGroupsTab { pub max_horizontal_scroll: Option, pub max_vertical_scroll: Option, } impl Default for RowGroupsTab { fn default() -> Self { Self::new() } } impl RowGroupsTab { pub fn new() -> Self { Self { max_horizontal_scroll: None, max_vertical_scroll: None, } } pub fn with_max_horizontal_scroll(mut self, max_horizontal_scroll: usize) -> Self { self.max_horizontal_scroll = Some(max_horizontal_scroll); self } pub fn with_max_vertical_scroll(mut self, max_vertical_scroll: usize) -> Self { self.max_vertical_scroll = Some(max_vertical_scroll); self } } impl Tab for RowGroupsTab { fn on_event(&self, key_event: KeyEvent, state: &mut AppState) -> Result<(), io::Error> { match key_event.code { KeyCode::Up if state.vertical_offset() > 0 => state.up(), KeyCode::Down if state.vertical_offset() < self.max_vertical_scroll.unwrap_or(usize::MAX) => { state.down() } KeyCode::Left if state.horizontal_offset() > 0 => state.left(), KeyCode::Right if state.horizontal_offset() < self.max_horizontal_scroll.unwrap_or(usize::MAX) => { state.right() } _ => {} } Ok(()) } fn instructions(&self) -> Vec> { vec![ "→".green(), "/".white(), "←".blue(), " : ".into(), "Iterate Row Groups".into(), ", ".into(), "↑".green(), "/".white(), "↓".blue(), " : ".into(), "Schema".into(), ] } fn to_string(&self) -> String { "Row Groups".to_string() } } ================================================ FILE: src/tabs/schema.rs ================================================ use crossterm::event::{KeyCode, KeyEvent}; use ratatui::style::Stylize; use ratatui::text::Span; use std::io; use crate::{app::AppState, tabs::Tab}; pub struct SchemaTab { pub max_horizontal_scroll: Option, pub max_vertical_scroll: Option, } impl Default for SchemaTab { fn default() -> Self { Self::new() } } impl SchemaTab { pub fn new() -> Self { Self { max_horizontal_scroll: None, max_vertical_scroll: None, } } pub fn with_max_horizontal_scroll(mut self, max_horizontal_scroll: usize) -> Self { self.max_horizontal_scroll = Some(max_horizontal_scroll); self } pub fn with_max_vertical_scroll(mut self, max_vertical_scroll: usize) -> Self { self.max_vertical_scroll = Some(max_vertical_scroll); self } } impl Tab for SchemaTab { fn on_event(&self, key_event: KeyEvent, state: &mut AppState) -> Result<(), io::Error> { match key_event.code { KeyCode::Up if state.vertical_offset() > 0 => state.up(), KeyCode::Down if state.vertical_offset() < self.max_vertical_scroll.unwrap_or(usize::MAX) => { state.down() } KeyCode::Left if state.horizontal_offset() > 0 => state.left(), KeyCode::Right if state.horizontal_offset() < self.max_horizontal_scroll.unwrap_or(usize::MAX) => { state.right() } _ => {} } Ok(()) } fn instructions(&self) -> Vec> { vec![ "←".green(), "/".white(), "→".blue(), " : ".into(), "Scroll".into(), ", ".into(), "↑".green(), "/".white(), "↓".blue(), " : ".into(), "Schema".into(), ] } fn to_string(&self) -> String { "Schema".to_string() } } ================================================ FILE: src/tabs/visualize.rs ================================================ use crossterm::event::{KeyCode, KeyEvent}; use ratatui::style::Stylize; use ratatui::text::Span; use std::io; use crate::{app::AppState, tabs::Tab}; pub struct VisualizeTab { pub max_horizontal_scroll: Option, pub max_rows: Option, pub visible_rows: Option, } impl Default for VisualizeTab { fn default() -> Self { Self::new() } } impl VisualizeTab { pub fn new() -> Self { Self { max_horizontal_scroll: None, max_rows: None, visible_rows: None, } } pub fn with_max_horizontal_scroll(mut self, max_horizontal_scroll: usize) -> Self { self.max_horizontal_scroll = Some(max_horizontal_scroll); self } pub fn with_max_rows(mut self, max_rows: usize) -> Self { self.max_rows = Some(max_rows); self } pub fn with_visible_rows(mut self, visible_rows: usize) -> Self { self.visible_rows = Some(visible_rows); self } } impl Tab for VisualizeTab { fn on_event(&self, key_event: KeyEvent, state: &mut AppState) -> Result<(), io::Error> { let max_rows = self.max_rows.unwrap_or(0); let visible_rows = state.visible_data_rows(); match key_event.code { // Row navigation (Up/Down arrows) KeyCode::Up => { if state.vertical_offset() > 0 { state.up(); state.adjust_scroll_to_selection(visible_rows, max_rows); } } KeyCode::Down => { if state.vertical_offset() < max_rows.saturating_sub(1) { state.down(); state.adjust_scroll_to_selection(visible_rows, max_rows); } } // Page navigation (u/d keys) KeyCode::Char('u') | KeyCode::Char('U') => { state.page_up(visible_rows, max_rows); } KeyCode::Char('d') | KeyCode::Char('D') => { state.page_down(visible_rows, max_rows); } // Column navigation (Left/Right arrows) KeyCode::Left if state.horizontal_offset() > 0 => state.left(), KeyCode::Right if state.horizontal_offset() < self.max_horizontal_scroll.unwrap_or(usize::MAX) - 1 => { state.right() } _ => {} } Ok(()) } fn instructions(&self) -> Vec> { vec![ "↑".green(), "/".white(), "↓".blue(), " : ".into(), "Row".into(), " | ".white(), "→".green(), "/".white(), "←".blue(), " : ".into(), "Column".into(), " | ".white(), "u".green(), "/".white(), "d".blue(), " : ".into(), "Page".into(), ] } fn to_string(&self) -> String { "Visualize".to_string() } } ================================================ FILE: src/ui.rs ================================================ use ratatui::{ Frame, buffer::Buffer, layout::{Constraint, Layout, Rect}, prelude::Color, style::{Style, Stylize}, widgets::{Block, BorderType, Borders, Widget}, }; use crate::app::AppRenderView; use crate::components::{ DataTable, FileSchemaTable, RowGroupColumnMetadataComponent, RowGroupMetadata, RowGroupProgressBar, SchemaTreeComponent, ScrollbarComponent, }; use crate::file::Renderable; pub fn render_app<'a, 'b>(app: &'b AppRenderView<'a>, frame: &mut Frame) where 'b: 'a, { frame.render_widget(AppWidget(app), frame.area()); } struct AppWidget<'a>(&'a AppRenderView<'a>); impl<'a> AppWidget<'a> { // Helper function to calculate the tree index of the selected primitive column fn calculate_selected_tree_index(&self, vertical_offset: usize) -> Option { if vertical_offset == 0 { return None; } let primitive_to_schema_map: Vec = self .0 .parquet_ctx .schema .columns .iter() .enumerate() .filter_map(|(idx, line)| { matches!(line, crate::file::schema::SchemaInfo::Primitive { .. }).then_some(idx) }) .collect(); primitive_to_schema_map.get(vertical_offset - 1).copied() } // Helper function to calculate adjusted scroll offset to keep selected item visible fn calculate_scroll_to_show_item( &self, selected_tree_idx: Option, current_scroll: usize, visible_items: usize, ) -> usize { match selected_tree_idx { Some(idx) => { // Ensure selected item is visible if idx < current_scroll { idx } else if idx >= current_scroll + visible_items { idx.saturating_sub(visible_items - 1) } else { current_scroll } } None => current_scroll, } } // Calculate the adjusted scroll offset for the schema tree fn calculate_adjusted_scroll_offset(&self, visible_tree_items: usize) -> usize { let selected_tree_idx = self.calculate_selected_tree_index(self.0.state().vertical_offset()); self.calculate_scroll_to_show_item( selected_tree_idx, self.0.state().tree_scroll_offset(), visible_tree_items, ) } // Calculate the total width needed for the tree section (including scrollbar if needed) fn calculate_tree_width(&self, tree_width: u16, needs_scrollbar: bool) -> u16 { if needs_scrollbar { tree_width + 2 // +1 for scrollbar, +1 for spacing } else { tree_width + 1 } } // Calculate tree width for row groups view (slightly different spacing) fn calculate_tree_width_for_row_groups(&self, tree_width: u16, needs_scrollbar: bool) -> u16 { if needs_scrollbar { tree_width + 2 // +1 for scrollbar, +1 for spacing } else { tree_width } } // Render the schema tree section (tree + optional scrollbar #[allow(clippy::too_many_arguments)] fn render_schema_tree_section( &self, area: Rect, tree_width: u16, needs_scrollbar: bool, total_tree_items: usize, visible_tree_items: usize, adjusted_scroll: usize, buf: &mut Buffer, ) { if needs_scrollbar { let [tree_area, scrollbar_area] = Layout::horizontal([Constraint::Length(tree_width + 1), Constraint::Length(1)]) .areas(area); self.render_schema_tree_with_scroll(tree_area, adjusted_scroll, buf); ScrollbarComponent::vertical(total_tree_items, visible_tree_items, adjusted_scroll) .render(scrollbar_area, buf); } else { self.render_schema_tree_with_scroll(area, adjusted_scroll, buf); } } // Render the schema table fn render_schema_table(&self, area: Rect, adjusted_scroll: usize, buf: &mut Buffer) { FileSchemaTable::new(&self.0.parquet_ctx.schema) .with_selected_index(self.0.state().vertical_offset()) .with_horizontal_scroll(self.0.state().horizontal_offset()) .with_vertical_scroll(adjusted_scroll) .render(area, buf); } fn render_tabs_view(&self, area: Rect, buf: &mut Buffer) { let block = Block::default() .borders(Borders::ALL) .border_type(BorderType::Rounded) .border_style(Style::default().fg(Color::LightYellow)); let inner_area = block.inner(area); block.render(area, buf); let file_name_length = self.0.file_name().len() as u16; let [tabs_area, file_name_area] = Layout::horizontal([Constraint::Min(0), Constraint::Length(file_name_length)]) .areas(inner_area); self.0.tabs().render_content(tabs_area, buf); self.0.file_name().green().render(file_name_area, buf); } fn render_footer_view(&self, area: Rect, buf: &mut Buffer) { let title_width = self.0.title.len() as u16; let [title_area, footer_area] = Layout::horizontal([Constraint::Length(title_width), Constraint::Fill(1)]).areas(area); self.0.title.bold().fg(Color::Green).render(title_area, buf); self.0.tabs().render_instructions(footer_area, buf); } fn render_metadata_view(&self, area: Rect, buf: &mut Buffer) { // render the metadata self.0.parquet_ctx.metadata.render_content(area, buf); } fn render_schema_view(&self, area: Rect, buf: &mut Buffer) { let tree_width = self.0.parquet_ctx.schema.tree_width() as u16; let total_tree_items = self.0.parquet_ctx.schema.columns.len(); let visible_tree_items = area.height.saturating_sub(2) as usize; let needs_scrollbar = total_tree_items > visible_tree_items; let adjusted_scroll = self.calculate_adjusted_scroll_offset(visible_tree_items); let tree_total_width = self.calculate_tree_width(tree_width, needs_scrollbar); let [tree_container_area, central_area] = Layout::horizontal([Constraint::Length(tree_total_width), Constraint::Fill(1)]) .areas(area); self.render_schema_tree_section( tree_container_area, tree_width, needs_scrollbar, total_tree_items, visible_tree_items, adjusted_scroll, buf, ); self.render_schema_table(central_area, adjusted_scroll, buf); } fn render_schema_tree_with_scroll(&self, area: Rect, scroll_offset: usize, buf: &mut Buffer) { SchemaTreeComponent::new(&self.0.parquet_ctx.schema.columns) .with_title("Schema Tree".to_string()) .with_selected_index(self.0.state().vertical_offset()) .with_scroll_offset(scroll_offset) .render(area, buf); } fn render_row_groups_view(&self, area: Rect, buf: &mut Buffer) { let tree_width = self.0.parquet_ctx.schema.tree_width() as u16; let total_tree_items = self.0.parquet_ctx.schema.columns.len(); let visible_tree_items = area.height.saturating_sub(2) as usize; let needs_scrollbar = total_tree_items > visible_tree_items; let adjusted_scroll = self.calculate_adjusted_scroll_offset(visible_tree_items); let tree_total_width = self.calculate_tree_width_for_row_groups(tree_width, needs_scrollbar); let [tree_container_area, main_area] = Layout::horizontal([Constraint::Length(tree_total_width), Constraint::Fill(1)]) .areas(area); self.render_schema_tree_section( tree_container_area, tree_width, needs_scrollbar, total_tree_items, visible_tree_items, adjusted_scroll, buf, ); let [rg_progress, central_area] = Layout::vertical([Constraint::Length(3), Constraint::Fill(1)]).areas(main_area); RowGroupProgressBar::new( &self.0.parquet_ctx.row_groups.row_groups, self.0.state().horizontal_offset(), ) .render(rg_progress, buf); if self.0.state().vertical_offset() > 0 { RowGroupColumnMetadataComponent::new( &self.0.parquet_ctx.row_groups.row_groups[self.0.state().horizontal_offset()] .column_metadata[self.0.state().vertical_offset() - 1], ) .render(central_area, buf); } else { // Display row group level statistics and charts when no column is selected RowGroupMetadata::new( &self.0.parquet_ctx.row_groups.row_groups, &self.0.parquet_ctx.row_groups.avg_median_stats, self.0.state().horizontal_offset(), ) .render(central_area, buf); } } fn render_visualize_view(&self, area: Rect, buf: &mut Buffer) { DataTable::new(&self.0.parquet_ctx.sample_data) .with_horizontal_scroll(self.0.state().horizontal_offset()) .with_vertical_scroll(self.0.state().data_vertical_scroll()) .with_selected_row(Some(self.0.state().vertical_offset())) .render(area, buf) } } impl<'a> Widget for AppWidget<'a> { fn render(self, area: Rect, buf: &mut Buffer) { let app = self.0; let vertical = Layout::vertical([ Constraint::Length(3), Constraint::Fill(1), Constraint::Length(1), ]); let [header_area, inner_area, footer_area] = vertical.areas(area); self.render_tabs_view(header_area, buf); self.render_footer_view(footer_area, buf); match app.tabs().active_tab().to_string().as_str() { "Metadata" => self.render_metadata_view(inner_area, buf), "Schema" => self.render_schema_view(inner_area, buf), "Row Groups" => self.render_row_groups_view(inner_area, buf), "Visualize" => self.render_visualize_view(inner_area, buf), _ => {} } } }